For TCP tap connections we keep track of both the IP address and port for each side of a connection as seen by the guest. We're planning to track similar information in a number of other places as well. To assist with this, create a new structure: struct demiflow to track both sides of a connection or other logical packet flow as seen from a single "side" of passt. Also add a small helper function for initializing this structure. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> --- flow.h | 41 ++++++++++++++++++++++++++++++++++++ tcp.c | 59 ++++++++++++++++++++++++++-------------------------- tcp_conn.h | 11 ++-------- tcp_splice.c | 1 + 4 files changed, 74 insertions(+), 38 deletions(-) create mode 100644 flow.h diff --git a/flow.h b/flow.h new file mode 100644 index 0000000..f7c0981 --- /dev/null +++ b/flow.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson <david@gibson.dropbear.id.au> + * + * Tracking for logical "flows" of packets. 
+ */ +#ifndef FLOW_H +#define FLOW_H + +/** + * struct demiflow - Describes a logical packet flow as seen from one "side" + * @caddr: Correspondent address (remote address from passt's PoV) + * @faddr: Forwarding address (local address from passt's PoV) + * @cport: Correspondent port + * @fport: Forwarding port + */ +struct demiflow { + union inany_addr faddr; + union inany_addr caddr; + in_port_t fport, cport; +}; + +/** demiflow_from_af - Initialize a demiflow from addresses + * @df: demiflow to initialize + * @af: Address family for @faddr and @caddr + * @faddr: Forwarding address (pointer to in_addr or in6_addr) + * @fport: Forwarding port + * @caddr: Correspondent address (pointer to in_addr or in6_addr) + * @cport: Correspondent port + */ +static inline void demiflow_from_af(struct demiflow *df, int af, + const void *faddr, in_port_t fport, + const void *caddr, in_port_t cport) +{ + inany_from_af(&df->faddr, af, faddr); + inany_from_af(&df->caddr, af, caddr); + df->fport = fport; + df->cport = cport; +} + +#endif /* FLOW_H */ diff --git a/tcp.c b/tcp.c index 6c4d71e..c1875c3 100644 --- a/tcp.c +++ b/tcp.c @@ -302,6 +302,7 @@ #include "tcp_splice.h" #include "log.h" #include "inany.h" +#include "flow.h" #include "tcp_conn.h" @@ -399,7 +400,7 @@ struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */ #define OPT_SACK 5 #define OPT_TS 8 -#define CONN_V4(conn) (!!inany_v4(&(conn)->faddr)) +#define CONN_V4(conn) (!!inany_v4(&(conn)->tapflow.faddr)) #define CONN_V6(conn) (!CONN_V4(conn)) #define CONN_IS_CLOSING(conn) \ ((conn->events & ESTABLISHED) && \ @@ -852,7 +853,7 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn) int i; for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) - if (inany_equals(&conn->faddr, low_rtt_dst + i)) + if (inany_equals(&conn->tapflow.faddr, low_rtt_dst + i)) return 1; return 0; @@ -874,7 +875,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, return; for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) { - if 
(inany_equals(&conn->faddr, low_rtt_dst + i)) + if (inany_equals(&conn->tapflow.faddr, low_rtt_dst + i)) return; if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i)) hole = i; @@ -886,7 +887,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, if (hole == -1) return; - low_rtt_dst[hole++] = conn->faddr; + low_rtt_dst[hole++] = conn->tapflow.faddr; if (hole == LOW_RTT_TABLE_SIZE) hole = 0; inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any); @@ -1151,8 +1152,8 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn, const union inany_addr *faddr, in_port_t cport, in_port_t fport) { - if (inany_equals(&conn->faddr, faddr) && - conn->cport == cport && conn->fport == fport) + if (inany_equals(&conn->tapflow.faddr, faddr) && + conn->tapflow.cport == cport && conn->tapflow.fport == fport) return 1; return 0; @@ -1194,7 +1195,8 @@ static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *faddr, static unsigned int tcp_conn_hash(const struct ctx *c, const struct tcp_tap_conn *conn) { - return tcp_hash(c, &conn->faddr, conn->cport, conn->fport); + return tcp_hash(c, &conn->tapflow.faddr, + conn->tapflow.cport, conn->tapflow.fport); } /** @@ -1206,7 +1208,8 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn) { int b; - b = tcp_hash(c, &conn->faddr, conn->cport, conn->fport); + b = tcp_hash(c, &conn->tapflow.faddr, + conn->tapflow.cport, conn->tapflow.fport); conn->next_index = tc_hash[b] ? 
CONN_IDX(tc_hash[b]) : -1; tc_hash[b] = conn; @@ -1425,13 +1428,13 @@ static size_t tcp_l2_buf_fill_headers(const struct ctx *c, void *p, size_t plen, const uint16_t *check, uint32_t seq) { - const struct in_addr *a4 = inany_v4(&conn->faddr); + const struct in_addr *a4 = inany_v4(&conn->tapflow.faddr); size_t ip_len, tlen; #define SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq) \ do { \ - b->th.source = htons(conn->fport); \ - b->th.dest = htons(conn->cport); \ + b->th.source = htons(conn->tapflow.fport); \ + b->th.dest = htons(conn->tapflow.cport); \ b->th.seq = htonl(seq); \ b->th.ack_seq = htonl(conn->seq_ack_to_tap); \ if (conn->events & ESTABLISHED) { \ @@ -1449,7 +1452,7 @@ do { \ ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr); b->iph.tot_len = htons(ip_len); b->iph.saddr = a4->s_addr; - b->iph.daddr = inany_v4(&conn->caddr)->s_addr; + b->iph.daddr = inany_v4(&conn->tapflow.caddr)->s_addr; if (check) b->iph.check = *check; @@ -1467,8 +1470,8 @@ do { \ ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr); b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr)); - b->ip6h.saddr = conn->faddr.a6; - b->ip6h.daddr = conn->caddr.a6; + b->ip6h.saddr = conn->tapflow.faddr.a6; + b->ip6h.daddr = conn->tapflow.caddr.a6; memset(b->ip6h.flow_lbl, 0, 3); @@ -1829,10 +1832,10 @@ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn, union inany_addr dst; in_port_t dstport; } __attribute__((__packed__)) in = { - .src = conn->faddr, - .srcport = conn->cport, - .dst = conn->caddr, - .dstport = conn->fport, + .src = conn->tapflow.faddr, + .srcport = conn->tapflow.cport, + .dst = conn->tapflow.caddr, + .dstport = conn->tapflow.fport, }; uint32_t ns, seq = 0; @@ -2049,10 +2052,8 @@ static void tcp_conn_from_tap(struct ctx *c, if (!(conn->wnd_from_tap = (htons(th->window) >> conn->ws_from_tap))) conn->wnd_from_tap = 1; - inany_from_af(&conn->faddr, af, daddr); - inany_from_af(&conn->caddr, af, saddr); - conn->fport = ntohs(th->dest); - 
conn->cport = ntohs(th->source); + demiflow_from_af(&conn->tapflow, af, daddr, ntohs(th->dest), + saddr, ntohs(th->source)); if (af == AF_INET) { sa = (struct sockaddr *)&addr4; @@ -2719,18 +2720,18 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union epoll_ref ref, conn->ws_to_tap = conn->ws_from_tap = 0; conn_event(c, conn, SOCK_ACCEPTED); - inany_from_sockaddr(&conn->faddr, &conn->fport, sa); - tcp_snat_inbound(c, &conn->faddr); + inany_from_sockaddr(&conn->tapflow.faddr, &conn->tapflow.fport, sa); + tcp_snat_inbound(c, &conn->tapflow.faddr); if (CONN_V4(conn)) { - inany_from_af(&conn->caddr, AF_INET, &c->ip4.addr_seen); + inany_from_af(&conn->tapflow.caddr, AF_INET, &c->ip4.addr_seen); } else { - if (IN6_IS_ADDR_LINKLOCAL(&conn->faddr.a6)) - conn->caddr.a6 = c->ip6.addr_ll_seen; + if (IN6_IS_ADDR_LINKLOCAL(&conn->tapflow.faddr.a6)) + conn->tapflow.caddr.a6 = c->ip6.addr_ll_seen; else - conn->caddr.a6 = c->ip6.addr_seen; + conn->tapflow.caddr.a6 = c->ip6.addr_seen; } - conn->cport = ref.r.p.tcp.tcp.index; + conn->tapflow.cport = ref.r.p.tcp.tcp.index; tcp_seq_init(c, conn, now); tcp_hash_insert(c, conn); diff --git a/tcp_conn.h b/tcp_conn.h index 9151c18..92d4637 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -35,10 +35,7 @@ extern const char *tcp_common_flag_str[]; * @ws_to_tap: Window scaling factor advertised to tap/guest * @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS * @seq_dup_ack_approx: Last duplicate ACK number sent to tap - * @caddr: Guest side correspondent address (guest's local address) - * @faddr: Guest side forwarding address (guest's remote address) - * @cport: Guest side correspondent port (guest's local port) - * @fport: Guest side forwarding port (guest's remote port) + * @tapflow: Tap(guest)-side demiflow * @wnd_from_tap: Last window size from tap, unscaled (as received) * @wnd_to_tap: Sending window advertised to tap, unscaled (as sent) * @seq_to_tap: Next sequence for packets to tap @@ -105,11 +102,7 @@ struct 
tcp_tap_conn { uint8_t seq_dup_ack_approx; - - union inany_addr caddr; - union inany_addr faddr; - in_port_t cport; - in_port_t fport; + struct demiflow tapflow; uint16_t wnd_from_tap; uint16_t wnd_to_tap; diff --git a/tcp_splice.c b/tcp_splice.c index 71256b0..a1aeff7 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -53,6 +53,7 @@ #include "log.h" #include "tcp_splice.h" #include "inany.h" +#include "flow.h" #include "tcp_conn.h" -- 2.41.0