tcp_tap_conn has several fields to track addresses and ports as seen by the guest/namespace. However we now have general fields for this in the common flowside struct. Use those instead of protocol specific fields. So far we've only explicitly tracked the guest side forwarding address in the TCP connection - the remote address from the guest's point of view. The tap side endpoint address - the local address from the guest's point of view - was assumed to always be one of the handful of guest addresses we track as addr_seen (one each for IPv4, IPv6 global and IPv6 link-local). struct flowside expects both addresses, and we will want to use the endpoint address in future. So, we need to add code to determine that address and record it in the flowside. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- flow.h | 20 +++++++++++++++ tcp.c | 75 ++++++++++++++++++++++++++++-------------------------- tcp_conn.h | 8 ------ 3 files changed, 59 insertions(+), 44 deletions(-) diff --git a/flow.h b/flow.h index e090ba0..e7126e4 100644 --- a/flow.h +++ b/flow.h @@ -45,6 +45,26 @@ struct flowside { static_assert(_Alignof(struct flowside) == _Alignof(uint32_t), "Unexpected alignment for struct flowside"); +/** flowside_from_af - Initialize flowside from addresses + * @fside: flowside to initialize + * @pif: pif if of this flowside + * @af: Address family (AF_INET or AF_INET6) + * @faddr: Forwarding address (pointer to in_addr or in6_addr) + * @fport: Forwarding port + * @eaddr: Endpoint address (pointer to in_addr or in6_addr) + * @eport: Endpoint port + */ +static inline void flowside_from_af(struct flowside *fside, uint8_t pif, int af, + const void *faddr, in_port_t fport, + const void *eaddr, in_port_t eport) +{ + fside->pif = pif; + inany_from_af(&fside->faddr, af, faddr); + inany_from_af(&fside->eaddr, af, eaddr); + fside->fport = fport; + fside->eport = eport; +} + /** flowside_complete - Check if flowside is fully initialized * @fside: flowside to check */ diff --git a/tcp.c b/tcp.c index ddabc34..7ef20b1 100644 --- a/tcp.c +++ b/tcp.c @@ -394,7 +394,9 @@ struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */ #define OPT_SACK 5 #define OPT_TS 8 -#define CONN_V4(conn) (!!inany_v4(&(conn)->faddr)) +#define TAPFSIDE(conn) (&(conn)->f.side[TAPSIDE]) + +#define CONN_V4(conn) (!!inany_v4(&TAPFSIDE(conn)->faddr)) #define CONN_V6(conn) (!CONN_V4(conn)) #define CONN_IS_CLOSING(conn) \ ((conn->events & ESTABLISHED) && \ @@ -845,7 +847,7 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn) int i; for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) - if (inany_equals(&conn->faddr, low_rtt_dst + i)) + if (inany_equals(&TAPFSIDE(conn)->faddr, low_rtt_dst + i)) return 1; return 0; @@ -867,7 +869,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, return; for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) { - if (inany_equals(&conn->faddr, low_rtt_dst + i)) + if (inany_equals(&TAPFSIDE(conn)->faddr, low_rtt_dst + i)) return; if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i)) hole = i; @@ -879,7 +881,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, if (hole == -1) return; - low_rtt_dst[hole++] = conn->faddr; + low_rtt_dst[hole++] = TAPFSIDE(conn)->faddr; if (hole == LOW_RTT_TABLE_SIZE) hole = 0; inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any); @@ -1144,8 +1146,8 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn, const union inany_addr *faddr, in_port_t eport, in_port_t fport) { - if (inany_equals(&conn->faddr, faddr) && - conn->eport == eport && conn->fport == fport) + if (inany_equals(&TAPFSIDE(conn)->faddr, faddr) && + TAPFSIDE(conn)->eport == eport && TAPFSIDE(conn)->fport == fport) return 1; return 0; @@ -1179,7 +1181,8 @@ static uint64_t tcp_hash(const struct ctx *c, const union inany_addr *faddr, static uint64_t tcp_conn_hash(const struct ctx *c, const struct tcp_tap_conn *conn) { - return tcp_hash(c, &conn->faddr, conn->eport, conn->fport); + return tcp_hash(c, &TAPFSIDE(conn)->faddr, + TAPFSIDE(conn)->eport, TAPFSIDE(conn)->fport); } /** @@ -1365,13 +1368,13 @@ static size_t tcp_l2_buf_fill_headers(const struct ctx *c, void *p, size_t plen, const uint16_t *check, uint32_t seq) { - const struct in_addr *a4 = inany_v4(&conn->faddr); + const struct in_addr *a4 = inany_v4(&TAPFSIDE(conn)->faddr); size_t ip_len, tlen; #define SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq) \ do { \ - b->th.source = htons(conn->fport); \ - b->th.dest = htons(conn->eport); \ + b->th.source = htons(TAPFSIDE(conn)->fport); \ + b->th.dest = htons(TAPFSIDE(conn)->eport); \ b->th.seq = htonl(seq); \ b->th.ack_seq = htonl(conn->seq_ack_to_tap); \ if (conn->events & ESTABLISHED) { \ @@ -1389,7 +1392,7 @@ do { \ ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr); b->iph.tot_len = htons(ip_len); b->iph.saddr = a4->s_addr; - b->iph.daddr = c->ip4.addr_seen.s_addr; + b->iph.daddr = inany_v4(&TAPFSIDE(conn)->eaddr)->s_addr; if (check) b->iph.check = *check; @@ -1407,11 +1410,8 @@ do { \ ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr); b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr)); - b->ip6h.saddr = conn->faddr.a6; - if (IN6_IS_ADDR_LINKLOCAL(&b->ip6h.saddr)) - b->ip6h.daddr = c->ip6.addr_ll_seen; - else - b->ip6h.daddr = c->ip6.addr_seen; + b->ip6h.saddr = TAPFSIDE(conn)->faddr.a6; + b->ip6h.daddr = TAPFSIDE(conn)->eaddr.a6; memset(b->ip6h.flow_lbl, 0, 3); @@ -1739,26 +1739,21 @@ static void tcp_tap_window_update(struct tcp_tap_conn *conn, unsigned wnd) /** * tcp_seq_init() - Calculate initial sequence number according to RFC 6528 * @c: Execution context - * @conn: TCP connection, with faddr, fport and eport populated + * @conn: TCP connection, with faddr, fport, eaddr, eport populated * @now: Current timestamp */ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn, const struct timespec *now) { struct siphash_state state = SIPHASH_INIT(c->hash_secret); - union inany_addr aany; uint64_t hash; uint32_t ns; - if (CONN_V4(conn)) - inany_from_af(&aany, AF_INET, &c->ip4.addr); - else - inany_from_af(&aany, AF_INET6, &c->ip6.addr); - - inany_siphash_feed(&state, &conn->faddr); - inany_siphash_feed(&state, &aany); + inany_siphash_feed(&state, &TAPFSIDE(conn)->faddr); + inany_siphash_feed(&state, &TAPFSIDE(conn)->eaddr); hash = siphash_final(&state, 36, - (uint64_t)conn->fport << 16 | conn->eport); + (uint64_t)TAPFSIDE(conn)->fport << 16 | + TAPFSIDE(conn)->eport); /* 32ns ticks, overflows 32 bits every 137s */ ns = (now->tv_sec * 1000000000 + now->tv_nsec) >> 5; @@ -1925,8 +1920,6 @@ static void tcp_conn_from_tap(struct ctx *c, socklen_t sl; int s, mss; - (void)saddr; - if (!(flow = flow_alloc())) return; @@ -1953,6 +1946,10 @@ static void tcp_conn_from_tap(struct ctx *c, conn = &flow->tcp; conn->f.type = FLOW_TCP; + flowside_from_af(TAPFSIDE(conn), PIF_TAP, af, + daddr, ntohs(th->dest), saddr, ntohs(th->source)); + ASSERT(flowside_complete(TAPFSIDE(conn))); + conn->sock = s; conn->timer = -1; conn_event(c, conn, TAP_SYN_RCVD); @@ -1972,8 +1969,6 @@ static void tcp_conn_from_tap(struct ctx *c, if (!(conn->wnd_from_tap = (htons(th->window) >> conn->ws_from_tap))) conn->wnd_from_tap = 1; - inany_from_af(&conn->faddr, af, daddr); - if (af == AF_INET) { sa = (struct sockaddr *)&addr4; sl = sizeof(addr4); @@ -1982,9 +1977,6 @@ static void tcp_conn_from_tap(struct ctx *c, sl = sizeof(addr6); } - conn->fport = ntohs(th->dest); - conn->eport = ntohs(th->source); - conn->seq_init_from_tap = ntohl(th->seq); conn->seq_from_tap = conn->seq_init_from_tap + 1; conn->seq_ack_to_tap = conn->seq_from_tap; @@ -2674,10 +2666,21 @@ static void tcp_tap_conn_from_sock(struct ctx *c, conn->ws_to_tap = conn->ws_from_tap = 0; conn_event(c, conn, SOCK_ACCEPTED); - inany_from_sockaddr(&conn->faddr, &conn->fport, sa); - conn->eport = ref.port; + TAPFSIDE(conn)->pif = PIF_HOST; + inany_from_sockaddr(&TAPFSIDE(conn)->faddr, &TAPFSIDE(conn)->fport, sa); + tcp_snat_inbound(c, &TAPFSIDE(conn)->faddr); + + if (CONN_V4(conn)) { + inany_from_af(&TAPFSIDE(conn)->eaddr, AF_INET, &c->ip4.addr_seen); + } else { + if (IN6_IS_ADDR_LINKLOCAL(&TAPFSIDE(conn)->faddr.a6)) + TAPFSIDE(conn)->eaddr.a6 = c->ip6.addr_ll_seen; + else + TAPFSIDE(conn)->eaddr.a6 = c->ip6.addr_seen; + } + TAPFSIDE(conn)->eport = ref.port; - tcp_snat_inbound(c, &conn->faddr); + ASSERT(flowside_complete(TAPFSIDE(conn))); tcp_seq_init(c, conn, now); tcp_hash_insert(c, conn); diff --git a/tcp_conn.h b/tcp_conn.h index 1a4dcf2..8d9c7bd 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -23,9 +23,6 @@ * @ws_to_tap: Window scaling factor advertised to tap/guest * @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS * @seq_dup_ack_approx: Last duplicate ACK number sent to tap - * @faddr: Guest side forwarding address (guest's remote address) - * @eport: Guest side endpoint port (guest's local port) - * @fport: Guest side forwarding port (guest's remote port) * @wnd_from_tap: Last window size from tap, unscaled (as received) * @wnd_to_tap: Sending window advertised to tap, unscaled (as sent) * @seq_to_tap: Next sequence for packets to tap @@ -91,11 +88,6 @@ struct tcp_tap_conn { uint8_t seq_dup_ack_approx; - - union inany_addr faddr; - in_port_t eport; - in_port_t fport; - uint16_t wnd_from_tap; uint16_t wnd_to_tap; -- 2.43.0