On Fri, 2 Feb 2024 15:11:35 +0100 Laurent Vivier <lvivier(a)redhat.com> wrote:Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- tcp.c | 224 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 129 insertions(+), 95 deletions(-) diff --git a/tcp.c b/tcp.c index 2fd6bc2eda53..20ad8a4e5271 100644 --- a/tcp.c +++ b/tcp.c @@ -1320,87 +1320,98 @@ void tcp_defer_handler(struct ctx *c) tcp_l2_data_buf_flush(c); } +static void tcp_set_tcp_header(struct tcphdr *th, + const struct tcp_tap_conn *conn, uint32_t seq)Ah, nice, thanks, it adds a few lines but it's much better than that macro soup. I think the names of this function and the following ones, though, are now a bit less consistent: filling and setting are pretty much the same thing, and ipv4_fill_headers() only works for TCP packets. What about tcp_fill_header(), tcp_fill_ipv4_header(), and tcp_fill_ipv6_header()? Or _set_ for everything.+{ + th->source = htons(conn->fport); + th->dest = htons(conn->eport); + th->seq = htonl(seq); + th->ack_seq = htonl(conn->seq_ack_to_tap); + if (conn->events & ESTABLISHED) { + th->window = htons(conn->wnd_to_tap); + } else { + unsigned wnd = conn->wnd_to_tap << conn->ws_to_tap; + + th->window = htons(MIN(wnd, USHRT_MAX)); + } +} + /** - * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers + * ipv4_fill_headers() - Fill 802.3, IPv4, TCP headers in pre-cooked buffers * @c: Execution context * @conn: Connection pointer - * @p: Pointer to any type of TCP pre-cooked buffer + * @iph: Pointer to IPv4 header, immediately followed by a TCP header * @plen: Payload length (including TCP header options) * @check: Checksum, if already known * @seq: Sequence number for this segment * - * Return: frame length including L2 headers, host order + * Return: IP frame length including L2 headers, host order */ -static size_t tcp_l2_buf_fill_headers(const struct ctx *c, - const struct tcp_tap_conn *conn, - void *p, size_t plen, - const uint16_t *check, uint32_t seq) + +static size_t ipv4_fill_headers(const struct ctx *c, + const struct tcp_tap_conn *conn, + struct iphdr *iph, size_t plen, + const uint16_t *check, uint32_t seq) { + struct tcphdr *th = (void *)(iph + 1);We should check this against gcc 11.2, because I had a warning with the previous attempt at this, below:- /* gcc 11.2 would complain on data = (char *)(th + 1); */Besides, void * will be promoted to struct tcphdr *, but can't we just use the right cast right away? That is, struct tcphdr *th = (struct tcphdr *)(iph + 1); Either way, this should go after the next line (declarations from longest to shortest).const struct in_addr *a4 = inany_v4(&conn->faddr); - size_t ip_len, tlen; - -#define SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq) \ -do { \ - b->th.source = htons(conn->fport); \ - b->th.dest = htons(conn->eport); \ - b->th.seq = htonl(seq); \ - b->th.ack_seq = htonl(conn->seq_ack_to_tap); \ - if (conn->events & ESTABLISHED) { \ - b->th.window = htons(conn->wnd_to_tap); \ - } else { \ - unsigned wnd = conn->wnd_to_tap << conn->ws_to_tap; \ - \ - b->th.window = htons(MIN(wnd, USHRT_MAX)); \ - } \ -} while (0) - - if (a4) { - struct tcp4_l2_buf_t *b = (struct tcp4_l2_buf_t *)p; - - ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr); - b->iph.tot_len = htons(ip_len); - b->iph.saddr = a4->s_addr; - b->iph.daddr = c->ip4.addr_seen.s_addr; - - b->iph.check = check ? *check : - ipv4_hdr_checksum(&b->iph, IPPROTO_TCP); - - SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq); - - b->th.check = tcp_update_check_tcp4(&b->iph); - - tlen = tap_iov_len(c, &b->taph, ip_len); - } else { - struct tcp6_l2_buf_t *b = (struct tcp6_l2_buf_t *)p; + size_t ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr);...and this should be the first one.- ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + iph->tot_len = htons(ip_len); + iph->saddr = a4->s_addr; + iph->daddr = c->ip4.addr_seen.s_addr; - b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr)); - b->ip6h.saddr = conn->faddr.a6; - if (IN6_IS_ADDR_LINKLOCAL(&b->ip6h.saddr)) - b->ip6h.daddr = c->ip6.addr_ll_seen; - else - b->ip6h.daddr = c->ip6.addr_seen; + iph->check = check ? *check : ipv4_hdr_checksum(iph, IPPROTO_TCP); + + tcp_set_tcp_header(th, conn, seq); + + th->check = tcp_update_check_tcp4(iph); + + return ip_len; +} + +/** + * ipv6_fill_headers() - Fill 802.3, IPv6, TCP headers in pre-cooked buffers + * @c: Execution context + * @conn: Connection pointer + * @ip6h: Pointer to IPv6 header, immediately followed by a TCP header + * @plen: Payload length (including TCP header options) + * @check: Checksum, if already known + * @seq: Sequence number for this segment + * + * Return: IP frame length including L2 headers, host order + */ + +static size_t ipv6_fill_headers(const struct ctx *c, + const struct tcp_tap_conn *conn, + struct ipv6hdr *ip6h, size_t plen, + uint32_t seq) +{ + struct tcphdr *th = (void *)(ip6h + 1); + size_t ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr); - memset(b->ip6h.flow_lbl, 0, 3); + ip6h->payload_len = htons(plen + sizeof(struct tcphdr)); + ip6h->saddr = conn->faddr.a6; + if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr)) + ip6h->daddr = c->ip6.addr_ll_seen; + else + ip6h->daddr = c->ip6.addr_seen; - SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq); + memset(ip6h->flow_lbl, 0, 3); - b->th.check = tcp_update_check_tcp6(&b->ip6h); + tcp_set_tcp_header(th, conn, seq); - b->ip6h.hop_limit = 255; - b->ip6h.version = 6; - b->ip6h.nexthdr = IPPROTO_TCP; + th->check = tcp_update_check_tcp6(ip6h); - b->ip6h.flow_lbl[0] = (conn->sock >> 16) & 0xf; - b->ip6h.flow_lbl[1] = (conn->sock >> 8) & 0xff; - b->ip6h.flow_lbl[2] = (conn->sock >> 0) & 0xff; + ip6h->hop_limit = 255; + ip6h->version = 6; + ip6h->nexthdr = IPPROTO_TCP; - tlen = tap_iov_len(c, &b->taph, ip_len); - } -#undef SET_TCP_HEADER_COMMON_V4_V6 + ip6h->flow_lbl[0] = (conn->sock >> 16) & 0xf; + ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff; + ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff; - return tlen; + return ip_len; } /** @@ -1520,27 +1531,21 @@ static void tcp_update_seqack_from_tap(const struct ctx *c, } /** - * tcp_send_flag() - Send segment with flags to tap (no payload) + * do_tcp_send_flag() - Send segment with flags to tap (no payload) * @c: Execution context * @conn: Connection pointer * @flags: TCP flags: if not set, send segment only if ACK is due * * Return: negative error code on connection reset, 0 otherwiseThis should be adjusted -- it took me a while to realise what you mean by 0 and 1 now.*/ -static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) + +static int do_tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags, struct tcphdr *th, char *data, size_t optlen)Maybe tcp_fill_flag_header() - Prepare header for flags-only segment (no payload)? > { > uint32_t prev_ack_to_tap = conn->seq_ack_to_tap; > uint32_t prev_wnd_to_tap = conn->wnd_to_tap; > - struct tcp4_l2_flags_buf_t *b4 = NULL; > - struct tcp6_l2_flags_buf_t *b6 = NULL; > struct tcp_info tinfo = { 0 }; > socklen_t sl = sizeof(tinfo); > int s = conn->sock; > - size_t optlen = 0; > - struct iovec *iov; > - struct tcphdr *th; > - char *data; > - void *p; > > if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap) && > !flags && conn->wnd_to_tap) > @@ -1562,26 +1567,9 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) > if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags) > return 0; > > - if (CONN_V4(conn)) { > - iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used; > - p = b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++; > - th = &b4->th; > -- /* gcc 11.2 would complain on data = (char *)(th + 1); */> - data = b4->opts; > - } else { > - iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used; > - p = b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++; > - th = &b6->th; > - data = b6->opts; > - } > - > if (flags & SYN) { > int mss; > > - /* Options: MSS, NOP and window scale (8 bytes) */ > - optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN; > - This is very header-specific, whereas tcp_send_flag() isn't anymore. Perhaps the new function could take a pointer to optlen and change it instead?*data++ = OPT_MSS; *data++ = OPT_MSS_LEN; @@ -1624,9 +1612,6 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) th->syn = !!(flags & SYN); th->fin = !!(flags & FIN); - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, p, optlen, - NULL, conn->seq_to_tap); - if (th->ack) { if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap)) conn_flag(c, conn, ~ACK_TO_TAP_DUE); @@ -1641,8 +1626,38 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) if (th->fin || th->syn) conn->seq_to_tap++; + return 1; +} + +static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) +{ + size_t optlen = 0; + struct iovec *iov; + size_t ip_len; + int ret; + + /* Options: MSS, NOP and window scale (8 bytes) */ + if (flags & SYN) + optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN; + if (CONN_V4(conn)) { + struct tcp4_l2_flags_buf_t *b4; + + iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used; + b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++; + + ret = do_tcp_send_flag(c, conn, flags, &b4->th, b4->opts, + optlen); + if (ret <= 0) + return ret; + + ip_len = ipv4_fill_headers(c, conn, &b4->iph, optlen, + NULL, conn->seq_to_tap); + + iov->iov_len = tap_iov_len(c, &b4->taph, ip_len); + if (flags & DUP_ACK) { + memcpy(b4 + 1, b4, sizeof(*b4)); (iov + 1)->iov_len = iov->iov_len; tcp4_l2_flags_buf_used++; @@ -1651,6 +1666,21 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) if (tcp4_l2_flags_buf_used > ARRAY_SIZE(tcp4_l2_flags_buf) - 2) tcp_l2_flags_buf_flush(c); } else { + struct tcp6_l2_flags_buf_t *b6; + + iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used; + b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++; + + ret = do_tcp_send_flag(c, conn, flags, &b6->th, b6->opts, + optlen); + if (ret <= 0) + return ret; + + ip_len = ipv6_fill_headers(c, conn, &b6->ip6h, optlen, + conn->seq_to_tap); + + iov->iov_len = tap_iov_len(c, &b6->taph, ip_len); + if (flags & DUP_ACK) { memcpy(b6 + 1, b6, sizeof(*b6)); (iov + 1)->iov_len = iov->iov_len; @@ -2050,6 +2080,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, { uint32_t *seq_update = &conn->seq_to_tap; struct iovec *iov; + size_t ip_len; if (CONN_V4(conn)) { struct tcp4_l2_buf_t *b = &tcp4_l2_buf[tcp4_l2_buf_used]; @@ -2058,9 +2089,11 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update; tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen; + ip_len = ipv4_fill_headers(c, conn, &b->iph, plen, + check, seq); + iov = tcp4_l2_iov + tcp4_l2_buf_used++; - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen, - check, seq); + iov->iov_len = tap_iov_len(c, &b->taph, ip_len); if (tcp4_l2_buf_used > ARRAY_SIZE(tcp4_l2_buf) - 1) tcp_l2_data_buf_flush(c); } else if (CONN_V6(conn)) { @@ -2069,9 +2102,10 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update; tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen; + ip_len = ipv6_fill_headers(c, conn, &b->ip6h, plen, seq); + iov = tcp6_l2_iov + tcp6_l2_buf_used++; - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen, - NULL, seq); + iov->iov_len = tap_iov_len(c, &b->taph, ip_len); if (tcp6_l2_buf_used > ARRAY_SIZE(tcp6_l2_buf) - 1) tcp_l2_data_buf_flush(c); }-- Stefano