This should save us some memory and code. Jon Maloy (4): tcp: create unified struct for IPv4 and IPv6 header tcp: update ip address in l2 tap queues on the fly tcp: unify l2 TCPv4 and TCPv6 queues and structures tcp: change prefix tcp4_ to tcp_ where applicable tcp.c | 22 ++--- tcp_buf.c | 259 +++++++++++++++++------------------------------------- tcp_buf.h | 3 + 3 files changed, 98 insertions(+), 186 deletions(-) -- 2.45.2
As a preparation for unifying the l2 queues for TCPv4 and TCPv6 traffic we prepare a unified struct that has space for both IP address types. With this change, the arrays tcp4_l2_iov and tcp4_l2_iov, as well as tcp4_l2_flags_iov and tcp4_l2_flags_iov, are identical in structure and size. Signed-off-by: Jon Maloy <jmaloy(a)redhat.com> --- tcp_buf.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/tcp_buf.c b/tcp_buf.c index c31e9f3..1af4786 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -52,6 +52,18 @@ struct tcp_payload_t { } __attribute__ ((packed, aligned(__alignof__(unsigned int)))); #endif +/** + * struct iphdr_t - Area with space for both IPv4 and IPv6 header + * @ip4: IPv4 header + * @ip6: IPv6 header + */ +struct iphdr_t { + union { + struct iphdr ip4; + struct ipv6hdr ip6; + }; +}; + /** * struct tcp_flags_t - TCP header and data to send zero-length * segments (flags) @@ -72,7 +84,7 @@ static struct ethhdr tcp4_eth_src; static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers */ -static struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp4_payload_ip[TCP_FRAMES_MEM]; /* TCP segments with payload for IPv4 frames */ static struct tcp_payload_t tcp4_payload[TCP_FRAMES_MEM]; @@ -84,7 +96,7 @@ static unsigned int tcp4_payload_used; static struct tap_hdr tcp4_flags_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers for TCP segment without payload */ -static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp4_flags_ip[TCP_FRAMES_MEM]; /* TCP segments without payload for IPv4 frames */ static struct tcp_flags_t tcp4_flags[TCP_FRAMES_MEM]; @@ -95,7 +107,7 @@ static struct ethhdr tcp6_eth_src; static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv6 headers */ -static struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp6_payload_ip[TCP_FRAMES_MEM]; /* TCP headers and data for IPv6 frames */ static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM]; @@ -107,7 +119,7 @@ static unsigned int tcp6_payload_used; static struct tap_hdr tcp6_flags_tap_hdr[TCP_FRAMES_MEM]; /* IPv6 headers for TCP segment without payload */ -static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp6_flags_ip[TCP_FRAMES_MEM]; /* TCP segment without payload for IPv6 frames */ static struct tcp_flags_t tcp6_flags[TCP_FRAMES_MEM]; @@ -144,13 +156,13 @@ void tcp_sock4_iov_init(const struct ctx *c) tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) { - tcp4_payload_ip[i] = iph; + tcp4_payload_ip[i].ip4 = iph; tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4; tcp4_payload[i].th.ack = 1; } for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) { - tcp4_flags_ip[i] = iph; + tcp4_flags_ip[i].ip4 = iph; tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4; tcp4_flags[i].th.ack = 1; } @@ -160,7 +172,8 @@ void tcp_sock4_iov_init(const struct ctx *c) iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]); iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]); + iov[TCP_IOV_IP].iov_base = &tcp4_payload_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp4_payload_ip[i].ip4); iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i]; } @@ -170,7 +183,8 @@ void tcp_sock4_iov_init(const struct ctx *c) iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]); iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]); + iov[TCP_IOV_IP].iov_base = &tcp4_flags_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp4_flags_ip[i].ip4); iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i]; } } @@ -188,13 +202,13 @@ void tcp_sock6_iov_init(const struct ctx *c) tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) { - tcp6_payload_ip[i] = ip6; + tcp6_payload_ip[i].ip6 = ip6; tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4; tcp6_payload[i].th.ack = 1; } for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) { - tcp6_flags_ip[i] = ip6; + tcp6_flags_ip[i].ip6 = ip6; tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4; tcp6_flags[i].th .ack = 1; } @@ -204,7 +218,8 @@ void tcp_sock6_iov_init(const struct ctx *c) iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]); iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]); + iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp6_payload_ip[i].ip6); iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i]; } @@ -213,7 +228,8 @@ void tcp_sock6_iov_init(const struct ctx *c) iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]); iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]); + iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp6_flags_ip[i].ip6); iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i]; } } -- 2.45.2
On Fri, Sep 06, 2024 at 05:34:24PM -0400, Jon Maloy wrote:As a preparation for unifying the l2 queues for TCPv4 and TCPv6 traffic we prepare a unified struct that has space for both IP address types. With this change, the arrays tcp4_l2_iov and tcp4_l2_iov, as well as tcp4_l2_flags_iov and tcp4_l2_flags_iov, are identical in structure and size. Signed-off-by: Jon Maloy <jmaloy(a)redhat.com> --- tcp_buf.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/tcp_buf.c b/tcp_buf.c index c31e9f3..1af4786 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -52,6 +52,18 @@ struct tcp_payload_t { } __attribute__ ((packed, aligned(__alignof__(unsigned int)))); #endif +/** + * struct iphdr_t - Area with space for both IPv4 and IPv6 header + * @ip4: IPv4 header + * @ip6: IPv6 header + */ +struct iphdr_t { + union { + struct iphdr ip4; + struct ipv6hdr ip6; + }; +};So, I haven't read the rest of the series yet, so it's possible something there would clarify this. Having the two headers in a union does require that essentially the whole header be re-initialized for every packet. For UDP, what we do instead is have both headers in paralllel, partially initialised. For each packet we update the appropriate header as needed, and point our IOV to the correct header (v4 or v6). I don't know if that's a better approach or not: it's plausible the dcache savings outweigh the extra re-initialisation, but it's plausible they don't too. I wonder if it might be better sticking with the same approach as UDP for consistency, at least until we have specific data suggesting one or the other approach is better.+ /** * struct tcp_flags_t - TCP header and data to send zero-length * segments (flags) @@ -72,7 +84,7 @@ static struct ethhdr tcp4_eth_src; static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers */ -static struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp4_payload_ip[TCP_FRAMES_MEM]; /* TCP segments with payload for IPv4 frames */ static struct tcp_payload_t tcp4_payload[TCP_FRAMES_MEM]; @@ -84,7 +96,7 @@ static unsigned int tcp4_payload_used; static struct tap_hdr tcp4_flags_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers for TCP segment without payload */ -static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp4_flags_ip[TCP_FRAMES_MEM]; /* TCP segments without payload for IPv4 frames */ static struct tcp_flags_t tcp4_flags[TCP_FRAMES_MEM]; @@ -95,7 +107,7 @@ static struct ethhdr tcp6_eth_src; static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv6 headers */ -static struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp6_payload_ip[TCP_FRAMES_MEM]; /* TCP headers and data for IPv6 frames */ static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM]; @@ -107,7 +119,7 @@ static unsigned int tcp6_payload_used; static struct tap_hdr tcp6_flags_tap_hdr[TCP_FRAMES_MEM]; /* IPv6 headers for TCP segment without payload */ -static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp6_flags_ip[TCP_FRAMES_MEM]; /* TCP segment without payload for IPv6 frames */ static struct tcp_flags_t tcp6_flags[TCP_FRAMES_MEM]; @@ -144,13 +156,13 @@ void tcp_sock4_iov_init(const struct ctx *c) tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) { - tcp4_payload_ip[i] = iph; + tcp4_payload_ip[i].ip4 = iph; tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4; tcp4_payload[i].th.ack = 1; } for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) { - tcp4_flags_ip[i] = iph; + tcp4_flags_ip[i].ip4 = iph; tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4; tcp4_flags[i].th.ack = 1; } @@ -160,7 +172,8 @@ void tcp_sock4_iov_init(const struct ctx *c) iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]); iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]); + iov[TCP_IOV_IP].iov_base = &tcp4_payload_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp4_payload_ip[i].ip4); iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i]; } @@ -170,7 +183,8 @@ void tcp_sock4_iov_init(const struct ctx *c) iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]); iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]); + iov[TCP_IOV_IP].iov_base = &tcp4_flags_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp4_flags_ip[i].ip4); iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i]; } } @@ -188,13 +202,13 @@ void tcp_sock6_iov_init(const struct ctx *c) tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) { - tcp6_payload_ip[i] = ip6; + tcp6_payload_ip[i].ip6 = ip6; tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4; tcp6_payload[i].th.ack = 1; } for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) { - tcp6_flags_ip[i] = ip6; + tcp6_flags_ip[i].ip6 = ip6; tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4; tcp6_flags[i].th .ack = 1; } @@ -204,7 +218,8 @@ void tcp_sock6_iov_init(const struct ctx *c) iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]); iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]); + iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp6_payload_ip[i].ip6); iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i]; } @@ -213,7 +228,8 @@ void tcp_sock6_iov_init(const struct ctx *c) iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]); iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]); + iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp6_flags_ip[i].ip6); iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i]; } }-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson
l2 tap queue entries are currently initialized at system start, and reused with preset headers through its whole life time. The only fields we need to update per message are things like payload size and checksums. If we want to reuse these entries between ipv4 and ipv6 messages we will now need to initialize the complete ip header on the fly. We do this here. Signed-off-by: Jon Maloy <jmaloy(a)redhat.com> --- tcp.c | 14 +++++++++----- tcp_buf.c | 6 ++++-- tcp_buf.h | 2 ++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tcp.c b/tcp.c index 77c62f0..006e503 100644 --- a/tcp.c +++ b/tcp.c @@ -920,6 +920,7 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, ASSERT(src4 && dst4); + *iph = tcp_payload_ip4; iph->tot_len = htons(l3len); iph->saddr = src4->s_addr; iph->daddr = dst4->s_addr; @@ -956,6 +957,7 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, const struct flowside *tapside = TAPFLOW(conn); size_t l4len = dlen + sizeof(*th); + *ip6h = tcp_payload_ip6; ip6h->payload_len = htons(l4len); ip6h->saddr = tapside->oaddr.a6; ip6h->daddr = tapside->eaddr.a6; @@ -995,16 +997,18 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, const struct in_addr *a4 = inany_v4(&tapside->oaddr); if (a4) { + iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr); return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, check, seq); + } else { + iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr); + return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, + iov[TCP_IOV_IP].iov_base, + iov[TCP_IOV_PAYLOAD].iov_base, dlen, + seq); } - - return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, - iov[TCP_IOV_IP].iov_base, - iov[TCP_IOV_PAYLOAD].iov_base, dlen, - seq); } /** diff --git a/tcp_buf.c b/tcp_buf.c index 1af4786..6e6549f 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -84,6 +84,7 @@ static struct ethhdr tcp4_eth_src; static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers */ +struct iphdr tcp_payload_ip4; static struct iphdr_t tcp4_payload_ip[TCP_FRAMES_MEM]; /* TCP segments with payload for IPv4 frames */ static struct tcp_payload_t tcp4_payload[TCP_FRAMES_MEM]; @@ -107,6 +108,7 @@ static struct ethhdr tcp6_eth_src; static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv6 headers */ +struct ipv6hdr tcp_payload_ip6; static struct iphdr_t tcp6_payload_ip[TCP_FRAMES_MEM]; /* TCP headers and data for IPv6 frames */ static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM]; @@ -154,9 +156,9 @@ void tcp_sock4_iov_init(const struct ctx *c) int i; tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); + tcp_payload_ip4 = iph; for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) { - tcp4_payload_ip[i].ip4 = iph; tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4; tcp4_payload[i].th.ack = 1; } @@ -200,9 +202,9 @@ void tcp_sock6_iov_init(const struct ctx *c) int i; tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); + tcp_payload_ip6 = ip6; for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) { - tcp6_payload_ip[i].ip6 = ip6; tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4; tcp6_payload[i].th.ack = 1; } diff --git a/tcp_buf.h b/tcp_buf.h index 3db4c56..d3d0d7f 100644 --- a/tcp_buf.h +++ b/tcp_buf.h @@ -13,4 +13,6 @@ void tcp_payload_flush(struct ctx *c); int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn); int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags); +extern struct iphdr tcp_payload_ip4; +extern struct ipv6hdr tcp_payload_ip6; #endif /*TCP_BUF_H */ -- 2.45.2
On Fri, Sep 06, 2024 at 05:34:25PM -0400, Jon Maloy wrote:l2 tap queue entries are currently initialized at system start, and reused with preset headers through its whole life time. The only fields we need to update per message are things like payload size and checksums. If we want to reuse these entries between ipv4 and ipv6 messages we will now need to initialize the complete ip header on the fly. We do this here.Comments from 1/4 relevant here too.Signed-off-by: Jon Maloy <jmaloy(a)redhat.com> --- tcp.c | 14 +++++++++----- tcp_buf.c | 6 ++++-- tcp_buf.h | 2 ++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tcp.c b/tcp.c index 77c62f0..006e503 100644 --- a/tcp.c +++ b/tcp.c @@ -920,6 +920,7 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, ASSERT(src4 && dst4); + *iph = tcp_payload_ip4; iph->tot_len = htons(l3len); iph->saddr = src4->s_addr; iph->daddr = dst4->s_addr; @@ -956,6 +957,7 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, const struct flowside *tapside = TAPFLOW(conn); size_t l4len = dlen + sizeof(*th); + *ip6h = tcp_payload_ip6; ip6h->payload_len = htons(l4len); ip6h->saddr = tapside->oaddr.a6; ip6h->daddr = tapside->eaddr.a6; @@ -995,16 +997,18 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, const struct in_addr *a4 = inany_v4(&tapside->oaddr); if (a4) { + iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr); return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, check, seq); + } else { + iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr); + return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, + iov[TCP_IOV_IP].iov_base, + iov[TCP_IOV_PAYLOAD].iov_base, dlen, + seq);Personally I marginally prefer this formatting, but one of the static checkers is going to whinge about an else clause after a 'return' in the if clause.} - - return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, - iov[TCP_IOV_IP].iov_base, - iov[TCP_IOV_PAYLOAD].iov_base, dlen, - seq); } /** diff --git a/tcp_buf.c b/tcp_buf.c index 1af4786..6e6549f 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -84,6 +84,7 @@ static struct ethhdr tcp4_eth_src; static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers */ +struct iphdr tcp_payload_ip4; static struct iphdr_t tcp4_payload_ip[TCP_FRAMES_MEM]; /* TCP segments with payload for IPv4 frames */ static struct tcp_payload_t tcp4_payload[TCP_FRAMES_MEM]; @@ -107,6 +108,7 @@ static struct ethhdr tcp6_eth_src; static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv6 headers */ +struct ipv6hdr tcp_payload_ip6; static struct iphdr_t tcp6_payload_ip[TCP_FRAMES_MEM]; /* TCP headers and data for IPv6 frames */ static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM]; @@ -154,9 +156,9 @@ void tcp_sock4_iov_init(const struct ctx *c) int i; tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); + tcp_payload_ip4 = iph; for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) { - tcp4_payload_ip[i].ip4 = iph; tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4; tcp4_payload[i].th.ack = 1; } @@ -200,9 +202,9 @@ void tcp_sock6_iov_init(const struct ctx *c) int i; tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); + tcp_payload_ip6 = ip6; for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) { - tcp6_payload_ip[i].ip6 = ip6; tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4; tcp6_payload[i].th.ack = 1; } diff --git a/tcp_buf.h b/tcp_buf.h index 3db4c56..d3d0d7f 100644 --- a/tcp_buf.h +++ b/tcp_buf.h @@ -13,4 +13,6 @@ void tcp_payload_flush(struct ctx *c); int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn); int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags); +extern struct iphdr tcp_payload_ip4; +extern struct ipv6hdr tcp_payload_ip6; #endif /*TCP_BUF_H */-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson
Following the preparations in the previous commits, we can now remove the queues dedicated for TCPv6 and move that traffic over to the queues currently used for TCPv4. Signed-off-by: Jon Maloy <jmaloy(a)redhat.com> --- tcp.c | 8 ++- tcp_buf.c | 158 +++++++++--------------------------------------------- tcp_buf.h | 1 + 3 files changed, 28 insertions(+), 139 deletions(-) diff --git a/tcp.c b/tcp.c index 006e503..19cf9e5 100644 --- a/tcp.c +++ b/tcp.c @@ -998,12 +998,14 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, if (a4) { iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr); + tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, check, seq); } else { iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr); + tcp4_eth_src.h_proto = htons_constant(ETH_P_IPV6); return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, @@ -2508,11 +2510,7 @@ int tcp_init(struct ctx *c) { ASSERT(!c->no_tcp); - if (c->ifi4) - tcp_sock4_iov_init(c); - - if (c->ifi6) - tcp_sock6_iov_init(c); + tcp_sock4_iov_init(c); memset(init_sock_pool4, 0xff, sizeof(init_sock_pool4)); memset(init_sock_pool6, 0xff, sizeof(init_sock_pool6)); diff --git a/tcp_buf.c b/tcp_buf.c index 6e6549f..92c4d73 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -80,7 +80,7 @@ struct tcp_flags_t { #endif /* Ethernet header for IPv4 frames */ -static struct ethhdr tcp4_eth_src; +struct ethhdr tcp4_eth_src; static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers */ @@ -104,36 +104,14 @@ static struct tcp_flags_t tcp4_flags[TCP_FRAMES_MEM]; static unsigned int tcp4_flags_used; /* Ethernet header for IPv6 frames */ -static struct ethhdr tcp6_eth_src; - -static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM]; -/* IPv6 headers */ -struct ipv6hdr tcp_payload_ip6; -static struct iphdr_t tcp6_payload_ip[TCP_FRAMES_MEM]; -/* TCP headers and data for IPv6 frames */ -static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM]; - -static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516"); - -/* References tracking the owner connection of frames in the tap outqueue */ -static struct tcp_tap_conn *tcp6_frame_conns[TCP_FRAMES_MEM]; -static unsigned int tcp6_payload_used; - -static struct tap_hdr tcp6_flags_tap_hdr[TCP_FRAMES_MEM]; -/* IPv6 headers for TCP segment without payload */ -static struct iphdr_t tcp6_flags_ip[TCP_FRAMES_MEM]; -/* TCP segment without payload for IPv6 frames */ -static struct tcp_flags_t tcp6_flags[TCP_FRAMES_MEM]; - -static unsigned int tcp6_flags_used; +struct ipv6hdr tcp_payload_ip6; /* recvmsg()/sendmsg() data for tap */ static struct iovec iov_sock [TCP_FRAMES_MEM + 1]; static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; -static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; -static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; + /** * tcp_update_l2_buf() - Update Ethernet header buffers with addresses * @eth_d: Ethernet destination address, NULL if unchanged @@ -142,7 +120,6 @@ static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) { eth_update_mac(&tcp4_eth_src, eth_d, eth_s); - eth_update_mac(&tcp6_eth_src, eth_d, eth_s); } /** @@ -191,61 +168,12 @@ void tcp_sock4_iov_init(const struct ctx *c) } } -/** - * tcp_sock6_iov_init() - Initialise scatter-gather L2 buffers for IPv6 sockets - * @c: Execution context - */ -void tcp_sock6_iov_init(const struct ctx *c) -{ - struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP); - struct iovec *iov; - int i; - - tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); - tcp_payload_ip6 = ip6; - - for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) { - tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4; - tcp6_payload[i].th.ack = 1; - } - - for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) { - tcp6_flags_ip[i].ip6 = ip6; - tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4; - tcp6_flags[i].th .ack = 1; - } - - for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp6_l2_iov[i]; - - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]); - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(tcp6_payload_ip[i].ip6); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i]; - } - - for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp6_l2_flags_iov[i]; - - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]); - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(tcp6_flags_ip[i].ip6); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i]; - } -} - /** * tcp_flags_flush() - Send out buffers for segments with no data (flags) * @c: Execution context */ void tcp_flags_flush(const struct ctx *c) { - tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS, - tcp6_flags_used); - tcp6_flags_used = 0; - tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS, tcp4_flags_used); tcp4_flags_used = 0; @@ -287,14 +215,6 @@ void tcp_payload_flush(struct ctx *c) { size_t m; - m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS, - tcp6_payload_used); - if (m != tcp6_payload_used) { - tcp_revert_seq(c, &tcp6_frame_conns[m], &tcp6_l2_iov[m], - tcp6_payload_used - m); - } - tcp6_payload_used = 0; - m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS, tcp4_payload_used); if (m != tcp4_payload_used) { @@ -321,21 +241,13 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) uint32_t seq; int ret; - if (CONN_V4(conn)) - iov = tcp4_l2_flags_iov[tcp4_flags_used++]; - else - iov = tcp6_l2_flags_iov[tcp6_flags_used++]; - + iov = tcp4_l2_flags_iov[tcp4_flags_used++]; payload = iov[TCP_IOV_PAYLOAD].iov_base; - seq = conn->seq_to_tap; ret = tcp_prepare_flags(c, conn, flags, &payload->th, payload->opts, &optlen); if (ret <= 0) { - if (CONN_V4(conn)) - tcp4_flags_used--; - else - tcp6_flags_used--; + tcp4_flags_used--; return ret; } @@ -346,10 +258,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) struct iovec *dup_iov; int i; - if (CONN_V4(conn)) - dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++]; - else - dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++]; + dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++]; for (i = 0; i < TCP_NUM_IOVS; i++) memcpy(dup_iov[i].iov_base, iov[i].iov_base, @@ -357,13 +266,8 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len; } - if (CONN_V4(conn)) { - if (tcp4_flags_used > TCP_FRAMES_MEM - 2) - tcp_flags_flush(c); - } else { - if (tcp6_flags_used > TCP_FRAMES_MEM - 2) - tcp_flags_flush(c); - } + if (tcp4_flags_used > TCP_FRAMES_MEM - 2) + tcp_flags_flush(c); return 0; } @@ -379,36 +283,26 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn, ssize_t dlen, int no_csum, uint32_t seq) { + struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1]; + const uint16_t *check = NULL; struct iovec *iov; size_t l4len; conn->seq_to_tap = seq + dlen; - if (CONN_V4(conn)) { - struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1]; - const uint16_t *check = NULL; - - if (no_csum) { - struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base; - check = &iph->check; - } + if (CONN_V4(conn) && no_csum) { + struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base; - tcp4_frame_conns[tcp4_payload_used] = conn; - - iov = tcp4_l2_iov[tcp4_payload_used++]; - l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq); - iov[TCP_IOV_PAYLOAD].iov_len = l4len; - if (tcp4_payload_used > TCP_FRAMES_MEM - 1) - tcp_payload_flush(c); - } else if (CONN_V6(conn)) { - tcp6_frame_conns[tcp6_payload_used] = conn; - - iov = tcp6_l2_iov[tcp6_payload_used++]; - l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq); - iov[TCP_IOV_PAYLOAD].iov_len = l4len; - if (tcp6_payload_used > TCP_FRAMES_MEM - 1) - tcp_payload_flush(c); + check = &iph->check; } + + tcp4_frame_conns[tcp4_payload_used] = conn; + + iov = tcp4_l2_iov[tcp4_payload_used++]; + l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq); + iov[TCP_IOV_PAYLOAD].iov_len = l4len; + if (tcp4_payload_used > TCP_FRAMES_MEM - 1) + tcp_payload_flush(c); } /** @@ -472,19 +366,15 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) mh_sock.msg_iovlen = fill_bufs; } - if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) || - (!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) { + if ((v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM)) { tcp_payload_flush(c); /* Silence Coverity CWE-125 false positive */ - tcp4_payload_used = tcp6_payload_used = 0; + tcp4_payload_used = 0; } for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) { - if (v4) - iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data; - else - iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data; + iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data; iov->iov_len = mss; } if (iov_rem) diff --git a/tcp_buf.h b/tcp_buf.h index d3d0d7f..d7cdbaf 100644 --- a/tcp_buf.h +++ b/tcp_buf.h @@ -13,6 +13,7 @@ void tcp_payload_flush(struct ctx *c); int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn); int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags); +extern struct ethhdr tcp4_eth_src; extern struct iphdr tcp_payload_ip4; extern struct ipv6hdr tcp_payload_ip6; #endif /*TCP_BUF_H */ -- 2.45.2
On Fri, Sep 06, 2024 at 05:34:26PM -0400, Jon Maloy wrote:Following the preparations in the previous commits, we can now remove the queues dedicated for TCPv6 and move that traffic over to the queues currently used for TCPv4. Signed-off-by: Jon Maloy <jmaloy(a)redhat.com> --- tcp.c | 8 ++- tcp_buf.c | 158 +++++++++--------------------------------------------- tcp_buf.h | 1 + 3 files changed, 28 insertions(+), 139 deletions(-) diff --git a/tcp.c b/tcp.c index 006e503..19cf9e5 100644 --- a/tcp.c +++ b/tcp.c @@ -998,12 +998,14 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, if (a4) { iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr); + tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, check, seq); } else { iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr); + tcp4_eth_src.h_proto = htons_constant(ETH_P_IPV6); return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, @@ -2508,11 +2510,7 @@ int tcp_init(struct ctx *c) { ASSERT(!c->no_tcp); - if (c->ifi4) - tcp_sock4_iov_init(c); - - if (c->ifi6) - tcp_sock6_iov_init(c); + tcp_sock4_iov_init(c);I'd prefer to see the renames to remove the now incorrect '4' and '6' folded in here, rather than waiting for the next patch.memset(init_sock_pool4, 0xff, sizeof(init_sock_pool4)); memset(init_sock_pool6, 0xff, sizeof(init_sock_pool6)); diff --git a/tcp_buf.c b/tcp_buf.c index 6e6549f..92c4d73 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -80,7 +80,7 @@ struct tcp_flags_t { #endif /* Ethernet header for IPv4 frames */ -static struct ethhdr tcp4_eth_src; +struct ethhdr tcp4_eth_src;Ditto. Also, as with the IP header, for UDP we still have separate ethernet header structures for v4 and v6 and just update the IOV per-packet to point to the right one.static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers */ @@ -104,36 +104,14 @@ static struct tcp_flags_t tcp4_flags[TCP_FRAMES_MEM]; static unsigned int tcp4_flags_used; /* Ethernet header for IPv6 frames */ -static struct ethhdr tcp6_eth_src; - -static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM]; -/* IPv6 headers */ -struct ipv6hdr tcp_payload_ip6; -static struct iphdr_t tcp6_payload_ip[TCP_FRAMES_MEM]; -/* TCP headers and data for IPv6 frames */ -static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM]; - -static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516"); - -/* References tracking the owner connection of frames in the tap outqueue */ -static struct tcp_tap_conn *tcp6_frame_conns[TCP_FRAMES_MEM]; -static unsigned int tcp6_payload_used; - -static struct tap_hdr tcp6_flags_tap_hdr[TCP_FRAMES_MEM]; -/* IPv6 headers for TCP segment without payload */ -static struct iphdr_t tcp6_flags_ip[TCP_FRAMES_MEM]; -/* TCP segment without payload for IPv6 frames */ -static struct tcp_flags_t tcp6_flags[TCP_FRAMES_MEM]; - -static unsigned int tcp6_flags_used; +struct ipv6hdr tcp_payload_ip6; /* recvmsg()/sendmsg() data for tap */ static struct iovec iov_sock [TCP_FRAMES_MEM + 1]; static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; -static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; -static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; + /** * tcp_update_l2_buf() - Update Ethernet header buffers with addresses * @eth_d: Ethernet destination address, NULL if unchanged @@ -142,7 +120,6 @@ static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) { eth_update_mac(&tcp4_eth_src, eth_d, eth_s); - eth_update_mac(&tcp6_eth_src, eth_d, eth_s); } /** @@ -191,61 +168,12 @@ void tcp_sock4_iov_init(const struct ctx *c) } } -/** - * tcp_sock6_iov_init() - Initialise scatter-gather L2 buffers for IPv6 sockets - * @c: Execution context - */ -void tcp_sock6_iov_init(const struct ctx *c) -{ - struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP); - struct iovec *iov; - int i; - - tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); - tcp_payload_ip6 = ip6; - - for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) { - tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4; - tcp6_payload[i].th.ack = 1; - } - - for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) { - tcp6_flags_ip[i].ip6 = ip6; - tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4; - tcp6_flags[i].th .ack = 1; - } - - for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp6_l2_iov[i]; - - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]); - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(tcp6_payload_ip[i].ip6); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i]; - } - - for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp6_l2_flags_iov[i]; - - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]); - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src); - iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(tcp6_flags_ip[i].ip6); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i]; - } -} - /** * tcp_flags_flush() - Send out buffers for segments with no data (flags) * @c: Execution context */ void tcp_flags_flush(const struct ctx *c) { - tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS, - tcp6_flags_used); - tcp6_flags_used = 0; - tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS, tcp4_flags_used); tcp4_flags_used = 0; @@ -287,14 +215,6 @@ void tcp_payload_flush(struct ctx *c) { size_t m; - m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS, - tcp6_payload_used); - if (m != tcp6_payload_used) { - tcp_revert_seq(c, &tcp6_frame_conns[m], &tcp6_l2_iov[m], - tcp6_payload_used - m); - } - tcp6_payload_used = 0; - m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS, tcp4_payload_used); if (m != tcp4_payload_used) { @@ -321,21 +241,13 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) uint32_t seq; int ret; - if (CONN_V4(conn)) - iov = tcp4_l2_flags_iov[tcp4_flags_used++]; - else - iov = tcp6_l2_flags_iov[tcp6_flags_used++]; - + iov = tcp4_l2_flags_iov[tcp4_flags_used++]; payload = iov[TCP_IOV_PAYLOAD].iov_base; - seq = conn->seq_to_tap; ret = tcp_prepare_flags(c, conn, flags, &payload->th, payload->opts, &optlen); if (ret <= 0) { - if (CONN_V4(conn)) - tcp4_flags_used--; - else - tcp6_flags_used--; + tcp4_flags_used--; return ret; } @@ -346,10 +258,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) struct iovec *dup_iov; int i; - if (CONN_V4(conn)) - dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++]; - else - dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++]; + dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++]; for (i = 0; i < TCP_NUM_IOVS; i++) memcpy(dup_iov[i].iov_base, iov[i].iov_base, @@ -357,13 +266,8 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len; } - if (CONN_V4(conn)) { - if (tcp4_flags_used > TCP_FRAMES_MEM - 2) - tcp_flags_flush(c); - } else { - if (tcp6_flags_used > TCP_FRAMES_MEM - 2) - tcp_flags_flush(c); - } + if (tcp4_flags_used > TCP_FRAMES_MEM - 2) + tcp_flags_flush(c); return 0; } @@ -379,36 +283,26 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn, ssize_t dlen, int no_csum, uint32_t seq) { + struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1]; + const uint16_t *check = NULL; struct iovec *iov; size_t l4len; conn->seq_to_tap = seq + dlen; - if (CONN_V4(conn)) { - struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1]; - const uint16_t *check = NULL; - - if (no_csum) { - struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base; - check = &iph->check; - } + if (CONN_V4(conn) && no_csum) { + struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base; - tcp4_frame_conns[tcp4_payload_used] = conn; - - iov = tcp4_l2_iov[tcp4_payload_used++]; - l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq); - iov[TCP_IOV_PAYLOAD].iov_len = l4len; - if (tcp4_payload_used > TCP_FRAMES_MEM - 1) - tcp_payload_flush(c); - } else if (CONN_V6(conn)) { - tcp6_frame_conns[tcp6_payload_used] = conn; - - iov = tcp6_l2_iov[tcp6_payload_used++]; - l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq); - iov[TCP_IOV_PAYLOAD].iov_len = l4len; - if (tcp6_payload_used > TCP_FRAMES_MEM - 1) - tcp_payload_flush(c); + check = &iph->check; } + + tcp4_frame_conns[tcp4_payload_used] = conn; + + iov = tcp4_l2_iov[tcp4_payload_used++]; + l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq); + iov[TCP_IOV_PAYLOAD].iov_len = l4len; + if (tcp4_payload_used > TCP_FRAMES_MEM - 1) + tcp_payload_flush(c); } /** @@ -472,19 +366,15 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) mh_sock.msg_iovlen = fill_bufs; } - if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) || - (!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) { + if ((v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM)) { tcp_payload_flush(c); /* Silence Coverity CWE-125 false positive */ - tcp4_payload_used = tcp6_payload_used = 0; + tcp4_payload_used = 0; } for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) { - if (v4) - iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data; - else - iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data; + iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data; iov->iov_len = mss; } if (iov_rem) diff --git a/tcp_buf.h b/tcp_buf.h index d3d0d7f..d7cdbaf 100644 --- a/tcp_buf.h +++ b/tcp_buf.h @@ -13,6 +13,7 @@ void tcp_payload_flush(struct ctx *c); int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn); int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags); +extern struct ethhdr tcp4_eth_src; extern struct iphdr tcp_payload_ip4; extern struct ipv6hdr tcp_payload_ip6; #endif /*TCP_BUF_H */-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson
The queues and structures previously used for TCPv4 traffic are now used for both IP protocols. We therefore need to replace the prefix tcp4_* with the generic tcp_ everywhere applicable. There are no functional changes in this commit. Signed-off-by: Jon Maloy <jmaloy(a)redhat.com> --- tcp.c | 4 +- tcp_buf.c | 115 +++++++++++++++++++++++++++--------------------------- tcp_buf.h | 2 +- 3 files changed, 60 insertions(+), 61 deletions(-) diff --git a/tcp.c b/tcp.c index 19cf9e5..433dce4 100644 --- a/tcp.c +++ b/tcp.c @@ -998,14 +998,14 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, if (a4) { iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr); - tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); + tcp_eth_src.h_proto = htons_constant(ETH_P_IP); return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, check, seq); } else { iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr); - tcp4_eth_src.h_proto = htons_constant(ETH_P_IPV6); + tcp_eth_src.h_proto = htons_constant(ETH_P_IPV6); return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, diff --git a/tcp_buf.c b/tcp_buf.c index 92c4d73..0476184 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -80,28 +80,27 @@ struct tcp_flags_t { #endif /* Ethernet header for IPv4 frames */ -struct ethhdr tcp4_eth_src; +struct ethhdr tcp_eth_src; -static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM]; +static struct tap_hdr tcp_payload_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers */ struct iphdr tcp_payload_ip4; -static struct iphdr_t tcp4_payload_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp_payload_ip[TCP_FRAMES_MEM]; /* TCP segments with payload for IPv4 frames */ -static struct tcp_payload_t tcp4_payload[TCP_FRAMES_MEM]; +static struct tcp_payload_t tcp_payload[TCP_FRAMES_MEM]; -static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516"); +static_assert(sizeof(tcp_payload[0].data) > MSS4, "MSS4 is greater than 65516"); /* References tracking the owner connection of frames in the tap outqueue */ -static struct tcp_tap_conn *tcp4_frame_conns[TCP_FRAMES_MEM]; -static unsigned int tcp4_payload_used; +static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM]; +static unsigned int tcp_payload_used; -static struct tap_hdr tcp4_flags_tap_hdr[TCP_FRAMES_MEM]; +static struct tap_hdr tcp_flags_tap_hdr[TCP_FRAMES_MEM]; /* IPv4 headers for TCP segment without payload */ -static struct iphdr_t tcp4_flags_ip[TCP_FRAMES_MEM]; +static struct iphdr_t tcp_flags_ip[TCP_FRAMES_MEM]; /* TCP segments without payload for IPv4 frames */ -static struct tcp_flags_t tcp4_flags[TCP_FRAMES_MEM]; - -static unsigned int tcp4_flags_used; +static struct tcp_flags_t tcp_flags[TCP_FRAMES_MEM]; +static unsigned int tcp_flags_used; /* Ethernet header for IPv6 frames */ struct ipv6hdr tcp_payload_ip6; @@ -109,8 +108,8 @@ struct ipv6hdr tcp_payload_ip6; /* recvmsg()/sendmsg() data for tap */ static struct iovec iov_sock [TCP_FRAMES_MEM + 1]; -static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; -static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; +static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS]; +static struct iovec tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS]; /** * tcp_update_l2_buf() - Update Ethernet header buffers with addresses @@ -119,7 +118,7 @@ static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS]; */ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) { - eth_update_mac(&tcp4_eth_src, eth_d, eth_s); + eth_update_mac(&tcp_eth_src, eth_d, eth_s); } /** @@ -132,39 +131,39 @@ void tcp_sock4_iov_init(const struct ctx *c) struct iovec *iov; int i; - tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); + tcp_eth_src.h_proto = htons_constant(ETH_P_IP); tcp_payload_ip4 = iph; - for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) { - tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4; - tcp4_payload[i].th.ack = 1; + for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) { + tcp_payload[i].th.doff = sizeof(struct tcphdr) / 4; + tcp_payload[i].th.ack = 1; } - for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) { - tcp4_flags_ip[i].ip4 = iph; - tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4; - tcp4_flags[i].th.ack = 1; + for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) { + tcp_flags_ip[i].ip4 = iph; + tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4; + tcp_flags[i].th.ack = 1; } for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp4_l2_iov[i]; + iov = tcp_l2_iov[i]; - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]); - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); - iov[TCP_IOV_IP].iov_base = &tcp4_payload_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(tcp4_payload_ip[i].ip4); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i]; + iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_payload_tap_hdr[i]); + iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp_eth_src); + iov[TCP_IOV_IP].iov_base = &tcp_payload_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp_payload_ip[i].ip4); + iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i]; } for (i = 0; i < TCP_FRAMES_MEM; i++) { - iov = tcp4_l2_flags_iov[i]; - - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]); - iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; - iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src); - iov[TCP_IOV_IP].iov_base = &tcp4_flags_ip[i]; - iov[TCP_IOV_IP].iov_len = sizeof(tcp4_flags_ip[i].ip4); - iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i]; + iov = tcp_l2_flags_iov[i]; + + iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]); + iov[TCP_IOV_ETH].iov_base = &tcp_eth_src; + iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp_eth_src); + iov[TCP_IOV_IP].iov_base = &tcp_flags_ip[i]; + iov[TCP_IOV_IP].iov_len = sizeof(tcp_flags_ip[i].ip4); + iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i]; } } @@ -174,9 +173,9 @@ void tcp_sock4_iov_init(const struct ctx *c) */ void tcp_flags_flush(const struct ctx *c) { - tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS, - tcp4_flags_used); - tcp4_flags_used = 0; + tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS, + tcp_flags_used); + tcp_flags_used = 0; } /** @@ -215,13 +214,13 @@ void tcp_payload_flush(struct ctx *c) { size_t m; - m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS, - tcp4_payload_used); - if (m != tcp4_payload_used) { - tcp_revert_seq(c, &tcp4_frame_conns[m], &tcp4_l2_iov[m], - tcp4_payload_used - m); + m = tap_send_frames(c, &tcp_l2_iov[0][0], TCP_NUM_IOVS, + tcp_payload_used); + if (m != tcp_payload_used) { + tcp_revert_seq(c, &tcp_frame_conns[m], &tcp_l2_iov[m], + tcp_payload_used - m); } - tcp4_payload_used = 0; + tcp_payload_used = 0; } /** @@ -241,13 +240,13 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) uint32_t seq; int ret; - iov = tcp4_l2_flags_iov[tcp4_flags_used++]; + iov = tcp_l2_flags_iov[tcp_flags_used++]; payload = iov[TCP_IOV_PAYLOAD].iov_base; seq = conn->seq_to_tap; ret = tcp_prepare_flags(c, conn, flags, &payload->th, payload->opts, &optlen); if (ret <= 0) { - tcp4_flags_used--; + tcp_flags_used--; return ret; } @@ -258,7 +257,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) struct iovec *dup_iov; int i; - dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++]; + dup_iov = tcp_l2_flags_iov[tcp_flags_used++]; for (i = 0; i < TCP_NUM_IOVS; i++) memcpy(dup_iov[i].iov_base, iov[i].iov_base, @@ -266,7 +265,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len; } - if (tcp4_flags_used > TCP_FRAMES_MEM - 2) + if (tcp_flags_used > TCP_FRAMES_MEM - 2) tcp_flags_flush(c); return 0; @@ -283,7 +282,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn, ssize_t dlen, int no_csum, uint32_t seq) { - struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1]; + struct iovec *iov_prev = tcp_l2_iov[tcp_payload_used - 1]; const uint16_t *check = NULL; struct iovec *iov; size_t l4len; @@ -296,12 +295,12 @@ static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn, check = &iph->check; } - tcp4_frame_conns[tcp4_payload_used] = conn; + tcp_frame_conns[tcp_payload_used] = conn; - iov = tcp4_l2_iov[tcp4_payload_used++]; + iov = tcp_l2_iov[tcp_payload_used++]; l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq); iov[TCP_IOV_PAYLOAD].iov_len = l4len; - if (tcp4_payload_used > TCP_FRAMES_MEM - 1) + if (tcp_payload_used > TCP_FRAMES_MEM - 1) tcp_payload_flush(c); } @@ -366,15 +365,15 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) mh_sock.msg_iovlen = fill_bufs; } - if ((v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM)) { + if ((v4 && tcp_payload_used + fill_bufs > TCP_FRAMES_MEM)) { tcp_payload_flush(c); /* Silence Coverity CWE-125 false positive */ - tcp4_payload_used = 0; + tcp_payload_used = 0; } for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) { - iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data; + iov->iov_base = &tcp_payload[tcp_payload_used + i].data; iov->iov_len = mss; } if (iov_rem) @@ -422,7 +421,7 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) dlen = mss; seq = conn->seq_to_tap; for (i = 0; i < send_bufs; i++) { - int no_csum = i && i != send_bufs - 1 && tcp4_payload_used; + int no_csum = i && i != send_bufs - 1 && tcp_payload_used; if (i == send_bufs - 1) dlen = last_len; diff --git a/tcp_buf.h b/tcp_buf.h index d7cdbaf..3dc3c95 100644 --- a/tcp_buf.h +++ b/tcp_buf.h @@ -13,7 +13,7 @@ void tcp_payload_flush(struct ctx *c); int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn); int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags); -extern struct ethhdr tcp4_eth_src; +extern struct ethhdr tcp_eth_src; extern struct iphdr tcp_payload_ip4; extern struct ipv6hdr tcp_payload_ip6; #endif /*TCP_BUF_H */ -- 2.45.2
On Fri, 6 Sep 2024 17:34:23 -0400 Jon Maloy <jmaloy(a)redhat.com> wrote:This should save us some memory and code. Jon Maloy (4): tcp: create unified struct for IPv4 and IPv6 header tcp: update ip address in l2 tap queues on the fly tcp: unify l2 TCPv4 and TCPv6 queues and structures tcp: change prefix tcp4_ to tcp_ where applicable tcp.c | 22 ++--- tcp_buf.c | 259 +++++++++++++++++------------------------------------- tcp_buf.h | 3 + 3 files changed, 98 insertions(+), 186 deletions(-)The saving in lines of code is nice (68 lines excluding comments), I'm not quite sure about memory savings and overhead. Memory-wise, if one IP version is disabled, we won't initialise any related buffer, so in that case we won't save any memory with these changes. If both IPv4 and IPv6 are enabled, we'll halve the memory usage for inbound payload buffers (about 8 MiB instead of 16 MiB): $ nm -td -P passt.avx2 | grep "tcp6_payload " tcp6_payload b 48607968 8388608 but that also halves the total amount of available buffers. If we have occasional usage of one IP family, these changes decrease waste, but if both IPv4 and IPv6 are used consistently, this simply means we're cutting the amount of buffers in half. However, we could double TCP_FRAMES_MEM and have, as a result, a more optimised management of buffers. But I'm not sure it makes sense considered the potential overhead. That is, there's a reason why I originally added two separate sets of buffers for IPv4 and IPv6: to enable pre-computations of headers, so that we avoid populating the full headers at every received packet. This series reverses that. There's the possibility that improvements in data locality and lower data cache usage outweigh that (as David mentioned in his comment to 1/4), but that should be demonstrated first. By the way, I tested this series, the clang-tidy test fails (see David's comment to 2/4) and the "TCP throughput over IPv6: guest to host" test consistently hangs as we switch to the 65520 bytes MTU. It looks like the iperf3 client fails to connect altogether, but I didn't really investigate. -- Stefano