For vhost-user, we will need to spread TCP payload over several buffers. To re-use tcp_update_check_tcp[4|6](), provide an iovec rather than a pointer to a buffer. This series updates also csum_iov() and pcap_iov() to add an offset of bytes to skip in the iovec array. It's based on top of "tcp: Use tcp_payload_t rather than tcphdr" that is added in the series for convenience. v3 adds "udp: Update UDP checksum using an iovec array" that modifies csum_udp4() and csum_udp6() in the same way. Laurent Vivier (5): tcp: Use tcp_payload_t rather than tcphdr pcap: Add an offset argument in pcap_iov() checksum: Add an offset argument in csum_iov() tcp: Update TCP checksum using an iovec array udp: Update UDP checksum using an iovec array checksum.c | 46 +++++++++++----- checksum.h | 7 +-- iov.c | 1 - pcap.c | 5 +- pcap.h | 2 +- tap.c | 14 +++-- tap.h | 2 +- tcp.c | 140 +++++++++++++++++++++++++++++++++++++++---------- tcp_buf.c | 29 ---------- tcp_internal.h | 29 ++++++++++ udp.c | 17 ++++-- 11 files changed, 206 insertions(+), 86 deletions(-) -- 2.46.0
As tcp_update_check_tcp4() and tcp_update_check_tcp6() compute the checksum using the TCP header and the TCP payload, it is clearer to use a pointer to tcp_payload_t that includes tcphdr and payload rather than a pointer to tcphdr (and guessing TCP header is followed by the payload). Move tcp_payload_t and tcp_flags_t to tcp_internal.h. (They will be used also by vhost-user). Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au> --- tcp.c | 42 ++++++++++++++++++++++-------------------- tcp_buf.c | 29 ----------------------------- tcp_internal.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 49 deletions(-) diff --git a/tcp.c b/tcp.c index 1962fcc469ed..c9472d905520 100644 --- a/tcp.c +++ b/tcp.c @@ -757,32 +757,34 @@ static void tcp_sock_set_bufsize(const struct ctx *c, int s) /** * tcp_update_check_tcp4() - Update TCP checksum from stored one * @iph: IPv4 header - * @th: TCP header followed by TCP payload + * @bp: TCP header followed by TCP payload */ -static void tcp_update_check_tcp4(const struct iphdr *iph, struct tcphdr *th) +static void tcp_update_check_tcp4(const struct iphdr *iph, + struct tcp_payload_t *bp) { uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr); struct in_addr saddr = { .s_addr = iph->saddr }; struct in_addr daddr = { .s_addr = iph->daddr }; uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); - th->check = 0; - th->check = csum(th, l4len, sum); + bp->th.check = 0; + bp->th.check = csum(bp, l4len, sum); } /** * tcp_update_check_tcp6() - Calculate TCP checksum for IPv6 * @ip6h: IPv6 header - * @th: TCP header followed by TCP payload + * @bp: TCP header followed by TCP payload */ -static void tcp_update_check_tcp6(struct ipv6hdr *ip6h, struct tcphdr *th) +static void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, + struct tcp_payload_t *bp) { uint16_t l4len = ntohs(ip6h->payload_len); uint32_t sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, &ip6h->saddr, &ip6h->daddr); - th->check = 0; - th->check = csum(th, l4len, sum); + bp->th.check = 0; + bp->th.check = csum(bp, l4len, sum); } /** @@ -902,7 +904,7 @@ static void tcp_fill_header(struct tcphdr *th, * @conn: Connection pointer * @taph: tap backend specific header * @iph: Pointer to IPv4 header - * @th: Pointer to TCP header + * @bp: Pointer to TCP header followed by TCP payload * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment @@ -912,14 +914,14 @@ static void tcp_fill_header(struct tcphdr *th, */ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, struct tap_hdr *taph, - struct iphdr *iph, struct tcphdr *th, + struct iphdr *iph, struct tcp_payload_t *bp, size_t dlen, const uint16_t *check, uint32_t seq, bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); const struct in_addr *src4 = inany_v4(&tapside->oaddr); const struct in_addr *dst4 = inany_v4(&tapside->eaddr); - size_t l4len = dlen + sizeof(*th); + size_t l4len = dlen + sizeof(bp->th); size_t l3len = l4len + sizeof(*iph); ASSERT(src4 && dst4); @@ -931,12 +933,12 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, iph->check = check ? *check : csum_ip4_header(l3len, IPPROTO_TCP, *src4, *dst4); - tcp_fill_header(th, conn, seq); + tcp_fill_header(&bp->th, conn, seq); if (no_tcp_csum) - th->check = 0; + bp->th.check = 0; else - tcp_update_check_tcp4(iph, th); + tcp_update_check_tcp4(iph, bp); tap_hdr_update(taph, l3len + sizeof(struct ethhdr)); @@ -948,7 +950,7 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, * @conn: Connection pointer * @taph: tap backend specific header * @ip6h: Pointer to IPv6 header - * @th: Pointer to TCP header + * @bp: Pointer to TCP header followed by TCP payload * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment @@ -958,11 +960,11 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, */ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, struct tap_hdr *taph, - struct ipv6hdr *ip6h, struct tcphdr *th, + struct ipv6hdr *ip6h, struct tcp_payload_t *bp, size_t dlen, uint32_t seq, bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); - size_t l4len = dlen + sizeof(*th); + size_t l4len = dlen + sizeof(bp->th); ip6h->payload_len = htons(l4len); ip6h->saddr = tapside->oaddr.a6; @@ -976,12 +978,12 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff; ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff; - tcp_fill_header(th, conn, seq); + tcp_fill_header(&bp->th, conn, seq); if (no_tcp_csum) - th->check = 0; + bp->th.check = 0; else - tcp_update_check_tcp6(ip6h, th); + tcp_update_check_tcp6(ip6h, bp); tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr)); diff --git a/tcp_buf.c b/tcp_buf.c index ffbff5e4b485..238827b01d90 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -38,35 +38,6 @@ (c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM) /* Static buffers */ -/** - * struct tcp_payload_t - TCP header and data to send segments with payload - * @th: TCP header - * @data: TCP data - */ -struct tcp_payload_t { - struct tcphdr th; - uint8_t data[IP_MAX_MTU - sizeof(struct tcphdr)]; -#ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))); /* For AVX2 checksum routines */ -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); -#endif - -/** - * struct tcp_flags_t - TCP header and data to send zero-length - * segments (flags) - * @th: TCP header - * @opts TCP options - */ -struct tcp_flags_t { - struct tcphdr th; - char opts[OPT_MSS_LEN + OPT_WS_LEN + 1]; -#ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))); -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); -#endif - /* Ethernet header for IPv4 frames */ static struct ethhdr tcp4_eth_src; diff --git a/tcp_internal.h b/tcp_internal.h index de06db1438d6..2f74ffeff8f3 100644 --- a/tcp_internal.h +++ b/tcp_internal.h @@ -63,6 +63,35 @@ enum tcp_iov_parts { TCP_NUM_IOVS }; +/** + * struct tcp_payload_t - TCP header and data to send segments with payload + * @th: TCP header + * @data: TCP data + */ +struct tcp_payload_t { + struct tcphdr th; + uint8_t data[IP_MAX_MTU - sizeof(struct tcphdr)]; +#ifdef __AVX2__ +} __attribute__ ((packed, aligned(32))); /* For AVX2 checksum routines */ +#else +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); +#endif + +/** + * struct tcp_flags_t - TCP header and data to send zero-length + * segments (flags) + * @th: TCP header + * @opts TCP options + */ +struct tcp_flags_t { + struct tcphdr th; + char opts[OPT_MSS_LEN + OPT_WS_LEN + 1]; +#ifdef __AVX2__ +} __attribute__ ((packed, aligned(32))); +#else +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); +#endif + extern char tcp_buf_discard [MAX_WINDOW]; void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn, -- 2.46.0
The offset is passed directly to pcap_frame() and allows any headers that are not part of the frame to capture to be skipped. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au> --- pcap.c | 5 +++-- pcap.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pcap.c b/pcap.c index e6b5ced4a9f8..6ee6cdfd261a 100644 --- a/pcap.c +++ b/pcap.c @@ -138,9 +138,10 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n, * @iov: Pointer to the array of struct iovec describing the I/O vector * containing packet data to write, including L2 header * @iovcnt: Number of buffers (@iov entries) + * @offset: Offset of the L2 frame within the full data length */ /* cppcheck-suppress unusedFunction */ -void pcap_iov(const struct iovec *iov, size_t iovcnt) +void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset) { struct timespec now; @@ -148,7 +149,7 @@ void pcap_iov(const struct iovec *iov, size_t iovcnt) return; clock_gettime(CLOCK_REALTIME, &now); - pcap_frame(iov, iovcnt, 0, &now); + pcap_frame(iov, iovcnt, offset, &now); } /** diff --git a/pcap.h b/pcap.h index 533923749222..9795f2e8adc5 100644 --- a/pcap.h +++ b/pcap.h @@ -9,7 +9,7 @@ void pcap(const char *pkt, size_t l2len); void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n, size_t offset); -void pcap_iov(const struct iovec *iov, size_t iovcnt); +void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset); void pcap_init(struct ctx *c); #endif /* PCAP_H */ -- 2.46.0
The offset allows any headers that are not part of the data to checksum to be skipped. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- Notes: v5: - correctly set the length for the partial csum_unfolded() v3: - reorder @offset and @init v2: - check iov_skip_bytes() return value checksum.c | 16 ++++++++++++++-- checksum.h | 3 ++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/checksum.c b/checksum.c index 006614fcbb28..05d002ab0c25 100644 --- a/checksum.c +++ b/checksum.c @@ -59,6 +59,7 @@ #include "util.h" #include "ip.h" #include "checksum.h" +#include "iov.h" /* Checksums are optional for UDP over IPv4, so we usually just set * them to 0. Change this to 1 to calculate real UDP over IPv4 @@ -497,16 +498,27 @@ uint16_t csum(const void *buf, size_t len, uint32_t init) * * @iov Pointer to the array of IO vectors * @n Length of the array + * @offset: Offset of the data to checksum within the full data length * @init Initial 32-bit checksum, 0 for no pre-computed checksum * * Return: 16-bit folded, complemented checksum */ /* cppcheck-suppress unusedFunction */ -uint16_t csum_iov(const struct iovec *iov, size_t n, uint32_t init) +uint16_t csum_iov(const struct iovec *iov, size_t n, size_t offset, + uint32_t init) { unsigned int i; + size_t first; - for (i = 0; i < n; i++) + i = iov_skip_bytes(iov, n, offset, &first); + if (i >= n) + return (uint16_t)~csum_fold(init); + + init = csum_unfolded((char *)iov[i].iov_base + first, + iov[i].iov_len - first, init); + i++; + + for (; i < n; i++) init = csum_unfolded(iov[i].iov_base, iov[i].iov_len, init); return (uint16_t)~csum_fold(init); diff --git a/checksum.h b/checksum.h index c5964ac78921..49f7472dd1b6 100644 --- a/checksum.h +++ b/checksum.h @@ -32,6 +32,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, const void *payload, size_t dlen); uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init); uint16_t csum(const void *buf, size_t len, uint32_t init); -uint16_t csum_iov(const struct iovec *iov, size_t n, uint32_t init); +uint16_t csum_iov(const struct iovec *iov, size_t n, size_t offset, + uint32_t init); #endif /* CHECKSUM_H */ -- 2.46.0
On Fri, Sep 27, 2024 at 03:53:47PM +0200, Laurent Vivier wrote:The offset allows any headers that are not part of the data to checksum to be skipped. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com>Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au>--- Notes: v5: - correctly set the length for the partial csum_unfolded() v3: - reorder @offset and @init v2: - check iov_skip_bytes() return value checksum.c | 16 ++++++++++++++-- checksum.h | 3 ++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/checksum.c b/checksum.c index 006614fcbb28..05d002ab0c25 100644 --- a/checksum.c +++ b/checksum.c @@ -59,6 +59,7 @@ #include "util.h" #include "ip.h" #include "checksum.h" +#include "iov.h" /* Checksums are optional for UDP over IPv4, so we usually just set * them to 0. Change this to 1 to calculate real UDP over IPv4 @@ -497,16 +498,27 @@ uint16_t csum(const void *buf, size_t len, uint32_t init) * * @iov Pointer to the array of IO vectors * @n Length of the array + * @offset: Offset of the data to checksum within the full data length * @init Initial 32-bit checksum, 0 for no pre-computed checksum * * Return: 16-bit folded, complemented checksum */ /* cppcheck-suppress unusedFunction */ -uint16_t csum_iov(const struct iovec *iov, size_t n, uint32_t init) +uint16_t csum_iov(const struct iovec *iov, size_t n, size_t offset, + uint32_t init) { unsigned int i; + size_t first; - for (i = 0; i < n; i++) + i = iov_skip_bytes(iov, n, offset, &first); + if (i >= n) + return (uint16_t)~csum_fold(init); + + init = csum_unfolded((char *)iov[i].iov_base + first, + iov[i].iov_len - first, init); + i++; + + for (; i < n; i++) init = csum_unfolded(iov[i].iov_base, iov[i].iov_len, init); return (uint16_t)~csum_fold(init); diff --git a/checksum.h b/checksum.h index c5964ac78921..49f7472dd1b6 100644 --- a/checksum.h +++ b/checksum.h @@ -32,6 +32,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, const void *payload, size_t dlen); uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init); uint16_t csum(const void *buf, size_t len, uint32_t init); -uint16_t csum_iov(const struct iovec *iov, size_t n, uint32_t init); +uint16_t csum_iov(const struct iovec *iov, size_t n, size_t offset, + uint32_t init); #endif /* CHECKSUM_H */-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson
TCP header and payload are supposed to be in the same buffer, and tcp_update_check_tcp4()/tcp_update_check_tcp6() compute the checksum from the base address of the header using the length of the IP payload. In the future (for vhost-user) we need to dispatch the TCP header and the TCP payload through several buffers. To be able to manage that, we provide an iovec array that points to the data of the TCP frame. We provide also an offset to be able to provide an array that contains the TCP frame embedded in an lower level frame, and this offset points to the TCP header inside the iovec array. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- Notes: v5: - s/IPv6/IPv4/ - reintroduce ip6h and iph to avoid iov_size() - check pointer alignment before casting to the type v4: - replace die() by err() in tcp_update_check_tcp6() too v3: - replace die() by err() and return - add more information in the error message v2: - s/payload_offset/l4offset/ - check memory address of the checksum (alignment, iovec boundaries) checksum.c | 1 - iov.c | 1 - tcp.c | 122 ++++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 102 insertions(+), 22 deletions(-) diff --git a/checksum.c b/checksum.c index 05d002ab0c25..cf850196cca0 100644 --- a/checksum.c +++ b/checksum.c @@ -503,7 +503,6 @@ uint16_t csum(const void *buf, size_t len, uint32_t init) * * Return: 16-bit folded, complemented checksum */ -/* cppcheck-suppress unusedFunction */ uint16_t csum_iov(const struct iovec *iov, size_t n, size_t offset, uint32_t init) { diff --git a/iov.c b/iov.c index 3f9e229a305f..9116dda94247 100644 --- a/iov.c +++ b/iov.c @@ -25,7 +25,6 @@ #include "util.h" #include "iov.h" - /* iov_skip_bytes() - Skip leading bytes of an IO vector * @iov: IO vector * @n: Number of entries in @iov diff --git a/tcp.c b/tcp.c index c9472d905520..df3147a673d1 100644 --- a/tcp.c +++ b/tcp.c @@ -755,36 +755,106 @@ static void tcp_sock_set_bufsize(const struct ctx *c, int s) } /** - * tcp_update_check_tcp4() - Update TCP checksum from stored one + * tcp_update_check_tcp4() - Calculate TCP checksum for IPv4 * @iph: IPv4 header - * @bp: TCP header followed by TCP payload + * @iov: Pointer to the array of IO vectors + * @iov_cnt: Length of the array + * @l4offset: IPv4 payload offset in the iovec array */ -static void tcp_update_check_tcp4(const struct iphdr *iph, - struct tcp_payload_t *bp) +void tcp_update_check_tcp4(const struct iphdr *iph, + const struct iovec *iov, int iov_cnt, + size_t l4offset) { uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr); struct in_addr saddr = { .s_addr = iph->saddr }; struct in_addr daddr = { .s_addr = iph->daddr }; - uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + size_t check_ofs; + __sum16 *check; + int check_idx; + uint32_t sum; + uintptr_t ptr; + + sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + + check_idx = iov_skip_bytes(iov, iov_cnt, + l4offset + offsetof(struct tcphdr, check), + &check_ofs); + + if (check_idx >= iov_cnt) { + err("TCP4 buffer is too small, iov size %zd, check offset %zd", + iov_size(iov, iov_cnt), + l4offset + offsetof(struct tcphdr, check)); + return; + } - bp->th.check = 0; - bp->th.check = csum(bp, l4len, sum); + if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) { + err("TCP4 checksum field memory is not contiguous " + "check_ofs %zd check_idx %d iov_len %zd", + check_ofs, check_idx, iov[check_idx].iov_len); + return; + } + + ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); + if (ptr & (__alignof__(*check) - 1)) { + err("TCP4 checksum field is not correctly aligned in memory"); + return; + } + + check = (__sum16 *)ptr; + + *check = 0; + *check = csum_iov(iov, iov_cnt, l4offset, sum); } /** * tcp_update_check_tcp6() - Calculate TCP checksum for IPv6 * @ip6h: IPv6 header - * @bp: TCP header followed by TCP payload + * @iov: Pointer to the array of IO vectors + * @iov_cnt: Length of the array + * @l4offset: IPv6 payload offset in the iovec array */ -static void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, - struct tcp_payload_t *bp) +void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, + const struct iovec *iov, int iov_cnt, + size_t l4offset) { uint16_t l4len = ntohs(ip6h->payload_len); - uint32_t sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, - &ip6h->saddr, &ip6h->daddr); + size_t check_ofs; + __sum16 *check; + int check_idx; + uint32_t sum; + uintptr_t ptr; + + sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, &ip6h->saddr, + &ip6h->daddr); + + check_idx = iov_skip_bytes(iov, iov_cnt, + l4offset + offsetof(struct tcphdr, check), + &check_ofs); + + if (check_idx >= iov_cnt) { + err("TCP6 buffer is too small, iov size %zd, check offset %zd", + iov_size(iov, iov_cnt), + l4offset + offsetof(struct tcphdr, check)); + return; + } + + if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) { + err("TCP6 checksum field memory is not contiguous " + "check_ofs %zd check_idx %d iov_len %zd", + check_ofs, check_idx, iov[check_idx].iov_len); + return; + } + + ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); + if (ptr & (__alignof__(*check) - 1)) { + err("TCP6 checksum field is not correctly aligned in memory"); + return; + } - bp->th.check = 0; - bp->th.check = csum(bp, l4len, sum); + check = (__sum16 *)ptr; + + *check = 0; + *check = csum_iov(iov, iov_cnt, l4offset, sum); } /** @@ -935,10 +1005,16 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, tcp_fill_header(&bp->th, conn, seq); - if (no_tcp_csum) + if (no_tcp_csum) { bp->th.check = 0; - else - tcp_update_check_tcp4(iph, bp); + } else { + const struct iovec iov = { + .iov_base = bp, + .iov_len = ntohs(iph->tot_len) - sizeof(struct iphdr), + }; + + tcp_update_check_tcp4(iph, &iov, 1, 0); + } tap_hdr_update(taph, l3len + sizeof(struct ethhdr)); @@ -980,10 +1056,16 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, tcp_fill_header(&bp->th, conn, seq); - if (no_tcp_csum) + if (no_tcp_csum) { bp->th.check = 0; - else - tcp_update_check_tcp6(ip6h, bp); + } else { + const struct iovec iov = { + .iov_base = bp, + .iov_len = ntohs(ip6h->payload_len) + }; + + tcp_update_check_tcp6(ip6h, &iov, 1, 0); + } tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr)); -- 2.46.0
On Fri, Sep 27, 2024 at 03:53:48PM +0200, Laurent Vivier wrote:TCP header and payload are supposed to be in the same buffer, and tcp_update_check_tcp4()/tcp_update_check_tcp6() compute the checksum from the base address of the header using the length of the IP payload. In the future (for vhost-user) we need to dispatch the TCP header and the TCP payload through several buffers. To be able to manage that, we provide an iovec array that points to the data of the TCP frame. We provide also an offset to be able to provide an array that contains the TCP frame embedded in an lower level frame, and this offset points to the TCP header inside the iovec array. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com>Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au>--- Notes: v5: - s/IPv6/IPv4/ - reintroduce ip6h and iph to avoid iov_size() - check pointer alignment before casting to the type v4: - replace die() by err() in tcp_update_check_tcp6() too v3: - replace die() by err() and return - add more information in the error message v2: - s/payload_offset/l4offset/ - check memory address of the checksum (alignment, iovec boundaries) checksum.c | 1 - iov.c | 1 - tcp.c | 122 ++++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 102 insertions(+), 22 deletions(-) diff --git a/checksum.c b/checksum.c index 05d002ab0c25..cf850196cca0 100644 --- a/checksum.c +++ b/checksum.c @@ -503,7 +503,6 @@ uint16_t csum(const void *buf, size_t len, uint32_t init) * * Return: 16-bit folded, complemented checksum */ -/* cppcheck-suppress unusedFunction */ uint16_t csum_iov(const struct iovec *iov, size_t n, size_t offset, uint32_t init) { diff --git a/iov.c b/iov.c index 3f9e229a305f..9116dda94247 100644 --- a/iov.c +++ b/iov.c @@ -25,7 +25,6 @@ #include "util.h" #include "iov.h" - /* iov_skip_bytes() - Skip leading bytes of an IO vector * @iov: IO vector * @n: Number of entries in @iov diff --git a/tcp.c b/tcp.c index c9472d905520..df3147a673d1 100644 --- a/tcp.c +++ b/tcp.c @@ -755,36 +755,106 @@ static void tcp_sock_set_bufsize(const struct ctx *c, int s) } /** - * tcp_update_check_tcp4() - Update TCP checksum from stored one + * tcp_update_check_tcp4() - Calculate TCP checksum for IPv4 * @iph: IPv4 header - * @bp: TCP header followed by TCP payload + * @iov: Pointer to the array of IO vectors + * @iov_cnt: Length of the array + * @l4offset: IPv4 payload offset in the iovec array */ -static void tcp_update_check_tcp4(const struct iphdr *iph, - struct tcp_payload_t *bp) +void tcp_update_check_tcp4(const struct iphdr *iph, + const struct iovec *iov, int iov_cnt, + size_t l4offset) { uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr); struct in_addr saddr = { .s_addr = iph->saddr }; struct in_addr daddr = { .s_addr = iph->daddr }; - uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + size_t check_ofs; + __sum16 *check; + int check_idx; + uint32_t sum; + uintptr_t ptr; + + sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + + check_idx = iov_skip_bytes(iov, iov_cnt, + l4offset + offsetof(struct tcphdr, check), + &check_ofs); + + if (check_idx >= iov_cnt) { + err("TCP4 buffer is too small, iov size %zd, check offset %zd", + iov_size(iov, iov_cnt), + l4offset + offsetof(struct tcphdr, check)); + return; + } - bp->th.check = 0; - bp->th.check = csum(bp, l4len, sum); + if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) { + err("TCP4 checksum field memory is not contiguous " + "check_ofs %zd check_idx %d iov_len %zd", + check_ofs, check_idx, iov[check_idx].iov_len); + return; + } + + ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); + if (ptr & (__alignof__(*check) - 1)) { + err("TCP4 checksum field is not correctly aligned in memory"); + return; + } + + check = (__sum16 *)ptr; + + *check = 0; + *check = csum_iov(iov, iov_cnt, l4offset, sum); } /** * tcp_update_check_tcp6() - Calculate TCP checksum for IPv6 * @ip6h: IPv6 header - * @bp: TCP header followed by TCP payload + * @iov: Pointer to the array of IO vectors + * @iov_cnt: Length of the array + * @l4offset: IPv6 payload offset in the iovec array */ -static void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, - struct tcp_payload_t *bp) +void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, + const struct iovec *iov, int iov_cnt, + size_t l4offset) { uint16_t l4len = ntohs(ip6h->payload_len); - uint32_t sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, - &ip6h->saddr, &ip6h->daddr); + size_t check_ofs; + __sum16 *check; + int check_idx; + uint32_t sum; + uintptr_t ptr; + + sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, &ip6h->saddr, + &ip6h->daddr); + + check_idx = iov_skip_bytes(iov, iov_cnt, + l4offset + offsetof(struct tcphdr, check), + &check_ofs); + + if (check_idx >= iov_cnt) { + err("TCP6 buffer is too small, iov size %zd, check offset %zd", + iov_size(iov, iov_cnt), + l4offset + offsetof(struct tcphdr, check)); + return; + } + + if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) { + err("TCP6 checksum field memory is not contiguous " + "check_ofs %zd check_idx %d iov_len %zd", + check_ofs, check_idx, iov[check_idx].iov_len); + return; + } + + ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); + if (ptr & (__alignof__(*check) - 1)) { + err("TCP6 checksum field is not correctly aligned in memory"); + return; + } - bp->th.check = 0; - bp->th.check = csum(bp, l4len, sum); + check = (__sum16 *)ptr; + + *check = 0; + *check = csum_iov(iov, iov_cnt, l4offset, sum); } /** @@ -935,10 +1005,16 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, tcp_fill_header(&bp->th, conn, seq); - if (no_tcp_csum) + if (no_tcp_csum) { bp->th.check = 0; - else - tcp_update_check_tcp4(iph, bp); + } else { + const struct iovec iov = { + .iov_base = bp, + .iov_len = ntohs(iph->tot_len) - sizeof(struct iphdr), + }; + + tcp_update_check_tcp4(iph, &iov, 1, 0); + } tap_hdr_update(taph, l3len + sizeof(struct ethhdr)); @@ -980,10 +1056,16 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, tcp_fill_header(&bp->th, conn, seq); - if (no_tcp_csum) + if (no_tcp_csum) { bp->th.check = 0; - else - tcp_update_check_tcp6(ip6h, bp); + } else { + const struct iovec iov = { + .iov_base = bp, + .iov_len = ntohs(ip6h->payload_len) + }; + + tcp_update_check_tcp6(ip6h, &iov, 1, 0); + } tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr));-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson
On Fri, 27 Sep 2024 15:53:48 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote:[...] +void tcp_update_check_tcp4(const struct iphdr *iph, + const struct iovec *iov, int iov_cnt, + size_t l4offset) { uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr); struct in_addr saddr = { .s_addr = iph->saddr }; struct in_addr daddr = { .s_addr = iph->daddr }; - uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + size_t check_ofs; + __sum16 *check; + int check_idx; + uint32_t sum; + uintptr_t ptr; + + sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + + check_idx = iov_skip_bytes(iov, iov_cnt, + l4offset + offsetof(struct tcphdr, check), + &check_ofs); + + if (check_idx >= iov_cnt) { + err("TCP4 buffer is too small, iov size %zd, check offset %zd", + iov_size(iov, iov_cnt), + l4offset + offsetof(struct tcphdr, check)); + return; + } - bp->th.check = 0; - bp->th.check = csum(bp, l4len, sum); + if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) { + err("TCP4 checksum field memory is not contiguous " + "check_ofs %zd check_idx %d iov_len %zd", + check_ofs, check_idx, iov[check_idx].iov_len); + return; + } + + ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); + if (ptr & (__alignof__(*check) - 1)) { + err("TCP4 checksum field is not correctly aligned in memory"); + return; + } + + check = (__sum16 *)ptr;clang-tidy (14.0.6) says: /home/sbrivio/passt/tcp.c:803:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ /home/sbrivio/passt/tcp.c:854:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ ...I'm still trying to figure out what that actually means. If it's trivial I can fix that up on merge. -- Stefano
On Tue, 1 Oct 2024 09:29:28 +0200 Stefano Brivio <sbrivio(a)redhat.com> wrote:On Fri, 27 Sep 2024 15:53:48 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote:Something like this? -- diff --git a/tcp.c b/tcp.c index ffd82e1..2b8edda 100644 --- a/tcp.c +++ b/tcp.c @@ -772,7 +772,7 @@ void tcp_update_check_tcp4(const struct iphdr *iph, __sum16 *check; int check_idx; uint32_t sum; - uintptr_t ptr; + char *ptr; sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); @@ -794,8 +794,8 @@ void tcp_update_check_tcp4(const struct iphdr *iph, return; } - ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); - if (ptr & (__alignof__(*check) - 1)) { + ptr = (char *)iov[check_idx].iov_base + check_ofs; + if ((uintptr_t)ptr & (__alignof__(*check) - 1)) { err("TCP4 checksum field is not correctly aligned in memory"); return; } @@ -822,7 +822,7 @@ void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, __sum16 *check; int check_idx; uint32_t sum; - uintptr_t ptr; + char *ptr; sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, &ip6h->saddr, &ip6h->daddr); @@ -845,8 +845,8 @@ void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, return; } - ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); - if (ptr & (__alignof__(*check) - 1)) { + ptr = (char *)iov[check_idx].iov_base + check_ofs; + if ((uintptr_t)ptr & (__alignof__(*check) - 1)) { err("TCP6 checksum field is not correctly aligned in memory"); return; } -- it doesn't really improve on the "optimization opportunities" reported by clang-tidy but it silences the warning without much fuss. I can apply this on top if it looks okay to you. -- Stefano[...] +void tcp_update_check_tcp4(const struct iphdr *iph, + const struct iovec *iov, int iov_cnt, + size_t l4offset) { uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr); struct in_addr saddr = { .s_addr = iph->saddr }; struct in_addr daddr = { .s_addr = iph->daddr }; - uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + size_t check_ofs; + __sum16 *check; + int check_idx; + uint32_t sum; + uintptr_t ptr; + + sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + + check_idx = iov_skip_bytes(iov, iov_cnt, + l4offset + offsetof(struct tcphdr, check), + &check_ofs); + + if (check_idx >= iov_cnt) { + err("TCP4 buffer is too small, iov size %zd, check offset %zd", + iov_size(iov, iov_cnt), + l4offset + offsetof(struct tcphdr, check)); + return; + } - bp->th.check = 0; - bp->th.check = csum(bp, l4len, sum); + if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) { + err("TCP4 checksum field memory is not contiguous " + "check_ofs %zd check_idx %d iov_len %zd", + check_ofs, check_idx, iov[check_idx].iov_len); + return; + } + + ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); + if (ptr & (__alignof__(*check) - 1)) { + err("TCP4 checksum field is not correctly aligned in memory"); + return; + } + + check = (__sum16 *)ptr;clang-tidy (14.0.6) says: /home/sbrivio/passt/tcp.c:803:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ /home/sbrivio/passt/tcp.c:854:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ ...I'm still trying to figure out what that actually means. If it's trivial I can fix that up on merge.
On 01/10/2024 20:22, Stefano Brivio wrote:On Tue, 1 Oct 2024 09:29:28 +0200 Stefano Brivio <sbrivio(a)redhat.com> wrote:It's ok for me. Do you want I resend a version with it or can you update the patch on the merge? Thanks, LaurentOn Fri, 27 Sep 2024 15:53:48 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote:Something like this?[...] +void tcp_update_check_tcp4(const struct iphdr *iph, + const struct iovec *iov, int iov_cnt, + size_t l4offset) { uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr); struct in_addr saddr = { .s_addr = iph->saddr }; struct in_addr daddr = { .s_addr = iph->daddr }; - uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + size_t check_ofs; + __sum16 *check; + int check_idx; + uint32_t sum; + uintptr_t ptr; + + sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + + check_idx = iov_skip_bytes(iov, iov_cnt, + l4offset + offsetof(struct tcphdr, check), + &check_ofs); + + if (check_idx >= iov_cnt) { + err("TCP4 buffer is too small, iov size %zd, check offset %zd", + iov_size(iov, iov_cnt), + l4offset + offsetof(struct tcphdr, check)); + return; + } - bp->th.check = 0; - bp->th.check = csum(bp, l4len, sum); + if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) { + err("TCP4 checksum field memory is not contiguous " + "check_ofs %zd check_idx %d iov_len %zd", + check_ofs, check_idx, iov[check_idx].iov_len); + return; + } + + ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); + if (ptr & (__alignof__(*check) - 1)) { + err("TCP4 checksum field is not correctly aligned in memory"); + return; + } + + check = (__sum16 *)ptr;clang-tidy (14.0.6) says: /home/sbrivio/passt/tcp.c:803:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ /home/sbrivio/passt/tcp.c:854:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ ...I'm still trying to figure out what that actually means. If it's trivial I can fix that up on merge.-- diff --git a/tcp.c b/tcp.c index ffd82e1..2b8edda 100644 --- a/tcp.c +++ b/tcp.c @@ -772,7 +772,7 @@ void tcp_update_check_tcp4(const struct iphdr *iph, __sum16 *check; int check_idx; uint32_t sum; - uintptr_t ptr; + char *ptr; sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); @@ -794,8 +794,8 @@ void tcp_update_check_tcp4(const struct iphdr *iph, return; } - ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); - if (ptr & (__alignof__(*check) - 1)) { + ptr = (char *)iov[check_idx].iov_base + check_ofs; + if ((uintptr_t)ptr & (__alignof__(*check) - 1)) { err("TCP4 checksum field is not correctly aligned in memory"); return; } @@ -822,7 +822,7 @@ void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, __sum16 *check; int check_idx; uint32_t sum; - uintptr_t ptr; + char *ptr; sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, &ip6h->saddr, &ip6h->daddr); @@ -845,8 +845,8 @@ void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, return; } - ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); - if (ptr & (__alignof__(*check) - 1)) { + ptr = (char *)iov[check_idx].iov_base + check_ofs; + if ((uintptr_t)ptr & (__alignof__(*check) - 1)) { err("TCP6 checksum field is not correctly aligned in memory"); return; }
On 2 October 2024 09:39:23 CEST, Laurent Vivier <lvivier(a)redhat.com> wrote:On 01/10/2024 20:22, Stefano Brivio wrote:No no, thanks, I'll update this on merge. -- StefanoOn Tue, 1 Oct 2024 09:29:28 +0200 Stefano Brivio <sbrivio(a)redhat.com> wrote:It's ok for me. Do you want I resend a version with it or can you update the patch on the merge?On Fri, 27 Sep 2024 15:53:48 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote:Something like this?[...] +void tcp_update_check_tcp4(const struct iphdr *iph, + const struct iovec *iov, int iov_cnt, + size_t l4offset) { uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr); struct in_addr saddr = { .s_addr = iph->saddr }; struct in_addr daddr = { .s_addr = iph->daddr }; - uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + size_t check_ofs; + __sum16 *check; + int check_idx; + uint32_t sum; + uintptr_t ptr; + + sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); + + check_idx = iov_skip_bytes(iov, iov_cnt, + l4offset + offsetof(struct tcphdr, check), + &check_ofs); + + if (check_idx >= iov_cnt) { + err("TCP4 buffer is too small, iov size %zd, check offset %zd", + iov_size(iov, iov_cnt), + l4offset + offsetof(struct tcphdr, check)); + return; + } - bp->th.check = 0; - bp->th.check = csum(bp, l4len, sum); + if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) { + err("TCP4 checksum field memory is not contiguous " + "check_ofs %zd check_idx %d iov_len %zd", + check_ofs, check_idx, iov[check_idx].iov_len); + return; + } + + ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); + if (ptr & (__alignof__(*check) - 1)) { + err("TCP4 checksum field is not correctly aligned in memory"); + return; + } + + check = (__sum16 *)ptr;clang-tidy (14.0.6) says: /home/sbrivio/passt/tcp.c:803:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ /home/sbrivio/passt/tcp.c:854:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ ...I'm still trying to figure out what that actually means. If it's trivial I can fix that up on merge.
On 02/10/2024 11:00, Stefano Brivio wrote:On 2 October 2024 09:39:23 CEST, Laurent Vivier <lvivier(a)redhat.com> wrote:I also removed the static of tcp_update_check_tcp4() and tcp_update_check_tcp6(). I need this later in my series but not here. Could you update or do you want I send a new version? Thanks, LaurentOn 01/10/2024 20:22, Stefano Brivio wrote:No no, thanks, I'll update this on merge.On Tue, 1 Oct 2024 09:29:28 +0200 Stefano Brivio <sbrivio(a)redhat.com> wrote:It's ok for me. Do you want I resend a version with it or can you update the patch on the merge?On Fri, 27 Sep 2024 15:53:48 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote: > [...] > > +void tcp_update_check_tcp4(const struct iphdr *iph, > + const struct iovec *iov, int iov_cnt, > + size_t l4offset) > { > uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr); > struct in_addr saddr = { .s_addr = iph->saddr }; > struct in_addr daddr = { .s_addr = iph->daddr }; > - uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); > + size_t check_ofs; > + __sum16 *check; > + int check_idx; > + uint32_t sum; > + uintptr_t ptr; > + > + sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr); > + > + check_idx = iov_skip_bytes(iov, iov_cnt, > + l4offset + offsetof(struct tcphdr, check), > + &check_ofs); > + > + if (check_idx >= iov_cnt) { > + err("TCP4 buffer is too small, iov size %zd, check offset %zd", > + iov_size(iov, iov_cnt), > + l4offset + offsetof(struct tcphdr, check)); > + return; > + } > - bp->th.check = 0; > - bp->th.check = csum(bp, l4len, sum); > + if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) { > + err("TCP4 checksum field memory is not contiguous " > + "check_ofs %zd check_idx %d iov_len %zd", > + check_ofs, check_idx, iov[check_idx].iov_len); > + return; > + } > + > + ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs); > + if (ptr & (__alignof__(*check) - 1)) { > + err("TCP4 checksum field is not correctly aligned in memory"); > + return; > + } > + > + check = (__sum16 *)ptr; clang-tidy (14.0.6) says: /home/sbrivio/passt/tcp.c:803:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ /home/sbrivio/passt/tcp.c:854:10: error: integer to pointer cast pessimizes optimization opportunities [performance-no-int-to-ptr,-warnings-as-errors] check = (__sum16 *)ptr; ^ ...I'm still trying to figure out what that actually means. If it's trivial I can fix that up on merge.Something like this?
As for tcp_update_check_tcp4()/tcp_update_check_tcp6(), change csum_udp4() and csum_udp6() to use an iovec array. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- checksum.c | 29 ++++++++++++++++++----------- checksum.h | 4 ++-- tap.c | 14 +++++++++++--- tap.h | 2 +- udp.c | 17 +++++++++++++---- 5 files changed, 45 insertions(+), 21 deletions(-) diff --git a/checksum.c b/checksum.c index cf850196cca0..c673993d6f9c 100644 --- a/checksum.c +++ b/checksum.c @@ -166,22 +166,24 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol, * @udp4hr: UDP header, initialised apart from checksum * @saddr: IPv4 source address * @daddr: IPv4 destination address - * @payload: UDP packet payload - * @dlen: Length of @payload (not including UDP header) + * @iov: Pointer to the array of IO vectors + * @iov_cnt: Length of the array + * @offset: UDP payload offset in the iovec array */ void csum_udp4(struct udphdr *udp4hr, struct in_addr saddr, struct in_addr daddr, - const void *payload, size_t dlen) + const struct iovec *iov, int iov_cnt, size_t offset) { /* UDP checksums are optional, so don't bother */ udp4hr->check = 0; if (UDP4_REAL_CHECKSUMS) { - uint16_t l4len = dlen + sizeof(struct udphdr); + uint16_t l4len = iov_size(iov, iov_cnt) - offset + + sizeof(struct udphdr); uint32_t psum = proto_ipv4_header_psum(l4len, IPPROTO_UDP, saddr, daddr); psum = csum_unfolded(udp4hr, sizeof(struct udphdr), psum); - udp4hr->check = csum(payload, dlen, psum); + udp4hr->check = csum_iov(iov, iov_cnt, offset, psum); } } @@ -227,19 +229,24 @@ uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol, /** * csum_udp6() - Calculate and set checksum for a UDP over IPv6 packet * @udp6hr: UDP header, initialised apart from checksum - * @payload: UDP packet payload - * @dlen: Length of @payload (not including UDP header) + * @saddr: Source address + * @daddr: Destination address + * @iov: Pointer to the array of IO vectors + * @iov_cnt: Length of the array + * @offset: UDP payload offset in the iovec array */ void csum_udp6(struct udphdr *udp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, - const void *payload, size_t dlen) + const struct iovec *iov, int iov_cnt, size_t offset) { - uint32_t psum = proto_ipv6_header_psum(dlen + sizeof(struct udphdr), - IPPROTO_UDP, saddr, daddr); + uint16_t l4len = iov_size(iov, iov_cnt) - offset + + sizeof(struct udphdr); + uint32_t psum = proto_ipv6_header_psum(l4len, IPPROTO_UDP, + saddr, daddr); udp6hr->check = 0; psum = csum_unfolded(udp6hr, sizeof(struct udphdr), psum); - udp6hr->check = csum(payload, dlen, psum); + udp6hr->check = csum_iov(iov, iov_cnt, offset, psum); } /** diff --git a/checksum.h b/checksum.h index 49f7472dd1b6..31ba322fa98c 100644 --- a/checksum.h +++ b/checksum.h @@ -19,14 +19,14 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol, struct in_addr saddr, struct in_addr daddr); void csum_udp4(struct udphdr *udp4hr, struct in_addr saddr, struct in_addr daddr, - const void *payload, size_t dlen); + const struct iovec *iov, int iov_cnt, size_t offset); void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t dlen); uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol, const struct in6_addr *saddr, const struct in6_addr *daddr); void csum_udp6(struct udphdr *udp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, - const void *payload, size_t dlen); + const struct iovec *iov, int iov_cnt, size_t offset); void csum_icmp6(struct icmp6hdr *icmp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, const void *payload, size_t dlen); diff --git a/tap.c b/tap.c index 41af6a6d0c85..c53a39b79e62 100644 --- a/tap.c +++ b/tap.c @@ -172,11 +172,15 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport, struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP); struct udphdr *uh = tap_push_ip4h(ip4h, src, dst, l4len, IPPROTO_UDP); char *data = (char *)(uh + 1); + const struct iovec iov = { + .iov_base = (void *)in, + .iov_len = dlen + }; uh->source = htons(sport); uh->dest = htons(dport); uh->len = htons(l4len); - csum_udp4(uh, src, dst, in, dlen); + csum_udp4(uh, src, dst, &iov, 1, 0); memcpy(data, in, dlen); tap_send_single(c, buf, dlen + (data - buf)); @@ -247,7 +251,7 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h, void tap_udp6_send(const struct ctx *c, const struct in6_addr *src, in_port_t sport, const struct in6_addr *dst, in_port_t dport, - uint32_t flow, const void *in, size_t dlen) + uint32_t flow, void *in, size_t dlen) { size_t l4len = dlen + sizeof(struct udphdr); char buf[USHRT_MAX]; @@ -255,11 +259,15 @@ void tap_udp6_send(const struct ctx *c, struct udphdr *uh = tap_push_ip6h(ip6h, src, dst, l4len, IPPROTO_UDP, flow); char *data = (char *)(uh + 1); + const struct iovec iov = { + .iov_base = in, + .iov_len = dlen + }; uh->source = htons(sport); uh->dest = htons(dport); uh->len = htons(l4len); - csum_udp6(uh, src, dst, in, dlen); + csum_udp6(uh, src, dst, &iov, 1, 0); memcpy(data, in, dlen); tap_send_single(c, buf, dlen + (data - buf)); diff --git a/tap.h b/tap.h index ec9e2acec460..85f1e8473711 100644 --- a/tap.h +++ b/tap.h @@ -53,7 +53,7 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c, void tap_udp6_send(const struct ctx *c, const struct in6_addr *src, in_port_t sport, const struct in6_addr *dst, in_port_t dport, - uint32_t flow, const void *in, size_t dlen); + uint32_t flow, void *in, size_t dlen); void tap_icmp6_send(const struct ctx *c, const struct in6_addr *src, const struct in6_addr *dst, const void *in, size_t l4len); diff --git a/udp.c b/udp.c index 7b2831386db8..ac34279db253 100644 --- a/udp.c +++ b/udp.c @@ -321,10 +321,15 @@ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, bp->uh.source = htons(toside->oport); bp->uh.dest = htons(toside->eport); bp->uh.len = htons(l4len); - if (no_udp_csum) + if (no_udp_csum) { bp->uh.check = 0; - else - csum_udp4(&bp->uh, *src, *dst, bp->data, dlen); + } else { + const struct iovec iov = { + .iov_base = bp->data, + .iov_len = dlen + }; + csum_udp4(&bp->uh, *src, *dst, &iov, 1, 0); + } return l4len; } @@ -363,8 +368,12 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, */ bp->uh.check = 0xffff; } else { + const struct iovec iov = { + .iov_base = bp->data, + .iov_len = dlen + }; csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, - bp->data, dlen); + &iov, 1, 0); } return l4len; -- 2.46.0
On Fri, Sep 27, 2024 at 03:53:49PM +0200, Laurent Vivier wrote:As for tcp_update_check_tcp4()/tcp_update_check_tcp6(), change csum_udp4() and csum_udp6() to use an iovec array. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com>Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au>--- checksum.c | 29 ++++++++++++++++++----------- checksum.h | 4 ++-- tap.c | 14 +++++++++++--- tap.h | 2 +- udp.c | 17 +++++++++++++---- 5 files changed, 45 insertions(+), 21 deletions(-) diff --git a/checksum.c b/checksum.c index cf850196cca0..c673993d6f9c 100644 --- a/checksum.c +++ b/checksum.c @@ -166,22 +166,24 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol, * @udp4hr: UDP header, initialised apart from checksum * @saddr: IPv4 source address * @daddr: IPv4 destination address - * @payload: UDP packet payload - * @dlen: Length of @payload (not including UDP header) + * @iov: Pointer to the array of IO vectors + * @iov_cnt: Length of the array + * @offset: UDP payload offset in the iovec array */ void csum_udp4(struct udphdr *udp4hr, struct in_addr saddr, struct in_addr daddr, - const void *payload, size_t dlen) + const struct iovec *iov, int iov_cnt, size_t offset) { /* UDP checksums are optional, so don't bother */ udp4hr->check = 0; if (UDP4_REAL_CHECKSUMS) { - uint16_t l4len = dlen + sizeof(struct udphdr); + uint16_t l4len = iov_size(iov, iov_cnt) - offset + + sizeof(struct udphdr); uint32_t psum = proto_ipv4_header_psum(l4len, IPPROTO_UDP, saddr, daddr); psum = csum_unfolded(udp4hr, sizeof(struct udphdr), psum); - udp4hr->check = csum(payload, dlen, psum); + udp4hr->check = csum_iov(iov, iov_cnt, offset, psum); } } @@ -227,19 +229,24 @@ uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol, /** * csum_udp6() - Calculate and set checksum for a UDP over IPv6 packet * @udp6hr: UDP header, initialised apart from checksum - * @payload: UDP packet payload - * @dlen: Length of @payload (not including UDP header) + * @saddr: Source address + * @daddr: Destination address + * @iov: Pointer to the array of IO vectors + * @iov_cnt: Length of the array + * @offset: UDP payload offset in the iovec array */ void csum_udp6(struct udphdr *udp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, - const void *payload, size_t dlen) + const struct iovec *iov, int iov_cnt, size_t offset) { - uint32_t psum = proto_ipv6_header_psum(dlen + sizeof(struct udphdr), - IPPROTO_UDP, saddr, daddr); + uint16_t l4len = iov_size(iov, iov_cnt) - offset + + sizeof(struct udphdr); + uint32_t psum = proto_ipv6_header_psum(l4len, IPPROTO_UDP, + saddr, daddr); udp6hr->check = 0; psum = csum_unfolded(udp6hr, sizeof(struct udphdr), psum); - udp6hr->check = csum(payload, dlen, psum); + udp6hr->check = csum_iov(iov, iov_cnt, offset, psum); } /** diff --git a/checksum.h b/checksum.h index 49f7472dd1b6..31ba322fa98c 100644 --- a/checksum.h +++ b/checksum.h @@ -19,14 +19,14 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol, struct in_addr saddr, struct in_addr daddr); void csum_udp4(struct udphdr *udp4hr, struct in_addr saddr, struct in_addr daddr, - const void *payload, size_t dlen); + const struct iovec *iov, int iov_cnt, size_t offset); void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t dlen); uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol, const struct in6_addr *saddr, const struct in6_addr *daddr); void csum_udp6(struct udphdr *udp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, - const void *payload, size_t dlen); + const struct iovec *iov, int iov_cnt, size_t offset); void csum_icmp6(struct icmp6hdr *icmp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, const void *payload, size_t dlen); diff --git a/tap.c b/tap.c index 41af6a6d0c85..c53a39b79e62 100644 --- a/tap.c +++ b/tap.c @@ -172,11 +172,15 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport, struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP); struct udphdr *uh = tap_push_ip4h(ip4h, src, dst, l4len, IPPROTO_UDP); char *data = (char *)(uh + 1); + const struct iovec iov = { + .iov_base = (void *)in, + .iov_len = dlen + }; uh->source = htons(sport); uh->dest = htons(dport); uh->len = htons(l4len); - csum_udp4(uh, src, dst, in, dlen); + csum_udp4(uh, src, dst, &iov, 1, 0); memcpy(data, in, dlen); tap_send_single(c, buf, dlen + (data - buf)); @@ -247,7 +251,7 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h, void tap_udp6_send(const struct ctx *c, const struct in6_addr *src, in_port_t sport, const struct in6_addr *dst, in_port_t dport, - uint32_t flow, const void *in, size_t dlen) + uint32_t flow, void *in, size_t dlen) { size_t l4len = dlen + sizeof(struct udphdr); char buf[USHRT_MAX]; @@ -255,11 +259,15 @@ void tap_udp6_send(const struct ctx *c, struct udphdr *uh = tap_push_ip6h(ip6h, src, dst, l4len, IPPROTO_UDP, flow); char *data = (char *)(uh + 1); + const struct iovec iov = { + .iov_base = in, + .iov_len = dlen + }; uh->source = htons(sport); uh->dest = htons(dport); uh->len = htons(l4len); - csum_udp6(uh, src, dst, in, dlen); + csum_udp6(uh, src, dst, &iov, 1, 0); memcpy(data, in, dlen); tap_send_single(c, buf, dlen + (data - buf)); diff --git a/tap.h b/tap.h index ec9e2acec460..85f1e8473711 100644 --- a/tap.h +++ b/tap.h @@ -53,7 +53,7 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c, void tap_udp6_send(const struct ctx *c, const struct in6_addr *src, in_port_t sport, const struct in6_addr *dst, in_port_t dport, - uint32_t flow, const void *in, size_t dlen); + uint32_t flow, void *in, size_t dlen); void tap_icmp6_send(const struct ctx *c, const struct in6_addr *src, const struct in6_addr *dst, const void *in, size_t l4len); diff --git a/udp.c b/udp.c index 7b2831386db8..ac34279db253 100644 --- a/udp.c +++ b/udp.c @@ -321,10 +321,15 @@ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, bp->uh.source = htons(toside->oport); bp->uh.dest = htons(toside->eport); bp->uh.len = htons(l4len); - if (no_udp_csum) + if (no_udp_csum) { bp->uh.check = 0; - else - csum_udp4(&bp->uh, *src, *dst, bp->data, dlen); + } else { + const struct iovec iov = { + .iov_base = bp->data, + .iov_len = dlen + }; + csum_udp4(&bp->uh, *src, *dst, &iov, 1, 0); + } return l4len; } @@ -363,8 +368,12 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, */ bp->uh.check = 0xffff; } else { + const struct iovec iov = { + .iov_base = bp->data, + .iov_len = dlen + }; csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, - bp->data, dlen); + &iov, 1, 0); } return l4len;-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson
On Fri, 27 Sep 2024 15:53:44 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote:For vhost-user, we will need to spread TCP payload over several buffers. To re-use tcp_update_check_tcp[4|6](), provide an iovec rather than a pointer to a buffer. This series updates also csum_iov() and pcap_iov() to add an offset of bytes to skip in the iovec array. It's based on top of "tcp: Use tcp_payload_t rather than tcphdr" that is added in the series for convenience. v3 adds "udp: Update UDP checksum using an iovec array" that modifies csum_udp4() and csum_udp6() in the same way. Laurent Vivier (5): tcp: Use tcp_payload_t rather than tcphdr pcap: Add an offset argument in pcap_iov() checksum: Add an offset argument in csum_iov() tcp: Update TCP checksum using an iovec array udp: Update UDP checksum using an iovec arrayI *think* something in this series causes two test failures. I wasn't able to reproduce them without this series (tried five times), but I don't always reproduce them with it (tried about 10 times, I didn't get any failure for 3 runs, one or both in the other runs). The first one looks like this (copy and paste to a buffer to see it): -- ns$ ip -j -4 addr show|jq -rM '.[] | select(.ifname == "enp9s0").addr_info[0].local' │...passed. 88.198.0.164 │ ns$ ip -j -4 route show|jq -rM '.[] | select(.dst == "default").gateway' │Starting test: TCP/IPv4: ns to host (spliced): big transfer 88.198.0.161 │? cmp /home/sbrivio/passt/test/big.bin /tmp/passt-tests-Fe5Kr9/pasta/tcp/test_big.bin ns$ ip -j link show | jq -rM '.[] | select(.ifname == "enp9s0").mtu' │...passed. 65520 │ ns$ /sbin/dhclient -6 --no-pid enp9s0 │Starting test: TCP/IPv4: ns to host (via tap): big transfer ns$ ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "enp9s0").addr_info[] | select(.prefixlen == 128).local] | .[0]' │? cmp /home/sbrivio/passt/test/big.bin /tmp/passt-tests-Fe5Kr9/pasta/tcp/test_big.bin 2a01:4f8:222:904::2 │...passed. ns$ ip -j -6 route show|jq -rM '.[] | select(.dst == "default").gateway' │ fe80::1 │Starting test: TCP/IPv4: host to ns (spliced): small transfer ns$ which socat ip jq >/dev/null │? cmp /home/sbrivio/passt/test/small.bin /tmp/passt-tests-Fe5Kr9/pasta/tcp/test_ns_small.bin ns$ socat -u TCP4-LISTEN:10002 OPEN:/tmp/passt-tests-Fe5Kr9/pasta/tcp/test_ns_big.bin,create,trunc │...passed. ns$ socat -u OPEN:/home/sbrivio/passt/test/big.bin TCP4:127.0.0.1:10003 │ ns$ ip -j -4 route show|jq -rM '.[] | select(.dst == "default").gateway' │Starting test: TCP/IPv4: ns to host (spliced): small transfer 88.198.0.161 │? cmp /home/sbrivio/passt/test/small.bin /tmp/passt-tests-Fe5Kr9/pasta/tcp/test_small.bin ns$ socat -u OPEN:/home/sbrivio/passt/test/big.bin TCP4:88.198.0.161:10003 │...passed. ns$ socat -u TCP4-LISTEN:10002 OPEN:/tmp/passt-tests-Fe5Kr9/pasta/tcp/test_ns_small.bin,create,trunc │ ns$ socat OPEN:/home/sbrivio/passt/test/small.bin TCP4:127.0.0.1:10003 │Starting test: TCP/IPv4: ns to host (via tap): small transfer ns$ ip -j -4 route show|jq -rM '.[] | select(.dst == "default").gateway' │? cmp /home/sbrivio/passt/test/small.bin /tmp/passt-tests-Fe5Kr9/pasta/tcp/test_small.bin 88.198.0.161 │...passed. ns$ socat -u OPEN:/home/sbrivio/passt/test/small.bin TCP4:88.198.0.161:10003 │ ns$ socat -u TCP6-LISTEN:10002 OPEN:/tmp/passt-tests-Fe5Kr9/pasta/tcp/test_ns_big.bin,create,trunc │Starting test: TCP/IPv6: host to ns (spliced): big transfer │ ──namespace─────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────────┴──pasta/tcp [7/12] - TCP/IPv6: host to ns (spliced): big transfer─────────────────────────────────── host$ ip -j -6 route show|jq -rM '[.[] | select(.dst == "default").gateway] | .[0]' │ router: 88.198.0.161 fe80::1 │DNS: host$ which ip jq >/dev/null │ 185.12.64.1 host$ ip -j -4 addr show|jq -rM '.[] | select(.ifname == "enp9s0").addr_info[0].local' │ 185.12.64.2 88.198.0.164 │ NAT to host ::1: fe80::1 host$ ip -j -4 route show|jq -rM '[.[] | select(.dst == "default").gateway] | .[0]' │NDP/DHCPv6: 88.198.0.161 │ assign: 2a01:4f8:222:904::2 host$ ip -j -6 route show|jq -rM '[.[] | select(.dst == "default").dev] | .[0]' │ router: fe80::1 enp9s0 │ our link-local: fe80::1 host$ ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "enp9s0").addr_info[] | select(.scope == "global" and .depreca│DNS: ted != true).local] | .[0]' │ 2a01:4ff:ff00::add:2 2a01:4f8:222:904::2 │ 2a01:4ff:ff00::add:1 host$ ip -j -6 route show|jq -rM '[.[] | select(.dst == "default").gateway] | .[0]' │NDP: received RS, sending RA fe80::1 │DHCP: offer to discover host$ which socat ip jq >/dev/null │ from 06:91:e7:c0:12:b1 host$ socat -u OPEN:/home/sbrivio/passt/test/big.bin TCP4:127.0.0.1:10002 │DHCP: ack to request host$ socat -u TCP4-LISTEN:10003,bind=127.0.0.1 OPEN:/tmp/passt-tests-Fe5Kr9/pasta/tcp/test_big.bin,create,trunc │ from 06:91:e7:c0:12:b1 host$ socat -u TCP4-LISTEN:10003 OPEN:/tmp/passt-tests-Fe5Kr9/pasta/tcp/test_big.bin,create,trunc │DHCPv6: received SOLICIT, sending ADVERTISE host$ socat OPEN:/home/sbrivio/passt/test/small.bin TCP4:127.0.0.1:10002 │DHCPv6: received REQUEST/RENEW/CONFIRM, sending REPLY host$ socat -u TCP4-LISTEN:10003,bind=127.0.0.1 OPEN:/tmp/passt-tests-Fe5Kr9/pasta/tcp/test_small.bin,create,trunc │NDP: received NS, sending NA host$ socat -u TCP4-LISTEN:10003 OPEN:/tmp/passt-tests-Fe5Kr9/pasta/tcp/test_small.bin,create,trunc │NDP: received NS, sending NA host$ socat -u OPEN:/home/sbrivio/passt/test/big.bin TCP6:[::1]:10002 │NDP: received NS, sending NA host$ │ ──host──────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──pasta──────────────────────────────────────────────────────────────────────────────────────────────────────────────── Testing commit: 3195dc6 udp: Update UDP checksum using an iovec array PASS: 23 | FAIL: 0 | 2024-10-02T13:53:46+00:00 -- the second one is a UDP transfer in the pasta_podman/bats test (mismatch in received data) for which I just accidentally lost the logs... but I'll try to reproduce that again in a bit and share them. -- Stefano
On Wed, 2 Oct 2024 16:32:38 +0200 Stefano Brivio <sbrivio(a)redhat.com> wrote:On Fri, 27 Sep 2024 15:53:44 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote:Grr, never mind, this seem to be caused by "[PATCH v3 0/4] Don't expose container loopback services to the host" or something that came before. I'll now re-test a few times both series separately and report back. -- StefanoFor vhost-user, we will need to spread TCP payload over several buffers. To re-use tcp_update_check_tcp[4|6](), provide an iovec rather than a pointer to a buffer. This series updates also csum_iov() and pcap_iov() to add an offset of bytes to skip in the iovec array. It's based on top of "tcp: Use tcp_payload_t rather than tcphdr" that is added in the series for convenience. v3 adds "udp: Update UDP checksum using an iovec array" that modifies csum_udp4() and csum_udp6() in the same way. Laurent Vivier (5): tcp: Use tcp_payload_t rather than tcphdr pcap: Add an offset argument in pcap_iov() checksum: Add an offset argument in csum_iov() tcp: Update TCP checksum using an iovec array udp: Update UDP checksum using an iovec arrayI *think* something in this series causes two test failures. I wasn't able to reproduce them without this series (tried five times), but I don't always reproduce them with it (tried about 10 times, I didn't get any failure for 3 runs, one or both in the other runs). The first one looks like this (copy and paste to a buffer to see it):