With vhost-user we can disable the checksum of UDP and TCP. Add a generic parameter for each of them to disable the checksum. v2: s/UPD/UDP/ Add David's R-b in PATCH 2 Laurent Vivier (2): udp: Allow checksum to be disabled tcp: Allow checksum to be disabled tcp.c | 24 +++++++++++++++++------- tcp_buf.c | 8 +++++--- tcp_internal.h | 3 ++- udp.c | 41 +++++++++++++++++++++++++++++++---------- 4 files changed, 55 insertions(+), 21 deletions(-) -- 2.46.0
We can need not to set the UDP checksum. Add a parameter to udp_update_hdr4() and udp_update_hdr6() to disable it. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- Notes: v2: s/UPD/UDP/ udp.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/udp.c b/udp.c index 2ba00c9c20a8..95151efb9c46 100644 --- a/udp.c +++ b/udp.c @@ -298,11 +298,13 @@ static void udp_splice_send(const struct ctx *c, size_t start, size_t n, * @bp: Pointer to udp_payload_t to update * @toside: Flowside for destination side * @dlen: Length of UDP payload + * @no_udp_csum: Do not set UDP checksum * * Return: size of IPv4 payload (UDP header + data) */ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, - const struct flowside *toside, size_t dlen) + const struct flowside *toside, size_t dlen, + bool no_udp_csum) { const struct in_addr *src = inany_v4(&toside->oaddr); const struct in_addr *dst = inany_v4(&toside->eaddr); @@ -319,7 +321,10 @@ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, bp->uh.source = htons(toside->oport); bp->uh.dest = htons(toside->eport); bp->uh.len = htons(l4len); - csum_udp4(&bp->uh, *src, *dst, bp->data, dlen); + if (no_udp_csum) + bp->uh.check = 0; + else + csum_udp4(&bp->uh, *src, *dst, bp->data, dlen); return l4len; } @@ -330,11 +335,13 @@ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, * @bp: Pointer to udp_payload_t to update * @toside: Flowside for destination side * @dlen: Length of UDP payload + * @no_udp_csum: Do not set UDP checksum * * Return: size of IPv6 payload (UDP header + data) */ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, - const struct flowside *toside, size_t dlen) + const struct flowside *toside, size_t dlen, + bool no_udp_csum) { uint16_t l4len = dlen + sizeof(bp->uh); @@ -348,7 +355,16 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, bp->uh.source = htons(toside->oport); bp->uh.dest = htons(toside->eport); bp->uh.len = ip6h->payload_len; - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, bp->data, dlen); + if (no_udp_csum) { + /* O is an invalid checksum for UDP IPv6 and dropped by + * the kernel stack, even if the checksum is disabled by virtio + * flags. We need to put any non-zero value here. + */ + bp->uh.check = 0xffff; + } else { + csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, + bp->data, dlen); + } return l4len; } @@ -358,9 +374,11 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, * @mmh: Receiving mmsghdr array * @idx: Index of the datagram to prepare * @toside: Flowside for destination side + * @no_udp_csum: Do not set UDP checksum */ -static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx, - const struct flowside *toside) +static void udp_tap_prepare(const struct mmsghdr *mmh, + unsigned idx, const struct flowside *toside, + bool no_udp_csum) { struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx]; struct udp_payload_t *bp = &udp_payload[idx]; @@ -368,13 +386,15 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx, size_t l4len; if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) { - l4len = udp_update_hdr6(&bm->ip6h, bp, toside, mmh[idx].msg_len); + l4len = udp_update_hdr6(&bm->ip6h, bp, toside, + mmh[idx].msg_len, no_udp_csum); tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + sizeof(udp6_eth_hdr)); (*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr); (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h); } else { - l4len = udp_update_hdr4(&bm->ip4h, bp, toside, mmh[idx].msg_len); + l4len = udp_update_hdr4(&bm->ip4h, bp, toside, + mmh[idx].msg_len, no_udp_csum); tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + sizeof(udp4_eth_hdr)); (*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr); @@ -565,7 +585,8 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref, udp_splice_prepare(udp_mh_recv, i); } else if (batchpif == PIF_TAP) { udp_tap_prepare(udp_mh_recv, i, - flowside_at_sidx(batchsidx)); + flowside_at_sidx(batchsidx), + false); } if (++i >= n) @@ -636,7 +657,7 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, if (pif_is_socket(topif)) udp_splice_prepare(udp_mh_recv, i); else if (topif == PIF_TAP) - udp_tap_prepare(udp_mh_recv, i, toside); + udp_tap_prepare(udp_mh_recv, i, toside, false); /* Restore sockaddr length clobbered by recvmsg() */ udp_mh_recv[i].msg_hdr.msg_namelen = sizeof(udp_meta[i].s_in); } -- 2.46.0
On Tue, Sep 17, 2024 at 09:30:57AM +0200, Laurent Vivier wrote:We can need not to set the UDP checksum. Add a parameter to udp_update_hdr4() and udp_update_hdr6() to disable it. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com>Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au> -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson
Nits only: On Tue, 17 Sep 2024 09:30:57 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote:We can need not to set the UDP checksum. Add a parameter to udp_update_hdr4() and udp_update_hdr6() to disable it. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- Notes: v2: s/UPD/UDP/ udp.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/udp.c b/udp.c index 2ba00c9c20a8..95151efb9c46 100644 --- a/udp.c +++ b/udp.c @@ -298,11 +298,13 @@ static void udp_splice_send(const struct ctx *c, size_t start, size_t n, * @bp: Pointer to udp_payload_t to update * @toside: Flowside for destination side * @dlen: Length of UDP payload + * @no_udp_csum: Do not set UDP checksumThe description of all the other arguments are aligned, with tabs. You could just add one tab to all the others and have them aligned. Actually, this could simply be 'no_csum', it's obviously UDP. Same for no_tcp_csum in 2/2.* * Return: size of IPv4 payload (UDP header + data) */ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, - const struct flowside *toside, size_t dlen) + const struct flowside *toside, size_t dlen, + bool no_udp_csum) { const struct in_addr *src = inany_v4(&toside->oaddr); const struct in_addr *dst = inany_v4(&toside->eaddr); @@ -319,7 +321,10 @@ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, bp->uh.source = htons(toside->oport); bp->uh.dest = htons(toside->eport); bp->uh.len = htons(l4len); - csum_udp4(&bp->uh, *src, *dst, bp->data, dlen); + if (no_udp_csum) + bp->uh.check = 0; + else + csum_udp4(&bp->uh, *src, *dst, bp->data, dlen); return l4len; } @@ -330,11 +335,13 @@ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, * @bp: Pointer to udp_payload_t to update * @toside: Flowside for destination side * @dlen: Length of UDP payload + * @no_udp_csum: Do not set UDP checksumSame here.* * Return: size of IPv6 payload (UDP header + data) */ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, - const struct flowside *toside, size_t dlen) + const struct flowside *toside, size_t dlen, + bool no_udp_csum) { uint16_t l4len = dlen + sizeof(bp->uh); @@ -348,7 +355,16 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, bp->uh.source = htons(toside->oport); bp->uh.dest = htons(toside->eport); bp->uh.len = ip6h->payload_len; - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, bp->data, dlen); + if (no_udp_csum) { + /* O is an invalid checksum for UDP IPv6 and dropped byI'm not sure how this happened, but that's the letter O, not the number 0. Maybe you could spell it out, "Zero".+ * the kernel stack, even if the checksum is disabled by virtio + * flags. We need to put any non-zero value here. + */ + bp->uh.check = 0xffff; + } else { + csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, + bp->data, dlen); + } return l4len; } @@ -358,9 +374,11 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, * @mmh: Receiving mmsghdr array * @idx: Index of the datagram to prepare * @toside: Flowside for destination side + * @no_udp_csum: Do not set UDP checksum */ -static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx, - const struct flowside *toside) +static void udp_tap_prepare(const struct mmsghdr *mmh, + unsigned idx, const struct flowside *toside, + bool no_udp_csum) { struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx]; struct udp_payload_t *bp = &udp_payload[idx]; @@ -368,13 +386,15 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx, size_t l4len; if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) { - l4len = udp_update_hdr6(&bm->ip6h, bp, toside, mmh[idx].msg_len); + l4len = udp_update_hdr6(&bm->ip6h, bp, toside, + mmh[idx].msg_len, no_udp_csum); tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + sizeof(udp6_eth_hdr)); (*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr); (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h); } else { - l4len = udp_update_hdr4(&bm->ip4h, bp, toside, mmh[idx].msg_len); + l4len = udp_update_hdr4(&bm->ip4h, bp, toside, + mmh[idx].msg_len, no_udp_csum); tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + sizeof(udp4_eth_hdr)); (*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr); @@ -565,7 +585,8 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref, udp_splice_prepare(udp_mh_recv, i); } else if (batchpif == PIF_TAP) { udp_tap_prepare(udp_mh_recv, i, - flowside_at_sidx(batchsidx)); + flowside_at_sidx(batchsidx), + false); } if (++i >= n) @@ -636,7 +657,7 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, if (pif_is_socket(topif)) udp_splice_prepare(udp_mh_recv, i); else if (topif == PIF_TAP) - udp_tap_prepare(udp_mh_recv, i, toside); + udp_tap_prepare(udp_mh_recv, i, toside, false); /* Restore sockaddr length clobbered by recvmsg() */ udp_mh_recv[i].msg_hdr.msg_namelen = sizeof(udp_meta[i].s_in); }Other than that, this and 2/2 look good to me. -- Stefano
On Wed, Sep 18, 2024 at 01:07:00AM +0200, Stefano Brivio wrote:Nits only: On Tue, 17 Sep 2024 09:30:57 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote:I had been going to suggest the same thing, but then I realised the current name does disambiguate it from the IPv4 header checksum. -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibsonWe can need not to set the UDP checksum. Add a parameter to udp_update_hdr4() and udp_update_hdr6() to disable it. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- Notes: v2: s/UPD/UDP/ udp.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/udp.c b/udp.c index 2ba00c9c20a8..95151efb9c46 100644 --- a/udp.c +++ b/udp.c @@ -298,11 +298,13 @@ static void udp_splice_send(const struct ctx *c, size_t start, size_t n, * @bp: Pointer to udp_payload_t to update * @toside: Flowside for destination side * @dlen: Length of UDP payload + * @no_udp_csum: Do not set UDP checksumThe description of all the other arguments are aligned, with tabs. You could just add one tab to all the others and have them aligned. Actually, this could simply be 'no_csum', it's obviously UDP. Same for no_tcp_csum in 2/2.
On Wed, 18 Sep 2024 09:41:37 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:On Wed, Sep 18, 2024 at 01:07:00AM +0200, Stefano Brivio wrote:Ah, right, never mind then. -- StefanoNits only: On Tue, 17 Sep 2024 09:30:57 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote:I had been going to suggest the same thing, but then I realised the current name does disambiguate it from the IPv4 header checksum.We can need not to set the UDP checksum. Add a parameter to udp_update_hdr4() and udp_update_hdr6() to disable it. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- Notes: v2: s/UPD/UDP/ udp.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/udp.c b/udp.c index 2ba00c9c20a8..95151efb9c46 100644 --- a/udp.c +++ b/udp.c @@ -298,11 +298,13 @@ static void udp_splice_send(const struct ctx *c, size_t start, size_t n, * @bp: Pointer to udp_payload_t to update * @toside: Flowside for destination side * @dlen: Length of UDP payload + * @no_udp_csum: Do not set UDP checksumThe description of all the other arguments are aligned, with tabs. You could just add one tab to all the others and have them aligned. Actually, this could simply be 'no_csum', it's obviously UDP. Same for no_tcp_csum in 2/2.
We can need not to set TCP checksum. Add a parameter to tcp_fill_headers4() and tcp_fill_headers6() to disable it. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au> --- tcp.c | 24 +++++++++++++++++------- tcp_buf.c | 8 +++++--- tcp_internal.h | 3 ++- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/tcp.c b/tcp.c index f9fe1b9a1330..a44d7b977214 100644 --- a/tcp.c +++ b/tcp.c @@ -903,6 +903,7 @@ static void tcp_fill_header(struct tcphdr *th, * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment + * @no_tcp_csum: Do not set TCP checksum * * Return: The IPv4 payload length, host order */ @@ -910,7 +911,7 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, struct tap_hdr *taph, struct iphdr *iph, struct tcphdr *th, size_t dlen, const uint16_t *check, - uint32_t seq) + uint32_t seq, bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); const struct in_addr *src4 = inany_v4(&tapside->oaddr); @@ -929,7 +930,10 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, tcp_fill_header(th, conn, seq); - tcp_update_check_tcp4(iph, th); + if (no_tcp_csum) + th->check = 0; + else + tcp_update_check_tcp4(iph, th); tap_hdr_update(taph, l3len + sizeof(struct ethhdr)); @@ -945,13 +949,14 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment + * @no_tcp_csum: Do not set TCP checksum * * Return: The IPv6 payload length, host order */ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, struct tap_hdr *taph, struct ipv6hdr *ip6h, struct tcphdr *th, - size_t dlen, uint32_t seq) + size_t dlen, uint32_t seq, bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); size_t l4len = dlen + sizeof(*th); @@ -970,7 +975,10 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, tcp_fill_header(th, conn, seq); - tcp_update_check_tcp6(ip6h, th); + if (no_tcp_csum) + th->check = 0; + else + tcp_update_check_tcp6(ip6h, th); tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr)); @@ -984,12 +992,14 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment + * @no_tcp_csum: Do not set TCP checksum * * Return: IP payload length, host order */ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, struct iovec *iov, size_t dlen, - const uint16_t *check, uint32_t seq) + const uint16_t *check, uint32_t seq, + bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); const struct in_addr *a4 = inany_v4(&tapside->oaddr); @@ -998,13 +1008,13 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, - check, seq); + check, seq, no_tcp_csum); } return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, - seq); + seq, no_tcp_csum); } /** diff --git a/tcp_buf.c b/tcp_buf.c index 1a398461a34b..10a663bdfc26 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -320,7 +320,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) return ret; } - l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq); + l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false); iov[TCP_IOV_PAYLOAD].iov_len = l4len; if (flags & DUP_ACK) { @@ -381,7 +381,8 @@ static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn, tcp4_frame_conns[tcp4_payload_used] = conn; iov = tcp4_l2_iov[tcp4_payload_used++]; - l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq); + l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, + false); iov[TCP_IOV_PAYLOAD].iov_len = l4len; if (tcp4_payload_used > TCP_FRAMES_MEM - 1) tcp_payload_flush(c); @@ -389,7 +390,8 @@ static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn, tcp6_frame_conns[tcp6_payload_used] = conn; iov = tcp6_l2_iov[tcp6_payload_used++]; - l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq); + l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq, + false); iov[TCP_IOV_PAYLOAD].iov_len = l4len; if (tcp6_payload_used > TCP_FRAMES_MEM - 1) tcp_payload_flush(c); diff --git a/tcp_internal.h b/tcp_internal.h index aa8bb64f1f33..e7fe735bfcb4 100644 --- a/tcp_internal.h +++ b/tcp_internal.h @@ -91,7 +91,8 @@ void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn); size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, struct iovec *iov, size_t dlen, - const uint16_t *check, uint32_t seq); + const uint16_t *check, uint32_t seq, + bool no_tcp_csum); int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, int force_seq, struct tcp_info *tinfo); int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, int flags, -- 2.46.0