With vhost-user we can disable the checksum of UDP and TCP. Add a generic parameter for each of them to disable the checksum. v3: align parameters description with tab s/O/0/ v2: s/UPD/UDP/ Add David's R-b in PATCH 2 Laurent Vivier (2): udp: Allow checksum to be disabled tcp: Allow checksum to be disabled tcp.c | 52 ++++++++++++++++++++++++++------------------ tcp_buf.c | 8 ++++--- tcp_internal.h | 3 ++- udp.c | 58 ++++++++++++++++++++++++++++++++++---------------- 4 files changed, 78 insertions(+), 43 deletions(-) -- 2.46.0
We may not need to set the UDP checksum. Add a parameter to udp_update_hdr4() and udp_update_hdr6() to disable it. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- Notes: v3: align parameters description with tab s/O/0/ v2: s/UPD/UDP/ udp.c | 58 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/udp.c b/udp.c index 2ba00c9c20a8..7b2831386db8 100644 --- a/udp.c +++ b/udp.c @@ -294,15 +294,17 @@ static void udp_splice_send(const struct ctx *c, size_t start, size_t n, /** * udp_update_hdr4() - Update headers for one IPv4 datagram - * @ip4h: Pre-filled IPv4 header (except for tot_len and saddr) - * @bp: Pointer to udp_payload_t to update - * @toside: Flowside for destination side - * @dlen: Length of UDP payload + * @ip4h: Pre-filled IPv4 header (except for tot_len and saddr) + * @bp: Pointer to udp_payload_t to update + * @toside: Flowside for destination side + * @dlen: Length of UDP payload + * @no_udp_csum: Do not set UDP checksum * * Return: size of IPv4 payload (UDP header + data) */ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, - const struct flowside *toside, size_t dlen) + const struct flowside *toside, size_t dlen, + bool no_udp_csum) { const struct in_addr *src = inany_v4(&toside->oaddr); const struct in_addr *dst = inany_v4(&toside->eaddr); @@ -319,22 +321,28 @@ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, bp->uh.source = htons(toside->oport); bp->uh.dest = htons(toside->eport); bp->uh.len = htons(l4len); - csum_udp4(&bp->uh, *src, *dst, bp->data, dlen); + if (no_udp_csum) + bp->uh.check = 0; + else + csum_udp4(&bp->uh, *src, *dst, bp->data, dlen); return l4len; } /** * udp_update_hdr6() - Update headers for one IPv6 datagram - * @ip6h: Pre-filled IPv6 header (except for payload_len and addresses) - * @bp: Pointer to udp_payload_t to update - * @toside: Flowside for destination side - * @dlen: Length of UDP payload + * 
@ip6h: Pre-filled IPv6 header (except for payload_len and + * addresses) + * @bp: Pointer to udp_payload_t to update + * @toside: Flowside for destination side + * @dlen: Length of UDP payload + * @no_udp_csum: Do not set UDP checksum * * Return: size of IPv6 payload (UDP header + data) */ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, - const struct flowside *toside, size_t dlen) + const struct flowside *toside, size_t dlen, + bool no_udp_csum) { uint16_t l4len = dlen + sizeof(bp->uh); @@ -348,7 +356,16 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, bp->uh.source = htons(toside->oport); bp->uh.dest = htons(toside->eport); bp->uh.len = ip6h->payload_len; - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, bp->data, dlen); + if (no_udp_csum) { + /* 0 is an invalid checksum for UDP IPv6 and dropped by + * the kernel stack, even if the checksum is disabled by virtio + * flags. We need to put any non-zero value here. + */ + bp->uh.check = 0xffff; + } else { + csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, + bp->data, dlen); + } return l4len; } @@ -358,9 +375,11 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, * @mmh: Receiving mmsghdr array * @idx: Index of the datagram to prepare * @toside: Flowside for destination side + * @no_udp_csum: Do not set UDP checksum */ -static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx, - const struct flowside *toside) +static void udp_tap_prepare(const struct mmsghdr *mmh, + unsigned idx, const struct flowside *toside, + bool no_udp_csum) { struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx]; struct udp_payload_t *bp = &udp_payload[idx]; @@ -368,13 +387,15 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx, size_t l4len; if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) { - l4len = udp_update_hdr6(&bm->ip6h, bp, toside, mmh[idx].msg_len); + l4len = udp_update_hdr6(&bm->ip6h, 
bp, toside, + mmh[idx].msg_len, no_udp_csum); tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + sizeof(udp6_eth_hdr)); (*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr); (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h); } else { - l4len = udp_update_hdr4(&bm->ip4h, bp, toside, mmh[idx].msg_len); + l4len = udp_update_hdr4(&bm->ip4h, bp, toside, + mmh[idx].msg_len, no_udp_csum); tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + sizeof(udp4_eth_hdr)); (*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr); @@ -565,7 +586,8 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref, udp_splice_prepare(udp_mh_recv, i); } else if (batchpif == PIF_TAP) { udp_tap_prepare(udp_mh_recv, i, - flowside_at_sidx(batchsidx)); + flowside_at_sidx(batchsidx), + false); } if (++i >= n) @@ -636,7 +658,7 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, if (pif_is_socket(topif)) udp_splice_prepare(udp_mh_recv, i); else if (topif == PIF_TAP) - udp_tap_prepare(udp_mh_recv, i, toside); + udp_tap_prepare(udp_mh_recv, i, toside, false); /* Restore sockaddr length clobbered by recvmsg() */ udp_mh_recv[i].msg_hdr.msg_namelen = sizeof(udp_meta[i].s_in); } -- 2.46.0
We may not need to set the TCP checksum. Add a parameter to tcp_fill_headers4() and tcp_fill_headers6() to disable it. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au> --- Notes: v3: align parameters description with tab tcp.c | 52 ++++++++++++++++++++++++++++++-------------------- tcp_buf.c | 8 +++++--- tcp_internal.h | 3 ++- 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/tcp.c b/tcp.c index f9fe1b9a1330..7307ea74f25c 100644 --- a/tcp.c +++ b/tcp.c @@ -896,13 +896,14 @@ static void tcp_fill_header(struct tcphdr *th, /** * tcp_fill_headers4() - Fill 802.3, IPv4, TCP headers in pre-cooked buffers - * @conn: Connection pointer - * @taph: tap backend specific header - * @iph: Pointer to IPv4 header - * @th: Pointer to TCP header - * @dlen: TCP payload length - * @check: Checksum, if already known - * @seq: Sequence number for this segment + * @conn: Connection pointer + * @taph: tap backend specific header + * @iph: Pointer to IPv4 header + * @th: Pointer to TCP header + * @dlen: TCP payload length + * @check: Checksum, if already known + * @seq: Sequence number for this segment + * @no_tcp_csum: Do not set TCP checksum * * Return: The IPv4 payload length, host order */ @@ -910,7 +911,7 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, struct tap_hdr *taph, struct iphdr *iph, struct tcphdr *th, size_t dlen, const uint16_t *check, - uint32_t seq) + uint32_t seq, bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); const struct in_addr *src4 = inany_v4(&tapside->oaddr); @@ -929,7 +930,10 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, tcp_fill_header(th, conn, seq); - tcp_update_check_tcp4(iph, th); + if (no_tcp_csum) + th->check = 0; + else + tcp_update_check_tcp4(iph, th); tap_hdr_update(taph, l3len + sizeof(struct ethhdr)); @@ -938,20 +942,21 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn, /** * tcp_fill_headers6() - Fill 
802.3, IPv6, TCP headers in pre-cooked buffers - * @conn: Connection pointer - * @taph: tap backend specific header - * @ip6h: Pointer to IPv6 header - * @th: Pointer to TCP header - * @dlen: TCP payload length - * @check: Checksum, if already known - * @seq: Sequence number for this segment + * @conn: Connection pointer + * @taph: tap backend specific header + * @ip6h: Pointer to IPv6 header + * @th: Pointer to TCP header + * @dlen: TCP payload length + * @check: Checksum, if already known + * @seq: Sequence number for this segment + * @no_tcp_csum: Do not set TCP checksum * * Return: The IPv6 payload length, host order */ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, struct tap_hdr *taph, struct ipv6hdr *ip6h, struct tcphdr *th, - size_t dlen, uint32_t seq) + size_t dlen, uint32_t seq, bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); size_t l4len = dlen + sizeof(*th); @@ -970,7 +975,10 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, tcp_fill_header(th, conn, seq); - tcp_update_check_tcp6(ip6h, th); + if (no_tcp_csum) + th->check = 0; + else + tcp_update_check_tcp6(ip6h, th); tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr)); @@ -984,12 +992,14 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn, * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment + * @no_tcp_csum: Do not set TCP checksum * * Return: IP payload length, host order */ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, struct iovec *iov, size_t dlen, - const uint16_t *check, uint32_t seq) + const uint16_t *check, uint32_t seq, + bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); const struct in_addr *a4 = inany_v4(&tapside->oaddr); @@ -998,13 +1008,13 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, 
iov[TCP_IOV_PAYLOAD].iov_base, dlen, - check, seq); + check, seq, no_tcp_csum); } return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, iov[TCP_IOV_PAYLOAD].iov_base, dlen, - seq); + seq, no_tcp_csum); } /** diff --git a/tcp_buf.c b/tcp_buf.c index 1a398461a34b..10a663bdfc26 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -320,7 +320,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) return ret; } - l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq); + l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false); iov[TCP_IOV_PAYLOAD].iov_len = l4len; if (flags & DUP_ACK) { @@ -381,7 +381,8 @@ static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn, tcp4_frame_conns[tcp4_payload_used] = conn; iov = tcp4_l2_iov[tcp4_payload_used++]; - l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq); + l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, + false); iov[TCP_IOV_PAYLOAD].iov_len = l4len; if (tcp4_payload_used > TCP_FRAMES_MEM - 1) tcp_payload_flush(c); @@ -389,7 +390,8 @@ static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn, tcp6_frame_conns[tcp6_payload_used] = conn; iov = tcp6_l2_iov[tcp6_payload_used++]; - l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq); + l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq, + false); iov[TCP_IOV_PAYLOAD].iov_len = l4len; if (tcp6_payload_used > TCP_FRAMES_MEM - 1) tcp_payload_flush(c); diff --git a/tcp_internal.h b/tcp_internal.h index aa8bb64f1f33..e7fe735bfcb4 100644 --- a/tcp_internal.h +++ b/tcp_internal.h @@ -91,7 +91,8 @@ void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn); size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, struct iovec *iov, size_t dlen, - const uint16_t *check, uint32_t seq); + const uint16_t *check, uint32_t seq, + bool no_tcp_csum); int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, int force_seq, struct tcp_info *tinfo); int 
tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, int flags, -- 2.46.0
On Wed, 18 Sep 2024 15:13:26 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote: With vhost-user we can disable the checksum of UDP and TCP. Add a generic parameter for each of them to disable the checksum. v3: align parameters description with tab s/O/0/ v2: s/UPD/UDP/ Add David's R-b in PATCH 2 Laurent Vivier (2): udp: Allow checksum to be disabled tcp: Allow checksum to be disabled Applied. -- Stefano