On Tue, May 19, 2026 at 05:56:13PM +0200, Laurent Vivier wrote:
Change udp_update_hdr4() and udp_update_hdr6() to take an iov_tail pointing at the UDP frame instead of a contiguous udp_payload_t buffer and explicit data length. This lets vhost-user pass scatter-gather virtqueue buffers directly without an intermediate copy.
The UDP header is built into a local struct udphdr and written back with IOV_PUSH_HEADER(). On the tap side, udp_tap_prepare() wraps the existing udp_payload_t in a two-element iov to match the new interface.
Signed-off-by: Laurent Vivier
--- iov.c | 1 - udp.c | 74 +++++++++++++++++----------------- udp_internal.h | 6 ++- udp_vu.c | 106 ++++++++++++++++++------------------------------- 4 files changed, 78 insertions(+), 109 deletions(-)
Lovely :).
Reviewed-by: David Gibson
diff --git a/iov.c b/iov.c index 6a5d7d35b67f..9248ba95a9f2 100644 --- a/iov.c +++ b/iov.c @@ -367,7 +367,6 @@ void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align) * * Return: number of bytes written */ -/* cppcheck-suppress unusedFunction */ size_t iov_push_header_(struct iov_tail *tail, const void *v, size_t len) { size_t l; diff --git a/udp.c b/udp.c index 66dc7766868c..cfc7b2439881 100644 --- a/udp.c +++ b/udp.c @@ -255,20 +255,22 @@ static void udp_iov_init(const struct ctx *c) /** * udp_update_hdr4() - Update headers for one IPv4 datagram * @ip4h: Pre-filled IPv4 header (except for tot_len and saddr) - * @bp: Pointer to udp_payload_t to update + * @uh: UDP header to fill + * @payload: UDP payload * @toside: Flowside for destination side * @dlen: Length of UDP payload * @no_udp_csum: Do not set UDP checksum * - * Return: size of IPv4 payload (UDP header + data) + * Return: size of datagram (UDP header + data) */ -size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, +size_t udp_update_hdr4(struct iphdr *ip4h, struct udphdr *uh, + struct iov_tail *payload, const struct flowside *toside, size_t dlen, bool no_udp_csum) { const struct in_addr *src = inany_v4(&toside->oaddr); const struct in_addr *dst = inany_v4(&toside->eaddr); - size_t l4len = dlen + sizeof(bp->uh); + size_t l4len = dlen + sizeof(*uh); size_t l3len = l4len + sizeof(*ip4h);
assert(src && dst); @@ -278,19 +280,13 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, ip4h->saddr = src->s_addr; ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, *src, *dst);
- bp->uh.source = htons(toside->oport); - bp->uh.dest = htons(toside->eport); - bp->uh.len = htons(l4len); - if (no_udp_csum) { - bp->uh.check = 0; - } else { - const struct iovec iov = { - .iov_base = bp->data, - .iov_len = dlen - }; - struct iov_tail data = IOV_TAIL(&iov, 1, 0); - csum_udp4(&bp->uh, *src, *dst, &data, dlen); - } + uh->source = htons(toside->oport); + uh->dest = htons(toside->eport); + uh->len = htons(l4len); + if (no_udp_csum) + uh->check = 0; + else + csum_udp4(uh, *src, *dst, payload, dlen);
return l4len; } @@ -299,18 +295,20 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, * udp_update_hdr6() - Update headers for one IPv6 datagram * @ip6h: Pre-filled IPv6 header (except for payload_len and * addresses) - * @bp: Pointer to udp_payload_t to update + * @uh: UDP header to fill + * @payload: UDP payload * @toside: Flowside for destination side * @dlen: Length of UDP payload * @no_udp_csum: Do not set UDP checksum * - * Return: size of IPv6 payload (UDP header + data) + * Return: size of datagram (UDP header + data) */ -size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, +size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udphdr *uh, + struct iov_tail *payload, const struct flowside *toside, size_t dlen, bool no_udp_csum) { - uint16_t l4len = dlen + sizeof(bp->uh); + uint16_t l4len = dlen + sizeof(*uh);
ip6h->payload_len = htons(l4len); ip6h->daddr = toside->eaddr.a6; @@ -319,23 +317,19 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, ip6h->nexthdr = IPPROTO_UDP; ip6h->hop_limit = 255;
- bp->uh.source = htons(toside->oport); - bp->uh.dest = htons(toside->eport); - bp->uh.len = ip6h->payload_len; + uh->source = htons(toside->oport); + uh->dest = htons(toside->eport); + uh->len = htons(l4len); + if (no_udp_csum) { /* 0 is an invalid checksum for UDP IPv6 and dropped by * the kernel stack, even if the checksum is disabled by virtio * flags. We need to put any non-zero value here. */ - bp->uh.check = 0xffff; + uh->check = 0xffff; } else { - const struct iovec iov = { - .iov_base = bp->data, - .iov_len = dlen - }; - struct iov_tail data = IOV_TAIL(&iov, 1, 0); - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data, - dlen); + csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6, + payload, dlen); }
return l4len; @@ -372,15 +366,20 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, bool no_udp_csum) { struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx]; + struct udphdr *uh = (*tap_iov)[UDP_IOV_PAYLOAD].iov_base; + struct iov_tail payload = IOV_TAIL(&(*tap_iov)[UDP_IOV_PAYLOAD], 1, + sizeof(*uh)); struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base; - struct udp_payload_t *bp = &udp_payload[idx]; struct udp_meta_t *bm = &udp_meta[idx]; size_t l4len, l2len;
+ l4len = sizeof(struct udphdr) + mmh[idx].msg_len;
Nit: sizeof(*uh) rather than sizeof(struct udphdr) here, for consistency with the IOV_TAIL() initialiser just above.
+ (*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len; + eth_update_mac(eh, NULL, tap_omac); if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) { - l4len = udp_update_hdr6(&bm->ip6h, bp, toside, - mmh[idx].msg_len, no_udp_csum); + udp_update_hdr6(&bm->ip6h, uh, &payload, toside, + mmh[idx].msg_len, no_udp_csum);
l2len = MAX(l4len + sizeof(bm->ip6h) + ETH_HLEN, ETH_ZLEN); tap_hdr_update(&bm->taph, l2len); @@ -388,8 +387,8 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, eh->h_proto = htons_constant(ETH_P_IPV6); (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h); } else { - l4len = udp_update_hdr4(&bm->ip4h, bp, toside, - mmh[idx].msg_len, no_udp_csum); + udp_update_hdr4(&bm->ip4h, uh, &payload, toside, + mmh[idx].msg_len, no_udp_csum);
l2len = MAX(l4len + sizeof(bm->ip4h) + ETH_HLEN, ETH_ZLEN); tap_hdr_update(&bm->taph, l2len); @@ -397,7 +396,6 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, eh->h_proto = htons_constant(ETH_P_IP); (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h); } - (*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
udp_tap_pad(*tap_iov); } diff --git a/udp_internal.h b/udp_internal.h index 64e457748324..361cc7495a01 100644 --- a/udp_internal.h +++ b/udp_internal.h @@ -25,10 +25,12 @@ struct udp_payload_t { } __attribute__ ((packed, aligned(__alignof__(unsigned int)))); #endif
-size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, +size_t udp_update_hdr4(struct iphdr *ip4h, struct udphdr *uh, + struct iov_tail *payload, const struct flowside *toside, size_t dlen, bool no_udp_csum); -size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, +size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udphdr *uh, + struct iov_tail *payload, const struct flowside *toside, size_t dlen, bool no_udp_csum); void udp_sock_fwd(const struct ctx *c, int s, int rule_hint, diff --git a/udp_vu.c b/udp_vu.c index 74bf79d57969..888367ef4e16 100644 --- a/udp_vu.c +++ b/udp_vu.c @@ -55,40 +55,33 @@ static size_t udp_vu_hdrlen(bool v6)
/** * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers - * @iov: IO vector for the frame (in/out) - * @cnt: Number of available entries in @iov (input) - * Number of used entries in @iov to store the datagram (output) + * @payload: UDP payload
Nit: "Buffer(s) for UDP payload" maybe? At this point they are still to be filled by recvmsg().
+ * @cnt: Number of used entries in @payload to store the datagram (output) * Unchanged on failure * @s: Socket to receive from - * @v6: Set for IPv6 connections * * Return: size of received data, -1 on error */ -static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6) +static ssize_t udp_vu_sock_recv(struct iov_tail *payload, size_t *cnt, int s) { struct iovec msg_iov[VIRTQUEUE_MAX_SIZE]; struct msghdr msg = { 0 }; - struct iov_tail payload; - size_t hdrlen, iov_used; + size_t iov_used; ssize_t dlen;
- /* compute L2 header length */ - hdrlen = udp_vu_hdrlen(v6); - - payload = IOV_TAIL(iov, *cnt, hdrlen); - msg.msg_iov = msg_iov; - msg.msg_iovlen = iov_tail_clone(msg.msg_iov, payload.cnt, &payload); + msg.msg_iovlen = iov_tail_clone(msg.msg_iov, ARRAY_SIZE(msg_iov), + payload);
/* read data from the socket */ dlen = recvmsg(s, &msg, 0); if (dlen < 0) return -1;
- iov_used = iov_skip_bytes(iov, *cnt, - MAX(dlen + hdrlen, VNET_HLEN + ETH_ZLEN), - NULL); - if (iov_used < *cnt) + iov_used = iov_skip_bytes(payload->iov, payload->cnt, + MAX(dlen + payload->off, + VNET_HLEN + ETH_ZLEN), NULL); + if (iov_used < payload->cnt) iov_used++; *cnt = iov_used; /* one iovec per element */
@@ -98,69 +91,44 @@ static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6) /** * udp_vu_prepare() - Prepare the packet header * @c: Execution context - * @iov: IO vector for the frame (including vnet header) + * @data: IO vector tail for the L2 frame, on return points to the L4 header + * @payload: UDP payload * @toside: Address information for one side of the flow * @dlen: Packet data length */ -static void udp_vu_prepare(const struct ctx *c, const struct iovec *iov, - const struct flowside *toside, ssize_t dlen) +static void udp_vu_prepare(const struct ctx *c, struct iov_tail *data, + struct iov_tail *payload, + const struct flowside *toside, size_t dlen) { - struct ethhdr *eh; + bool ipv4 = inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr); + struct ethhdr eh; + struct udphdr uh;
/* ethernet header */ - eh = vu_eth(iov[0].iov_base); + memcpy(eh.h_dest, c->guest_mac, sizeof(eh.h_dest)); + memcpy(eh.h_source, c->our_tap_mac, sizeof(eh.h_source));
- memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest)); - memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source)); + if (ipv4) + eh.h_proto = htons(ETH_P_IP); + else + eh.h_proto = htons(ETH_P_IPV6); + IOV_PUSH_HEADER(data, eh);
/* initialize header */ - if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) { - struct iphdr *iph = vu_ip(iov[0].iov_base); - struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base); - - eh->h_proto = htons(ETH_P_IP); + if (ipv4) { + struct iphdr iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
- *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP); + udp_update_hdr4(&iph, &uh, payload, toside, dlen, !*c->pcap);
- udp_update_hdr4(iph, bp, toside, dlen, true); + IOV_PUSH_HEADER(data, iph); } else { - struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base); - struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base); + struct ipv6hdr ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
- eh->h_proto = htons(ETH_P_IPV6); + udp_update_hdr6(&ip6h, &uh, payload, toside, dlen, !*c->pcap);
- *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP); - - udp_update_hdr6(ip6h, bp, toside, dlen, true); - } -} - -/** - * udp_vu_csum() - Calculate and set checksum for a UDP packet - * @toside: Address information for one side of the flow - * @iov: IO vector for the frame - * @cnt: Number of IO vector entries - * @dlen: Data length - */ -static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov, - size_t cnt, size_t dlen) -{ - const struct in_addr *src4 = inany_v4(&toside->oaddr); - const struct in_addr *dst4 = inany_v4(&toside->eaddr); - char *base = iov[0].iov_base; - struct udp_payload_t *bp; - struct iov_tail data; - - if (src4 && dst4) { - bp = vu_payloadv4(base); - data = IOV_TAIL(iov, cnt, (char *)&bp->data - base); - csum_udp4(&bp->uh, *src4, *dst4, &data, dlen); - } else { - bp = vu_payloadv6(base); - data = IOV_TAIL(iov, cnt, (char *)&bp->data - base); - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data, - dlen); + IOV_PUSH_HEADER(data, ip6h); } + IOV_PUSH_HEADER(data, uh); }
/** @@ -198,6 +166,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
for (i = 0; i < n; i++) { unsigned elem_cnt, elem_used, j, k; + struct iov_tail payload; size_t iov_cnt; ssize_t dlen;
@@ -207,7 +176,8 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx) if (elem_cnt == 0) break;
- dlen = udp_vu_sock_recv(iov_vu, &iov_cnt, s, v6); + payload = IOV_TAIL(iov_vu, iov_cnt, hdrlen); + dlen = udp_vu_sock_recv(&payload, &iov_cnt, s); if (dlen < 0) { vu_queue_rewind(vq, elem_cnt); break; @@ -227,9 +197,9 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx) vu_queue_rewind(vq, elem_cnt - elem_used);
if (iov_cnt > 0) { - udp_vu_prepare(c, iov_vu, toside, dlen); + struct iov_tail data = IOV_TAIL(iov_vu, iov_cnt, VNET_HLEN); + udp_vu_prepare(c, &data, &payload, toside, dlen); if (*c->pcap) { - udp_vu_csum(toside, iov_vu, iov_cnt, dlen); pcap_iov(iov_vu, iov_cnt, VNET_HLEN, hdrlen + dlen - VNET_HLEN); } -- 2.54.0
-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson