On Tue, Jun 16, 2026 at 02:51:25PM +0200, Laurent Vivier wrote:
Thread the queue pair parameter through the UDP socket handler and send path, replacing hardcoded QPAIR_DEFAULT values. This is the UDP counterpart to the equivalent TCP and ICMP changes.
The queue pair is passed from passt_worker() through udp_listen_sock_handler(), udp_sock_handler(), udp_sock_fwd(), and down into udp_sock_errs()/udp_sock_recverr() for ICMP error generation, as well as udp_vu_sock_to_tap() for vhost-user delivery.
As with the TCP patch, passing the qpair through to-guest functions that take a flow parameter doesn't seem right to me.
On the flow handling side, flow_defer_handler() receives the queue pair and passes it to udp_flow_defer() and udp_flush_flow(), so that deferred UDP datagrams are forwarded on the correct queue.
No functional change.
Signed-off-by: Laurent Vivier
--- flow.c | 5 +++-- flow.h | 3 ++- passt.c | 8 +++++--- tap.c | 4 ++-- udp.c | 45 +++++++++++++++++++++++++-------------------- udp.h | 9 +++++---- udp_flow.c | 24 ++++++++++++++---------- udp_flow.h | 4 ++-- udp_internal.h | 3 ++- udp_vu.c | 5 +++-- udp_vu.h | 3 ++- 11 files changed, 65 insertions(+), 48 deletions(-) diff --git a/flow.c b/flow.c index 565ed2b2f7e7..c93b73549c90 100644 --- a/flow.c +++ b/flow.c @@ -884,7 +884,8 @@ flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif, * @c: Execution context * @now: Current timestamp */ -void flow_defer_handler(const struct ctx *c, const struct timespec *now) +void flow_defer_handler(const struct ctx *c, const struct timespec *now, + unsigned int qpair) { struct flow_free_cluster *free_head = NULL; unsigned *last_next = &flow_first_free; @@ -923,7 +924,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now) closed = icmp_ping_timer(c, &flow->ping, now); break; case FLOW_UDP: - closed = udp_flow_defer(c, &flow->udp, now); + closed = udp_flow_defer(c, &flow->udp, now, qpair);
Surely qpair could be different for each flow?
if (!closed && timer) closed = udp_flow_timer(c, &flow->udp, now); break; diff --git a/flow.h b/flow.h index 6c6a9260aa23..cae259fe7037 100644 --- a/flow.h +++ b/flow.h @@ -270,7 +270,8 @@ void flow_epollid_set(struct flow_common *f, int epollid); int flow_epoll_set(const struct flow_common *f, int command, uint32_t events, int fd, unsigned int sidei); void flow_epollid_register(int epollid, int epollfd); -void flow_defer_handler(const struct ctx *c, const struct timespec *now); +void flow_defer_handler(const struct ctx *c, const struct timespec *now, + unsigned int qpair); int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage, int fd); int flow_migrate_source_pre(struct ctx *c, const struct migrate_stage *stage, diff --git a/passt.c b/passt.c index 9569f920ee28..41239991451f 100644 --- a/passt.c +++ b/passt.c @@ -106,7 +106,7 @@ static void post_handler(struct ctx *c, const struct timespec *now, if (!c->no_tcp) tcp_defer_handler(c, now, qpair);
- flow_defer_handler(c, now); + flow_defer_handler(c, now, qpair); fwd_scan_ports_timer(c, now);
if (!c->no_ndp) @@ -265,10 +265,12 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events) tcp_timer_handler(c, ref, QPAIR_DEFAULT); break; case EPOLL_TYPE_UDP_LISTEN: - udp_listen_sock_handler(c, ref, eventmask, &now); + udp_listen_sock_handler(c, ref, eventmask, &now, + QPAIR_DEFAULT); break; case EPOLL_TYPE_UDP: - udp_sock_handler(c, ref, eventmask, &now); + udp_sock_handler(c, ref, eventmask, &now, + QPAIR_DEFAULT); break; case EPOLL_TYPE_PING: icmp_sock_handler(c, ref); diff --git a/tap.c b/tap.c index ba2a573fa630..e8fd3661ebb5 100644 --- a/tap.c +++ b/tap.c @@ -875,7 +875,7 @@ append: if (c->no_udp) continue; for (k = 0; k < p->count; ) - k += udp_tap_handler(c, PIF_TAP, AF_INET, + k += udp_tap_handler(c, qpair, PIF_TAP, AF_INET, &seq->saddr, &seq->daddr, seq->ttl, p, k, now); } @@ -1124,7 +1124,7 @@ append: if (c->no_udp) continue; for (k = 0; k < p->count; ) - k += udp_tap_handler(c, PIF_TAP, AF_INET6, + k += udp_tap_handler(c, qpair, PIF_TAP, AF_INET6, &seq->saddr, &seq->daddr, seq->hop_limit, p, k, now); } diff --git a/udp.c b/udp.c index a295cb0e97cf..e91d44aa33d6 100644 --- a/udp.c +++ b/udp.c @@ -403,13 +403,14 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, /** * udp_send_tap_icmp4() - Construct and send ICMPv4 to local peer * @c: Execution context + * @qpair: Queue pair on which to send the ICMPv4 packet * @ee: Extended error descriptor * @toside: Destination side of flow * @saddr: Address of ICMP generating node * @in: First bytes (max 8) of original UDP message body * @dlen: Length of the read part of original UDP message body */ -static void udp_send_tap_icmp4(const struct ctx *c, +static void udp_send_tap_icmp4(const struct ctx *c, unsigned int qpair, const struct sock_extended_err *ee, const struct flowside *toside, struct in_addr saddr, @@ -445,13 +446,14 @@ static void udp_send_tap_icmp4(const struct ctx *c, /* Try to obtain the MAC address of the generating node */ saddr_any = inany_from_v4(saddr); 
fwd_neigh_mac_get(c, &saddr_any, tap_omac); - tap_icmp4_send(c, QPAIR_DEFAULT, saddr, eaddr, &msg, tap_omac, msglen); + tap_icmp4_send(c, qpair, saddr, eaddr, &msg, tap_omac, msglen); }
/** * udp_send_tap_icmp6() - Construct and send ICMPv6 to local peer * @c: Execution context + * @qpair: Queue pair on which to send the ICMPv6 packet * @ee: Extended error descriptor * @toside: Destination side of flow * @saddr: Address of ICMP generating node @@ -459,7 +461,7 @@ static void udp_send_tap_icmp4(const struct ctx *c, * @dlen: Length of the read part of original UDP message body * @flow: IPv6 flow identifier */ -static void udp_send_tap_icmp6(const struct ctx *c, +static void udp_send_tap_icmp6(const struct ctx *c, unsigned int qpair, const struct sock_extended_err *ee, const struct flowside *toside, const struct in6_addr *saddr, @@ -493,7 +495,7 @@ static void udp_send_tap_icmp6(const struct ctx *c,
/* Try to obtain the MAC address of the generating node */ fwd_neigh_mac_get(c, (union inany_addr *) saddr, tap_omac); - tap_icmp6_send(c, QPAIR_DEFAULT, saddr, eaddr, &msg, tap_omac, msglen); + tap_icmp6_send(c, qpair, saddr, eaddr, &msg, tap_omac, msglen); }
/** @@ -546,7 +548,7 @@ static int udp_pktinfo(struct msghdr *msg, union inany_addr *dst) * #syscalls recvmsg */ static int udp_sock_recverr(const struct ctx *c, int s, flow_sidx_t sidx, - uint8_t pif, in_port_t port) + uint8_t pif, in_port_t port, unsigned int qpair) { char buf[PKTINFO_SPACE + RECVERR_SPACE]; const struct sock_extended_err *ee; @@ -653,12 +655,12 @@ static int udp_sock_recverr(const struct ctx *c, int s, flow_sidx_t sidx, if (hdr->cmsg_level == IPPROTO_IP && (o4 = inany_v4(&otap)) && inany_v4(&toside->eaddr)) { dlen = MIN(dlen, ICMP4_MAX_DLEN); - udp_send_tap_icmp4(c, ee, toside, *o4, data, dlen); + udp_send_tap_icmp4(c, qpair, ee, toside, *o4, data, dlen); return 1; }
if (hdr->cmsg_level == IPPROTO_IPV6 && !inany_v4(&toside->eaddr)) { - udp_send_tap_icmp6(c, ee, toside, &otap.a6, data, dlen, + udp_send_tap_icmp6(c, qpair, ee, toside, &otap.a6, data, dlen, FLOW_IDX(uflow)); return 1; } @@ -685,7 +687,7 @@ fail: * Return: number of errors handled, or < 0 if we have an unrecoverable error */ static int udp_sock_errs(const struct ctx *c, int s, flow_sidx_t sidx, - uint8_t pif, in_port_t port) + uint8_t pif, in_port_t port, unsigned int qpair) { unsigned n_err = 0; socklen_t errlen; @@ -694,7 +696,7 @@ static int udp_sock_errs(const struct ctx *c, int s, flow_sidx_t sidx, assert(!c->no_udp);
/* Empty the error queue */ - while ((rc = udp_sock_recverr(c, s, sidx, pif, port)) > 0) + while ((rc = udp_sock_recverr(c, s, sidx, pif, port, qpair)) > 0) n_err += rc;
if (rc < 0) @@ -853,7 +855,8 @@ static void udp_buf_sock_to_tap(const struct ctx *c, int s, int n, * @now: Current timestamp */ void udp_sock_fwd(const struct ctx *c, int s, int rule_hint, - uint8_t frompif, in_port_t port, const struct timespec *now) + uint8_t frompif, in_port_t port, const struct timespec *now, + unsigned int qpair) { union sockaddr_inany src; union inany_addr dst; @@ -869,7 +872,7 @@ void udp_sock_fwd(const struct ctx *c, int s, int rule_hint, strerror_(-rc)); /* Clear errors & carry on */ if (udp_sock_errs(c, s, FLOW_SIDX_NONE, - frompif, port) < 0) { + frompif, port, qpair) < 0) { err_ratelimit(now, "UDP: Unrecoverable error on listening socket: (%s port %hu)", pif_name(frompif), port); @@ -886,7 +889,7 @@ void udp_sock_fwd(const struct ctx *c, int s, int rule_hint, udp_sock_to_sock(c, s, 1, tosidx); } else if (topif == PIF_TAP) { if (c->mode == MODE_VU) - udp_vu_sock_to_tap(c, s, 1, tosidx); + udp_vu_sock_to_tap(c, s, 1, tosidx, qpair); else udp_buf_sock_to_tap(c, s, 1, tosidx); } else if (flow_sidx_valid(tosidx)) { @@ -919,11 +922,11 @@ void udp_sock_fwd(const struct ctx *c, int s, int rule_hint, */ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, - const struct timespec *now) + const struct timespec *now, unsigned int qpair) { if (events & (EPOLLERR | EPOLLIN)) { udp_sock_fwd(c, ref.fd, ref.listen.rule, - ref.listen.pif, ref.listen.port, now); + ref.listen.pif, ref.listen.port, now, qpair); } }
@@ -933,16 +936,17 @@ void udp_listen_sock_handler(const struct ctx *c, * @ref: epoll reference * @events: epoll events bitmap * @now: Current timestamp + * @qpair: Queue pair to process */ -void udp_sock_handler(const struct ctx *c, union epoll_ref ref, - uint32_t events, const struct timespec *now) +void udp_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, + const struct timespec *now, unsigned int qpair)
How would the caller know the right qpair before looking at the flow?
{ struct udp_flow *uflow = udp_at_sidx(ref.flowside);
assert(!c->no_udp && uflow);
if (events & EPOLLERR) { - if (udp_sock_errs(c, ref.fd, ref.flowside, PIF_NONE, 0) < 0) { + if (udp_sock_errs(c, ref.fd, ref.flowside, PIF_NONE, 0, qpair) < 0) { flow_err(uflow, "Unrecoverable error on flow socket"); goto fail; } @@ -969,7 +973,7 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref, } else if (topif == PIF_TAP) { if (c->mode == MODE_VU) { udp_vu_sock_to_tap(c, s, UDP_MAX_FRAMES, - tosidx); + tosidx, qpair); } else { udp_buf_sock_to_tap(c, s, n, tosidx); } @@ -991,6 +995,7 @@ fail: /** * udp_tap_handler() - Handle packets from tap * @c: Execution context + * @qpair: Queue pair to process * @pif: pif on which the packet is arriving * @af: Address family, AF_INET or AF_INET6 * @saddr: Source address @@ -1004,7 +1009,7 @@ fail: * * #syscalls sendmmsg */ -int udp_tap_handler(const struct ctx *c, uint8_t pif, +int udp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, uint8_t ttl, const struct pool *p, int idx, const struct timespec *now) @@ -1037,7 +1042,7 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, src = ntohs(uh->source); dst = ntohs(uh->dest);
- tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now); + tosidx = udp_flow_from_tap(c, qpair, pif, af, saddr, daddr, src, dst, now); if (!(uflow = udp_at_sidx(tosidx))) { char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
diff --git a/udp.h b/udp.h index 42d7a1c708cc..35b12ea2c9a6 100644 --- a/udp.h +++ b/udp.h @@ -12,10 +12,11 @@ #include "fwd.h"
void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref, - uint32_t events, const struct timespec *now); -void udp_sock_handler(const struct ctx *c, union epoll_ref ref, - uint32_t events, const struct timespec *now); -int udp_tap_handler(const struct ctx *c, uint8_t pif, + uint32_t events, const struct timespec *now, + unsigned int qpair); +void udp_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, + const struct timespec *now, unsigned int qpair); +int udp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, uint8_t ttl, const struct pool *p, int idx, const struct timespec *now); diff --git a/udp_flow.c b/udp_flow.c index 35417bc48a39..143f265493fa 100644 --- a/udp_flow.c +++ b/udp_flow.c @@ -127,6 +127,7 @@ static int udp_flow_sock(const struct ctx *c, /** * udp_flow_new() - Common setup for a new UDP flow * @c: Execution context + * @qpair: Queue pair for the flow * @flow: Initiated flow * @rule_hint: Index of forwarding rule, or -1 if unknown * @now: Timestamp @@ -136,8 +137,9 @@ static int udp_flow_sock(const struct ctx *c, * * #syscalls getsockname */ -static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow, - int rule_hint, const struct timespec *now) +static flow_sidx_t udp_flow_new(const struct ctx *c, unsigned int qpair, + union flow *flow, int rule_hint, + const struct timespec *now) { struct udp_flow *uflow = NULL; const struct flowside *tgt; @@ -152,6 +154,7 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow, uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0; uflow->activity[INISIDE] = 1; uflow->activity[TGTSIDE] = 0; + (void)qpair;
flow_foreach_sidei(sidei) { if (pif_is_socket(uflow->f.pif[sidei])) @@ -254,12 +257,13 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, return FLOW_SIDX_NONE; }
- return udp_flow_new(c, flow, rule_hint, now); + return udp_flow_new(c, QPAIR_DEFAULT, flow, rule_hint, now); }
/** * udp_flow_from_tap() - Find or create UDP flow for tap packets * @c: Execution context + * @qpair: Queue pair for the flow * @pif: pif on which the packet is arriving * @af: Address family, AF_INET or AF_INET6 * @saddr: Source address on guest side @@ -270,7 +274,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, * Return: sidx for the destination side of the flow for this packet, or * FLOW_SIDX_NONE if we couldn't find or create a flow. */ -flow_sidx_t udp_flow_from_tap(const struct ctx *c, +flow_sidx_t udp_flow_from_tap(const struct ctx *c, unsigned int qpair, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, in_port_t srcport, in_port_t dstport, @@ -310,7 +314,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, return FLOW_SIDX_NONE; }
- return udp_flow_new(c, flow, FWD_NO_HINT, now); + return udp_flow_new(c, qpair, flow, FWD_NO_HINT, now); }
/** @@ -322,12 +326,12 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, */ static void udp_flush_flow(const struct ctx *c, const struct udp_flow *uflow, unsigned sidei, - const struct timespec *now) + const struct timespec *now, unsigned int qpair) { /* We don't know exactly where the datagrams will come from, but we know * they'll have an interface and oport matching this flow */ udp_sock_fwd(c, uflow->s[sidei], -1, uflow->f.pif[sidei], - uflow->f.side[sidei].oport, now); + uflow->f.side[sidei].oport, now, qpair); }
/** @@ -339,14 +343,14 @@ static void udp_flush_flow(const struct ctx *c, * Return: true if the connection is ready to free, false otherwise */ bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow, - const struct timespec *now) + const struct timespec *now, unsigned int qpair) { if (uflow->flush0) { - udp_flush_flow(c, uflow, INISIDE, now); + udp_flush_flow(c, uflow, INISIDE, now, qpair); uflow->flush0 = false; } if (uflow->flush1) { - udp_flush_flow(c, uflow, TGTSIDE, now); + udp_flush_flow(c, uflow, TGTSIDE, now, qpair); uflow->flush1 = false; } return uflow->closed; diff --git a/udp_flow.h b/udp_flow.h index 62cc9b3aae1f..5a297c61646a 100644 --- a/udp_flow.h +++ b/udp_flow.h @@ -44,14 +44,14 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, const union inany_addr *dst, in_port_t port, const union sockaddr_inany *s_in, int rule_hint, const struct timespec *now); -flow_sidx_t udp_flow_from_tap(const struct ctx *c, +flow_sidx_t udp_flow_from_tap(const struct ctx *c, unsigned int qpair, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, in_port_t srcport, in_port_t dstport, const struct timespec *now); void udp_flow_close(const struct ctx *c, struct udp_flow *uflow); bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow, - const struct timespec *now); + const struct timespec *now, unsigned int qpair); bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, const struct timespec *now); void udp_flow_activity(struct udp_flow *uflow, unsigned int sidei, diff --git a/udp_internal.h b/udp_internal.h index 361cc7495a01..0cd6da49fc05 100644 --- a/udp_internal.h +++ b/udp_internal.h @@ -34,6 +34,7 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udphdr *uh, const struct flowside *toside, size_t dlen, bool no_udp_csum); void udp_sock_fwd(const struct ctx *c, int s, int rule_hint, - uint8_t frompif, in_port_t port, const struct timespec *now); + uint8_t frompif, in_port_t port, const struct timespec *now, + 
unsigned int qpair);
#endif /* UDP_INTERNAL_H */ diff --git a/udp_vu.c b/udp_vu.c index b1a8ad76a691..864e7a99b8d9 100644 --- a/udp_vu.c +++ b/udp_vu.c @@ -141,13 +141,14 @@ static void udp_vu_prepare(const struct ctx *c, struct iov_tail *data, * @n: Maximum number of datagrams to forward * @tosidx: Flow & side to forward data from @s to */ -void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx) +void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx, + unsigned int qpair) { const struct flowside *toside = flowside_at_sidx(tosidx); bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)); static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE]; static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE]; - int rx_queue = QPAIR_TOGUEST_QUEUE(QPAIR_DEFAULT); + int rx_queue = QPAIR_TOGUEST_QUEUE(qpair); struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[rx_queue]; size_t hdrlen = udp_vu_hdrlen(v6); diff --git a/udp_vu.h b/udp_vu.h index 1e38af35ad4e..40ab28119b10 100644 --- a/udp_vu.h +++ b/udp_vu.h @@ -10,6 +10,7 @@
void udp_vu_listen_sock_data(const struct ctx *c, union epoll_ref ref, const struct timespec *now); -void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx); +void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx, + unsigned int qpair);
#endif /* UDP_VU_H */ -- 2.54.0
-- 
David Gibson (he or they)       | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you, not the other way
                                | around.
http://www.ozlabs.org/~dgibson