On Tue, Jun 16, 2026 at 02:51:24PM +0200, Laurent Vivier wrote:
Thread a qpair parameter from the entry points (tcp_sock_handler, tcp_timer_handler, tcp_tap_handler, tcp_defer_handler) through every intermediate function down to the vhost-user send functions, so callers explicitly select the target RX virtqueue instead of hardcoding QPAIR_DEFAULT.
Add a qpair parameter to tcp_send_flag(), tcp_data_from_sock(), tcp_rst_do() and its tcp_rst() macro, tcp_rewind_seq(), tcp_data_from_tap(), tcp_conn_from_sock_finish(), tcp_connect_finish(), tcp_tap_window_update(), tcp_conn_from_tap(), tcp_rst_no_conn(), tcp_keepalive(), and tcp_inactivity().
For the to-guest functions which take a connection parameter, this seems odd to me. Can't they deduce the right queue from the connection? It makes sense, obviously, for anything without a connection parameter. And it makes sense to me for from-guest functions, since the guest might have migrated the connection to a different queue since last we heard about it.
tcp_vu_send_flag() and tcp_vu_data_from_sock() now use the passed qpair to select the RX virtqueue instead of always using QPAIR_DEFAULT.
The buffer-based path (tcp_buf.c) does not thread qpair since it is only used in non-vhost-user mode.
No functional change.
Signed-off-by: Laurent Vivier
--- passt.c | 12 +-- tap.c | 8 +- tcp.c | 199 ++++++++++++++++++++++++++++--------------------- tcp.h | 15 ++-- tcp_buf.c | 10 +-- tcp_internal.h | 7 +- tcp_vu.c | 19 +++-- tcp_vu.h | 6 +- 8 files changed, 157 insertions(+), 119 deletions(-) diff --git a/passt.c b/passt.c index 71eb4f0192e2..9569f920ee28 100644 --- a/passt.c +++ b/passt.c @@ -98,11 +98,13 @@ struct passt_stats { * post_handler() - Run periodic and deferred tasks for L4 protocol handlers * @c: Execution context * @now: Current timestamp + * @qpair: Queue pair to process */ -static void post_handler(struct ctx *c, const struct timespec *now) +static void post_handler(struct ctx *c, const struct timespec *now, + unsigned int qpair) { if (!c->no_tcp) - tcp_defer_handler(c, now); + tcp_defer_handler(c, now, qpair);
flow_defer_handler(c, now); fwd_scan_ports_timer(c, now); @@ -251,7 +253,7 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events) pasta_netns_quit_timer_handler(c, ref); break; case EPOLL_TYPE_TCP: - tcp_sock_handler(c, ref, eventmask); + tcp_sock_handler(c, ref, eventmask, QPAIR_DEFAULT); break; case EPOLL_TYPE_TCP_SPLICE: tcp_splice_sock_handler(c, ref, eventmask, &now); @@ -260,7 +262,7 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events) tcp_listen_handler(c, ref, &now); break; case EPOLL_TYPE_TCP_TIMER: - tcp_timer_handler(c, ref); + tcp_timer_handler(c, ref, QPAIR_DEFAULT); break; case EPOLL_TYPE_UDP_LISTEN: udp_listen_sock_handler(c, ref, eventmask, &now); @@ -300,7 +302,7 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events) print_stats(c, &stats, &now); }
- post_handler(c, &now); + post_handler(c, &now, QPAIR_DEFAULT);
migrate_handler(c); } diff --git a/tap.c b/tap.c index 66dcb83665a7..ba2a573fa630 100644 --- a/tap.c +++ b/tap.c @@ -717,8 +717,6 @@ static int tap4_handler(struct ctx *c, unsigned int qpair, unsigned int i, j, seq_count; struct tap4_l4_t *seq;
- (void)qpair; - if (!c->ifi4 || !pool_tap4->count) return pool_tap4->count;
@@ -870,7 +868,7 @@ append: if (c->no_tcp) continue; for (k = 0; k < p->count; ) - k += tcp_tap_handler(c, PIF_TAP, AF_INET, + k += tcp_tap_handler(c, qpair, PIF_TAP, AF_INET, &seq->saddr, &seq->daddr, 0, p, k, now); } else if (seq->protocol == IPPROTO_UDP) { @@ -955,8 +953,6 @@ static int tap6_handler(struct ctx *c, unsigned int qpair, unsigned int i, j, seq_count = 0; struct tap6_l4_t *seq;
- (void)qpair; - if (!c->ifi6 || !pool_tap6->count) return pool_tap6->count;
@@ -1121,7 +1117,7 @@ append: if (c->no_tcp) continue; for (k = 0; k < p->count; ) - k += tcp_tap_handler(c, PIF_TAP, AF_INET6, + k += tcp_tap_handler(c, qpair, PIF_TAP, AF_INET6, &seq->saddr, &seq->daddr, seq->flow_lbl, p, k, now); } else if (seq->protocol == IPPROTO_UDP) { diff --git a/tcp.c b/tcp.c index c127b3132e5a..7f8e68a31994 100644 --- a/tcp.c +++ b/tcp.c @@ -1258,16 +1258,18 @@ static void tcp_update_seqack_from_tap(const struct ctx *c, * tcp_rewind_seq() - Rewind sequence to tap and socket offset to current ACK * @c: Execution context * @conn: Connection pointer + * @qpair: Queue pair to process * * Return: 0 on success, -1 on failure, with connection reset */ -static int tcp_rewind_seq(const struct ctx *c, struct tcp_tap_conn *conn) +static int tcp_rewind_seq(const struct ctx *c, struct tcp_tap_conn *conn, + unsigned int qpair) { conn->seq_to_tap = conn->seq_ack_from_tap; conn->events &= ~TAP_FIN_SENT;
if (tcp_set_peek_offset(conn, 0)) { - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); return -1; }
@@ -1371,16 +1373,17 @@ int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn, * @c: Execution context * @conn: Connection pointer * @flags: TCP flags: if not set, send segment only if ACK is due + * @qpair: Queue pair to process * * Return: negative error code on fatal connection failure, 0 otherwise */ static int tcp_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, - int flags) + int flags, unsigned int qpair) { int ret;
if (c->mode == MODE_VU) - ret = tcp_vu_send_flag(c, conn, flags); + ret = tcp_vu_send_flag(c, conn, flags, qpair); else ret = tcp_buf_send_flag(c, conn, flags);
@@ -1425,14 +1428,16 @@ static void tcp_sock_rst(const struct ctx *c, struct tcp_tap_conn *conn) * tcp_rst_do() - Reset a tap connection: send RST segment on both sides, close * @c: Execution context * @conn: Connection pointer + * @qpair: Queue pair to process */ -void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn) +void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn, + unsigned int qpair) { if (conn->events == CLOSED) return;
/* Send RST on tap */ - tcp_send_flag(c, conn, RST); + tcp_send_flag(c, conn, RST, qpair);
tcp_sock_rst(c, conn); } @@ -1459,11 +1464,13 @@ static void tcp_get_tap_ws(struct tcp_tap_conn *conn, * @c: Execution context * @conn: Connection pointer * @wnd: Window value, host order, unscaled + * @qpair: Queue pair to process * * Return: false on zero window (not stored to wnd_from_tap), true otherwise */ static bool tcp_tap_window_update(const struct ctx *c, - struct tcp_tap_conn *conn, unsigned wnd) + struct tcp_tap_conn *conn, unsigned wnd, + unsigned int qpair)
In particular, threading the qpair through this function, which isn't chiefly about sending _or_ receiving, seems odd..
{ wnd = MIN(MAX_WINDOW, wnd << conn->ws_from_tap);
@@ -1474,7 +1481,7 @@ static bool tcp_tap_window_update(const struct ctx *c, * that no data beyond the updated window will be acknowledged. */ if (!wnd && SEQ_LT(conn->seq_ack_from_tap, conn->seq_to_tap)) { - tcp_rewind_seq(c, conn); + tcp_rewind_seq(c, conn, qpair);
.. just to pass it on here, which only cares about it for an error case.
return false; }
@@ -1646,6 +1653,7 @@ static void tcp_bind_outbound(const struct ctx *c, /** * tcp_conn_from_tap() - Handle connection request (SYN segment) from tap * @c: Execution context + * @qpair: Queue pair for the flow * @af: Address family, AF_INET or AF_INET6 * @saddr: Source address, pointer to in_addr or in6_addr * @daddr: Destination address, pointer to in_addr or in6_addr @@ -1656,10 +1664,11 @@ static void tcp_bind_outbound(const struct ctx *c, * * #syscalls:vu getsockname */ -static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af, - const void *saddr, const void *daddr, - const struct tcphdr *th, const char *opts, - size_t optlen, const struct timespec *now) +static void tcp_conn_from_tap(const struct ctx *c, unsigned int qpair, + sa_family_t af, const void *saddr, + const void *daddr, const struct tcphdr *th, + const char *opts, size_t optlen, + const struct timespec *now) { in_port_t srcport = ntohs(th->source); in_port_t dstport = ntohs(th->dest); @@ -1760,7 +1769,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
if (connect(s, &sa.sa, socklen_inany(&sa))) { if (errno != EINPROGRESS) { - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); goto cancel; }
@@ -1768,7 +1777,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af, } else { tcp_get_sndbuf(conn);
- if (tcp_send_flag(c, conn, SYN | ACK)) + if (tcp_send_flag(c, conn, SYN | ACK, qpair)) goto cancel;
conn_event(c, conn, TAP_SYN_ACK_SENT); @@ -1830,15 +1839,17 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq) * tcp_data_from_sock() - Handle new data from socket, queue to tap, in window * @c: Execution context * @conn: Connection pointer + * @qpair: Queue pair to process * * Return: negative on connection reset, 0 otherwise * * #syscalls recvmsg */ -static int tcp_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) +static int tcp_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn, + unsigned int qpair) { if (c->mode == MODE_VU) - return tcp_vu_data_from_sock(c, conn); + return tcp_vu_data_from_sock(c, conn, qpair);
return tcp_buf_data_from_sock(c, conn); } @@ -1866,13 +1877,15 @@ static ssize_t tcp_packet_data_len(const struct tcphdr *th, size_t l4len) * @conn: Connection pointer * @p: Pool of TCP packets, with TCP headers * @idx: Index of first data packet in pool + * @qpair: Queue pair to process * * #syscalls sendmsg * * Return: count of consumed packets */ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn, - const struct pool *p, int idx) + const struct pool *p, int idx, + unsigned int qpair) { int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1, partial_send = 0; uint16_t max_ack_seq_wnd = conn->wnd_from_tap; @@ -1922,7 +1935,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn, "keep-alive sequence: %u, previous: %u", seq, conn->seq_from_tap);
- if (tcp_send_flag(c, conn, ACK)) + if (tcp_send_flag(c, conn, ACK, qpair)) return -1;
tcp_timer_ctl(c, conn); @@ -1933,7 +1946,8 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
if (p->count == 1) { tcp_tap_window_update(c, conn, - ntohs(th->window)); + ntohs(th->window), + qpair); return 1; }
@@ -1959,7 +1973,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn, * well. */ if (!ntohs(th->window)) - tcp_rewind_seq(c, conn); + tcp_rewind_seq(c, conn, qpair);
max_ack_seq_wnd = ntohs(th->window); max_ack_seq = ack_seq; @@ -2024,17 +2038,17 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn, if (ack && !tcp_sock_consume(conn, max_ack_seq)) tcp_update_seqack_from_tap(c, conn, max_ack_seq);
- tcp_tap_window_update(c, conn, max_ack_seq_wnd); + tcp_tap_window_update(c, conn, max_ack_seq_wnd, qpair);
if (retr) { flow_trace(conn, "fast re-transmit, ACK: %u, previous sequence: %u", conn->seq_ack_from_tap, conn->seq_to_tap);
- if (tcp_rewind_seq(c, conn)) + if (tcp_rewind_seq(c, conn, qpair)) return -1;
- tcp_data_from_sock(c, conn); + tcp_data_from_sock(c, conn, qpair); }
if (!iov_i) @@ -2050,7 +2064,7 @@ eintr: * Then swiftly looked away and left. */ conn->seq_from_tap = seq_from_tap; - if (tcp_send_flag(c, conn, ACK)) + if (tcp_send_flag(c, conn, ACK, qpair)) return -1; }
@@ -2058,7 +2072,7 @@ eintr: goto eintr;
if (errno == EAGAIN || errno == EWOULDBLOCK) { - if (tcp_send_flag(c, conn, ACK | DUP_ACK)) + if (tcp_send_flag(c, conn, ACK | DUP_ACK, qpair)) return -1;
uint32_t events = tcp_conn_epoll_events(conn->events, @@ -2094,7 +2108,7 @@ out: */ if (conn->seq_dup_ack_approx != (conn->seq_from_tap & 0xff)) { conn->seq_dup_ack_approx = conn->seq_from_tap & 0xff; - if (tcp_send_flag(c, conn, ACK | DUP_ACK)) + if (tcp_send_flag(c, conn, ACK | DUP_ACK, qpair)) return -1; } return p->count - idx; @@ -2109,7 +2123,7 @@ out:
conn_event(c, conn, TAP_FIN_RCVD); } else { - if (tcp_send_flag(c, conn, ACK_IF_NEEDED)) + if (tcp_send_flag(c, conn, ACK_IF_NEEDED, qpair)) return -1; }
@@ -2123,13 +2137,15 @@ out: * @th: TCP header of SYN, ACK segment: caller MUST ensure it's there * @opts: Pointer to start of options * @optlen: Bytes in options: caller MUST ensure available length + * @qpair: Queue pair to process */ static void tcp_conn_from_sock_finish(const struct ctx *c, struct tcp_tap_conn *conn, const struct tcphdr *th, - const char *opts, size_t optlen) + const char *opts, size_t optlen, + unsigned int qpair) { - tcp_tap_window_update(c, conn, ntohs(th->window)); + tcp_tap_window_update(c, conn, ntohs(th->window), qpair); tcp_get_tap_ws(conn, opts, optlen);
/* First value is not scaled */ @@ -2144,24 +2160,25 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
conn_event(c, conn, ESTABLISHED); if (tcp_set_peek_offset(conn, 0)) { - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); return; }
- if (tcp_send_flag(c, conn, ACK)) { - tcp_rst(c, conn); + if (tcp_send_flag(c, conn, ACK, qpair)) { + tcp_rst(c, conn, qpair); return; }
/* The client might have sent data already, which we didn't * dequeue waiting for SYN,ACK from tap -- check now. */ - tcp_data_from_sock(c, conn); + tcp_data_from_sock(c, conn, qpair); }
/** * tcp_rst_no_conn() - Send RST in response to a packet with no connection * @c: Execution context + * @qpair: Queue pair on which to send the reply * @af: Address family, AF_INET or AF_INET6 * @saddr: Source address of the packet we're responding to * @daddr: Destination address of the packet we're responding to @@ -2169,7 +2186,7 @@ static void tcp_conn_from_sock_finish(const struct ctx *c, * @th: TCP header of the packet we're responding to * @l4len: Packet length, including TCP header */ -static void tcp_rst_no_conn(const struct ctx *c, int af, +static void tcp_rst_no_conn(const struct ctx *c, unsigned int qpair, int af, const void *saddr, const void *daddr, uint32_t flow_lbl, const struct tcphdr *th, size_t l4len) @@ -2227,12 +2244,13 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
tcp_update_csum(psum, rsth, &payload, 0); rst_l2len = ((char *)rsth - buf) + sizeof(*rsth); - tap_send_single(c, QPAIR_DEFAULT, buf, rst_l2len); + tap_send_single(c, qpair, buf, rst_l2len); }
/** * tcp_tap_handler() - Handle packets from tap and state transitions * @c: Execution context + * @qpair: Queue pair on which to send packets * @pif: pif on which the packet is arriving * @af: Address family, AF_INET or AF_INET6 * @saddr: Source address @@ -2244,9 +2262,10 @@ static void tcp_rst_no_conn(const struct ctx *c, int af, * * Return: count of consumed packets */ -int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, - const void *saddr, const void *daddr, uint32_t flow_lbl, - const struct pool *p, int idx, const struct timespec *now) +int tcp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif, + sa_family_t af, const void *saddr, const void *daddr, + uint32_t flow_lbl, const struct pool *p, int idx, + const struct timespec *now) { struct tcp_tap_conn *conn; struct tcphdr th_storage; @@ -2283,10 +2302,11 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, /* New connection from tap */ if (!flow) { if (opts && th->syn && !th->ack) - tcp_conn_from_tap(c, af, saddr, daddr, th, + tcp_conn_from_tap(c, qpair, af, saddr, daddr, th, opts, optlen, now); else - tcp_rst_no_conn(c, af, saddr, daddr, flow_lbl, th, l4len); + tcp_rst_no_conn(c, qpair, af, saddr, daddr, flow_lbl, th, + l4len); return 1; }
@@ -2310,7 +2330,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, /* Establishing connection from socket */ if (conn->events & SOCK_ACCEPTED) { if (th->syn && th->ack && !th->fin) { - tcp_conn_from_sock_finish(c, conn, th, opts, optlen); + tcp_conn_from_sock_finish(c, conn, th, opts, optlen, qpair); return 1; }
@@ -2337,7 +2357,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, goto reset; }
- if (tcp_send_flag(c, conn, ACK)) + if (tcp_send_flag(c, conn, ACK, qpair)) goto reset;
conn_event(c, conn, SOCK_FIN_SENT); @@ -2348,8 +2368,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, if (!th->ack) goto reset;
- if (tcp_tap_window_update(c, conn, ntohs(th->window))) - tcp_data_from_sock(c, conn); + if (tcp_tap_window_update(c, conn, ntohs(th->window), qpair)) + tcp_data_from_sock(c, conn, qpair);
if (p->count - idx == 1) return 1; @@ -2380,12 +2400,12 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, "fast re-transmit, ACK: %u, previous sequence: %u", ntohl(th->ack_seq), conn->seq_to_tap);
- if (tcp_rewind_seq(c, conn)) + if (tcp_rewind_seq(c, conn, qpair)) return -1; }
- if (tcp_tap_window_update(c, conn, ntohs(th->window)) || retr) - tcp_data_from_sock(c, conn); + if (tcp_tap_window_update(c, conn, ntohs(th->window), qpair) || retr) + tcp_data_from_sock(c, conn, qpair);
if (conn->seq_ack_from_tap == conn->seq_to_tap) { if (th->ack && conn->events & TAP_FIN_SENT) @@ -2400,7 +2420,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, }
/* Established connections accepting data from tap */ - count = tcp_data_from_tap(c, conn, p, idx); + count = tcp_data_from_tap(c, conn, p, idx, qpair); if (count == -1) goto reset;
@@ -2419,7 +2439,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, }
conn_event(c, conn, SOCK_FIN_SENT); - if (tcp_send_flag(c, conn, ACK)) + if (tcp_send_flag(c, conn, ACK, qpair)) goto reset;
ack_due = 0; @@ -2449,7 +2469,7 @@ reset: * remaining packets in the batch, since they'd be invalidated when our * RST is received, even if otherwise good. */ - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); return p->count - idx; }
@@ -2457,20 +2477,22 @@ reset: * tcp_connect_finish() - Handle completion of connect() from EPOLLOUT event * @c: Execution context * @conn: Connection pointer + * @qpair: Queue pair to process */ -static void tcp_connect_finish(const struct ctx *c, struct tcp_tap_conn *conn) +static void tcp_connect_finish(const struct ctx *c, struct tcp_tap_conn *conn, + unsigned int qpair) { socklen_t sl; int so;
sl = sizeof(so); if (getsockopt(conn->sock, SOL_SOCKET, SO_ERROR, &so, &sl) || so) { - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); return; }
- if (tcp_send_flag(c, conn, SYN | ACK)) { - tcp_rst(c, conn); + if (tcp_send_flag(c, conn, SYN | ACK, qpair)) { + tcp_rst(c, conn, qpair); return; }
@@ -2513,7 +2535,7 @@ static void tcp_tap_conn_from_sock(const struct ctx *c, union flow *flow,
conn->wnd_from_tap = WINDOW_DEFAULT;
- if (tcp_send_flag(c, conn, SYN)) { + if (tcp_send_flag(c, conn, SYN, QPAIR_DEFAULT)) { conn_flag(c, conn, CLOSING); FLOW_ACTIVATE(conn); return; @@ -2604,12 +2626,14 @@ cancel: * tcp_timer_handler() - timerfd events: close, send ACK, retransmit, or reset * @c: Execution context * @ref: epoll reference of timer (not connection) + * @qpair: Queue pair to process * * #syscalls timerfd_gettime|timerfd_gettime64 * #syscalls arm:timerfd_gettime64 i686:timerfd_gettime64 * #syscalls arm:timerfd_settime64 i686:timerfd_settime64 */ -void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) +void tcp_timer_handler(const struct ctx *c, union epoll_ref ref, + unsigned int qpair)
This one seems even stranger to me. This is triggered off a timer, not a tap-side event: how would we even know the relevant qpair except by looking at the connection?
{ struct itimerspec check_armed = { { 0 }, { 0 } }; struct tcp_tap_conn *conn = &FLOW(ref.flow)->tcp; @@ -2628,8 +2652,8 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) return;
if (conn->flags & ACK_TO_TAP_DUE) { - if (tcp_send_flag(c, conn, ACK_IF_NEEDED)) { - tcp_rst(c, conn); + if (tcp_send_flag(c, conn, ACK_IF_NEEDED, qpair)) { + tcp_rst(c, conn, qpair); return; } tcp_timer_ctl(c, conn); @@ -2641,11 +2665,11 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) max = MIN(TCP_MAX_RETRIES, max); if (conn->retries >= max) { flow_dbg(conn, "handshake timeout"); - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); } else { flow_trace(conn, "SYN timeout, retry"); - if (tcp_send_flag(c, conn, SYN)) { - tcp_rst(c, conn); + if (tcp_send_flag(c, conn, SYN, qpair)) { + tcp_rst(c, conn, qpair); return; } conn->retries++; @@ -2654,7 +2678,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) } } else if (conn->retries == TCP_MAX_RETRIES) { flow_dbg(conn, "retransmissions count exceeded"); - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); } else { flow_dbg(conn, "ACK timeout, retry");
@@ -2662,10 +2686,10 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) conn->wnd_from_tap = 1; /* Zero-window probe */
conn->retries++; - if (tcp_rewind_seq(c, conn)) + if (tcp_rewind_seq(c, conn, qpair)) return;
- tcp_data_from_sock(c, conn); + tcp_data_from_sock(c, conn, qpair); tcp_timer_ctl(c, conn); } } @@ -2676,9 +2700,10 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) * @c: Execution context * @ref: epoll reference * @events: epoll events bitmap + * @qpair: Queue pair to process */ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref, - uint32_t events) + uint32_t events, unsigned int qpair)
Likewise here: how could the caller know a reasonable qpair based just on the socket-side event?
{ struct tcp_tap_conn *conn = conn_at_sidx(ref.flowside);
@@ -2689,7 +2714,7 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref, return;
if (events & EPOLLERR) { - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); return; }
@@ -2708,13 +2733,13 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref, conn_event(c, conn, SOCK_FIN_RCVD);
if (events & EPOLLIN) - tcp_data_from_sock(c, conn); + tcp_data_from_sock(c, conn, qpair);
if (events & EPOLLOUT) { tcp_epoll_ctl(conn); if (tcp_update_seqack_wnd(c, conn, false, NULL) && - tcp_send_flag(c, conn, ACK)) { - tcp_rst(c, conn); + tcp_send_flag(c, conn, ACK, qpair)) { + tcp_rst(c, conn, qpair); return; } } @@ -2724,7 +2749,7 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
/* EPOLLHUP during handshake: reset */ if (events & EPOLLHUP) { - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); return; }
@@ -2734,7 +2759,7 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
if (conn->events == TAP_SYN_RCVD) { if (events & EPOLLOUT) - tcp_connect_finish(c, conn); + tcp_connect_finish(c, conn, qpair); /* Data? Check later */ } } @@ -2939,9 +2964,11 @@ int tcp_init(struct ctx *c)
/** * tcp_keepalive() - Send keepalives for connections which need it - * @: Execution context + * @c: Execution context + * @qpair: Queue pair to process */ -static void tcp_keepalive(struct ctx *c, const struct timespec *now) +static void tcp_keepalive(struct ctx *c, const struct timespec *now, + unsigned int qpair)
Here we're scanning the flow table, and each connection there could be associated with a different queue, so again, this parameter doesn't make sense to me.
{ union flow *flow;
@@ -2956,8 +2983,8 @@ static void tcp_keepalive(struct ctx *c, const struct timespec *now) if (conn->tap_inactive) { flow_dbg(conn, "No tap activity for least %us, send keepalive", KEEPALIVE_INTERVAL); - if (tcp_send_flag(c, conn, KEEPALIVE)) - tcp_rst(c, conn); + if (tcp_send_flag(c, conn, KEEPALIVE, qpair)) + tcp_rst(c, conn, qpair); }
/* Ready to check fot next interval */ @@ -2967,9 +2994,11 @@ static void tcp_keepalive(struct ctx *c, const struct timespec *now)
/** * tcp_inactivity() - Scan for and close long-inactive connections - * @: Execution context + * @c: Execution context + * @qpair: Queue pair to process */ -static void tcp_inactivity(struct ctx *c, const struct timespec *now) +static void tcp_inactivity(struct ctx *c, const struct timespec *now, + unsigned int qpair)
Same here.
{ union flow *flow;
@@ -2986,7 +3015,7 @@ static void tcp_inactivity(struct ctx *c, const struct timespec *now) /* No activity in this interval, reset */ flow_dbg(conn, "Inactive for at least %us, resetting", INACTIVITY_INTERVAL); - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); }
/* Ready to check fot next interval */ @@ -2998,9 +3027,11 @@ static void tcp_inactivity(struct ctx *c, const struct timespec *now) * tcp_defer_handler() - Handler for TCP deferred tasks * @c: Execution context * @now: Current timestamp + * @qpair: Queue pair to process */ /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */ -void tcp_defer_handler(struct ctx *c, const struct timespec *now) +void tcp_defer_handler(struct ctx *c, const struct timespec *now, + unsigned int qpair) { tcp_payload_flush(c);
@@ -3013,8 +3044,8 @@ void tcp_defer_handler(struct ctx *c, const struct timespec *now) if (c->mode == MODE_PASTA) tcp_splice_refill(c);
- tcp_keepalive(c, now); - tcp_inactivity(c, now); + tcp_keepalive(c, now, qpair); + tcp_inactivity(c, now, qpair); }
/** @@ -3988,10 +4019,10 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd if (tcp_set_peek_offset(conn, peek_offset)) goto fail;
- if (tcp_send_flag(c, conn, ACK)) + if (tcp_send_flag(c, conn, ACK, QPAIR_DEFAULT)) goto fail;
- tcp_data_from_sock(c, conn); + tcp_data_from_sock(c, conn, QPAIR_DEFAULT);
if ((rc = tcp_epoll_ctl(conn))) { flow_dbg(conn, @@ -4009,7 +4040,7 @@ fail: }
conn->flags = 0; /* Not waiting for ACK, don't schedule timer */ - tcp_rst(c, conn); + tcp_rst(c, conn, QPAIR_DEFAULT);
return 0; } diff --git a/tcp.h b/tcp.h index 3262a807e5d4..490f1b140e44 100644 --- a/tcp.h +++ b/tcp.h @@ -18,18 +18,21 @@
struct ctx;
-void tcp_timer_handler(const struct ctx *c, union epoll_ref ref); +void tcp_timer_handler(const struct ctx *c, union epoll_ref ref, + unsigned int qpair); void tcp_listen_handler(const struct ctx *c, union epoll_ref ref, const struct timespec *now); void tcp_sock_handler(const struct ctx *c, union epoll_ref ref, - uint32_t events); -int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, - const void *saddr, const void *daddr, uint32_t flow_lbl, - const struct pool *p, int idx, const struct timespec *now); + uint32_t events, unsigned int qpair); +int tcp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif, + sa_family_t af, const void *saddr, const void *daddr, + uint32_t flow_lbl, const struct pool *p, int idx, + const struct timespec *now); int tcp_listen(const struct ctx *c, uint8_t pif, unsigned rule, const union inany_addr *addr, const char *ifname, in_port_t port); int tcp_init(struct ctx *c); -void tcp_defer_handler(struct ctx *c, const struct timespec *now); +void tcp_defer_handler(struct ctx *c, const struct timespec *now, + unsigned int qpair);
void tcp_update_l2_buf(const unsigned char *eth_d);
diff --git a/tcp_buf.c b/tcp_buf.c index ca356089dc0b..ae8bebca5107 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -124,7 +124,7 @@ static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns, conn->seq_to_tap = seq; peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap; if (tcp_set_peek_offset(conn, peek_offset)) - tcp_rst(c, conn); + tcp_rst(c, conn, QPAIR_DEFAULT); } }
@@ -334,7 +334,7 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) conn->seq_to_tap = conn->seq_ack_from_tap; already_sent = 0; if (tcp_set_peek_offset(conn, 0)) { - tcp_rst(c, conn); + tcp_rst(c, conn, QPAIR_DEFAULT); return -1; } } @@ -356,7 +356,7 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) }
if (tcp_prepare_iov(&mh_sock, iov_sock, already_sent, fill_bufs)) { - tcp_rst(c, conn); + tcp_rst(c, conn, QPAIR_DEFAULT); return -1; }
@@ -381,7 +381,7 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
if (len < 0) { if (errno != EAGAIN && errno != EWOULDBLOCK) { - tcp_rst(c, conn); + tcp_rst(c, conn, QPAIR_DEFAULT); return -errno; }
@@ -410,7 +410,7 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
ret = tcp_buf_send_flag(c, conn, FIN | ACK); if (ret) { - tcp_rst(c, conn); + tcp_rst(c, conn, QPAIR_DEFAULT); return ret; }
diff --git a/tcp_internal.h b/tcp_internal.h index 40472c9973c8..22f8825adccc 100644 --- a/tcp_internal.h +++ b/tcp_internal.h @@ -174,11 +174,12 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn, conn_event_do(c, conn, event); \ } while (0)
-void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn); -#define tcp_rst(c, conn) \ +void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn, + unsigned int qpair); +#define tcp_rst(c, conn, qpair) \ do { \ flow_dbg((conn), "TCP reset at %s:%i", __func__, __LINE__); \ - tcp_rst_do(c, conn); \ + tcp_rst_do(c, conn, qpair); \ } while (0)
struct tcp_info_linux; diff --git a/tcp_vu.c b/tcp_vu.c index 9ef6b5242c9c..4f76f599156f 100644 --- a/tcp_vu.c +++ b/tcp_vu.c @@ -116,15 +116,17 @@ static int tcp_vu_send_dup(const struct ctx *c, struct vu_virtq *vq, * @c: Execution context * @conn: Connection pointer * @flags: TCP flags: if not set, send segment only if ACK is due + * @qpair: Queue pair to process * * Return: -ECONNRESET on fatal connection error, * -EAGAIN if vhost-user buffers are unavailable, * 0 otherwise */ -int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) +int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags, + unsigned int qpair) { struct vu_dev *vdev = c->vdev; - int rx_queue = QPAIR_TOGUEST_QUEUE(QPAIR_DEFAULT); + int rx_queue = QPAIR_TOGUEST_QUEUE(qpair); struct vu_virtq *vq = &vdev->vq[rx_queue]; size_t optlen, hdrlen, iov_cnt, iov_used; struct vu_virtq_element flags_elem[2]; @@ -424,13 +426,14 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn, * in window * @c: Execution context * @conn: Connection pointer + * @qpair: Queue pair to process * * Return: negative on connection reset, 0 otherwise */ -int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) +int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn, + unsigned int qpair) { uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap; - unsigned int qpair = QPAIR_DEFAULT; int rx_queue = QPAIR_TOGUEST_QUEUE(qpair); struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[rx_queue]; @@ -454,7 +457,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) conn->seq_to_tap = conn->seq_ack_from_tap; already_sent = 0; if (tcp_set_peek_offset(conn, 0)) { - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); return -1; } } @@ -477,7 +480,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) &elem_cnt, &frame_cnt); if (len < 0) { if (len != -EAGAIN && len != -EWOULDBLOCK) { 
- tcp_rst(c, conn); + tcp_rst(c, conn, qpair); return len; }
@@ -498,9 +501,9 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) /* See tcp_buf_data_from_sock() */ conn->seq_ack_to_tap = conn->seq_from_tap;
- ret = tcp_vu_send_flag(c, conn, FIN | ACK); + ret = tcp_vu_send_flag(c, conn, FIN | ACK, qpair); if (ret) { - tcp_rst(c, conn); + tcp_rst(c, conn, qpair); return ret; }
diff --git a/tcp_vu.h b/tcp_vu.h index 6ab6057f352a..ae48420f4906 100644 --- a/tcp_vu.h +++ b/tcp_vu.h @@ -6,7 +6,9 @@ #ifndef TCP_VU_H #define TCP_VU_H
-int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags); -int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn); +int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags, + unsigned int qpair); +int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn, + unsigned int qpair);
#endif /*TCP_VU_H */ -- 2.54.0
-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson