I couldn't find out yet why this patch breaks the pasta_podman/bats test, that is: not ok 19 [505] Single TCP port forwarding, IPv4, tap # (from function `bail-now' in file test/podman/test/system/helpers.bash, line 235, # from function `assert' in file test/podman/test/system/helpers.bash, line 929, # from function `pasta_test_do' in file test/podman/test/system/505-networking-pasta.bats, line 239, # in test file test/podman/test/system/505-networking-pasta.bats, line 472) # `pasta_test_do' failed # # [22:54:18.306131353] $ test/podman/bin/podman rm -t 0 --all --force --ignore # # [22:54:18.367462243] $ test/podman/bin/podman ps --all --external --format {{.ID}} {{.Names}} # # [22:54:18.394935392] $ test/podman/bin/podman images --all --format {{.Repository}}:{{.Tag}} {{.ID}} # [22:54:18.419773379] quay.io/libpod/testimage:20240123 1f6acd4c4a1d # # [22:54:19.246631856] $ test/podman/bin/podman info --format {{.Host.Pasta.Executable}} # [22:54:20.084392405] /home/sbrivio/passt/pasta # # [22:54:20.167980222] $ test/podman/bin/podman run --net=pasta -p [88.198.0.164]:5727:5727/tcp quay.io/libpod/testimage:20240123 sh -c for port in $(seq 5727 5727); do socat -u TCP4-LISTEN:${port},bind=[88.198.0.164] STDOUT & done; wait # [22:54:37.256040883] x2024/06/12 20:54:37 socat[3] E read(6, 0x7fe675cd6000, 8192): Connection reset by peer # #/vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv # #| FAIL: Mismatch between data sent and received # #| expected: = x # #| actual: x2024/06/12 20:54:37 socat\[3\] E read\(6\, 0x7fe675cd6000\, 8192\): Connection reset by peer # #\^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ meaning that the data transfer is actually fine, but we reset the connection instead of performing an orderly shutdown.
I found a few issues while looking for that: On Wed, 12 Jun 2024 17:47:27 +0200 Laurent Vivier <lvivier(a)redhat.com> wrote: This commit isolates the internal data structure management used for storing data (e.g., tcp4_l2_flags_iov[], tcp6_l2_flags_iov[], tcp4_flags_ip[], tcp4_flags[], ...) from the tcp_send_flag() function. The extracted functionality is relocated to a new function named tcp_fill_flag_header(). tcp_fill_flag_header() is now a generic function that accepts parameters such as struct tcphdr and a data pointer. tcp_send_flag() utilizes this parameter to pass memory pointers from tcp4_l2_flags_iov[] and tcp6_l2_flags_iov[]. This separation sets the stage for utilizing tcp_fill_flag_header() to set the memory provided by the guest via vhost-user in future developments. Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- Notes: v6: - rename tcp_fill_flag_header() to tcp_prepare_flags() - set optlen to 0 in tcp_prepare_flags() v5: - use tcp_fill_flag_header() rather than tcp_fill_headers4() and tcp_fill_headers6(). tcp.c | 72 +++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/tcp.c b/tcp.c index dd8d46e08628..6800209d4122 100644 --- a/tcp.c +++ b/tcp.c @@ -1567,24 +1567,25 @@ static void tcp_update_seqack_from_tap(const struct ctx *c, } /** - * tcp_send_flag() - Send segment with flags to tap (no payload) + * tcp_prepare_flags() - Prepare header for flags-only segment (no payload) * @c: Execution context * @conn: Connection pointer * @flags: TCP flags: if not set, send segment only if ACK is due + * @th: TCP header to update + * @data: buffer to store TCP option + * @optlen: size of the TCP option buffer (output parameter) * - * Return: negative error code on connection reset, 0 otherwise + * Return: < 0 error code on connection reset, + * 0 if there is no flag to send + * 1 otherwise This is often called with if (tcp_send_flag(...)) or if (!tcp_send_flag(...)).
Those need to be replaced with if (tcp_send_flag(...) < 0) or if (tcp_send_flag(...) >= 0) in the callers. */ -static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) +static int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, + int flags, struct tcphdr *th, char *data, + size_t *optlen) { - struct tcp_flags_t *payload; struct tcp_info tinfo = { 0 }; socklen_t sl = sizeof(tinfo); int s = conn->sock; - size_t optlen = 0; - struct tcphdr *th; - struct iovec *iov; - size_t l4len; - char *data; if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap) && !flags && conn->wnd_to_tap) @@ -1606,20 +1607,11 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags) return 0; - if (CONN_V4(conn)) - iov = tcp4_l2_flags_iov[tcp4_flags_used++]; - else - iov = tcp6_l2_flags_iov[tcp6_flags_used++]; - - payload = iov[TCP_IOV_PAYLOAD].iov_base; - th = &payload->th; - data = payload->opts; - if (flags & SYN) { int mss; /* Options: MSS, NOP and window scale (8 bytes) */ - optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN; + *optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN; *data++ = OPT_MSS; *data++ = OPT_MSS_LEN; @@ -1651,19 +1643,16 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) *data++ = conn->ws_to_tap; } else if (!(flags & RST)) { flags |= ACK; + *optlen = 0; *optlen also needs to be set to 0 if (flags & RST), say: } else { *optlen = 0; if (!(flags & RST)) flags |= ACK; } } - th->doff = (sizeof(*th) + optlen) / 4; + th->doff = (sizeof(*th) + *optlen) / 4; th->ack = !!(flags & ACK); th->rst = !!(flags & RST); th->syn = !!(flags & SYN); th->fin = !!(flags & FIN); - l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL, - conn->seq_to_tap); - iov[TCP_IOV_PAYLOAD].iov_len = l4len; - if (th->ack) { if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap)) conn_flag(c, conn, ~ACK_TO_TAP_DUE); @@ -1678,6 +1667,41 @@ static int tcp_send_flag(struct ctx *c, struct
tcp_tap_conn *conn, int flags) if (th->fin || th->syn) conn->seq_to_tap++; + return 1; +} + +/** + * tcp_send_flag() - Send segment with flags to tap (no payload) + * @c: Execution context + * @conn: Connection pointer + * @flags: TCP flags: if not set, send segment only if ACK is due + * + * Return: negative error code on connection reset, 0 otherwise + */ +static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) +{ + struct tcp_flags_t *payload; + struct iovec *iov; + size_t optlen; + size_t l4len; + int ret; + + if (CONN_V4(conn)) + iov = tcp4_l2_flags_iov[tcp4_flags_used++]; + else + iov = tcp6_l2_flags_iov[tcp6_flags_used++]; We increase the counters here, but we don't decrease them back if we later hit: if (ret <= 0) return ret; + + payload = iov[TCP_IOV_PAYLOAD].iov_base; + + ret = tcp_prepare_flags(c, conn, flags, &payload->th, + payload->opts, &optlen); + if (ret <= 0) + return ret; + + l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL, + conn->seq_to_tap); + iov[TCP_IOV_PAYLOAD].iov_len = l4len; + if (flags & DUP_ACK) { struct iovec *dup_iov; int i; Here's the diff I have so far. I'm not necessarily recommending any of that; it was just a quick try: diff --git a/tcp.c b/tcp.c index 6800209..6bff6bc 100644 --- a/tcp.c +++ b/tcp.c @@ -1644,6 +1644,8 @@ static int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, } else if (!(flags & RST)) { flags |= ACK; *optlen = 0; + } else { + *optlen = 0; } th->doff = (sizeof(*th) + *optlen) / 4; @@ -1695,8 +1697,14 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) ret = tcp_prepare_flags(c, conn, flags, &payload->th, payload->opts, &optlen); - if (ret <= 0) + if (ret <= 0) { + if (CONN_V4(conn)) + tcp4_flags_used--; + else + tcp6_flags_used--; + return ret; + } l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL, conn->seq_to_tap); @@ -1738,7 +1746,7 @@ static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn) if (conn->events == CLOSED)
return; - if (!tcp_send_flag(c, conn, RST)) + if (tcp_send_flag(c, conn, RST) >= 0) conn_event(c, conn, CLOSED); } @@ -2111,7 +2119,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, } else { tcp_get_sndbuf(conn); - if (tcp_send_flag(c, conn, SYN | ACK)) + if (tcp_send_flag(c, conn, SYN | ACK) < 0) goto cancel; conn_event(c, conn, TAP_SYN_ACK_SENT); @@ -2282,9 +2290,11 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn) if (!len) { if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == SOCK_FIN_RCVD) { - if ((ret = tcp_send_flag(c, conn, FIN | ACK))) { + int rc = tcp_send_flag(c, conn, FIN | ACK); + + if (rc < 0) { tcp_rst(c, conn); - return ret; + return rc; } conn_event(c, conn, TAP_FIN_SENT); @@ -2716,7 +2726,7 @@ static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn) return; } - if (tcp_send_flag(c, conn, SYN | ACK)) + if (tcp_send_flag(c, conn, SYN | ACK) < 0) return; conn_event(c, conn, TAP_SYN_ACK_SENT); -- Stefano