Hi,
I think the problem might be that the call to tcp_l2_buf_fill_headers() has been
moved out of tcp_prepare_flags(), and so now happens after:
if (th->fin || th->syn)
conn->seq_to_tap++;
and conn->seq_to_tap is also a parameter of tcp_l2_buf_fill_headers(). So it is
now incremented before tcp_l2_buf_fill_headers() is called, and not after.
Could you try:
diff --git a/tcp.c b/tcp.c
index 6800209d4122..647f42291fcf 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1607,6 +1607,7 @@ static int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn
*conn,
if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags)
return 0;
+ *optlen = 0;
if (flags & SYN) {
int mss;
@@ -1643,7 +1644,6 @@ static int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn
*conn,
*data++ = conn->ws_to_tap;
} else if (!(flags & RST)) {
flags |= ACK;
- *optlen = 0;
}
th->doff = (sizeof(*th) + *optlen) / 4;
@@ -1663,10 +1663,6 @@ static int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn
*conn,
if (th->fin)
conn_flag(c, conn, ACK_FROM_TAP_DUE);
- /* RFC 793, 3.1: "[...] and the first data octet is ISN+1." */
- if (th->fin || th->syn)
- conn->seq_to_tap++;
-
return 1;
}
@@ -1702,6 +1698,10 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn,
int flags)
conn->seq_to_tap);
iov[TCP_IOV_PAYLOAD].iov_len = l4len;
+ /* RFC 793, 3.1: "[...] and the first data octet is ISN+1." */
+ if (th->fin || th->syn)
+ conn->seq_to_tap++;
+
if (flags & DUP_ACK) {
struct iovec *dup_iov;
int i;
Thanks,
Laurent
On 13/06/2024 08:07, Stefano Brivio wrote:
On Wed, 12 Jun 2024 23:22:10 +0200
Stefano Brivio <sbrivio(a)redhat.com> wrote:
I couldn't find out yet why this patch breaks
the pasta_podman/bats
test, that is:
not ok 19 [505] Single TCP port forwarding, IPv4, tap
# (from function `bail-now' in file test/podman/test/system/helpers.bash, line 235,
# from function `assert' in file test/podman/test/system/helpers.bash, line 929,
# from function `pasta_test_do' in file
test/podman/test/system/505-networking-pasta.bats, line 239,
# in test file test/podman/test/system/505-networking-pasta.bats, line 472)
# `pasta_test_do' failed
#
# [22:54:18.306131353] $ test/podman/bin/podman rm -t 0 --all --force --ignore
#
# [22:54:18.367462243] $ test/podman/bin/podman ps --all --external --format {{.ID}}
{{.Names}}
#
# [22:54:18.394935392] $ test/podman/bin/podman images --all --format
{{.Repository}}:{{.Tag}} {{.ID}}
# [22:54:18.419773379] quay.io/libpod/testimage:20240123 1f6acd4c4a1d
#
# [22:54:19.246631856] $ test/podman/bin/podman info --format {{.Host.Pasta.Executable}}
# [22:54:20.084392405] /home/sbrivio/passt/pasta
#
# [22:54:20.167980222] $ test/podman/bin/podman run --net=pasta -p
[88.198.0.164]:5727:5727/tcp quay.io/libpod/testimage:20240123 sh -c for port in $(seq
5727 5727); do socat -u
TCP4-LISTEN:${port},bind=[88.198.0.164] STDOUT & done; wait
# [22:54:37.256040883] x2024/06/12 20:54:37 socat[3] E read(6, 0x7fe675cd6000, 8192):
Connection reset by peer
# #/vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
# #| FAIL: Mismatch between data sent and received
# #| expected: = x
# #| actual: x2024/06/12 20:54:37 socat\[3\] E read\(6\, 0x7fe675cd6000\, 8192\):
Connection reset by peer
# #\^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
meaning that the data transfer is actually fine, but we reset the
connection instead of an orderly shutdown.
I found a few issues while looking for that:
On Wed, 12 Jun 2024 17:47:27 +0200
Laurent Vivier <lvivier(a)redhat.com> wrote:
This commit isolates the internal data structure
management used for storing
data (e.g., tcp4_l2_flags_iov[], tcp6_l2_flags_iov[], tcp4_flags_ip[],
tcp4_flags[], ...) from the tcp_send_flag() function. The extracted
functionality is relocated to a new function named tcp_fill_flag_header().
tcp_fill_flag_header() is now a generic function that accepts parameters such
as struct tcphdr and a data pointer. tcp_send_flag() utilizes this parameter to
pass memory pointers from tcp4_l2_flags_iov[] and tcp6_l2_flags_iov[].
This separation sets the stage for utilizing tcp_fill_flag_header() to
set the memory provided by the guest via vhost-user in future developments.
Signed-off-by: Laurent Vivier <lvivier(a)redhat.com>
---
Notes:
v6:
- rename tcp_fill_flag_header() to tcp_prepare_flags()
- set optlen to 0 in tcp_prepare_flags()
v5:
- use tcp_fill_flag_header() rather than tcp_fill_headers4() and
tcp_fill_headers6().
tcp.c | 72 +++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 48 insertions(+), 24 deletions(-)
diff --git a/tcp.c b/tcp.c
index dd8d46e08628..6800209d4122 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1567,24 +1567,25 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
}
/**
- * tcp_send_flag() - Send segment with flags to tap (no payload)
+ * tcp_prepare_flags() - Prepare header for flags-only segment (no payload)
* @c: Execution context
* @conn: Connection pointer
* @flags: TCP flags: if not set, send segment only if ACK is due
+ * @th: TCP header to update
+ * @data: buffer to store TCP option
+ * @optlen: size of the TCP option buffer (output parameter)
*
- * Return: negative error code on connection reset, 0 otherwise
+ * Return: < 0 error code on connection reset,
+ * 0 if there is no flag to send
+ * 1 otherwise
This is often called with if (tcp_send_flag(...)) or
if (!tcp_send_flag(...)). Those need to be replaced with
if (tcp_send_flag(...) < 0) or if (tcp_send_flag(...) >= 0) in the
callers.
Ah, no, sorry, you already took care of this in the new
tcp_send_flag(). Then there must be something else...
*/
-static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
+static int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn,
+ int flags, struct tcphdr *th, char *data,
+ size_t *optlen)
{
- struct tcp_flags_t *payload;
struct tcp_info tinfo = { 0 };
socklen_t sl = sizeof(tinfo);
int s = conn->sock;
- size_t optlen = 0;
- struct tcphdr *th;
- struct iovec *iov;
- size_t l4len;
- char *data;
if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap) &&
!flags && conn->wnd_to_tap)
@@ -1606,20 +1607,11 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn
*conn, int flags)
if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags)
return 0;
- if (CONN_V4(conn))
- iov = tcp4_l2_flags_iov[tcp4_flags_used++];
- else
- iov = tcp6_l2_flags_iov[tcp6_flags_used++];
-
- payload = iov[TCP_IOV_PAYLOAD].iov_base;
- th = &payload->th;
- data = payload->opts;
-
if (flags & SYN) {
int mss;
/* Options: MSS, NOP and window scale (8 bytes) */
- optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN;
+ *optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN;
*data++ = OPT_MSS;
*data++ = OPT_MSS_LEN;
@@ -1651,19 +1643,16 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn
*conn, int flags)
*data++ = conn->ws_to_tap;
} else if (!(flags & RST)) {
flags |= ACK;
+ *optlen = 0;
*optlen also needs to be set to 0 if (flags & RST), say:
} else {
*optlen = 0;
if (!(flags & RST))
flags |= ACK;
}
}
- th->doff = (sizeof(*th) + optlen) / 4;
+ th->doff = (sizeof(*th) + *optlen) / 4;
th->ack = !!(flags & ACK);
th->rst = !!(flags & RST);
th->syn = !!(flags & SYN);
th->fin = !!(flags & FIN);
- l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL,
- conn->seq_to_tap);
- iov[TCP_IOV_PAYLOAD].iov_len = l4len;
-
if (th->ack) {
if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap))
conn_flag(c, conn, ~ACK_TO_TAP_DUE);
@@ -1678,6 +1667,41 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn,
int flags)
if (th->fin || th->syn)
conn->seq_to_tap++;
+ return 1;
+}
+
+/**
+ * tcp_send_flag() - Send segment with flags to tap (no payload)
+ * @c: Execution context
+ * @conn: Connection pointer
+ * @flags: TCP flags: if not set, send segment only if ACK is due
+ *
+ * Return: negative error code on connection reset, 0 otherwise
+ */
+static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
+{
+ struct tcp_flags_t *payload;
+ struct iovec *iov;
+ size_t optlen;
+ size_t l4len;
+ int ret;
+
+ if (CONN_V4(conn))
+ iov = tcp4_l2_flags_iov[tcp4_flags_used++];
+ else
+ iov = tcp6_l2_flags_iov[tcp6_flags_used++];
We increase the counters here, but we don't decrease them back if we
hit if (ret <= 0) return ret; later.
+
+ payload = iov[TCP_IOV_PAYLOAD].iov_base;
+
+ ret = tcp_prepare_flags(c, conn, flags, &payload->th,
+ payload->opts, &optlen);
+ if (ret <= 0)
+ return ret;
+
+ l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL,
+ conn->seq_to_tap);
+ iov[TCP_IOV_PAYLOAD].iov_len = l4len;
+
if (flags & DUP_ACK) {
struct iovec *dup_iov;
int i;
Here's the diff I have so far, I'm not necessarily recommending any of
that, it was just a quick try:
diff --git a/tcp.c b/tcp.c
index 6800209..6bff6bc 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1644,6 +1644,8 @@ static int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn
*conn,
} else if (!(flags & RST)) {
flags |= ACK;
*optlen = 0;
+ } else {
+ *optlen = 0;
}
th->doff = (sizeof(*th) + *optlen) / 4;
@@ -1695,8 +1697,14 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn,
int flags)
ret = tcp_prepare_flags(c, conn, flags, &payload->th,
payload->opts, &optlen);
- if (ret <= 0)
+ if (ret <= 0) {
+ if (CONN_V4(conn))
+ tcp4_flags_used--;
+ else
+ tcp6_flags_used--;
+
return ret;
+ }
l4len = tcp_l2_buf_fill_headers(c, conn, iov, optlen, NULL,
conn->seq_to_tap);
@@ -1738,7 +1746,7 @@ static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn)
if (conn->events == CLOSED)
return;
- if (!tcp_send_flag(c, conn, RST))
+ if (tcp_send_flag(c, conn, RST) >= 0)
conn_event(c, conn, CLOSED);
}
@@ -2111,7 +2119,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
} else {
tcp_get_sndbuf(conn);
- if (tcp_send_flag(c, conn, SYN | ACK))
+ if (tcp_send_flag(c, conn, SYN | ACK) < 0)
goto cancel;
conn_event(c, conn, TAP_SYN_ACK_SENT);
@@ -2282,9 +2290,11 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn
*conn)
if (!len) {
if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == SOCK_FIN_RCVD) {
- if ((ret = tcp_send_flag(c, conn, FIN | ACK))) {
+ int rc = tcp_send_flag(c, conn, FIN | ACK);
+
+ if (rc < 0) {
tcp_rst(c, conn);
- return ret;
+ return rc;
}
conn_event(c, conn, TAP_FIN_SENT);
@@ -2716,7 +2726,7 @@ static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn
*conn)
return;
}
- if (tcp_send_flag(c, conn, SYN | ACK))
+ if (tcp_send_flag(c, conn, SYN | ACK) < 0)
return;
conn_event(c, conn, TAP_SYN_ACK_SENT);