As we discussed on email, this adds support for sending an RST in response to packets from the guest which don't match an existing flow and are neither SYN (requesting a new connection) nor themselves RST. This is a sligjhtly larger patch than I'd like, but I can't really see a way to simplify it without making fairly extensive reworks to share more code with paths for RST where there is a known connection. That would end up being more churn. David Gibson (1): tcp: Send RST in response to guest packets that match no connection tap.c | 13 +++++------ tap.h | 6 +++++ tcp.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 7 deletions(-) -- 2.48.1
Currently, if a non-SYN TCP packet arrives which doesn't match any existing connection, we simply ignore it. However RFC 9293, section 3.10.7.1 says we should respond with an RST to a non-SYN, non-RST packet that's for a CLOSED (i.e. non-existent) connection. This can arise in practice with migration, in cases where some error means we have to discard a connection. We destroy the connection with tcp_rst() in that case, but because the guest is stopped, we may not be able to deliver the RST packet on the tap interface immediately. This change ensures an RST will be sent if the guest tries to use the connection again. A similar situation can arise if a passt/pasta instance is killed or crashes, but is then replaced with another attached to the same guest. This can leave the guest with stale connections that the new passt instance isn't aware of. It's better to send an RST so the guest knows quickly these are broken, rather than letting them linger until they time out. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tap.c | 13 +++++------ tap.h | 6 +++++ tcp.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 7 deletions(-) diff --git a/tap.c b/tap.c index 44b0fc0f..3c6a3e33 100644 --- a/tap.c +++ b/tap.c @@ -122,7 +122,7 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c, * * Return: pointer at which to write the packet's payload */ -static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto) +void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto) { struct ethhdr *eh = (struct ethhdr *)buf; @@ -143,8 +143,8 @@ static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto) * * Return: pointer at which to write the packet's payload */ -static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src, - struct in_addr dst, size_t l4len, uint8_t proto) +void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src, + struct in_addr dst, size_t l4len, uint8_t proto) { uint16_t l3len = l4len + sizeof(*ip4h); @@ -229,10 +229,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst, * * Return: pointer at which to write the packet's payload */ -static void *tap_push_ip6h(struct ipv6hdr *ip6h, - const struct in6_addr *src, - const struct in6_addr *dst, - size_t l4len, uint8_t proto, uint32_t flow) +void *tap_push_ip6h(struct ipv6hdr *ip6h, + const struct in6_addr *src, const struct in6_addr *dst, + size_t l4len, uint8_t proto, uint32_t flow) { ip6h->payload_len = htons(l4len); ip6h->priority = 0; diff --git a/tap.h b/tap.h index a476a125..390ac127 100644 --- a/tap.h +++ b/tap.h @@ -42,6 +42,9 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len) thdr->vnet_len = htonl(l2len); } +void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto); +void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src, + struct in_addr dst, size_t l4len, uint8_t proto); void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport, struct in_addr dst, in_port_t dport, const void *in, size_t dlen); @@ -49,6 +52,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst, const void *in, size_t l4len); const struct in6_addr *tap_ip6_daddr(const struct ctx *c, const struct in6_addr *src); +void *tap_push_ip6h(struct ipv6hdr *ip6h, + const struct in6_addr *src, const struct in6_addr *dst, + size_t l4len, uint8_t proto, uint32_t flow); void tap_udp6_send(const struct ctx *c, const struct in6_addr *src, in_port_t sport, const struct in6_addr *dst, in_port_t dport, diff --git a/tcp.c b/tcp.c index b3aa9a2c..50670547 100644 --- a/tcp.c +++ b/tcp.c @@ -1868,6 +1868,76 @@ static void tcp_conn_from_sock_finish(const struct ctx *c, tcp_data_from_sock(c, conn); } +/** + * tcp_no_conn() - Respond to a non-SYN packet matching no connection + * @c: Execution context + * @af: Address family, AF_INET or AF_INET6 + * @saddr: Source address of the packet we're responding to + * @daddr: Destination address of the packet we're responding to + * @th: TCP header of the packet we're responding to* + * @l4len: Packet length, including TCP header + */ +void tcp_no_conn(const struct ctx *c, int af, + const void *saddr, const void *daddr, + const struct tcphdr *th, size_t l4len) +{ + struct iov_tail payload = IOV_TAIL(NULL, 0, 0); + struct tcphdr *rsth; + char buf[USHRT_MAX]; + uint32_t psum = 0; + size_t rst_l2len; + + /* Don't respond to RSTs without a connection */ + if (th->rst) + return; + + if (af == AF_INET) { + struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP); + const struct in_addr *rst_src = daddr; + const struct in_addr *rst_dst = saddr; + + rsth = tap_push_ip4h(ip4h, *rst_src, *rst_dst, + sizeof(*rsth), IPPROTO_TCP); + psum = proto_ipv4_header_psum(sizeof(*rsth), IPPROTO_TCP, + *rst_src, *rst_dst); + + } else { + struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6); + const struct in6_addr *rst_src = daddr; + const struct in6_addr *rst_dst = saddr; + + /* FIXME: do we need to take the flow id from the IPv6 header + * we're responding to? + */ + rsth = tap_push_ip6h(ip6h, rst_src, rst_dst, + sizeof(*rsth), IPPROTO_TCP, 0); + psum = proto_ipv6_header_psum(sizeof(*rsth), IPPROTO_TCP, + rst_src, rst_dst); + } + + memset(rsth, 0, sizeof(*rsth)); + + rsth->source = th->dest; + rsth->dest = th->source; + rsth->rst = 1; + rsth->doff = sizeof(*rsth) / 4UL; + + /* Sequence matching logic from RFC9293 section 3.10.7.1 */ + if (th->ack) { + rsth->seq = th->ack_seq; + } else { + size_t dlen = l4len - th->doff * 4UL; + uint32_t ack = ntohl(th->seq) + dlen; + + rsth->ack_seq = htonl(ack); + rsth->ack = 1; + } + + tcp_update_csum(psum, rsth, &payload); + rst_l2len = ((char *)rsth - buf) + sizeof(*rsth); + tap_send_single(c, buf, rst_l2len); +} + /** * tcp_tap_handler() - Handle packets from tap and state transitions * @c: Execution context @@ -1915,6 +1985,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, if (opts && th->syn && !th->ack) tcp_conn_from_tap(c, af, saddr, daddr, th, opts, optlen, now); + else + tcp_no_conn(c, af, saddr, daddr, th, len); return 1; } -- 2.48.1
I had just nits so I originally planned to ignore some and fix some up on merge but then I realised something else, so here are nits and concern: On Fri, 28 Feb 2025 15:48:44 +1100 David Gibson <david(a)gibson.dropbear.id.au> wrote:Currently, if a non-SYN TCP packet arrives which doesn't match any existing connection, we simply ignore it. However RFC 9293, section 3.10.7.1 says we should respond with an RST to a non-SYN, non-RST packet that's for a CLOSED (i.e. non-existent) connection. This can arise in practice with migration, in cases where some error means we have to discard a connection. We destroy the connection with tcp_rst() in that case, but because the guest is stopped, we may not be able to deliver the RST packet on the tap interface immediately. This change ensures an RST will be sent if the guest tries to use the connection again. A similar situation can arise if a passt/pasta instance is killed or crashes, but is then replaced with another attached to the same guest. This can leave the guest with stale connections that the new passt instance isn't aware of. It's better to send an RST so the guest knows quickly these are broken, rather than letting them linger until they time out. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tap.c | 13 +++++------ tap.h | 6 +++++ tcp.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 7 deletions(-) diff --git a/tap.c b/tap.c index 44b0fc0f..3c6a3e33 100644 --- a/tap.c +++ b/tap.c @@ -122,7 +122,7 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c, * * Return: pointer at which to write the packet's payload */ -static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto) +void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto) { struct ethhdr *eh = (struct ethhdr *)buf; @@ -143,8 +143,8 @@ static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto) * * Return: pointer at which to write the packet's payload */ -static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src, - struct in_addr dst, size_t l4len, uint8_t proto) +void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src, + struct in_addr dst, size_t l4len, uint8_t proto) { uint16_t l3len = l4len + sizeof(*ip4h); @@ -229,10 +229,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst, * * Return: pointer at which to write the packet's payload */ -static void *tap_push_ip6h(struct ipv6hdr *ip6h, - const struct in6_addr *src, - const struct in6_addr *dst, - size_t l4len, uint8_t proto, uint32_t flow) +void *tap_push_ip6h(struct ipv6hdr *ip6h, + const struct in6_addr *src, const struct in6_addr *dst, + size_t l4len, uint8_t proto, uint32_t flow) { ip6h->payload_len = htons(l4len); ip6h->priority = 0; diff --git a/tap.h b/tap.h index a476a125..390ac127 100644 --- a/tap.h +++ b/tap.h @@ -42,6 +42,9 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len) thdr->vnet_len = htonl(l2len); } +void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto); +void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src, + struct in_addr dst, size_t l4len, uint8_t proto); void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport, struct in_addr dst, in_port_t dport, const void *in, size_t dlen); @@ -49,6 +52,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst, const void *in, size_t l4len); const struct in6_addr *tap_ip6_daddr(const struct ctx *c, const struct in6_addr *src); +void *tap_push_ip6h(struct ipv6hdr *ip6h, + const struct in6_addr *src, const struct in6_addr *dst, + size_t l4len, uint8_t proto, uint32_t flow); void tap_udp6_send(const struct ctx *c, const struct in6_addr *src, in_port_t sport, const struct in6_addr *dst, in_port_t dport, diff --git a/tcp.c b/tcp.c index b3aa9a2c..50670547 100644 --- a/tcp.c +++ b/tcp.c @@ -1868,6 +1868,76 @@ static void tcp_conn_from_sock_finish(const struct ctx *c, tcp_data_from_sock(c, conn); } +/** + * tcp_no_conn() - Respond to a non-SYN packet matching no connectionThe name makes sense when seen from the perspective of the caller, but not so much as a stand-alone thing. I don't have great ideas and tcp_no_conn() sounds fine anyway. I was thinking of tcp_reply_spurious(), tcp_rst_unrelated(), tcp_rst_invalid() (in netfilter terms), tcp_handle_stray() or things like that.+ * @c: Execution context + * @af: Address family, AF_INET or AF_INET6 + * @saddr: Source address of the packet we're responding to + * @daddr: Destination address of the packet we're responding to + * @th: TCP header of the packet we're responding to*"to*"+ * @l4len: Packet length, including TCP header + */ +void tcp_no_conn(const struct ctx *c, int af, + const void *saddr, const void *daddr, + const struct tcphdr *th, size_t l4len) +{ + struct iov_tail payload = IOV_TAIL(NULL, 0, 0); + struct tcphdr *rsth; + char buf[USHRT_MAX]; + uint32_t psum = 0; + size_t rst_l2len; + + /* Don't respond to RSTs without a connection */ + if (th->rst) + return; + + if (af == AF_INET) { + struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP); + const struct in_addr *rst_src = daddr; + const struct in_addr *rst_dst = saddr; + + rsth = tap_push_ip4h(ip4h, *rst_src, *rst_dst, + sizeof(*rsth), IPPROTO_TCP); + psum = proto_ipv4_header_psum(sizeof(*rsth), IPPROTO_TCP, + *rst_src, *rst_dst); + + } else { + struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6); + const struct in6_addr *rst_src = daddr; + const struct in6_addr *rst_dst = saddr; + + /* FIXME: do we need to take the flow id from the IPv6 header + * we're responding to? + */I wanted to check if we actually need to, then I realised my test won't actually tell, but let me share the steps because it can at least be used to check the mechanism as a whole: $ ./pasta -p http.pcap --config-net -- wget http://example.com -O /dev/null 2>/dev/null $ tshark -r http.pcap -Y 'http.request.version' -w http_get.pcap $ ./pasta -p replay.pcap --config-net -I i -- tcpreplay -Wi i http_get.pcap Saving packet capture to replay.pcap Actual: 1 packets (200 bytes) sent in 0.000003 seconds Rated: 66666666.6 Bps, 533.33 Mbps, 333333.33 pps Statistics for network device: i Successful packets: 1 Failed packets: 0 Truncated packets: 0 Retried packets (ENOBUFS): 0 Retried packets (EAGAIN): 0 and now we can have a look at the RST: $ tshark -r replay.pcap -Y tcp 3 0.099072 2a01:4f8:222:904::2 → 2600:1408:ec00:36::1736:7f31 HTTP 200 GET / HTTP/1.1 4 0.099111 2600:1408:ec00:36::1736:7f31 → 2a01:4f8:222:904::2 TCP 74 80 → 53378 [RST] Seq=1 Win=0 Len=0 Back to the flow label: tcp_v6_send_reset(), which handles this case in the kernel, copies it from the message we're replying to, so we probably should as well (other than making obvious RFC 6437 sense). Judging from __inet6_lookup_skb(), __inet6_lookup_established(), inet6_match(), etc., called in turn from tcp_v6_rcv(), it doesn't _seem_ to be strictly necessary. Conversely, passt commit 16b08367a57f ("tap: Fill the IPv6 flow label field to represent flow association") was needed because tcp_v6_syn_recv_sock() actually checks that. So, on one hand, I think it's much safer to do that, because the kernel could decide to check the label on every packet at some point, and I would call it a feature rather than user breakage. On the other hand, it adds some complexity (should tcp_tap_handler() just dump that only as needed? Should we make that part of the flow lookup logic instead...?) that we don't strictly need right now and we can take care of it as a follow-up, so I don't have a particular preference. I just wanted to point out that we don't need to, but I think we should eventually copy the label. I'm fine either way for this fix, though.+ rsth = tap_push_ip6h(ip6h, rst_src, rst_dst, + sizeof(*rsth), IPPROTO_TCP, 0); + psum = proto_ipv6_header_psum(sizeof(*rsth), IPPROTO_TCP, + rst_src, rst_dst); + } + + memset(rsth, 0, sizeof(*rsth)); + + rsth->source = th->dest; + rsth->dest = th->source; + rsth->rst = 1; + rsth->doff = sizeof(*rsth) / 4UL; + + /* Sequence matching logic from RFC9293 section 3.10.7.1 */$ grep "RFC [0-9]" *.c | wc -l 24 $ grep "RFC[0-9]" *.c | wc -l 1 Call me a troglodyte but I would never find the reference without the space.+ if (th->ack) { + rsth->seq = th->ack_seq; + } else { + size_t dlen = l4len - th->doff * 4UL; + uint32_t ack = ntohl(th->seq) + dlen; + + rsth->ack_seq = htonl(ack); + rsth->ack = 1; + } + + tcp_update_csum(psum, rsth, &payload); + rst_l2len = ((char *)rsth - buf) + sizeof(*rsth); + tap_send_single(c, buf, rst_l2len); +} + /** * tcp_tap_handler() - Handle packets from tap and state transitions * @c: Execution context @@ -1915,6 +1985,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, if (opts && th->syn && !th->ack) tcp_conn_from_tap(c, af, saddr, daddr, th, opts, optlen, now); + else + tcp_no_conn(c, af, saddr, daddr, th, len); return 1; }-- Stefano
On Fri, Feb 28, 2025 at 02:10:22PM +0100, Stefano Brivio wrote:I had just nits so I originally planned to ignore some and fix some up on merge but then I realised something else, so here are nits and concern: On Fri, 28 Feb 2025 15:48:44 +1100 David Gibson <david(a)gibson.dropbear.id.au> wrote:I've gone with tcp_rst_no_conn() for the next spin, which I hope is a little better.Currently, if a non-SYN TCP packet arrives which doesn't match any existing connection, we simply ignore it. However RFC 9293, section 3.10.7.1 says we should respond with an RST to a non-SYN, non-RST packet that's for a CLOSED (i.e. non-existent) connection. This can arise in practice with migration, in cases where some error means we have to discard a connection. We destroy the connection with tcp_rst() in that case, but because the guest is stopped, we may not be able to deliver the RST packet on the tap interface immediately. This change ensures an RST will be sent if the guest tries to use the connection again. A similar situation can arise if a passt/pasta instance is killed or crashes, but is then replaced with another attached to the same guest. This can leave the guest with stale connections that the new passt instance isn't aware of. It's better to send an RST so the guest knows quickly these are broken, rather than letting them linger until they time out. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tap.c | 13 +++++------ tap.h | 6 +++++ tcp.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 7 deletions(-) diff --git a/tap.c b/tap.c index 44b0fc0f..3c6a3e33 100644 --- a/tap.c +++ b/tap.c @@ -122,7 +122,7 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c, * * Return: pointer at which to write the packet's payload */ -static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto) +void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto) { struct ethhdr *eh = (struct ethhdr *)buf; @@ -143,8 +143,8 @@ static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto) * * Return: pointer at which to write the packet's payload */ -static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src, - struct in_addr dst, size_t l4len, uint8_t proto) +void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src, + struct in_addr dst, size_t l4len, uint8_t proto) { uint16_t l3len = l4len + sizeof(*ip4h); @@ -229,10 +229,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst, * * Return: pointer at which to write the packet's payload */ -static void *tap_push_ip6h(struct ipv6hdr *ip6h, - const struct in6_addr *src, - const struct in6_addr *dst, - size_t l4len, uint8_t proto, uint32_t flow) +void *tap_push_ip6h(struct ipv6hdr *ip6h, + const struct in6_addr *src, const struct in6_addr *dst, + size_t l4len, uint8_t proto, uint32_t flow) { ip6h->payload_len = htons(l4len); ip6h->priority = 0; diff --git a/tap.h b/tap.h index a476a125..390ac127 100644 --- a/tap.h +++ b/tap.h @@ -42,6 +42,9 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len) thdr->vnet_len = htonl(l2len); } +void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto); +void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src, + struct in_addr dst, size_t l4len, uint8_t proto); void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport, struct in_addr dst, in_port_t dport, const void *in, size_t dlen); @@ -49,6 +52,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst, const void *in, size_t l4len); const struct in6_addr *tap_ip6_daddr(const struct ctx *c, const struct in6_addr *src); +void *tap_push_ip6h(struct ipv6hdr *ip6h, + const struct in6_addr *src, const struct in6_addr *dst, + size_t l4len, uint8_t proto, uint32_t flow); void tap_udp6_send(const struct ctx *c, const struct in6_addr *src, in_port_t sport, const struct in6_addr *dst, in_port_t dport, diff --git a/tcp.c b/tcp.c index b3aa9a2c..50670547 100644 --- a/tcp.c +++ b/tcp.c @@ -1868,6 +1868,76 @@ static void tcp_conn_from_sock_finish(const struct ctx *c, tcp_data_from_sock(c, conn); } +/** + * tcp_no_conn() - Respond to a non-SYN packet matching no connectionThe name makes sense when seen from the perspective of the caller, but not so much as a stand-alone thing. I don't have great ideas and tcp_no_conn() sounds fine anyway. I was thinking of tcp_reply_spurious(), tcp_rst_unrelated(), tcp_rst_invalid() (in netfilter terms), tcp_handle_stray() or things like that.Fixed.+ * @c: Execution context + * @af: Address family, AF_INET or AF_INET6 + * @saddr: Source address of the packet we're responding to + * @daddr: Destination address of the packet we're responding to + * @th: TCP header of the packet we're responding to*"to*"Right. I figured we really should, it was just trickier to implement, because we don't have obvious access to the IPv6 header at this point.+ * @l4len: Packet length, including TCP header + */ +void tcp_no_conn(const struct ctx *c, int af, + const void *saddr, const void *daddr, + const struct tcphdr *th, size_t l4len) +{ + struct iov_tail payload = IOV_TAIL(NULL, 0, 0); + struct tcphdr *rsth; + char buf[USHRT_MAX]; + uint32_t psum = 0; + size_t rst_l2len; + + /* Don't respond to RSTs without a connection */ + if (th->rst) + return; + + if (af == AF_INET) { + struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP); + const struct in_addr *rst_src = daddr; + const struct in_addr *rst_dst = saddr; + + rsth = tap_push_ip4h(ip4h, *rst_src, *rst_dst, + sizeof(*rsth), IPPROTO_TCP); + psum = proto_ipv4_header_psum(sizeof(*rsth), IPPROTO_TCP, + *rst_src, *rst_dst); + + } else { + struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6); + const struct in6_addr *rst_src = daddr; + const struct in6_addr *rst_dst = saddr; + + /* FIXME: do we need to take the flow id from the IPv6 header + * we're responding to? + */I wanted to check if we actually need to, then I realised my test won't actually tell, but let me share the steps because it can at least be used to check the mechanism as a whole: $ ./pasta -p http.pcap --config-net -- wget http://example.com -O /dev/null 2>/dev/null $ tshark -r http.pcap -Y 'http.request.version' -w http_get.pcap $ ./pasta -p replay.pcap --config-net -I i -- tcpreplay -Wi i http_get.pcap Saving packet capture to replay.pcap Actual: 1 packets (200 bytes) sent in 0.000003 seconds Rated: 66666666.6 Bps, 533.33 Mbps, 333333.33 pps Statistics for network device: i Successful packets: 1 Failed packets: 0 Truncated packets: 0 Retried packets (ENOBUFS): 0 Retried packets (EAGAIN): 0 and now we can have a look at the RST: $ tshark -r replay.pcap -Y tcp 3 0.099072 2a01:4f8:222:904::2 → 2600:1408:ec00:36::1736:7f31 HTTP 200 GET / HTTP/1.1 4 0.099111 2600:1408:ec00:36::1736:7f31 → 2a01:4f8:222:904::2 TCP 74 80 → 53378 [RST] Seq=1 Win=0 Len=0 Back to the flow label: tcp_v6_send_reset(), which handles this case in the kernel, copies it from the message we're replying to, so we probably should as well (other than making obvious RFC 6437 sense).Judging from __inet6_lookup_skb(), __inet6_lookup_established(), inet6_match(), etc., called in turn from tcp_v6_rcv(), it doesn't _seem_ to be strictly necessary. Conversely, passt commit 16b08367a57f ("tap: Fill the IPv6 flow label field to represent flow association") was needed because tcp_v6_syn_recv_sock() actually checks that. So, on one hand, I think it's much safer to do that, because the kernel could decide to check the label on every packet at some point, and I would call it a feature rather than user breakage. On the other hand, it adds some complexity (should tcp_tap_handler() just dump that only as needed? Should we make that part of the flow lookup logic instead...?) that we don't strictly need right now and we can take care of it as a follow-up, so I don't have a particular preference. I just wanted to point out that we don't need to, but I think we should eventually copy the label. I'm fine either way for this fix, though.I had a look into this... and discovered it raised several additional questions. So I'll respin without changing that behaviour and we can maybe look at this later.Sure, easily fixed. Troglodyte ;-p.+ rsth = tap_push_ip6h(ip6h, rst_src, rst_dst, + sizeof(*rsth), IPPROTO_TCP, 0); + psum = proto_ipv6_header_psum(sizeof(*rsth), IPPROTO_TCP, + rst_src, rst_dst); + } + + memset(rsth, 0, sizeof(*rsth)); + + rsth->source = th->dest; + rsth->dest = th->source; + rsth->rst = 1; + rsth->doff = sizeof(*rsth) / 4UL; + + /* Sequence matching logic from RFC9293 section 3.10.7.1 */$ grep "RFC [0-9]" *.c | wc -l 24 $ grep "RFC[0-9]" *.c | wc -l 1 Call me a troglodyte but I would never find the reference without the space.-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson+ if (th->ack) { + rsth->seq = th->ack_seq; + } else { + size_t dlen = l4len - th->doff * 4UL; + uint32_t ack = ntohl(th->seq) + dlen; + + rsth->ack_seq = htonl(ack); + rsth->ack = 1; + } + + tcp_update_csum(psum, rsth, &payload); + rst_l2len = ((char *)rsth - buf) + sizeof(*rsth); + tap_send_single(c, buf, rst_l2len); +} + /** * tcp_tap_handler() - Handle packets from tap and state transitions * @c: Execution context @@ -1915,6 +1985,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, if (opts && th->syn && !th->ack) tcp_conn_from_tap(c, af, saddr, daddr, th, opts, optlen, now); + else + tcp_no_conn(c, af, saddr, daddr, th, len); return 1; }