On Fri, Sep 05, 2025 at 10:11:51PM -0400, Jon Maloy wrote:
We forward the incoming MAC address through the tap interface when receiving incoming packets from hosts on the local network.
This is a part of the solution to bug https://bugs.passt.top/show_bug.cgi?id=120
Signed-off-by: Jon Maloy
LGTM, one observation below.
--- v3: - Adapted to the signature change in nl_mac_get() function, so that we now consider only the template interface when checking the ARP/NDP table. v4: - Adapted to previous name changes in this series v5: - Added lookup in ARP/NDP cache and/or table on incoming messages in case flow->tap_omac wasn't initialized at flow creation, i.e., the flow was initiated from the guest. --- passt.c | 7 +++---- passt.h | 3 +-- pasta.c | 2 +- tap.c | 2 +- tcp.c | 13 +++++++++++-- tcp.h | 2 +- tcp_buf.c | 37 +++++++++++++++++-------------------- tcp_internal.h | 4 ++-- tcp_vu.c | 5 ++--- 9 files changed, 39 insertions(+), 36 deletions(-)
diff --git a/passt.c b/passt.c index 2a28e20..adf7b19 100644 --- a/passt.c +++ b/passt.c @@ -149,11 +149,10 @@ static void timer_init(struct ctx *c, const struct timespec *now) /** * proto_update_l2_buf() - Update scatter-gather L2 buffers in protocol handlers * @eth_d: Ethernet destination address, NULL if unchanged - * @eth_s: Ethernet source address, NULL if unchanged */ -void proto_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) +void proto_update_l2_buf(const unsigned char *eth_d) { - tcp_update_l2_buf(eth_d, eth_s); + tcp_update_l2_buf(eth_d); udp_update_l2_buf(eth_d); }
@@ -256,7 +255,7 @@ int main(int argc, char **argv) if ((!c.no_udp && udp_init(&c)) || (!c.no_tcp && tcp_init(&c))) _exit(EXIT_FAILURE);
- proto_update_l2_buf(c.guest_mac, c.our_tap_mac); + proto_update_l2_buf(c.guest_mac);
if (c.ifi4 && !c.no_dhcp) dhcp_init(); diff --git a/passt.h b/passt.h index 4cfd6eb..2c5b3e1 100644 --- a/passt.h +++ b/passt.h @@ -324,7 +324,6 @@ struct ctx { bool migrate_exit; };
-void proto_update_l2_buf(const unsigned char *eth_d, - const unsigned char *eth_s); +void proto_update_l2_buf(const unsigned char *eth_d);
#endif /* PASST_H */ diff --git a/pasta.c b/pasta.c index 687406b..a42cfd8 100644 --- a/pasta.c +++ b/pasta.c @@ -411,7 +411,7 @@ void pasta_ns_conf(struct ctx *c) } }
- proto_update_l2_buf(c->guest_mac, NULL); + proto_update_l2_buf(c->guest_mac); }
/** diff --git a/tap.c b/tap.c index 7ba6399..74557e1 100644 --- a/tap.c +++ b/tap.c @@ -1097,7 +1097,7 @@ void tap_add_packet(struct ctx *c, struct iov_tail *data,
if (memcmp(c->guest_mac, eh->h_source, ETH_ALEN)) { memcpy(c->guest_mac, eh->h_source, ETH_ALEN); - proto_update_l2_buf(c->guest_mac, NULL); + proto_update_l2_buf(c->guest_mac); }
switch (ntohs(eh->h_proto)) { diff --git a/tcp.c b/tcp.c index dba5fdc..23b45b8 100644 --- a/tcp.c +++ b/tcp.c @@ -919,6 +919,7 @@ static void tcp_fill_header(struct tcphdr *th,
/** * tcp_fill_headers() - Fill 802.3, IP, TCP headers + * @c: Execution context * @conn: Connection pointer * @taph: tap backend specific header * @ip4h: Pointer to IPv4 header, or NULL @@ -929,14 +930,15 @@ static void tcp_fill_header(struct tcphdr *th, * @seq: Sequence number for this segment * @no_tcp_csum: Do not set TCP checksum */ -void tcp_fill_headers(const struct tcp_tap_conn *conn, - struct tap_hdr *taph, +void tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn, + struct tap_hdr *taph, struct ethhdr *eh, struct iphdr *ip4h, struct ipv6hdr *ip6h, struct tcphdr *th, struct iov_tail *payload, const uint16_t *ip4_check, uint32_t seq, bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); size_t l4len = iov_tail_size(payload) + sizeof(*th); + uint8_t *omac = conn->f.tap_omac; size_t l3len = l4len; uint32_t psum = 0;
@@ -962,6 +964,7 @@ void tcp_fill_headers(const struct tcp_tap_conn *conn, psum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, *src4, *dst4); } + eh->h_proto = htons_constant(ETH_P_IP); }
if (ip6h) { @@ -982,8 +985,14 @@ void tcp_fill_headers(const struct tcp_tap_conn *conn, &ip6h->saddr, &ip6h->daddr); } + eh->h_proto = htons_constant(ETH_P_IPV6); }
+ /* Make one attempt to find true MAC address in ARP/NDP table */ + if (mac_undefined(omac)) + fwd_neigh_mac_get(c, &tapside->oaddr, omac); + eth_update_mac(eh, NULL, omac);
It might be nice to do this in tcp_{buf,vu}_data_from_sock() rather than here. That does mean duplicating it in two places, but it avoids calling mac_undefined() for every packet.
+ tcp_fill_header(th, conn, seq);
if (no_tcp_csum) diff --git a/tcp.h b/tcp.h index 234a803..c1b8385 100644 --- a/tcp.h +++ b/tcp.h @@ -24,7 +24,7 @@ int tcp_init(struct ctx *c); void tcp_timer(struct ctx *c, const struct timespec *now); void tcp_defer_handler(struct ctx *c);
-void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s); +void tcp_update_l2_buf(const unsigned char *eth_d);
extern bool peek_offset_cap;
diff --git a/tcp_buf.c b/tcp_buf.c index bc898de..7d8746e 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -40,8 +40,7 @@ /* Static buffers */
/* Ethernet header for IPv4 and IPv6 frames */ -static struct ethhdr tcp4_eth_src; -static struct ethhdr tcp6_eth_src; +static struct ethhdr tcp_eth_hdr[TCP_FRAMES_MEM];
static struct tap_hdr tcp_payload_tap_hdr[TCP_FRAMES_MEM];
@@ -67,12 +66,13 @@ static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS]; /** * tcp_update_l2_buf() - Update Ethernet header buffers with addresses * @eth_d: Ethernet destination address, NULL if unchanged - * @eth_s: Ethernet source address, NULL if unchanged */ -void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) +void tcp_update_l2_buf(const unsigned char *eth_d) { - eth_update_mac(&tcp4_eth_src, eth_d, eth_s); - eth_update_mac(&tcp6_eth_src, eth_d, eth_s); + int i; + + for (i = 0; i < TCP_FRAMES_MEM; i++) + eth_update_mac(&tcp_eth_hdr[i], eth_d, NULL); }
/** @@ -85,9 +85,6 @@ void tcp_sock_iov_init(const struct ctx *c) struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP); int i;
- tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6); - tcp4_eth_src.h_proto = htons_constant(ETH_P_IP); - for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) { tcp6_payload_ip[i] = ip6; tcp4_payload_ip[i] = iph; @@ -149,13 +146,15 @@ void tcp_payload_flush(const struct ctx *c)
/** * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers + * @c: Execution context * @conn: Connection pointer * @iov: Pointer to an array of iovec of TCP pre-cooked buffers * @check: Checksum, if already known * @seq: Sequence number for this segment * @no_tcp_csum: Do not set TCP checksum */ -static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, +static void tcp_l2_buf_fill_headers(const struct ctx *c, + struct tcp_tap_conn *conn, struct iovec *iov, const uint16_t *check, uint32_t seq, bool no_tcp_csum) { @@ -164,6 +163,7 @@ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, struct tap_hdr *taph = iov[TCP_IOV_TAP].iov_base; const struct flowside *tapside = TAPFLOW(conn); const struct in_addr *a4 = inany_v4(&tapside->oaddr); + struct ethhdr *eh = iov[TCP_IOV_ETH].iov_base; struct ipv6hdr *ip6h = NULL; struct iphdr *ip4h = NULL;
@@ -172,7 +172,7 @@ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, else ip6h = iov[TCP_IOV_IP].iov_base;
- tcp_fill_headers(conn, taph, ip4h, ip6h, th, &tail, + tcp_fill_headers(c, conn, taph, eh, ip4h, ip6h, th, &tail, check, seq, no_tcp_csum); }
@@ -194,14 +194,12 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) int ret;
iov = tcp_l2_iov[tcp_payload_used]; - if (CONN_V4(conn)) { + if (CONN_V4(conn)) iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]); - iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; - } else { + else iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]); - iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; - }
+ iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp_eth_hdr[tcp_payload_used]); payload = iov[TCP_IOV_PAYLOAD].iov_base; seq = conn->seq_to_tap; ret = tcp_prepare_flags(c, conn, flags, &payload->th, @@ -212,7 +210,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) tcp_payload_used++; l4len = optlen + sizeof(struct tcphdr); iov[TCP_IOV_PAYLOAD].iov_len = l4len; - tcp_l2_buf_fill_headers(conn, iov, NULL, seq, false); + tcp_l2_buf_fill_headers(c, conn, iov, NULL, seq, false);
if (flags & DUP_ACK) { struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++]; @@ -259,11 +257,10 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, check = &iph->check; } iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]); - iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; } else if (CONN_V6(conn)) { iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]); - iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; } + iov[TCP_IOV_ETH].iov_base = &tcp_eth_hdr[tcp_payload_used]; payload = iov[TCP_IOV_PAYLOAD].iov_base; payload->th.th_off = sizeof(struct tcphdr) / 4; payload->th.th_x2 = 0; @@ -271,7 +268,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, payload->th.ack = 1; payload->th.psh = push; iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr); - tcp_l2_buf_fill_headers(conn, iov, check, seq, false); + tcp_l2_buf_fill_headers(c, conn, iov, check, seq, false); if (++tcp_payload_used > TCP_FRAMES_MEM - 1) tcp_payload_flush(c); } diff --git a/tcp_internal.h b/tcp_internal.h index 36c6533..25f4cae 100644 --- a/tcp_internal.h +++ b/tcp_internal.h @@ -166,8 +166,8 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
struct tcp_info_linux;
-void tcp_fill_headers(const struct tcp_tap_conn *conn, - struct tap_hdr *taph, +void tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn, + struct tap_hdr *taph, struct ethhdr *eh, struct iphdr *ip4h, struct ipv6hdr *ip6h, struct tcphdr *th, struct iov_tail *payload, const uint16_t *ip4_check, uint32_t seq, bool no_tcp_csum); diff --git a/tcp_vu.c b/tcp_vu.c index cb39bc2..c7e289d 100644 --- a/tcp_vu.c +++ b/tcp_vu.c @@ -135,7 +135,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) flags_elem[0].in_sg[0].iov_len = hdrlen + optlen; payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
- tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload, + tcp_fill_headers(c, conn, NULL, eh, ip4h, ip6h, th, &payload, NULL, seq, !*c->pcap);
if (*c->pcap) { @@ -315,7 +315,6 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn, eh = vu_eth(base);
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest)); - memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
/* initialize header */
@@ -339,7 +338,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn, th->ack = 1; th->psh = push;
- tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload, + tcp_fill_headers(c, conn, NULL, eh, ip4h, ip6h, th, &payload, *check, conn->seq_to_tap, no_tcp_csum); if (ip4h) *check = &ip4h->check; -- 2.50.1
-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson