On Wed, Jul 23, 2025 at 9:06 AM David Gibson wrote:
On Wed, Jul 09, 2025 at 07:47:41PM +0200, Eugenio Pérez wrote:
So the vhost kernel is able to access the TCP buffers.
Signed-off-by: Eugenio Pérez
--- tap.c | 14 +++++++++++--- tcp_buf.c | 14 ++++---------- tcp_buf.h | 19 +++++++++++++++++++ 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/tap.c b/tap.c index 0656294..8b3ec45 100644 --- a/tap.c +++ b/tap.c @@ -63,6 +63,8 @@ #include "vhost_user.h" #include "vu_common.h"
+#include "tcp_buf.h" +
I don't love including the pretty specific content of tcp_buf.h into the mostly protocol-unaware tap.c, though I do realise that avoiding it will probably have other tradeoffs.
I was more focused on testing the performance of the solution than on making it pretty, but I'm a big fan of hiding the buffers somehow :). I just didn't explore the options.
/* Maximum allowed frame lengths (including L2 header) */
/* Verify that an L2 frame length limit is large enough to contain the header, @@ -136,8 +138,8 @@ static union { char buf[offsetof(struct vring_used, ring[VHOST_NDESCS])]; } vring_used_0 __attribute__((aligned(PAGE_SIZE))), vring_used_1 __attribute__((aligned(PAGE_SIZE)));
-/* all descs ring + 2rings * 2vqs + tx pkt buf + rx pkt buf */ -#define N_VHOST_REGIONS 6 +/* all descs ring + 2rings * 2vqs + tx pkt buf + rx pkt buf + TCP virtio hdr + TCP eth(src,dst) + TCP ip hdr */ +#define N_VHOST_REGIONS 12
Hmm. Keeping this and the region initialisation in sync is pretty clunky. I recall that before I went on leave we were discussing just exposing pasta's whole data segment to vhost; was there a reason for changing that plan, or is it just that you haven't implemented it so far?
union { struct vhost_memory mem; char buf[offsetof(struct vhost_memory, regions[N_VHOST_REGIONS])]; @@ -1635,7 +1637,13 @@ static int tap_ns_tun(void *arg) vhost_memory.mem.regions[3] = VHOST_MEMORY_REGION(vring_used_0); vhost_memory.mem.regions[4] = VHOST_MEMORY_REGION(vring_used_1); vhost_memory.mem.regions[5] = VHOST_MEMORY_REGION(pkt_buf); - static_assert(5 < N_VHOST_REGIONS); + vhost_memory.mem.regions[6] = VHOST_MEMORY_REGION(tcp_payload_tap_hdr); + vhost_memory.mem.regions[7] = VHOST_MEMORY_REGION(tcp4_eth_src); + vhost_memory.mem.regions[8] = VHOST_MEMORY_REGION(tcp6_eth_src); + vhost_memory.mem.regions[9] = VHOST_MEMORY_REGION(tcp4_payload_ip); + vhost_memory.mem.regions[10] = VHOST_MEMORY_REGION(tcp6_payload_ip); + vhost_memory.mem.regions[11] = VHOST_MEMORY_REGION(tcp_payload); + static_assert(11 < N_VHOST_REGIONS);
If all the regions are global variables, you could put them into a static const array, then define N_VHOST_REGIONS via ARRAY_SIZE().
I'm OK with that; I was just trying to keep all the variables as narrowly scoped as possible. The array will not be used after the syscall.
#undef VHOST_MEMORY_REGION #undef VHOST_MEMORY_REGION_PTR
diff --git a/tcp_buf.c b/tcp_buf.c index 2fbd056..c999d2e 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -22,8 +22,6 @@
#include
-#include
- #include "util.h" #include "ip.h" #include "iov.h" @@ -35,24 +33,20 @@ #include "tcp_internal.h" #include "tcp_buf.h" -#define TCP_FRAMES_MEM 128 -#define TCP_FRAMES \ - (c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM) - /* Static buffers */
/* Ethernet header for IPv4 and IPv6 frames */ -static struct ethhdr tcp4_eth_src; -static struct ethhdr tcp6_eth_src; +struct ethhdr tcp4_eth_src; +struct ethhdr tcp6_eth_src;
-static struct virtio_net_hdr_mrg_rxbuf tcp_payload_tap_hdr[TCP_FRAMES_MEM]; +struct virtio_net_hdr_mrg_rxbuf tcp_payload_tap_hdr[TCP_FRAMES_MEM];
/* IP headers for IPv4 and IPv6 */ struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM]; struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM];
/* TCP segments with payload for IPv4 and IPv6 frames */ -static struct tcp_payload_t tcp_payload[TCP_FRAMES_MEM]; +struct tcp_payload_t tcp_payload[TCP_FRAMES_MEM];
static_assert(MSS4 <= sizeof(tcp_payload[0].data), "MSS4 is greater than 65516"); static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516"); diff --git a/tcp_buf.h b/tcp_buf.h index 54f5e53..7ae2536 100644 --- a/tcp_buf.h +++ b/tcp_buf.h @@ -6,9 +6,28 @@ #ifndef TCP_BUF_H #define TCP_BUF_H
+#include
+ +#include "tcp_conn.h" +#include "tcp_internal.h" + void tcp_sock_iov_init(const struct ctx *c); void tcp_payload_flush(const struct ctx *c); +struct tcp_tap_conn; int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn); int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags); +#define TCP_FRAMES_MEM 128 +#define TCP_FRAMES \ +(c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM) + +extern struct virtio_net_hdr_mrg_rxbuf tcp_payload_tap_hdr[TCP_FRAMES_MEM]; +extern struct tcp_payload_t tcp_payload[TCP_FRAMES_MEM]; + +extern struct ethhdr tcp4_eth_src; +extern struct ethhdr tcp6_eth_src; + +extern struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM]; +extern struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM]; + #endif /*TCP_BUF_H */
-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson