Currently we attempt to size pool_tap[46] so they have room for the maximum possible number of packets that could fit in pkt_buf, TAP_MSGS. However, the calculation isn't quite correct: TAP_MSGS is based on ETH_ZLEN (60) as the minimum possible L2 frame size. But, we don't enforce that L2 frames are at least ETH_ZLEN when we receive them from the tap backend, and since we're dealing with virtual interfaces we don't have the physical Ethernet limitations requiring that length. Indeed it is possible to generate a legitimate frame smaller than that (e.g. a zero-payload UDP/IPv4 frame on the 'pasta' backend is only 42 bytes long). It's also unclear if this limit is sufficient for vhost-user which isn't limited by the size of pkt_buf as the other modes are. We could attempt to correct the calculation, but that would leave us with even larger arrays, which in practice rarely accumulate more than a handful of packets. So, instead, put an arbitrary cap on the number of packets we can put in a batch, and if we run out of space, process and flush the batch. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- packet.c | 13 ++++++++++++- packet.h | 3 +++ passt.h | 2 -- tap.c | 18 +++++++++++++++--- tap.h | 3 ++- vu_common.c | 3 ++- 6 files changed, 34 insertions(+), 8 deletions(-) diff --git a/packet.c b/packet.c index 5bfa7304..b68580cc 100644 --- a/packet.c +++ b/packet.c @@ -22,6 +22,17 @@ #include "util.h" #include "log.h" +/** + * pool_full() - Is a packet pool full? + * @p: Pointer to packet pool + * + * Return: true if the pool is full, false if more packets can be added + */ +bool pool_full(const struct pool *p) +{ + return p->count >= p->size; +} + /** * packet_add_do() - Add data as packet descriptor to given pool * @p: Existing pool @@ -35,7 +46,7 @@ void packet_add_do(struct pool *p, size_t len, const char *start, { size_t idx = p->count; - if (idx >= p->size) { + if (pool_full(p)) { trace("add packet index %zu to pool with size %zu, %s:%i", idx, p->size, func, line); return; diff --git a/packet.h b/packet.h index 98eb8812..3618f213 100644 --- a/packet.h +++ b/packet.h @@ -6,6 +6,8 @@ #ifndef PACKET_H #define PACKET_H +#include <stdbool.h> + /** * struct pool - Generic pool of packets stored in nmemory * @size: Number of usable descriptors for the pool @@ -23,6 +25,7 @@ void packet_add_do(struct pool *p, size_t len, const char *start, void *packet_get_do(const struct pool *p, const size_t idx, size_t offset, size_t len, size_t *left, const char *func, int line); +bool pool_full(const struct pool *p); void pool_flush(struct pool *p); #define packet_add(p, len, start) \ diff --git a/passt.h b/passt.h index 0dd4efa0..81b2787f 100644 --- a/passt.h +++ b/passt.h @@ -70,8 +70,6 @@ static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data), #define TAP_BUF_BYTES \ ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE) -#define TAP_MSGS \ - DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t)) #define PKT_BUF_BYTES MAX(TAP_BUF_BYTES, 0) extern char pkt_buf [PKT_BUF_BYTES]; diff --git a/tap.c b/tap.c index 68231f09..42370a26 100644 --- a/tap.c +++ b/tap.c @@ -61,6 +61,8 @@ #include "vhost_user.h" #include "vu_common.h" +#define TAP_MSGS 256 + /* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */ static PACKET_POOL_P(pool_tap4, TAP_MSGS); static PACKET_POOL_P(pool_tap6, TAP_MSGS); @@ -966,8 +968,10 @@ void tap_handler(struct ctx *c, const struct timespec *now) * @c: Execution context * @l2len: Total L2 packet length * @p: Packet buffer + * @now: Current timestamp */ -void tap_add_packet(struct ctx *c, ssize_t l2len, char *p) +void tap_add_packet(struct ctx *c, ssize_t l2len, char *p, + const struct timespec *now) { const struct ethhdr *eh; @@ -983,9 +987,17 @@ void tap_add_packet(struct ctx *c, ssize_t l2len, char *p) switch (ntohs(eh->h_proto)) { case ETH_P_ARP: case ETH_P_IP: + if (pool_full(pool_tap4)) { + tap4_handler(c, pool_tap4, now); + pool_flush(pool_tap4); + } packet_add(pool_tap4, l2len, p); break; case ETH_P_IPV6: + if (pool_full(pool_tap6)) { + tap6_handler(c, pool_tap6, now); + pool_flush(pool_tap6); + } packet_add(pool_tap6, l2len, p); break; default: @@ -1066,7 +1078,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now) p += sizeof(uint32_t); n -= sizeof(uint32_t); - tap_add_packet(c, l2len, p); + tap_add_packet(c, l2len, p, now); p += l2len; n -= l2len; @@ -1129,7 +1141,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now) len > (ssize_t)ETH_MAX_MTU) continue; - tap_add_packet(c, len, pkt_buf + n); + tap_add_packet(c, len, pkt_buf + n, now); } tap_handler(c, now); diff --git a/tap.h b/tap.h index 9731f70c..c9acf860 100644 --- a/tap.h +++ b/tap.h @@ -73,6 +73,7 @@ void tap_sock_reset(struct ctx *c); void tap_backend_init(struct ctx *c); void tap_flush_pools(void); void tap_handler(struct ctx *c, const struct timespec *now); -void tap_add_packet(struct ctx *c, ssize_t l2len, char *p); +void tap_add_packet(struct ctx *c, ssize_t l2len, char *p, + const struct timespec *now); #endif /* TAP_H */ diff --git a/vu_common.c b/vu_common.c index bb794193..38511230 100644 --- a/vu_common.c +++ b/vu_common.c @@ -157,7 +157,8 @@ static void vu_handle_tx(struct vu_dev *vdev, int index, tap_add_packet(vdev->context, elem[count].out_sg[0].iov_len - hdrlen, - (char *)elem[count].out_sg[0].iov_base + hdrlen); + (char *)elem[count].out_sg[0].iov_base + hdrlen, + now); count++; } tap_handler(vdev->context, now); -- 2.47.1