struct pool, which represents a batch of packets includes values giving the buffer in which all the packets lie - or for vhost_user a link to the vu_dev_region array in which the packets sit. Originally that made sense because we stored each packet as an offset and length within that buffer. However dd143e389 ("packet: replace struct desc by struct iovec") replaced the offset and length with a struct iovec which can directly reference a packet anywhere in memory. This means we no longer need the buffer reference to interpret packets from the pool. So there's really no need to check where the packet sits. We can remove the buf reference and all checks associated with it. As a bonus this removes the special case for vhost-user. Similarly the old representation used a 16-bit length, so there were some checks that packets didn't exceed that. That's also no longer necessary with the struct iovec which uses a size_t length. I think under an unlikely set of circumstances it might have been possible to hit that 16-bit limit for a legitimate packet: other parts of the code place a limit of 65535 bytes on the L2 frame, however that doesn't include the length tag used by the qemu socket protocol. That tag *is* included in the packet as stored in the pool, however, meaning we could get a 65539 byte packet at this level. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- packet.c | 60 ---------------------------------------------------- packet.h | 36 +++++++++---------------------- tap.c | 45 +++++++++++++-------------------------- tap.h | 1 - vhost_user.c | 2 -- vu_common.c | 28 ------------------------ 6 files changed, 25 insertions(+), 147 deletions(-) diff --git a/packet.c b/packet.c index 03a11e6a..5bfa7304 100644 --- a/packet.c +++ b/packet.c @@ -22,46 +22,6 @@ #include "util.h" #include "log.h" -/** - * packet_check_range() - Check if a packet memory range is valid - * @p: Packet pool - * @offset: Offset of data range in packet descriptor - * @len: Length of desired data range - * @start: Start of the packet descriptor - * @func: For tracing: name of calling function - * @line: For tracing: caller line of function call - * - * Return: 0 if the range is valid, -1 otherwise - */ -static int packet_check_range(const struct pool *p, size_t offset, size_t len, - const char *start, const char *func, int line) -{ - if (p->buf_size == 0) { - int ret; - - ret = vu_packet_check_range((void *)p->buf, offset, len, start); - - if (ret == -1) - trace("cannot find region, %s:%i", func, line); - - return ret; - } - - if (start < p->buf) { - trace("packet start %p before buffer start %p, " - "%s:%i", (void *)start, (void *)p->buf, func, line); - return -1; - } - - if (start + len + offset > p->buf + p->buf_size) { - trace("packet offset plus length %zu from size %zu, " - "%s:%i", start - p->buf + len + offset, - p->buf_size, func, line); - return -1; - } - - return 0; -} /** * packet_add_do() - Add data as packet descriptor to given pool * @p: Existing pool @@ -81,14 +41,6 @@ void packet_add_do(struct pool *p, size_t len, const char *start, return; } - if (packet_check_range(p, 0, len, start, func, line)) - return; - - if (len > UINT16_MAX) { - trace("add packet length %zu, %s:%i", len, func, line); - return; - } - p->pkt[idx].iov_base = (void *)start; p->pkt[idx].iov_len = len; @@ -118,14 +70,6 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset, return NULL; } - if (len > UINT16_MAX) { - if (func) { - trace("packet data length %zu, %s:%i", - len, func, line); - } - return NULL; - } - if (len + offset > p->pkt[idx].iov_len) { if (func) { trace("data length %zu, offset %zu from length %zu, " @@ -135,10 +79,6 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset, return NULL; } - if (packet_check_range(p, offset, len, p->pkt[idx].iov_base, - func, line)) - return NULL; - if (left) *left = p->pkt[idx].iov_len - offset - len; diff --git a/packet.h b/packet.h index 85ee5508..98eb8812 100644 --- a/packet.h +++ b/packet.h @@ -7,25 +7,17 @@ #define PACKET_H /** - * struct pool - Generic pool of packets stored in a buffer - * @buf: Buffer storing packet descriptors, - * a struct vu_dev_region array for passt vhost-user mode - * @buf_size: Total size of buffer, - * 0 for passt vhost-user mode + * struct pool - Generic pool of packets stored in nmemory * @size: Number of usable descriptors for the pool * @count: Number of used descriptors for the pool * @pkt: Descriptors: see macros below */ struct pool { - char *buf; - size_t buf_size; size_t size; size_t count; struct iovec pkt[]; }; -int vu_packet_check_range(void *buf, size_t offset, size_t len, - const char *start); void packet_add_do(struct pool *p, size_t len, const char *start, const char *func, int line); void *packet_get_do(const struct pool *p, const size_t idx, @@ -42,35 +34,27 @@ void pool_flush(struct pool *p); #define packet_get_try(p, idx, offset, len, left) \ packet_get_do(p, idx, offset, len, left, NULL, 0) -#define PACKET_POOL_DECL(_name, _size, _buf) \ +#define PACKET_POOL_DECL(_name, _size) \ struct _name ## _t { \ - char *buf; \ - size_t buf_size; \ size_t size; \ size_t count; \ struct iovec pkt[_size]; \ } -#define PACKET_POOL_INIT_NOCAST(_size, _buf, _buf_size) \ +#define PACKET_POOL_INIT_NOCAST(_size) \ { \ - .buf_size = _buf_size, \ - .buf = _buf, \ .size = _size, \ } -#define PACKET_POOL(name, size, buf, buf_size) \ - PACKET_POOL_DECL(name, size, buf) name = \ - PACKET_POOL_INIT_NOCAST(size, buf, buf_size) +#define PACKET_POOL(name, size) \ + PACKET_POOL_DECL(name, size) name = \ + PACKET_POOL_INIT_NOCAST(size) -#define PACKET_INIT(name, size, buf, buf_size) \ - (struct name ## _t) PACKET_POOL_INIT_NOCAST(size, buf, buf_size) +#define PACKET_INIT(name, size) \ + (struct name ## _t) PACKET_POOL_INIT_NOCAST(size) -#define PACKET_POOL_NOINIT(name, size, buf) \ - PACKET_POOL_DECL(name, size, buf) name ## _storage; \ - static struct pool *name = (struct pool *)&name ## _storage - -#define PACKET_POOL_P(name, size, buf, buf_size) \ - PACKET_POOL(name ## _storage, size, buf, buf_size); \ +#define PACKET_POOL_P(name, size) \ + PACKET_POOL(name ## _storage, size); \ struct pool *name = (struct pool *)&name ## _storage #endif /* PACKET_H */ diff --git a/tap.c b/tap.c index cd32a901..68231f09 100644 --- a/tap.c +++ b/tap.c @@ -62,8 +62,8 @@ #include "vu_common.h" /* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */ -static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf); -static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf); +static PACKET_POOL_P(pool_tap4, TAP_MSGS); +static PACKET_POOL_P(pool_tap6, TAP_MSGS); #define TAP_SEQS 128 /* Different L4 tuples in one batch */ #define FRAGMENT_MSG_RATE 10 /* # seconds between fragment warnings */ @@ -462,7 +462,7 @@ void eth_update_mac(struct ethhdr *eh, memcpy(eh->h_source, eth_s, sizeof(eh->h_source)); } -PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf); +PACKET_POOL_DECL(pool_l4, UIO_MAXIOV); /** * struct l4_seq4_t - Message sequence for one protocol handler call, IPv4 @@ -618,7 +618,7 @@ resume: if (!eh) continue; if (ntohs(eh->h_proto) == ETH_P_ARP) { - PACKET_POOL_P(pkt, 1, in->buf, in->buf_size); + PACKET_POOL_P(pkt, 1); packet_add(pkt, l2len, (char *)eh); arp(c, pkt); @@ -658,7 +658,7 @@ resume: continue; if (iph->protocol == IPPROTO_ICMP) { - PACKET_POOL_P(pkt, 1, in->buf, in->buf_size); + PACKET_POOL_P(pkt, 1); if (c->no_icmp) continue; @@ -677,7 +677,7 @@ resume: continue; if (iph->protocol == IPPROTO_UDP) { - PACKET_POOL_P(pkt, 1, in->buf, in->buf_size); + PACKET_POOL_P(pkt, 1); packet_add(pkt, l2len, (char *)eh); if (dhcp(c, pkt)) @@ -829,7 +829,7 @@ resume: } if (proto == IPPROTO_ICMPV6) { - PACKET_POOL_P(pkt, 1, in->buf, in->buf_size); + PACKET_POOL_P(pkt, 1); if (c->no_icmp) continue; @@ -854,7 +854,7 @@ resume: uh = (struct udphdr *)l4h; if (proto == IPPROTO_UDP) { - PACKET_POOL_P(pkt, 1, in->buf, in->buf_size); + PACKET_POOL_P(pkt, 1); packet_add(pkt, l4len, l4h); @@ -1381,36 +1381,21 @@ static void tap_sock_tun_init(struct ctx *c) } /** - * tap_sock_update_pool() - Set the buffer base and size for the pool of packets - * @base: Buffer base - * @size Buffer size + * tap_backend_init() - Create and set up AF_UNIX socket or + * tuntap file descriptor + * @c: Execution context */ -void tap_sock_update_pool(void *base, size_t size) +void tap_backend_init(struct ctx *c) { int i; - pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS, base, size); - pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS, base, size); - for (i = 0; i < TAP_SEQS; i++) { - tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, base, size); - tap6_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, base, size); + tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV); + tap6_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV); } -} -/** - * tap_backend_init() - Create and set up AF_UNIX socket or - * tuntap file descriptor - * @c: Execution context - */ -void tap_backend_init(struct ctx *c) -{ - if (c->mode == MODE_VU) { - tap_sock_update_pool(NULL, 0); + if (c->mode == MODE_VU) vu_init(c); - } else { - tap_sock_update_pool(pkt_buf, sizeof(pkt_buf)); - } if (c->fd_tap != -1) { /* Passed as --fd */ ASSERT(c->one_off); diff --git a/tap.h b/tap.h index dfbd8b9e..9731f70c 100644 --- a/tap.h +++ b/tap.h @@ -70,7 +70,6 @@ void tap_handler_passt(struct ctx *c, uint32_t events, const struct timespec *now); int tap_sock_unix_open(char *sock_path); void tap_sock_reset(struct ctx *c); -void tap_sock_update_pool(void *base, size_t size); void tap_backend_init(struct ctx *c); void tap_flush_pools(void); void tap_handler(struct ctx *c, const struct timespec *now); diff --git a/vhost_user.c b/vhost_user.c index 4b8558fa..77d36766 100644 --- a/vhost_user.c +++ b/vhost_user.c @@ -494,8 +494,6 @@ static bool vu_set_mem_table_exec(struct vu_dev *vdev, ASSERT(vdev->nregions < VHOST_USER_MAX_RAM_SLOTS - 1); vdev->regions[vdev->nregions].mmap_addr = 0; - tap_sock_update_pool(vdev->regions, 0); - return false; } diff --git a/vu_common.c b/vu_common.c index 299b5a32..bb794193 100644 --- a/vu_common.c +++ b/vu_common.c @@ -18,34 +18,6 @@ #include "pcap.h" #include "vu_common.h" -/** - * vu_packet_check_range() - Check if a given memory zone is contained in - * a mapped guest memory region - * @buf: Array of the available memory regions - * @offset: Offset of data range in packet descriptor - * @size: Length of desired data range - * @start: Start of the packet descriptor - * - * Return: 0 if the zone is in a mapped memory region, -1 otherwise - */ -int vu_packet_check_range(void *buf, size_t offset, size_t len, - const char *start) -{ - struct vu_dev_region *dev_region; - - for (dev_region = buf; dev_region->mmap_addr; dev_region++) { - /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ - char *m = (char *)(uintptr_t)dev_region->mmap_addr; - - if (m <= start && - start + offset + len <= m + dev_region->mmap_offset + - dev_region->size) - return 0; - } - - return -1; -} - /** * vu_init_elem() - initialize an array of virtqueue elements with 1 iov in each * @elem: Array of virtqueue elements to initialize -- 2.47.1