Just one comment here:
On Fri, 6 Sep 2024 18:04:47 +0200
Laurent Vivier wrote:
Add virtio.c and virtio.h that define the functions needed to manage virtqueues.
Signed-off-by: Laurent Vivier
---
 Makefile |   4 +-
 util.h   |   8 +
 virtio.c | 665 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 virtio.h | 185 ++++++++++++++++
 4 files changed, 860 insertions(+), 2 deletions(-)
 create mode 100644 virtio.c
 create mode 100644 virtio.h

diff --git a/Makefile b/Makefile
index 01fada45adc7..e9a154bdd718 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
 PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
 	icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
 	ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
-	tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c
+	tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c virtio.c
 QRAP_SRCS = qrap.c
 
 SRCS = $(PASST_SRCS) $(QRAP_SRCS)
@@ -57,7 +57,7 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
 	flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
 	lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
 	siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
-	udp.h udp_flow.h util.h
+	udp.h udp_flow.h util.h virtio.h
 HEADERS = $(PASST_HEADERS) seccomp.h
 
 C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
diff --git a/util.h b/util.h
index 1463c92153d5..0960903ccaec 100644
--- a/util.h
+++ b/util.h
@@ -134,6 +134,14 @@ static inline uint32_t ntohl_unaligned(const void *p)
 	return ntohl(val);
 }
 
+static inline void barrier(void) { __asm__ __volatile__("" ::: "memory"); }
+#define smp_mb()		do { barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); } while (0)
+#define smp_mb_release()	do { barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); } while (0)
+#define smp_mb_acquire()	do { barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); } while (0)
+
+#define smp_wmb()	smp_mb_release()
+#define smp_rmb()	smp_mb_acquire()
+
 #define NS_FN_STACK_SIZE	(RLIMIT_STACK_VAL * 1024 / 8)
 int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size,
 	     int flags, void *arg);
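
(As a side note, not a comment on the patch: for anybody wondering how the
smp_wmb()/smp_rmb() pair above is meant to be used, here is a minimal
sketch of the usual pairing in a single-producer, single-consumer ring.
The struct and functions are made up for illustration, they are not part
of this series:

	#define SKETCH_RING_SIZE 256

	struct sketch_ring {
		uint32_t entries[SKETCH_RING_SIZE];
		uint16_t idx;	/* producer publishes entries via this */
	};

	/* Producer: fill the entry first, then publish the new index */
	static void sketch_push(struct sketch_ring *r, uint16_t i, uint32_t v)
	{
		r->entries[i % SKETCH_RING_SIZE] = v;
		smp_wmb();	/* entry must be visible before the index */
		r->idx = i + 1;
	}

	/* Consumer: read the index first, then the entry it covers */
	static uint32_t sketch_pop(const struct sketch_ring *r, uint16_t i)
	{
		while (r->idx == i)
			;	/* spin until the producer publishes */
		smp_rmb();	/* pairs with the producer's smp_wmb() */
		return r->entries[i % SKETCH_RING_SIZE];
	}

This mirrors what the virtio ring does: descriptors are written before
avail->idx is updated on one side, and avail->idx is read before the
descriptors it covers on the other.)
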
diff --git a/virtio.c b/virtio.c
new file mode 100644
index 000000000000..380590afbca3
--- /dev/null
+++ b/virtio.c
@@ -0,0 +1,665 @@
+// SPDX-License-Identifier: GPL-2.0-or-later AND BSD-3-Clause
+/*
+ * virtio API, vring and virtqueue functions definition
+ *
+ * Copyright Red Hat
+ * Author: Laurent Vivier
+ */
+
+/* Some parts copied from QEMU subprojects/libvhost-user/libvhost-user.c
+ * originally licensed under the following terms:
+ *
+ * --
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Anthony Liguori
+ *  Marc-André Lureau
+ *  Victor Kaplansky
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ *
+ * Some parts copied from QEMU hw/virtio/virtio.c
+ * licensed under the following terms:
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ *  Anthony Liguori
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * --
+ *
+ * virtq_used_event() and virtq_avail_event() from
+ * https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html#...
+ * licensed under the following terms:
+ *
+ * --
+ *
+ * This header is BSD licensed so anyone can use the definitions
+ * to implement compatible drivers/servers.
+ *
+ * Copyright 2007, 2009, IBM Corporation
+ * Copyright 2011, Red Hat, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "util.h"
+#include "virtio.h"
+
+#define VIRTQUEUE_MAX_SIZE 1024
+
+/**
+ * vu_gpa_to_va() - Translate guest physical address to our virtual address.
+ * @dev:	Vhost-user device
+ * @plen:	Physical length to map (input), capped to region (output)
+ * @guest_addr:	Guest physical address
+ *
+ * Return: virtual address in our address space of the guest physical address
+ */
+static void *vu_gpa_to_va(struct vu_dev *dev, uint64_t *plen, uint64_t guest_addr)
+{
+	unsigned int i;
+
+	if (*plen == 0)
+		return NULL;
+
+	/* Find matching memory region. */
+	for (i = 0; i < dev->nregions; i++) {
+		const struct vu_dev_region *r = &dev->regions[i];
+
+		if ((guest_addr >= r->gpa) &&
+		    (guest_addr < (r->gpa + r->size))) {
+			if ((guest_addr + *plen) > (r->gpa + r->size))
+				*plen = r->gpa + r->size - guest_addr;
+			/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+			return (void *)(guest_addr - r->gpa + r->mmap_addr +
+					r->mmap_offset);
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * vring_avail_flags() - Read the available ring flags
+ * @vq:		Virtqueue
+ *
+ * Return: the available ring descriptor flags of the given virtqueue
+ */
+static inline uint16_t vring_avail_flags(const struct vu_virtq *vq)
+{
+	return le16toh(vq->vring.avail->flags);
+}
+
+/**
+ * vring_avail_idx() - Read the available ring index
+ * @vq:		Virtqueue
+ *
+ * Return: the available ring index of the given virtqueue
+ */
+static inline uint16_t vring_avail_idx(struct vu_virtq *vq)
+{
+	vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
+
+	return vq->shadow_avail_idx;
+}
+
+/**
+ * vring_avail_ring() - Read an available ring entry
+ * @vq:		Virtqueue
+ * @i:		Index of the entry to read
+ *
+ * Return: the ring entry content (head of the descriptor chain)
+ */
+static inline uint16_t vring_avail_ring(const struct vu_virtq *vq, int i)
+{
+	return le16toh(vq->vring.avail->ring[i]);
+}
+
+/**
+ * virtq_used_event() - Get location of used event indices
+ *			(only with VIRTIO_F_EVENT_IDX)
+ * @vq:		Virtqueue
+ *
+ * Return: the location of the used event index
+ */
+static inline uint16_t *virtq_used_event(const struct vu_virtq *vq)
+{
+	/* For backwards compat, used event index is at *end* of avail ring. */
+	return &vq->vring.avail->ring[vq->vring.num];
+}
+
+/**
+ * vring_get_used_event() - Get the used event from the available ring
+ * @vq:		Virtqueue
+ *
+ * Return: the used event (available only if VIRTIO_RING_F_EVENT_IDX is set)
+ *	   used_event is a performant alternative where the driver
+ *	   specifies how far the device can progress before a notification
+ *	   is required.
+ */
+static inline uint16_t vring_get_used_event(const struct vu_virtq *vq)
+{
+	return le16toh(*virtq_used_event(vq));
+}
+
+/**
+ * virtqueue_get_head() - Get the head of the descriptor chain for a given
+ *			  index
+ * @vq:		Virtqueue
+ * @idx:	Available ring entry index
+ * @head:	Head of the descriptor chain
+ */
+static void virtqueue_get_head(const struct vu_virtq *vq,
+			       unsigned int idx, unsigned int *head)
+{
+	/* Grab the next descriptor number they're advertising, and increment
+	 * the index we've seen.
+	 */
+	*head = vring_avail_ring(vq, idx % vq->vring.num);
+
+	/* If their number is silly, that's a fatal mistake. */
+	if (*head >= vq->vring.num)
+		die("vhost-user: Guest says index %u is available", *head);
+}
+
+/**
+ * virtqueue_read_indirect_desc() - Copy virtio ring descriptors from guest
+ *				    memory
+ * @dev:	Vhost-user device
+ * @desc:	Destination address to copy the descriptors to
+ * @addr:	Guest memory address to copy from
+ * @len:	Length of memory to copy
+ *
+ * Return: -1 if there is an error, 0 otherwise
+ */
+static int virtqueue_read_indirect_desc(struct vu_dev *dev, struct vring_desc *desc,
+					uint64_t addr, size_t len)
+{
+	uint64_t read_len;
+
+	if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc)))
+		return -1;
+
+	if (len == 0)
+		return -1;
+
+	while (len) {
+		const struct vring_desc *orig_desc;
+
+		read_len = len;
+		orig_desc = vu_gpa_to_va(dev, &read_len, addr);

In case you missed this in my review of v3 (I'm not sure if it's a valid
concern):

--
Should we also return if read_len < sizeof(struct vring_desc) after this
call? Can that ever happen, if we pick a particular value of addr so that
it's almost at the end of a region?
--
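
Just to make the suggestion concrete, this is the kind of check I mean,
as an untested sketch on top of the quoted lines above, not something
I've tried against the rest of the series:

		orig_desc = vu_gpa_to_va(dev, &read_len, addr);
		/* vu_gpa_to_va() caps read_len to the end of the region:
		 * if that leaves us with less than one full descriptor,
		 * bail out instead of copying a truncated one.
		 */
		if (!orig_desc || read_len < sizeof(struct vring_desc))
			return -1;

-- 
Stefano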