Add virtio.c and virtio.h that define the functions needed to manage
virtqueues.
Signed-off-by: Laurent Vivier <lvivier(a)redhat.com>
---
Makefile | 4 +-
util.h | 8 +
virtio.c | 665 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
virtio.h | 185 ++++++++++++++++
4 files changed, 860 insertions(+), 2 deletions(-)
create mode 100644 virtio.c
create mode 100644 virtio.h
diff --git a/Makefile b/Makefile
index 01fada45adc7..e9a154bdd718 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
- tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c
+ tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c virtio.c
QRAP_SRCS = qrap.c
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
@@ -57,7 +57,7 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
- udp.h udp_flow.h util.h
+ udp.h udp_flow.h util.h virtio.h
HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
diff --git a/util.h b/util.h
index 1463c92153d5..0960903ccaec 100644
--- a/util.h
+++ b/util.h
@@ -134,6 +134,14 @@ static inline uint32_t ntohl_unaligned(const void *p)
return ntohl(val);
}
+/* Compiler barrier: the empty asm statement with a "memory" clobber keeps the
+ * compiler from moving memory accesses across this point.
+ */
+static inline void barrier(void) { __asm__ __volatile__("" ::: "memory"); }
+
+/* Memory barriers: a compiler barrier followed by a thread fence with
+ * sequentially consistent, release or acquire ordering.
+ */
+#define smp_mb() do { barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); } while (0)
+#define smp_mb_release() do { barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); } while (0)
+#define smp_mb_acquire() do { barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); } while (0)
+
+/* Write and read barriers are the release and acquire barriers above */
+#define smp_wmb() smp_mb_release()
+#define smp_rmb() smp_mb_acquire()
+
#define NS_FN_STACK_SIZE (RLIMIT_STACK_VAL * 1024 / 8)
int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
void *arg);
diff --git a/virtio.c b/virtio.c
new file mode 100644
index 000000000000..380590afbca3
--- /dev/null
+++ b/virtio.c
@@ -0,0 +1,665 @@
+// SPDX-License-Identifier: GPL-2.0-or-later AND BSD-3-Clause
+/*
+ * virtio API, vring and virtqueue functions definition
+ *
+ * Copyright Red Hat
+ * Author: Laurent Vivier <lvivier(a)redhat.com>
+ */
+
+/* Some parts copied from QEMU subprojects/libvhost-user/libvhost-user.c
+ * originally licensed under the following terms:
+ *
+ * --
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Anthony Liguori <aliguori(a)us.ibm.com>
+ * Marc-André Lureau <mlureau(a)redhat.com>
+ * Victor Kaplansky <victork(a)redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ *
+ * Some parts copied from QEMU hw/virtio/virtio.c
+ * licensed under the following terms:
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ * Anthony Liguori <aliguori(a)us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * --
+ *
+ * virtq_used_event() and virtq_avail_event() from
+ * https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html…
+ * licensed under the following terms:
+ *
+ * --
+ *
+ * This header is BSD licensed so anyone can use the definitions
+ * to implement compatible drivers/servers.
+ *
+ * Copyright 2007, 2009, IBM Corporation
+ * Copyright 2011, Red Hat, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ‘‘AS IS’’ AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <endian.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+
+#include "util.h"
+#include "virtio.h"
+
+#define VIRTQUEUE_MAX_SIZE 1024
+
+/**
+ * vu_gpa_to_va() - Translate guest physical address to our virtual address.
+ * @dev: Vhost-user device
+ * @plen: Physical length to map (input), capped to region (output)
+ * @guest_addr: Guest physical address
+ *
+ * Return: virtual address in our address space of the guest physical address,
+ *         NULL if *@plen is zero or if no memory region of @dev contains
+ *         @guest_addr
+ */
+static void *vu_gpa_to_va(struct vu_dev *dev, uint64_t *plen, uint64_t guest_addr)
+{
+	unsigned int i;
+
+	if (*plen == 0)
+		return NULL;
+
+	/* Find matching memory region. */
+	for (i = 0; i < dev->nregions; i++) {
+		const struct vu_dev_region *r = &dev->regions[i];
+		uint64_t avail;
+
+		/* Compare offsets into the region instead of end addresses:
+		 * neither r->gpa + r->size nor guest_addr + *plen is computed,
+		 * so the checks can't be defeated by 64-bit wrap-around.
+		 */
+		if (guest_addr < r->gpa || guest_addr - r->gpa >= r->size)
+			continue;
+
+		/* Bytes left in this region starting from guest_addr */
+		avail = r->size - (guest_addr - r->gpa);
+		if (*plen > avail)
+			*plen = avail;
+
+		/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+		return (void *)(guest_addr - r->gpa + r->mmap_addr +
+				r->mmap_offset);
+	}
+
+	return NULL;
+}
+
+/**
+ * vring_avail_flags() - Read the available ring flags
+ * @vq: Virtqueue
+ *
+ * Return: the flags field of the available ring of the given virtqueue,
+ * converted from little-endian to host byte order
+ */
+static inline uint16_t vring_avail_flags(const struct vu_virtq *vq)
+{
+ return le16toh(vq->vring.avail->flags);
+}
+
+/**
+ * vring_avail_idx() - Read the available ring index
+ * @vq: Virtqueue
+ *
+ * The value read, converted from little-endian to host byte order, is also
+ * stored in @vq->shadow_avail_idx.
+ *
+ * Return: the available ring index of the given virtqueue
+ */
+static inline uint16_t vring_avail_idx(struct vu_virtq *vq)
+{
+ vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
+
+ return vq->shadow_avail_idx;
+}
+
+/**
+ * vring_avail_ring() - Read an available ring entry
+ * @vq: Virtqueue
+ * @i: Index of the entry to read (not range-checked by this function)
+ *
+ * Return: the ring entry content (head of the descriptor chain), converted
+ * from little-endian to host byte order
+ */
+static inline uint16_t vring_avail_ring(const struct vu_virtq *vq, int i)
+{
+ return le16toh(vq->vring.avail->ring[i]);
+}
+
+/**
+ * virtq_used_event() - Get location of used event indices
+ * (only with VIRTIO_F_EVENT_IDX)
+ * @vq: Virtqueue
+ *
+ * Return: the location of the used event index, that is, the address of
+ * element @vq->vring.num of the available ring array
+ */
+static inline uint16_t *virtq_used_event(const struct vu_virtq *vq)
+{
+ /* For backwards compat, used event index is at *end* of avail ring. */
+ return &vq->vring.avail->ring[vq->vring.num];
+}
+
+/**
+ * vring_get_used_event() - Get the used event from the available ring
+ * @vq: Virtqueue
+ *
+ * Return: the used event (available only if VIRTIO_RING_F_EVENT_IDX is set),
+ * converted from little-endian to host byte order.
+ * used_event is a performant alternative where the driver
+ * specifies how far the device can progress before a notification
+ * is required.
+ */
+static inline uint16_t vring_get_used_event(const struct vu_virtq *vq)
+{
+ return le16toh(*virtq_used_event(vq));
+}
+
+/**
+ * virtqueue_get_head() - Get the head of the descriptor chain for a given
+ * index
+ * @vq: Virtqueue
+ * @idx: Available ring entry index (wrapped to the ring size here)
+ * @head: Head of the descriptor chain (output)
+ *
+ * Calls die() if the value read from the ring is not lower than the ring
+ * size.
+ */
+static void virtqueue_get_head(const struct vu_virtq *vq,
+ unsigned int idx, unsigned int *head)
+{
+ /* Grab the descriptor number they're advertising in this ring slot.
+ * No index is advanced here: that's left to the caller.
+ */
+ *head = vring_avail_ring(vq, idx % vq->vring.num);
+
+ /* If their number is silly, that's a fatal mistake. */
+ if (*head >= vq->vring.num)
+ die("vhost-user: Guest says index %u is available", *head);
+}
+
+/**
+ * virtqueue_read_indirect_desc() - Copy virtio ring descriptors from guest
+ * memory
+ * @dev: Vhost-user device
+ * @desc: Destination address to copy the descriptors to
+ * @addr: Guest memory address to copy from
+ * @len: Length of memory to copy
+ *
+ * Return: -1 if there is an error, 0 otherwise
+ */
+static int virtqueue_read_indirect_desc(struct vu_dev *dev, struct vring_desc *desc,
+ uint64_t addr, size_t len)
+{
+ uint64_t read_len;
+
+ if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc)))
+ return -1;
+
+ if (len == 0)
+ return -1;
+
+ while (len) {
+ const struct vring_desc *orig_desc;
+
+ read_len = len;
+ orig_desc = vu_gpa_to_va(dev, &read_len, addr);
In case you missed this in my review of v3 (I'm not sure if it's a
valid concern):
--
Should we also return if read_len < sizeof(struct vring_desc) after
this call? Can that ever happen, if we pick a particular value of addr
so that it's almost at the end of a region?
--
--
Stefano