Re: [PATCH v2 3/4] vhost-user: introduce vhost-user API

19 Jul 2024

On Fri, 12 Jul 2024 17:32:43 +0200
Laurent Vivier  wrote:
...
Add vhost_user.c and vhost_user.h that define the functions needed
to implement vhost-user backend.
Signed-off-by: Laurent Vivier 
---
 Makefile     |    4 +-
 iov.c        |    1 -
 vhost_user.c | 1267 ++++++++++++++++++++++++++++++++++++++++++++++++++
 vhost_user.h |  197 ++++++++
 virtio.c     |    5 -
 virtio.h     |    2 +-
 6 files changed, 1467 insertions(+), 9 deletions(-)
 create mode 100644 vhost_user.c
 create mode 100644 vhost_user.h

diff --git a/Makefile b/Makefile
index 39613a7cf1f2..b2da6ad62103 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
 PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
  icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
  ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
-	tcp_buf.c tcp_splice.c udp.c util.c virtio.c
+	tcp_buf.c tcp_splice.c udp.c util.c vhost_user.c virtio.c
 QRAP_SRCS = qrap.c
 SRCS = $(PASST_SRCS) $(QRAP_SRCS)
@@ -57,7 +57,7 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
  flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
  lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
  siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
-	udp.h util.h virtio.h
+	udp.h util.h vhost_user.h virtio.h
 HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
diff --git a/iov.c b/iov.c
index 3f9e229a305f..3741db21790f 100644
--- a/iov.c
+++ b/iov.c
@@ -68,7 +68,6 @@ size_t iov_skip_bytes(const struct iovec *iov, size_t n,
  *
  * Returns:    The number of bytes successfully copied.
  */
-/* cppcheck-suppress unusedFunction */
 size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
          size_t offset, const void *buf, size_t bytes)
 {
diff --git a/vhost_user.c b/vhost_user.c
new file mode 100644
index 000000000000..23ec4326995d
--- /dev/null
+++ b/vhost_user.c
@@ -0,0 +1,1267 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: Laurent Vivier 
+ *
+ * vhost-user API, command management and virtio interface
+ */
+/* some parts from QEMU subprojects/libvhost-user/libvhost-user.c */
Same here about attribution.
...
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "util.h"
+#include "passt.h"
+#include "tap.h"
+#include "vhost_user.h"
+
+/* vhost-user version we are compatible with */
+#define VHOST_USER_VERSION 1
+
+/**
+ * vu_print_capabilities() - print vhost-user capabilities
+ * 			     this is part of the vhost-user backend
+ * 			     convention.
+ */
+/* cppcheck-suppress unusedFunction */
+void vu_print_capabilities(void)
+{
+	printf("{\n");
+	printf("  \"type\": \"net\"\n");
+	printf("}\n");
I think this should be info() (added bonus: it adds newlines by itself).
...
+	exit(EXIT_SUCCESS);
+}
+
+/**
+ * vu_request_to_string() - convert a vhost-user request number to its name
+ * @req:	request number
+ *
+ * Return: the name of request number
+ */
+static const char *vu_request_to_string(unsigned int req)
+{
+	if (req < VHOST_USER_MAX) {
+#define REQ(req) [req] = #req
+		static const char * const vu_request_str[] = {
+			REQ(VHOST_USER_NONE),
+			REQ(VHOST_USER_GET_FEATURES),
+			REQ(VHOST_USER_SET_FEATURES),
+			REQ(VHOST_USER_SET_OWNER),
+			REQ(VHOST_USER_RESET_OWNER),
+			REQ(VHOST_USER_SET_MEM_TABLE),
+			REQ(VHOST_USER_SET_LOG_BASE),
+			REQ(VHOST_USER_SET_LOG_FD),
+			REQ(VHOST_USER_SET_VRING_NUM),
+			REQ(VHOST_USER_SET_VRING_ADDR),
+			REQ(VHOST_USER_SET_VRING_BASE),
+			REQ(VHOST_USER_GET_VRING_BASE),
+			REQ(VHOST_USER_SET_VRING_KICK),
+			REQ(VHOST_USER_SET_VRING_CALL),
+			REQ(VHOST_USER_SET_VRING_ERR),
+			REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
+			REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
+			REQ(VHOST_USER_GET_QUEUE_NUM),
+			REQ(VHOST_USER_SET_VRING_ENABLE),
+			REQ(VHOST_USER_SEND_RARP),
+			REQ(VHOST_USER_NET_SET_MTU),
+			REQ(VHOST_USER_SET_BACKEND_REQ_FD),
+			REQ(VHOST_USER_IOTLB_MSG),
+			REQ(VHOST_USER_SET_VRING_ENDIAN),
+			REQ(VHOST_USER_GET_CONFIG),
+			REQ(VHOST_USER_SET_CONFIG),
+			REQ(VHOST_USER_POSTCOPY_ADVISE),
+			REQ(VHOST_USER_POSTCOPY_LISTEN),
+			REQ(VHOST_USER_POSTCOPY_END),
+			REQ(VHOST_USER_GET_INFLIGHT_FD),
+			REQ(VHOST_USER_SET_INFLIGHT_FD),
+			REQ(VHOST_USER_GPU_SET_SOCKET),
+			REQ(VHOST_USER_VRING_KICK),
+			REQ(VHOST_USER_GET_MAX_MEM_SLOTS),
+			REQ(VHOST_USER_ADD_MEM_REG),
+			REQ(VHOST_USER_REM_MEM_REG),
+			REQ(VHOST_USER_MAX),
+		};
+#undef REQ
+		return vu_request_str[req];
+	}
+
+	return "unknown";
+}
+
+/**
+ * qva_to_va() -  Translate front-end (QEMU) virtual address to our virtual
+ * 		  address.
No period needed at the end of the description, it's not a proper
sentence.
...
+ * @dev:		Vhost-user device
+ * @qemu_addr:		front-end userspace address
+ *
+ * Return: the memory address in our process virtual address space.
+ */
+static void *qva_to_va(struct vu_dev *dev, uint64_t qemu_addr)
This whole function is _almost_ the same as vu_gpa_to_va() from 2/4...
could we just use/adjust that one with, say, 'plen' set to NULL?
...
+{
+	unsigned int i;
+
+	/* Find matching memory region.  */
+	for (i = 0; i < dev->nregions; i++) {
+		const struct vu_dev_region *r = &dev->regions[i];
+
+		if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
+			/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+			return (void *)(qemu_addr - r->qva + r->mmap_addr +
+					r->mmap_offset);
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * vmsg_close_fds() - Close all file descriptors of a given message
+ * @vmsg:	Vhost-user message with the list of the file descriptors
+ */
+static void vmsg_close_fds(const struct vhost_user_msg *vmsg)
+{
+	int i;
+
+	for (i = 0; i < vmsg->fd_num; i++)
+		close(vmsg->fds[i]);
+}
+
+/**
+ * vu_remove_watch() - Remove a file descriptor from our passt epoll
+ * 		       file descriptor
+ * @vdev:	Vhost-user device
+ * @fd:		file descriptor to remove
+ */
+static void vu_remove_watch(const struct vu_dev *vdev, int fd)
+{
+	(void)vdev;
+	(void)fd;
+}
+
+/**
+ * vmsg_set_reply_u64() - Set reply payload.u64 and clear request flags
+ * 			  and fd_num
+ * @vmsg:	Vhost-user message
+ * @val:	64bit value to reply
+ */
+static void vmsg_set_reply_u64(struct vhost_user_msg *vmsg, uint64_t val)
+{
+	vmsg->hdr.flags = 0; /* defaults will be set by vu_send_reply() */
+	vmsg->hdr.size = sizeof(vmsg->payload.u64);
+	vmsg->payload.u64 = val;
+	vmsg->fd_num = 0;
+}
+
+/**
+ * vu_message_read_default() - Read incoming vhost-user message from the
+ * 			       front-end
+ * @conn_fd:	Vhost-user command socket
+ * @vmsg:	Vhost-user message
+ *
+ * Return: -1 if there is an error,
+ *          0 if recvmsg() has been interrupted,
+ *          1 if a message has been received
+ */
+static int vu_message_read_default(int conn_fd, struct vhost_user_msg *vmsg)
+{
+	char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS *
+		     sizeof(int))] = { 0 };
+	struct iovec iov = {
+		.iov_base = (char *)vmsg,
+		.iov_len = VHOST_USER_HDR_SIZE,
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = control,
+		.msg_controllen = sizeof(control),
+	};
+	size_t fd_size;
+	struct cmsghdr *cmsg;
+	ssize_t ret, sz_payload;
+
+	ret = recvmsg(conn_fd, &msg, MSG_DONTWAIT);
+	if (ret < 0) {
+		if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
+			return 0;
+		return -1;
+	}
+
+	vmsg->fd_num = 0;
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
+	     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		if (cmsg->cmsg_level == SOL_SOCKET &&
+		    cmsg->cmsg_type == SCM_RIGHTS) {
+			fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+			vmsg->fd_num = fd_size / sizeof(int);
+			memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
Coverity Scan is not really happy about using fd_size as received by
recvmsg() without sanitising it. This isn't really security-relevant
because if the hypervisor wants to affect its connectivity, it can
already do so, but it would be nice to make this robust.

I guess you could check that it doesn't exceed
VHOST_MEMORY_BASELINE_NREGIONS? I see you have this as assert() in
vu_message_write().
...
+			break;
+		}
+	}
+
+	sz_payload = vmsg->hdr.size;
+	if ((size_t)sz_payload > sizeof(vmsg->payload)) {
+		vu_panic("Error: too big message request: %d,"
Same in this patch about using die() instead.
...
+			 " size: vmsg->size: %zd, "
+			 "while sizeof(vmsg->payload) = %zu",
+			 vmsg->hdr.request, sz_payload, sizeof(vmsg->payload));
+	}
+
+	if (sz_payload) {
+		do {
+			ret = recv(conn_fd, &vmsg->payload, sz_payload, 0);
+		} while (ret < 0 && (errno == EINTR || errno == EAGAIN));
+
+		if (ret < sz_payload)
+			vu_panic("Error while reading: %s",
+				 strerror(errno));
+	}
+
+	return 1;
+}
+
+/**
+ * vu_message_write() - send a message to the front-end
+ * @conn_fd:	Vhost-user command socket
+ * @vmsg:	Vhost-user message
+ *
+ * #syscalls:vu sendmsg
+ */
+static void vu_message_write(int conn_fd, struct vhost_user_msg *vmsg)
+{
+	int rc;
+	const uint8_t *p = (uint8_t *)vmsg;
+	char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = { 0 };
+	struct iovec iov = {
+		.iov_base = (char *)vmsg,
+		.iov_len = VHOST_USER_HDR_SIZE,
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = control,
+	};
+
+	memset(control, 0, sizeof(control));
+	assert(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS);
+	if (vmsg->fd_num > 0) {
+		size_t fdsize = vmsg->fd_num * sizeof(int);
+		struct cmsghdr *cmsg;
+
+		msg.msg_controllen = CMSG_SPACE(fdsize);
+		cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize);
+	} else {
+		msg.msg_controllen = 0;
+	}
+
+	do {
+		rc = sendmsg(conn_fd, &msg, 0);
+	} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+	if (vmsg->hdr.size) {
+		do {
+			rc = write(conn_fd, p + VHOST_USER_HDR_SIZE,
+				   vmsg->hdr.size);
+		} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+	}
+
+	if (rc <= 0)
+		vu_panic("Error while writing: %s", strerror(errno));
+}
+
+/**
+ * vu_send_reply() - Update message flags and send it to front-end
+ * @conn_fd:	Vhost-user command socket
+ * @vmsg:	Vhost-user message
+ */
+static void vu_send_reply(int conn_fd, struct vhost_user_msg *msg)
+{
+	msg->hdr.flags &= ~VHOST_USER_VERSION_MASK;
+	msg->hdr.flags |= VHOST_USER_VERSION;
+	msg->hdr.flags |= VHOST_USER_REPLY_MASK;
+
+	vu_message_write(conn_fd, msg);
+}
+
+/**
+ * vu_get_features_exec() - Provide back-end features bitmask to front-end
+ * @vmsg:	Vhost-user message
+ *
+ * Return: true as a reply is requested
+ */
+static bool vu_get_features_exec(struct vhost_user_msg *msg)
+{
+	uint64_t features =
+		1ULL << VIRTIO_F_VERSION_1 |
+		1ULL << VIRTIO_NET_F_MRG_RXBUF |
+		1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
+
+	vmsg_set_reply_u64(msg, features);
+
+	debug("Sending back to guest u64: 0x%016"PRIx64, msg->payload.u64);
+
+	return true;
+}
+
+/**
+ * vu_set_enable_all_rings() - Enable/disable all the virqueues
s/virqueues/virtqueues/
...
+ * @vdev:	Vhost-user device
+ * @enabled:	New virtqueues state
Perhaps 'enable' (imperative) instead of 'enabled' (indicative), so that
it's clear it's the (new) state we want?
...
+ */
+static void vu_set_enable_all_rings(struct vu_dev *vdev, bool enabled)
+{
+	uint16_t i;
+
+	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++)
+		vdev->vq[i].enable = enabled;
+}
+
+/**
+ * vu_set_features_exec() - Enable features of the back-end
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_features_exec(struct vu_dev *vdev,
+				 struct vhost_user_msg *msg)
+{
+	debug("u64: 0x%016"PRIx64, msg->payload.u64);
+
+	vdev->features = msg->payload.u64;
+	/*
No need to have an extra line on top: /* We only support ...
...
+	 * We only support devices conforming to VIRTIO 1.0 or
+	 * later
+	 */
+	if (!vu_has_feature(vdev, VIRTIO_F_VERSION_1))
+		vu_panic("virtio legacy devices aren't supported by passt");
+
+	if (!vu_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES))
+		vu_set_enable_all_rings(vdev, true);
+
+	/* virtio-net features */
+
+	if (vu_has_feature(vdev, VIRTIO_F_VERSION_1) ||
+	    vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
+		vdev->hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	} else {
+		vdev->hdrlen = sizeof(struct virtio_net_hdr);
+	}
+
+	return false;
+}
+
+/**
+ * vu_set_owner_exec() - Session start flag, do nothing in our case
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_owner_exec(void)
+{
+	return false;
+}
+
+/**
+ * map_ring() - Convert ring front-end (QEMU) addresses to our process
+ * 		virtual address space.
+ * @vdev:	Vhost-user device
+ * @vq:		Virtqueue
+ *
+ * Return: true if ring cannot be mapped to our address space
+ */
+static bool map_ring(struct vu_dev *vdev, struct vu_virtq *vq)
+{
+	vq->vring.desc = qva_to_va(vdev, vq->vra.desc_user_addr);
+	vq->vring.used = qva_to_va(vdev, vq->vra.used_user_addr);
+	vq->vring.avail = qva_to_va(vdev, vq->vra.avail_user_addr);
+
+	debug("Setting virtq addresses:");
+	debug("    vring_desc  at %p", (void *)vq->vring.desc);
+	debug("    vring_used  at %p", (void *)vq->vring.used);
+	debug("    vring_avail at %p", (void *)vq->vring.avail);
+
+	return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
+}
+
+/**
+ * vu_packet_check_range() - Check if a given memory zone is contained in
+ * 			     a mapped guest memory region
+ * @buf:	Array of the available memory regions
+ * @offset:	Offset of data range in packet descriptor
+ * @len:	Length of desired data range
+ * @start:	Start of the packet descriptor
+ *
+ * Return: 0 if the zone is in a mapped memory region, -1 otherwise
+ */
+/* cppcheck-suppress unusedFunction */
+int vu_packet_check_range(void *buf, size_t offset, size_t len,
+			  const char *start)
+{
+	struct vu_dev_region *dev_region;
+
+	for (dev_region = buf; dev_region->mmap_addr; dev_region++) {
+		/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+		char *m = (char *)dev_region->mmap_addr;
+
+		if (m <= start &&
+		    start + offset + len < m + dev_region->mmap_offset +
+					       dev_region->size)
+			return 0;
+	}
+
+	return -1;
+}
+
+/**
+ * vu_set_mem_table_exec() - Sets the memory map regions to be able to
+ * 			     translate the vring addresses. 
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ *
+ * #syscalls:vu mmap munmap
+ */
+static bool vu_set_mem_table_exec(struct vu_dev *vdev,
+				  struct vhost_user_msg *msg)
+{
+	unsigned int i;
+	struct vhost_user_memory m = msg->payload.memory, *memory = &m;
+
+	for (i = 0; i < vdev->nregions; i++) {
+		struct vu_dev_region *r = &vdev->regions[i];
+		/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+		void *mm = (void *)r->mmap_addr;
+
+		if (mm)
+			munmap(mm, r->size + r->mmap_offset);
+	}
+	vdev->nregions = memory->nregions;
+
+	debug("Nregions: %u", memory->nregions);
+	for (i = 0; i < vdev->nregions; i++) {
+		void *mmap_addr;
+		struct vhost_user_memory_region *msg_region = &memory->regions[i];
+		struct vu_dev_region *dev_region = &vdev->regions[i];
+
+		debug("Region %d", i);
+		debug("    guest_phys_addr: 0x%016"PRIx64,
+		      msg_region->guest_phys_addr);
+		debug("    memory_size:     0x%016"PRIx64,
+		      msg_region->memory_size);
+		debug("    userspace_addr   0x%016"PRIx64,
+		      msg_region->userspace_addr);
+		debug("    mmap_offset      0x%016"PRIx64,
+		      msg_region->mmap_offset);
+
+		dev_region->gpa = msg_region->guest_phys_addr;
+		dev_region->size = msg_region->memory_size;
+		dev_region->qva = msg_region->userspace_addr;
+		dev_region->mmap_offset = msg_region->mmap_offset;
+
+		/* We don't use offset argument of mmap() since the
+		 * mapped address has to be page aligned, and we use huge
+		 * pages.
+		 */
+		mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
+				 PROT_READ | PROT_WRITE, MAP_SHARED |
+				 MAP_NORESERVE, msg->fds[i], 0);
+
+		if (mmap_addr == MAP_FAILED)
+			vu_panic("region mmap error: %s", strerror(errno));
+
+		dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
+		debug("    mmap_addr:       0x%016"PRIx64,
+		      dev_region->mmap_addr);
+
+		close(msg->fds[i]);
+	}
+
+	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) {
+		if (vdev->vq[i].vring.desc) {
+			if (map_ring(vdev, &vdev->vq[i]))
+				vu_panic("remapping queue %d during setmemtable", i);
+		}
+	}
+
+	return false;
+}
+
+/**
+ * vu_set_vring_num_exec() - Set the size of the queue (vring size)
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_vring_num_exec(struct vu_dev *vdev,
+				  struct vhost_user_msg *msg)
+{
+	unsigned int idx = msg->payload.state.index;
+	unsigned int num = msg->payload.state.num;
+
+	debug("State.index: %u", idx);
+	debug("State.num:   %u", num);
+	vdev->vq[idx].vring.num = num;
+
+	return false;
+}
+
+/**
+ * vu_set_vring_addr_exec() - Set the addresses of the vring
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_vring_addr_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
+{
+	struct vhost_vring_addr addr = msg->payload.addr, *vra = &addr;
+	struct vu_virtq *vq = &vdev->vq[vra->index];
+
+	debug("vhost_vring_addr:");
+	debug("    index:  %d", vra->index);
+	debug("    flags:  %d", vra->flags);
+	debug("    desc_user_addr:   0x%016" PRIx64, (uint64_t)vra->desc_user_addr);
+	debug("    used_user_addr:   0x%016" PRIx64, (uint64_t)vra->used_user_addr);
+	debug("    avail_user_addr:  0x%016" PRIx64, (uint64_t)vra->avail_user_addr);
+	debug("    log_guest_addr:   0x%016" PRIx64, (uint64_t)vra->log_guest_addr);
+
+	vq->vra = *vra;
+	vq->vring.flags = vra->flags;
+	vq->vring.log_guest_addr = vra->log_guest_addr;
+
+	if (map_ring(vdev, vq))
+		vu_panic("Invalid vring_addr message");
+
+	vq->used_idx = le16toh(vq->vring.used->idx);
+
+	if (vq->last_avail_idx != vq->used_idx) {
+		debug("Last avail index != used index: %u != %u",
+		      vq->last_avail_idx, vq->used_idx);
+	}
+
+	return false;
+}
+/**
+ * vu_set_vring_base_exec() - Sets the next index to use for descriptors
+ * 			      in this vring
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_vring_base_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
+{
+	unsigned int idx = msg->payload.state.index;
+	unsigned int num = msg->payload.state.num;
+
+	debug("State.index: %u", idx);
+	debug("State.num:   %u", num);
+	vdev->vq[idx].shadow_avail_idx = vdev->vq[idx].last_avail_idx = num;
+
+	return false;
+}
+
+/**
+ * vu_get_vring_base_exec() - Stops the vring and returns the current
+ * 			      descriptor index or indices
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: true as a reply is requested
+ */
+static bool vu_get_vring_base_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
+{
+	unsigned int idx = msg->payload.state.index;
+
+	debug("State.index: %u", idx);
+	msg->payload.state.num = vdev->vq[idx].last_avail_idx;
+	msg->hdr.size = sizeof(msg->payload.state);
+
+	vdev->vq[idx].started = false;
+
+	if (vdev->vq[idx].call_fd != -1) {
+		close(vdev->vq[idx].call_fd);
+		vdev->vq[idx].call_fd = -1;
+	}
+	if (vdev->vq[idx].kick_fd != -1) {
+		vu_remove_watch(vdev,  vdev->vq[idx].kick_fd);
+		close(vdev->vq[idx].kick_fd);
+		vdev->vq[idx].kick_fd = -1;
+	}
+
+	return true;
+}
+
+/**
+ * vu_set_watch() - Add a file descriptor to the passt epoll file descriptor
+ * @vdev:	vhost-user device
+ * @fd:		file descriptor to add
+ */
+static void vu_set_watch(const struct vu_dev *vdev, int fd)
+{
+	(void)vdev;
+	(void)fd;
+}
+
+/**
+ * vu_wait_queue() - wait for new free entries in the virtqueue
+ * @vq:		virtqueue to wait on
+ */
+static int vu_wait_queue(const struct vu_virtq *vq)
+{
+	eventfd_t kick_data;
+	ssize_t rc;
+	int status;
+
+	/* wait for the kernel to put new entries in the queue */
+
+	status = fcntl(vq->kick_fd, F_GETFL);
+	if (status == -1)
+		return -1;
+
+	fcntl(vq->kick_fd, F_SETFL, status & ~O_NONBLOCK);
Here, and two lines below, Coverity Scan complains about the fact that
you're using fcntl() without checking the return value.
...
+	rc =  eventfd_read(vq->kick_fd, &kick_data);
Extra whitespace after =.
...
+	fcntl(vq->kick_fd, F_SETFL, status);
+	if (rc == -1)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * vu_send() - Send a buffer to the front-end using the RX virtqueue
+ * @vdev:	vhost-user device
+ * @buf:	address of the buffer
+ * @size:	size of the buffer
+ *
+ * Return: number of bytes sent, -1 if there is an error
+ */
+/* cppcheck-suppress unusedFunction */
+int vu_send(struct vu_dev *vdev, const void *buf, size_t size)
+{
+	size_t hdrlen = vdev->hdrlen;
+	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+	struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
+	struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
+	size_t lens[VIRTQUEUE_MAX_SIZE];
+	size_t offset;
+	int i, j;
+	__virtio16 *num_buffers_ptr;
+	int in_sg_count;
Can those be aligned in the usual way (from longest to shortest)?
...
+
+	debug("vu_send size %zu hdrlen %zu", size, hdrlen);
+
+	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
+		err("Got packet, but no available descriptors on RX virtq.");
+		return 0;
+	}
+
+	offset = 0;
+	i = 0;
+	num_buffers_ptr = NULL;
+	in_sg_count = 0;
Could those be initialised when you declare them?
...
+	while (offset < size) {
+		size_t len;
+		int total;
+		int ret;
+
+		total = 0;
+
+		if (i == ARRAY_SIZE(elem) ||
+		    in_sg_count == ARRAY_SIZE(in_sg)) {
+			err("virtio-net unexpected long buffer chain");
+			goto err;
+		}
+
+		elem[i].out_num = 0;
+		elem[i].out_sg = NULL;
+		elem[i].in_num = ARRAY_SIZE(in_sg) - in_sg_count;
+		elem[i].in_sg = &in_sg[in_sg_count];
+
+		ret = vu_queue_pop(vdev, vq, &elem[i]);
+		if (ret < 0) {
+			if (vu_wait_queue(vq) != -1)
+				continue;
+			if (i) {
+				err("virtio-net unexpected empty queue: "
+				    "i %d mergeable %d offset %zd, size %zd, "
+				    "features 0x%" PRIx64,
+				    i, vu_has_feature(vdev,
+						      VIRTIO_NET_F_MRG_RXBUF),
+				    offset, size, vdev->features);
+			}
+			offset = -1;
+			goto err;
+		}
+		in_sg_count += elem[i].in_num;
+
+		if (elem[i].in_num < 1) {
+			err("virtio-net receive queue contains no in buffers");
+			vu_queue_detach_element(vdev, vq, elem[i].index, 0);
+			offset = -1;
+			goto err;
+		}
+
+		if (i == 0) {
+			struct virtio_net_hdr hdr = {
+				.flags = VIRTIO_NET_HDR_F_DATA_VALID,
+				.gso_type = VIRTIO_NET_HDR_GSO_NONE,
+			};
+
+			ASSERT(offset == 0);
+			ASSERT(elem[i].in_sg[0].iov_len >= hdrlen);
+
+			len = iov_from_buf(elem[i].in_sg, elem[i].in_num, 0,
+					   &hdr, sizeof(hdr));
+
+			num_buffers_ptr = (__virtio16 *)((char *)elem[i].in_sg[0].iov_base +
+							 len);
+
+			total += hdrlen;
Shouldn't this be 'total += len' or, alternatively, shouldn't there be
a check that len == hdrlen?
...
+		}
+
+		len = iov_from_buf(elem[i].in_sg, elem[i].in_num, total,
+				   (char *)buf + offset, size - offset);
+
+		total += len;
+		offset += len;
+
+		/* If buffers can't be merged, at this point we
+		 * must have consumed the complete packet.
+		 * Otherwise, drop it.
+		 */
+		if (!vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) &&
+		    offset < size) {
+			vu_queue_unpop(vdev, vq, elem[i].index, total);
+			goto err;
+		}
+
+		lens[i] = total;
+		i++;
+	}
+
+	if (num_buffers_ptr && vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
+		*num_buffers_ptr = htole16(i);
+
+	for (j = 0; j < i; j++) {
+		debug("filling total %zd idx %d", lens[j], j);
+		vu_queue_fill(vq, &elem[j], lens[j], j);
+	}
+
+	vu_queue_flush(vq, i);
+	vu_queue_notify(vdev, vq);
+
+	debug("sent %zu", offset);
It would be nice to be a bit more specific here ("vhost-user sent ..."
or something like that).
...
+
+	return offset;
+err:
+	for (j = 0; j < i; j++)
+		vu_queue_detach_element(vdev, vq, elem[j].index, lens[j]);
+
+	return offset;
+}
+
+/**
+ * vu_handle_tx() - Receive data from the TX virqueue
s/virqueue/virtqueue/
...
+ * @vdev:	vhost-user device
+ * @index:	index of the virtqueue
+ */
+static void vu_handle_tx(struct vu_dev *vdev, int index)
+{
+	struct vu_virtq *vq = &vdev->vq[index];
+	int hdrlen = vdev->hdrlen;
+	struct timespec now;
+	struct vu_virtq_element  elem[VIRTQUEUE_MAX_SIZE];
Excess whitespace.
...
+	struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
+	int out_sg_count;
+
Excess newline.
...
+	int count;
Could those be ordered in the usual way?
...
+
+	if (index % 2 != VHOST_USER_TX_QUEUE) {
This, and similar checks below, are rather convoluted. The defines are
misleading:

  /* index of the RX virtqueue */
  #define VHOST_USER_RX_QUEUE 0

...but no, 2 is a receive queue too. Perhaps it would be more readable
to just have something like:

#define VHOST_USER_IS_QUEUE_TX(n)	(n % 2)
#define VHOST_USER_IS_QUEUE_RX(n)	(!(n % 2))

?
...
+		debug("index %d is not a TX queue", index);
+		return;
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
I guess vu_kick_cb() could take a timestamp instead?
...
+
+	tap_flush_pools();
+
+	count = 0;
+	out_sg_count = 0;
+	while (1) {
+		int ret;
+
+		ASSERT(index == VHOST_USER_TX_QUEUE);
...why is this one here? 'index' doesn't actually change in this loop.
...
+
+		elem[count].out_num = 1;
+		elem[count].out_sg = &out_sg[out_sg_count];
+		elem[count].in_num = 0;
+		elem[count].in_sg = NULL;
+		ret = vu_queue_pop(vdev, vq, &elem[count]);
+		if (ret < 0)
+			break;
This (a bit hidden) is the intended loop termination condition. I
wonder: should we add an upper limit to the packets that can be
dequeued in one run, or is there no risk of this loop starving
everything else for some other reason?
...
+		out_sg_count += elem[count].out_num;
+
+		if (elem[count].out_num < 1) {
+			debug("virtio-net header not in first element");
+			break;
+		}
+		ASSERT(elem[count].out_num == 1);
+
+		tap_add_packet(vdev->context,
+			       elem[count].out_sg[0].iov_len - hdrlen,
+			       (char *)elem[count].out_sg[0].iov_base + hdrlen);
+		count++;
+	}
+	tap_handler(vdev->context, &now);
+
+	if (count) {
+		int i;
+
+		for (i = 0; i < count; i++)
+			vu_queue_fill(vq, &elem[i], 0, i);
+		vu_queue_flush(vq, count);
+		vu_queue_notify(vdev, vq);
+	}
+}
+
+/**
+ * vu_kick_cb() - Called on a kick event to start to receive data
+ * @vdev:	vhost-user device
+ * @ref:	epoll reference information
+ */
+/* cppcheck-suppress unusedFunction */
+void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref)
+{
+	eventfd_t kick_data;
+	ssize_t rc;
+	int idx;
+
+	for (idx = 0; idx < VHOST_USER_MAX_QUEUES; idx++)
+		if (vdev->vq[idx].kick_fd == ref.fd)
+			break;
+
+	if (idx == VHOST_USER_MAX_QUEUES)
+		return;
+
+	rc =  eventfd_read(ref.fd, &kick_data);
Extra whitespace after =.
...
+	if (rc == -1)
+		vu_panic("kick eventfd_read(): %s", strerror(errno));
+
+	debug("Got kick_data: %016"PRIx64" idx:%d",
+	      kick_data, idx);
+	if (idx % 2 == VHOST_USER_TX_QUEUE)
+		vu_handle_tx(vdev, idx);
+}
+
+/**
+ * vu_check_queue_msg_file() - Check if a message is valid,
+ * 			       close fds if NOFD bit is set
+ * @vmsg:	Vhost-user message
+ */
+static void vu_check_queue_msg_file(struct vhost_user_msg *msg)
+{
+	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+
+	if (idx >= VHOST_USER_MAX_QUEUES)
+		vu_panic("Invalid queue index: %u", idx);
+
+	if (nofd) {
+		vmsg_close_fds(msg);
+		return;
+	}
+
+	if (msg->fd_num != 1)
+		vu_panic("Invalid fds in request: %d", msg->hdr.request);
+}
+
+/**
+ * vu_set_vring_kick_exec() - Set the event file descriptor for adding buffers
+ * 			      to the vring
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_vring_kick_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
+{
+	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+
+	debug("u64: 0x%016"PRIx64, msg->payload.u64);
+
+	vu_check_queue_msg_file(msg);
+
+	if (vdev->vq[idx].kick_fd != -1) {
+		vu_remove_watch(vdev, vdev->vq[idx].kick_fd);
+		close(vdev->vq[idx].kick_fd);
+		vdev->vq[idx].kick_fd = -1;
+	}
+
+	/* cppcheck-suppress redundantAssignment */
Actually, it's not clear to me either: why is this assigned just above?
...
+	vdev->vq[idx].kick_fd = nofd ? -1 : msg->fds[0];
+	debug("Got kick_fd: %d for vq: %d", vdev->vq[idx].kick_fd, idx);
+
+	vdev->vq[idx].started = true;
+
+	if (vdev->vq[idx].kick_fd != -1 && idx % 2 == VHOST_USER_TX_QUEUE) {
+		vu_set_watch(vdev, vdev->vq[idx].kick_fd);
+		debug("Waiting for kicks on fd: %d for vq: %d",
+		      vdev->vq[idx].kick_fd, idx);
+	}
+
+	return false;
+}
+
+/**
+ * vu_set_vring_call_exec() - Set the event file descriptor to signal when
+ * 			      buffers are used
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_vring_call_exec(struct vu_dev *vdev,
+				   struct vhost_user_msg *msg)
+{
+	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+
+	debug("u64: 0x%016"PRIx64, msg->payload.u64);
+
+	vu_check_queue_msg_file(msg);
+
+	if (vdev->vq[idx].call_fd != -1) {
+		close(vdev->vq[idx].call_fd);
+		vdev->vq[idx].call_fd = -1;
+	}
+
+	/* cppcheck-suppress redundantAssignment */
+	vdev->vq[idx].call_fd = nofd ? -1 : msg->fds[0];
+
+	/* in case of I/O hang after reconnecting */
+	if (vdev->vq[idx].call_fd != -1)
+		eventfd_write(msg->fds[0], 1);
+
+	debug("Got call_fd: %d for vq: %d", vdev->vq[idx].call_fd, idx);
+
+	return false;
+}
+
+/**
+ * vu_set_vring_err_exec() - Set the event file descriptor to signal when
+ * 			     error occurs
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_vring_err_exec(struct vu_dev *vdev,
+				  struct vhost_user_msg *msg)
+{
+	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+
+	debug("u64: 0x%016"PRIx64, msg->payload.u64);
+
+	vu_check_queue_msg_file(msg);
+
+	if (vdev->vq[idx].err_fd != -1) {
+		close(vdev->vq[idx].err_fd);
+		vdev->vq[idx].err_fd = -1;
+	}
+
+	/* cppcheck-suppress redundantAssignment */
...same here.
...
+	vdev->vq[idx].err_fd = nofd ? -1 : msg->fds[0];
+
+	return false;
+}
+
+/**
+ * vu_get_protocol_features_exec() - Provide the protocol (vhost-user) features
+ * 				     to the front-end
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: true as a reply is requested
+ */
+static bool vu_get_protocol_features_exec(struct vhost_user_msg *msg)
+{
+	uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK;
+
+	vmsg_set_reply_u64(msg, features);
+
+	return true;
+}
+
+/**
+ * vu_set_protocol_features_exec() - Enable protocol (vhost-user) features 
+ * @vdev:	Vhost-user device
+ * @vmsg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_protocol_features_exec(struct vu_dev *vdev,
+					  struct vhost_user_msg *msg)
+{
+	uint64_t features = msg->payload.u64;
+
+	debug("u64: 0x%016"PRIx64, features);
+
+	vdev->protocol_features = msg->payload.u64;
+
+	if (vu_has_protocol_feature(vdev,
+				    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
+	    (!vu_has_protocol_feature(vdev, VHOST_USER_PROTOCOL_F_BACKEND_REQ) ||
+	     !vu_has_protocol_feature(vdev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
+		/*
+		 * The use case for using messages for kick/call is simulation,
+		 * to make the kick and call synchronous. To actually get that
+		 * behaviour, both of the other features are required.
+		 * Theoretically, one could use only kick messages, or do them
+		 * without having F_REPLY_ACK, but too many (possibly pending)
+		 * messages on the socket will eventually cause the front-end
+		 * to hang. To avoid this in scenarios where it is not desired,
+		 * enforce that the settings actually enable the simulation
+		 * case.
+		 */
+		vu_panic("F_IN_BAND_NOTIFICATIONS requires F_BACKEND_REQ && F_REPLY_ACK");
+		return false;
+	}
+
+	return false;
+}
+
+/**
+ * vu_get_queue_num_exec() - Tell how many queues we support
+ * @msg:	Vhost-user message
+ *
+ * Return: true as a reply is requested
+ */
+static bool vu_get_queue_num_exec(struct vhost_user_msg *msg)
+{
+	vmsg_set_reply_u64(msg, VHOST_USER_MAX_QUEUES);
+	return true;
+}
+
+/**
+ * vu_set_vring_enable_exec() - Enable or disable corresponding vring
+ * @vdev:	Vhost-user device
+ * @msg:	Vhost-user message
+ *
+ * Return: false as no reply is requested
+ */
+static bool vu_set_vring_enable_exec(struct vu_dev *vdev,
+				     struct vhost_user_msg *msg)
+{
+	unsigned int idx = msg->payload.state.index;
+	unsigned int enable = msg->payload.state.num;
+
+	debug("State.index:  %u", idx);
+	debug("State.enable: %u", enable);
+
+	if (idx >= VHOST_USER_MAX_QUEUES)
+		vu_panic("Invalid vring_enable index: %u", idx);
+
+	vdev->vq[idx].enable = enable;
+	return false;
+}
+
+/**
+ * vu_init() - Initialize vhost-user device structure
+ * @c:		execution context
+ * @vdev:	vhost-user device
+ */
+/* cppcheck-suppress unusedFunction */
+void vu_init(struct ctx *c, struct vu_dev *vdev)
+{
+	int i;
+
+	vdev->context = c;
+	vdev->hdrlen = 0;
+	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++)
Curly brackets for multi-line blocks (for consistency, not needed
otherwise).
...
+		vdev->vq[i] = (struct vu_virtq){
+			.call_fd = -1,
+			.kick_fd = -1,
+			.err_fd = -1,
+			.notification = true,
+		};
+}
+
+/**
+ * vu_cleanup() - Reset vhost-user device
+ * @vdev:	vhost-user device
+ */
+void vu_cleanup(struct vu_dev *vdev)
+{
+	unsigned int i;
+
+	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) {
+		struct vu_virtq *vq = &vdev->vq[i];
+
+		vq->started = false;
+		vq->notification = true;
+
+		if (vq->call_fd != -1) {
+			close(vq->call_fd);
+			vq->call_fd = -1;
+		}
+		if (vq->err_fd != -1) {
+			close(vq->err_fd);
+			vq->err_fd = -1;
+		}
+		if (vq->kick_fd != -1) {
+			vu_remove_watch(vdev,  vq->kick_fd);
Excess whitespace.
...
+			close(vq->kick_fd);
+			vq->kick_fd = -1;
+		}
+
+		vq->vring.desc = 0;
+		vq->vring.used = 0;
+		vq->vring.avail = 0;
+	}
+	vdev->hdrlen = 0;
+
+	for (i = 0; i < vdev->nregions; i++) {
+		const struct vu_dev_region *r = &vdev->regions[i];
+		/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+		void *m = (void *)r->mmap_addr;
+
+		if (m)
+			munmap(m, r->size + r->mmap_offset);
+	}
+	vdev->nregions = 0;
+}
+
+/**
+ * vu_sock_reset() - Reset connection socket
+ * @vdev:	vhost-user device
+ */
+static void vu_sock_reset(struct vu_dev *vdev)
+{
+	(void)vdev;
+}
+
+/**
+ * tap_handler_vu() - Packet handler for vhost-user
+ * @vdev:	vhost-user device
+ * @fd:		vhost-user message socket
+ * @events:	epoll events
+ */
+/* cppcheck-suppress unusedFunction */
+void tap_handler_vu(struct vu_dev *vdev, int fd, uint32_t events)
+{
+	struct vhost_user_msg msg = { 0 };
+	bool need_reply, reply_requested;
+	int ret;
+
+	if (events & (EPOLLRDHUP | EPOLLHUP | EPOLLERR)) {
+		vu_sock_reset(vdev);
+		return;
+	}
+
Excess newline.
...
+
+	ret = vu_message_read_default(fd, &msg);
+	if (ret < 0)
+		vu_panic("Error while recvmsg: %s", strerror(errno));
die() also has a die_perror() variant, by the way.
...
+	if (ret == 0) {
+		vu_sock_reset(vdev);
+		return;
+	}
+	debug("================ Vhost user message ================");
+	debug("Request: %s (%d)", vu_request_to_string(msg.hdr.request),
+		msg.hdr.request);
+	debug("Flags:   0x%x", msg.hdr.flags);
+	debug("Size:    %u", msg.hdr.size);
+
+	need_reply = msg.hdr.flags & VHOST_USER_NEED_REPLY_MASK;
+	switch (msg.hdr.request) {
+	case VHOST_USER_GET_FEATURES:
+		reply_requested = vu_get_features_exec(&msg);
+		break;
+	case VHOST_USER_SET_FEATURES:
+		reply_requested = vu_set_features_exec(vdev, &msg);
+		break;
+	case VHOST_USER_GET_PROTOCOL_FEATURES:
+		reply_requested = vu_get_protocol_features_exec(&msg);
+		break;
+	case VHOST_USER_SET_PROTOCOL_FEATURES:
+		reply_requested = vu_set_protocol_features_exec(vdev, &msg);
+		break;
+	case VHOST_USER_GET_QUEUE_NUM:
+		reply_requested = vu_get_queue_num_exec(&msg);
+		break;
+	case VHOST_USER_SET_OWNER:
+		reply_requested = vu_set_owner_exec();
+		break;
+	case VHOST_USER_SET_MEM_TABLE:
+		reply_requested = vu_set_mem_table_exec(vdev, &msg);
+		break;
+	case VHOST_USER_SET_VRING_NUM:
+		reply_requested = vu_set_vring_num_exec(vdev, &msg);
+		break;
+	case VHOST_USER_SET_VRING_ADDR:
+		reply_requested = vu_set_vring_addr_exec(vdev, &msg);
+		break;
+	case VHOST_USER_SET_VRING_BASE:
+		reply_requested = vu_set_vring_base_exec(vdev, &msg);
+		break;
+	case VHOST_USER_GET_VRING_BASE:
+		reply_requested = vu_get_vring_base_exec(vdev, &msg);
+		break;
+	case VHOST_USER_SET_VRING_KICK:
+		reply_requested = vu_set_vring_kick_exec(vdev, &msg);
+		break;
+	case VHOST_USER_SET_VRING_CALL:
+		reply_requested = vu_set_vring_call_exec(vdev, &msg);
+		break;
+	case VHOST_USER_SET_VRING_ERR:
+		reply_requested = vu_set_vring_err_exec(vdev, &msg);
+		break;
+	case VHOST_USER_SET_VRING_ENABLE:
+		reply_requested = vu_set_vring_enable_exec(vdev, &msg);
+		break;
+	case VHOST_USER_NONE:
+		vu_cleanup(vdev);
+		return;
+	default:
+		vu_panic("Unhandled request: %d", msg.hdr.request);
+		return;
+	}
+
+	if (!reply_requested && need_reply) {
+		msg.payload.u64 = 0;
+		msg.hdr.flags = 0;
+		msg.hdr.size = sizeof(msg.payload.u64);
+		msg.fd_num = 0;
+		reply_requested = true;
+	}
+
+	if (reply_requested)
+		vu_send_reply(fd, &msg);
+}
diff --git a/vhost_user.h b/vhost_user.h
new file mode 100644
index 000000000000..b9e4bcf8e531
--- /dev/null
+++ b/vhost_user.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: Laurent Vivier 
+ *
+ * vhost-user API, command management and virtio interface
+ */
+
+/* Some parts from QEMU's subprojects/libvhost-user/libvhost-user.h */
+
+#ifndef VHOST_USER_H
+#define VHOST_USER_H
+
+#include "virtio.h"
+#include "iov.h"
+
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+
+#define VHOST_MEMORY_BASELINE_NREGIONS 8
+
+/**
+ * enum vhost_user_protocol_feature - List of available vhost-user features
+ */
+enum vhost_user_protocol_feature {
+	VHOST_USER_PROTOCOL_F_MQ = 0,
+	VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+	VHOST_USER_PROTOCOL_F_RARP = 2,
+	VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
+	VHOST_USER_PROTOCOL_F_NET_MTU = 4,
+	VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
+	VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
+	VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
+	VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
+	VHOST_USER_PROTOCOL_F_CONFIG = 9,
+	VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
+	VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
+	VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
+	VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
+	VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
+
+	VHOST_USER_PROTOCOL_F_MAX
+};
+
+/**
+ * enum vhost_user_request - List of available vhost-user requests
+ */
+enum vhost_user_request {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_BACKEND_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_SET_VRING_ENDIAN = 23,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
+	VHOST_USER_CREATE_CRYPTO_SESSION = 26,
+	VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
+	VHOST_USER_POSTCOPY_ADVISE  = 28,
+	VHOST_USER_POSTCOPY_LISTEN  = 29,
+	VHOST_USER_POSTCOPY_END     = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_GPU_SET_SOCKET = 33,
+	VHOST_USER_VRING_KICK = 35,
+	VHOST_USER_GET_MAX_MEM_SLOTS = 36,
+	VHOST_USER_ADD_MEM_REG = 37,
+	VHOST_USER_REM_MEM_REG = 38,
+	VHOST_USER_MAX
+};
+
+/**
+ * struct vhost_user_header - Vhost-user message header
+ * @request:	Request type of the message
+ * @flags:	Request flags
+ * @size:	The following payload size
+ */
+struct vhost_user_header {
+	enum vhost_user_request request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+} __attribute__ ((__packed__));
+
+/**
+ * struct vhost_user_memory_region - Front-end shared memory region information
+ * @guest_phys_addr:	Guest physical address of the region
+ * @memory_size:	Memory size
+ * @userspace_addr:	front-end (QEMU) userspace address
+ * @mmap_offset:	region offset in the shared memory area
+ */
+struct vhost_user_memory_region {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+};
+
+/**
+ * struct vhost_user_memory - List of all the shared memory regions
+ * @nregions:	Number of memory regions
+ * @padding:	Padding
+ * @regions:	Memory regions list
+ */
+struct vhost_user_memory {
+	uint32_t nregions;
+	uint32_t padding;
+	struct vhost_user_memory_region regions[VHOST_MEMORY_BASELINE_NREGIONS];
+};
+
+/**
+ * union vhost_user_payload - Vhost-user message payload
+ * @u64:		64bit payload
+ * @state:		Vring state payload
+ * @addr:		Vring addresses payload
+ * @memory:		Memory regions information payload
+ */
+union vhost_user_payload {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
+	uint64_t u64;
+	struct vhost_vring_state state;
+	struct vhost_vring_addr addr;
+	struct vhost_user_memory memory;
+};
+
+/**
+ * struct vhost_user_msg - Vhost-user message
+ * @hdr:		Message header
+ * @payload:		Message payload
+ * @fds:		File descriptors associated with the message
+ * 			in the ancillary data.
+ * 			(shared memory or event file descriptors)
+ * @fd_num:		Number of file descriptors
+ */
+struct vhost_user_msg {
+	struct vhost_user_header hdr;
+	union vhost_user_payload payload;
+
+	int fds[VHOST_MEMORY_BASELINE_NREGIONS];
+	int fd_num;
+} __attribute__ ((__packed__));
+#define VHOST_USER_HDR_SIZE sizeof(struct vhost_user_header)
+
+/* index of the RX virtqueue */
+#define VHOST_USER_RX_QUEUE 0
+/* index of the TX virtqueue */
+#define VHOST_USER_TX_QUEUE 1
+
+/**
+ * vu_queue_enabled() - Return the enabled state of a virtqueue
+ * @vq:		Virtqueue to check
+ *
+ * Return: true if the virtqueue is enabled, false otherwise
+ */
+static inline bool vu_queue_enabled(const struct vu_virtq *vq)
+{
+	return vq->enable;
+}
+
+/**
+ * vu_queue_started() - Return the started state of a virtqueue
+ * @vq:		Virtqueue to check
+ *
+ * Return: true if the virtqueue is started, false otherwise
+ */
+static inline bool vu_queue_started(const struct vu_virtq *vq)
+{
+	return vq->started;
+}
+
+int vu_send(struct vu_dev *vdev, const void *buf, size_t size);
+void vu_print_capabilities(void);
+void vu_init(struct ctx *c, struct vu_dev *vdev);
+void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref);
+void vu_cleanup(struct vu_dev *vdev);
+void tap_handler_vu(struct vu_dev *vdev, int fd, uint32_t events);
+#endif /* VHOST_USER_H */
diff --git a/virtio.c b/virtio.c
index 5f984f92cae0..d712f30cc33d 100644
--- a/virtio.c
+++ b/virtio.c
@@ -261,7 +261,6 @@ static bool vring_notify(const struct vu_dev *dev, struct vu_virtq *vq)
  * @dev:	Vhost-user device
  * @vq:		Virtqueue
  */
-/* cppcheck-suppress unusedFunction */
 void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq)
 {
  if (!vq->vring.avail)
@@ -436,7 +435,6 @@ static int vu_queue_map_desc(struct vu_dev *dev, struct vu_virtq *vq, unsigned i
  *
  * Return: -1 if there is an error, 0 otherwise
  */
-/* cppcheck-suppress unusedFunction */
 int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq, struct vu_virtq_element *elem)
 {
  unsigned int head;
@@ -497,7 +495,6 @@ void vu_queue_detach_element(struct vu_dev *dev, struct vu_virtq *vq,
  * @index:	Index of the element to unpop
  * @len:	Size of the element to unpop
  */
-/* cppcheck-suppress unusedFunction */
 void vu_queue_unpop(struct vu_dev *dev, struct vu_virtq *vq, unsigned int index, size_t len)
 {
  vq->last_avail_idx--;
@@ -567,7 +564,6 @@ void vu_queue_fill_by_index(struct vu_virtq *vq, unsigned int index,
  * @len:	Size of the element
  * @idx:	Used ring entry index
  */
-/* cppcheck-suppress unusedFunction */
 void vu_queue_fill(struct vu_virtq *vq, const struct vu_virtq_element *elem,
         unsigned int len, unsigned int idx)
 {
@@ -591,7 +587,6 @@ static inline void vring_used_idx_set(struct vu_virtq *vq, uint16_t val)
  * @vq:		Virtqueue
  * @count:	Number of entry to flush
  */
-/* cppcheck-suppress unusedFunction */
 void vu_queue_flush(struct vu_virtq *vq, unsigned int count)
 {
  uint16_t old, new;
diff --git a/virtio.h b/virtio.h
index 0a2cf6230139..61fb2f9cbf20 100644
--- a/virtio.h
+++ b/virtio.h
@@ -107,6 +107,7 @@ struct vu_dev_region {
  * @hdrlen:		Virtio -net header length
  */
 struct vu_dev {
+	struct ctx *context;
  uint32_t nregions;
  struct vu_dev_region regions[VHOST_USER_MAX_RAM_SLOTS];
  struct vu_virtq vq[VHOST_USER_MAX_QUEUES];
@@ -163,7 +164,6 @@ static inline bool vu_has_feature(const struct vu_dev *vdev,
  *
  * Return:	True if the feature is available
  */
-/* cppcheck-suppress unusedFunction */
 static inline bool vu_has_protocol_feature(const struct vu_dev *vdev,
      			   unsigned int fbit)
 {
...the rest looks good to me, but I didn't review 4/4 yet (it conflicts
quite a bit with the flow table implementation and I didn't manage to
apply it quickly).

-- 
Stefano

    

Re: [PATCH v2 3/4] vhost-user: introduce vhost-user API

Stefano Brivio