Add virtio and vhost-user functions to connect with QEMU.
$ ./passt --vhost-user
and then start QEMU with:
# qemu-system-x86_64 ... -m 4G \
-object memory-backend-memfd,id=memfd0,share=on,size=4G \
-numa node,memdev=memfd0 \
-chardev socket,id=chr0,path=/tmp/passt_1.socket \
-netdev vhost-user,id=netdev0,chardev=chr0 \
-device virtio-net,mac=9a:2b:2c:2d:2e:2f,netdev=netdev0 \
...
Signed-off-by: Laurent Vivier
---
conf.c | 20 ++++++++++++++--
passt.c | 7 ++++++
passt.h | 1 +
tap.c | 73 ++++++++++++++++++++++++++++++++++++++++++---------------
tcp.c | 8 +++++--
udp.c | 6 +++--
6 files changed, 90 insertions(+), 25 deletions(-)
diff --git a/conf.c b/conf.c
index b6a2a1f0fdc3..40aa9519f8a6 100644
--- a/conf.c
+++ b/conf.c
@@ -44,6 +44,7 @@
#include "lineread.h"
#include "isolation.h"
#include "log.h"
+#include "vhost_user.h"
/**
* next_chunk - Return the next piece of a string delimited by a character
@@ -735,9 +736,12 @@ static void print_usage(const char *name, int status)
info( " -I, --ns-ifname NAME namespace interface name");
info( " default: same interface name as external one");
} else {
- info( " -s, --socket PATH UNIX domain socket path");
+ info( " -s, --socket, --socket-path PATH UNIX domain socket path");
info( " default: probe free path starting from "
UNIX_SOCK_PATH, 1);
+ info( " --vhost-user Enable vhost-user mode");
+ info( " UNIX domain socket is provided by -s option");
+ info( " --print-capabilities print back-end capabilities in JSON format");
}
info( " -F, --fd FD Use FD as pre-opened connected socket");
@@ -1123,6 +1127,7 @@ void conf(struct ctx *c, int argc, char **argv)
{"help", no_argument, NULL, 'h' },
{"socket", required_argument, NULL, 's' },
{"fd", required_argument, NULL, 'F' },
+ {"socket-path", required_argument, NULL, 's' }, /* vhost-user mandatory */
{"ns-ifname", required_argument, NULL, 'I' },
{"pcap", required_argument, NULL, 'p' },
{"pid", required_argument, NULL, 'P' },
@@ -1169,6 +1174,8 @@ void conf(struct ctx *c, int argc, char **argv)
{"config-net", no_argument, NULL, 17 },
{"no-copy-routes", no_argument, NULL, 18 },
{"no-copy-addrs", no_argument, NULL, 19 },
+ {"vhost-user", no_argument, NULL, 20 },
+ {"print-capabilities", no_argument, NULL, 21 }, /* vhost-user mandatory */
{ 0 },
};
char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 };
@@ -1328,7 +1335,6 @@ void conf(struct ctx *c, int argc, char **argv)
sizeof(c->ip6.ifname_out), "%s", optarg);
if (ret <= 0 || ret >= (int)sizeof(c->ip6.ifname_out))
die("Invalid interface name: %s", optarg);
-
break;
case 17:
if (c->mode != MODE_PASTA)
@@ -1350,6 +1356,16 @@ void conf(struct ctx *c, int argc, char **argv)
warn("--no-copy-addrs will be dropped soon");
c->no_copy_addrs = copy_addrs_opt = true;
break;
+ case 20:
+ if (c->mode == MODE_PASTA) {
+ err("--vhost-user is for passt mode only");
+ usage(argv[0]);
+ }
+ c->mode = MODE_VU;
+ break;
+ case 21:
+ vu_print_capabilities();
+ break;
case 'd':
if (c->debug)
die("Multiple --debug options given");
diff --git a/passt.c b/passt.c
index 95034d73381f..952aded12848 100644
--- a/passt.c
+++ b/passt.c
@@ -282,6 +282,7 @@ int main(int argc, char **argv)
quit_fd = pasta_netns_quit_init(&c);
tap_sock_init(&c);
+ vu_init(&c);
secret_init(&c);
@@ -399,6 +400,12 @@ loop:
case EPOLL_TYPE_ICMPV6:
icmp_sock_handler(&c, AF_INET6, ref);
break;
+ case EPOLL_TYPE_VHOST_CMD:
+ tap_handler_vu(&c, eventmask);
+ break;
+ case EPOLL_TYPE_VHOST_KICK:
+ vu_kick_cb(&c, ref);
+ break;
default:
/* Can't happen */
ASSERT(0);
diff --git a/passt.h b/passt.h
index 6ed1d0b19e82..4e0100d51a4d 100644
--- a/passt.h
+++ b/passt.h
@@ -141,6 +141,7 @@ struct fqdn {
enum passt_modes {
MODE_PASST,
MODE_PASTA,
+ MODE_VU,
};
/**
diff --git a/tap.c b/tap.c
index 936206e53637..c2a917bc00ca 100644
--- a/tap.c
+++ b/tap.c
@@ -57,6 +57,7 @@
#include "packet.h"
#include "tap.h"
#include "log.h"
+#include "vhost_user.h"
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
@@ -75,19 +76,22 @@ static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
*/
int tap_send(const struct ctx *c, const void *data, size_t len)
{
- pcap(data, len);
+ int flags = MSG_NOSIGNAL | MSG_DONTWAIT;
+ uint32_t vnet_len = htonl(len);
- if (c->mode == MODE_PASST) {
- int flags = MSG_NOSIGNAL | MSG_DONTWAIT;
- uint32_t vnet_len = htonl(len);
+ pcap(data, len);
+ switch (c->mode) {
+ case MODE_PASST:
if (send(c->fd_tap, &vnet_len, 4, flags) < 0)
return -1;
-
return send(c->fd_tap, data, len, flags);
+ case MODE_PASTA:
+ return write(c->fd_tap, (char *)data, len);
+ case MODE_VU:
+ return vu_send(c, data, len);
}
-
- return write(c->fd_tap, (char *)data, len);
+ return 0;
}
/**
@@ -428,10 +432,20 @@ size_t tap_send_frames(const struct ctx *c, const struct iovec *iov, size_t n)
if (!n)
return 0;
- if (c->mode == MODE_PASTA)
+ switch (c->mode) {
+ case MODE_PASTA:
m = tap_send_frames_pasta(c, iov, n);
- else
+ break;
+ case MODE_PASST:
m = tap_send_frames_passt(c, iov, n);
+ break;
+ case MODE_VU:
+ m = tap_send_frames_vu(c, iov, n);
+ break;
+ default:
+ m = 0;
+ break;
+ }
if (m < n)
debug("tap: failed to send %zu frames of %zu", n - m, n);
@@ -1149,11 +1163,17 @@ static void tap_sock_unix_init(struct ctx *c)
ev.data.u64 = ref.u64;
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev);
- info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
- info(" kvm ... -device virtio-net-pci,netdev=s -netdev stream,id=s,server=off,addr.type=unix,addr.path=%s",
- addr.sun_path);
- info("or qrap, for earlier qemu versions:");
- info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
+ if (c->mode == MODE_VU) {
+ info("You can start qemu with:");
+ info(" kvm ... -chardev socket,id=chr0,path=%s -netdev vhost-user,id=netdev0,chardev=chr0 -device virtio-net,netdev=netdev0 -object memory-backend-memfd,id=memfd0,share=on,size=$RAMSIZE -numa node,memdev=memfd0\n",
+ addr.sun_path);
+ } else {
+ info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
+ info(" kvm ... -device virtio-net-pci,netdev=s -netdev stream,id=s,server=off,addr.type=unix,addr.path=%s",
+ addr.sun_path);
+ info("or qrap, for earlier qemu versions:");
+ info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
+ }
}
/**
@@ -1163,7 +1183,7 @@ static void tap_sock_unix_init(struct ctx *c)
*/
void tap_listen_handler(struct ctx *c, uint32_t events)
{
- union epoll_ref ref = { .type = EPOLL_TYPE_TAP_PASST };
+ union epoll_ref ref;
struct epoll_event ev = { 0 };
int v = INT_MAX / 2;
struct ucred ucred;
@@ -1204,7 +1224,13 @@ void tap_listen_handler(struct ctx *c, uint32_t events)
trace("tap: failed to set SO_SNDBUF to %i", v);
ref.fd = c->fd_tap;
- ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+ if (c->mode == MODE_VU) {
+ ref.type = EPOLL_TYPE_VHOST_CMD;
+ ev.events = EPOLLIN | EPOLLRDHUP;
+ } else {
+ ref.type = EPOLL_TYPE_TAP_PASST;
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
+ }
ev.data.u64 = ref.u64;
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
}
@@ -1288,12 +1314,21 @@ void tap_sock_init(struct ctx *c)
ASSERT(c->one_off);
ref.fd = c->fd_tap;
- if (c->mode == MODE_PASST)
+ switch (c->mode) {
+ case MODE_PASST:
ref.type = EPOLL_TYPE_TAP_PASST;
- else
+ ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+ break;
+ case MODE_PASTA:
ref.type = EPOLL_TYPE_TAP_PASTA;
+ ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+ break;
+ case MODE_VU:
+ ref.type = EPOLL_TYPE_VHOST_CMD;
+ ev.events = EPOLLIN | EPOLLRDHUP;
+ break;
+ }
- ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
ev.data.u64 = ref.u64;
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
return;
diff --git a/tcp.c b/tcp.c
index 54c15087d678..b6aca9f37f19 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1033,7 +1033,9 @@ size_t ipv4_fill_headers(const struct ctx *c,
tcp_set_tcp_header(th, conn, seq);
- th->check = tcp_update_check_tcp4(iph);
+ th->check = 0;
+ if (c->mode != MODE_VU || *c->pcap)
+ th->check = tcp_update_check_tcp4(iph);
return ip_len;
}
@@ -1069,7 +1071,9 @@ size_t ipv6_fill_headers(const struct ctx *c,
tcp_set_tcp_header(th, conn, seq);
- th->check = tcp_update_check_tcp6(ip6h);
+ th->check = 0;
+ if (c->mode != MODE_VU || *c->pcap)
+ th->check = tcp_update_check_tcp6(ip6h);
ip6h->hop_limit = 255;
ip6h->version = 6;
diff --git a/udp.c b/udp.c
index a189c2e0b5a2..799a10989a91 100644
--- a/udp.c
+++ b/udp.c
@@ -671,8 +671,10 @@ static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
uh->source = s_in6->sin6_port;
uh->dest = htons(dstport);
uh->len = ip6h->payload_len;
- uh->check = csum(uh, ntohs(ip6h->payload_len),
- proto_ipv6_header_checksum(ip6h, IPPROTO_UDP));
+ uh->check = 0;
+ if (c->mode != MODE_VU || *c->pcap)
+ uh->check = csum(uh, ntohs(ip6h->payload_len),
+ proto_ipv6_header_checksum(ip6h, IPPROTO_UDP));
ip6h->version = 6;
ip6h->nexthdr = IPPROTO_UDP;
ip6h->hop_limit = 255;
--
2.42.0