On Tue, Sep 03, 2024 at 09:25:50PM +0200, Stefano Brivio wrote: On Tue, 3 Sep 2024 22:02:34 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote: I've reworded this. Since 4684f603446b ("tap: Don't use EPOLLET on Qemu sockets") we've only used level-triggered events for the tap device. Prior to that we used it inconsistently which caused some problems. It didn't actually cause any issue according to your commit message for 4684f603446b itself. Fixed. We want to add support for EPOLLOUT events on the tap connection, and without EPOLLET that would require toggling EPOLLOUT on and off, which is awkward. So, re-introduce EPOLLET, but now use it uniformly for all tap modes. The main change this requires is making sure on EPOLLIN we loop until there's no more data to process. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tap.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tap.c b/tap.c index 2fbcef04..d7d3fc19 100644 --- a/tap.c +++ b/tap.c @@ -985,8 +985,10 @@ static void tap_sock_reset(struct ctx *c) * tap_passt_input() - Handler for new data on the socket to qemu * @c: Execution context * @now: Current timestamp + * + * Return: true if there may be additional data to read, false otherwise */ -static void tap_passt_input(struct ctx *c, const struct timespec *now) +static bool tap_passt_input(struct ctx *c, const struct timespec *now) { static const char *partial_frame; static ssize_t partial_len = 0; @@ -1013,7 +1015,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now) err_perror("Receive error on guest connection, reset"); tap_sock_reset(c); } - return; + return false; } p = pkt_buf; @@ -1025,7 +1027,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now) if (l2len < sizeof(struct ethhdr) || l2len > ETH_MAX_MTU) { err("Bad frame size from guest, resetting connection"); tap_sock_reset(c); - return; + return false; } if (l2len + sizeof(uint32_t) > (size_t)n) 
@@ -1045,6 +1047,8 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now) partial_frame = p; tap_handler(c, now); + + return true; } /** @@ -1062,15 +1066,18 @@ void tap_handler_passt(struct ctx *c, uint32_t events, } if (events & EPOLLIN) - tap_passt_input(c, now); + while (tap_passt_input(c, now)) + ; Nit (same below): use curly brackets for multi-line block. Or just use: while (tap_passt_input(c, now)); -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson } /** * tap_passt_input() - Handler for new data on the socket to qemu * @c: Execution context * @now: Current timestamp + * + * Return: true if there may be additional data to read, false otherwise */ -static void tap_pasta_input(struct ctx *c, const struct timespec *now) +static bool tap_pasta_input(struct ctx *c, const struct timespec *now) { ssize_t n, len; @@ -1101,6 +1108,8 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now) die("EOF on tap device, exiting"); tap_handler(c, now); + + return len > 0; } /** @@ -1116,7 +1125,8 @@ void tap_handler_pasta(struct ctx *c, uint32_t events, die("Disconnect event on /dev/net/tun device, exiting"); if (events & EPOLLIN) - tap_pasta_input(c, now); + while (tap_pasta_input(c, now)) + ; } /** @@ -1250,7 +1260,7 @@ void tap_listen_handler(struct ctx *c, uint32_t events) trace("tap: failed to set SO_SNDBUF to %i", v); ref.fd = c->fd_tap; - ev.events = EPOLLIN | EPOLLRDHUP; + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; ev.data.u64 = ref.u64; epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); } @@ -1306,7 +1316,7 @@ static void tap_sock_tun_init(struct ctx *c) pasta_ns_conf(c); ref.fd = c->fd_tap; - ev.events = EPOLLIN | EPOLLRDHUP; + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; ev.data.u64 = ref.u64; epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); } @@ -1339,7 +1349,7 @@ void tap_sock_init(struct 
ctx *c) else ref.type = EPOLL_TYPE_TAP_PASTA; - ev.events = EPOLLIN | EPOLLRDHUP; + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; ev.data.u64 = ref.u64; epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); return;