It's possible for the buffers of the tap interface (whatever style) to fill up, at which point we'll get some sort of short write. This can result in TCP connections with buffered data on the socket side we weren't able to forward. To more efficiently know when we can forward that data we need to know when the tap interface is no longer "full". To assist that, keep track of our best estimate of whether the tap device is full: set it when we get a short write, and clear it when we get an EPOLLOUT event on the tap fd, indicating we can write more data. Protocols (specifically TCP) can use this via a new tap_is_full() call. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tap.c | 31 +++++++++++++++++++++++++++---- tap.h | 1 + 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/tap.c b/tap.c index c1db2960..3bdca9a1 100644 --- a/tap.c +++ b/tap.c @@ -63,6 +63,9 @@ static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf); static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf); +/* Filled buffers on the tap device */ +static bool tap_full_flag; + #define TAP_SEQS 128 /* Different L4 tuples in one batch */ #define FRAGMENT_MSG_RATE 10 /* # seconds between fragment warnings */ @@ -411,9 +414,11 @@ size_t tap_send_frames(const struct ctx *c, const struct iovec *iov, else m = tap_send_frames_passt(c, iov, bufs_per_frame, nframes); - if (m < nframes) + if (m < nframes) { + tap_full_flag = true; debug("tap: failed to send %zu frames of %zu", nframes - m, nframes); + } pcap_multiple(iov, bufs_per_frame, m, c->mode == MODE_PASST ? 
sizeof(uint32_t) : 0); @@ -1069,6 +1074,9 @@ void tap_handler_passt(struct ctx *c, uint32_t events, while (tap_passt_input(c, now)) ; } + + if (events & EPOLLOUT) + tap_full_flag = false; } /** @@ -1130,6 +1138,21 @@ void tap_handler_pasta(struct ctx *c, uint32_t events, while (tap_pasta_input(c, now)) ; } + + if (events & EPOLLOUT) + tap_full_flag = false; +} + +/** + * tap_is_full() - Is the tap device, as far as we know, out of buffer space? + * + * Return: true if (last we knew) the tap buffers were full, false + * otherwise + */ +/* cppcheck-suppress unusedFunction */ +bool tap_is_full(void) +{ + return tap_full_flag; } /** @@ -1263,7 +1286,7 @@ void tap_listen_handler(struct ctx *c, uint32_t events) trace("tap: failed to set SO_SNDBUF to %i", v); ref.fd = c->fd_tap; - ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; + ev.events = EPOLLIN | EPOLLOUT | EPOLLRDHUP | EPOLLET; ev.data.u64 = ref.u64; epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); } @@ -1319,7 +1342,7 @@ static void tap_sock_tun_init(struct ctx *c) pasta_ns_conf(c); ref.fd = c->fd_tap; - ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; + ev.events = EPOLLIN | EPOLLOUT | EPOLLRDHUP | EPOLLET; ev.data.u64 = ref.u64; epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); } @@ -1352,7 +1375,7 @@ void tap_sock_init(struct ctx *c) else ref.type = EPOLL_TYPE_TAP_PASTA; - ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; + ev.events = EPOLLIN | EPOLLOUT | EPOLLRDHUP | EPOLLET; ev.data.u64 = ref.u64; epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); return; diff --git a/tap.h b/tap.h index ec9e2ace..6094fbdd 100644 --- a/tap.h +++ b/tap.h @@ -67,6 +67,7 @@ void tap_handler_pasta(struct ctx *c, uint32_t events, const struct timespec *now); void tap_handler_passt(struct ctx *c, uint32_t events, const struct timespec *now); +bool tap_is_full(void); int tap_sock_unix_open(char *sock_path); void tap_sock_init(struct ctx *c); void tap_flush_pools(void); -- 2.46.0