Currently TCP uses the 'flow' epoll_ref field for both connected sockets and timers, which consists of just the index of the relevant flow (connection). This is just fine for timers, but while it obviously works, it's subtly incomplete for sockets on spliced connections. In that case we want to know which side of the connection the event is occurring on as well as which connection. At present, we deduce that information by looking at the actual fd, and comparing it to the fds of the sockets on each side. When we use the flow table for more things, we expect more cases where something will need to know a specific side of a specific flow for an event, but nothing more. Therefore add a new 'flowside' epoll_ref field, with exactly that information. We use it for TCP connected sockets. This allows us to directly know the side for spliced connections. For "tap" connections, it's pretty meaningless, since the side is always the socket side. It still makes logical sense though, and it may become important for future flow table work. 
Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- flow.h | 2 +- passt.h | 2 ++ tcp.c | 11 ++++++++--- tcp_splice.c | 37 ++++++++++++------------------------- tcp_splice.h | 2 +- 5 files changed, 24 insertions(+), 30 deletions(-) diff --git a/flow.h b/flow.h index 4f12831..c2a5190 100644 --- a/flow.h +++ b/flow.h @@ -45,7 +45,7 @@ struct flow_common { * @flow: Index of flow referenced */ typedef struct flow_sidx { - int side :1; + unsigned side :1; unsigned flow :FLOW_INDEX_BITS; } flow_sidx_t; static_assert(sizeof(flow_sidx_t) <= sizeof(uint32_t), diff --git a/passt.h b/passt.h index 66a819f..33b493f 100644 --- a/passt.h +++ b/passt.h @@ -37,6 +37,7 @@ union epoll_ref; #include "pif.h" #include "packet.h" +#include "flow.h" #include "icmp.h" #include "port_fwd.h" #include "tcp.h" @@ -91,6 +92,7 @@ union epoll_ref { int32_t fd:FD_REF_BITS; union { uint32_t flow; + flow_sidx_t flowside; union tcp_listen_epoll_ref tcp_listen; union udp_epoll_ref udp; union icmp_epoll_ref icmp; diff --git a/tcp.c b/tcp.c index e80a6aa..24aba44 100644 --- a/tcp.c +++ b/tcp.c @@ -304,6 +304,10 @@ #include "tcp_conn.h" #include "flow_table.h" +/* Sides of a flow as we use them in "tap" connections */ +#define SOCKSIDE 0 +#define TAPSIDE 1 + #define TCP_FRAMES_MEM 128 #define TCP_FRAMES \ (c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1) @@ -639,7 +643,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) { int m = conn->in_epoll ? 
EPOLL_CTL_MOD : EPOLL_CTL_ADD; union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock, - .flow = FLOW_IDX(conn) }; + .flowside = FLOW_SIDX(conn, SOCKSIDE) }; struct epoll_event ev = { .data.u64 = ref.u64 }; if (conn->events == CLOSED) { @@ -2874,14 +2878,15 @@ static void tcp_tap_sock_handler(struct ctx *c, struct tcp_tap_conn *conn, */ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events) { - union flow *flow = FLOW(ref.flow); + union flow *flow = FLOW(ref.flowside.flow); switch (flow->f.type) { case FLOW_TCP: tcp_tap_sock_handler(c, &flow->tcp, events); break; case FLOW_TCP_SPLICE: - tcp_splice_sock_handler(c, &flow->tcp_splice, ref.fd, events); + tcp_splice_sock_handler(c, &flow->tcp_splice, ref.flowside.side, + events); break; default: die("Unexpected %s in tcp_sock_handler_compact()", diff --git a/tcp_splice.c b/tcp_splice.c index 9cec9c6..d5c351b 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -128,8 +128,10 @@ static int tcp_splice_epoll_ctl(const struct ctx *c, { int m = conn->in_epoll ? 
EPOLL_CTL_MOD : EPOLL_CTL_ADD; union epoll_ref ref[SIDES] = { - { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .flow = FLOW_IDX(conn) }, - { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .flow = FLOW_IDX(conn) } + { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], + .flowside = FLOW_SIDX(conn, 0) }, + { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], + .flowside = FLOW_SIDX(conn, 1) } }; struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 }, { .data.u64 = ref[1].u64 } }; @@ -481,13 +483,13 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, * tcp_splice_sock_handler() - Handler for socket mapped to spliced connection * @c: Execution context * @conn: Connection state - * @s: Socket fd on which an event has occurred + * @side: Side of the connection on which an event has occurred * @events: epoll events bitmap * * #syscalls:pasta splice */ void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn, - int s, uint32_t events) + int side, uint32_t events) { uint8_t lowat_set_flag, lowat_act_flag; int eof, never_read; @@ -507,30 +509,15 @@ void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn, } if (events & EPOLLOUT) { - if (s == conn->s[0]) { - conn_event(c, conn, ~OUT_WAIT_0); - fromside = 1; - } else { - conn_event(c, conn, ~OUT_WAIT_1); - fromside = 0; - } + fromside = !side; + conn_event(c, conn, side == 0 ? ~OUT_WAIT_0 : ~OUT_WAIT_1); } else { - fromside = s == conn->s[0] ? 0 : 1; - } - - if (events & EPOLLRDHUP) { - if (s == conn->s[0]) - conn_event(c, conn, FIN_RCVD_0); - else - conn_event(c, conn, FIN_RCVD_1); + fromside = side; } - if (events & EPOLLHUP) { - if (s == conn->s[0]) - conn_event(c, conn, FIN_SENT_0); /* Fake, but implied */ - else - conn_event(c, conn, FIN_SENT_1); - } + if (events & EPOLLRDHUP) + /* For side 0 this is fake, but implied */ + conn_event(c, conn, side == 0 ? 
FIN_RCVD_0 : FIN_RCVD_1); swap: eof = 0; diff --git a/tcp_splice.h b/tcp_splice.h index dc486f1..aa85c7c 100644 --- a/tcp_splice.h +++ b/tcp_splice.h @@ -9,7 +9,7 @@ struct tcp_splice_conn; void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn, - int s, uint32_t events); + int side, uint32_t events); bool tcp_splice_conn_from_sock(const struct ctx *c, union tcp_listen_epoll_ref ref, struct tcp_splice_conn *conn, int s, -- 2.43.0