Each spliced connection has two mostly, although not entirely, symmetric sides. We currently call those "a" and "b" and have different fields in the connection structure for each one. We can better exploit that symmetry if we use two-element arrays rather than separately named fields. Do that in the places we can, and for the others change the "a"/"b" terminology to 0/1 to match. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tcp_conn.h | 45 +++++------ tcp_splice.c | 224 +++++++++++++++++++++++++-------------------------- 2 files changed, 130 insertions(+), 139 deletions(-) diff --git a/tcp_conn.h b/tcp_conn.h index 0751e00..01d31d4 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -119,54 +119,47 @@ struct tcp_tap_conn { uint32_t seq_init_from_tap; }; +#define SIDES 2 /** * struct tcp_splice_conn - Descriptor for a spliced TCP connection * @c: Fields common with tcp_tap_conn * @in_epoll: Is the connection in the epoll set? - * @a: File descriptor number of socket for accepted connection - * @pipe_a_b: Pipe ends for splice() from @a to @b - * @b: File descriptor number of peer connected socket - * @pipe_b_a: Pipe ends for splice() from @b to @a + * @s: File descriptor for sockets + * @pipe: File descriptors for pipes * @events: Events observed/actions performed on connection * @flags: Connection flags (attributes, not events) - * @a_read: Bytes read from @a (not fully written to @b in one shot) - * @a_written: Bytes written to @a (not fully written from one @b read) - * @b_read: Bytes read from @b (not fully written to @a in one shot) - * @b_written: Bytes written to @b (not fully written from one @a read) + * @read: Bytes read (not fully written to other side in one shot) + * @written: Bytes written (not fully written from one other side read) */ struct tcp_splice_conn { /* Must be first element to match tcp_tap_conn */ struct tcp_conn_common c; bool in_epoll :1; - int a; - int pipe_a_b[2]; - int b; - int pipe_b_a[2]; + int s[SIDES]; + int 
pipe[SIDES][2]; uint8_t events; #define SPLICE_CLOSED 0 #define SPLICE_CONNECT BIT(0) #define SPLICE_ESTABLISHED BIT(1) -#define A_OUT_WAIT BIT(2) -#define B_OUT_WAIT BIT(3) -#define A_FIN_RCVD BIT(4) -#define B_FIN_RCVD BIT(5) -#define A_FIN_SENT BIT(6) -#define B_FIN_SENT BIT(7) +#define OUT_WAIT_0 BIT(2) +#define OUT_WAIT_1 BIT(3) +#define FIN_RCVD_0 BIT(4) +#define FIN_RCVD_1 BIT(5) +#define FIN_SENT_0 BIT(6) +#define FIN_SENT_1 BIT(7) uint8_t flags; #define SPLICE_V6 BIT(0) -#define RCVLOWAT_SET_A BIT(1) -#define RCVLOWAT_SET_B BIT(2) -#define RCVLOWAT_ACT_A BIT(3) -#define RCVLOWAT_ACT_B BIT(4) +#define RCVLOWAT_SET_0 BIT(1) +#define RCVLOWAT_SET_1 BIT(2) +#define RCVLOWAT_ACT_0 BIT(3) +#define RCVLOWAT_ACT_1 BIT(4) #define CLOSING BIT(5) - uint32_t a_read; - uint32_t a_written; - uint32_t b_read; - uint32_t b_written; + uint32_t read[SIDES]; + uint32_t written[SIDES]; }; /** diff --git a/tcp_splice.c b/tcp_splice.c index 7b36688..f405184 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -21,12 +21,12 @@ * * - SPLICE_CONNECT: connection accepted, connecting to target * - SPLICE_ESTABLISHED: connection to target established - * - A_OUT_WAIT: pipe to accepted socket full, wait for EPOLLOUT - * - B_OUT_WAIT: pipe to target socket full, wait for EPOLLOUT - * - A_FIN_RCVD: FIN (EPOLLRDHUP) seen from accepted socket - * - B_FIN_RCVD: FIN (EPOLLRDHUP) seen from target socket - * - A_FIN_RCVD: FIN (write shutdown) sent to accepted socket - * - B_FIN_RCVD: FIN (write shutdown) sent to target socket + * - OUT_WAIT_0: pipe to accepted socket full, wait for EPOLLOUT + * - OUT_WAIT_1: pipe to target socket full, wait for EPOLLOUT + * - FIN_RCVD_0: FIN (EPOLLRDHUP) seen from accepted socket + * - FIN_RCVD_1: FIN (EPOLLRDHUP) seen from target socket + * - FIN_SENT_0: FIN (write shutdown) sent to accepted socket + * - FIN_SENT_1: FIN (write shutdown) sent to target socket * * #syscalls:pasta pipe2|pipe fcntl armv6l:fcntl64 armv7l:fcntl64 ppc64:fcntl64 */ @@ -79,14 +79,14 @@ 
static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2]; /* Display strings for connection events */ static const char *tcp_splice_event_str[] __attribute((__unused__)) = { - "SPLICE_CONNECT", "SPLICE_ESTABLISHED", "A_OUT_WAIT", "B_OUT_WAIT", - "A_FIN_RCVD", "B_FIN_RCVD", "A_FIN_SENT", "B_FIN_SENT", + "SPLICE_CONNECT", "SPLICE_ESTABLISHED", "OUT_WAIT_0", "OUT_WAIT_1", + "FIN_RCVD_0", "FIN_RCVD_1", "FIN_SENT_0", "FIN_SENT_1", }; /* Display strings for connection flags */ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = { - "SPLICE_V6", "RCVLOWAT_SET_A", "RCVLOWAT_SET_B", "RCVLOWAT_ACT_A", - "RCVLOWAT_ACT_B", "CLOSING", + "SPLICE_V6", "RCVLOWAT_SET_0", "RCVLOWAT_SET_1", "RCVLOWAT_ACT_0", + "RCVLOWAT_ACT_1", "CLOSING", }; /* Forward declaration */ @@ -95,26 +95,24 @@ static int tcp_sock_refill_ns(void *arg); /** * tcp_splice_conn_epoll_events() - epoll events masks for given state * @events: Connection event flags - * @a: Event for socket with accepted connection, set on return - * @b: Event for connection target socket, set on return + * @ev: Events to fill in, 0 is accepted socket, 1 is connecting socket */ static void tcp_splice_conn_epoll_events(uint16_t events, - struct epoll_event *a, - struct epoll_event *b) + struct epoll_event ev[]) { - a->events = b->events = 0; + ev[0].events = ev[1].events = 0; if (events & SPLICE_ESTABLISHED) { - if (!(events & B_FIN_SENT)) - a->events = EPOLLIN | EPOLLRDHUP; - if (!(events & A_FIN_SENT)) - b->events = EPOLLIN | EPOLLRDHUP; + if (!(events & FIN_SENT_1)) + ev[0].events = EPOLLIN | EPOLLRDHUP; + if (!(events & FIN_SENT_0)) + ev[1].events = EPOLLIN | EPOLLRDHUP; } else if (events & SPLICE_CONNECT) { - b->events = EPOLLOUT; + ev[1].events = EPOLLOUT; } - a->events |= (events & A_OUT_WAIT) ? EPOLLOUT : 0; - b->events |= (events & B_OUT_WAIT) ? EPOLLOUT : 0; + ev[0].events |= (events & OUT_WAIT_0) ? EPOLLOUT : 0; + ev[1].events |= (events & OUT_WAIT_1) ? 
EPOLLOUT : 0; } /** @@ -128,17 +126,17 @@ static int tcp_splice_epoll_ctl(const struct ctx *c, struct tcp_splice_conn *conn) { int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; - union epoll_ref ref_a = { .type = EPOLL_TYPE_TCP, .fd = conn->a, - .tcp.index = CONN_IDX(conn) }; - union epoll_ref ref_b = { .type = EPOLL_TYPE_TCP, .fd = conn->b, - .tcp.index = CONN_IDX(conn) }; - struct epoll_event ev_a = { .data.u64 = ref_a.u64 }; - struct epoll_event ev_b = { .data.u64 = ref_b.u64 }; + union epoll_ref ref[SIDES] = { + { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .tcp.index = CONN_IDX(conn) }, + { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .tcp.index = CONN_IDX(conn) } + }; + struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 }, + { .data.u64 = ref[1].u64 } }; - tcp_splice_conn_epoll_events(conn->events, &ev_a, &ev_b); + tcp_splice_conn_epoll_events(conn->events, ev); - if (epoll_ctl(c->epollfd, m, conn->a, &ev_a) || - epoll_ctl(c->epollfd, m, conn->b, &ev_b)) { + if (epoll_ctl(c->epollfd, m, conn->s[0], &ev[0]) || + epoll_ctl(c->epollfd, m, conn->s[1], &ev[1])) { int ret = -errno; err("TCP (spliced): index %li, ERROR on epoll_ctl(): %s", CONN_IDX(conn), strerror(errno)); @@ -184,8 +182,8 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn, } if (flag == CLOSING) { - epoll_ctl(c->epollfd, EPOLL_CTL_DEL, conn->a, NULL); - epoll_ctl(c->epollfd, EPOLL_CTL_DEL, conn->b, NULL); + epoll_ctl(c->epollfd, EPOLL_CTL_DEL, conn->s[0], NULL); + epoll_ctl(c->epollfd, EPOLL_CTL_DEL, conn->s[1], NULL); } } @@ -263,26 +261,26 @@ void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union) if (conn->events & SPLICE_ESTABLISHED) { /* Flushing might need to block: don't recycle them. 
*/ - if (conn->pipe_a_b[0] != -1) { - close(conn->pipe_a_b[0]); - close(conn->pipe_a_b[1]); - conn->pipe_a_b[0] = conn->pipe_a_b[1] = -1; + if (conn->pipe[0][0] != -1) { + close(conn->pipe[0][0]); + close(conn->pipe[0][1]); + conn->pipe[0][0] = conn->pipe[0][1] = -1; } - if (conn->pipe_b_a[0] != -1) { - close(conn->pipe_b_a[0]); - close(conn->pipe_b_a[1]); - conn->pipe_b_a[0] = conn->pipe_b_a[1] = -1; + if (conn->pipe[1][0] != -1) { + close(conn->pipe[1][0]); + close(conn->pipe[1][1]); + conn->pipe[1][0] = conn->pipe[1][1] = -1; } } if (conn->events & SPLICE_CONNECT) { - close(conn->b); - conn->b = -1; + close(conn->s[1]); + conn->s[1] = -1; } - close(conn->a); - conn->a = -1; - conn->a_read = conn->a_written = conn->b_read = conn->b_written = 0; + close(conn->s[0]); + conn->s[0] = -1; + conn->read[0] = conn->written[0] = conn->read[1] = conn->written[1] = 0; conn->events = SPLICE_CLOSED; conn->flags = 0; @@ -303,47 +301,47 @@ static int tcp_splice_connect_finish(const struct ctx *c, { int i; - conn->pipe_a_b[0] = conn->pipe_b_a[0] = -1; - conn->pipe_a_b[1] = conn->pipe_b_a[1] = -1; + conn->pipe[0][0] = conn->pipe[1][0] = -1; + conn->pipe[0][1] = conn->pipe[1][1] = -1; for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) { if (splice_pipe_pool[i][0] >= 0) { - SWAP(conn->pipe_a_b[0], splice_pipe_pool[i][0]); - SWAP(conn->pipe_a_b[1], splice_pipe_pool[i][1]); + SWAP(conn->pipe[0][0], splice_pipe_pool[i][0]); + SWAP(conn->pipe[0][1], splice_pipe_pool[i][1]); break; } } - if (conn->pipe_a_b[0] < 0) { - if (pipe2(conn->pipe_a_b, O_NONBLOCK | O_CLOEXEC)) { - err("TCP (spliced): cannot create a->b pipe: %s", + if (conn->pipe[0][0] < 0) { + if (pipe2(conn->pipe[0], O_NONBLOCK | O_CLOEXEC)) { + err("TCP (spliced): cannot create 0->1 pipe: %s", strerror(errno)); conn_flag(c, conn, CLOSING); return -EIO; } - if (fcntl(conn->pipe_a_b[0], F_SETPIPE_SZ, c->tcp.pipe_size)) { - trace("TCP (spliced): cannot set a->b pipe size to %lu", + if (fcntl(conn->pipe[0][0], F_SETPIPE_SZ, 
c->tcp.pipe_size)) { + trace("TCP (spliced): cannot set 0->1 pipe size to %lu", c->tcp.pipe_size); } } for (; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) { if (splice_pipe_pool[i][0] >= 0) { - SWAP(conn->pipe_b_a[0], splice_pipe_pool[i][0]); - SWAP(conn->pipe_b_a[1], splice_pipe_pool[i][1]); + SWAP(conn->pipe[1][0], splice_pipe_pool[i][0]); + SWAP(conn->pipe[1][1], splice_pipe_pool[i][1]); break; } } - if (conn->pipe_b_a[0] < 0) { - if (pipe2(conn->pipe_b_a, O_NONBLOCK | O_CLOEXEC)) { - err("TCP (spliced): cannot create b->a pipe: %s", + if (conn->pipe[1][0] < 0) { + if (pipe2(conn->pipe[1], O_NONBLOCK | O_CLOEXEC)) { + err("TCP (spliced): cannot create 1->0 pipe: %s", strerror(errno)); conn_flag(c, conn, CLOSING); return -EIO; } - if (fcntl(conn->pipe_b_a[0], F_SETPIPE_SZ, c->tcp.pipe_size)) { - trace("TCP (spliced): cannot set b->a pipe size to %lu", + if (fcntl(conn->pipe[1][0], F_SETPIPE_SZ, c->tcp.pipe_size)) { + trace("TCP (spliced): cannot set 1->0 pipe size to %lu", c->tcp.pipe_size); } } @@ -379,12 +377,12 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, const struct sockaddr *sa; socklen_t sl; - conn->b = sock_conn; + conn->s[1] = sock_conn; - if (setsockopt(conn->b, SOL_TCP, TCP_QUICKACK, + if (setsockopt(conn->s[1], SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int))) { trace("TCP (spliced): failed to set TCP_QUICKACK on socket %i", - conn->b); + conn->s[1]); } if (CONN_V6(conn)) { @@ -395,7 +393,7 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, sl = sizeof(addr4); } - if (connect(conn->b, sa, sl)) { + if (connect(conn->s[1], sa, sl)) { if (errno != EINPROGRESS) { int ret = -errno; @@ -473,13 +471,13 @@ static void tcp_splice_dir(struct tcp_splice_conn *conn, int ref_sock, { if (!reverse) { *from = ref_sock; - *to = (*from == conn->a) ? conn->b : conn->a; + *to = (*from == conn->s[0]) ? conn->s[1] : conn->s[0]; } else { *to = ref_sock; - *from = (*to == conn->a) ? 
conn->b : conn->a; + *from = (*to == conn->s[0]) ? conn->s[1] : conn->s[0]; } - *pipes = *from == conn->a ? conn->pipe_a_b : conn->pipe_b_a; + *pipes = *from == conn->s[0] ? conn->pipe[0] : conn->pipe[1]; } /** @@ -521,7 +519,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, trace("TCP (spliced): failed to set TCP_QUICKACK on %i", s); conn->c.spliced = true; - conn->a = s; + conn->s[0] = s; if (tcp_splice_new(c, conn, ref.port, ref.pif)) conn_flag(c, conn, CLOSING); @@ -559,10 +557,10 @@ void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn, } if (events & EPOLLOUT) { - if (s == conn->a) - conn_event(c, conn, ~A_OUT_WAIT); + if (s == conn->s[0]) + conn_event(c, conn, ~OUT_WAIT_0); else - conn_event(c, conn, ~B_OUT_WAIT); + conn_event(c, conn, ~OUT_WAIT_1); tcp_splice_dir(conn, s, 1, &from, &to, &pipes); } else { @@ -570,33 +568,33 @@ void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn, } if (events & EPOLLRDHUP) { - if (s == conn->a) - conn_event(c, conn, A_FIN_RCVD); + if (s == conn->s[0]) + conn_event(c, conn, FIN_RCVD_0); else - conn_event(c, conn, B_FIN_RCVD); + conn_event(c, conn, FIN_RCVD_1); } if (events & EPOLLHUP) { - if (s == conn->a) - conn_event(c, conn, A_FIN_SENT); /* Fake, but implied */ + if (s == conn->s[0]) + conn_event(c, conn, FIN_SENT_0); /* Fake, but implied */ else - conn_event(c, conn, B_FIN_SENT); + conn_event(c, conn, FIN_SENT_1); } swap: eof = 0; never_read = 1; - if (from == conn->a) { - seq_read = &conn->a_read; - seq_write = &conn->a_written; - lowat_set_flag = RCVLOWAT_SET_A; - lowat_act_flag = RCVLOWAT_ACT_A; + if (from == conn->s[0]) { + seq_read = &conn->read[0]; + seq_write = &conn->written[0]; + lowat_set_flag = RCVLOWAT_SET_0; + lowat_act_flag = RCVLOWAT_ACT_0; } else { - seq_read = &conn->b_read; - seq_write = &conn->b_written; - lowat_set_flag = RCVLOWAT_SET_B; - lowat_act_flag = RCVLOWAT_ACT_B; + seq_read = &conn->read[1]; + seq_write = &conn->written[1]; + lowat_set_flag = 
RCVLOWAT_SET_1; + lowat_act_flag = RCVLOWAT_ACT_1; } while (1) { @@ -666,10 +664,10 @@ eintr: if (never_read) break; - if (to == conn->a) - conn_event(c, conn, A_OUT_WAIT); + if (to == conn->s[0]) + conn_event(c, conn, OUT_WAIT_0); else - conn_event(c, conn, B_OUT_WAIT); + conn_event(c, conn, OUT_WAIT_1); break; } @@ -685,31 +683,31 @@ eintr: break; } - if ((conn->events & A_FIN_RCVD) && !(conn->events & B_FIN_SENT)) { + if ((conn->events & FIN_RCVD_0) && !(conn->events & FIN_SENT_1)) { if (*seq_read == *seq_write && eof) { - shutdown(conn->b, SHUT_WR); - conn_event(c, conn, B_FIN_SENT); + shutdown(conn->s[1], SHUT_WR); + conn_event(c, conn, FIN_SENT_1); } } - if ((conn->events & B_FIN_RCVD) && !(conn->events & A_FIN_SENT)) { + if ((conn->events & FIN_RCVD_1) && !(conn->events & FIN_SENT_0)) { if (*seq_read == *seq_write && eof) { - shutdown(conn->a, SHUT_WR); - conn_event(c, conn, A_FIN_SENT); + shutdown(conn->s[0], SHUT_WR); + conn_event(c, conn, FIN_SENT_0); } } - if (CONN_HAS(conn, A_FIN_SENT | B_FIN_SENT)) + if (CONN_HAS(conn, FIN_SENT_0 | FIN_SENT_1)) goto close; if ((events & (EPOLLIN | EPOLLOUT)) == (EPOLLIN | EPOLLOUT)) { events = EPOLLIN; SWAP(from, to); - if (pipes == conn->pipe_a_b) - pipes = conn->pipe_b_a; + if (pipes == conn->pipe[0]) + pipes = conn->pipe[1]; else - pipes = conn->pipe_a_b; + pipes = conn->pipe[0]; goto swap; } @@ -843,26 +841,26 @@ void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union) return; } - if ( (conn->flags & RCVLOWAT_SET_A) && - !(conn->flags & RCVLOWAT_ACT_A)) { - if (setsockopt(conn->a, SOL_SOCKET, SO_RCVLOWAT, + if ( (conn->flags & RCVLOWAT_SET_0) && + !(conn->flags & RCVLOWAT_ACT_0)) { + if (setsockopt(conn->s[0], SOL_SOCKET, SO_RCVLOWAT, &((int){ 1 }), sizeof(int))) { trace("TCP (spliced): can't set SO_RCVLOWAT on " - "%i", conn->a); + "%i", conn->s[0]); } - conn_flag(c, conn, ~RCVLOWAT_SET_A); + conn_flag(c, conn, ~RCVLOWAT_SET_0); } - if ( (conn->flags & RCVLOWAT_SET_B) && - !(conn->flags & RCVLOWAT_ACT_B)) 
{ - if (setsockopt(conn->b, SOL_SOCKET, SO_RCVLOWAT, + if ( (conn->flags & RCVLOWAT_SET_1) && + !(conn->flags & RCVLOWAT_ACT_1)) { + if (setsockopt(conn->s[1], SOL_SOCKET, SO_RCVLOWAT, &((int){ 1 }), sizeof(int))) { trace("TCP (spliced): can't set SO_RCVLOWAT on " - "%i", conn->b); + "%i", conn->s[1]); } - conn_flag(c, conn, ~RCVLOWAT_SET_B); + conn_flag(c, conn, ~RCVLOWAT_SET_1); } - conn_flag(c, conn, ~RCVLOWAT_ACT_A); - conn_flag(c, conn, ~RCVLOWAT_ACT_B); + conn_flag(c, conn, ~RCVLOWAT_ACT_0); + conn_flag(c, conn, ~RCVLOWAT_ACT_1); } -- 2.41.0