In pasta mode, tcp_sock_init[46]() creates separate sockets to listen for spliced connections (these are bound to localhost) and non-spliced connections (these are bound to the host address). This introduces a subtle behavioural difference between pasta and passt: by default, pasta will listen only on a single host address, whereas passt will listen on all addresses (0.0.0.0 or ::). This also prevents us using some additional optimizations that only work with the unspecified (0.0.0.0 or ::) address. However, it turns out we don't need to do this. We can splice a connection if and only if it originates from the loopback address. Currently we ensure this by having the "spliced" listening sockets listening only on loopback. However, we can defer the decision about whether to splice a connection until after accept(), by checking if the connection was made from localhost. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tcp.c | 131 +++++++++++++++++----------------------------------------- 1 file changed, 39 insertions(+), 92 deletions(-) diff --git a/tcp.c b/tcp.c index e7bfc8c..ac70d4e 100644 --- a/tcp.c +++ b/tcp.c @@ -434,7 +434,6 @@ static const char *tcp_flag_str[] __attribute((__unused__)) = { }; /* Listening sockets, used for automatic port forwarding in pasta mode only */ -static int tcp_sock_init_lo [NUM_PORTS][IP_VERSIONS]; static int tcp_sock_init_ext [NUM_PORTS][IP_VERSIONS]; static int tcp_sock_ns [NUM_PORTS][IP_VERSIONS]; @@ -2847,9 +2846,13 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref, { struct sockaddr_storage sa; union tcp_conn *conn; + bool can_splice = false; socklen_t sl; int s; + assert(ref.r.p.tcp.tcp.listen); + assert(!ref.r.p.tcp.tcp.splice); + if (c->tcp.conn_count >= TCP_MAX_CONNS) return; @@ -2860,7 +2863,25 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref, conn = tc + c->tcp.conn_count++; - if (ref.r.p.tcp.tcp.splice) + if (c->mode == MODE_PASTA) { + if (ref.r.p.tcp.tcp.v6) { + const 
struct sockaddr_in6 *sa6 + = (const struct sockaddr_in6 *)&sa; + /* clang-tidy doesn't realize accept() initializes sa/sa6 */ + /* NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult) */ + if (IN6_IS_ADDR_LOOPBACK(&sa6->sin6_addr)) + can_splice = true; + } else { + const struct sockaddr_in *sa4 = + (const struct sockaddr_in *)&sa; + /* clang-tidy doesn't realize accept() initializes sa/sa4 */ + /* NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage) */ + if (htonl(sa4->sin_addr.s_addr) == INADDR_LOOPBACK) + can_splice = true; + } + } + + if (can_splice) tcp_splice_conn_from_sock(c, ref, &conn->splice, s); else tcp_tap_conn_from_sock(c, ref, &conn->tap, s, @@ -3017,47 +3038,16 @@ static void tcp_sock_init4(const struct ctx *c, const struct in_addr *addr, { in_port_t idx = port + c->tcp.fwd_in.delta[port]; union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.index = idx }; - bool spliced = false, tap = true; int s; - if (c->mode == MODE_PASTA) { - spliced = !addr || IN4_IS_ADDR_UNSPECIFIED(addr) || - IN4_IS_ADDR_LOOPBACK(addr); - - if (!addr) - addr = &c->ip4.addr; - - tap = !IN4_IS_ADDR_LOOPBACK(addr); - } - - if (tap) { - s = sock_l4(c, AF_INET, IPPROTO_TCP, addr, ifname, port, - tref.u32); - if (s >= 0) - tcp_sock_set_bufsize(c, s); - else - s = -1; - - if (c->tcp.fwd_in.mode == FWD_AUTO) - tcp_sock_init_ext[port][V4] = s; - } - - if (spliced) { - struct in_addr loopback = { htonl(INADDR_LOOPBACK) }; - tref.tcp.splice = 1; - - addr = &loopback; - - s = sock_l4(c, AF_INET, IPPROTO_TCP, addr, ifname, port, - tref.u32); - if (s >= 0) - tcp_sock_set_bufsize(c, s); - else - s = -1; + s = sock_l4(c, AF_INET, IPPROTO_TCP, addr, ifname, port, tref.u32); + if (s >= 0) + tcp_sock_set_bufsize(c, s); + else + s = -1; - if (c->tcp.fwd_out.mode == FWD_AUTO) - tcp_sock_init_lo[port][V4] = s; - } + if (c->tcp.fwd_in.mode == FWD_AUTO) + tcp_sock_init_ext[port][V4] = s; } /** @@ -3074,47 +3064,16 @@ static void tcp_sock_init6(const struct ctx *c, in_port_t idx = port + 
c->tcp.fwd_in.delta[port]; union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.v6 = 1, .tcp.index = idx }; - bool spliced = false, tap = true; int s; - if (c->mode == MODE_PASTA) { - spliced = !addr || - IN6_IS_ADDR_UNSPECIFIED(addr) || - IN6_IS_ADDR_LOOPBACK(addr); - - if (!addr) - addr = &c->ip6.addr; - - tap = !IN6_IS_ADDR_LOOPBACK(addr); - } - - if (tap) { - s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, - tref.u32); - if (s >= 0) - tcp_sock_set_bufsize(c, s); - else - s = -1; - - if (c->tcp.fwd_in.mode == FWD_AUTO) - tcp_sock_init_ext[port][V6] = s; - } - - if (spliced) { - tref.tcp.splice = 1; - - addr = &in6addr_loopback; - - s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, - tref.u32); - if (s >= 0) - tcp_sock_set_bufsize(c, s); - else - s = -1; + s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, tref.u32); + if (s >= 0) + tcp_sock_set_bufsize(c, s); + else + s = -1; - if (c->tcp.fwd_out.mode == FWD_AUTO) - tcp_sock_init_lo[port][V6] = s; - } + if (c->tcp.fwd_in.mode == FWD_AUTO) + tcp_sock_init_ext[port][V6] = s; } /** @@ -3143,7 +3102,7 @@ static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port) { in_port_t idx = port + c->tcp.fwd_out.delta[port]; union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1, - .tcp.splice = 1, .tcp.index = idx }; + .tcp.index = idx }; struct in_addr loopback = { htonl(INADDR_LOOPBACK) }; int s; @@ -3168,8 +3127,7 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port) { in_port_t idx = port + c->tcp.fwd_out.delta[port]; union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1, - .tcp.splice = 1, .tcp.v6 = 1, - .tcp.index = idx}; + .tcp.v6 = 1, .tcp.index = idx}; int s; assert(c->mode == MODE_PASTA); @@ -3336,7 +3294,6 @@ int tcp_init(struct ctx *c) memset(init_sock_pool6, 0xff, sizeof(init_sock_pool6)); memset(ns_sock_pool4, 0xff, sizeof(ns_sock_pool4)); memset(ns_sock_pool6, 0xff, sizeof(ns_sock_pool6)); - memset(tcp_sock_init_lo, 0xff, 
sizeof(tcp_sock_init_lo)); memset(tcp_sock_init_ext, 0xff, sizeof(tcp_sock_init_ext)); memset(tcp_sock_ns, 0xff, sizeof(tcp_sock_ns)); @@ -3444,16 +3401,6 @@ static int tcp_port_rebind(void *arg) close(tcp_sock_init_ext[port][V6]); tcp_sock_init_ext[port][V6] = -1; } - - if (tcp_sock_init_lo[port][V4] >= 0) { - close(tcp_sock_init_lo[port][V4]); - tcp_sock_init_lo[port][V4] = -1; - } - - if (tcp_sock_init_lo[port][V6] >= 0) { - close(tcp_sock_init_lo[port][V6]); - tcp_sock_init_lo[port][V6] = -1; - } continue; } -- 2.38.1