The sock_l4() function is very convenient for creating sockets bound to a given address, but its interface has some problems. Most importantly, the address and port alone aren't enough in some cases. For link-local addresses (at least) we also need the pif in order to properly construct a socket adddress. This case doesn't yet arise, but it might cause us trouble in future. Additionally, sock_l4() can take AF_UNSPEC with the special meaning that it should attempt to create a "dual stack" socket which will respond to both IPv4 and IPv6 traffic. This only makes sense if there is no specific address given. We verify this at runtime, but it would be nicer if we could enforce it structurally. For sockets associated specifically with a single flow we already replaced sock_l4() with flowside_sock_l4() which avoids those problems. Now, replace all the remaining users with a new pif_sock_l4() which also takes an explicit pif. The new function takes the address as an inany *, with NULL indicating the dual stack case. This does add some complexity in some of the callers, however future planned cleanups should make this go away again. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- pif.c | 42 ++++++++++++++++++++++++++++++++++++++++++ pif.h | 3 +++ tcp.c | 22 +++++++++++++++++----- udp.c | 34 ++++++++++++++++++++++------------ util.c | 52 ---------------------------------------------------- util.h | 3 --- 6 files changed, 84 insertions(+), 72 deletions(-) diff --git a/pif.c b/pif.c index a099e31b..592fafaa 100644 --- a/pif.c +++ b/pif.c @@ -59,3 +59,45 @@ void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl, *sl = sizeof(sa->sa6); } } + +/** pif_sock_l4() - Open a socket bound to an address on a specified interface + * @c: Execution context + * @type: Socket epoll type + * @pif: Interface for this socket + * @addr: Address to bind to, or NULL for dual-stack any + * @ifname: Interface for binding, NULL for any + * @port: Port number to bind to (host byte order) + * @data: epoll reference portion for protocol handlers + * + * NOTE: For namespace pifs, this must be called having already entered the + * relevant namespace. + * + * Return: newly created socket, negative error code on failure + */ +int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif, + const union inany_addr *addr, const char *ifname, + in_port_t port, uint32_t data) +{ + union sockaddr_inany sa = { + .sa6.sin6_family = AF_INET6, + .sa6.sin6_addr = in6addr_any, + .sa6.sin6_port = htons(port), + }; + socklen_t sl; + + ASSERT(pif_is_socket(pif)); + + if (pif == PIF_SPLICE) { + /* Sanity checks */ + ASSERT(!ifname); + ASSERT(addr && inany_is_loopback(addr)); + } + + if (!addr) + return sock_l4_sa(c, type, &sa, sizeof(sa.sa6), + ifname, false, data); + + pif_sockaddr(c, &sa, &sl, pif, addr, port); + return sock_l4_sa(c, type, &sa, sl, + ifname, sa.sa_family == AF_INET6, data); +} diff --git a/pif.h b/pif.h index 8777bb5b..f029282b 100644 --- a/pif.h +++ b/pif.h @@ -59,5 +59,8 @@ static inline bool pif_is_socket(uint8_t pif) void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl, uint8_t pif, const union inany_addr *addr, in_port_t port); +int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif, + const union inany_addr *addr, const char *ifname, + in_port_t port, uint32_t data); #endif /* PIF_H */ diff --git a/tcp.c b/tcp.c index 1962fcc4..49e0cfe3 100644 --- a/tcp.c +++ b/tcp.c @@ -2291,7 +2291,19 @@ static int tcp_sock_init_af(const struct ctx *c, sa_family_t af, in_port_t port, }; int s; - s = sock_l4(c, af, EPOLL_TYPE_TCP_LISTEN, addr, ifname, port, tref.u32); + if (af == AF_UNSPEC) { + ASSERT(!addr); + s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_HOST, NULL, + ifname, port, tref.u32); + } else { + union inany_addr aany = af == AF_INET ? inany_any4 : inany_any6; + + if (addr) + inany_from_af(&aany, af, addr); + + s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_HOST, &aany, + ifname, port, tref.u32); + } if (c->tcp.fwd_in.mode == FWD_AUTO) { if (af == AF_INET || af == AF_UNSPEC) @@ -2357,8 +2369,8 @@ static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port) ASSERT(c->mode == MODE_PASTA); - s = sock_l4(c, AF_INET, EPOLL_TYPE_TCP_LISTEN, &in4addr_loopback, - NULL, port, tref.u32); + s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_SPLICE, &inany_loopback4, + NULL, port, tref.u32); if (s >= 0) tcp_sock_set_bufsize(c, s); else @@ -2383,8 +2395,8 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port) ASSERT(c->mode == MODE_PASTA); - s = sock_l4(c, AF_INET6, EPOLL_TYPE_TCP_LISTEN, &in6addr_loopback, - NULL, port, tref.u32); + s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_SPLICE, &inany_loopback6, + NULL, port, tref.u32); if (s >= 0) tcp_sock_set_bufsize(c, s); else diff --git a/udp.c b/udp.c index 8cea80cd..b3d4a64c 100644 --- a/udp.c +++ b/udp.c @@ -809,8 +809,8 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, ASSERT(!addr); /* Attempt to get a dual stack socket */ - s = sock_l4(c, AF_UNSPEC, EPOLL_TYPE_UDP_LISTEN, - NULL, ifname, port, uref.u32); + s = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_HOST, + NULL, ifname, port, uref.u32); udp_splice_init[V4][port] = s < 0 ? -1 : s; udp_splice_init[V6][port] = s < 0 ? -1 : s; if (IN_INTERVAL(0, FD_REF_MAX, s)) @@ -819,28 +819,38 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) { if (!ns) { - r4 = sock_l4(c, AF_INET, EPOLL_TYPE_UDP_LISTEN, - addr, ifname, port, uref.u32); + union inany_addr aany = inany_any4; + + if (addr) + inany_from_af(&aany, AF_INET, addr); + + r4 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_HOST, + &aany, ifname, port, uref.u32); udp_splice_init[V4][port] = r4 < 0 ? -1 : r4; } else { - r4 = sock_l4(c, AF_INET, EPOLL_TYPE_UDP_LISTEN, - &in4addr_loopback, - ifname, port, uref.u32); + r4 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_SPLICE, + &inany_loopback4, ifname, + port, uref.u32); udp_splice_ns[V4][port] = r4 < 0 ? -1 : r4; } } if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) { if (!ns) { - r6 = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP_LISTEN, - addr, ifname, port, uref.u32); + union inany_addr aany = inany_any6; + + if (addr) + inany_from_af(&aany, AF_INET6, addr); + + r6 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_HOST, + &aany, ifname, port, uref.u32); udp_splice_init[V6][port] = r6 < 0 ? -1 : r6; } else { - r6 = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP_LISTEN, - &in6addr_loopback, - ifname, port, uref.u32); + r6 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_SPLICE, + &inany_loopback6, ifname, + port, uref.u32); udp_splice_ns[V6][port] = r6 < 0 ? -1 : r6; } } diff --git a/util.c b/util.c index 87309c51..ebd93ed2 100644 --- a/util.c +++ b/util.c @@ -157,58 +157,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, return fd; } -/** - * sock_l4() - Create and bind socket for given L4, add to epoll list - * @c: Execution context - * @af: Address family, AF_INET or AF_INET6 - * @type: epoll type - * @bind_addr: Address for binding, NULL for any - * @ifname: Interface for binding, NULL for any - * @port: Port, host order - * @data: epoll reference portion for protocol handlers - * - * Return: newly created socket, negative error code on failure - */ -int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type, - const void *bind_addr, const char *ifname, uint16_t port, - uint32_t data) -{ - switch (af) { - case AF_INET: { - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(port), - { 0 }, { 0 }, - }; - if (bind_addr) - addr4.sin_addr = *(struct in_addr *)bind_addr; - return sock_l4_sa(c, type, &addr4, sizeof(addr4), ifname, - false, data); - } - - case AF_UNSPEC: - if (!DUAL_STACK_SOCKETS || bind_addr) - return -EINVAL; - /* fallthrough */ - case AF_INET6: { - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(port), - 0, IN6ADDR_ANY_INIT, 0, - }; - if (bind_addr) { - addr6.sin6_addr = *(struct in6_addr *)bind_addr; - - if (IN6_IS_ADDR_LINKLOCAL(bind_addr)) - addr6.sin6_scope_id = c->ifi6; - } - return sock_l4_sa(c, type, &addr6, sizeof(addr6), ifname, - af == AF_INET6, data); - } - default: - return -EINVAL; - } -} /** * sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed diff --git a/util.h b/util.h index 5e67f1fa..2c1e08e0 100644 --- a/util.h +++ b/util.h @@ -181,9 +181,6 @@ int close_range(unsigned int first, unsigned int last, int flags) { int sock_l4_sa(const struct ctx *c, enum epoll_type type, const void *sa, socklen_t sl, const char *ifname, bool v6only, uint32_t data); -int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type, - const void *bind_addr, const char *ifname, uint16_t port, - uint32_t data); void sock_probe_mem(struct ctx *c); long timespec_diff_ms(const struct timespec *a, const struct timespec *b); int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b); -- 2.46.1