If we can't bind a few ports out of a port specifier, fine, we don't want to bother the user with that. But running out of files on '-t all' or similar is something different: patch 3/3 fixes this, 1/3 and 2/3 are preparation steps. Stefano Brivio (3): tcp, udp, util: Pass socket creation errors all the way up tcp, udp: Fix partial success return codes in {tcp,udp}_sock_init() conf: Terminate on EMFILE or ENFILE on sockets for port mapping conf.c | 36 +++++++++++++++++++++++++++++------- tcp.c | 29 ++++++++++++++--------------- udp.c | 44 +++++++++++++++++++++----------------------- util.c | 31 ++++++++++++++++++------------- 4 files changed, 82 insertions(+), 58 deletions(-) -- 2.39.2
...starting from sock_l4(), pass negative error (errno) codes instead of -1. They will only be used in two commits from now, no functional changes intended here. Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com> --- tcp.c | 22 ++++++++++++---------- udp.c | 18 +++++++++--------- util.c | 31 ++++++++++++++++++------------- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/tcp.c b/tcp.c index 96ca5c7..e209483 100644 --- a/tcp.c +++ b/tcp.c @@ -2955,7 +2955,7 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, * @addr: Pointer to address for binding, NULL if not configured * @ifname: Name of interface to bind to, NULL if not configured * - * Return: fd for the new listening socket, or -1 on failure + * Return: fd for the new listening socket, negative error code on failure */ static int tcp_sock_init_af(const struct ctx *c, int af, in_port_t port, const struct in_addr *addr, const char *ifname) @@ -2968,13 +2968,13 @@ static int tcp_sock_init_af(const struct ctx *c, int af, in_port_t port, if (c->tcp.fwd_in.mode == FWD_AUTO) { if (af == AF_INET || af == AF_UNSPEC) - tcp_sock_init_ext[port][V4] = s; + tcp_sock_init_ext[port][V4] = s < 0 ? -1 : s; if (af == AF_INET6 || af == AF_UNSPEC) - tcp_sock_init_ext[port][V6] = s; + tcp_sock_init_ext[port][V6] = s < 0 ? -1 : s; } if (s < 0) - return -1; + return s; tcp_sock_set_bufsize(c, s); return s; @@ -2988,12 +2988,12 @@ static int tcp_sock_init_af(const struct ctx *c, int af, in_port_t port, * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order * - * Return: 0 on (partial) success, -1 on (complete) failure + * Return: 0 on (partial) success, negative error code on (complete) failure */ int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, const char *ifname, in_port_t port) { - int ret = 0; + int ret = 0, af_ret; if (af == AF_UNSPEC && c->ifi4 && c->ifi6) /* Attempt to get a dual stack socket */ @@ -3002,13 +3002,15 @@ int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, /* Otherwise create a socket per IP version */ if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) { - if (tcp_sock_init_af(c, AF_INET, port, addr, ifname) < 0) - ret = -1; + af_ret = tcp_sock_init_af(c, AF_INET, port, addr, ifname); + if (af_ret < 0) + ret = af_ret; } if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) { - if (tcp_sock_init_af(c, AF_INET6, port, addr, ifname) < 0) - ret = -1; + af_ret = tcp_sock_init_af(c, AF_INET6, port, addr, ifname); + if (af_ret < 0) + ret = af_ret; } return ret; diff --git a/udp.c b/udp.c index 1077cde..41e0afd 100644 --- a/udp.c +++ b/udp.c @@ -977,7 +977,7 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order * - * Return: 0 on (partial) success, -1 on (complete) failure + * Return: 0 on (partial) success, negative error code on (complete) failure */ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, const void *addr, const char *ifname, in_port_t port) @@ -1002,19 +1002,19 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, s = sock_l4(c, AF_INET, IPPROTO_UDP, addr, ifname, port, uref.u32); - udp_tap_map[V4][uref.udp.port].sock = s; - udp_splice_init[V4][port].sock = s; + udp_tap_map[V4][uref.udp.port].sock = s < 0 ? -1 : s; + udp_splice_init[V4][port].sock = s < 0 ? -1 : s; } else { struct in_addr loopback = { htonl(INADDR_LOOPBACK) }; uref.udp.ns = true; s = sock_l4(c, AF_INET, IPPROTO_UDP, &loopback, ifname, port, uref.u32); - udp_splice_ns[V4][port].sock = s; + udp_splice_ns[V4][port].sock = s < 0 ? -1 : s; } if (s < 0) - ret = -1; + ret = s; } if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) { @@ -1026,18 +1026,18 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, s = sock_l4(c, AF_INET6, IPPROTO_UDP, addr, ifname, port, uref.u32); - udp_tap_map[V6][uref.udp.port].sock = s; - udp_splice_init[V6][port].sock = s; + udp_tap_map[V6][uref.udp.port].sock = s < 0 ? -1 : s; + udp_splice_init[V6][port].sock = s < 0 ? -1 : s; } else { uref.udp.ns = true; s = sock_l4(c, AF_INET6, IPPROTO_UDP, &in6addr_loopback, ifname, port, uref.u32); - udp_splice_ns[V6][port].sock = s; + udp_splice_ns[V6][port].sock = s < 0 ? -1 : s; } if (s < 0) - ret = -1; + ret = s; } return ret; diff --git a/util.c b/util.c index 484889b..fddb5ed 100644 --- a/util.c +++ b/util.c @@ -96,7 +96,7 @@ found: * @port: Port, host order * @data: epoll reference portion for protocol handlers * - * Return: newly created socket, -1 on error + * Return: newly created socket, negative error code on failure */ int sock_l4(const struct ctx *c, int af, uint8_t proto, const void *bind_addr, const char *ifname, uint16_t port, @@ -115,16 +115,16 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, }; const struct sockaddr *sa; bool dual_stack = false; + int fd, sl, y = 1, ret; struct epoll_event ev; - int fd, sl, y = 1; if (proto != IPPROTO_TCP && proto != IPPROTO_UDP && proto != IPPROTO_ICMP && proto != IPPROTO_ICMPV6) - return -1; /* Not implemented. */ + return -EPFNOSUPPORT; /* Not implemented. */ if (af == AF_UNSPEC) { if (!DUAL_STACK_SOCKETS || bind_addr) - return -1; + return -EINVAL; dual_stack = true; af = AF_INET6; } @@ -134,14 +134,15 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, else fd = socket(af, SOCK_DGRAM | SOCK_NONBLOCK, proto); + ret = -errno; if (fd < 0) { - warn("L4 socket: %s", strerror(errno)); - return -1; + warn("L4 socket: %s", strerror(-ret)); + return ret; } if (fd > SOCKET_MAX) { close(fd); - return -1; + return -EBADF; } ref.r.s = fd; @@ -186,10 +187,11 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, */ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname))) { + ret = -errno; warn("Can't bind socket for %s port %u to %s, closing", ip_proto_str[proto], port, ifname); close(fd); - return -1; + return ret; } } @@ -200,22 +202,25 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, * broken SELinux policy, see icmp_tap_handler(). */ if (proto != IPPROTO_ICMP && proto != IPPROTO_ICMPV6) { + ret = -errno; close(fd); - return -1; + return ret; } } if (proto == IPPROTO_TCP && listen(fd, 128) < 0) { - warn("TCP socket listen: %s", strerror(errno)); + ret = -errno; + warn("TCP socket listen: %s", strerror(-ret)); close(fd); - return -1; + return ret; } ev.events = EPOLLIN; ev.data.u64 = ref.u64; if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) { - warn("L4 epoll_ctl: %s", strerror(errno)); - return -1; + ret = -errno; + warn("L4 epoll_ctl: %s", strerror(-ret)); + return ret; } return fd; -- 2.39.2
On Wed, Mar 08, 2023 at 01:33:46PM +0100, Stefano Brivio wrote:...starting from sock_l4(), pass negative error (errno) codes instead of -1. They will only be used in two commits from now, no functional changes intended here. Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com>Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au>--- tcp.c | 22 ++++++++++++---------- udp.c | 18 +++++++++--------- util.c | 31 ++++++++++++++++++------------- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/tcp.c b/tcp.c index 96ca5c7..e209483 100644 --- a/tcp.c +++ b/tcp.c @@ -2955,7 +2955,7 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, * @addr: Pointer to address for binding, NULL if not configured * @ifname: Name of interface to bind to, NULL if not configured * - * Return: fd for the new listening socket, or -1 on failure + * Return: fd for the new listening socket, negative error code on failure */ static int tcp_sock_init_af(const struct ctx *c, int af, in_port_t port, const struct in_addr *addr, const char *ifname) @@ -2968,13 +2968,13 @@ static int tcp_sock_init_af(const struct ctx *c, int af, in_port_t port, if (c->tcp.fwd_in.mode == FWD_AUTO) { if (af == AF_INET || af == AF_UNSPEC) - tcp_sock_init_ext[port][V4] = s; + tcp_sock_init_ext[port][V4] = s < 0 ? -1 : s; if (af == AF_INET6 || af == AF_UNSPEC) - tcp_sock_init_ext[port][V6] = s; + tcp_sock_init_ext[port][V6] = s < 0 ? -1 : s; } if (s < 0) - return -1; + return s; tcp_sock_set_bufsize(c, s); return s; @@ -2988,12 +2988,12 @@ static int tcp_sock_init_af(const struct ctx *c, int af, in_port_t port, * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order * - * Return: 0 on (partial) success, -1 on (complete) failure + * Return: 0 on (partial) success, negative error code on (complete) failure */ int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, const char *ifname, in_port_t port) { - int ret = 0; + int ret = 0, af_ret; if (af == AF_UNSPEC && c->ifi4 && c->ifi6) /* Attempt to get a dual stack socket */ @@ -3002,13 +3002,15 @@ int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, /* Otherwise create a socket per IP version */ if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) { - if (tcp_sock_init_af(c, AF_INET, port, addr, ifname) < 0) - ret = -1; + af_ret = tcp_sock_init_af(c, AF_INET, port, addr, ifname); + if (af_ret < 0) + ret = af_ret; } if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) { - if (tcp_sock_init_af(c, AF_INET6, port, addr, ifname) < 0) - ret = -1; + af_ret = tcp_sock_init_af(c, AF_INET6, port, addr, ifname); + if (af_ret < 0) + ret = af_ret; } return ret; diff --git a/udp.c b/udp.c index 1077cde..41e0afd 100644 --- a/udp.c +++ b/udp.c @@ -977,7 +977,7 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order * - * Return: 0 on (partial) success, -1 on (complete) failure + * Return: 0 on (partial) success, negative error code on (complete) failure */ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, const void *addr, const char *ifname, in_port_t port) @@ -1002,19 +1002,19 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, s = sock_l4(c, AF_INET, IPPROTO_UDP, addr, ifname, port, uref.u32); - udp_tap_map[V4][uref.udp.port].sock = s; - udp_splice_init[V4][port].sock = s; + udp_tap_map[V4][uref.udp.port].sock = s < 0 ? -1 : s; + udp_splice_init[V4][port].sock = s < 0 ? -1 : s; } else { struct in_addr loopback = { htonl(INADDR_LOOPBACK) }; uref.udp.ns = true; s = sock_l4(c, AF_INET, IPPROTO_UDP, &loopback, ifname, port, uref.u32); - udp_splice_ns[V4][port].sock = s; + udp_splice_ns[V4][port].sock = s < 0 ? -1 : s; } if (s < 0) - ret = -1; + ret = s; } if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) { @@ -1026,18 +1026,18 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, s = sock_l4(c, AF_INET6, IPPROTO_UDP, addr, ifname, port, uref.u32); - udp_tap_map[V6][uref.udp.port].sock = s; - udp_splice_init[V6][port].sock = s; + udp_tap_map[V6][uref.udp.port].sock = s < 0 ? -1 : s; + udp_splice_init[V6][port].sock = s < 0 ? -1 : s; } else { uref.udp.ns = true; s = sock_l4(c, AF_INET6, IPPROTO_UDP, &in6addr_loopback, ifname, port, uref.u32); - udp_splice_ns[V6][port].sock = s; + udp_splice_ns[V6][port].sock = s < 0 ? -1 : s; } if (s < 0) - ret = -1; + ret = s; } return ret; diff --git a/util.c b/util.c index 484889b..fddb5ed 100644 --- a/util.c +++ b/util.c @@ -96,7 +96,7 @@ found: * @port: Port, host order * @data: epoll reference portion for protocol handlers * - * Return: newly created socket, -1 on error + * Return: newly created socket, negative error code on failure */ int sock_l4(const struct ctx *c, int af, uint8_t proto, const void *bind_addr, const char *ifname, uint16_t port, @@ -115,16 +115,16 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, }; const struct sockaddr *sa; bool dual_stack = false; + int fd, sl, y = 1, ret; struct epoll_event ev; - int fd, sl, y = 1; if (proto != IPPROTO_TCP && proto != IPPROTO_UDP && proto != IPPROTO_ICMP && proto != IPPROTO_ICMPV6) - return -1; /* Not implemented. */ + return -EPFNOSUPPORT; /* Not implemented. */ if (af == AF_UNSPEC) { if (!DUAL_STACK_SOCKETS || bind_addr) - return -1; + return -EINVAL; dual_stack = true; af = AF_INET6; } @@ -134,14 +134,15 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, else fd = socket(af, SOCK_DGRAM | SOCK_NONBLOCK, proto); + ret = -errno; if (fd < 0) { - warn("L4 socket: %s", strerror(errno)); - return -1; + warn("L4 socket: %s", strerror(-ret)); + return ret; } if (fd > SOCKET_MAX) { close(fd); - return -1; + return -EBADF; } ref.r.s = fd; @@ -186,10 +187,11 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, */ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname))) { + ret = -errno; warn("Can't bind socket for %s port %u to %s, closing", ip_proto_str[proto], port, ifname); close(fd); - return -1; + return ret; } } @@ -200,22 +202,25 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, * broken SELinux policy, see icmp_tap_handler(). */ if (proto != IPPROTO_ICMP && proto != IPPROTO_ICMPV6) { + ret = -errno; close(fd); - return -1; + return ret; } } if (proto == IPPROTO_TCP && listen(fd, 128) < 0) { - warn("TCP socket listen: %s", strerror(errno)); + ret = -errno; + warn("TCP socket listen: %s", strerror(-ret)); close(fd); - return -1; + return ret; } ev.events = EPOLLIN; ev.data.u64 = ref.u64; if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) { - warn("L4 epoll_ctl: %s", strerror(errno)); - return -1; + ret = -errno; + warn("L4 epoll_ctl: %s", strerror(-ret)); + return ret; } return fd;-- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
The comments say we should return 0 on partial success, and an error code on complete failure. Rationale: if the user configures a port forwarding, and we succeed to bind that port for IPv4 or IPv6 only, that might actually be what the user intended. Adjust the two functions to reflect the comments. Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com> --- tcp.c | 21 +++++++++------------ udp.c | 30 ++++++++++++++---------------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/tcp.c b/tcp.c index e209483..a29e387 100644 --- a/tcp.c +++ b/tcp.c @@ -2993,7 +2993,7 @@ static int tcp_sock_init_af(const struct ctx *c, int af, in_port_t port, int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, const char *ifname, in_port_t port) { - int ret = 0, af_ret; + int r4 = SOCKET_MAX + 1, r6 = SOCKET_MAX + 1; if (af == AF_UNSPEC && c->ifi4 && c->ifi6) /* Attempt to get a dual stack socket */ @@ -3001,19 +3001,16 @@ int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, return 0; /* Otherwise create a socket per IP version */ - if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) { - af_ret = tcp_sock_init_af(c, AF_INET, port, addr, ifname); - if (af_ret < 0) - ret = af_ret; - } + if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) + r4 = tcp_sock_init_af(c, AF_INET, port, addr, ifname); - if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) { - af_ret = tcp_sock_init_af(c, AF_INET6, port, addr, ifname); - if (af_ret < 0) - ret = af_ret; - } + if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) + r6 = tcp_sock_init_af(c, AF_INET6, port, addr, ifname); - return ret; + if (IN_INTERVAL(0, SOCKET_MAX, r4) || IN_INTERVAL(0, SOCKET_MAX, r6)) + return 0; + + return r4 < 0 ? r4 : r6; } /** diff --git a/udp.c b/udp.c index 41e0afd..a5f7537 100644 --- a/udp.c +++ b/udp.c @@ -983,7 +983,7 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, const void *addr, const char *ifname, in_port_t port) { union udp_epoll_ref uref = { .u32 = 0 }; - int s, ret = 0; + int s, r4 = SOCKET_MAX + 1, r6 = SOCKET_MAX + 1; if (ns) { uref.udp.port = (in_port_t)(port + @@ -999,8 +999,8 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, uref.udp.orig = true; if (!ns) { - s = sock_l4(c, AF_INET, IPPROTO_UDP, addr, ifname, - port, uref.u32); + r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP, addr, + ifname, port, uref.u32); udp_tap_map[V4][uref.udp.port].sock = s < 0 ? -1 : s; udp_splice_init[V4][port].sock = s < 0 ? -1 : s; @@ -1008,13 +1008,10 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, struct in_addr loopback = { htonl(INADDR_LOOPBACK) }; uref.udp.ns = true; - s = sock_l4(c, AF_INET, IPPROTO_UDP, &loopback, - ifname, port, uref.u32); + r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP, &loopback, + ifname, port, uref.u32); udp_splice_ns[V4][port].sock = s < 0 ? -1 : s; } - - if (s < 0) - ret = s; } if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) { @@ -1023,24 +1020,25 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, uref.udp.orig = true; if (!ns) { - s = sock_l4(c, AF_INET6, IPPROTO_UDP, addr, ifname, - port, uref.u32); + r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP, addr, + ifname, port, uref.u32); udp_tap_map[V6][uref.udp.port].sock = s < 0 ? -1 : s; udp_splice_init[V6][port].sock = s < 0 ? -1 : s; } else { uref.udp.ns = true; - s = sock_l4(c, AF_INET6, IPPROTO_UDP, &in6addr_loopback, - ifname, port, uref.u32); + r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP, + &in6addr_loopback, + ifname, port, uref.u32); udp_splice_ns[V6][port].sock = s < 0 ? -1 : s; } - - if (s < 0) - ret = s; } - return ret; + if (IN_INTERVAL(0, SOCKET_MAX, r4) || IN_INTERVAL(0, SOCKET_MAX, r6)) + return 0; + + return r4 < 0 ? r4 : r6; } /** -- 2.39.2
On Wed, Mar 08, 2023 at 01:33:47PM +0100, Stefano Brivio wrote:The comments say we should return 0 on partial success, and an error code on complete failure. Rationale: if the user configures a port forwarding, and we succeed to bind that port for IPv4 or IPv6 only, that might actually be what the user intended. Adjust the two functions to reflect the comments. Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com>Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au>--- tcp.c | 21 +++++++++------------ udp.c | 30 ++++++++++++++---------------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/tcp.c b/tcp.c index e209483..a29e387 100644 --- a/tcp.c +++ b/tcp.c @@ -2993,7 +2993,7 @@ static int tcp_sock_init_af(const struct ctx *c, int af, in_port_t port, int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, const char *ifname, in_port_t port) { - int ret = 0, af_ret; + int r4 = SOCKET_MAX + 1, r6 = SOCKET_MAX + 1; if (af == AF_UNSPEC && c->ifi4 && c->ifi6) /* Attempt to get a dual stack socket */ @@ -3001,19 +3001,16 @@ int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, return 0; /* Otherwise create a socket per IP version */ - if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) { - af_ret = tcp_sock_init_af(c, AF_INET, port, addr, ifname); - if (af_ret < 0) - ret = af_ret; - } + if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) + r4 = tcp_sock_init_af(c, AF_INET, port, addr, ifname); - if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) { - af_ret = tcp_sock_init_af(c, AF_INET6, port, addr, ifname); - if (af_ret < 0) - ret = af_ret; - } + if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) + r6 = tcp_sock_init_af(c, AF_INET6, port, addr, ifname); - return ret; + if (IN_INTERVAL(0, SOCKET_MAX, r4) || IN_INTERVAL(0, SOCKET_MAX, r6)) + return 0; + + return r4 < 0 ? r4 : r6; } /** diff --git a/udp.c b/udp.c index 41e0afd..a5f7537 100644 --- a/udp.c +++ b/udp.c @@ -983,7 +983,7 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, const void *addr, const char *ifname, in_port_t port) { union udp_epoll_ref uref = { .u32 = 0 }; - int s, ret = 0; + int s, r4 = SOCKET_MAX + 1, r6 = SOCKET_MAX + 1; if (ns) { uref.udp.port = (in_port_t)(port + @@ -999,8 +999,8 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, uref.udp.orig = true; if (!ns) { - s = sock_l4(c, AF_INET, IPPROTO_UDP, addr, ifname, - port, uref.u32); + r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP, addr, + ifname, port, uref.u32); udp_tap_map[V4][uref.udp.port].sock = s < 0 ? -1 : s; udp_splice_init[V4][port].sock = s < 0 ? -1 : s; @@ -1008,13 +1008,10 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, struct in_addr loopback = { htonl(INADDR_LOOPBACK) }; uref.udp.ns = true; - s = sock_l4(c, AF_INET, IPPROTO_UDP, &loopback, - ifname, port, uref.u32); + r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP, &loopback, + ifname, port, uref.u32); udp_splice_ns[V4][port].sock = s < 0 ? -1 : s; } - - if (s < 0) - ret = s; } if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) { @@ -1023,24 +1020,25 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, uref.udp.orig = true; if (!ns) { - s = sock_l4(c, AF_INET6, IPPROTO_UDP, addr, ifname, - port, uref.u32); + r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP, addr, + ifname, port, uref.u32); udp_tap_map[V6][uref.udp.port].sock = s < 0 ? -1 : s; udp_splice_init[V6][port].sock = s < 0 ? -1 : s; } else { uref.udp.ns = true; - s = sock_l4(c, AF_INET6, IPPROTO_UDP, &in6addr_loopback, - ifname, port, uref.u32); + r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP, + &in6addr_loopback, + ifname, port, uref.u32); udp_splice_ns[V6][port].sock = s < 0 ? -1 : s; } - - if (s < 0) - ret = s; } - return ret; + if (IN_INTERVAL(0, SOCKET_MAX, r4) || IN_INTERVAL(0, SOCKET_MAX, r6)) + return 0; + + return r4 < 0 ? r4 : r6;Same comment (unless sock_l4() already looks for} /**-- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
In general, we don't terminate or report failures if we fail to bind to some ports out of a given port range specifier, to allow users to conveniently specify big port ranges (or "all") without having to care about ports that might already be in use. However, running out of the open file descriptors quota is a different story: we can't do what the user requested in a very substantial way. For example, if the user specifies '-t all' and we can only bind 1024 sockets, the behaviour is rather unexpected. Fail whenever socket creation returns -ENFILE or -EMFILE. Link: https://bugs.passt.top/show_bug.cgi?id=27 Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com> --- conf.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/conf.c b/conf.c index 582c391..ee56501 100644 --- a/conf.c +++ b/conf.c @@ -184,6 +184,7 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, bool exclude_only = true, bound_one = false; uint8_t exclude[PORT_BITMAP_SIZE] = { 0 }; sa_family_t af = AF_UNSPEC; + int ret; if (!strcmp(optarg, "none")) { if (fwd->mode) @@ -218,11 +219,18 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, for (i = 0; i < PORT_EPHEMERAL_MIN; i++) { if (optname == 't') { - if (!tcp_sock_init(c, AF_UNSPEC, NULL, NULL, i)) + ret = tcp_sock_init(c, AF_UNSPEC, NULL, NULL, + i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else if (optname == 'u') { - if (!udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, - i)) + ret = udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, + i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } } @@ -303,10 +311,16 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, bitmap_set(fwd->map, i); if (optname == 't') { - if (!tcp_sock_init(c, af, addr, ifname, i)) + ret = tcp_sock_init(c, af, addr, ifname, i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else if (optname == 'u') { - if (!udp_sock_init(c, 0, af, addr, ifname, i)) + ret = udp_sock_init(c, 0, af, addr, ifname, i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else { /* No way to check in advance for -T and -U */ @@ -358,10 +372,16 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, fwd->delta[i] = mapped_range.first - orig_range.first; if (optname == 't') { - if (!tcp_sock_init(c, af, addr, ifname, i)) + ret = tcp_sock_init(c, af, addr, ifname, i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else if (optname == 'u') { - if (!udp_sock_init(c, 0, af, addr, ifname, i)) + ret = udp_sock_init(c, 0, af, addr, ifname, i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else { /* No way to check in advance for -T and -U */ @@ -374,6 +394,8 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, goto bind_fail; return; +enfile: + die("Can't open enough sockets for port specifier: %s", optarg); bad: die("Invalid port specifier %s", optarg); overlap: -- 2.39.2
On Wed, Mar 08, 2023 at 01:33:48PM +0100, Stefano Brivio wrote:In general, we don't terminate or report failures if we fail to bind to some ports out of a given port range specifier, to allow users to conveniently specify big port ranges (or "all") without having to care about ports that might already be in use. However, running out of the open file descriptors quota is a different story: we can't do what the user requested in a very substantial way. For example, if the user specifies '-t all' and we can only bind 1024 sockets, the behaviour is rather unexpected. Fail whenever socket creation returns -ENFILE or -EMFILE. Link: https://bugs.passt.top/show_bug.cgi?id=27 Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com>Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au>--- conf.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/conf.c b/conf.c index 582c391..ee56501 100644 --- a/conf.c +++ b/conf.c @@ -184,6 +184,7 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, bool exclude_only = true, bound_one = false; uint8_t exclude[PORT_BITMAP_SIZE] = { 0 }; sa_family_t af = AF_UNSPEC; + int ret; if (!strcmp(optarg, "none")) { if (fwd->mode) @@ -218,11 +219,18 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, for (i = 0; i < PORT_EPHEMERAL_MIN; i++) { if (optname == 't') { - if (!tcp_sock_init(c, AF_UNSPEC, NULL, NULL, i)) + ret = tcp_sock_init(c, AF_UNSPEC, NULL, NULL, + i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else if (optname == 'u') { - if (!udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, - i)) + ret = udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, + i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } } @@ -303,10 +311,16 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, bitmap_set(fwd->map, i); if (optname == 't') { - if (!tcp_sock_init(c, af, addr, ifname, i)) + ret = tcp_sock_init(c, af, addr, ifname, i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else if (optname == 'u') { - if (!udp_sock_init(c, 0, af, addr, ifname, i)) + ret = udp_sock_init(c, 0, af, addr, ifname, i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else { /* No way to check in advance for -T and -U */ @@ -358,10 +372,16 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, fwd->delta[i] = mapped_range.first - orig_range.first; if (optname == 't') { - if (!tcp_sock_init(c, af, addr, ifname, i)) + ret = tcp_sock_init(c, af, addr, ifname, i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else if (optname == 'u') { - if (!udp_sock_init(c, 0, af, addr, ifname, i)) + ret = udp_sock_init(c, 0, af, addr, ifname, i); + if (ret == -ENFILE || ret == -EMFILE) + goto enfile; + if (!ret) bound_one = true; } else { /* No way to check in advance for -T and -U */ @@ -374,6 +394,8 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg, goto bind_fail; return; +enfile: + die("Can't open enough sockets for port specifier: %s", optarg); bad: die("Invalid port specifier %s", optarg); overlap:-- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson