On Wed, Feb 19, 2025 at 02:30:07PM -0500, Jon Maloy wrote:When a local peer sends a UDP message to a non-existing port on an existing remote host, that host will return an ICMP message containing the error code ICMP_PORT_UNREACH, plus the header and the first eight bytes of the original message. If the sender socket has been connected, it uses this message to issue a "Connection Refused" event to the user. Until now, we have only read such events from the externally facing socket, but we don't forward them back to the local sender because we cannot read the ICMP message directly to user space. Because of this, the local peer will hang and wait for a response that never arrives. We now fix this for IPv4 by recreating and forwarding a correct ICMP message back to the internal sender. We synthesize the message based on the information in the extended error structure, plus the returned part of the original message body. Note that for the sake of completeness, we even produce ICMP messages for other error codes. We have noticed that at least ICMP_PROT_UNREACH is propagated as an error event back to the user. Signed-off-by: Jon Maloy <jmaloy(a)redhat.com>Almost there... [snip]ee = (const struct sock_extended_err *)CMSG_DATA(hdr); - - /* TODO: When possible propagate and otherwise handle errors */ + if (ee->ee_type == ICMP_DEST_UNREACH) { + flow_sidx_t sidx; + struct udp_flow *flow; + const struct flowside *toside; + + if (ref.type == EPOLL_TYPE_UDP_LISTEN) { + sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, + &saddr, ref.udp.port); + flow = udp_at_sidx(sidx); + if (!flow) { + err("Unexpected cmsg reading error queue"); + return -1;This is too strong a response if you can't find a flow. This will happen if we get some random ICMP which doesn't match an existing flow. It could be a long delayed ICMP from some flow that already expired, some peer confused by something, or a malicious actor. This will log a hard error (without rate limiting) and break scanning for any additional errors. I think we want just a trace() and return 1; here (we did receive and process an error - but all we could do was drop it).+ } + flow->ts = now->tv_sec; + sidx = flow_sidx_opposite(sidx); + } else { + sidx = flow_sidx_opposite(ref.flowside); + } + toside = flowside_at_sidx(sidx); + udp_send_conn_fail_icmp4(c, ee, toside, udp_data, rc); + } debug("%s error on UDP socket %i: %s", str_ee_origin(ee), s, strerror_(ee->ee_errno)); @@ -461,15 +528,18 @@ static int udp_sock_recverr(int s) /** * udp_sock_errs() - Process errors on a socket * @c: Execution context - * @s: Socket to receive from + * @ref: epoll reference * @events: epoll events bitmap + * @now: Current timestamp * * Return: Number of errors handled, or < 0 if we have an unrecoverable error */ -int udp_sock_errs(const struct ctx *c, int s, uint32_t events) +int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events, + const struct timespec *now) { unsigned n_err = 0; socklen_t errlen; + int s = ref.fd; int rc, err; ASSERT(!c->no_udp); @@ -478,7 +548,7 @@ int udp_sock_errs(const struct ctx *c, int s, uint32_t events) return 0; /* Nothing to do */ /* Empty the error queue */ - while ((rc = udp_sock_recverr(s)) > 0) + while ((rc = udp_sock_recverr(c, ref, now)) > 0) n_err += rc; if (rc < 0) @@ -558,7 +628,7 @@ static void udp_buf_listen_sock_handler(const struct ctx *c, const socklen_t sasize = sizeof(udp_meta[0].s_in); int n, i; - if (udp_sock_errs(c, ref.fd, events) < 0) { + if (udp_sock_errs(c, ref, events, now) < 0) { err("UDP: Unrecoverable error on listening socket:" " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port); /* FIXME: what now? close/re-open socket? */ @@ -661,7 +731,7 @@ static void udp_buf_reply_sock_handler(const struct ctx *c, union epoll_ref ref, from_s = uflow->s[ref.flowside.sidei]; - if (udp_sock_errs(c, from_s, events) < 0) { + if (udp_sock_errs(c, ref, events, now) < 0) { flow_err(uflow, "Unrecoverable error on reply socket"); flow_err_details(uflow); udp_flow_close(c, uflow); diff --git a/udp_internal.h b/udp_internal.h index cc80e30..c5f8304 100644 --- a/udp_internal.h +++ b/udp_internal.h @@ -30,5 +30,6 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, const struct flowside *toside, size_t dlen, bool no_udp_csum); -int udp_sock_errs(const struct ctx *c, int s, uint32_t events); +int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events, + const struct timespec *now); #endif /* UDP_INTERNAL_H */ diff --git a/udp_vu.c b/udp_vu.c index 4123510..0b1d3c6 100644 --- a/udp_vu.c +++ b/udp_vu.c @@ -227,7 +227,7 @@ void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref, struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; int i; - if (udp_sock_errs(c, ref.fd, events) < 0) { + if (udp_sock_errs(c, ref, events, now) < 0) { err("UDP: Unrecoverable error on listening socket:" " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port); return; @@ -302,7 +302,7 @@ void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref, ASSERT(!c->no_udp); - if (udp_sock_errs(c, from_s, events) < 0) { + if (udp_sock_errs(c, ref, events, now) < 0) { flow_err(uflow, "Unrecoverable error on reply socket"); flow_err_details(uflow); udp_flow_close(c, uflow);-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson