On Sat, Mar 15, 2025 at 11:32:44AM -0400, Jon Maloy wrote:While developing traceroute forwarding tap-to-sock we found that struct msghdr.msg_name for the ICMPs in the opposite direction always contains the destination address of the original UDP message, and not, as one might expect, the one of the host which created the error message. Study of the kernel code reveals that this address instead is appended as extra data after the received struct sock_extended_err area. We now change the ICMP receive code accordingly. Fixes: 55431f0077b6 ("udp: create and send ICMPv4 to local peer when applicable") Fixes: 68b04182e07d ("udp: create and send ICMPv6 to local peer when applicable") Signed-off-by: Jon Maloy <jmaloy(a)redhat.com>The idea looks good, a few minor warts in the implementation.--- udp.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/udp.c b/udp.c index 80520cb..271e570 100644 --- a/udp.c +++ b/udp.c @@ -510,10 +510,13 @@ static void udp_send_conn_fail_icmp6(const struct ctx *c, */ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref) { - const struct sock_extended_err *ee; + struct errhdr { + struct sock_extended_err ee; + union sockaddr_inany saddr; + }; + const struct errhdr *eh; const struct cmsghdr *hdr; - union sockaddr_inany saddr; - char buf[CMSG_SPACE(sizeof(*ee))]; + char buf[CMSG_SPACE(sizeof(struct errhdr))]; char data[ICMP6_MAX_DLEN]; int s = ref.fd; struct iovec iov = { @@ -521,11 +524,11 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref) .iov_len = sizeof(data) }; struct msghdr mh = { - .msg_name = &saddr, - .msg_namelen = sizeof(saddr), + .msg_name = 0, + .msg_namelen = 0,I think you can just omit these and they'll be zero initialized..msg_iov = &iov, .msg_iovlen = 1, - .msg_control = buf, + .msg_control = buf,//(void *)&errhdr,Looks like you have a leftover comment here..msg_controllen = sizeof(buf), }; ssize_t rc; @@ -553,7 +556,7 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref) return -1; } - ee = (const struct sock_extended_err *)CMSG_DATA(hdr); + eh = (const struct errhdr *)CMSG_DATA(hdr); if (ref.type == EPOLL_TYPE_UDP_REPLY) { flow_sidx_t sidx = flow_sidx_opposite(ref.flowside); const struct flowside *toside = flowside_at_sidx(sidx); @@ -561,18 +564,19 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref) if (hdr->cmsg_level == IPPROTO_IP) { dlen = MIN(dlen, ICMP4_MAX_DLEN); - udp_send_conn_fail_icmp4(c, ee, toside, saddr.sa4.sin_addr, + udp_send_conn_fail_icmp4(c, &eh->ee, toside, + eh->saddr.sa4.sin_addr, data, dlen);So, it looks from man ip(7) that you're supposed to use the SO_EE_OFFENDER() macro to get the socket address, rather than just assuming it's immediately after the struct sock_extended_err. Except... without assuming that, I can't really see how you're supposed to properly size the buffer} else if (hdr->cmsg_level == IPPROTO_IPV6) { - udp_send_conn_fail_icmp6(c, ee, toside, - &saddr.sa6.sin6_addr, + udp_send_conn_fail_icmp6(c, &eh->ee, toside, + &eh->saddr.sa6.sin6_addr, data, dlen, sidx.flowi); } } else { trace("Ignoring received IP_RECVERR cmsg on listener socket"); } debug("%s error on UDP socket %i: %s", - str_ee_origin(ee), s, strerror_(ee->ee_errno)); + str_ee_origin(&eh->ee), s, strerror_(eh->ee.ee_errno)); return 1; }-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson