On Fri, 2 Feb 2024 15:11:41 +0100 Laurent Vivier <lvivier(a)redhat.com> wrote:Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- udp.c | 126 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/udp.c b/udp.c index db635742319b..77168fb0a2af 100644 --- a/udp.c +++ b/udp.c @@ -562,47 +562,48 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n, * * Return: size of tap frame with headers */ -static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport, - const struct timespec *now) +static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph, + size_t data_len, struct sockaddr_in *s_in, + in_port_t dstport, const struct timespec *now)Function comment should be updated to reflect the new set of parameters.{ - struct udp4_l2_buf_t *b = &udp4_l2_buf[n]; + struct udphdr *uh = (struct udphdr *)(iph + 1); in_port_t src_port; size_t ip_len; - ip_len = udp4_l2_mh_sock[n].msg_len + sizeof(b->iph) + sizeof(b->uh); + ip_len = data_len + sizeof(struct iphdr) + sizeof(struct udphdr);ip_len takes into account the size of iph and uh, both local, so for consistency with the rest of the codebase, + sizeof(*iph) + sizeof(*uh).- b->iph.tot_len = htons(ip_len); - b->iph.daddr = c->ip4.addr_seen.s_addr; + iph->tot_len = htons(ip_len); + iph->daddr = c->ip4.addr_seen.s_addr; - src_port = ntohs(b->s_in.sin_port); + src_port = ntohs(s_in->sin_port); if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) && - IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr, &c->ip4.dns_host) && + IN4_ARE_ADDR_EQUAL(&s_in->sin_addr, &c->ip4.dns_host) && src_port == 53) { - b->iph.saddr = c->ip4.dns_match.s_addr; - } else if (IN4_IS_ADDR_LOOPBACK(&b->s_in.sin_addr) || - IN4_IS_ADDR_UNSPECIFIED(&b->s_in.sin_addr)|| - IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr, &c->ip4.addr_seen)) { - b->iph.saddr = c->ip4.gw.s_addr; + iph->saddr = c->ip4.dns_match.s_addr; + } else if (IN4_IS_ADDR_LOOPBACK(&s_in->sin_addr) || + IN4_IS_ADDR_UNSPECIFIED(&s_in->sin_addr)|| + IN4_ARE_ADDR_EQUAL(&s_in->sin_addr, &c->ip4.addr_seen)) { + iph->saddr = c->ip4.gw.s_addr; udp_tap_map[V4][src_port].ts = now->tv_sec; udp_tap_map[V4][src_port].flags |= PORT_LOCAL; - if (IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr.s_addr, &c->ip4.addr_seen)) + if (IN4_ARE_ADDR_EQUAL(&s_in->sin_addr.s_addr, &c->ip4.addr_seen)) udp_tap_map[V4][src_port].flags &= ~PORT_LOOPBACK; else udp_tap_map[V4][src_port].flags |= PORT_LOOPBACK; bitmap_set(udp_act[V4][UDP_ACT_TAP], src_port); } else { - b->iph.saddr = b->s_in.sin_addr.s_addr; + iph->saddr = s_in->sin_addr.s_addr; } - b->iph.check = ipv4_hdr_checksum(&b->iph, IPPROTO_UDP); - b->uh.source = b->s_in.sin_port; - b->uh.dest = htons(dstport); - b->uh.len = htons(udp4_l2_mh_sock[n].msg_len + sizeof(b->uh)); + iph->check = ipv4_hdr_checksum(iph, IPPROTO_UDP); + uh->source = s_in->sin_port; + uh->dest = htons(dstport); + uh->len= htons(data_len + sizeof(struct udphdr));Missing whitespace.- return tap_iov_len(c, &b->taph, ip_len); + return ip_len; } /** @@ -614,38 +615,39 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport, * * Return: size of tap frame with headers */ -static size_t udp_update_hdr6(const struct ctx *c, int n, in_port_t dstport, - const struct timespec *now) +static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h, + size_t data_len, struct sockaddr_in6 *s_in6, + in_port_t dstport, const struct timespec *now)Same here, function comment should be updated.{ - struct udp6_l2_buf_t *b = &udp6_l2_buf[n]; + struct udphdr *uh = (struct udphdr *)(ip6h + 1); struct in6_addr *src; in_port_t src_port; size_t ip_len; - src = &b->s_in6.sin6_addr; - src_port = ntohs(b->s_in6.sin6_port); + src = &s_in6->sin6_addr; + src_port = ntohs(s_in6->sin6_port); - ip_len = udp6_l2_mh_sock[n].msg_len + sizeof(b->ip6h) + sizeof(b->uh); + ip_len = data_len + sizeof(struct ipv6hdr) + sizeof(struct udphdr); - b->ip6h.payload_len = htons(udp6_l2_mh_sock[n].msg_len + sizeof(b->uh)); + ip6h->payload_len = htons(data_len + sizeof(struct udphdr)); if (IN6_IS_ADDR_LINKLOCAL(src)) { - b->ip6h.daddr = c->ip6.addr_ll_seen; - b->ip6h.saddr = b->s_in6.sin6_addr; + ip6h->daddr = c->ip6.addr_ll_seen; + ip6h->saddr = s_in6->sin6_addr; } else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_match) && IN6_ARE_ADDR_EQUAL(src, &c->ip6.dns_host) && src_port == 53) { - b->ip6h.daddr = c->ip6.addr_seen; - b->ip6h.saddr = c->ip6.dns_match; + ip6h->daddr = c->ip6.addr_seen; + ip6h->saddr = c->ip6.dns_match; } else if (IN6_IS_ADDR_LOOPBACK(src) || IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr_seen) || IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) { - b->ip6h.daddr = c->ip6.addr_ll_seen; + ip6h->daddr = c->ip6.addr_ll_seen; if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw)) - b->ip6h.saddr = c->ip6.gw; + ip6h->saddr = c->ip6.gw; else - b->ip6h.saddr = c->ip6.addr_ll; + ip6h->saddr = c->ip6.addr_ll; udp_tap_map[V6][src_port].ts = now->tv_sec; udp_tap_map[V6][src_port].flags |= PORT_LOCAL; @@ -662,20 +664,20 @@ static size_t udp_update_hdr6(const struct ctx *c, int n, in_port_t dstport, bitmap_set(udp_act[V6][UDP_ACT_TAP], src_port); } else { - b->ip6h.daddr = c->ip6.addr_seen; - b->ip6h.saddr = b->s_in6.sin6_addr; + ip6h->daddr = c->ip6.addr_seen; + ip6h->saddr = s_in6->sin6_addr; } - b->uh.source = b->s_in6.sin6_port; - b->uh.dest = htons(dstport); - b->uh.len = b->ip6h.payload_len; - b->uh.check = csum(&b->uh, ntohs(b->ip6h.payload_len), - proto_ipv6_header_checksum(&b->ip6h, IPPROTO_UDP)); - b->ip6h.version = 6; - b->ip6h.nexthdr = IPPROTO_UDP; - b->ip6h.hop_limit = 255; + uh->source = s_in6->sin6_port; + uh->dest = htons(dstport); + uh->len = ip6h->payload_len; + uh->check = csum(uh, ntohs(ip6h->payload_len), + proto_ipv6_header_checksum(ip6h, IPPROTO_UDP)); + ip6h->version = 6; + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 255; - return tap_iov_len(c, &b->taph, ip_len); + return ip_len; } /** @@ -689,6 +691,11 @@ static size_t udp_update_hdr6(const struct ctx *c, int n, in_port_t dstport, * * Return: size of tap frame with headers */ +#pragma GCC diagnostic push +/* ignore unaligned pointer value warning for &udp6_l2_buf[i].ip6h and + * &udp4_l2_buf[i].iph...this is the reason why I originally wrote these functions this way: for AVX2 builds, we align ip6h because we need to calculate the checksum (the UDP checksum for IPv6 needs a version of the IPv6 header as pseudo-header). But for non-AVX2 builds, and for IPv4, we don't need any alignment other than 4-bytes alignment of the start (s_in / s_in6). If you pass iph or ip6h as arguments and dereference them, then they need to be aligned (4-bytes) to be safe on all the architectures we might reasonably be running on. Passing &udp6_l2_buf[n] and dereferencing only that should work (gcc checks are rather reliable in my experience, you don't have to go and test this on all the possible architectures). Would that work for you? Otherwise, we can pad as we do for AVX2 builds, but we'll waste bytes. In this sense, the existing version is not ideal either: $ CFLAGS="-g" make $ pahole passt | less [...] struct udp4_l2_buf_t { struct sockaddr_in s_in; /* 0 16 */ struct tap_hdr taph; /* 16 18 */ struct iphdr iph; /* 34 20 */ struct udphdr uh; /* 54 8 */ uint8_t data[65507]; /* 62 65507 */ /* size: 65572, cachelines: 1025, members: 5 */ /* padding: 3 */ /* last cacheline: 36 bytes */ } __attribute__((__aligned__(4))); while in general we try to keep those structures below or at exactly 64 KiB. If we make 'data' smaller, though, we might truncate messages. But at the same time, we need to keep struct sockaddr_in (or bigger) and struct tap_hdr in here, plus padding for AVX2 builds. Given that ~64 KiB messages are not common in practice, I wonder if we could keep parallel arrays with sets of those few excess bytes we need, using them as second items of iovec arrays, which will almost never be used. Anyway, this is beyond the scope of this patch. About this change itself, I guess passing (and dereferencing) the head of the buffer, or aligning iph / ipv6h should be enough.+ */ +#pragma GCC diagnostic ignored "-Waddress-of-packed-member" static void udp_tap_send(const struct ctx *c, unsigned int start, unsigned int n, in_port_t dstport, bool v6, const struct timespec *now) @@ -702,18 +709,31 @@ static void udp_tap_send(const struct ctx *c, tap_iov = udp4_l2_iov_tap; for (i = start; i < start + n; i++) { - size_t buf_len; - - if (v6) - buf_len = udp_update_hdr6(c, i, dstport, now); - else - buf_len = udp_update_hdr4(c, i, dstport, now); - - tap_iov[i].iov_len = buf_len; + size_t ip_len; + + if (v6) { + ip_len = udp_update_hdr6(c, &udp6_l2_buf[i].ip6h, + udp6_l2_mh_sock[i].msg_len, + &udp6_l2_buf[i].s_in6, dstport, + now); + tap_iov[i].iov_len = tap_iov_len(c, + &udp6_l2_buf[i].taph, + ip_len); + } else { + ip_len = udp_update_hdr4(c, &udp4_l2_buf[i].iph, + udp4_l2_mh_sock[i].msg_len, + &udp4_l2_buf[i].s_in, + dstport, now); + + tap_iov[i].iov_len = tap_iov_len(c, + &udp4_l2_buf[i].taph, + ip_len); + } } tap_send_frames(c, tap_iov + start, n); } +#pragma GCC diagnostic pop /** * udp_sock_handler() - Handle new data from socket-- Stefano