We need rudimentary "connection" tracking for UDP because we do have a many-to-one mapping for our few NAT cases. This is handled by the port flags. We can make some simplfiications to this code for clarity. David Gibson (5): udp: Don't attempt to translate a 0.0.0.0 source address udp: Small streamline to udp_update_hdr4() udp: Implement IPv6 PORT_GUA logic for IPv4 as well udp: Clarify connection tracking flags udp: Remove PORT_ADDR_SEEN "connection" tracking mode udp.c | 69 +++++++++++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 33 deletions(-) -- 2.40.1
If an incoming packet has a source address of 0.0.0.0 we translate that to the gateway address. This doesn't really make sense, because we have no way to do a reverse translation for reply packets. Certain UDP protocols do use an unspecified source address in some circumstances (e.g. DHCP). These generally either require no reply, a multicast reply, or provides a suitable reply address by other means. In none of those cases does translating it in passt/pasta make sense. The best we can really do here is just leave it as is. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/udp.c b/udp.c index 39c59d4..78cb221 100644 --- a/udp.c +++ b/udp.c @@ -595,7 +595,6 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport, src_port == 53) { b->iph.saddr = c->ip4.dns_match.s_addr; } else if (IN4_IS_ADDR_LOOPBACK(&b->s_in.sin_addr) || - IN4_IS_ADDR_UNSPECIFIED(&b->s_in.sin_addr)|| IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr, &c->ip4.addr_seen)) { b->iph.saddr = c->ip4.gw.s_addr; udp_tap_map[V4][src_port].ts = now->tv_sec; -- 2.40.1
Streamline the logic here slightly, by introducing a 'src' temporary for brevity. We also transform the logic for setting/clearing PORT_LOOPBACK. This makes udp_update_hdr4() more closely match the corresponding logic from udp_update_udp6(). Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/udp.c b/udp.c index 78cb221..d7e1020 100644 --- a/udp.c +++ b/udp.c @@ -581,6 +581,7 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport, const struct timespec *now) { struct udp4_l2_buf_t *b = &udp4_l2_buf[n]; + struct in_addr *src; in_port_t src_port; size_t ip_len; @@ -588,26 +589,26 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport, b->iph.tot_len = htons(ip_len); + src = &b->s_in.sin_addr; src_port = ntohs(b->s_in.sin_port); if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) && - IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr, &c->ip4.dns_host) && - src_port == 53) { + IN4_ARE_ADDR_EQUAL(src, &c->ip4.dns_host) && src_port == 53) { b->iph.saddr = c->ip4.dns_match.s_addr; - } else if (IN4_IS_ADDR_LOOPBACK(&b->s_in.sin_addr) || - IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr, &c->ip4.addr_seen)) { + } else if (IN4_IS_ADDR_LOOPBACK(src) || + IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr_seen)) { b->iph.saddr = c->ip4.gw.s_addr; udp_tap_map[V4][src_port].ts = now->tv_sec; udp_tap_map[V4][src_port].flags |= PORT_LOCAL; - if (IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr.s_addr, &c->ip4.addr_seen)) - udp_tap_map[V4][src_port].flags &= ~PORT_LOOPBACK; - else + if (IN4_IS_ADDR_LOOPBACK(src)) udp_tap_map[V4][src_port].flags |= PORT_LOOPBACK; + else + udp_tap_map[V4][src_port].flags &= ~PORT_LOOPBACK; bitmap_set(udp_act[V4][UDP_ACT_TAP], src_port); } else { - b->iph.saddr = b->s_in.sin_addr.s_addr; + b->iph.saddr = src->s_addr; } udp_update_check4(b); -- 2.40.1
For IPv6 UDP, the PORT_GUA flag is set for a port when we get a "connection" from ip6.addr, that is from the host's global address. An exactly analogous situation is possible for IPv4, but we don't handle it the same way. In practice it will only show up if addr_seen is different from addr, which is unusual. Nonetheless we should handle this the same way for IPv4 and IPv6. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/udp.c b/udp.c index d7e1020..950d5a9 100644 --- a/udp.c +++ b/udp.c @@ -596,7 +596,8 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport, IN4_ARE_ADDR_EQUAL(src, &c->ip4.dns_host) && src_port == 53) { b->iph.saddr = c->ip4.dns_match.s_addr; } else if (IN4_IS_ADDR_LOOPBACK(src) || - IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr_seen)) { + IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr_seen) || + IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr)) { b->iph.saddr = c->ip4.gw.s_addr; udp_tap_map[V4][src_port].ts = now->tv_sec; udp_tap_map[V4][src_port].flags |= PORT_LOCAL; @@ -606,6 +607,11 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport, else udp_tap_map[V4][src_port].flags &= ~PORT_LOOPBACK; + if (IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr)) + udp_tap_map[V4][src_port].flags |= PORT_GUA; + else + udp_tap_map[V4][src_port].flags &= ~PORT_GUA; + bitmap_set(udp_act[V4][UDP_ACT_TAP], src_port); } else { b->iph.saddr = src->s_addr; @@ -852,6 +858,8 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, if (!(udp_tap_map[V4][dst].flags & PORT_LOCAL) || (udp_tap_map[V4][dst].flags & PORT_LOOPBACK)) s_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + else if (udp_tap_map[V4][dst].flags & PORT_GUA) + s_in.sin_addr = c->ip4.addr; else s_in.sin_addr = c->ip4.addr_seen; } -- 2.40.1
The PORT_LOCAL, PORT_LOOPBACK and PORT_GUA flags in udp_tap_port are a rudimentary form of "connection" tracking for UDP. We need this because there are several remote source addresses that we have to translate to the gateway address when we present packets to the guest. We need these flags to work out which address to translate that gateway address back to for reply packets. The flags are confusing, though, it's not really clear what each one means individually, and LOCAL and LOOPBACK are only ever tested in combination. Really, it all boils down to just 3 options: - Packets from addr_seen got tanslated - Packets from (host side) loopback got translated - Packets from addr got translated Replace the flags with an enum explicitly giving those options. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 67 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/udp.c b/udp.c index 950d5a9..3c78fca 100644 --- a/udp.c +++ b/udp.c @@ -120,6 +120,18 @@ #define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ #define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */ +/** + * enum udp_port_remote - Original remote address of UDP "connection" to a port + * @PORT_LOOPBACK - Original remote address was (host side) loopback + * @PORT_ADDR_SEEN - Original remote address was the same as the guest is using + * @PORT_ADDR - Original remote address was guest assigned address + */ +enum udp_port_remote { + PORT_LOOPBACK = 0, + PORT_ADDR_SEEN = 1, + PORT_ADDR = 2, +}; + /** * struct udp_tap_port - Port tracking based on tap-facing source port * @sock: Socket bound to source port used as index @@ -128,10 +140,7 @@ */ struct udp_tap_port { int sock; - uint8_t flags; -#define PORT_LOCAL BIT(0) -#define PORT_LOOPBACK BIT(1) -#define PORT_GUA BIT(2) + enum udp_port_remote remote; time_t ts; }; @@ -600,17 +609,13 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport, IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr)) { b->iph.saddr = c->ip4.gw.s_addr; udp_tap_map[V4][src_port].ts = now->tv_sec; - udp_tap_map[V4][src_port].flags |= PORT_LOCAL; if (IN4_IS_ADDR_LOOPBACK(src)) - udp_tap_map[V4][src_port].flags |= PORT_LOOPBACK; - else - udp_tap_map[V4][src_port].flags &= ~PORT_LOOPBACK; - - if (IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr)) - udp_tap_map[V4][src_port].flags |= PORT_GUA; + udp_tap_map[V4][src_port].remote = PORT_LOOPBACK; + else if (IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr)) + udp_tap_map[V4][src_port].remote = PORT_ADDR; else - udp_tap_map[V4][src_port].flags &= ~PORT_GUA; + udp_tap_map[V4][src_port].remote = PORT_ADDR_SEEN; bitmap_set(udp_act[V4][UDP_ACT_TAP], src_port); } else { @@ -668,17 +673,13 @@ static size_t udp_update_hdr6(const struct ctx *c, int n, in_port_t dstport, b->ip6h.saddr = c->ip6.addr_ll; udp_tap_map[V6][src_port].ts = now->tv_sec; - udp_tap_map[V6][src_port].flags |= PORT_LOCAL; if (IN6_IS_ADDR_LOOPBACK(src)) - udp_tap_map[V6][src_port].flags |= PORT_LOOPBACK; - else - udp_tap_map[V6][src_port].flags &= ~PORT_LOOPBACK; - - if (IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) - udp_tap_map[V6][src_port].flags |= PORT_GUA; + udp_tap_map[V6][src_port].remote = PORT_LOOPBACK; + else if (IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) + udp_tap_map[V6][src_port].remote = PORT_ADDR; else - udp_tap_map[V6][src_port].flags &= ~PORT_GUA; + udp_tap_map[V6][src_port].remote = PORT_ADDR_SEEN; bitmap_set(udp_act[V6][UDP_ACT_TAP], src_port); } else { @@ -855,13 +856,17 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, s_in.sin_addr = c->ip4.dns_host; } else if (IN4_ARE_ADDR_EQUAL(&s_in.sin_addr, &c->ip4.gw) && !c->no_map_gw) { - if (!(udp_tap_map[V4][dst].flags & PORT_LOCAL) || - (udp_tap_map[V4][dst].flags & PORT_LOOPBACK)) + switch (udp_tap_map[V4][dst].remote) { + case PORT_LOOPBACK: s_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - else if (udp_tap_map[V4][dst].flags & PORT_GUA) + break; + case PORT_ADDR: s_in.sin_addr = c->ip4.addr; - else + break; + case PORT_ADDR_SEEN: s_in.sin_addr = c->ip4.addr_seen; + break; + } } if (!(s = udp_tap_map[V4][src].sock)) { @@ -903,13 +908,17 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, s_in6.sin6_addr = c->ip6.dns_host; } else if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.gw) && !c->no_map_gw) { - if (!(udp_tap_map[V6][dst].flags & PORT_LOCAL) || - (udp_tap_map[V6][dst].flags & PORT_LOOPBACK)) + switch (udp_tap_map[V6][dst].remote) { + case PORT_LOOPBACK: s_in6.sin6_addr = in6addr_loopback; - else if (udp_tap_map[V6][dst].flags & PORT_GUA) + break; + case PORT_ADDR: s_in6.sin6_addr = c->ip6.addr; - else + break; + case PORT_ADDR_SEEN: s_in6.sin6_addr = c->ip6.addr_seen; + break; + } } else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) { bind_addr = &c->ip6.addr_ll; } @@ -1162,7 +1171,7 @@ static void udp_timer_one(struct ctx *c, int v6, enum udp_act_type type, if (ts->tv_sec - tp->ts > UDP_CONN_TIMEOUT) { s = tp->sock; - tp->flags = 0; + tp->remote = PORT_LOOPBACK; } break; -- 2.40.1
The mode of UDP NAT represented by PORT_ADDR_SEEN isn't actually useful. In most cases addr_seen and addr will be the same, in which case it's just redundant with PORT_ADDR. If they are different, that means the guest is using an address different from the one it's been assigned. The natural consequence of doing that is that you can't communicate with some other host which is using the address you squatted. We don't need to shield the guest from the consequences of shooting itself in the foot this way. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/udp.c b/udp.c index 3c78fca..2d05584 100644 --- a/udp.c +++ b/udp.c @@ -123,13 +123,11 @@ /** * enum udp_port_remote - Original remote address of UDP "connection" to a port * @PORT_LOOPBACK - Original remote address was (host side) loopback - * @PORT_ADDR_SEEN - Original remote address was the same as the guest is using - * @PORT_ADDR - Original remote address was guest assigned address + * @PORT_ADDR - Original remote address was host address shared with guest */ enum udp_port_remote { PORT_LOOPBACK = 0, - PORT_ADDR_SEEN = 1, - PORT_ADDR = 2, + PORT_ADDR = 1, }; /** @@ -605,17 +603,14 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport, IN4_ARE_ADDR_EQUAL(src, &c->ip4.dns_host) && src_port == 53) { b->iph.saddr = c->ip4.dns_match.s_addr; } else if (IN4_IS_ADDR_LOOPBACK(src) || - IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr_seen) || IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr)) { b->iph.saddr = c->ip4.gw.s_addr; udp_tap_map[V4][src_port].ts = now->tv_sec; if (IN4_IS_ADDR_LOOPBACK(src)) udp_tap_map[V4][src_port].remote = PORT_LOOPBACK; - else if (IN4_ARE_ADDR_EQUAL(src, &c->ip4.addr)) - udp_tap_map[V4][src_port].remote = PORT_ADDR; else - udp_tap_map[V4][src_port].remote = PORT_ADDR_SEEN; + udp_tap_map[V4][src_port].remote = PORT_ADDR; bitmap_set(udp_act[V4][UDP_ACT_TAP], src_port); } else { @@ -663,7 +658,6 @@ static size_t udp_update_hdr6(const struct ctx *c, int n, in_port_t dstport, b->ip6h.daddr = c->ip6.addr_seen; b->ip6h.saddr = c->ip6.dns_match; } else if (IN6_IS_ADDR_LOOPBACK(src) || - IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr_seen) || IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) { b->ip6h.daddr = c->ip6.addr_ll_seen; @@ -676,10 +670,8 @@ static size_t udp_update_hdr6(const struct ctx *c, int n, in_port_t dstport, if (IN6_IS_ADDR_LOOPBACK(src)) udp_tap_map[V6][src_port].remote = PORT_LOOPBACK; - else if (IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) - udp_tap_map[V6][src_port].remote = PORT_ADDR; else - udp_tap_map[V6][src_port].remote = PORT_ADDR_SEEN; + udp_tap_map[V6][src_port].remote = PORT_ADDR; bitmap_set(udp_act[V6][UDP_ACT_TAP], src_port); } else { @@ -863,9 +855,6 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, case PORT_ADDR: s_in.sin_addr = c->ip4.addr; break; - case PORT_ADDR_SEEN: - s_in.sin_addr = c->ip4.addr_seen; - break; } } @@ -915,9 +904,6 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, case PORT_ADDR: s_in6.sin6_addr = c->ip6.addr; break; - case PORT_ADDR_SEEN: - s_in6.sin6_addr = c->ip6.addr_seen; - break; } } else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) { bind_addr = &c->ip6.addr_ll; -- 2.40.1
On Wed, May 17, 2023 at 03:05:24PM +1000, David Gibson wrote:We need rudimentary "connection" tracking for UDP because we do have a many-to-one mapping for our few NAT cases. This is handled by the port flags. We can make some simplfiications to this code for clarity.I'm planning to subsume these patches in a broader reaching NAT cleanup, so no need to pay too much attention to them. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson