Here are fixes for a number of bugs I encountered while working toward configurable host mapping. There are more coming, but I'm still working on getting them ready. David Gibson (3): Correct inaccurate comments on ip[46]_ctx::addr conf: Delay handling -D option until after addresses are configured fwd: Rework inconsistencies in translation of inbound flows conf.c | 84 +++++++++++++++++++++++++++++------------------------- fwd.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++-------- passt.h | 4 +-- 3 files changed, 124 insertions(+), 52 deletions(-) -- 2.46.0
These fields are described as being an address for an external, routable interface. That's not necessarily the case when using -a. But, more importantly, saying where the value comes from is not as useful as what it's used for. The real purpose of this field is as the address which we assign to the guest via DHCP or --config-net. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- passt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/passt.h b/passt.h index 12ae1b97..ef684037 100644 --- a/passt.h +++ b/passt.h @@ -91,7 +91,7 @@ enum passt_modes { /** * struct ip4_ctx - IPv4 execution context - * @addr: IPv4 address for external, routable interface + * @addr: IPv4 address assigned to guest * @addr_seen: Latest IPv4 address seen as source from tap * @prefixlen: IPv4 prefix length (netmask) * @gw: Default IPv4 gateway @@ -121,7 +121,7 @@ struct ip4_ctx { /** * struct ip6_ctx - IPv6 execution context - * @addr: IPv6 address for external, routable interface + * @addr: IPv6 address assigned to guest * @addr_ll: Link-local IPv6 address on external, routable interface * @addr_seen: Latest IPv6 global/site address seen as source from tap * @addr_ll_seen: Latest IPv6 link-local address seen as source from tap -- 2.46.0
add_dns[46]() rely on the gateway address and c->no_map_gw being already initialised, in order to properly handle DNS servers which need NAT to be accessed from the guest. Usually these are called from get_dns() which is well after the addresses are configured, so that's fine. However, they can also be called earlier if an explicit -D command line option is given. In this case no_map_gw and/or c->ip[46].gw may not get be initialised properly, leading to this doing the wrong thing. Luckily we already have a second pass of option parsing for things which need addresses to already be configured. Move handling of -D to there. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- conf.c | 84 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/conf.c b/conf.c index a79e7a66..76d37da0 100644 --- a/conf.c +++ b/conf.c @@ -1235,10 +1235,10 @@ void conf(struct ctx *c, int argc, char **argv) const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt"; char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 }; bool copy_addrs_opt = false, copy_routes_opt = false; - struct in6_addr *dns6 = c->ip6.dns, dns6_tmp; - struct in_addr *dns4 = c->ip4.dns, dns4_tmp; enum fwd_ports_mode fwd_default = FWD_NONE; bool v4_only = false, v6_only = false; + struct in6_addr *dns6 = c->ip6.dns; + struct in_addr *dns4 = c->ip4.dns; struct fqdn *dnss = c->dns_search; unsigned int ifi4 = 0, ifi6 = 0; const char *logfile = NULL; @@ -1545,40 +1545,6 @@ void conf(struct ctx *c, int argc, char **argv) die("Invalid or redundant outbound address: %s", optarg); break; - case 'D': - if (!strcmp(optarg, "none")) { - c->no_dns = 1; - - dns4 = &c->ip4.dns[0]; - memset(c->ip4.dns, 0, sizeof(c->ip4.dns)); - c->ip4.dns[0] = (struct in_addr){ 0 }; - c->ip4.dns_match = (struct in_addr){ 0 }; - c->ip4.dns_host = (struct in_addr){ 0 }; - - dns6 = &c->ip6.dns[0]; - memset(c->ip6.dns, 0, sizeof(c->ip6.dns)); - c->ip6.dns_match = (struct in6_addr){ 0 }; - c->ip6.dns_host = (struct in6_addr){ 0 }; - - break; - } - - c->no_dns = 0; - - if (dns4 - &c->ip4.dns[0] < ARRAY_SIZE(c->ip4.dns) && - inet_pton(AF_INET, optarg, &dns4_tmp)) { - add_dns4(c, &dns4_tmp, &dns4); - break; - } - - if (dns6 - &c->ip6.dns[0] < ARRAY_SIZE(c->ip6.dns) && - inet_pton(AF_INET6, optarg, &dns6_tmp)) { - add_dns6(c, &dns6_tmp, &dns6); - break; - } - - die("Cannot use DNS address %s", optarg); - break; case 'S': if (!strcmp(optarg, "none")) { c->no_dns_search = 1; @@ -1620,6 +1586,7 @@ void conf(struct ctx *c, int argc, char **argv) case 'u': case 'T': case 'U': + case 'D': /* Handle these later, once addresses are configured */ break; case 'h': @@ -1677,16 +1644,55 @@ void conf(struct ctx *c, int argc, char **argv) if (c->ifi6 && IN6_IS_ADDR_UNSPECIFIED(&c->ip6.gw)) c->no_map_gw = 1; - /* Inbound port options can be parsed now (after IPv4/IPv6 settings) */ + /* Inbound port options & DNS can be parsed now (after IPv4/IPv6 + * settings) + */ udp_portmap_clear(); optind = 1; do { name = getopt_long(argc, argv, optstring, options, NULL); - if (name == 't') + if (name == 't') { conf_ports(c, name, optarg, &c->tcp.fwd_in); - else if (name == 'u') + } else if (name == 'u') { conf_ports(c, name, optarg, &c->udp.fwd_in); + } else if (name == 'D') { + struct in6_addr dns6_tmp; + struct in_addr dns4_tmp; + + if (!strcmp(optarg, "none")) { + c->no_dns = 1; + + dns4 = &c->ip4.dns[0]; + memset(c->ip4.dns, 0, sizeof(c->ip4.dns)); + c->ip4.dns[0] = (struct in_addr){ 0 }; + c->ip4.dns_match = (struct in_addr){ 0 }; + c->ip4.dns_host = (struct in_addr){ 0 }; + + dns6 = &c->ip6.dns[0]; + memset(c->ip6.dns, 0, sizeof(c->ip6.dns)); + c->ip6.dns_match = (struct in6_addr){ 0 }; + c->ip6.dns_host = (struct in6_addr){ 0 }; + + continue; + } + + c->no_dns = 0; + + if (dns4 - &c->ip4.dns[0] < ARRAY_SIZE(c->ip4.dns) && + inet_pton(AF_INET, optarg, &dns4_tmp)) { + add_dns4(c, &dns4_tmp, &dns4); + break; + } + + if (dns6 - &c->ip6.dns[0] < ARRAY_SIZE(c->ip6.dns) && + inet_pton(AF_INET6, optarg, &dns6_tmp)) { + add_dns6(c, &dns6_tmp, &dns6); + break; + } + + die("Cannot use DNS address %s", optarg); + } } while (name != -1); if (c->mode == MODE_PASTA) -- 2.46.0
We usually avoid NAT, but in a few cases we need to apply address translations. The current logic for this on inbound flows has some inconsistencies: * For IPv4 (but not IPv6) we translated unspecified source addresses While an unspecified source probably doesn't make sense, it makes no less sense in the guest context than it does for the host, it's possible there could be protocols / flow types where this works. At the moment, this should never happen since the protocols will drop the flow before getting here. So, don't alter unspecified addresses for either IP version. * For IPv6 (but not IPv4) we translated the guest's assigned address We always translated the guest's observed address, but for the case where the assigned address different we should be consistent. Translate the guest assigned address for IPv4 as well. The overall point is that we need to translate here if and only if the host address is one that's not accessible to the guest. Create some helper functions to determine this, which will have additional uses later. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- fwd.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 77 insertions(+), 11 deletions(-) diff --git a/fwd.c b/fwd.c index dea36f6c..24700998 100644 --- a/fwd.c +++ b/fwd.c @@ -170,6 +170,73 @@ static bool is_dns_flow(uint8_t proto, const struct flowside *ini) ((ini->fport == 53) || (ini->fport == 853)); } +/** + * fwd_guest_accessible4() - Is IPv4 address guest accessible + * @c: Execution context + * @addr: Host visible IPv4 address + * + * Return: true if @addr on the host is accessible to the guest without + * translation, false otherwise + */ +static bool fwd_guest_accessible4(const struct ctx *c, + const struct in_addr *addr) +{ + if (IN4_IS_ADDR_LOOPBACK(addr)) + return false; + + if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr)) + return false; + + if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_seen) && + IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen)) + return false; + + return true; +} + +/** + * fwd_guest_accessible6() - Is IPv6 address guest accessible + * @c: Execution context + * @addr: Host visible IPv6 address + * + * Return: true if @addr on the host is accessible to the guest without + * translation, false otherwise + */ +static bool fwd_guest_accessible6(const struct ctx *c, + const struct in6_addr *addr) +{ + if (IN6_IS_ADDR_LOOPBACK(addr)) + return false; + + if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr)) + return false; + + if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_seen) && + IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr_seen)) + return false; + + return true; +} + +/** + * fwd_guest_accessible() - Is IPv[46] address guest accessible + * @c: Execution context + * @addr: Host visible IPv[46] address + * + * Return: true if @addr on the host is accessible to the guest without + * translation, false otherwise + */ +static bool fwd_guest_accessible(const struct ctx *c, + const union inany_addr *addr) +{ + const struct in_addr *a4 = inany_v4(addr); + + if (a4) + return fwd_guest_accessible4(c, a4); + + return fwd_guest_accessible6(c, &addr->a6); +} + /** * fwd_nat_from_tap() - Determine to forward a flow from the tap interface * @c: Execution context @@ -307,21 +374,20 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto, return PIF_SPLICE; } - tgt->faddr = ini->eaddr; - tgt->fport = ini->eport; - - if (inany_is_loopback4(&tgt->faddr) || - inany_is_unspecified4(&tgt->faddr) || - inany_equals4(&tgt->faddr, &c->ip4.addr_seen)) { - tgt->faddr = inany_from_v4(c->ip4.gw); - } else if (inany_is_loopback6(&tgt->faddr) || - inany_equals6(&tgt->faddr, &c->ip6.addr_seen) || - inany_equals6(&tgt->faddr, &c->ip6.addr)) { - if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw)) + /* These host-visible addresses aren't visible to the guest, so need + * translation + */ + if (!fwd_guest_accessible(c, &ini->eaddr)) { + if (inany_v4(&ini->eaddr)) + tgt->faddr = inany_from_v4(c->ip4.gw); + else if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw)) tgt->faddr.a6 = c->ip6.gw; else tgt->faddr.a6 = c->ip6.addr_ll; + } else { + tgt->faddr = ini->eaddr; } + tgt->fport = ini->eport; if (inany_v4(&tgt->faddr)) { tgt->eaddr = inany_from_v4(c->ip4.addr_seen); -- 2.46.0
I applied up to 2/3, I just have one doubt here, and a nit: On Mon, 12 Aug 2024 19:53:55 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:We usually avoid NAT, but in a few cases we need to apply address translations. The current logic for this on inbound flows has some inconsistencies: * For IPv4 (but not IPv6) we translated unspecified source addresses...I know we already talked about this, but 0.0.0.0/8 is not just unspecified, it also means "this host on this network" (RFC 6890, 2.2.2), and that's the reason for this apparent inconsistency (:: doesn't). By the way, somebody was reminded of this just recently: https://www.oligo.security/blog/0-0-0-0-day-exploiting-localhost-apis-from-… https://lwn.net/Articles/984838/ Now, if I recall correctly, the kernel translates that anyway to a loopback address before we see it. But still it's a legitimate address to use, so I would keep handling it as a loopback address.While an unspecified source probably doesn't make sense, it makes no less sense in the guest context than it does for the host, it's possible there could be protocols / flow types where this works. At the moment, this should never happen since the protocols will drop the flow before getting here. So, don't alter unspecified addresses for either IP version. * For IPv6 (but not IPv4) we translated the guest's assigned address We always translated the guest's observed address, but for the case where the assigned address different we should be consistent. Translate the guest assigned address for IPv4 as well.Hm, right, this looks like an oversight in e58828f34067 ("tcp: Fixes for closing states, spliced connections, out-of-order packets, etc.").The overall point is that we need to translate here if and only if the host address is one that's not accessible to the guest. Create some helper functions to determine this, which will have additional uses later. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- fwd.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 77 insertions(+), 11 deletions(-) diff --git a/fwd.c b/fwd.c index dea36f6c..24700998 100644 --- a/fwd.c +++ b/fwd.c @@ -170,6 +170,73 @@ static bool is_dns_flow(uint8_t proto, const struct flowside *ini) ((ini->fport == 53) || (ini->fport == 853)); } +/** + * fwd_guest_accessible4() - Is IPv4 address guest accessible + * @c: Execution context + * @addr: Host visible IPv4 address + * + * Return: true if @addr on the host is accessible to the guest without + * translation, false otherwise + */ +static bool fwd_guest_accessible4(const struct ctx *c, + const struct in_addr *addr) +{ + if (IN4_IS_ADDR_LOOPBACK(addr)) + return false; + + if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr)) + return false; + + if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_seen) && + IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen)) + return false; + + return true; +} + +/** + * fwd_guest_accessible6() - Is IPv6 address guest accessible + * @c: Execution context + * @addr: Host visible IPv6 address + * + * Return: true if @addr on the host is accessible to the guest without + * translation, false otherwise + */ +static bool fwd_guest_accessible6(const struct ctx *c, + const struct in6_addr *addr) +{ + if (IN6_IS_ADDR_LOOPBACK(addr)) + return false; + + if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr)) + return false; + + if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_seen) && + IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr_seen)) + return false; + + return true; +} + +/** + * fwd_guest_accessible() - Is IPv[46] address guest accessible + * @c: Execution context + * @addr: Host visible IPv[46] address + * + * Return: true if @addr on the host is accessible to the guest without + * translation, false otherwise + */ +static bool fwd_guest_accessible(const struct ctx *c, + const union inany_addr *addr) +{ + const struct in_addr *a4 = inany_v4(addr); + + if (a4) + return fwd_guest_accessible4(c, a4); + + return fwd_guest_accessible6(c, &addr->a6); +} + /** * fwd_nat_from_tap() - Determine to forward a flow from the tap interface * @c: Execution context @@ -307,21 +374,20 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto, return PIF_SPLICE; } - tgt->faddr = ini->eaddr; - tgt->fport = ini->eport; - - if (inany_is_loopback4(&tgt->faddr) || - inany_is_unspecified4(&tgt->faddr) || - inany_equals4(&tgt->faddr, &c->ip4.addr_seen)) { - tgt->faddr = inany_from_v4(c->ip4.gw); - } else if (inany_is_loopback6(&tgt->faddr) || - inany_equals6(&tgt->faddr, &c->ip6.addr_seen) || - inany_equals6(&tgt->faddr, &c->ip6.addr)) { - if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw)) + /* These host-visible addresses aren't visible to the guest, so needI guess you wrote this before moving the checks to their own function: it took me a bit to figure out that "These host-visible addresses" are not really mentioned here. Maybe s/These/Some/?+ * translation + */ + if (!fwd_guest_accessible(c, &ini->eaddr)) { + if (inany_v4(&ini->eaddr)) + tgt->faddr = inany_from_v4(c->ip4.gw); + else if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw)) tgt->faddr.a6 = c->ip6.gw; else tgt->faddr.a6 = c->ip6.addr_ll; + } else { + tgt->faddr = ini->eaddr; } + tgt->fport = ini->eport; if (inany_v4(&tgt->faddr)) { tgt->eaddr = inany_from_v4(c->ip4.addr_seen);-- Stefano
On Mon, Aug 12, 2024 at 11:51:17PM +0200, Stefano Brivio wrote:I applied up to 2/3, I just have one doubt here, and a nit: On Mon, 12 Aug 2024 19:53:55 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:Good point. I've changed that behaviour for the next spin. And added comments about this for the next sucker who notices the apparent inconsistency :). -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibsonWe usually avoid NAT, but in a few cases we need to apply address translations. The current logic for this on inbound flows has some inconsistencies: * For IPv4 (but not IPv6) we translated unspecified source addresses...I know we already talked about this, but 0.0.0.0/8 is not just unspecified, it also means "this host on this network" (RFC 6890, 2.2.2), and that's the reason for this apparent inconsistency (:: doesn't). By the way, somebody was reminded of this just recently: