On Wed, Jun 26, 2024 at 12:23:59AM +0200, Stefano Brivio wrote:On Fri, 14 Jun 2024 16:13:24 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:Right.. as it happens, I'm reworking this in a way that will obsolete this parameter anyway.Require the address and port information for the target (non initiating) side to be populated when a flow enters TGT state. Implement that for TCP and ICMP. For now this leaves some information redundantly recorded in both generic and type specific fields. We'll fix that in later patches. For TCP we now use the information from the flow to construct the destination socket address in both tcp_conn_from_tap() and tcp_splice_connect(). Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- flow.c | 38 ++++++++++++++++++------ flow_table.h | 5 +++- icmp.c | 3 +- inany.h | 30 ++++++++++++++++++- tcp.c | 83 ++++++++++++++++++++++++++++------------------------ tcp_splice.c | 45 +++++++++++----------------- 6 files changed, 126 insertions(+), 78 deletions(-) diff --git a/flow.c b/flow.c index 1819111d..39e046bf 100644 --- a/flow.c +++ b/flow.c @@ -165,8 +165,10 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...) */ static void flow_set_state(struct flow_common *f, enum flow_state state) { - char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN]; + char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN]; + char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN]; const struct flowside *ini = &f->side[INISIDE]; + const struct flowside *tgt = &f->side[TGTSIDE]; uint8_t oldstate = f->state; ASSERT(state < FLOW_NUM_STATES); @@ -177,19 +179,24 @@ static void flow_set_state(struct flow_common *f, enum flow_state state) FLOW_STATE(f)); if (MAX(state, oldstate) >= FLOW_STATE_TGT) - flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => %s", + flow_log_(f, LOG_DEBUG, + "%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu", pif_name(f->pif[INISIDE]), - inany_ntop(&ini->eaddr, estr, sizeof(estr)), + inany_ntop(&ini->eaddr, estr0, sizeof(estr0)), ini->eport, - inany_ntop(&ini->faddr, fstr, sizeof(fstr)), + inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)), ini->fport, - pif_name(f->pif[TGTSIDE])); + pif_name(f->pif[TGTSIDE]), + inany_ntop(&tgt->faddr, fstr1, sizeof(fstr1)), + tgt->fport, + inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)), + tgt->eport); else if (MAX(state, oldstate) >= FLOW_STATE_INI) flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => ?", pif_name(f->pif[INISIDE]), - inany_ntop(&ini->eaddr, estr, sizeof(estr)), + inany_ntop(&ini->eaddr, estr0, sizeof(estr0)), ini->eport, - inany_ntop(&ini->faddr, fstr, sizeof(fstr)), + inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)), ini->fport); } @@ -261,21 +268,34 @@ const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, } /** - * flow_target() - Move flow to TGT, setting TGTSIDE details + * flow_target_af() - Move flow to TGT, setting TGTSIDE details * @flow: Flow to change state * @pif: pif of the target side + * @af: Address family for @eaddr and @faddr + * @saddr: Source address (pointer to in_addr or in6_addr) + * @sport: Endpoint port + * @daddr: Destination address (pointer to in_addr or in6_addr) + * @dport: Destination port + * + * Return: pointer to the target flowside information */ -void flow_target(union flow *flow, uint8_t pif) +const struct flowside *flow_target_af(union flow *flow, uint8_t pif, + sa_family_t af, + const void *saddr, in_port_t sport, + const void *daddr, in_port_t dport) { struct flow_common *f = &flow->f; + struct flowside *tgt = &f->side[TGTSIDE]; ASSERT(pif != PIF_NONE); ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI); ASSERT(f->type == FLOW_TYPE_NONE); ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE); + flowside_from_af(tgt, af, daddr, dport, saddr, sport); f->pif[TGTSIDE] = pif; flow_set_state(f, FLOW_STATE_TGT); + return tgt; } /** diff --git a/flow_table.h b/flow_table.h index 2e912532..7af32c6a 100644 --- a/flow_table.h +++ b/flow_table.h @@ -114,7 +114,10 @@ const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif, const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, const union sockaddr_inany *ssa, in_port_t dport); -void flow_target(union flow *flow, uint8_t pif); +const struct flowside *flow_target_af(union flow *flow, uint8_t pif, + sa_family_t af, + const void *saddr, in_port_t sport, + const void *daddr, in_port_t dport); union flow *flow_set_type(union flow *flow, enum flow_type type); #define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_) diff --git a/icmp.c b/icmp.c index 38d1deeb..c86c54c3 100644 --- a/icmp.c +++ b/icmp.c @@ -167,7 +167,8 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, return NULL; flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id); - flow_target(flow, PIF_HOST); + /* FIXME: Record outbound source address when known */ + flow_target_af(flow, PIF_HOST, af, NULL, 0, daddr, 0); pingf = FLOW_SET_TYPE(flow, flowtype, ping); pingf->seq = -1; diff --git a/inany.h b/inany.h index 47b66fa9..2bf3becf 100644 --- a/inany.h +++ b/inany.h @@ -187,7 +187,6 @@ static inline bool inany_is_unspecified(const union inany_addr *a) * * Return: true if @a is in fe80::/10 (IPv6 link local unicast) */ -/* cppcheck-suppress unusedFunction */ static inline bool inany_is_linklocal6(const union inany_addr *a) { return IN6_IS_ADDR_LINKLOCAL(&a->a6); @@ -273,4 +272,33 @@ static inline void inany_siphash_feed(struct siphash_state *state, const char *inany_ntop(const union inany_addr *src, char *dst, socklen_t size); +/** sockaddr_from_inany() - Construct a sockaddr from an inany + * @sa: Pointer to sockaddr to fill in + * @sl: Updated to relevant of length of initialised @sa + * @addr: IPv[46] address + * @port: Port (host byte order) + * @scope: Scope ID (ignored for IPv4 addresses)Perhaps worth specifying that it's the interface index for IPv6 link-local addresses only (even if it's not the case as of this patch, more below): * @scope: Scope ID for link-local IPv6 addresses only: interface indexRight, that will be changed by the rework I have in mind anyway.+ */ +static inline void sockaddr_from_inany(union sockaddr_inany *sa, socklen_t *sl, + const union inany_addr *addr, + in_port_t port, uint32_t scope) +{ + const struct in_addr *v4 = inany_v4(addr); + + if (v4) { + sa->sa_family = AF_INET; + sa->sa4.sin_addr = *v4; + sa->sa4.sin_port = htons(port); + memset(&sa->sa4.sin_zero, 0, sizeof(sa->sa4.sin_zero)); + *sl = sizeof(sa->sa4); + } else { + sa->sa_family = AF_INET6; + sa->sa6.sin6_addr = addr->a6; + sa->sa6.sin6_port = htons(port); + sa->sa6.sin6_scope_id = scope; + sa->sa6.sin6_flowinfo = 0; + *sl = sizeof(sa->sa6); + } +} + #endif /* INANY_H */ diff --git a/tcp.c b/tcp.c index 07a2eb1c..c6cd0c72 100644 --- a/tcp.c +++ b/tcp.c @@ -1610,18 +1610,10 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, { in_port_t srcport = ntohs(th->source); in_port_t dstport = ntohs(th->dest); - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(dstport), - .sin_addr = *(struct in_addr *)daddr, - }; - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(dstport), - .sin6_addr = *(struct in6_addr *)daddr, - }; - const struct sockaddr *sa; + const struct flowside *ini, *tgt; struct tcp_tap_conn *conn; + union inany_addr dstaddr; /* FIXME: Avoid bulky temporary */ + union sockaddr_inany sa; union flow *flow; int s = -1, mss; socklen_t sl; @@ -1629,7 +1621,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, if (!(flow = flow_alloc())) return; - flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport); + ini = flow_initiate_af(flow, PIF_TAP, + af, saddr, srcport, daddr, dstport); if (af == AF_INET) { if (IN4_IS_ADDR_UNSPECIFIED(saddr) || @@ -1661,19 +1654,28 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, dstport); goto cancel; } + } else { + ASSERT(0); } if ((s = tcp_conn_sock(c, af)) < 0) goto cancel; + dstaddr = ini->faddr; + if (!c->no_map_gw) { - if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw)) - addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - if (af == AF_INET6 && IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw)) - addr6.sin6_addr = in6addr_loopback; + if (inany_equals4(&dstaddr, &c->ip4.gw)) + dstaddr = inany_loopback4; + else if (inany_equals6(&dstaddr, &c->ip6.gw)) + dstaddr = inany_loopback6; } - if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)) { + /* FIXME: Record outbound source address when known */ + tgt = flow_target_af(flow, PIF_HOST, AF_INET6, + NULL, 0, /* Kernel decides source address */ + &dstaddr, dstport); + + if (inany_is_linklocal6(&tgt->eaddr)) { struct sockaddr_in6 addr6_ll = { .sin6_family = AF_INET6, .sin6_addr = c->ip6.addr_ll, @@ -1681,9 +1683,10 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, }; if (bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll))) goto cancel; + } else if (!inany_is_loopback(&tgt->eaddr)) { + tcp_bind_outbound(c, s, af); } - flow_target(flow, PIF_HOST); conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); conn->sock = s; conn->timer = -1; @@ -1706,14 +1709,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, inany_from_af(&conn->faddr, af, daddr); - if (af == AF_INET) { - sa = (struct sockaddr *)&addr4; - sl = sizeof(addr4); - } else { - sa = (struct sockaddr *)&addr6; - sl = sizeof(addr6); - } - conn->fport = dstport; conn->eport = srcport; @@ -1726,19 +1721,16 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, tcp_hash_insert(c, conn); - if (!bind(s, sa, sl)) { + sockaddr_from_inany(&sa, &sl, &tgt->eaddr, tgt->eport, c->ifi6);...so, I never tried to use a non-zero scope identifier for, say, global unicast IPv6 addresses. Perhaps it's harmless. But I still think that, logically, we should pass it as zero in that case.Yeah, I already did that rebase, it was surprisingly fiddly.+ + if (!bind(s, &sa.sa, sl)) { tcp_rst(c, conn); /* Nobody is listening then */ goto cancel; } if (errno != EADDRNOTAVAIL && errno != EACCES) conn_flag(c, conn, LOCAL);This will have a semi-trivial conflict with 54a9d3801b95 ("tcp: Don't rely on bind() to fail to decide that connection target is valid").-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson- if ((af == AF_INET && !IN4_IS_ADDR_LOOPBACK(&addr4.sin_addr)) || - (af == AF_INET6 && !IN6_IS_ADDR_LOOPBACK(&addr6.sin6_addr) && - !IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr))) - tcp_bind_outbound(c, s, af); - - if (connect(s, sa, sl)) { + if (connect(s, &sa.sa, sl)) { if (errno != EINPROGRESS) { tcp_rst(c, conn); goto cancel; @@ -2236,9 +2228,25 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, const union sockaddr_inany *sa, const struct timespec *now) { + union inany_addr saddr, daddr; /* FIXME: avoid bulky temporaries */ struct tcp_tap_conn *conn; + in_port_t srcport; + + inany_from_sockaddr(&saddr, &srcport, sa); + tcp_snat_inbound(c, &saddr); - flow_target(flow, PIF_TAP); + if (inany_v4(&saddr)) { + daddr = inany_from_v4(c->ip4.addr_seen); + } else { + if (inany_is_linklocal6(&saddr)) + daddr.a6 = c->ip6.addr_ll_seen; + else + daddr.a6 = c->ip6.addr_seen; + } + dstport += c->tcp.fwd_in.delta[dstport]; + + flow_target_af(flow, PIF_TAP, AF_INET6, + &saddr, srcport, &daddr, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); conn->sock = s; @@ -2246,10 +2254,9 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, conn->ws_to_tap = conn->ws_from_tap = 0; conn_event(c, conn, SOCK_ACCEPTED); - inany_from_sockaddr(&conn->faddr, &conn->fport, sa); - conn->eport = dstport + c->tcp.fwd_in.delta[dstport]; - - tcp_snat_inbound(c, &conn->faddr); + conn->faddr = saddr; + conn->fport = srcport; + conn->eport = dstport; tcp_seq_init(c, conn, now); tcp_hash_insert(c, conn); diff --git a/tcp_splice.c b/tcp_splice.c index 5a406c63..bcb42c97 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -320,31 +320,20 @@ static int tcp_splice_connect_finish(const struct ctx *c, * tcp_splice_connect() - Create and connect socket for new spliced connection * @c: Execution context * @conn: Connection pointer - * @af: Address family - * @pif: pif on which to create socket - * @port: Destination port, host order * * Return: 0 for connect() succeeded or in progress, negative value on error */ -static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, - sa_family_t af, uint8_t pif, in_port_t port) +static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn) { - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(port), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(port), - .sin_addr = IN4ADDR_LOOPBACK_INIT, - }; - const struct sockaddr *sa; + const struct flowside *tgt = &conn->f.side[TGTSIDE]; + sa_family_t af = inany_v4(&tgt->eaddr) ? AF_INET : AF_INET6; + uint8_t tgtpif = conn->f.pif[TGTSIDE]; + union sockaddr_inany sa; socklen_t sl; - if (pif == PIF_HOST) + if (tgtpif == PIF_HOST) conn->s[1] = tcp_conn_sock(c, af); - else if (pif == PIF_SPLICE) + else if (tgtpif == PIF_SPLICE) conn->s[1] = tcp_conn_sock_ns(c, af); else ASSERT(0); @@ -358,15 +347,9 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, conn->s[1]); } - if (CONN_V6(conn)) { - sa = (struct sockaddr *)&addr6; - sl = sizeof(addr6); - } else { - sa = (struct sockaddr *)&addr4; - sl = sizeof(addr4); - } + sockaddr_from_inany(&sa, &sl, &tgt->eaddr, tgt->eport, 0); - if (connect(conn->s[1], sa, sl)) { + if (connect(conn->s[1], &sa.sa, sl)) { if (errno != EINPROGRESS) { flow_trace(conn, "Couldn't connect socket for splice: %s", strerror(errno)); @@ -471,7 +454,13 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, return false; } - flow_target(flow, tgtpif); + /* FIXME: Record outbound source address when known */ + if (af == AF_INET) + flow_target_af(flow, tgtpif, AF_INET, + NULL, 0, &in4addr_loopback, dstport); + else + flow_target_af(flow, tgtpif, AF_INET6, + NULL, 0, &in6addr_loopback, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice); conn->flags = af == AF_INET ? 0 : SPLICE_V6; @@ -483,7 +472,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int))) flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0); - if (tcp_splice_connect(c, conn, af, tgtpif, dstport)) + if (tcp_splice_connect(c, conn)) conn_flag(c, conn, CLOSING); FLOW_ACTIVATE(conn);