Currently, although we associate a flow with traffic coming from a socket, we don't use that flow to determine how to forward the traffic. Fix this for the case of traffic going from socket to tap (i.e. host to guest). Determine first that we should be sending to tap from the pif in the flow entry, rather than with separate logic. Also use the addresses and ports in the flow entry to construct the headers for the tap interface. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- flow_table.h | 28 +++++++++ udp.c | 158 +++++++++++++++++---------------------------------- 2 files changed, 81 insertions(+), 105 deletions(-) diff --git a/flow_table.h b/flow_table.h index 6cf4f2b7..3f1e81b9 100644 --- a/flow_table.h +++ b/flow_table.h @@ -80,6 +80,34 @@ static inline union flow *flow_at_sidx(flow_sidx_t sidx) return FLOW(sidx.flow); } +/** flowside_at_sidx - Flow side for a given sidx + * @sidx: Flow & side index + * + * Return: pointer to the corresponding flowside, or NULL + */ +static inline struct flowside *flowside_at_sidx(flow_sidx_t sidx) +{ + union flow *flow = flow_at_sidx(sidx); + + if (!flow) + return NULL; + return &flow->f.side[sidx.side]; +} + +/** pif_at_sidx - Interface id for a given sidx + * @sidx: Flow & side index + * + * Return: pif for the given flow & side, or PIF_NONE if the sidx is invalid + */ +static inline uint8_t pif_at_sidx(flow_sidx_t sidx) +{ + union flow *flow = flow_at_sidx(sidx); + + if (!flow) + return PIF_NONE; + return flow->f.pif[sidx.side]; +} + /** flow_sidx_t - Index of one side of a flow from common structure * @f: Common flow fields pointer * @side: Which side to refer to (0 or 1) diff --git a/udp.c b/udp.c index 4668690e..29f3ba85 100644 --- a/udp.c +++ b/udp.c @@ -711,118 +711,53 @@ out: } /** - * udp_update_hdr4() - Update headers for one IPv4 datagram - * @c: Execution context + * udp_update_hdr4() - Update headers for one IPv4 datagram * @ip4h: Pre-filled IPv4 header (except for tot_len and saddr) - * @s_in: 
Source socket address, filled in by recvmmsg() * @bp: Pointer to udp_payload_t to update - * @dstport: Destination port number + * @fside: Flowside with addresses to direct the datagram * @dlen: Length of UDP payload - * @now: Current timestamp * * Return: size of IPv4 payload (UDP header + data) */ -static size_t udp_update_hdr4(const struct ctx *c, - struct iphdr *ip4h, const struct sockaddr_in *s_in, - struct udp_payload_t *bp, - in_port_t dstport, size_t dlen, - const struct timespec *now) +static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, + const struct flowside *fside, size_t dlen) { - const struct in_addr dst = c->ip4.addr_seen; - in_port_t srcport = ntohs(s_in->sin_port); + const struct in_addr *src = inany_v4(&fside->faddr); + const struct in_addr *dst = inany_v4(&fside->eaddr); size_t l4len = dlen + sizeof(bp->uh); size_t l3len = l4len + sizeof(*ip4h); - struct in_addr src = s_in->sin_addr; - - if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) && - IN4_ARE_ADDR_EQUAL(&src, &c->ip4.dns_host) && srcport == 53 && - (udp_tap_map[V4][dstport].flags & PORT_DNS_FWD)) { - src = c->ip4.dns_match; - } else if (IN4_IS_ADDR_LOOPBACK(&src) || - IN4_ARE_ADDR_EQUAL(&src, &c->ip4.addr_seen)) { - udp_tap_map[V4][srcport].ts = now->tv_sec; - udp_tap_map[V4][srcport].flags |= PORT_LOCAL; - - if (IN4_IS_ADDR_LOOPBACK(&src)) - udp_tap_map[V4][srcport].flags |= PORT_LOOPBACK; - else - udp_tap_map[V4][srcport].flags &= ~PORT_LOOPBACK; - bitmap_set(udp_act[V4][UDP_ACT_TAP], srcport); - - src = c->ip4.gw; - } + ASSERT(src && dst); ip4h->tot_len = htons(l3len); - ip4h->daddr = dst.s_addr; - ip4h->saddr = src.s_addr; - ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, src, dst); + ip4h->daddr = dst->s_addr; + ip4h->saddr = src->s_addr; + ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, *src, *dst); - bp->uh.source = s_in->sin_port; - bp->uh.dest = htons(dstport); + bp->uh.source = htons(fside->fport); + bp->uh.dest = htons(fside->eport); bp->uh.len = 
htons(l4len); - csum_udp4(&bp->uh, src, dst, bp->data, dlen); + csum_udp4(&bp->uh, *src, *dst, bp->data, dlen); return l4len; } /** * udp_update_hdr6() - Update headers for one IPv6 datagram - * @c: Execution context * @ip6h: Pre-filled IPv6 header (except for payload_len and addresses) - * @s_in: Source socket address, filled in by recvmmsg() * @bp: Pointer to udp_payload_t to update - * @dstport: Destination port number + * @fside: Flowside with addresses to direct the datagram * @dlen: Length of UDP payload - * @now: Current timestamp * * Return: size of IPv6 payload (UDP header + data) */ -static size_t udp_update_hdr6(const struct ctx *c, - struct ipv6hdr *ip6h, struct sockaddr_in6 *s_in6, - struct udp_payload_t *bp, - in_port_t dstport, size_t dlen, - const struct timespec *now) +static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, + const struct flowside *fside, size_t dlen) { - const struct in6_addr *src = &s_in6->sin6_addr; - const struct in6_addr *dst = &c->ip6.addr_seen; - in_port_t srcport = ntohs(s_in6->sin6_port); + const struct in6_addr *src = &fside->faddr.a6; + const struct in6_addr *dst = &fside->eaddr.a6; uint16_t l4len = dlen + sizeof(bp->uh); - if (IN6_IS_ADDR_LINKLOCAL(src)) { - dst = &c->ip6.addr_ll_seen; - } else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_match) && - IN6_ARE_ADDR_EQUAL(src, &c->ip6.dns_host) && - srcport == 53 && - (udp_tap_map[V4][dstport].flags & PORT_DNS_FWD)) { - src = &c->ip6.dns_match; - } else if (IN6_IS_ADDR_LOOPBACK(src) || - IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr_seen) || - IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) { - udp_tap_map[V6][srcport].ts = now->tv_sec; - udp_tap_map[V6][srcport].flags |= PORT_LOCAL; - - if (IN6_IS_ADDR_LOOPBACK(src)) - udp_tap_map[V6][srcport].flags |= PORT_LOOPBACK; - else - udp_tap_map[V6][srcport].flags &= ~PORT_LOOPBACK; - - if (IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) - udp_tap_map[V6][srcport].flags |= PORT_GUA; - else - udp_tap_map[V6][srcport].flags &= ~PORT_GUA; - 
- bitmap_set(udp_act[V6][UDP_ACT_TAP], srcport); - - dst = &c->ip6.addr_ll_seen; - - if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw)) - src = &c->ip6.gw; - else - src = &c->ip6.addr_ll; - - } - ip6h->payload_len = htons(l4len); ip6h->daddr = *dst; ip6h->saddr = *src; @@ -830,8 +765,8 @@ static size_t udp_update_hdr6(const struct ctx *c, ip6h->nexthdr = IPPROTO_UDP; ip6h->hop_limit = 255; - bp->uh.source = s_in6->sin6_port; - bp->uh.dest = htons(dstport); + bp->uh.source = htons(fside->fport); + bp->uh.dest = htons(fside->eport); bp->uh.len = ip6h->payload_len; csum_udp6(&bp->uh, src, dst, bp->data, dlen); @@ -843,26 +778,23 @@ static size_t udp_update_hdr6(const struct ctx *c, * @c: Execution context * @start: Index of first datagram in udp[46]_l2_buf pool * @n: Total number of datagrams in udp[46]_l2_buf pool - * @dstport: Destination port number on destination side * @uref: UDP epoll reference for origin socket * @now: Current timestamp * * This consumes as many frames as are sendable via tap. It requires that - * udp_meta[@start].splicesrc is initialised, and will initialise - * udp_meta[].splicesrc for each frame it consumes *and one more* (if present). + * udp_meta[@start].tosidx is initialised, and will initialise udp_meta[].tosidx + * for each frame it consumes *and one more* (if present). 
* * Return: Number of frames sent via tap */ static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n, - in_port_t dstport, union udp_epoll_ref uref, + union udp_epoll_ref uref, const struct timespec *now) { struct iovec (*tap_iov)[UDP_NUM_IOVS]; struct mmsghdr *mmh_recv; size_t i = start; - ASSERT(udp_meta[start].splicesrc == -1); - if (uref.v6) { tap_iov = udp6_l2_iov_tap; mmh_recv = udp6_l2_mh_sock; @@ -874,18 +806,19 @@ static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n, do { struct udp_payload_t *bp = &udp_payload[i]; struct udp_meta_t *bm = &udp_meta[i]; + const struct flowside *toside = flowside_at_sidx(bm->tosidx); size_t l4len; if (uref.v6) { - l4len = udp_update_hdr6(c, &bm->ip6h, - &bm->s_in.sa6, bp, dstport, - udp6_l2_mh_sock[i].msg_len, now); + udp_tap_map[V6][toside->fport].ts = now->tv_sec; + l4len = udp_update_hdr6(&bm->ip6h, bp, toside, + udp6_l2_mh_sock[i].msg_len); tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + sizeof(udp6_eth_hdr)); } else { - l4len = udp_update_hdr4(c, &bm->ip4h, - &bm->s_in.sa4, bp, dstport, - udp4_l2_mh_sock[i].msg_len, now); + udp_tap_map[V4][toside->fport].ts = now->tv_sec; + l4len = udp_update_hdr4(&bm->ip4h, bp, toside, + udp4_l2_mh_sock[i].msg_len); tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + sizeof(udp4_eth_hdr)); } @@ -896,7 +829,7 @@ static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n, udp_meta[i].splicesrc = udp_mmh_splice_port(uref, &mmh_recv[i]); udp_meta[i].tosidx = udp_flow_from_sock(c, uref, &udp_meta[i]); - } while (udp_meta[i].splicesrc == -1); + } while (pif_at_sidx(udp_meta[i].tosidx) == PIF_TAP); tap_send_frames(c, &tap_iov[start][0], UDP_NUM_IOVS, i - start); return i - start; @@ -948,19 +881,34 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve return; /* We divide things into batches based on how we need to send them, - * determined by udp_meta[i].splicesrc. 
To avoid either two passes - * through the array, or recalculating splicesrc and tosidx for a single - * entry, we have to populate them one entry *ahead* of the loop counter - * (if present). So we fill in entry 0 before the loop, then + * determined by udp_meta[i].splicesrc or tosidx. To avoid either two + * passes through the array, or recalculating splicesrc and tosidx for a + * single entry, we have to populate them one entry *ahead* of the loop + * counter (if present). So we fill in entry 0 before the loop, then * udp_*_send() populate one entry past where they consume. */ udp_meta[0].splicesrc = udp_mmh_splice_port(ref.udp, mmh_recv); udp_meta[0].tosidx = udp_flow_from_sock(c, ref.udp, &udp_meta[0]); for (i = 0; i < n; i += m) { - if (udp_meta[i].splicesrc >= 0) + flow_sidx_t tosidx = udp_meta[i].tosidx; + uint8_t topif = pif_at_sidx(tosidx); + + if (topif == PIF_TAP) { + m = udp_tap_send(c, i, n, ref.udp, now); + } else if (udp_meta[i].splicesrc >= 0) { m = udp_splice_send(c, i, n, dstport, ref.udp, now); - else - m = udp_tap_send(c, i, n, dstport, ref.udp, now); + } else { + char sstr[SOCKADDR_STRLEN]; + + debug("Dropping UDP packet without usable flow from %s %s -> ?:%hu", + pif_name(ref.udp.pif), + sockaddr_ntop(&udp_meta[i].s_in, sstr, sizeof(sstr)), + dstport); + + m = 1; + udp_meta[i].splicesrc = udp_mmh_splice_port(ref.udp, mmh_recv); + udp_meta[i].tosidx = udp_flow_from_sock(c, ref.udp, &udp_meta[i]); + } } } -- 2.45.2