On Tue, 14 May 2024 11:03:20 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:Each flow in the flow table has two sides, 0 and 1, representing the two interfaces between which passt/pasta will forward data for that flow. Which side is which is currently up to the protocol specific code: TCP uses side 0 for the host/"sock" side and 1 for the guest/"tap" side, except for spliced connections where it uses 0 for the initiating side and 1 for the accepting side. ICMP also uses 0 for the host/"sock" side and 1 for the guest/"tap" side, but in its case the latter is always also the initiating side. Make this generically consistent by always using side 0 for the initiating side and 1 for the accepting side. This doesn't simplify a lot for now, and arguably makes TCP slightly more complex, since we add an extra field to the connection structure to record which is the guest facing side. This is an interim change, which we'll be able to remove later. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- flow.c | 5 +---- flow.h | 5 +++++ flow_table.h | 6 ++---- icmp.c | 8 ++------ tcp.c | 19 ++++++++----------- tcp_conn.h | 3 ++- tcp_splice.c | 2 +- 7 files changed, 21 insertions(+), 27 deletions(-) diff --git a/flow.c b/flow.c index 768e0f6..7456021 100644 --- a/flow.c +++ b/flow.c @@ -152,12 +152,10 @@ static void flow_set_state(struct flow_common *f, enum flow_state state) * flow_set_type() - Set type and mvoe to TYPED state * @flow: Flow to change state * @type: Type for new flow - * @iniside: Which side initiated the new flow * * Return: @flow */ -union flow *flow_set_type(union flow *flow, enum flow_type type, - unsigned iniside) +union flow *flow_set_type(union flow *flow, enum flow_type type) { struct flow_common *f = &flow->f; @@ -165,7 +163,6 @@ union flow *flow_set_type(union flow *flow, enum flow_type type, ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_NEW); ASSERT(f->type == FLOW_TYPE_NONE); - (void)iniside; f->type = type; flow_set_state(f, FLOW_STATE_TYPED); return flow; diff --git a/flow.h b/flow.h index 073a734..28169a8 100644 --- a/flow.h +++ b/flow.h @@ -95,6 +95,11 @@ extern const uint8_t flow_proto[]; #define FLOW_PROTO(f) \ ((f)->type < FLOW_NUM_TYPES ? flow_proto[(f)->type] : 0) +#define SIDES 2 + +#define INISIDE 0 /* Initiating side */ +#define FWDSIDE 1 /* Forwarded side */ + /** * struct flow_common - Common fields for packet flows * @state: State of the flow table entry diff --git a/flow_table.h b/flow_table.h index 58014d8..7c98195 100644 --- a/flow_table.h +++ b/flow_table.h @@ -107,10 +107,8 @@ static inline flow_sidx_t flow_sidx(const struct flow_common *f, union flow *flow_alloc(void); void flow_alloc_cancel(union flow *flow); -union flow *flow_set_type(union flow *flow, enum flow_type type, - unsigned iniside); -#define FLOW_SET_TYPE(flow_, t_, var_, i_) \ - (&flow_set_type((flow_), (t_), (i_))->var_) +union flow *flow_set_type(union flow *flow, enum flow_type type); +#define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_) void flow_activate(struct flow_common *f); #define FLOW_ACTIVATE(flow_) \ diff --git a/icmp.c b/icmp.c index fda868d..6df0989 100644 --- a/icmp.c +++ b/icmp.c @@ -45,10 +45,6 @@ #define ICMP_ECHO_TIMEOUT 60 /* s, timeout for ICMP socket activity */ #define ICMP_NUM_IDS (1U << 16) -/* Sides of a flow as we use them for ping streams */ -#define SOCKSIDE 0 -#define TAPSIDE 1 - #define PINGF(idx) (&(FLOW(idx)->ping)) /* Indexed by ICMP echo identifier */ @@ -167,7 +163,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, if (!flow) return NULL; - pingf = FLOW_SET_TYPE(flow, flowtype, ping, TAPSIDE); + pingf = FLOW_SET_TYPE(flow, flowtype, ping); pingf->seq = -1; pingf->id = id; @@ -180,7 +176,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, bind_if = c->ip6.ifname_out; } - ref.flowside = FLOW_SIDX(flow, SOCKSIDE); + ref.flowside = FLOW_SIDX(flow, FWDSIDE); pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if, 0, ref.data); diff --git a/tcp.c b/tcp.c index 65208ca..06401ba 100644 --- a/tcp.c +++ b/tcp.c @@ -303,10 +303,6 @@ #include "flow_table.h" -/* Sides of a flow as we use them in "tap" connections */ -#define SOCKSIDE 0 -#define TAPSIDE 1 - #define TCP_FRAMES_MEM 128 #define TCP_FRAMES \ (c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1) @@ -581,7 +577,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) { int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock, - .flowside = FLOW_SIDX(conn, SOCKSIDE) }; + .flowside = FLOW_SIDX(conn, !conn->tapside), }; struct epoll_event ev = { .data.u64 = ref.u64 }; if (conn->events == CLOSED) { @@ -1134,7 +1130,7 @@ static uint64_t tcp_conn_hash(const struct ctx *c, static inline unsigned tcp_hash_probe(const struct ctx *c, const struct tcp_tap_conn *conn) { - flow_sidx_t sidx = FLOW_SIDX(conn, TAPSIDE); + flow_sidx_t sidx = FLOW_SIDX(conn, conn->tapside); unsigned b = tcp_conn_hash(c, conn) % TCP_HASH_TABLE_SIZE; /* Linear probing */ @@ -1154,7 +1150,7 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn) { unsigned b = tcp_hash_probe(c, conn); - tc_hash[b] = FLOW_SIDX(conn, TAPSIDE); + tc_hash[b] = FLOW_SIDX(conn, conn->tapside); flow_dbg(conn, "hash table insert: sock %i, bucket: %u", conn->sock, b); } @@ -2006,7 +2002,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, goto cancel; } - conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp, TAPSIDE); + conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); + conn->tapside = INISIDE; conn->sock = s; conn->timer = -1; conn_event(c, conn, TAP_SYN_RCVD); @@ -2725,9 +2722,9 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, const union sockaddr_inany *sa, const struct timespec *now) { - struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp, - SOCKSIDE); + struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); + conn->tapside = FWDSIDE; conn->sock = s; conn->timer = -1; conn->ws_to_tap = conn->ws_from_tap = 0; @@ -2884,7 +2881,7 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events) struct tcp_tap_conn *conn = CONN(ref.flowside.flow); ASSERT(conn->f.type == FLOW_TCP); - ASSERT(ref.flowside.side == SOCKSIDE); + ASSERT(ref.flowside.side == !conn->tapside); if (conn->events == CLOSED) return; diff --git a/tcp_conn.h b/tcp_conn.h index d280b22..5df0076 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -13,6 +13,7 @@ * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced) * @f: Generic flow information * @in_epoll: Is the connection in the epoll set? + * @tapside: Which side of the flow faces the tap/guest interface * @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS * @sock: Socket descriptor number * @events: Connection events, implying connection states @@ -39,6 +40,7 @@ struct tcp_tap_conn { struct flow_common f; bool in_epoll :1; + unsigned tapside :1;This is a bit "far" from where the bit meaning is defined (flow.h). Perhaps, in the comment: * @tapside: Which side (INISIDE/FWDSIDE) corresponds to the tap/guest interface ? And this is almost too obvious to ask, but I'm not sure: why isn't this in flow_common? I guess we'll need it for all the protocols, eventually, right? Is it because otherwise we have 17 bits there?#define TCP_RETRANS_BITS 3 unsigned int retrans :TCP_RETRANS_BITS; @@ -106,7 +108,6 @@ struct tcp_tap_conn { uint32_t seq_init_from_tap; }; -#define SIDES 2 /** * struct tcp_splice_conn - Descriptor for a spliced TCP connection * @f: Generic flow information diff --git a/tcp_splice.c b/tcp_splice.c index abe98a0..5da7021 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -472,7 +472,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, return false; } - conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice, 0); + conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice); conn->flags = af == AF_INET ? 0 : SPLICE_V6; conn->s[0] = s0;-- Stefano