Currently tcp.c open codes the process of allocating a new flow from the flow table: twice, in fact, once for guest to host and once for host to guest connections. This duplication isn't ideal and will get worse as we add more protocols to the flow table. It also makes it harder to experiment with different ways of handling flow table allocation. Instead, introduce helpers to allocate new flows. Rather than a single function we do this in two stages: flow_prealloc() to check if a slot is available and flow_alloc_commit() to "really" allocate it, setting the type to something other than FLOW_TYPE_NONE. We do this because it's a common pattern to perform some network operations which might fail, and we don't want to commit to allocating a slot until they've succeeded. However we don't want to bother doing that if we already know there's no space for a new flow. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- flow.c | 18 ++++++++++++++++++ flow_table.h | 17 +++++++++++++++++ tcp.c | 25 ++++++++++++------------- tcp_splice.c | 11 ++++++----- tcp_splice.h | 2 +- 5 files changed, 54 insertions(+), 19 deletions(-) diff --git a/flow.c b/flow.c index 294412a..3713fac 100644 --- a/flow.c +++ b/flow.c @@ -49,6 +49,24 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...) logmsg(pri, "Flow %u (%s): %s", flow_idx(f), FLOW_TYPE(f), msg); } + +/** + * flow_alloc_commit() - Mark a new flow as used + * @pflow: Unused flow returned from flow_prealloc() + * @type: type of flow this entry will now be used for + * + * Return: writable pointer to @pflow. + */ +union flow *flow_alloc_commit(const union flow *pflow, enum flow_type type) +{ + union flow *flow = (union flow *)pflow; + + ASSERT(FLOW_IDX(flow) == flow_count); + flow_count++; + flow->f.type = type; + return flow; +} + /** * flow_table_compact() - Perform compaction on flow table * @c: Execution context diff --git a/flow_table.h b/flow_table.h index 4aa2398..3236288 100644 --- a/flow_table.h +++ b/flow_table.h @@ -88,4 +88,21 @@ static inline flow_sidx_t flow_sidx(const struct flow_common *f, */ #define FLOW_SIDX(f_, side) (flow_sidx(&(f_)->f, (side))) +/** + * flow_prealloc() - Check we have space for a new flow + * + * Return: pointer to an unused flow entry, or NULL if the table is full + * + * Pointer is const, because the caller must not write to it until calling + * flow_alloc_commit(). + */ +static inline const union flow *flow_prealloc(void) +{ + if (flow_count >= FLOW_MAX) + return NULL; + return &flowtab[flow_count]; +} + +union flow *flow_alloc_commit(const union flow *pflow, enum flow_type type); + #endif /* FLOW_TABLE_H */ diff --git a/tcp.c b/tcp.c index 6aefd0b..77fe2d1 100644 --- a/tcp.c +++ b/tcp.c @@ -1943,12 +1943,13 @@ static void tcp_conn_from_tap(struct ctx *c, }; const struct sockaddr *sa; struct tcp_tap_conn *conn; + const union flow *flow; socklen_t sl; int s, mss; (void)saddr; - if (flow_count >= FLOW_MAX) + if (!(flow = flow_prealloc())) return; if ((s = tcp_conn_pool_sock(pool)) < 0) @@ -1974,8 +1975,7 @@ static void tcp_conn_from_tap(struct ctx *c, } } - conn = CONN(flow_count++); - conn->f.type = FLOW_TCP; + conn = &flow_alloc_commit(flow, FLOW_TCP)->tcp; conn->sock = s; conn->timer = -1; conn_event(c, conn, TAP_SYN_RCVD); @@ -2674,18 +2674,19 @@ static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr) * tcp_tap_conn_from_sock() - Initialize state for non-spliced connection * @c: Execution context * @ref: epoll reference of listening socket - * @conn: connection structure to initialize + * @flow: unused flow in which to put the connection * @s: Accepted socket * @sa: Peer socket address (from accept()) * @now: Current timestamp */ static void tcp_tap_conn_from_sock(struct ctx *c, union tcp_listen_epoll_ref ref, - struct tcp_tap_conn *conn, int s, + const union flow *flow, int s, const struct sockaddr *sa, const struct timespec *now) { - conn->f.type = FLOW_TCP; + struct tcp_tap_conn *conn = &flow_alloc_commit(flow, FLOW_TCP)->tcp; + conn->sock = s; conn->timer = -1; conn->ws_to_tap = conn->ws_from_tap = 0; @@ -2720,25 +2721,23 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref, { struct sockaddr_storage sa; socklen_t sl = sizeof(sa); - union flow *flow; + const union flow *flow; int s; - if (c->no_tcp || flow_count >= FLOW_MAX) + if (c->no_tcp || !(flow = flow_prealloc())) return; s = accept4(ref.fd, (struct sockaddr *)&sa, &sl, SOCK_NONBLOCK); if (s < 0) return; - flow = flowtab + flow_count++; - if (c->mode == MODE_PASTA && - tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice, + tcp_splice_conn_from_sock(c, ref.tcp_listen, flow, s, (struct sockaddr *)&sa)) return; - tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s, - (struct sockaddr *)&sa, now); + tcp_tap_conn_from_sock(c, ref.tcp_listen, flow, + s, (struct sockaddr *)&sa, now); } /** diff --git a/tcp_splice.c b/tcp_splice.c index 18cd2e6..869ab4b 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -438,7 +438,7 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn, * tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection * @c: Execution context * @ref: epoll reference of listening socket - * @conn: connection structure to initialize + * @flow: unused flow in which to put the connection * @s: Accepted socket * @sa: Peer address of connection * @@ -447,9 +447,10 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn, */ bool tcp_splice_conn_from_sock(const struct ctx *c, union tcp_listen_epoll_ref ref, - struct tcp_splice_conn *conn, int s, + const union flow *flow, int s, const struct sockaddr *sa) { + struct tcp_splice_conn *conn; const struct in_addr *a4; union inany_addr aany; in_port_t port; @@ -462,17 +463,17 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, if (a4) { if (!IN4_IS_ADDR_LOOPBACK(a4)) return false; - conn->flags = 0; } else { if (!IN6_IS_ADDR_LOOPBACK(&aany.a6)) return false; - conn->flags = SPLICE_V6; } + conn = &flow_alloc_commit(flow, FLOW_TCP_SPLICE)->tcp_splice; + conn->flags = a4 ? 0 : SPLICE_V6; + if (setsockopt(s, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int))) flow_trace(conn, "failed to set TCP_QUICKACK on %i", s); - conn->f.type = FLOW_TCP_SPLICE; conn->s[0] = s; if (tcp_splice_new(c, conn, ref.port, ref.pif)) diff --git a/tcp_splice.h b/tcp_splice.h index 18193e4..daa5a4b 100644 --- a/tcp_splice.h +++ b/tcp_splice.h @@ -12,7 +12,7 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events); bool tcp_splice_conn_from_sock(const struct ctx *c, union tcp_listen_epoll_ref ref, - struct tcp_splice_conn *conn, int s, + const union flow *flow, int s, const struct sockaddr *sa); void tcp_splice_init(struct ctx *c); -- 2.43.0