Our allocation scheme for flow entries means there are some non-obvious constraints on what can be done with an entry, and when. Add a big doc comment explaining the life cycle. In addition, make a FLOW_START() macro to mark one of the important transitions. This encourages correct usage, by making it natural to only access the flow type specific structure after calling it. It also logs that a new flow has been created, which is useful for debugging. We also add logging when a flow's life cycle ends. This doesn't need a new helper, because it can only happen either from flow_alloc_cancel() or from the flow deferred handler. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- flow.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++-- flow.h | 5 ++++ tcp.c | 15 +++++------ tcp_splice.c | 11 ++++---- tcp_splice.h | 5 ++-- 5 files changed, 94 insertions(+), 18 deletions(-) diff --git a/flow.c b/flow.c index beb9749c..a155b54b 100644 --- a/flow.c +++ b/flow.c @@ -34,6 +34,45 @@ static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES, /* Global Flow Table */ +/** + * DOC: Theory of Operation - flow entry life cycle + * + * An individual flow table entry moves through these logical states, usually in + * this order. 
+ * + * FREE - Part of the general pool of free flow table entries + * Operations: + * - flow_alloc() finds an entry and moves it to ALLOC state + * + * ALLOC - A tentatively allocated entry + * Operations: + * - flow_alloc_cancel() returns the entry to FREE state + * - FLOW_START() sets the entry's type and moves it to START state + * Caveats: + * - It's not safe to write fields in the flow entry + * - It's not safe to allocate other entries with flow_alloc() + * - It's not safe to return to the main epoll loop + * - It's not safe to use flow_*() logging functions + * + * START - An entry being prepared by flow type specific code + * Operations: + * - Flow type specific fields may be accessed + * - flow_*() logging functions + * - flow_alloc_cancel() returns the entry to FREE state + * Caveats: + * - Returning to the main epoll loop or allocating another entry + * with flow_alloc() implicitly moves the entry to ACTIVE state. + * + * ACTIVE - An active flow entry managed by flow type specific code + * Operations: + * - Flow type specific fields may be accessed + * - flow_*() logging functions + * - Flow may be expired by returning 'true' from flow type specific + * deferred or timer handler. This will return it to FREE state. + * Caveats: + * - It's not safe to call flow_alloc_cancel() + */ + /** * DOC: Theory of Operation - allocating and freeing flow entries * @@ -109,6 +148,39 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...) logmsg(pri, "Flow %u (%s): %s", flow_idx(f), FLOW_TYPE(f), msg); } +/** + * flow_start() - Set flow type for new flow and log + * @flow: Flow to set type for + * @type: Type for new flow + * @iniside: Which side initiated the new flow + * + * Return: @flow + * + * Should be called before setting any flow type specific fields in the flow + * table entry. 
+ */ +union flow *flow_start(union flow *flow, enum flow_type type, + unsigned iniside) +{ + (void)iniside; + flow->f.type = type; + flow_dbg(flow, "START %s", flow_type_str[flow->f.type]); + return flow; +} + +/** + * flow_end() - Clear flow type for finished flow and log + * @flow: Flow to clear + */ +static void flow_end(union flow *flow) +{ + if (flow->f.type == FLOW_TYPE_NONE) + return; /* Nothing to do */ + + flow_dbg(flow, "END %s", flow_type_str[flow->f.type]); + flow->f.type = FLOW_TYPE_NONE; +} + /** * flow_alloc() - Allocate a new flow * @@ -157,7 +229,7 @@ void flow_alloc_cancel(union flow *flow) { ASSERT(flow_first_free > FLOW_IDX(flow)); - flow->f.type = FLOW_TYPE_NONE; + flow_end(flow); /* Put it back in a length 1 free cluster, don't attempt to fully * reverse flow_alloc()s steps. This will get folded together the next * time flow_defer_handler runs anyway() */ @@ -227,7 +299,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now) } if (closed) { - flow->f.type = FLOW_TYPE_NONE; + flow_end(flow); if (free_head) { /* Add slot to current free cluster */ diff --git a/flow.h b/flow.h index e9b3ce3e..8b66751b 100644 --- a/flow.h +++ b/flow.h @@ -45,6 +45,11 @@ struct flow_common { #define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */ #define FLOW_FILE_PRESSURE 30 /* % of c->nofile */ +union flow *flow_start(union flow *flow, enum flow_type type, + unsigned iniside); +#define FLOW_START(flow_, t_, var_, i_) \ + (&flow_start((flow_), (t_), (i_))->var_) + /** * struct flow_sidx - ID for one side of a specific flow * @side: Side referenced (0 or 1) diff --git a/tcp.c b/tcp.c index 3722dc09..e15b932f 100644 --- a/tcp.c +++ b/tcp.c @@ -1952,8 +1952,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, goto cancel; } - conn = &flow->tcp; - conn->f.type = FLOW_TCP; + conn = FLOW_START(flow, FLOW_TCP, tcp, TAPSIDE); conn->sock = s; conn->timer = -1; conn_event(c, conn, TAP_SYN_RCVD); @@ -2658,18 +2657,19 @@ static void 
tcp_snat_inbound(const struct ctx *c, union inany_addr *addr) * tcp_tap_conn_from_sock() - Initialize state for non-spliced connection * @c: Execution context * @ref: epoll reference of listening socket - * @conn: connection structure to initialize + * @flow: flow to initialise * @s: Accepted socket * @sa: Peer socket address (from accept()) * @now: Current timestamp */ static void tcp_tap_conn_from_sock(struct ctx *c, union tcp_listen_epoll_ref ref, - struct tcp_tap_conn *conn, int s, + union flow *flow, int s, const union sockaddr_inany *sa, const struct timespec *now) { - conn->f.type = FLOW_TCP; + struct tcp_tap_conn *conn = FLOW_START(flow, FLOW_TCP, tcp, SOCKSIDE); + conn->sock = s; conn->timer = -1; conn->ws_to_tap = conn->ws_from_tap = 0; @@ -2715,11 +2715,10 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref, goto cancel; if (c->mode == MODE_PASTA && - tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice, - s, &sa)) + tcp_splice_conn_from_sock(c, ref.tcp_listen, flow, s, &sa)) return; - tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s, &sa, now); + tcp_tap_conn_from_sock(c, ref.tcp_listen, flow, s, &sa, now); return; cancel: diff --git a/tcp_splice.c b/tcp_splice.c index 180a9ea7..576fe9be 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -424,7 +424,7 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn, * tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection * @c: Execution context * @ref: epoll reference of listening socket - * @conn: connection structure to initialize + * @flow: flow to initialise * @s: Accepted socket * @sa: Peer address of connection * @@ -432,10 +432,10 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn, * #syscalls:pasta setsockopt */ bool tcp_splice_conn_from_sock(const struct ctx *c, - union tcp_listen_epoll_ref ref, - struct tcp_splice_conn *conn, int s, - const union sockaddr_inany *sa) + union tcp_listen_epoll_ref ref, union 
flow *flow, + int s, const union sockaddr_inany *sa) { + struct tcp_splice_conn *conn; union inany_addr aany; in_port_t port; @@ -445,7 +445,8 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, if (!inany_is_loopback(&aany)) return false; - conn->f.type = FLOW_TCP_SPLICE; + conn = FLOW_START(flow, FLOW_TCP_SPLICE, tcp_splice, 0); + conn->flags = inany_v4(&aany) ? 0 : SPLICE_V6; conn->s[0] = s; conn->s[1] = -1; diff --git a/tcp_splice.h b/tcp_splice.h index 20f41b39..5a471af0 100644 --- a/tcp_splice.h +++ b/tcp_splice.h @@ -12,9 +12,8 @@ union sockaddr_inany; void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events); bool tcp_splice_conn_from_sock(const struct ctx *c, - union tcp_listen_epoll_ref ref, - struct tcp_splice_conn *conn, int s, - const union sockaddr_inany *sa); + union tcp_listen_epoll_ref ref, union flow *flow, + int s, const union sockaddr_inany *sa); void tcp_splice_init(struct ctx *c); #endif /* TCP_SPLICE_H */ -- 2.43.0