We want to generalise "connection" tracking to things other than true TCP connections. Continue implementing this by renaming the TCP connection table to the "flow table" and moving it to flow.c. The definitions are split between flow.h and flow_table.h - we need this separation to avoid circular dependencies: the definitions in flow.h will be needed by many headers using the flow mechanism, but flow_table.h needs all those protocol specific headers in order to define the full flow table entry. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- Makefile | 8 ++--- flow.c | 11 +++++++ flow.h | 8 +++++ flow_table.h | 25 +++++++++++++++ passt.h | 3 ++ tcp.c | 87 +++++++++++++++++++++++++--------------------------- tcp.h | 5 --- tcp_conn.h | 23 +++----------- tcp_splice.c | 19 ++++++------ 9 files changed, 107 insertions(+), 82 deletions(-) create mode 100644 flow_table.h diff --git a/Makefile b/Makefile index e2970e3..d14b029 100644 --- a/Makefile +++ b/Makefile @@ -52,10 +52,10 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS) MANPAGES = passt.1 pasta.1 qrap.1 -PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h icmp.h \ - inany.h isolation.h lineread.h log.h ndp.h netlink.h packet.h passt.h \ - pasta.h pcap.h pif.h port_fwd.h siphash.h tap.h tcp.h tcp_conn.h \ - tcp_splice.h udp.h util.h +PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h \ + flow_table.h icmp.h inany.h isolation.h lineread.h log.h ndp.h \ + netlink.h packet.h passt.h pasta.h pcap.h pif.h port_fwd.h siphash.h \ + tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h HEADERS = $(PASST_HEADERS) seccomp.h C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 }; diff --git a/flow.c b/flow.c index 7c3603c..a9bb8f5 100644 --- a/flow.c +++ b/flow.c @@ -6,11 +6,22 @@ */ #include <stdint.h> +#include <unistd.h> +#include <string.h> +#include "util.h" +#include "passt.h" +#include "siphash.h" +#include "inany.h" #include "flow.h" +#include "tcp_conn.h" +#include
"flow_table.h" const char *flow_type_str[] = { [FLOW_TYPE_NONE] = "<none>", [FLOW_TCP] = "TCP connection", [FLOW_TCP_SPLICE] = "TCP connection (spliced)", }; + +/* Global Flow Table */ +union flow flowtab[FLOW_MAX]; diff --git a/flow.h b/flow.h index 88e6f0b..57289a8 100644 --- a/flow.h +++ b/flow.h @@ -33,4 +33,12 @@ struct flow_common { uint8_t type; }; +#define FLOW_INDEX_BITS 17 /* 128k - 1 */ +#define FLOW_MAX MAX_FROM_BITS(FLOW_INDEX_BITS) + +#define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */ +#define FLOW_FILE_PRESSURE 30 /* % of c->nofile */ + +union flow; + #endif /* FLOW_H */ diff --git a/flow_table.h b/flow_table.h new file mode 100644 index 0000000..c4c646b --- /dev/null +++ b/flow_table.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson <david(a)gibson.dropbear.id.au> + * + * Definitions for the global table of packet flows. + */ +#ifndef FLOW_TABLE_H +#define FLOW_TABLE_H + +/** + * union flow - Descriptor for a logical packet flow (e.g. connection) + * @f: Fields common between all variants + * @tcp: Fields for non-spliced TCP connections + * @tcp_splice: Fields for spliced TCP connections +*/ +union flow { + struct flow_common f; + struct tcp_tap_conn tcp; + struct tcp_splice_conn tcp_splice; +}; + +/* Global Flow Table */ +extern union flow flowtab[]; + +#endif /* FLOW_TABLE_H */ diff --git a/passt.h b/passt.h index cac720a..3f5dfb9 100644 --- a/passt.h +++ b/passt.h @@ -219,6 +219,7 @@ struct ip6_ctx { * @pasta_conf_ns: Configure namespace after creating it * @no_copy_routes: Don't copy all routes when configuring target namespace * @no_copy_addrs: Don't copy all addresses when configuring namespace + * @flow_count: Number of tracked packet flows (connections etc.) 
* @no_tcp: Disable TCP operation * @tcp: Context for TCP protocol handler * @no_tcp: Disable UDP operation @@ -277,6 +278,8 @@ struct ctx { int no_copy_routes; int no_copy_addrs; + unsigned flow_count; + int no_tcp; struct tcp_ctx tcp; int no_udp; diff --git a/tcp.c b/tcp.c index 1c25032..0119bd3 100644 --- a/tcp.c +++ b/tcp.c @@ -302,14 +302,14 @@ #include "flow.h" #include "tcp_conn.h" +#include "flow_table.h" #define TCP_FRAMES_MEM 128 #define TCP_FRAMES \ (c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1) #define TCP_HASH_TABLE_LOAD 70 /* % */ -#define TCP_HASH_TABLE_SIZE (TCP_MAX_CONNS * 100 / \ - TCP_HASH_TABLE_LOAD) +#define TCP_HASH_TABLE_SIZE (FLOW_MAX * 100 / TCP_HASH_TABLE_LOAD) #define MAX_WS 8 #define MAX_WINDOW (1 << (16 + (MAX_WS))) @@ -570,11 +570,8 @@ tcp6_l2_flags_buf[TCP_FRAMES_MEM]; static unsigned int tcp6_l2_flags_buf_used; -/* TCP connections */ -union tcp_conn tc[TCP_MAX_CONNS]; - -#define CONN(idx) (&tc[(idx)].tap) -#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc) +#define CONN(idx) (&flowtab[(idx)].tcp) +#define CONN_IDX(conn) ((union flow *)(conn) - flowtab) /** conn_at_idx() - Find a connection by index, if present * @idx: Index of connection to lookup @@ -583,7 +580,7 @@ union tcp_conn tc[TCP_MAX_CONNS]; */ static inline struct tcp_tap_conn *conn_at_idx(int idx) { - if ((idx < 0) || (idx >= TCP_MAX_CONNS)) + if ((idx < 0) || (idx >= FLOW_MAX)) return NULL; ASSERT(CONN(idx)->f.type == FLOW_TCP); return CONN(idx); @@ -1306,26 +1303,26 @@ static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c, * @c: Execution context * @hole: Pointer to recently closed connection */ -void tcp_table_compact(struct ctx *c, union tcp_conn *hole) +void tcp_table_compact(struct ctx *c, union flow *hole) { - union tcp_conn *from; + union flow *from; - if (CONN_IDX(hole) == --c->tcp.conn_count) { + if (CONN_IDX(hole) == --c->flow_count) { debug("TCP: table compaction: maximum index was %li (%p)", CONN_IDX(hole), (void *)hole); memset(hole, 0, 
sizeof(*hole)); return; } - from = tc + c->tcp.conn_count; + from = flowtab + c->flow_count; memcpy(hole, from, sizeof(*hole)); switch (from->f.type) { case FLOW_TCP: - tcp_tap_conn_update(c, &from->tap, &hole->tap); + tcp_tap_conn_update(c, &from->tcp, &hole->tcp); break; case FLOW_TCP_SPLICE: - tcp_splice_conn_update(c, &hole->splice); + tcp_splice_conn_update(c, &hole->tcp_splice); break; default: die("Unexpected %s in tcp_table_compact()", @@ -1343,18 +1340,18 @@ void tcp_table_compact(struct ctx *c, union tcp_conn *hole) /** * tcp_conn_destroy() - Close sockets, trigger hash table removal and compaction * @c: Execution context - * @conn_union: Connection pointer (container union) + * @flow: Flow table entry for this connection */ -static void tcp_conn_destroy(struct ctx *c, union tcp_conn *conn_union) +static void tcp_conn_destroy(struct ctx *c, union flow *flow) { - const struct tcp_tap_conn *conn = &conn_union->tap; + const struct tcp_tap_conn *conn = &flow->tcp; close(conn->sock); if (conn->timer != -1) close(conn->timer); tcp_hash_remove(c, conn); - tcp_table_compact(c, conn_union); + tcp_table_compact(c, flow); } static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn); @@ -1404,24 +1401,24 @@ static void tcp_l2_data_buf_flush(const struct ctx *c) */ void tcp_defer_handler(struct ctx *c) { - union tcp_conn *conn; + union flow *flow; tcp_l2_flags_buf_flush(c); tcp_l2_data_buf_flush(c); - for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) { - switch (conn->f.type) { + for (flow = flowtab + c->flow_count - 1; flow >= flowtab; flow--) { + switch (flow->f.type) { case FLOW_TCP: - if (conn->tap.events == CLOSED) - tcp_conn_destroy(c, conn); + if (flow->tcp.events == CLOSED) + tcp_conn_destroy(c, flow); break; case FLOW_TCP_SPLICE: - if (conn->splice.flags & CLOSING) - tcp_splice_destroy(c, conn); + if (flow->tcp_splice.flags & CLOSING) + tcp_splice_destroy(c, flow); break; default: die("Unexpected %s in tcp_defer_handler()", - 
FLOW_TYPE(&conn->f)); + FLOW_TYPE(&flow->f)); } } } @@ -2004,7 +2001,7 @@ static void tcp_conn_from_tap(struct ctx *c, (void)saddr; - if (c->tcp.conn_count >= TCP_MAX_CONNS) + if (c->flow_count >= FLOW_MAX) return; if ((s = tcp_conn_pool_sock(pool)) < 0) @@ -2030,7 +2027,7 @@ static void tcp_conn_from_tap(struct ctx *c, } } - conn = CONN(c->tcp.conn_count++); + conn = CONN(c->flow_count++); conn->f.type = FLOW_TCP; conn->sock = s; conn->timer = -1; @@ -2775,24 +2772,24 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref, { struct sockaddr_storage sa; socklen_t sl = sizeof(sa); - union tcp_conn *conn; + union flow *flow; int s; - if (c->no_tcp || c->tcp.conn_count >= TCP_MAX_CONNS) + if (c->no_tcp || c->flow_count >= FLOW_MAX) return; s = accept4(ref.fd, (struct sockaddr *)&sa, &sl, SOCK_NONBLOCK); if (s < 0) return; - conn = tc + c->tcp.conn_count++; + flow = flowtab + c->flow_count++; if (c->mode == MODE_PASTA && - tcp_splice_conn_from_sock(c, ref.tcp_listen, &conn->splice, + tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice, s, (struct sockaddr *)&sa)) return; - tcp_tap_conn_from_sock(c, ref.tcp_listen, &conn->tap, s, + tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s, (struct sockaddr *)&sa, now); } @@ -2921,18 +2918,18 @@ static void tcp_tap_sock_handler(struct ctx *c, struct tcp_tap_conn *conn, */ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events) { - union tcp_conn *conn = tc + ref.tcp.index; + union flow *flow = flowtab + ref.tcp.index; - switch (conn->f.type) { + switch (flow->f.type) { case FLOW_TCP: - tcp_tap_sock_handler(c, &conn->tap, events); + tcp_tap_sock_handler(c, &flow->tcp, events); break; case FLOW_TCP_SPLICE: - tcp_splice_sock_handler(c, &conn->splice, ref.fd, events); + tcp_splice_sock_handler(c, &flow->tcp_splice, ref.fd, events); break; default: die("Unexpected %s in tcp_sock_handler_compact()", - FLOW_TYPE(&conn->f)); + FLOW_TYPE(&flow->f)); } } @@ -3248,7 +3245,7 @@ static int 
tcp_port_rebind_outbound(void *arg) */ void tcp_timer(struct ctx *c, const struct timespec *ts) { - union tcp_conn *conn; + union flow *flow; (void)ts; @@ -3264,18 +3261,18 @@ void tcp_timer(struct ctx *c, const struct timespec *ts) } } - for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) { - switch (conn->f.type) { + for (flow = flowtab + c->flow_count - 1; flow >= flowtab; flow--) { + switch (flow->f.type) { case FLOW_TCP: - if (conn->tap.events == CLOSED) - tcp_conn_destroy(c, conn); + if (flow->tcp.events == CLOSED) + tcp_conn_destroy(c, flow); break; case FLOW_TCP_SPLICE: - tcp_splice_timer(c, conn); + tcp_splice_timer(c, flow); break; default: die("Unexpected %s in tcp_timer()", - FLOW_TYPE(&conn->f)); + FLOW_TYPE(&flow->f)); } } diff --git a/tcp.h b/tcp.h index 06965d8..c8b738d 100644 --- a/tcp.h +++ b/tcp.h @@ -8,9 +8,6 @@ #define TCP_TIMER_INTERVAL 1000 /* ms */ -#define TCP_CONN_INDEX_BITS 17 /* 128k - 1 */ -#define TCP_MAX_CONNS MAX_FROM_BITS(TCP_CONN_INDEX_BITS) - struct ctx; void tcp_timer_handler(struct ctx *c, union epoll_ref ref); @@ -56,7 +53,6 @@ union tcp_listen_epoll_ref { /** * struct tcp_ctx - Execution context for TCP routines * @hash_secret: 128-bit secret for hash functions, ISN and hash table - * @conn_count: Count of total connections in connection table * @port_to_tap: Ports bound host-side, packets to tap or spliced * @fwd_in: Port forwarding configuration for inbound packets * @fwd_out: Port forwarding configuration for outbound packets @@ -66,7 +62,6 @@ union tcp_listen_epoll_ref { */ struct tcp_ctx { uint64_t hash_secret[2]; - int conn_count; struct port_fwd fwd_in; struct port_fwd fwd_out; struct timespec timer_run; diff --git a/tcp_conn.h b/tcp_conn.h index 136f8da..5a107fc 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -40,7 +40,7 @@ struct tcp_tap_conn { struct flow_common f; bool in_epoll :1; - int next_index :TCP_CONN_INDEX_BITS + 2; + int next_index :FLOW_INDEX_BITS + 2; #define TCP_RETRANS_BITS 3 unsigned int retrans 
:TCP_RETRANS_BITS; @@ -151,21 +151,6 @@ struct tcp_splice_conn { uint32_t written[SIDES]; }; -/** - * union tcp_conn - Descriptor for a TCP connection (spliced or non-spliced) - * @c: Fields common between all variants - * @tap: Fields specific to non-spliced connections - * @splice: Fields specific to spliced connections -*/ -union tcp_conn { - struct flow_common f; - struct tcp_tap_conn tap; - struct tcp_splice_conn splice; -}; - -/* TCP connections */ -extern union tcp_conn tc[]; - /* Socket pools */ #define TCP_SOCK_POOL_SIZE 32 @@ -173,9 +158,9 @@ extern int init_sock_pool4 [TCP_SOCK_POOL_SIZE]; extern int init_sock_pool6 [TCP_SOCK_POOL_SIZE]; void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new); -void tcp_table_compact(struct ctx *c, union tcp_conn *hole); -void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union); -void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union); +void tcp_table_compact(struct ctx *c, union flow *hole); +void tcp_splice_destroy(struct ctx *c, union flow *flow); +void tcp_splice_timer(struct ctx *c, union flow *flow); int tcp_conn_pool_sock(int pool[]); int tcp_conn_new_sock(const struct ctx *c, sa_family_t af); void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af); diff --git a/tcp_splice.c b/tcp_splice.c index bfd2bd1..9f4831a 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -57,6 +57,7 @@ #include "flow.h" #include "tcp_conn.h" +#include "flow_table.h" #define MAX_PIPE_SIZE (8UL * 1024 * 1024) #define TCP_SPLICE_PIPE_POOL_SIZE 32 @@ -76,7 +77,7 @@ static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2]; #define CONN_V4(x) (!CONN_V6(x)) #define CONN_HAS(conn, set) ((conn->events & (set)) == (set)) #define CONN(idx) (&tc[(idx)].splice) -#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc) +#define CONN_IDX(conn) ((union flow *)(conn) - flowtab) /* Display strings for connection events */ static const char *tcp_splice_event_str[] __attribute((__unused__)) = { @@ -254,11 
+255,11 @@ void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new) /** * tcp_splice_destroy() - Close spliced connection and pipes, clear * @c: Execution context - * @conn_union: Spliced connection (container union) + * @flow: Flow table entry */ -void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union) +void tcp_splice_destroy(struct ctx *c, union flow *flow) { - struct tcp_splice_conn *conn = &conn_union->splice; + struct tcp_splice_conn *conn = &flow->tcp_splice; int side; for (side = 0; side < SIDES; side++) { @@ -283,7 +284,7 @@ void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union) conn->flags = 0; debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn)); - tcp_table_compact(c, conn_union); + tcp_table_compact(c, flow); } /** @@ -775,15 +776,15 @@ void tcp_splice_init(struct ctx *c) /** * tcp_splice_timer() - Timer for spliced connections * @c: Execution context - * @conn_union: Spliced connection (container union) + * @flow: Flow table entry */ -void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union) +void tcp_splice_timer(struct ctx *c, union flow *flow) { - struct tcp_splice_conn *conn = &conn_union->splice; + struct tcp_splice_conn *conn = &flow->tcp_splice; int side; if (conn->flags & CLOSING) { - tcp_splice_destroy(c, conn_union); + tcp_splice_destroy(c, flow); return; } -- 2.42.0