We partially prepopulate IP and TCP header structures including, amongst other things the destination address, which for IPv4 is always the known address of the guest/namespace. We partially precompute both the IPv4 header checksum and the TCP checksum based on this. In future we're going to want more flexibility with controlling the destination for IPv4 (as we already do for IPv6), so this precomputed value gets in the way. Therefore remove the IPv4 destination from the precomputed checksum and fold it into the checksum update when we actually send a packet. Doing this means we no longer need to recompute those partial sums when the destination address changes ({tcp,udp}_update_l2_buf()) and instead the computation can be moved to compile time. This means while we perform slightly more computations on each packet, we slightly reduce the amount of memory we need to access. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tcp.c | 61 ++++++++++++++++++++-------------------------------------- udp.c | 14 +++----------- util.h | 4 +++- 3 files changed, 27 insertions(+), 52 deletions(-) diff --git a/tcp.c b/tcp.c index 56634c9..c52ea2b 100644 --- a/tcp.c +++ b/tcp.c @@ -323,10 +323,8 @@ #define MSS_DEFAULT 536 struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */ - uint32_t psum; - uint32_t tsum; #ifdef __AVX2__ - uint8_t pad[18]; + uint8_t pad[26]; #else uint8_t pad[2]; #endif @@ -443,8 +441,6 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE]; /** * tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections - * @psum: Partial IP header checksum (excluding tot_len and saddr) - * @tsum: Partial TCP header checksum (excluding length and saddr) * @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only * @taph: Tap-level headers (partially pre-filled) * @iph: Pre-filled IP header (except for tot_len and saddr) @@ -452,17 +448,15 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE]; * @data: Storage for TCP payload */ static struct tcp4_l2_buf_t { - uint32_t psum; /* 0 */ - uint32_t tsum; /* 4 */ #ifdef __AVX2__ - uint8_t pad[18]; /* 8, align th to 32 bytes */ + uint8_t pad[26]; /* 0, align th to 32 bytes */ #else - uint8_t pad[2]; /* align iph to 4 bytes 8 */ + uint8_t pad[2]; /* align iph to 4 bytes 0 */ #endif - struct tap_hdr taph; /* 26 10 */ - struct iphdr iph; /* 44 28 */ - struct tcphdr th; /* 64 48 */ - uint8_t data[MSS4]; /* 84 68 */ + struct tap_hdr taph; /* 26 2 */ + struct iphdr iph; /* 44 20 */ + struct tcphdr th; /* 64 40 */ + uint8_t data[MSS4]; /* 84 60 */ /* 65536 65532 */ #ifdef __AVX2__ } __attribute__ ((packed, aligned(32))) @@ -517,8 +511,6 @@ static struct iovec tcp_iov [UIO_MAXIOV]; /** * tcp4_l2_flags_buf_t - IPv4 packet buffers for segments without data (flags) - * @psum: Partial IP header checksum (excluding tot_len and saddr) - * @tsum: Partial TCP header checksum (excluding length and saddr) * @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only * @taph: Tap-level headers (partially pre-filled) * @iph: Pre-filled IP header (except for tot_len and saddr) @@ -526,16 +518,14 @@ static struct iovec tcp_iov [UIO_MAXIOV]; * @opts: Headroom for TCP options */ static struct tcp4_l2_flags_buf_t { - uint32_t psum; /* 0 */ - uint32_t tsum; /* 4 */ #ifdef __AVX2__ - uint8_t pad[18]; /* 8, align th to 32 bytes */ + uint8_t pad[26]; /* 0, align th to 32 bytes */ #else - uint8_t pad[2]; /* align iph to 4 bytes 8 */ + uint8_t pad[2]; /* align iph to 4 bytes 0 */ #endif - struct tap_hdr taph; /* 26 10 */ - struct iphdr iph; /* 44 28 */ - struct tcphdr th; /* 64 48 */ + struct tap_hdr taph; /* 26 2 */ + struct iphdr iph; /* 44 20 */ + struct tcphdr th; /* 64 40 */ char opts[OPT_MSS_LEN + OPT_WS_LEN + 1]; #ifdef __AVX2__ } __attribute__ ((packed, aligned(32))) @@ -953,11 +943,13 @@ void tcp_sock_set_bufsize(const struct ctx *c, int s) */ static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf) { - uint32_t sum = buf->psum; + uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_TCP); sum += buf->iph.tot_len; sum += (buf->iph.saddr >> 16) & 0xffff; sum += buf->iph.saddr & 0xffff; + sum += (buf->iph.daddr >> 16) & 0xffff; + sum += buf->iph.daddr & 0xffff; buf->iph.check = (uint16_t)~csum_fold(sum); } @@ -969,10 +961,12 @@ static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf) static void tcp_update_check_tcp4(struct tcp4_l2_buf_t *buf) { uint16_t tlen = ntohs(buf->iph.tot_len) - 20; - uint32_t sum = buf->tsum; + uint32_t sum = htons(IPPROTO_TCP); sum += (buf->iph.saddr >> 16) & 0xffff; sum += buf->iph.saddr & 0xffff; + sum += (buf->iph.daddr >> 16) & 0xffff; + sum += buf->iph.daddr & 0xffff; sum += htons(ntohs(buf->iph.tot_len) - 20); buf->th.check = 0; @@ -1023,20 +1017,6 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s, if (ip_da) { b4f->iph.daddr = b4->iph.daddr = ip_da->s_addr; - if (!i) { - b4f->iph.saddr = b4->iph.saddr = 0; - b4f->iph.tot_len = b4->iph.tot_len = 0; - b4f->iph.check = b4->iph.check = 0; - b4f->psum = b4->psum = sum_16b(&b4->iph, 20); - - b4->tsum = ((ip_da->s_addr >> 16) & 0xffff) + - (ip_da->s_addr & 0xffff) + - htons(IPPROTO_TCP); - b4f->tsum = b4->tsum; - } else { - b4f->psum = b4->psum = tcp4_l2_buf[0].psum; - b4f->tsum = b4->tsum = tcp4_l2_buf[0].tsum; - } } } } @@ -1045,15 +1025,16 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s, * tcp_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets * @c: Execution context */ -static void tcp_sock4_iov_init(const struct ctx *c) +static void tcp_sock4_iov_init(struct ctx *c) { + struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP); struct iovec *iov; int i; for (i = 0; i < ARRAY_SIZE(tcp4_l2_buf); i++) { tcp4_l2_buf[i] = (struct tcp4_l2_buf_t) { .taph = TAP_HDR_INIT(ETH_P_IP), - .iph = L2_BUF_IP4_INIT(IPPROTO_TCP), + .iph = iph, .th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 } }; } diff --git a/udp.c b/udp.c index 21c6888..6bb515a 100644 --- a/udp.c +++ b/udp.c @@ -168,7 +168,6 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8) /** * udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections * @s_in: Source socket address, filled in by recvmmsg() - * @psum: Partial IP header checksum (excluding tot_len and saddr) * @taph: Tap-level headers (partially pre-filled) * @iph: Pre-filled IP header (except for tot_len and saddr) * @uh: Headroom for UDP header @@ -176,7 +175,6 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8) */ static struct udp4_l2_buf_t { struct sockaddr_in s_in; - uint32_t psum; struct tap_hdr taph; struct iphdr iph; @@ -263,11 +261,13 @@ static void udp_invert_portmap(struct udp_port_fwd *fwd) */ static void udp_update_check4(struct udp4_l2_buf_t *buf) { - uint32_t sum = buf->psum; + uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_UDP); sum += buf->iph.tot_len; sum += (buf->iph.saddr >> 16) & 0xffff; sum += buf->iph.saddr & 0xffff; + sum += (buf->iph.daddr >> 16) & 0xffff; + sum += buf->iph.daddr & 0xffff; buf->iph.check = (uint16_t)~csum_fold(sum); } @@ -292,14 +292,6 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s, if (ip_da) { b4->iph.daddr = ip_da->s_addr; - if (!i) { - b4->iph.saddr = 0; - b4->iph.tot_len = 0; - b4->iph.check = 0; - b4->psum = sum_16b(&b4->iph, 20); - } else { - b4->psum = udp4_l2_buf[0].psum; - } } } } diff --git a/util.h b/util.h index 23dcad5..e4db33a 100644 --- a/util.h +++ b/util.h @@ -141,11 +141,13 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags, .tot_len = 0, \ .id = 0, \ .frag_off = 0, \ - .ttl = 255, \ + .ttl = 0xff, \ .protocol = (proto), \ .saddr = 0, \ .daddr = 0, \ } +#define L2_BUF_IP4_PSUM(proto) ((uint32_t)htons_constant(0x4500) + \ + (uint32_t)htons_constant(0xff00 | (proto))) #define L2_BUF_IP6_INIT(proto) \ { \ -- 2.41.0