Currently in tap.c we (mostly) use ETH_MAX_MTU as the maximum length of an L2 frame. This define comes from the kernel, but it's badly named and used confusingly. First, it doesn't really have anything to do with Ethernet, which has no structural limit on frame lengths. Rather, it comes from either a) IP, which imposes a 64k datagram limit, or b) internal buffers used in various places in the kernel (and in passt). Worse, MTU generally means the maximum size of the IP (L3) datagram which may be transferred, _not_ counting the L2 headers. In the kernel, ETH_MAX_MTU is sometimes used that way, but sometimes seems to be used as a maximum frame length, _including_ L2 headers. In tap.c we're mostly using it in the second way. Finally, each of our tap backends could have different limits on the frame size, imposed by the mechanisms they're using. Start clearing up this confusion by replacing ETH_MAX_MTU in tap.c with new L2_MAX_LEN_* defines, which specifically refer to the maximum L2 frame length for each backend.
Signed-off-by: David Gibson <dgibson(a)redhat.com> --- tap.c | 18 ++++++++++++++---- tap.h | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/tap.c b/tap.c index fb306e75..4840dcfa 100644 --- a/tap.c +++ b/tap.c @@ -62,6 +62,15 @@ #include "vhost_user.h" #include "vu_common.h" +/* Maximum allowed frame lengths (including L2 header) */ + +static_assert(L2_MAX_LEN_PASTA <= PACKET_MAX_LEN, + "packet pool can't store maximum size pasta frame"); +static_assert(L2_MAX_LEN_PASST <= PACKET_MAX_LEN, + "packet pool can't store maximum size qemu socket frame"); +static_assert(L2_MAX_LEN_VU <= PACKET_MAX_LEN, + "packet pool can't store maximum size vhost-user frame"); + /* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */ static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf); static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf); @@ -1097,7 +1106,8 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now) while (n >= (ssize_t)sizeof(uint32_t)) { uint32_t l2len = ntohl_unaligned(p); - if (l2len < sizeof(struct ethhdr) || l2len > ETH_MAX_MTU) { + if (l2len < sizeof(struct ethhdr) || + l2len > L2_MAX_LEN_PASST) { err("Bad frame size from guest, resetting connection"); tap_sock_reset(c); return; @@ -1151,8 +1161,8 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now) tap_flush_pools(); - for (n = 0; n <= (ssize_t)(sizeof(pkt_buf) - ETH_MAX_MTU); n += len) { - len = read(c->fd_tap, pkt_buf + n, ETH_MAX_MTU); + for (n = 0; n <= (ssize_t)(sizeof(pkt_buf) - L2_MAX_LEN_PASTA); n += len) { + len = read(c->fd_tap, pkt_buf + n, L2_MAX_LEN_PASTA); if (len == 0) { die("EOF on tap device, exiting"); @@ -1170,7 +1180,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now) /* Ignore frames of bad length */ if (len < (ssize_t)sizeof(struct ethhdr) || - len > (ssize_t)ETH_MAX_MTU) + len > (ssize_t)L2_MAX_LEN_PASTA) continue; tap_add_packet(c, len, pkt_buf + n); diff 
--git a/tap.h b/tap.h index a2c3b87d..140e3305 100644 --- a/tap.h +++ b/tap.h @@ -6,6 +6,31 @@ #ifndef TAP_H #define TAP_H +/** L2_MAX_LEN_PASTA - Maximum frame length for pasta mode (with L2 header) + * + * The kernel tuntap device imposes a maximum frame size of 65535 including + * 'hard_header_len' (14 bytes for L2 Ethernet in the case of "tap" mode). + */ +#define L2_MAX_LEN_PASTA USHRT_MAX + +/** L2_MAX_LEN_PASST - Maximum frame length for passt mode (with L2 header) + * + * The only structural limit the Qemu socket protocol imposes on frames is + * (2^32-1) bytes, but that would be ludicrously long in practice. For now, + * limit it somewhat arbitrarily to 65535 bytes. FIXME: Work out an appropriate + * limit with more precision. + */ +#define L2_MAX_LEN_PASST USHRT_MAX + +/** L2_MAX_LEN_VU - Maximum frame length for vhost-user mode (with L2 header) + * + * VU allows multiple buffers per frame, each of which can be quite large, so + * the inherent frame size limit is rather large. Much larger than is actually + * useful for IP. For now limit arbitrarily to 65535 bytes. FIXME: Work out an + * appropriate limit with more precision. + */ +#define L2_MAX_LEN_VU USHRT_MAX + struct udphdr; /** -- 2.48.1