Frequent DNS queries over UDP from a container or guest can result
in many sockets shown in ss(8), typically one per flow. This is
expected and harmless, but it can make the output of ss(8) look
noisy and potentially concern users.
This patch splits UDP flow timeouts into two, mirroring the Linux
kernel, and sources the values from kernel parameters. The shorter
timeout is applied to unidirectional flows and minimal bidirectional
exchanges (single datagram and reply), while the longer timeout is
used for bidirectional flows with multiple datagrams on either side.
Link: https://bugs.passt.top/show_bug.cgi?id=197
Suggested-by: Stefano Brivio
I haven't tested this yet, just two exceedingly minor nits (I can fix
it up on merge if you're fine with it and if no other changes are
needed):
On Thu, 12 Feb 2026 16:04:14 +0800
Yumei Huang
Frequent DNS queries over UDP from a container or guest can result in many sockets shown in ss(8), typically one per flow. This is expected and harmless, but it can make the output of ss(8) look noisy and potentially concern users.
This patch splits UDP flow timeouts into two, mirroring the Linux kernel, and sources the values from kernel parameters. The shorter timeout is applied to unidirectional flows and minimal bidirectional exchanges (single datagram and reply), while the longer timeout is used for bidirectional flows with multiple datagrams on either side.
Link: https://bugs.passt.top/show_bug.cgi?id=197 Suggested-by: Stefano Brivio
Signed-off-by: Yumei Huang --- udp.c | 33 ++++++++++++++++++++++++++++++++- udp.h | 4 ++++ udp_flow.c | 11 ++++++++--- udp_flow.h | 13 +++++++++++++ 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/udp.c b/udp.c index b2383e2..3afec35 100644 --- a/udp.c +++ b/udp.c @@ -26,7 +26,10 @@ * * We track pseudo-connections of this type as flow table entries of type * FLOW_UDP. We store the time of the last traffic on the flow in uflow->ts, - * and let the flow expire if there is no traffic for UDP_CONN_TIMEOUT seconds. + * and let the flow expire if there is no traffic for UDP_TIMEOUT seconds for + * unidirectional flows and flows with only one datagram and one reply, or + * UDP_TIMEOUT_STREAM seconds for bidirectional flows with more than one + * datagram on either side. * * NOTE: This won't handle multicast protocols, or some protocols with different * port usage. We'll need specific logic if we want to handle those. @@ -118,6 +121,13 @@
#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */
+#define UDP_TIMEOUT "/proc/sys/net/netfilter/nf_conntrack_udp_timeout" +#define UDP_TIMEOUT_STREAM \ + "/proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream" + +#define UDP_TIMEOUT_DEFAULT 30 /* s */ +#define UDP_TIMEOUT_STREAM_DEFAULT 120 /* s */ + /* Maximum UDP data to be returned in ICMP messages */ #define ICMP4_MAX_DLEN 8 #define ICMP6_MAX_DLEN (IPV6_MIN_MTU \ @@ -954,6 +964,7 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
flow_trace(uflow, "Received data on reply socket"); uflow->ts = now->tv_sec; + udp_flow_activity(uflow, !tosidx.sidei);
if (pif_is_socket(topif)) { udp_sock_to_sock(c, ref.fd, n, tosidx); @@ -1179,6 +1190,24 @@ static void udp_splice_iov_init(void) } }
+/** + * udp_get_timeout_params() - Get host kernel UDP timeout parameters + * @c: Execution context + */ +static void udp_get_timeout_params(struct ctx *c) +{ + intmax_t v; + + v = read_file_integer(UDP_TIMEOUT, UDP_TIMEOUT_DEFAULT); + c->udp.timeout = v; + + v = read_file_integer(UDP_TIMEOUT_STREAM, UDP_TIMEOUT_STREAM_DEFAULT); + c->udp.stream_timeout = v; + + debug("Using UDP timeout parameters, timeout: %d, stream_timeout: %d", + c->udp.timeout, c->udp.stream_timeout); +} + /** * udp_init() - Initialise per-socket data, and sockets in namespace * @c: Execution context @@ -1189,6 +1218,8 @@ int udp_init(struct ctx *c) { ASSERT(!c->no_udp);
+ udp_get_timeout_params(c); + udp_iov_init(c);
if (fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP) < 0) diff --git a/udp.h b/udp.h index 2b91d72..da9c2df 100644 --- a/udp.h +++ b/udp.h @@ -24,11 +24,15 @@ void udp_update_l2_buf(const unsigned char *eth_d); * @fwd_in: Port forwarding configuration for inbound packets * @fwd_out: Port forwarding configuration for outbound packets * @timer_run: Timestamp of most recent timer run + * @timeout: Timeout for unidirectional flows (in s) + * @stream_timeout: Timeout for stream-like flows (in s) */ struct udp_ctx { struct fwd_ports fwd_in; struct fwd_ports fwd_out; struct timespec timer_run; + int timeout; + int stream_timeout; };
#endif /* UDP_H */ diff --git a/udp_flow.c b/udp_flow.c index 1f5e84e..9b22586 100644 --- a/udp_flow.c +++ b/udp_flow.c @@ -17,8 +17,6 @@ #include "udp_internal.h" #include "epoll_ctl.h"
-#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ - /** * udp_at_sidx() - Get UDP specific flow at given sidx * @sidx: Flow and side to retrieve @@ -152,6 +150,7 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow, uflow->ts = now->tv_sec; uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1; uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0; + uflow->activity[INISIDE] = uflow->activity[TGTSIDE] = 0;
flow_foreach_sidei(sidei) { if (pif_is_socket(uflow->f.pif[sidei])) @@ -362,7 +361,13 @@ bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow, bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, const struct timespec *now) { - if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT) + int timeout = c->udp.timeout; + + if (uflow->activity[TGTSIDE] && + (uflow->activity[INISIDE] > 1 || uflow->activity[TGTSIDE] > 1)) + timeout = c->udp.stream_timeout; + + if (now->tv_sec - uflow->ts <= timeout) return false;
udp_flow_close(c, uflow); diff --git a/udp_flow.h b/udp_flow.h index 14e0f92..158a0f6 100644 --- a/udp_flow.h +++ b/udp_flow.h @@ -16,6 +16,7 @@ * @flush1: @s[1] may have datagrams queued for other flows * @ts: Activity timestamp * @s: Socket fd (or -1) for each side of the flow + * @activity: Activity for each side of the flow
It's not clear what the measurement unit is, here. I think we should also specify that it's packets *coming from* (right?), not going to, each side of the flow. Maybe: * @activity: Packets seen from each side of the flow, up to UINT8_MAX ...if I got it right?
*/ struct udp_flow { /* Must be first element */ @@ -29,8 +30,20 @@ struct udp_flow {
time_t ts; int s[SIDES]; + uint8_t activity[SIDES]; };
+/** + * udp_flow_activity() - Track activity of a udp flow
UDP
+ * @uflow: UDP flow + * @sidei: Side index of the flow
Maybe we can specify here: (INISIDE or TGTSIDE)
+ */ +static inline void udp_flow_activity(struct udp_flow *uflow, unsigned int sidei) +{ + if (uflow->activity[sidei] < UINT8_MAX) + uflow->activity[sidei]++; +} + struct udp_flow *udp_at_sidx(flow_sidx_t sidx); flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, const union inany_addr *dst, in_port_t port,
Everything else looks good to me! (But again, I haven't tested this yet). -- Stefano
On Thu, Feb 12, 2026 at 4:59 PM Stefano Brivio
I haven't tested this yet, just two exceedingly minor nits (I can fix it up on merge if you're fine with it and if no other changes are needed):
Sure, I'm fine with it.
On Thu, 12 Feb 2026 16:04:14 +0800 Yumei Huang
wrote: Frequent DNS queries over UDP from a container or guest can result in many sockets shown in ss(8), typically one per flow. This is expected and harmless, but it can make the output of ss(8) look noisy and potentially concern users.
This patch splits UDP flow timeouts into two, mirroring the Linux kernel, and sources the values from kernel parameters. The shorter timeout is applied to unidirectional flows and minimal bidirectional exchanges (single datagram and reply), while the longer timeout is used for bidirectional flows with multiple datagrams on either side.
Link: https://bugs.passt.top/show_bug.cgi?id=197 Suggested-by: Stefano Brivio
Signed-off-by: Yumei Huang --- udp.c | 33 ++++++++++++++++++++++++++++++++- udp.h | 4 ++++ udp_flow.c | 11 ++++++++--- udp_flow.h | 13 +++++++++++++ 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/udp.c b/udp.c index b2383e2..3afec35 100644 --- a/udp.c +++ b/udp.c @@ -26,7 +26,10 @@ * * We track pseudo-connections of this type as flow table entries of type * FLOW_UDP. We store the time of the last traffic on the flow in uflow->ts, - * and let the flow expire if there is no traffic for UDP_CONN_TIMEOUT seconds. + * and let the flow expire if there is no traffic for UDP_TIMEOUT seconds for + * unidirectional flows and flows with only one datagram and one reply, or + * UDP_TIMEOUT_STREAM seconds for bidirectional flows with more than one + * datagram on either side. * * NOTE: This won't handle multicast protocols, or some protocols with different * port usage. We'll need specific logic if we want to handle those. @@ -118,6 +121,13 @@
#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */
+#define UDP_TIMEOUT "/proc/sys/net/netfilter/nf_conntrack_udp_timeout" +#define UDP_TIMEOUT_STREAM \ + "/proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream" + +#define UDP_TIMEOUT_DEFAULT 30 /* s */ +#define UDP_TIMEOUT_STREAM_DEFAULT 120 /* s */ + /* Maximum UDP data to be returned in ICMP messages */ #define ICMP4_MAX_DLEN 8 #define ICMP6_MAX_DLEN (IPV6_MIN_MTU \ @@ -954,6 +964,7 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
flow_trace(uflow, "Received data on reply socket"); uflow->ts = now->tv_sec; + udp_flow_activity(uflow, !tosidx.sidei);
if (pif_is_socket(topif)) { udp_sock_to_sock(c, ref.fd, n, tosidx); @@ -1179,6 +1190,24 @@ static void udp_splice_iov_init(void) } }
+/** + * udp_get_timeout_params() - Get host kernel UDP timeout parameters + * @c: Execution context + */ +static void udp_get_timeout_params(struct ctx *c) +{ + intmax_t v; + + v = read_file_integer(UDP_TIMEOUT, UDP_TIMEOUT_DEFAULT); + c->udp.timeout = v; + + v = read_file_integer(UDP_TIMEOUT_STREAM, UDP_TIMEOUT_STREAM_DEFAULT); + c->udp.stream_timeout = v; + + debug("Using UDP timeout parameters, timeout: %d, stream_timeout: %d", + c->udp.timeout, c->udp.stream_timeout); +} + /** * udp_init() - Initialise per-socket data, and sockets in namespace * @c: Execution context @@ -1189,6 +1218,8 @@ int udp_init(struct ctx *c) { ASSERT(!c->no_udp);
+ udp_get_timeout_params(c); + udp_iov_init(c);
if (fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP) < 0) diff --git a/udp.h b/udp.h index 2b91d72..da9c2df 100644 --- a/udp.h +++ b/udp.h @@ -24,11 +24,15 @@ void udp_update_l2_buf(const unsigned char *eth_d); * @fwd_in: Port forwarding configuration for inbound packets * @fwd_out: Port forwarding configuration for outbound packets * @timer_run: Timestamp of most recent timer run + * @timeout: Timeout for unidirectional flows (in s) + * @stream_timeout: Timeout for stream-like flows (in s) */ struct udp_ctx { struct fwd_ports fwd_in; struct fwd_ports fwd_out; struct timespec timer_run; + int timeout; + int stream_timeout; };
#endif /* UDP_H */ diff --git a/udp_flow.c b/udp_flow.c index 1f5e84e..9b22586 100644 --- a/udp_flow.c +++ b/udp_flow.c @@ -17,8 +17,6 @@ #include "udp_internal.h" #include "epoll_ctl.h"
-#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ - /** * udp_at_sidx() - Get UDP specific flow at given sidx * @sidx: Flow and side to retrieve @@ -152,6 +150,7 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow, uflow->ts = now->tv_sec; uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1; uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0; + uflow->activity[INISIDE] = uflow->activity[TGTSIDE] = 0;
flow_foreach_sidei(sidei) { if (pif_is_socket(uflow->f.pif[sidei])) @@ -362,7 +361,13 @@ bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow, bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, const struct timespec *now) { - if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT) + int timeout = c->udp.timeout; + + if (uflow->activity[TGTSIDE] && + (uflow->activity[INISIDE] > 1 || uflow->activity[TGTSIDE] > 1)) + timeout = c->udp.stream_timeout; + + if (now->tv_sec - uflow->ts <= timeout) return false;
udp_flow_close(c, uflow); diff --git a/udp_flow.h b/udp_flow.h index 14e0f92..158a0f6 100644 --- a/udp_flow.h +++ b/udp_flow.h @@ -16,6 +16,7 @@ * @flush1: @s[1] may have datagrams queued for other flows * @ts: Activity timestamp * @s: Socket fd (or -1) for each side of the flow + * @activity: Activity for each side of the flow
It's not clear what the measurement unit is, here. I think we should also specify that it's packets *coming from* (right?), not going to, each side of the flow. Maybe:
* @activity: Packets seen from each side of the flow, up to UINT8_MAX
...if I got it right?
Yes, you got it right.
*/ struct udp_flow { /* Must be first element */ @@ -29,8 +30,20 @@ struct udp_flow {
time_t ts; int s[SIDES]; + uint8_t activity[SIDES]; };
+/** + * udp_flow_activity() - Track activity of a udp flow
UDP
+ * @uflow: UDP flow + * @sidei: Side index of the flow
Maybe we can specify here:
(INISIDE or TGTSIDE)
+ */ +static inline void udp_flow_activity(struct udp_flow *uflow, unsigned int sidei) +{ + if (uflow->activity[sidei] < UINT8_MAX) + uflow->activity[sidei]++; +} + struct udp_flow *udp_at_sidx(flow_sidx_t sidx); flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif, const union inany_addr *dst, in_port_t port,
Everything else looks good to me! (But again, I haven't tested this yet).
Thanks, just let me know if other changes are needed.
-- Stefano
-- Thanks, Yumei Huang
participants (2)
-
Stefano Brivio
-
Yumei Huang