While running piHole using podman, traffic can trigger the following assert: ASSERTION FAILED in flow_alloc (flow.c:521): flow->f.state == FLOW_STATE_FREE The backtrace shows that this happens in flow_defer_handler(): #4 0x00005610d6f5b481 flow_alloc (passt + 0xb481) #5 0x00005610d6f74f86 udp_flow_from_sock (passt + 0x24f86) #6 0x00005610d6f737c3 udp_sock_fwd (passt + 0x237c3) #7 0x00005610d6f74c07 udp_flush_flow (passt + 0x24c07) #8 0x00005610d6f752c2 udp_flow_defer (passt + 0x252c2) #9 0x00005610d6f5bce1 flow_defer_handler (passt + 0xbce1) We are trying to allocate a new flow inside the loop that frees flows. Inside the loop, free_head points to the first free flow entry in the current cluster. But if we allocate a new entry during the loop, free_head is not updated and can now point to the entry we have just allocated. We can fix the problem by splitting the loop into two parts: - a first part where we can close some flows and allocate some new flow entries, - a second part where we free the entries closed in the previous loop and aggregate the free entries to merge consecutive clusters. 
Link: https://github.com/containers/podman/issues/25959 Signed-off-by: Laurent Vivier <lvivier(a)redhat.com> --- flow.c | 107 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 50 deletions(-) diff --git a/flow.c b/flow.c index 3c81cb42f921..00c1b2cc316f 100644 --- a/flow.c +++ b/flow.c @@ -788,6 +788,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now) { struct flow_free_cluster *free_head = NULL; unsigned *last_next = &flow_first_free; + bool to_free[FLOW_MAX] = { 0 }; bool timer = false; union flow *flow; @@ -798,9 +799,44 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now) ASSERT(!flow_new_entry); /* Incomplete flow at end of cycle */ - flow_foreach_slot(flow) { + /* Check which flows we might need to close first, but don't free them + * yet as it's not safe to do that in the middle of flow_foreach(). + */ + flow_foreach(flow) { bool closed = false; + switch (flow->f.type) { + case FLOW_TYPE_NONE: + ASSERT(false); + break; + case FLOW_TCP: + closed = tcp_flow_defer(&flow->tcp); + break; + case FLOW_TCP_SPLICE: + closed = tcp_splice_flow_defer(&flow->tcp_splice); + if (!closed && timer) + tcp_splice_timer(c, &flow->tcp_splice); + break; + case FLOW_PING4: + case FLOW_PING6: + if (timer) + closed = icmp_ping_timer(c, &flow->ping, now); + break; + case FLOW_UDP: + closed = udp_flow_defer(c, &flow->udp, now); + if (!closed && timer) + closed = udp_flow_timer(c, &flow->udp, now); + break; + default: + /* Assume other flow types don't need any handling */ + ; + } + + to_free[FLOW_IDX(flow)] = closed; + } + + /* Second step: actually free the flows */ + flow_foreach_slot(flow) { switch (flow->f.state) { case FLOW_STATE_FREE: { unsigned skip = flow->free.n; @@ -833,60 +869,31 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now) break; case FLOW_STATE_ACTIVE: - /* Nothing to do */ + if (to_free[FLOW_IDX(flow)]) { + flow_set_state(&flow->f, FLOW_STATE_FREE); + 
memset(flow, 0, sizeof(*flow)); + + if (free_head) { + /* Add slot to current free cluster */ + ASSERT(FLOW_IDX(flow) == + FLOW_IDX(free_head) + free_head->n); + free_head->n++; + flow->free.n = flow->free.next = 0; + } else { + /* Create new free cluster */ + free_head = &flow->free; + free_head->n = 1; + *last_next = FLOW_IDX(flow); + last_next = &free_head->next; + } + } else { + free_head = NULL; + } break; default: ASSERT(false); } - - switch (flow->f.type) { - case FLOW_TYPE_NONE: - ASSERT(false); - break; - case FLOW_TCP: - closed = tcp_flow_defer(&flow->tcp); - break; - case FLOW_TCP_SPLICE: - closed = tcp_splice_flow_defer(&flow->tcp_splice); - if (!closed && timer) - tcp_splice_timer(c, &flow->tcp_splice); - break; - case FLOW_PING4: - case FLOW_PING6: - if (timer) - closed = icmp_ping_timer(c, &flow->ping, now); - break; - case FLOW_UDP: - closed = udp_flow_defer(c, &flow->udp, now); - if (!closed && timer) - closed = udp_flow_timer(c, &flow->udp, now); - break; - default: - /* Assume other flow types don't need any handling */ - ; - } - - if (closed) { - flow_set_state(&flow->f, FLOW_STATE_FREE); - memset(flow, 0, sizeof(*flow)); - - if (free_head) { - /* Add slot to current free cluster */ - ASSERT(FLOW_IDX(flow) == - FLOW_IDX(free_head) + free_head->n); - free_head->n++; - flow->free.n = flow->free.next = 0; - } else { - /* Create new free cluster */ - free_head = &flow->free; - free_head->n = 1; - *last_next = FLOW_IDX(flow); - last_next = &free_head->next; - } - } else { - free_head = NULL; - } } *last_next = FLOW_MAX; -- 2.49.0