In the title: s/subsciption/subscription/
On Thu, 2 Oct 2025 20:34:04 -0400
Jon Maloy
The solution to bug https://bugs.passt.top/show_bug.cgi?id=120 requires the ability to translate from an IP address to its corresponding MAC address in cases where those are present in the ARP or NDP tables.
To keep track of the contents of these tables we add a netlink-based neighbour subscription feature.
Signed-off-by: Jon Maloy
Reviewed-by: David Gibson --- v3: - Added an attribute containing NDA_DST to the sent message, so that we let the kernel do the filtering of the IP address and return only one entry. - Added interface index to the call signature. Since the only interface we know is the template interface, this limits the number of hosts that will be seen as 'network segment local' from a PASST viewpoint. v4: - Made loop independent of attribute order. - Ignoring L2 addresses which are not of size ETH_ALEN. v5: - Changed return value of new function, so caller can know if a MAC address really was found. v6: - Removed warning printout which had ended up in the wrong commit. v8: - Changed to neighbour event subscription model - netlink: arp/ndp table subscription v10:- Updated according to David's latest comments on v8 - Added functionality where we initially read current state of ARP/NDP tables v12:- Updates based on feedback from David and Stefano --- epoll_type.h | 2 + netlink.c | 204 ++++++++++++++++++++++++++++++++++++++++++++++++++- netlink.h | 4 + passt.c | 7 ++ 4 files changed, 214 insertions(+), 3 deletions(-)
diff --git a/epoll_type.h b/epoll_type.h index 12ac59b..a90ffb6 100644 --- a/epoll_type.h +++ b/epoll_type.h @@ -44,6 +44,8 @@ enum epoll_type { EPOLL_TYPE_REPAIR_LISTEN, /* TCP_REPAIR helper socket */ EPOLL_TYPE_REPAIR, + /* Netlink neighbour subscription socket */ + EPOLL_TYPE_NL_NEIGH,
EPOLL_NUM_TYPES, }; diff --git a/netlink.c b/netlink.c index c436780..3fe2fdd 100644 --- a/netlink.c +++ b/netlink.c @@ -26,6 +26,7 @@ #include
#include #include +#include #include
#include @@ -40,6 +41,10 @@ #define RTNH_NEXT_AND_DEC(rtnh, attrlen) \ ((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh)) +/* Convenience macro borrowed from kernel */ +#define NUD_VALID \ + (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE) + /* Netlink expects a buffer of at least 8kiB or the system page size, * whichever is larger. 32kiB is recommended for more efficient. * Since the largest page size on any remotely common Linux setup is @@ -50,9 +55,10 @@ #define NLBUFSIZ 65536
/* Socket in init, in target namespace, sequence (just needs to be monotonic) */ -int nl_sock = -1; -int nl_sock_ns = -1; -static int nl_seq = 1; +int nl_sock = -1; +int nl_sock_ns = -1; +static int nl_sock_neigh = -1; +static int nl_seq = 1;
/** * nl_sock_init_do() - Set up netlink sockets in init or target namespace @@ -1103,3 +1109,195 @@ int nl_link_set_flags(int s, unsigned int ifi,
return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req)); } + +/** + * nl_neigh_msg_read() - Interpret a neigbour state message from netlink + * @c: Execution context + * @nh: Message to be read + *
Nit: excess newline.
+ */ +static void nl_neigh_msg_read(const struct ctx *c, struct nlmsghdr *nh) +{ + struct ndmsg *ndm = NLMSG_DATA(nh); + struct rtattr *rta = (struct rtattr *)(ndm + 1); + size_t na = NLMSG_PAYLOAD(nh, sizeof(*ndm)); + char ip_str[INET6_ADDRSTRLEN]; + char mac_str[ETH_ADDRSTRLEN]; + const uint8_t *lladdr = NULL; + const void *dst = NULL; + size_t lladdr_len = 0; + uint8_t mac[ETH_ALEN]; + union inany_addr addr; + size_t dstlen = 0; + + if (nh->nlmsg_type == NLMSG_DONE) + return; + + if (nh->nlmsg_type == NLMSG_ERROR) { + warn_perror("nlmsg_type error at msg read"); + return; + } + + if (nh->nlmsg_type != RTM_NEWNEIGH && + nh->nlmsg_type != RTM_DELNEIGH) + return; + + for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) { + switch (rta->rta_type) { + case NDA_DST: + dst = RTA_DATA(rta); + dstlen = RTA_PAYLOAD(rta); + break; + case NDA_LLADDR: + lladdr = RTA_DATA(rta); + lladdr_len = RTA_PAYLOAD(rta); + break; + default: + break; + } + } + + if (!dst) + return; + + if (!lladdr || lladdr_len != ETH_ALEN) + return; + + if (ndm->ndm_type != ARPHRD_ETHER) + return; + + memcpy(mac, lladdr, ETH_ALEN); + eth_ntop(mac, mac_str, sizeof(mac_str)); + + if (ndm->ndm_family == AF_INET && + ndm->ndm_ifindex != c->ifi4) + return; + + if (ndm->ndm_family == AF_INET6 && + ndm->ndm_ifindex != c->ifi6) + return; + + if (ndm->ndm_family != AF_INET && + ndm->ndm_family != AF_INET6) + return; + + if (ndm->ndm_family == AF_INET && + dstlen != sizeof(struct in_addr)) + return; + + if (ndm->ndm_family == AF_INET6 && + dstlen != sizeof(struct in6_addr)) + return; + + inany_from_af(&addr, ndm->ndm_family, dst); + inany_ntop(dst, ip_str, sizeof(ip_str)); + + if (nh->nlmsg_type == RTM_NEWNEIGH && ndm->ndm_state & NUD_VALID) + trace("neigh table update: %s / %s", ip_str, mac_str); + else + trace("neigh table delete: %s / %s", ip_str, mac_str); +} + +/** + * nl_neigh_sync() - Read current contents ARP/NDP tables + * @c: Execution context + * @proto: Protocol, AF_INET or AF_INET6 + * @ifi: Interface 
index + * + */ +static void nl_neigh_sync(const struct ctx *c, int proto, int ifi) +{ + struct { + struct nlmsghdr nlh; + struct ndmsg ndm; + } req = { + .nlh = {0}, + .ndm.ndm_family = proto, + .ndm.ndm_ifindex = ifi, + .ndm.ndm_state = 0, + .ndm.ndm_flags = 0, + .ndm.ndm_type = 0 + }; + struct nlmsghdr *nh; + char buf[NLBUFSIZ]; + ssize_t status; + uint32_t seq; + + seq = nl_send(nl_sock_neigh, &req, RTM_GETNEIGH, NLM_F_DUMP, sizeof(req)); + nl_foreach_oftype(nh, status, nl_sock_neigh, buf, seq, RTM_NEWNEIGH) + nl_neigh_msg_read(c, nh); + if (status < 0) + warn("Failed to read netlink message. status == %li", status);
That should be a human-readable message, so I'm not sure the C notation for equality belongs there. In general we use the same format as perror() (you can't use warn_perror() here), that is: warn("Failed to read netlink message: %s", strerror_(status)); -- Stefano