On Wed, Jul 01, 2026 at 02:07:12AM +0200, Stefano Brivio wrote:
On Fri, 26 Jun 2026 17:09:54 +1000 David Gibson
wrote: As we add more complexity to what forwarding rules are allowed, our existing ad-hoc C parsing is starting to become quite awkward. We already have some parts that resemble a very simple recursive[0] descent parser, with composable subfunctions that consume as much input as they need, rather than pre-splitting based on known delimiters.
Start extending this approach, by creating parse.[ch] with parsing helpers with a uniform interface. Initially we start with very simple cases: the parse_keyword() function from fwd_rule.c and another helper to check that we've reached the end of input.
Rename parse_keyword() to parse_literal(), because it's not just useful for "keywords" as such. We can use it in a bunch of additional places for parsing delimiters and other symbols. This doesn't gain us a lot now but will make things clearer as we use more such parser helpers.
[0] Except that the grammars we have aren't actually recursive, so neither is the code.
Signed-off-by: David Gibson
--- Makefile | 17 ++++++++------ conf.c | 5 +++- fwd_rule.c | 33 +++++---------------------- parse.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ parse.h | 14 ++++++++++++ 5 files changed, 101 insertions(+), 35 deletions(-) create mode 100644 parse.c create mode 100644 parse.h diff --git a/Makefile b/Makefile index 5ed0f702..bd729c75 100644 --- a/Makefile +++ b/Makefile @@ -36,12 +36,13 @@ BASE_CFLAGS += -pedantic -Wall -Wextra -Wno-format-zero-length -Wformat-security PASST_SRCS = arch.c arp.c bitmap.c checksum.c conf.c dhcp.c dhcpv6.c \ epoll_ctl.c flow.c fwd.c fwd_rule.c icmp.c igmp.c inany.c iov.c ip.c \ isolation.c lineread.c log.c mld.c ndp.c netlink.c migrate.c packet.c \ - passt.c pasta.c pcap.c pif.c repair.c serialise.c tap.c tcp.c \ + parse.c passt.c pasta.c pcap.c pif.c repair.c serialise.c tap.c tcp.c \ tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c udp_vu.c util.c \ vhost_user.c virtio.c vu_common.c QRAP_SRCS = qrap.c PASST_REPAIR_SRCS = passt-repair.c -PESTO_SRCS = pesto.c bitmap.c fwd_rule.c inany.c ip.c lineread.c serialise.c +PESTO_SRCS = pesto.c bitmap.c fwd_rule.c inany.c ip.c lineread.c parse.c \ + serialise.c SRCS = $(PASST_SRCS) $(QRAP_SRCS) $(PASST_REPAIR_SRCS) $(PESTO_SRCS)
MANPAGES = passt.1 pasta.1 pesto.1 qrap.1 passt-repair.1 @@ -49,13 +50,14 @@ MANPAGES = passt.1 pasta.1 pesto.1 qrap.1 passt-repair.1 PASST_HEADERS = arch.h arp.h bitmap.h checksum.h conf.h dhcp.h dhcpv6.h \ epoll_ctl.h flow.h fwd.h fwd_rule.h flow_table.h icmp.h icmp_flow.h \ inany.h iov.h ip.h isolation.h lineread.h log.h migrate.h ndp.h \ - netlink.h packet.h passt.h pasta.h pcap.h pif.h repair.h serialise.h \ - siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \ - tcp_vu.h udp.h udp_flow.h udp_internal.h udp_vu.h util.h vhost_user.h \ - virtio.h vu_common.h + netlink.h packet.h parse.h passt.h pasta.h pcap.h pif.h repair.h \ + serialise.h siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h \ + tcp_splice.h tcp_vu.h udp.h udp_flow.h udp_internal.h udp_vu.h util.h \ + vhost_user.h virtio.h vu_common.h QRAP_HEADERS = arp.h ip.h passt.h util.h PASST_REPAIR_HEADERS = linux_dep.h -PESTO_HEADERS = bitmap.h common.h fwd_rule.h inany.h ip.h log.h pesto.h serialise.h +PESTO_HEADERS = bitmap.h common.h fwd_rule.h inany.h ip.h log.h parse.h \ + pesto.h serialise.h
C := \#include
\nint main(){int a=getrandom(0, 0, 0);} ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0) @@ -223,6 +225,7 @@ pesto.cppcheck: CPPCHECK_FLAGS += --suppress=unusedFunction:bitmap.c pesto.cppcheck: CPPCHECK_FLAGS += --suppress=unusedFunction:inany.h pesto.cppcheck: CPPCHECK_FLAGS += --suppress=unusedFunction:inany.c pesto.cppcheck: CPPCHECK_FLAGS += --suppress=unusedFunction:ip.h +pesto.cppcheck: CPPCHECK_FLAGS += --suppress=unusedFunction:parse.c pesto.cppcheck: CPPCHECK_FLAGS += --suppress=unusedFunction:serialise.c pesto.cppcheck: CPPCHECK_FLAGS += --suppress=staticFunction:fwd_rule.c pesto.cppcheck: $(PESTO_SRCS) $(PESTO_HEADERS) seccomp_pesto.h diff --git a/conf.c b/conf.c index 6ab8efec..ca6c859c 100644 --- a/conf.c +++ b/conf.c @@ -52,6 +52,7 @@ #include "conf.h" #include "pesto.h" #include "serialise.h" +#include "parse.h" #define NETNS_RUN_DIR "/run/netns"
@@ -1028,7 +1029,9 @@ static void conf_ugid(char *runas, uid_t *uid, gid_t *gid) static void conf_nat(const char *arg, struct in_addr *addr4, struct in6_addr *addr6, int *no_map_gw) { - if (strcmp(arg, "none") == 0) { + const char *p = arg; + + if (parse_literal(&p, "none") && parse_eoi(p)) {
I think that !*p would be so much clearer here compared to:
- parse_eoi(p)... must be "input"?
- [takes a look at parse_eoi()] yes, it's input, great [goes back to here]. Ah, wait, does it return false if *p is NULL? Or true?
Like all the parse_*() functions, it returns true if it *did* see the thing it was looking for, in this case end of string.
- [takes a second look at parse_eoi()] good, it returns true, so this is *!p...
I understand it's somewhat consistent with parse_literal() this way, so I'm not strongly against it, just a slight preference overall.
Right, that's the reason for it. Because later on composing these parse helpers has a bit of its own style, it seemed helpful to continue that style, rather than poking directly into the string.
*addr4 = in4addr_any; *addr6 = in6addr_any; if (no_map_gw) diff --git a/fwd_rule.c b/fwd_rule.c index 04a0101d..d0e90402 100644 --- a/fwd_rule.c +++ b/fwd_rule.c @@ -24,6 +24,7 @@ #include "fwd_rule.h" #include "lineread.h" #include "log.h" +#include "parse.h" #include "serialise.h"
/* Ephemeral port range: values from RFC 6335 */ @@ -427,28 +428,6 @@ static int parse_port_range(const char *s, const char **endptr, return 0; }
-/** - * parse_keyword() - Parse a literal keyword - * @s: String to parse - * @endptr: Update to the character after the keyword - * @kw: Keyword to accept - * - * Return: 0, if @s starts with @kw, -EINVAL if it does not - */ -static int parse_keyword(const char *s, const char **endptr, const char *kw) -{ - size_t len = strlen(kw); - - if (strlen(s) < len) - return -EINVAL; - - if (memcmp(s, kw, len)) - return -EINVAL; - - *endptr = s + len; - return 0; -} - /** * fwd_rule_range_except() - Set up forwarding for a range of ports minus a * bitmap of exclusions @@ -568,7 +547,7 @@ static void fwd_rule_parse_ports(struct fwd_table *fwd, bool del, uint8_t proto, continue; }
- if (parse_keyword(p, &p, "auto") == 0) { + if (parse_literal(&p, "auto")) { if (p != ep) /* Garbage after the keyword */ goto bad;
@@ -582,9 +561,8 @@ static void fwd_rule_parse_ports(struct fwd_table *fwd, bool del, uint8_t proto, }
/* Should be an exclude range */ - if (*p != '~') + if (!parse_literal(&p, "~")) goto bad; - p++;
if (parse_port_range(p, &p, &xrange)) goto bad; @@ -616,8 +594,9 @@ static void fwd_rule_parse_ports(struct fwd_table *fwd, bool del, uint8_t proto, if (parse_port_range(p, &p, &orig_range)) goto bad;
- if (*p == ':') { /* There's a range to map to as well */ - if (parse_port_range(p + 1, &p, &mapped_range)) + if (parse_literal(&p, ":")) { + /* There's a range to map to as well */ + if (parse_port_range(p, &p, &mapped_range)) goto bad; if ((mapped_range.last - mapped_range.first) != (orig_range.last - orig_range.first)) diff --git a/parse.c b/parse.c new file mode 100644 index 00000000..c2a92422 --- /dev/null +++ b/parse.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* PASST - Plug A Simple Socket Transport + * for qemu/UNIX domain socket mode + * + * PASTA - Pack A Subtle Tap Abstraction + * for network namespace/tap device mode + * + * parse.c - Composable parsing helpers + * + * Copyright Red Hat + * Author: David Gibson
+ */ + +#include +#include +#include +#include +#include +#include + +#include "common.h" + +/** + * DOC: Theory of Operation + * + * These are a number of primitives which can be combined into parsers for + * moderately complex input. For simpler composability, they have common + * conventions. + * + * - Functions return a bool indicating whether they successfully parsed. As I reached 7/12 (converting parse_port_range()), this became rather confusing.
It makes sense for parse_eoi() (because in some sense it's a parse_is_eoi() function), but for the rest I'd expect almost anything that doesn't have "is" or similar in its name to return 0 on success, and a negative error code on failure (or -1).
Huh. So an earlier draft did that, making them more similar to strtoul() in interface. I changed it because I was finding it confusing. Even knowing the convention, I found it hard to remember to read: if (!parse_foo() && !parse_bar()) as meaning we *succeeded* in parsing a foo and a bar. That would not be true of parse_foo() == 0, but that's bulkier.
I'm not sure if I'm ignoring some particular advantage for the composability you mention above, if it's clearly better for whatever reason so be it, but otherwise I'd suggest re-considering this.
No technical advantage, I just found it more readable.
I mean:
- if (parse_port_range(p, &p, &xrange)) + if (!parse_port_range(&p, &xrange))
...is quite telling.
It is? I'm not sure what it's telling me.
+ * - Any specific output from the parse is as output parameters + * - First argument is always @cursor, a const char **. + * - On entry, *@cursor has the point to start parsing. + * - On successful exit, *@cursor is updated to the next character after the + parsed portion of the input + * - On failure, *@cursor and any output arguments are not modified + * + * For brevity the common parameters and return information are omitted from the + * individual function documentation comments.
The reason why we have "@c: Execution context" all over the place isn't so much human readability, it's rather automated usage, and the day we manage to run Sphinx on the whole codebase:
https://bugs.passt.top/show_bug.cgi?id=35
it would be nice to have a clean output instead of having to go through a pile of warnings.
So, actually, I'd keep @cursor "described" everywhere. We can also skip it for the moment but I'm afraid we'll start following this example and it will be more work later.
Yeah, that's reasonable. Now that I'm not churning so much on the interfaces it's not too hard to duplicate the descriptions.
+ */ + +/** + * parse_literal() - Parse a specified literal string + * @lit: Keyword to accept + */ +bool parse_literal(const char **cursor, const char *lit) +{ + size_t len = strlen(lit); + + if (strlen(*cursor) < len || memcmp(*cursor, lit, len)) + return false; + + *cursor += len; + return true; +} + +/** + * parse_eoi() - Parse end of input + * @cursor: Current parse pointer + * + * Return: true if @p is at End of Input (\0), false otherwise + */ +bool parse_eoi(const char *cursor) +{ + return !(*cursor); +} diff --git a/parse.h b/parse.h new file mode 100644 index 00000000..eb51a469 --- /dev/null +++ b/parse.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson
+ */ + +#ifndef PARSE_H +#define PARSE_H + +#include + +bool parse_literal(const char **cursor, const char *lit); +bool parse_eoi(const char *cursor); + +#endif /* _PARSE_H */ -- Stefano
-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson