On Fri, Oct 07, 2022 at 02:47:38AM +0200, Stefano Brivio wrote:In some environments, such as KubeVirt pods, we might not have a system logger available. We could choose to run in foreground, but this takes away the convenient synchronisation mechanism derived from forking to background when interfaces are ready. Add optional logging to file with -l/--log-file and --log-size. Unfortunately, this means we need to duplicate features that are more appropriately implemented by a system logger, such as rotation. Keep that reasonably simple, by using fallocate() with range collapsing where supported (Linux kernel >= 3.15, extent-based ext4 and XFS) and falling back to an unsophisticated block-by-block moving of entries toward the beginning of the file once we reach the (mandatory) size limit. While at it, clarify the role of LOG_EMERG in passt.c. Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com> --- Makefile | 2 +- README.md | 2 +- conf.c | 49 ++++++++++- log.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++---- log.h | 6 ++ passt.1 | 14 ++- passt.c | 1 + 7 files changed, 300 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index 74bbfeb..fafb024 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS) MANPAGES = passt.1 pasta.1 qrap.1 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \ - isolation.h lineread.h ndp.h netlink.h packet.h passt.h pasta.h \ + isolation.h lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h \ pcap.h port_fwd.h siphash.h tap.h tcp.h tcp_splice.h udp.h util.h HEADERS = $(PASST_HEADERS) seccomp.h diff --git a/README.md b/README.md index 2c522b2..3dc4fc5 100644 --- a/README.md +++ b/README.md @@ -289,7 +289,7 @@ speeding up local connections, and usually requiring NAT. _pasta_: * ✅ all capabilities dropped, other than `CAP_NET_BIND_SERVICE` (if granted) * ✅ with default options, user, mount, IPC, UTS, PID namespaces are detached * ✅ no external dependencies (other than a standard C library) -* ✅ restrictive seccomp profiles (26 syscalls allowed for _passt_, 40 for +* ✅ restrictive seccomp profiles (30 syscalls allowed for _passt_, 41 for _pasta_ on x86_64)That's a pity, but I guess we need it.* ✅ examples of [AppArmor](/passt/tree/contrib/apparmor) and [SELinux](/passt/tree/contrib/selinux) profiles available diff --git a/conf.c b/conf.c index e9cd4a7..f22940b 100644 --- a/conf.c +++ b/conf.c @@ -633,6 +633,9 @@ static void usage(const char *name) info( " default: run in background if started from a TTY"); info( " -e, --stderr Log to stderr too"); info( " default: log to system logger only if started from a TTY"); + info( " -l, --log-file PATH Log (only) to given file"); + info( " --log-size BYTES Maximum size of log file"); + info( " default: 1 MiB"); info( " --runas UID|UID:GID Run as given UID, GID, which can be"); info( " numeric, or login and group names"); info( " default: drop to user \"nobody\""); @@ -994,6 +997,7 @@ void conf(struct ctx *c, int argc, char **argv) {"quiet", no_argument, NULL, 'q' }, {"foreground", no_argument, NULL, 'f' }, {"stderr", no_argument, NULL, 'e' }, + {"log-file", required_argument, NULL, 'l' }, {"help", no_argument, NULL, 'h' }, {"socket", required_argument, NULL, 's' }, {"ns-ifname", required_argument, NULL, 'I' }, @@ -1034,26 +1038,28 @@ void conf(struct ctx *c, int argc, char **argv) {"no-netns-quit", no_argument, NULL, 10 }, {"trace", no_argument, NULL, 11 }, {"runas", required_argument, NULL, 12 }, + {"log-size", required_argument, NULL, 13 }, { 0 }, }; struct get_bound_ports_ns_arg ns_ports_arg = { .c = c }; char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 }; bool v4_only = false, v6_only = false; + char *runas = NULL, *logfile = NULL; struct in6_addr *dns6 = c->ip6.dns; struct fqdn *dnss = c->dns_search; uint32_t *dns4 = c->ip4.dns; int name, ret, mask, b, i; const char *optstring; unsigned int ifi = 0; - char *runas = NULL; + size_t logsize = 0; uid_t uid; gid_t gid; if (c->mode == MODE_PASTA) { c->no_dhcp_dns = c->no_dhcp_dns_search = 1; - optstring = "dqfehI:p:P:m:a:n:M:g:i:D:S:46t:u:T:U:"; + optstring = "dqfel:hI:p:P:m:a:n:M:g:i:D:S:46t:u:T:U:"; } else { - optstring = "dqfehs:p:P:m:a:n:M:g:i:D:S:46t:u:"; + optstring = "dqfel:hs:p:P:m:a:n:M:g:i:D:S:46t:u:"; } c->tcp.fwd_in.mode = c->tcp.fwd_out.mode = 0; @@ -1177,6 +1183,20 @@ void conf(struct ctx *c, int argc, char **argv) runas = optarg; break; + case 13: + if (logsize) { + err("Multiple --log-size options given"); + usage(argv[0]); + } + + errno = 0; + logsize = strtol(optarg, NULL, 0); + + if (logsize < LOGFILE_SIZE_MIN || errno) { + err("Invalid --log-size: %s", optarg); + usage(argv[0]); + } + break; case 'd': if (c->debug) { err("Multiple --debug options given"); @@ -1192,6 +1212,11 @@ void conf(struct ctx *c, int argc, char **argv) c->foreground = 1; break; case 'e': + if (logfile) { + err("Can't log to both file and stderr"); + usage(argv[0]); + } + if (c->stderr) { err("Multiple --stderr options given"); usage(argv[0]); @@ -1199,6 +1224,19 @@ void conf(struct ctx *c, int argc, char **argv) c->stderr = 1; break; + case 'l': + if (c->stderr) { + err("Can't log to both stderr and file"); + usage(argv[0]); + } + + if (logfile) { + err("Multiple --log-file options given"); + usage(argv[0]); + } + + logfile = optarg; + break; case 'q': if (c->quiet) { err("Multiple --quiet options given"); @@ -1460,6 +1498,11 @@ void conf(struct ctx *c, int argc, char **argv) if (ret) usage(argv[0]); + if (logfile) { + logfile_init(c->mode == MODE_PASST ? "passt" : "pasta", + logfile, logsize); + } + if (c->mode == MODE_PASTA) { if (conf_pasta_ns(&netns_only, userns, netns, optind, argc, argv) < 0) diff --git a/log.c b/log.c index 54a7cbb..1e405e3 100644 --- a/log.c +++ b/log.c @@ -12,7 +12,12 @@ * Author: Stefano Brivio <sbrivio(a)redhat.com> */ +#include <arpa/inet.h> +#include <limits.h> +#include <errno.h> +#include <fcntl.h> #include <stdio.h> +#include <stdint.h> #include <stdlib.h> #include <unistd.h> #include <string.h> @@ -20,36 +25,49 @@ #include <syslog.h> #include <stdarg.h> #include <sys/socket.h> -#include <sys/un.h> #include "log.h" +#include "util.h" +#include "passt.h" -/* For __openlog() and __setlogmask() wrappers, and passt_vsyslog() */ -static int log_mask; -static int log_sock = -1; -static char log_ident[BUFSIZ]; -static int log_opt; -static time_t log_debug_start; -int log_trace; +static int log_sock = -1; /* Optional socket to system logger */ +static char log_ident[BUFSIZ]; /* Identifier string for openlog() */ +static int log_mask; /* Current log priority mask */ +static int log_opt; /* Options for openlog() */ + +static int log_file = -1; /* Optional log file descriptor */ +static size_t log_size; /* Maximum log file size in bytes */ +static size_t log_written; /* Currently used bytes in log file */ +static size_t log_cut_size; /* Bytes to cut at start on rotation */ +static char log_header[BUFSIZ]; /* File header, written back on cuts */ + +static time_t log_start; /* Start timestamp */ +int log_trace; /* --trace mode enabled */ #define logfn(name, level) \ void name(const char *format, ...) { \ struct timespec tp; \ va_list args; \ \ - if (setlogmask(0) & LOG_MASK(LOG_DEBUG)) { \ + if (setlogmask(0) & LOG_MASK(LOG_DEBUG) && log_file == -1) { \ clock_gettime(CLOCK_REALTIME, &tp); \ fprintf(stderr, "%li.%04li: ", \ - tp.tv_sec - log_debug_start, \ + tp.tv_sec - log_start, \ tp.tv_nsec / (100L * 1000)); \ - } else { \ + } \ + \ + if ((LOG_MASK(LOG_PRI(level)) & log_mask) || \ + setlogmask(0) == LOG_MASK(LOG_EMERG)) { \ va_start(args, format); \ - passt_vsyslog(level, format, args); \ + if (log_file != -1) \ + logfile_write(level, format, args); \ + else if (!(setlogmask(0) & LOG_MASK(LOG_DEBUG))) \ + passt_vsyslog(level, format, args); \ va_end(args); \ } \ \ - if (setlogmask(0) & LOG_MASK(LOG_DEBUG) || \ - setlogmask(0) == LOG_MASK(LOG_EMERG)) { \ + if ((setlogmask(0) & LOG_MASK(LOG_DEBUG) || \ + setlogmask(0) == LOG_MASK(LOG_EMERG)) && log_file == -1) { \ va_start(args, format); \ (void)vfprintf(stderr, format, args); \ va_end(args); \ @@ -58,6 +76,16 @@ void name(const char *format, ...) { \ } \ } +/* Prefixes for log file messages, indexed by priority */ +const char *logfile_prefix[] = { + NULL, NULL, NULL, /* Unused: LOG_EMERG, LOG_ALERT, LOG_CRIT */ + "ERROR: ", + "WARNING: ", + NULL, /* Unused: LOG_NOTICE */ + "info: ", + " ", /* LOG_DEBUG */ +}; + logfn(err, LOG_ERR) logfn(warn, LOG_WARNING) logfn(info, LOG_INFO) @@ -79,7 +107,7 @@ void __openlog(const char *ident, int option, int facility) struct timespec tp; clock_gettime(CLOCK_REALTIME, &tp); - log_debug_start = tp.tv_sec; + log_start = tp.tv_sec; if (log_sock < 0) { struct sockaddr_un a = { .sun_family = AF_UNIX, }; @@ -124,9 +152,6 @@ void passt_vsyslog(int pri, const char *format, va_list ap) char buf[BUFSIZ]; int n; - if (!(LOG_MASK(LOG_PRI(pri)) & log_mask)) - return; - /* Send without name and timestamp, the system logger should add them */ n = snprintf(buf, BUFSIZ, "<%i> ", pri); @@ -141,3 +166,193 @@ void passt_vsyslog(int pri, const char *format, va_list ap) if (send(log_sock, buf, n, 0) != n) fprintf(stderr, "Failed to send %i bytes to syslog\n", n); } + +/** + * logfile_init() - Open log file and write header with PID and path + * @name: Identifier for header: passt or pasta + * @path: Path to log file + * @size: Maximum size of log file: log_cut_size is calculatd here + */ +void logfile_init(const char *name, const char *path, size_t size) +{ + char nl = '\n', exe[PATH_MAX] = { 0 }; + int n; + + if (readlink("/proc/self/exe", exe, PATH_MAX - 1) < 0) { + perror("readlink /proc/self/exe"); + exit(EXIT_FAILURE); + } + + log_file = open(path, O_CREAT | O_TRUNC | O_APPEND | O_RDWR | O_CLOEXEC, + S_IRUSR | S_IWUSR);Do+ if (log_file == -1) { + err("Couldn't open log file %s: %s", path, strerror(errno)); + exit(EXIT_FAILURE); + } + + log_size = size ? size : LOGFILE_SIZE_DEFAULT; + + n = snprintf(log_header, sizeof(log_header), "%s: %s (%i)", + name, exe, getpid()); + + if (write(log_file, log_header, n) <= 0 || + write(log_file, &nl, 1) <= 0) { + perror("Couldn't write to log file\n"); + exit(EXIT_FAILURE); + } + + /* For FALLOC_FL_COLLAPSE_RANGE: VFS block size can be up to one page */ + log_cut_size = ROUND_UP(log_size * LOGFILE_CUT_RATIO / 100, PAGE_SIZE); +} + +/** + * logfile_rotate_fallocate() - Write header, set log_written after fallocate() + * @fd: Log file descriptor + * @ts: Current timestamp + * + * #syscalls lseek ppc64le:_llseek ppc64:_llseek armv6l:_llseek armv7l:_llseek + */ +static void logfile_rotate_fallocate(int fd, struct timespec *ts) +{ + char buf[BUFSIZ], *nl; + int n; + + if (lseek(fd, 0, SEEK_SET) == -1) + return; + if (read(fd, buf, BUFSIZ) == -1) + return; + + n = snprintf(buf, BUFSIZ, + "%s - log truncated at %li.%04li", log_header, + ts->tv_sec - log_start, ts->tv_nsec / (100L * 1000)); + + /* Avoid partial lines by padding the header with spaces */ + nl = memchr(buf + n + 1, '\n', BUFSIZ - n - 1); + if (nl) + memset(buf + n, ' ', nl - (buf + n)); + + if (lseek(fd, 0, SEEK_SET) == -1) + return; + if (write(fd, buf, BUFSIZ) == -1)Have you tested this path? I thought with O_APPEND, this write would go at the end, regardless of the lseek() above.+ return; + + log_written -= log_cut_size; +} + +/** + * logfile_rotate_move() - Fallback: move recent entries toward start, then cut + * @fd: Log file descriptor + * @ts: Current timestamp + * + * #syscalls lseek ppc64le:_llseek ppc64:_llseek armv6l:_llseek armv7l:_llseek + * #syscalls ftruncate + */ +static void logfile_rotate_move(int fd, struct timespec *ts) +{ + int header_len, write_offset, end, discard, n; + char buf[BUFSIZ], *nl; + + header_len = snprintf(buf, BUFSIZ, + "%s - log truncated at %li.%04li\n", log_header, + ts->tv_sec - log_start, + ts->tv_nsec / (100L * 1000));A common helper for the truncation header seems like it would be a good idea.+ if (lseek(fd, 0, SEEK_SET) == -1) + return; + if (write(fd, buf, header_len) == -1) + return; + + end = write_offset = header_len; + discard = log_cut_size + header_len; + + /* Try to cut cleanly at newline */ + if (lseek(fd, discard, SEEK_SET) == -1) + goto out; + if ((n = read(fd, buf, BUFSIZ)) <= 0) + goto out; + if ((nl = memchr(buf, '\n', n))) + discard += (nl - buf) + 1; + + /* Go to first block to be moved */ + if (lseek(fd, discard, SEEK_SET) == -1) + goto out; + + while ((n = read(fd, buf, BUFSIZ)) > 0) { + end = header_len;Hrm.. so errors in this section will throw away everything except the header.+ + if (lseek(fd, write_offset, SEEK_SET) == -1) + goto out; + if ((n = write(fd, buf, n)) == -1) + goto out; + write_offset += n; + + if ((n = lseek(fd, 0, SEEK_CUR)) == -1) + goto out; + end = n;But errors after here will just throw away what we haven't moved so far. Seems a bit inconsistent.+ + if (lseek(fd, discard - header_len, SEEK_CUR) == -1) + goto out; + }I wonder if those whole function would be made simpler using pread() and pwrite() instead of lseek()s all over the place.+out: + if (ftruncate(fd, end)) + return; + + log_written = end; +} + +/** + * logfile_rotate() - "Rotate" log file once it's full + * @fd: Log file descriptor + * @ts: Current timestamp + * + * Return: 0 on success, negative error code on failure + * + * #syscalls fallocate fcntl + */ +static int logfile_rotate(int fd, struct timespec *ts) +{ + if (fcntl(fd, F_SETFL, O_RDWR /* Drop O_APPEND: explicit lseek() */)) + return -errno; + + /* Collapsing only for Linux kernel >= 3.15, extent-based ext4 or XFS */ + if (!fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, log_cut_size)) + logfile_rotate_fallocate(fd, ts); + else + logfile_rotate_move(fd, ts); + + if (fcntl(fd, F_SETFL, O_RDWR | O_APPEND)) + return -errno; + + return 0; +} + +/** + * logfile_write() - Write entry to log file, trigger rotation if full + * @pri: Facility and level map, same as priority for vsyslog() + * @format: Same as vsyslog() format + * @ap: Same as vsyslog() ap + */ +void logfile_write(int pri, const char *format, va_list ap) +{ + struct timespec ts; + char buf[BUFSIZ]; + int n; + + if (clock_gettime(CLOCK_REALTIME, &ts)) + return; + + n = snprintf(buf, BUFSIZ, "%li.%04li: %s", + ts.tv_sec - log_start, ts.tv_nsec / (100L * 1000), + logfile_prefix[pri]); + + n += vsnprintf(buf + n, BUFSIZ - n, format, ap); + + if (format[strlen(format)] != '\n') + n += snprintf(buf + n, BUFSIZ - n, "\n"); + + if ((log_written + n >= log_size) && logfile_rotate(log_file, &ts)) + return; + + if ((n = write(log_file, buf, n)) >= 0) + log_written += n; +} diff --git a/log.h b/log.h index 70cad6f..ed71cd7 100644 --- a/log.h +++ b/log.h @@ -6,6 +6,10 @@ #ifndef LOG_H #define LOG_H +#define LOGFILE_SIZE_DEFAULT 1E6Wouldn't a multiple of the block size make more sense?+#define LOGFILE_CUT_RATIO 30 /* When full, cut ~30% size */ +#define LOGFILE_SIZE_MIN (5UL * MAX(BUFSIZ, PAGE_SIZE)) + void err(const char *format, ...); void warn(const char *format, ...); void info(const char *format, ...); @@ -20,7 +24,9 @@ void trace_init(int enable); } while (0) void __openlog(const char *ident, int option, int facility); +void logfile_init(const char *name, const char *path, size_t size); void passt_vsyslog(int pri, const char *format, va_list ap); +void logfile_write(int pri, const char *format, va_list ap); void __setlogmask(int mask); #endif /* LOG_H */ diff --git a/passt.1 b/passt.1 index 83395bc..64236b6 100644 --- a/passt.1 +++ b/passt.1 @@ -79,7 +79,7 @@ for performance reasons. .TP .BR \-d ", " \-\-debug -Be verbose, don't run in background. +Be verbose, don't run in background, don't log to the system logger. .TP .BR \-\-trace @@ -99,9 +99,19 @@ Default is to fork into background. .TP .BR \-e ", " \-\-stderr Log to standard error too. -Default is to log to system logger only, if started from an interactive +Default is to log to the system logger only, if started from an interactive terminal, and to both system logger and standard error otherwise. +.TP +.BR \-l ", " \-\-log-file " " \fIPATH\fR +Log to file \fIPATH\fR, not to standard error, and not to the system logger. + +.TP +.BR \-\-log-size " " \fISIZE\fR +Limit log file size to \fISIZE\fR bytes. When the log file is full, make room +for new entries by removing old ones at the beginning. This limit is mandatory. +Default is 1048576 (1 MiB). + .TP .BR \-\-runas " " \fIUID\fR|\fIUID:GID\fR|\fILOGIN\fR|\fILOGIN:GROUP\fR Attempt to change to given UID and corresponding group if UID is given, diff --git a/passt.c b/passt.c index 0c561b2..7589b05 100644 --- a/passt.c +++ b/passt.c @@ -220,6 +220,7 @@ int main(int argc, char **argv) __openlog(log_name, 0, LOG_DAEMON); + /* Meaning we don't know yet: log everything. LOG_EMERG is unused */ __setlogmask(LOG_MASK(LOG_EMERG)); c.epollfd = epoll_create1(EPOLL_CLOEXEC);-- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson