A privileged helper to set/clear TCP_REPAIR on sockets on behalf of passt. Not used yet. Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com> --- Note: I'm merging this right now, but posting anyway to ease review. .gitignore | 1 + Makefile | 17 ++- contrib/apparmor/usr.bin.passt-repair | 29 +++++ contrib/fedora/passt.spec | 2 + contrib/selinux/passt-repair.fc | 11 ++ contrib/selinux/passt-repair.te | 58 ++++++++++ hooks/pre-push | 1 + passt-repair.1 | 70 ++++++++++++ passt-repair.c | 154 ++++++++++++++++++++++++++ seccomp.sh | 6 +- 10 files changed, 342 insertions(+), 7 deletions(-) create mode 100644 contrib/apparmor/usr.bin.passt-repair create mode 100644 contrib/selinux/passt-repair.fc create mode 100644 contrib/selinux/passt-repair.te create mode 100644 passt-repair.1 create mode 100644 passt-repair.c diff --git a/.gitignore b/.gitignore index d1c8be9..5824a71 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /passt.avx2 /pasta /pasta.avx2 +/passt-repair /qrap /pasta.1 /seccomp.h diff --git a/Makefile b/Makefile index 464eef1..6ab8d24 100644 --- a/Makefile +++ b/Makefile @@ -42,9 +42,10 @@ PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \ tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c udp_vu.c util.c \ vhost_user.c virtio.c vu_common.c QRAP_SRCS = qrap.c -SRCS = $(PASST_SRCS) $(QRAP_SRCS) +PASST_REPAIR_SRCS = passt-repair.c +SRCS = $(PASST_SRCS) $(QRAP_SRCS) $(PASST_REPAIR_SRCS) -MANPAGES = passt.1 pasta.1 qrap.1 +MANPAGES = passt.1 pasta.1 qrap.1 passt-repair.1 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \ flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \ @@ -72,9 +73,9 @@ mandir ?= $(datarootdir)/man man1dir ?= $(mandir)/man1 ifeq ($(TARGET_ARCH),x86_64) -BIN := passt passt.avx2 pasta pasta.avx2 qrap +BIN := passt passt.avx2 pasta pasta.avx2 qrap passt-repair else -BIN := passt pasta qrap +BIN := passt pasta qrap passt-repair endif all: $(BIN) $(MANPAGES) docs @@ -83,7 +84,10 @@ 
static: FLAGS += -static -DGLIBC_NO_STATIC_NSS static: clean all seccomp.h: seccomp.sh $(PASST_SRCS) $(PASST_HEADERS) - @ EXTRA_SYSCALLS="$(EXTRA_SYSCALLS)" ARCH="$(TARGET_ARCH)" CC="$(CC)" ./seccomp.sh $(PASST_SRCS) $(PASST_HEADERS) + @ EXTRA_SYSCALLS="$(EXTRA_SYSCALLS)" ARCH="$(TARGET_ARCH)" CC="$(CC)" ./seccomp.sh seccomp.h $(PASST_SRCS) $(PASST_HEADERS) + +seccomp_repair.h: seccomp.sh $(PASST_REPAIR_SRCS) + @ ARCH="$(TARGET_ARCH)" CC="$(CC)" ./seccomp.sh seccomp_repair.h $(PASST_REPAIR_SRCS) passt: $(PASST_SRCS) $(HEADERS) $(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) $(PASST_SRCS) -o passt $(LDFLAGS) @@ -101,6 +105,9 @@ pasta.avx2 pasta.1 pasta: pasta%: passt% qrap: $(QRAP_SRCS) passt.h $(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) -DARCH=\"$(TARGET_ARCH)\" $(QRAP_SRCS) -o qrap $(LDFLAGS) +passt-repair: $(PASST_REPAIR_SRCS) seccomp_repair.h + $(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) $(PASST_REPAIR_SRCS) -o passt-repair $(LDFLAGS) + valgrind: EXTRA_SYSCALLS += rt_sigprocmask rt_sigtimedwait rt_sigaction \ rt_sigreturn getpid gettid kill clock_gettime mmap \ mmap2 munmap open unlink gettimeofday futex statx \ diff --git a/contrib/apparmor/usr.bin.passt-repair b/contrib/apparmor/usr.bin.passt-repair new file mode 100644 index 0000000..901189d --- /dev/null +++ b/contrib/apparmor/usr.bin.passt-repair @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# PASST - Plug A Simple Socket Transport +# for qemu/UNIX domain socket mode +# +# PASTA - Pack A Subtle Tap Abstraction +# for network namespace/tap device mode +# +# contrib/apparmor/usr.bin.passt-repair - AppArmor profile for passt-repair(1) +# +# Copyright (c) 2025 Red Hat GmbH +# Author: Stefano Brivio <sbrivio(a)redhat.com> + +abi <abi/3.0>, + +#include <tunables/global> + +profile passt-repair /usr/bin/passt-repair { + #include <abstractions/base> + /** rw, # passt's ".repair" socket might be anywhere + unix (connect, receive, send) type=stream, + + capability dac_override, # connect to passt's socket as root + 
capability net_admin, # currently needed for TCP_REPAIR socket option + capability net_raw, # what TCP_REPAIR should require instead + + network unix stream, # connect and use UNIX domain socket + network inet stream, # use TCP sockets +} diff --git a/contrib/fedora/passt.spec b/contrib/fedora/passt.spec index 7950fb9..6a83f8b 100644 --- a/contrib/fedora/passt.spec +++ b/contrib/fedora/passt.spec @@ -108,9 +108,11 @@ fi %{_bindir}/passt %{_bindir}/pasta %{_bindir}/qrap +%{_bindir}/passt-repair %{_mandir}/man1/passt.1* %{_mandir}/man1/pasta.1* %{_mandir}/man1/qrap.1* +%{_mandir}/man1/passt-repair.1* %ifarch x86_64 %{_bindir}/passt.avx2 %{_mandir}/man1/passt.avx2.1* diff --git a/contrib/selinux/passt-repair.fc b/contrib/selinux/passt-repair.fc new file mode 100644 index 0000000..bcd526e --- /dev/null +++ b/contrib/selinux/passt-repair.fc @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# PASST - Plug A Simple Socket Transport +# for qemu/UNIX domain socket mode +# +# contrib/selinux/passt-repair.fc - SELinux: File Context for passt-repair +# +# Copyright (c) 2025 Red Hat GmbH +# Author: Stefano Brivio <sbrivio(a)redhat.com> + +/usr/bin/passt-repair system_u:object_r:passt_repair_exec_t:s0 diff --git a/contrib/selinux/passt-repair.te b/contrib/selinux/passt-repair.te new file mode 100644 index 0000000..e3ffbcd --- /dev/null +++ b/contrib/selinux/passt-repair.te @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# PASST - Plug A Simple Socket Transport +# for qemu/UNIX domain socket mode +# +# contrib/selinux/passt-repair.te - SELinux: Type Enforcement for passt-repair +# +# Copyright (c) 2025 Red Hat GmbH +# Author: Stefano Brivio <sbrivio(a)redhat.com> + +policy_module(passt-repair, 0.1) + +require { + type unconfined_t; + type passt_t; + role unconfined_r; + class process transition; + + class file { read execute execute_no_trans entrypoint open map }; + class capability { dac_override net_admin net_raw }; + class chr_file { append 
open getattr read write ioctl }; + + class unix_stream_socket { create connect sendto }; + class sock_file { read write }; + + class tcp_socket { read setopt write }; + + type console_device_t; + type user_devpts_t; + type user_tmp_t; +} + +type passt_repair_t; +domain_type(passt_repair_t); +type passt_repair_exec_t; +files_type(passt_repair_exec_t); + +role unconfined_r types passt_repair_t; + +allow passt_repair_t passt_repair_exec_t:file { read execute execute_no_trans entrypoint open map }; +type_transition unconfined_t passt_repair_exec_t:process passt_repair_t; +allow unconfined_t passt_repair_t:process transition; + +allow passt_repair_t self:capability { dac_override net_admin net_raw }; + +allow passt_repair_t console_device_t:chr_file { append open getattr read write ioctl }; +allow passt_repair_t user_devpts_t:chr_file { append open getattr read write ioctl }; + +allow passt_repair_t unconfined_t:unix_stream_socket { connectto read write }; +allow passt_repair_t passt_t:unix_stream_socket { connectto read write }; +allow passt_repair_t user_tmp_t:unix_stream_socket { connectto read write }; + +allow passt_repair_t unconfined_t:sock_file { read write }; +allow passt_repair_t passt_t:sock_file { read write }; +allow passt_repair_t user_tmp_t:sock_file { read write }; + +allow passt_repair_t unconfined_t:tcp_socket { read setopt write }; +allow passt_repair_t passt_t:tcp_socket { read setopt write }; diff --git a/hooks/pre-push b/hooks/pre-push index 33a2052..8dbfa5f 100755 --- a/hooks/pre-push +++ b/hooks/pre-push @@ -56,6 +56,7 @@ cd .. 
make pkgs scp passt passt.avx2 passt.1 qrap qrap.1 "${USER_HOST}:${BIN}" scp pasta pasta.avx2 pasta.1 "${USER_HOST}:${BIN}" +scp passt-repair passt-repair.1 "${USER_HOST}:${BIN}" ssh "${USER_HOST}" "rm -f ${BIN}/*.deb" ssh "${USER_HOST}" "rm -f ${BIN}/*.rpm" diff --git a/passt-repair.1 b/passt-repair.1 new file mode 100644 index 0000000..8d07c97 --- /dev/null +++ b/passt-repair.1 @@ -0,0 +1,70 @@ +.\" SPDX-License-Identifier: GPL-2.0-or-later +.\" Copyright (c) 2025 Red Hat GmbH +.\" Author: Stefano Brivio <sbrivio(a)redhat.com> +.TH passt-repair 1 + +.SH NAME +.B passt-repair +\- Helper setting TCP_REPAIR socket options for \fBpasst\fR(1) + +.SH SYNOPSIS +.B passt-repair +\fIPATH\fR + +.SH DESCRIPTION + +.B passt-repair +is a privileged helper setting and clearing repair mode on TCP sockets on behalf +of \fBpasst\fR(1), as instructed via single-byte commands over a UNIX domain +socket, specified by \fIPATH\fR. + +It can be used to migrate TCP connections between guests without granting +additional capabilities to \fBpasst\fR(1) itself: to migrate TCP connections, +\fBpasst\fR(1) leverages repair mode, which needs the \fBCAP_NET_ADMIN\fR +capability (see \fBcapabilities\fR(7)) to be set or cleared. + +.SH PROTOCOL + +\fBpasst-repair\fR(1) connects to \fBpasst\fR(1) using the socket specified via +\fI--repair-path\fR option in \fBpasst\fR(1) itself. By default, the name is the +same as the UNIX domain socket used for guest communication, suffixed by +\fI.repair\fR. + +The messages consist of one 8-bit signed integer that can be \fITCP_REPAIR_ON\fR +(1), \fITCP_REPAIR_OFF\fR (0), or \fITCP_REPAIR_OFF_NO_WP\fR (-1), as defined by +the Linux kernel user API, and one to SCM_MAX_FD (253) sockets as SCM_RIGHTS +(see \fBunix\fR(7)) ancillary message, sent by the server, \fBpasst\fR(1). + +The client, \fBpasst-repair\fR(1), replies with the same byte (and no ancillary +message) to indicate success, and closes the connection on failure. 
+ +The server closes the connection on error or completion. + +.SH NOTES + +\fBpasst-repair\fR(1) can be granted the \fBCAP_NET_ADMIN\fR capability +(preferred, as it limits privileges to the strictly necessary ones), or it can +be run as root. + +.SH AUTHOR + +Stefano Brivio <sbrivio(a)redhat.com>. + +.SH REPORTING BUGS + +Please report issues on the bug tracker at https://bugs.passt.top/, or +send a message to the passt-user(a)passt.top mailing list, see +https://lists.passt.top/. + +.SH COPYRIGHT + +Copyright (c) 2025 Red Hat GmbH. + +\fBpasst-repair\fR is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 2 of the License, or (at your option) any +later version. + +.SH SEE ALSO + +\fBpasst\fR(1), \fBqemu\fR(1), \fBcapabilities\fR(7), \fBunix\fR(7). diff --git a/passt-repair.c b/passt-repair.c new file mode 100644 index 0000000..767a821 --- /dev/null +++ b/passt-repair.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* PASST - Plug A Simple Socket Transport + * for qemu/UNIX domain socket mode + * + * PASTA - Pack A Subtle Tap Abstraction + * for network namespace/tap device mode + * + * passt-repair.c - Privileged helper to set/clear TCP_REPAIR on sockets + * + * Copyright (c) 2025 Red Hat GmbH + * Author: Stefano Brivio <sbrivio(a)redhat.com> + * + * Connect to passt via UNIX domain socket, receive sockets via SCM_RIGHTS along + * with byte commands mapping to TCP_REPAIR values, and switch repair mode on or + * off. Reply by echoing the command. Exit on EOF. 
+ */ + +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <unistd.h> +#include <netdb.h> + +#include <netinet/tcp.h> + +#include <linux/audit.h> +#include <linux/capability.h> +#include <linux/filter.h> +#include <linux/seccomp.h> + +#include "seccomp_repair.h" + +#define SCM_MAX_FD 253 /* From Linux kernel (include/net/scm.h), not in UAPI */ + +/** + * main() - Entry point and whole program with loop + * @argc: Argument count, must be 2 + * @argv: Argument: path of UNIX domain socket to connect to + * + * Return: 0 on success (EOF), 1 on error, 2 on usage error + * + * #syscalls:repair connect setsockopt write exit_group + * #syscalls:repair socket s390x:socketcall i686:socketcall + * #syscalls:repair recvfrom recvmsg arm:recv ppc64le:recv + * #syscalls:repair sendto sendmsg arm:send ppc64le:send + */ +int main(int argc, char **argv) +{ + char buf[CMSG_SPACE(sizeof(int) * SCM_MAX_FD)] + __attribute__ ((aligned(__alignof__(struct cmsghdr)))); + struct sockaddr_un a = { AF_UNIX, "" }; + int fds[SCM_MAX_FD], s, ret, i, n; + struct sock_fprog prog; + int8_t cmd = INT8_MAX; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; + + prctl(PR_SET_DUMPABLE, 0); + + prog.len = (unsigned short)sizeof(filter_repair) / + sizeof(filter_repair[0]); + prog.filter = filter_repair; + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || + prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { + fprintf(stderr, "Failed to apply seccomp filter"); + return 1; + } + + iov = (struct iovec){ &cmd, sizeof(cmd) }; + msg = (struct msghdr){ NULL, 0, &iov, 1, buf, sizeof(buf), 0 }; + cmsg = CMSG_FIRSTHDR(&msg); + + if (argc != 2) { + fprintf(stderr, "Usage: %s PATH\n", argv[0]); + return 2; + } + + ret = snprintf(a.sun_path, sizeof(a.sun_path), "%s", argv[1]); + if (ret <= 0 || ret >= (int)sizeof(a.sun_path)) { + fprintf(stderr, "Invalid 
socket path: %s\n", argv[1]); + return 2; + } + + if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { + perror("Failed to create AF_UNIX socket"); + return 1; + } + + if (connect(s, (struct sockaddr *)&a, sizeof(a))) { + fprintf(stderr, "Failed to connect to %s: %s\n", argv[1], + strerror(errno)); + return 1; + } + +loop: + ret = recvmsg(s, &msg, 0); + if (ret < 0) { + perror("Failed to receive message"); + return 1; + } + + if (!ret) /* Done */ + return 0; + + if (!cmsg || + cmsg->cmsg_len < CMSG_LEN(sizeof(int)) || + cmsg->cmsg_len > CMSG_LEN(sizeof(int) * SCM_MAX_FD) || + cmsg->cmsg_type != SCM_RIGHTS) { + fprintf(stderr, "No/bad ancillary data from peer\n"); + return 1; + } + + n = cmsg->cmsg_len / CMSG_LEN(sizeof(int)); + memcpy(fds, CMSG_DATA(cmsg), sizeof(int) * n); + + if (cmd != TCP_REPAIR_ON && cmd != TCP_REPAIR_OFF && + cmd != TCP_REPAIR_OFF_NO_WP) { + fprintf(stderr, "Unsupported command 0x%04x\n", cmd); + return 1; + } + + for (i = 0; i < n; i++) { + int o = cmd; + + if (setsockopt(fds[i], SOL_TCP, TCP_REPAIR, &o, sizeof(o))) { + fprintf(stderr, + "Setting TCP_REPAIR to %i on socket %i: %s", o, + fds[i], strerror(errno)); + return 1; + } + + /* Close _our_ copy */ + close(fds[i]); + + /* Confirm setting by echoing the command back */ + if (send(s, &cmd, sizeof(cmd), 0) < 0) { + fprintf(stderr, "Reply to command %i: %s\n", + o, strerror(errno)); + return 1; + } + } + + goto loop; + + return 0; +} diff --git a/seccomp.sh b/seccomp.sh index 6499c58..4c521ae 100755 --- a/seccomp.sh +++ b/seccomp.sh @@ -14,8 +14,10 @@ # Author: Stefano Brivio <sbrivio(a)redhat.com> TMP="$(mktemp)" -IN="$@" OUT="$(mktemp)" +OUT_FINAL="${1}" +shift +IN="$@" [ -z "${ARCH}" ] && ARCH="$(uname -m)" [ -z "${CC}" ] && CC="cc" @@ -268,4 +270,4 @@ for __p in ${__profiles}; do gen_profile "${__p}" ${__calls} done -mv "${OUT}" seccomp.h +mv "${OUT}" "${OUT_FINAL}" -- 2.43.0
On Tue, Feb 04, 2025 at 01:05:14AM +0100, Stefano Brivio wrote: A privileged helper to set/clear TCP_REPAIR on sockets on behalf of passt. Not used yet. Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com> --- Note: I'm merging this right now, but posting anyway to ease review. [snip] +/** + * main() - Entry point and whole program with loop + * @argc: Argument count, must be 2 + * @argv: Argument: path of UNIX domain socket to connect to + * + * Return: 0 on success (EOF), 1 on error, 2 on usage error + * + * #syscalls:repair connect setsockopt write exit_group + * #syscalls:repair socket s390x:socketcall i686:socketcall + * #syscalls:repair recvfrom recvmsg arm:recv ppc64le:recv + * #syscalls:repair sendto sendmsg arm:send ppc64le:send + */ You are missing close() :(. -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson