passt/pasta contains a number of routines designed to isolate passt from the rest of the system for security. These are spread through util.c and passt.c. Move them together into a new isolation.c file. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- Makefile | 8 +-- conf.c | 1 + isolation.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++ isolation.h | 15 +++++ passt.c | 113 +---------------------------------- pasta.c | 1 + util.c | 49 ---------------- util.h | 2 - 8 files changed, 187 insertions(+), 167 deletions(-) create mode 100644 isolation.c create mode 100644 isolation.h diff --git a/Makefile b/Makefile index 644a541..af3d1ff 100644 --- a/Makefile +++ b/Makefile @@ -32,16 +32,16 @@ CFLAGS += -DRLIMIT_STACK_VAL=$(RLIMIT_STACK_VAL) CFLAGS += -DARCH=\"$(TARGET_ARCH)\" PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c icmp.c igmp.c \ - lineread.c mld.c ndp.c netlink.c packet.c passt.c pasta.c pcap.c \ - siphash.c tap.c tcp.c tcp_splice.c udp.c util.c + isolation.c lineread.c mld.c ndp.c netlink.c packet.c passt.c pasta.c \ + pcap.c siphash.c tap.c tcp.c tcp_splice.c udp.c util.c QRAP_SRCS = qrap.c SRCS = $(PASST_SRCS) $(QRAP_SRCS) MANPAGES = passt.1 pasta.1 qrap.1 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \ - lineread.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h \ - siphash.h tap.h tcp.h tcp_splice.h udp.h util.h + isolation.h lineread.h ndp.h netlink.h packet.h passt.h pasta.h \ + pcap.h siphash.h tap.h tcp.h tcp_splice.h udp.h util.h HEADERS = $(PASST_HEADERS) # On gcc 11.2, with -O2 and -flto, tcp_hash() and siphash_20b(), if inlined, diff --git a/conf.c b/conf.c index f1aaa8a..08a2106 100644 --- a/conf.c +++ b/conf.c @@ -40,6 +40,7 @@ #include "tcp.h" #include "pasta.h" #include "lineread.h" +#include "isolation.h" /** * get_bound_ports() - Get maps of ports with bound sockets diff --git a/isolation.c b/isolation.c new file mode 100644 index 0000000..bc8240f --- /dev/null +++ b/isolation.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +/* PASST - Plug A Simple Socket Transport + * for qemu/UNIX domain socket mode + * + * PASTA - Pack A Subtle Tap Abstraction + * for network namespace/tap device mode + * + * isolation.c - Self isolation helpers + * + * Copyright Red Hat + * Author: Stefano Brivio <sbrivio(a)redhat.com> + * Author: David Gibson <david(a)gibson.dropbear.id.au> + */ + +#include <errno.h> +#include <fcntl.h> +#include <grp.h> +#include <inttypes.h> +#include <limits.h> +#include <pwd.h> +#include <sched.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <sys/mount.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <netinet/if_ether.h> + +#include <linux/audit.h> +#include <linux/capability.h> +#include <linux/filter.h> +#include <linux/seccomp.h> + +#include "util.h" +#include "seccomp.h" +#include "passt.h" +#include "isolation.h" + +/** + * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SERVICE + */ +void drop_caps(void) +{ + int i; + + for (i = 0; i < 64; i++) { + if (i == CAP_NET_BIND_SERVICE) + continue; + + prctl(PR_CAPBSET_DROP, i, 0, 0, 0); + } +} + +/** + * drop_root() - Switch to given UID and GID + */ +void drop_root(uid_t uid, gid_t gid) +{ + if (setgroups(0, NULL)) { + /* If we don't start with CAP_SETGID, this will EPERM */ + if (errno != EPERM) { + err("Can't drop supplementary groups: %s", + strerror(errno)); + exit(EXIT_FAILURE); + } + } + + if (!setgid(gid) && !setuid(uid)) + return; + + err("Can't change user/group, exiting"); + exit(EXIT_FAILURE); +} + +/** + * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root + * + * Return: negative error code on failure, zero on success + */ +int sandbox(struct ctx *c) +{ + int flags = CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS; + + if (!c->netns_only) { + if (c->pasta_userns_fd == -1) + flags |= CLONE_NEWUSER; + else + setns(c->pasta_userns_fd, CLONE_NEWUSER); + } + + c->pasta_userns_fd = -1; + + /* If we run in foreground, we have no chance to actually move to a new + * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody + * ever gets around seccomp profiles -- there's no harm in passing it. + */ + if (!c->foreground || c->mode == MODE_PASST) + flags |= CLONE_NEWPID; + + if (unshare(flags)) { + perror("unshare"); + return -errno; + } + + if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) { + perror("mount /"); + return -errno; + } + + if (mount("", TMPDIR, "tmpfs", + MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, + "nr_inodes=2,nr_blocks=0")) { + perror("mount tmpfs"); + return -errno; + } + + if (chdir(TMPDIR)) { + perror("chdir"); + return -errno; + } + + if (syscall(SYS_pivot_root, ".", ".")) { + perror("pivot_root"); + return -errno; + } + + if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) { + perror("umount2"); + return -errno; + } + + drop_caps(); /* Relative to the new user namespace this time. */ + + return 0; +} + +/** + * seccomp() - Set up seccomp filters depending on mode, won't return on failure + * @c: Execution context + */ +void seccomp(const struct ctx *c) +{ + struct sock_fprog prog; + + if (c->mode == MODE_PASST) { + prog.len = (unsigned short)ARRAY_SIZE(filter_passt); + prog.filter = filter_passt; + } else { + prog.len = (unsigned short)ARRAY_SIZE(filter_pasta); + prog.filter = filter_pasta; + } + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || + prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { + perror("prctl"); + exit(EXIT_FAILURE); + } +} diff --git a/isolation.h b/isolation.h new file mode 100644 index 0000000..2540a35 --- /dev/null +++ b/isolation.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: AGPL-3.0-or-later + * Copyright Red Hat + * Author: Stefano Brivio <sbrivio(a)redhat.com> + * Author: David Gibson <david(a)gibson.dropbear.id.au> + */ + +#ifndef ISOLATION_H +#define ISOLATION_H + +void drop_caps(void); +void drop_root(uid_t uid, gid_t gid); +int sandbox(struct ctx *c); +void seccomp(const struct ctx *c); + +#endif /* ISOLATION_H */ diff --git a/passt.c b/passt.c index bbf53d9..2a8314c 100644 --- a/passt.c +++ b/passt.c @@ -19,51 +19,25 @@ * created in a separate network namespace). */ -#include <sched.h> -#include <stdio.h> #include <sys/epoll.h> -#include <sys/socket.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <dirent.h> #include <fcntl.h> #include <sys/mman.h> #include <sys/resource.h> -#include <sys/uio.h> -#include <sys/syscall.h> -#include <sys/wait.h> -#include <sys/mount.h> -#include <netinet/ip.h> -#include <net/ethernet.h> -#include <libgen.h> #include <stdlib.h> #include <unistd.h> -#include <net/if.h> #include <netdb.h> #include <string.h> #include <errno.h> #include <time.h> #include <syslog.h> -#include <sys/stat.h> #include <sys/prctl.h> -#include <stddef.h> -#include <netinet/udp.h> -#include <netinet/tcp.h> #include <netinet/if_ether.h> -#include <linux/seccomp.h> -#include <linux/audit.h> -#include <linux/filter.h> -#include <linux/icmpv6.h> - #include "util.h" -#include "seccomp.h" #include "passt.h" #include "dhcp.h" #include "dhcpv6.h" -#include "icmp.h" -#include "tcp.h" -#include "udp.h" +#include "isolation.h" #include "pcap.h" #include "tap.h" #include "conf.h" @@ -166,91 +140,6 @@ void proto_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s, udp_update_l2_buf(eth_d, eth_s, ip_da); } -/** - * seccomp() - Set up seccomp filters depending on mode, won't return on failure - * @c: Execution context - */ -static void seccomp(const struct ctx *c) -{ - struct sock_fprog prog; - - if (c->mode == MODE_PASST) { - prog.len = (unsigned short)ARRAY_SIZE(filter_passt); - prog.filter = filter_passt; - } else { - prog.len = (unsigned short)ARRAY_SIZE(filter_pasta); - prog.filter = filter_pasta; - } - - if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || - prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { - perror("prctl"); - exit(EXIT_FAILURE); - } -} - -/** - * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root - * - * Return: negative error code on failure, zero on success - */ -static int sandbox(struct ctx *c) -{ - int flags = CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS; - - if (!c->netns_only) { - if (c->pasta_userns_fd == -1) - flags |= CLONE_NEWUSER; - else - setns(c->pasta_userns_fd, CLONE_NEWUSER); - } - - c->pasta_userns_fd = -1; - - /* If we run in foreground, we have no chance to actually move to a new - * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody - * ever gets around seccomp profiles -- there's no harm in passing it. - */ - if (!c->foreground || c->mode == MODE_PASST) - flags |= CLONE_NEWPID; - - if (unshare(flags)) { - perror("unshare"); - return -errno; - } - - if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) { - perror("mount /"); - return -errno; - } - - if (mount("", TMPDIR, "tmpfs", - MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, - "nr_inodes=2,nr_blocks=0")) { - perror("mount tmpfs"); - return -errno; - } - - if (chdir(TMPDIR)) { - perror("chdir"); - return -errno; - } - - if (syscall(SYS_pivot_root, ".", ".")) { - perror("pivot_root"); - return -errno; - } - - if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) { - perror("umount2"); - return -errno; - } - - drop_caps(); /* Relative to the new user namespace this time. */ - - return 0; -} - /** * exit_handler() - Signal handler for SIGQUIT and SIGTERM * @unused: Unused, handler deals with SIGQUIT and SIGTERM only diff --git a/pasta.c b/pasta.c index a844af2..0bdb655 100644 --- a/pasta.c +++ b/pasta.c @@ -40,6 +40,7 @@ #include "util.h" #include "passt.h" +#include "isolation.h" #include "netlink.h" /* PID of child, in case we created a namespace */ diff --git a/util.c b/util.c index 654410f..f709838 100644 --- a/util.c +++ b/util.c @@ -13,30 +13,17 @@ */ #include <sched.h> -#include <stdio.h> -#include <stdint.h> -#include <stddef.h> #include <stdlib.h> #include <unistd.h> #include <arpa/inet.h> #include <net/ethernet.h> -#include <net/if.h> -#include <netinet/tcp.h> -#include <netinet/udp.h> #include <sys/epoll.h> -#include <sys/prctl.h> -#include <sys/types.h> -#include <sys/stat.h> #include <fcntl.h> #include <syslog.h> #include <stdarg.h> #include <string.h> #include <time.h> #include <errno.h> -#include <pwd.h> -#include <grp.h> - -#include <linux/capability.h> #include "util.h" #include "passt.h" @@ -467,42 +454,6 @@ void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns, } } -/** - * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SERVICE - */ -void drop_caps(void) -{ - int i; - - for (i = 0; i < 64; i++) { - if (i == CAP_NET_BIND_SERVICE) - continue; - - prctl(PR_CAPBSET_DROP, i, 0, 0, 0); - } -} - -/** - * drop_root() - Switch to given UID and GID - */ -void drop_root(uid_t uid, gid_t gid) -{ - if (setgroups(0, NULL)) { - /* If we don't start with CAP_SETGID, this will EPERM */ - if (errno != EPERM) { - err("Can't drop supplementary groups: %s", - strerror(errno)); - exit(EXIT_FAILURE); - } - } - - if (!setgid(gid) && !setuid(uid)) - return; - - err("Can't change user/group, exiting"); - exit(EXIT_FAILURE); -} - /** * ns_enter() - Enter configured user (unless already joined) and network ns * @c: Execution context diff --git a/util.h b/util.h index 9626cb5..1003303 100644 --- a/util.h +++ b/util.h @@ -233,8 +233,6 @@ int bitmap_isset(const uint8_t *map, int bit); char *line_read(char *buf, size_t len, int fd); void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns, uint8_t *map, uint8_t *exclude); -void drop_caps(void); -void drop_root(uid_t uid, gid_t gid); int ns_enter(const struct ctx *c); void write_pidfile(int fd, pid_t pid); int __daemon(int pidfile_fd, int devnull_fd); -- 2.37.3