aboutsummaryrefslogtreecommitdiff
blob: 5be42f60bf59b396a632cdc611525cacf4c8e0cb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
/*
 * Initialize various namespaces
 *
 * Copyright 1999-2015 Gentoo Foundation
 * Licensed under the GPL-2
 */

#include "headers.h"
#include "sbutil.h"
#include "sandbox.h"

#ifdef __linux__

#include <net/if.h>

/* Supply unshare() when the C library does not: issue the raw syscall if its
 * number is known at build time, otherwise expand to a constant failure. */
#if !defined(HAVE_UNSHARE)
# if defined(__NR_unshare)
#  define unshare(x) syscall(__NR_unshare, (x))
# else
#  define unshare(x) (-1)
# endif
#endif

/* Checked wrappers: each forwards its arguments unchanged to the libc call of
 * the same name (minus the "x") and passes the "returned 0" test to
 * sb_assert().  What sb_assert() does on failure is defined elsewhere. */
#define xmount(...) sb_assert(mount(__VA_ARGS__) == 0)
#define xmkdir(...) sb_assert(mkdir(__VA_ARGS__) == 0)
#define xchmod(...) sb_assert(chmod(__VA_ARGS__) == 0)
#define xsymlink(...) sb_assert(symlink(__VA_ARGS__) == 0)

/* asprintf() wrapper that reports failure through sb_perr().
 *
 * strp is the char** that receives the allocated string; the format and its
 * arguments follow.  Evaluates to asprintf()'s return value.
 *
 * asprintf() signals failure with -1; a return of 0 is a successful
 * zero-length result, so only negative values are treated as errors.  The
 * message shows the source text of the strp argument. */
#define xasprintf(strp, ...) \
({ \
	int _ret = asprintf(strp, __VA_ARGS__); \
	if (_ret < 0) \
		sb_perr("asprintf(%s) failed", #strp); \
	_ret; \
})
/* fopen() wrapper: evaluates to the stream fopen() returned, after reporting
 * a NULL result through sb_perr().  The message shows the source text of the
 * path argument. */
#define xfopen(path, ...) \
({ \
	FILE *_fp = fopen(path, __VA_ARGS__); \
	if (_fp == NULL) \
		sb_perr("fopen(%s) failed", #path); \
	_fp; \
})

/*
 * Enter a new user namespace in which the caller's ids appear as nuid/ngid.
 *
 * uid/gid are the ids as seen from the current namespace; nuid/ngid are the
 * ids they should map to inside the new one.  Does nothing when uid already
 * equals nuid, when unshare() fails, or when CLONE_NEWUSER is not available
 * at build time.
 */
static void ns_user_switch(int uid, int gid, int nuid, int ngid)
{
#ifdef CLONE_NEWUSER
	FILE *procfile;
	char *mapping;

	/* Already the wanted user: no namespace needed. */
	if (uid == nuid)
		return;

	/* No user namespace available: silently carry on without one. */
	if (unshare(CLONE_NEWUSER) != 0)
		return;

	/* Single-id mapping: "<id inside> <id outside> <count>". */
	procfile = xfopen("/proc/self/uid_map", "we");
	xasprintf(&mapping, "%i %i 1", nuid, uid);
	fputs(mapping, procfile);
	fclose(procfile);
	free(mapping);

	/* setgroups is written before gid_map; see user_namespaces(7) for why
	 * an unprivileged process has to do it in this order. */
	procfile = xfopen("/proc/self/setgroups", "we");
	fputs("deny", procfile);
	fclose(procfile);

	procfile = xfopen("/proc/self/gid_map", "we");
	xasprintf(&mapping, "%i %i 1\n", ngid, gid);
	fputs(mapping, procfile);
	fclose(procfile);
	free(mapping);
#endif
}

/*
 * Enter a new network namespace and bring its loopback interface up.
 *
 * Does nothing when unshare() fails or CLONE_NEWNET is not available at
 * build time.  Failures after the namespace has been created are reported
 * through sb_perr().
 */
static void ns_net_setup(void)
{
#ifdef CLONE_NEWNET
	if (unshare(CLONE_NEWNET))
		return;

	/* Any socket will do; it only serves as a handle for the interface
	 * ioctls below. */
	int sock = socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0);
	if (sock < 0) {
		/* Report the real cause rather than letting the ioctls fail on
		 * an invalid descriptor. */
		sb_perr("socket(AF_LOCAL) failed");
		return;
	}

	struct ifreq ifr;

	/* Read the current flags so only UP/RUNNING are added to them. */
	strcpy(ifr.ifr_name, "lo");
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
		sb_perr("ioctl(SIOCGIFFLAGS, lo) failed");
	/* Set the name again in case the previous ioctl touched the request. */
	strcpy(ifr.ifr_name, "lo");
	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		sb_perr("ioctl(SIOCSIFFLAGS, lo) failed");

	/* The socket is no longer needed once the flags are set. */
	close(sock);
#endif
}

/*
 * Enter a new mount namespace, give it a fresh tmpfs on /tmp, and replace
 * /dev with a minimal tree holding only a fixed set of nodes.
 *
 * Returns quietly (leaving the mounts as they are at that point) when
 * unshare() fails, when "/" cannot be made private, or when the staging tmpfs
 * cannot be mounted on /dev/shm.  A failed /tmp mount only produces a
 * warning.  Every step after the staging mount goes through sb_assert().
 * Compiles to an empty function when CLONE_NEWNS is not defined.
 */
static void ns_mount_setup(void)
{
#ifdef CLONE_NEWNS
	/* Create a new mount namespace. */
	if (unshare(CLONE_NEWNS))
		return;

	/* Mark the whole tree as private so we don't mess up the parent ns. */
	if (mount("none", "/", NULL, MS_PRIVATE | MS_REC, NULL))
		return;

	/* Create a unique /tmp dir for everyone.  Failure is not fatal: warn
	 * and keep going with whatever /tmp already is. */
	if (mount("/tmp", "/tmp", "tmpfs", MS_NOSUID | MS_NODEV | MS_RELATIME, NULL))
		sb_ewarn("could not mount /tmp");

	/* Mount an empty dir inside of /dev which we'll populate with bind mounts
	 * to the existing files in /dev.  We can't just mknod ourselves because
	 * the kernel will deny those calls when we aren't actually root.  We pick
	 * the /dev/shm dir as it should generally exist and we don't care about
	 * binding its contents. */
	if (mount("sandbox-dev", "/dev/shm", "tmpfs", MS_NOSUID | MS_NOEXEC | MS_RELATIME, "mode=0755"))
		return;

	/* Now map in all the files/dirs we do want to expose. */
	int fd;
	/* bind_file(node): create an empty file /dev/shm/<node> (mode 0) to act
	 * as a mount point, then bind the real /dev/<node> over it.  Expands to
	 * several statements, so it must be used as a plain statement and not
	 * as the unbraced body of an if/for.  node must be a string literal
	 * because it is pasted onto the path prefix. */
#define bind_file(node) \
	fd = open("/dev/shm/" node, O_CREAT, 0); \
	sb_assert(fd != -1); \
	close(fd); \
	xmount("/dev/" node, "/dev/shm/" node, NULL, MS_BIND, NULL)
	/* bind_dir(node): same as bind_file() but the mount point is an empty
	 * directory.  The same usage restrictions apply. */
#define bind_dir(node) \
	xmkdir("/dev/shm/" node, 0); \
	xmount("/dev/" node, "/dev/shm/" node, NULL, MS_BIND, NULL)

	bind_file("full");
	bind_file("null");
	bind_file("ptmx");
	bind_file("tty");
	bind_file("urandom");
	bind_file("zero");
	bind_dir("pts");

	/* Fresh, empty shm dir (this becomes /dev/shm after the move below).
	 * The chmod repeats the mode, presumably because mkdir() applies the
	 * umask. */
	xmkdir("/dev/shm/shm", 01777);
	xchmod("/dev/shm/shm", 01777);

	/* The usual fd/stdin/stdout/stderr links; the last three are relative
	 * to the fd link created first. */
	xsymlink("/proc/self/fd", "/dev/shm/fd");
	xsymlink("fd/0", "/dev/shm/stdin");
	xsymlink("fd/1", "/dev/shm/stdout");
	xsymlink("fd/2", "/dev/shm/stderr");

	/* Drop the write bits on the staging dir now that it is populated. */
	xchmod("/dev/shm", 0555);

	/* Now that the new root looks good, move it to /dev. */
	xmount("/dev/shm", "/dev", NULL, MS_MOVE, NULL);
#endif
}

/*
 * Fork, hiding other processes from the child where possible.
 *
 * Preferred path: unshare a new pid namespace and fork; the child then gets
 * its own mount namespace and mounts a fresh procfs on /proc.
 *
 * Fallback (unshare() fails, or the namespace flags are not available at
 * build time, matching the #ifdef guards used elsewhere in this file): try to
 * replace /proc with one mounted with hidepid=2, then fork.
 *
 * Returns the result of fork(): 0 in the child, the child's pid in the
 * parent, -1 if the fork failed.
 */
static pid_t ns_pid_setup(void)
{
	pid_t pid;

#if defined(CLONE_NEWPID) && defined(CLONE_NEWNS)
	if (unshare(CLONE_NEWPID) == 0) {
		/* Create a child in the new pid ns. */
		pid = fork();
		if (pid == 0) {
			/* Create a new mount namespace for the child. */
			sb_assert(unshare(CLONE_NEWNS) == 0);
			/* Make /proc private first so the new procfs mount
			 * does not propagate out of this namespace. */
			xmount("none", "/proc", NULL, MS_PRIVATE | MS_REC, NULL);
			xmount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL);
		}
		return pid;
	}
#endif

	/* At least hide other procs.  Only remount if the old /proc could be
	 * detached; otherwise leave it alone. */
	if (umount2("/proc", MNT_FORCE | MNT_DETACH) == 0)
		xmount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, "hidepid=2");
	pid = fork();

	return pid;
}

/*
 * Enter every namespace enabled through the opt_use_ns_* settings, then fork.
 *
 * Returns the fork() result (taken from ns_pid_setup() when both the mount
 * and pid namespaces are enabled): 0 in the child, the child's pid in the
 * parent, -1 on failure.
 */
pid_t setup_namespaces(void)
{
	/* Each namespace gets its own unshare() call: kernels may be built
	 * with only some of them, and one combined request would then fail
	 * with EINVAL for all. */

	const int orig_uid = getuid();
	const int orig_gid = getgid();
	pid_t child;

	/* Map ourselves to id 0 first so the remaining steps can be done
	 * without being root on the outside. */
	if (opt_use_ns_user)
		ns_user_switch(orig_uid, orig_gid, 0, 0);

#ifdef CLONE_NEWIPC
	if (opt_use_ns_ipc)
		unshare(CLONE_NEWIPC);
#endif
#ifdef CLONE_SYSVSEM
	if (opt_use_ns_sysv)
		unshare(CLONE_SYSVSEM);
#endif

#ifdef CLONE_NEWUTS
	if (opt_use_ns_uts) {
		if (unshare(CLONE_NEWUTS) == 0) {
			const char hostname[] = "gentoo-sandbox";
			/* The result does not matter; the empty branch only
			 * consumes it to keep the compiler quiet. */
			if (sethostname(hostname, sizeof(hostname) - 1) != 0) {
			}
		}
	}
#endif

	if (opt_use_ns_net)
		ns_net_setup();

	if (opt_use_ns_mnt)
		ns_mount_setup();

	/* The pid namespace path is only taken together with the mount
	 * namespace; otherwise a plain fork is used. */
	child = (opt_use_ns_mnt && opt_use_ns_pid) ? ns_pid_setup() : fork();

	/* Runs on every return from the fork above: map id 0 back to the
	 * original ids. */
	if (opt_use_ns_user)
		ns_user_switch(0, 0, orig_uid, orig_gid);

	return child;
}

#endif