From 55087abd8dc9802cf68cade776fe612a3f19f6a1 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 16 Feb 2016 19:23:53 -0500 Subject: libsandbox: use ptrace on apps that interpose their own allocator If an app installs its own memory allocator by overriding the internal glibc symbols, then we can easily hit a loop that cannot be broken: the dlsym functions can attempt to allocate memory, and sandbox relies on them to find the "real" functions. So when someone calls a symbol that the sandbox protects, we call dlsym, and that calls malloc, which calls back into the app, and their allocator might use another symbol such as open ... which is protected by the sandbox. So we hit the loop like: -> open -> libsandbox:open -> dlsym -> malloc -> open -> libsandbox:open -> dlsym -> malloc -> ... Change the exec checking logic to scan the ELF instead. If it exports these glibc symbols, then we have to assume it can trigger a loop, so scrub the sandbox environment to prevent us from being loaded. Then we use the out-of-process tracer (i.e. ptrace). This should generally be as robust anyways ... if it's not, that's a bug we want to fix as this is the same code used for static apps. URL: http://crbug.com/586444 Reported-by: Ryo Hashimoto Signed-off-by: Mike Frysinger --- libsandbox/libsandbox.c | 72 ++++++++++------- libsandbox/libsandbox.h | 2 +- libsandbox/wrapper-funcs/__wrapper_exec.c | 126 ++++++++++++++++++++++++++---- tests/Makefile.am | 3 + tests/malloc-2.sh | 4 + tests/malloc.at | 1 + tests/malloc_hooked_tst.c | 44 +++++++++++ 7 files changed, 210 insertions(+), 42 deletions(-) create mode 100755 tests/malloc-2.sh create mode 100644 tests/malloc_hooked_tst.c diff --git a/libsandbox/libsandbox.c b/libsandbox/libsandbox.c index 2bcff95..7555862 100644 --- a/libsandbox/libsandbox.c +++ b/libsandbox/libsandbox.c @@ -1137,7 +1137,7 @@ typedef struct { * XXX: Might be much nicer if we could serialize these vars behind the back of * the program. Might be hard to handle LD_PRELOAD though ... */ -char **sb_check_envp(char **envp, size_t *mod_cnt) +char **sb_check_envp(char **envp, size_t *mod_cnt, bool insert) { char **my_env; char *entry; @@ -1194,7 +1194,8 @@ char **sb_check_envp(char **envp, size_t *mod_cnt) } /* If we found everything, there's nothing to do! */ - if (num_vars == found_var_cnt) + if ((insert && num_vars == found_var_cnt) || + (!insert && found_var_cnt == 0)) /* Use the user's envp */ return envp; @@ -1217,33 +1218,50 @@ char **sb_check_envp(char **envp, size_t *mod_cnt) } my_env = NULL; - if (mod_cnt) { - /* Count directly due to variability with LD_PRELOAD and the value - * logic below. Getting out of sync can mean memory corruption. */ - *mod_cnt = 0; - if (unlikely(merge_ld_preload)) { - str_list_add_item(my_env, ld_preload, error); - (*mod_cnt)++; - } - for (i = 0; i < num_vars; ++i) { - if (found_vars[i] || !vars[i].value) - continue; - str_list_add_item_env(my_env, vars[i].name, vars[i].value, error); - (*mod_cnt)++; - } - - str_list_for_each_item(envp, entry, count) { - if (unlikely(merge_ld_preload && is_env_var(entry, vars[0].name, vars[0].len))) - continue; - str_list_add_item(my_env, entry, error); + if (!insert) { + if (mod_cnt) { + str_list_for_each_item(envp, entry, count) { + for (i = 0; i < num_vars; ++i) + if (is_env_var(entry, vars[i].name, vars[i].len)) { + (*mod_cnt)++; + goto skip; + } + str_list_add_item(my_env, entry, error); + skip: ; + } + } else { + for (i = 0; i < num_vars; ++i) + unsetenv(vars[i].name); } } else { - if (unlikely(merge_ld_preload)) - putenv(ld_preload); - for (i = 0; i < num_vars; ++i) { - if (found_vars[i] || !vars[i].value) - continue; - setenv(vars[i].name, vars[i].value, 1); + if (mod_cnt) { + /* Count directly due to variability with LD_PRELOAD and the value + * logic below. Getting out of sync can mean memory corruption. */ + *mod_cnt = 0; + if (unlikely(merge_ld_preload)) { + str_list_add_item(my_env, ld_preload, error); + (*mod_cnt)++; + } + for (i = 0; i < num_vars; ++i) { + if (found_vars[i] || !vars[i].value) + continue; + str_list_add_item_env(my_env, vars[i].name, vars[i].value, error); + (*mod_cnt)++; + } + + str_list_for_each_item(envp, entry, count) { + if (unlikely(merge_ld_preload && is_env_var(entry, vars[0].name, vars[0].len))) + continue; + str_list_add_item(my_env, entry, error); + } + } else { + if (unlikely(merge_ld_preload)) + putenv(ld_preload); + for (i = 0; i < num_vars; ++i) { + if (found_vars[i] || !vars[i].value) + continue; + setenv(vars[i].name, vars[i].value, 1); + } } } diff --git a/libsandbox/libsandbox.h b/libsandbox/libsandbox.h index 596084d..63882e7 100644 --- a/libsandbox/libsandbox.h +++ b/libsandbox/libsandbox.h @@ -56,7 +56,7 @@ void *get_dlsym(const char *symname, const char *symver); extern char sandbox_lib[SB_PATH_MAX]; extern bool sandbox_on; -char **sb_check_envp(char **envp, size_t *mod_cnt); +char **sb_check_envp(char **envp, size_t *mod_cnt, bool insert); void sb_cleanup_envp(char **envp, size_t mod_cnt); extern pid_t trace_pid; diff --git a/libsandbox/wrapper-funcs/__wrapper_exec.c b/libsandbox/wrapper-funcs/__wrapper_exec.c index 7107c21..f7f51ab 100644 --- a/libsandbox/wrapper-funcs/__wrapper_exec.c +++ b/libsandbox/wrapper-funcs/__wrapper_exec.c @@ -20,17 +20,20 @@ static WRAPPER_RET_TYPE (*WRAPPER_TRUE_NAME)(WRAPPER_ARGS_PROTO) = NULL; #ifndef SB_EXEC_COMMON #define SB_EXEC_COMMON -/* Check to see if this a static ELF and if so, protect using trace mechanisms */ -static void sb_check_exec(const char *filename, char *const argv[]) +/* Check to see if we can run this program in-process. If not, try to fall back + * tracing it out-of-process via some trace mechanisms (e.g. ptrace). + */ +static bool sb_check_exec(const char *filename, char *const argv[]) { int fd; unsigned char *elf; struct stat st; bool do_trace = false; + bool run_in_process = true; - fd = open(filename, O_RDONLY|O_CLOEXEC); + fd = sb_unwrapped_open_DEFAULT(filename, O_RDONLY|O_CLOEXEC, 0); if (fd == -1) - return; + return true; if (fstat(fd, &st)) goto out_fd; if (st.st_size < sizeof(Elf64_Ehdr)) @@ -57,19 +60,110 @@ static void sb_check_exec(const char *filename, char *const argv[]) * gains root just to preload libsandbox.so. That unfortunately * could easily open up people to root vulns. */ - if (getuid() == 0 || !(st.st_mode & (S_ISUID | S_ISGID))) { + if (st.st_mode & (S_ISUID | S_ISGID)) + if (getuid() != 0) + run_in_process = false; + + /* We also need to ptrace programs that interpose their own allocator. + * http://crbug.com/586444 + */ + if (run_in_process) { + static const char * const libc_alloc_syms[] = { + "__libc_calloc", + "__libc_free", + "__libc_malloc", + "__libc_realloc", + "__malloc_hook", + "__realloc_hook", + "__free_hook", + "__memalign_hook", + "__malloc_initialize_hook", + }; #define PARSE_ELF(n) \ ({ \ Elf##n##_Ehdr *ehdr = (void *)elf; \ Elf##n##_Phdr *phdr = (void *)(elf + ehdr->e_phoff); \ - uint16_t p; \ - if (st.st_size < sizeof(*ehdr)) \ - goto out_mmap; \ + Elf##n##_Addr vaddr, filesz, vsym = 0, vstr = 0; \ + Elf##n##_Off offset, symoff = 0, stroff = 0; \ + Elf##n##_Dyn *dyn; \ + Elf##n##_Sym *sym; \ + uint##n##_t ent_size = 0, str_size = 0; \ + bool dynamic = false; \ + size_t i; \ + \ if (st.st_size < ehdr->e_phoff + ehdr->e_phentsize * ehdr->e_phnum) \ goto out_mmap; \ - for (p = 0; p < ehdr->e_phnum; ++p) \ - if (phdr[p].p_type == PT_INTERP) \ - goto done; \ + \ + /* First gather the tags we care about. */ \ + for (i = 0; i < ehdr->e_phnum; ++i) { \ + switch (phdr[i].p_type) { \ + case PT_INTERP: dynamic = true; break; \ + case PT_DYNAMIC: \ + dyn = (void *)(elf + phdr[i].p_offset); \ + while (dyn->d_tag != DT_NULL) { \ + switch (dyn->d_tag) { \ + case DT_SYMTAB: vsym = dyn->d_un.d_val; break; \ + case DT_SYMENT: ent_size = dyn->d_un.d_val; break; \ + case DT_STRTAB: vstr = dyn->d_un.d_val; break; \ + case DT_STRSZ: str_size = dyn->d_un.d_val; break; \ + } \ + ++dyn; \ + } \ + break; \ + } \ + } \ + \ + if (dynamic && vsym && ent_size && vstr && str_size) { \ + /* Figure out where in the file these tables live. */ \ + for (i = 0; i < ehdr->e_phnum; ++i) { \ + vaddr = phdr[i].p_vaddr; \ + filesz = phdr[i].p_filesz; \ + offset = phdr[i].p_offset; \ + if (vsym >= vaddr && vsym < vaddr + filesz) \ + symoff = offset + (vsym - vaddr); \ + if (vstr >= vaddr && vstr < vaddr + filesz) \ + stroff = offset + (vstr - vaddr); \ + } \ + \ + /* Finally walk the symbol table. This should generally be fast as \ + * we only look at exported symbols, and the vast majority of exes \ + * out there do not export any symbols at all. \ + */ \ + if (symoff && stroff) { \ + sym = (void *)(elf + symoff); \ + /* Nowhere is the # of symbols recorded, or the size of the symbol \ + * table. Instead, we do what glibc does: assume that the string \ + * table always follows the symbol table. This seems like a poor \ + * assumption to make, but glibc has gotten by this long. We could \ + * rely on DT_HASH and walking all the buckets to find the largest \ + * symbol index, but that's also a bit hacky. \ + * \ + * We don't sanity check the ranges here as you aren't executing \ + * corrupt programs in the sandbox. \ + */ \ + for (i = 0; i < (vstr - vsym) / ent_size; ++i) { \ + char *symname = (void *)(elf + stroff + sym->st_name); \ + if (ELF##n##_ST_VISIBILITY(sym->st_other) == STV_DEFAULT && \ + sym->st_shndx != SHN_UNDEF && sym->st_shndx < SHN_LORESERVE && \ + sym->st_name && \ + /* Minor optimization to avoid strcmp. */ \ + symname[0] == '_' && symname[1] == '_') { \ + /* Blacklist internal C library symbols. */ \ + size_t j; \ + for (j = 0; j < ARRAY_SIZE(libc_alloc_syms); ++j) \ + if (!strcmp(symname, libc_alloc_syms[j])) { \ + run_in_process = false; \ + goto use_trace; \ + } \ + } \ + ++sym; \ + } \ + } \ + \ + } \ + \ + if (dynamic) \ + goto done; \ }) if (elf[EI_CLASS] == ELFCLASS32) PARSE_ELF(32); @@ -78,6 +172,7 @@ static void sb_check_exec(const char *filename, char *const argv[]) #undef PARSE_ELF } + use_trace: do_trace = trace_possible(filename, argv, elf); /* Now that we're done with stuff, clean up before forking */ @@ -90,6 +185,8 @@ static void sb_check_exec(const char *filename, char *const argv[]) if (do_trace) trace_main(filename, argv); + + return run_in_process; } #endif @@ -107,6 +204,7 @@ WRAPPER_RET_TYPE SB_HIDDEN_FUNC(WRAPPER_NAME)(WRAPPER_ARGS_PROTO_FULL) WRAPPER_RET_TYPE WRAPPER_NAME(WRAPPER_ARGS_PROTO) { WRAPPER_RET_TYPE result = WRAPPER_RET_DEFAULT; + bool run_in_process = true; /* The C library may implement some exec funcs by calling other * exec funcs. So we might get a little sandbox recursion going @@ -151,15 +249,15 @@ WRAPPER_RET_TYPE WRAPPER_NAME(WRAPPER_ARGS_PROTO) if (!SB_SAFE(check_path)) goto done; - sb_check_exec(check_path, argv); + run_in_process = sb_check_exec(check_path, argv); } #endif #ifdef EXEC_MY_ENV size_t mod_cnt; - char **my_env = sb_check_envp((char **)envp, &mod_cnt); + char **my_env = sb_check_envp((char **)envp, &mod_cnt, run_in_process); #else - sb_check_envp(environ, NULL); + sb_check_envp(environ, NULL, run_in_process); #endif restore_errno(); diff --git a/tests/Makefile.am b/tests/Makefile.am index 943ce3b..d98898f 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -73,6 +73,7 @@ check_PROGRAMS = \ \ getcwd-gnulib_tst \ libsigsegv_tst \ + malloc_hooked_tst \ malloc_mmap_tst \ pipe-fork_tst \ pipe-fork_static_tst \ @@ -92,6 +93,8 @@ AM_LDFLAGS = `expr $@ : .*_static >/dev/null && echo -all-static` sb_printf_tst_CFLAGS = -I$(top_srcdir)/libsbutil -I$(top_srcdir)/libsbutil/include sb_printf_tst_LDADD = $(top_builddir)/libsbutil/libsbutil.la +malloc_hooked_tst_LDFLAGS = $(AM_LDFLAGS) -pthread + if HAVE_LIBSIGSEGV libsigsegv_tst_LDADD = -lsigsegv endif diff --git a/tests/malloc-2.sh b/tests/malloc-2.sh new file mode 100755 index 0000000..e1cf297 --- /dev/null +++ b/tests/malloc-2.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# Since the malloc binary is in the target ABI, make sure the exec is +# launched from the same ABI so the same libsandbox.so is used. +timeout -s KILL 10 execvp-0 malloc_hooked_tst malloc_hooked_tst diff --git a/tests/malloc.at b/tests/malloc.at index 081d7d2..d364b4b 100644 --- a/tests/malloc.at +++ b/tests/malloc.at @@ -1 +1,2 @@ SB_CHECK(1) +SB_CHECK(2) diff --git a/tests/malloc_hooked_tst.c b/tests/malloc_hooked_tst.c new file mode 100644 index 0000000..18737fe --- /dev/null +++ b/tests/malloc_hooked_tst.c @@ -0,0 +1,44 @@ +/* Make sure programs that override malloc don't mess us up: + * + * libsandbox's __attribute__((constructor)) libsb_init -> + * libsandbox's malloc() -> + * dlsym("mmap") -> + * glibc's libdl calls malloc -> + * tcmalloc's internal code calls open -> + * libsandbox's open wrapper is hit -> + * libsandbox tries to initialize itself (since it never finished originally) -> + * libsandbox's malloc() -> + * dlsym() -> deadlock + * http://crbug.com/586444 + */ + +#include "headers.h" + +static void *malloc_hook(size_t size, const void *caller) +{ + int urandom_fd = open("/dev/urandom", O_RDONLY); + close(urandom_fd); + return NULL; +} + +void *(*__malloc_hook)(size_t, const void *) = &malloc_hook; + +static void *thread_start(void *arg) +{ + return arg; +} + +int main(int argc, char *argv[]) +{ + /* Make sure we reference some pthread symbols, although we don't + * really want to execute it -- our malloc is limited. */ + if (argc < 0) { + pthread_t tid; + pthread_create(&tid, NULL, thread_start, NULL); + } + + /* Trigger malloc! */ + if (malloc(100)) {} + + return 0; +} -- cgit v1.2.3-18-g5258