diff -NurpP --minimal linux-2.6.19.1/Documentation/vserver/debug.txt linux-2.6.19.1-vs2.2.0-rc6/Documentation/vserver/debug.txt --- linux-2.6.19.1/Documentation/vserver/debug.txt 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/Documentation/vserver/debug.txt 2006-11-08 04:57:48 +0100 @@ -0,0 +1,154 @@ + +debug_cvirt: + + 2 4 "vx_map_tgid: %p/%llx: %d -> %d" + "vx_rmap_tgid: %p/%llx: %d -> %d" + +debug_dlim: + + 0 1 "ALLOC (%p,#%d)%c inode (%d)" + "FREE (%p,#%d)%c inode" + 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)" + "FREE (%p,#%d)%c %lld bytes" + 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]" + 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d" + "ext3_has_free_blocks(%p): free=%lu, root=%lu" + "rcu_free_dl_info(%p)" + 4 10 "alloc_dl_info(%p,%d) = %p" + "dealloc_dl_info(%p)" + "get_dl_info(%p[#%d.%d])" + "put_dl_info(%p[#%d.%d])" + 5 20 "alloc_dl_info(%p,%d)*" + 6 40 "__hash_dl_info: %p[#%d]" + "__unhash_dl_info: %p[#%d]" + 7 80 "locate_dl_info(%p,#%d) = %p" + +debug_misc: + + 0 1 "destroy_dqhash: %p [#0x%08x] c=%d" + "new_dqhash: %p [#0x%08x]" + "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]" + "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]" + "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]" + "vroot_get_real_bdev not set" + 1 2 "cow_break_link(»%s«)" + "temp copy »%s«" + 2 4 "dentry_open(new): %p" + "dentry_open(old): %p" + "lookup_create(new): %p" + "old path »%s«" + "path_lookup(old): %d" + "vfs_create(new): %d" + "vfs_rename: %d" + "vfs_sendfile: %d" + 3 8 "fput(new_file=%p[#%d])" + "fput(old_file=%p[#%d])" + 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d" + "vx_info_kill(%p[#%d],%d,%d)*" + 5 20 "vs_reboot(%p[#%d],%d)" + 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]" + +debug_net: + + 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d" + 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d" + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d" + 4 10 "ip_route_connect(%p) %p,%p;%lx" + 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx" + 6 40 "sk,egf: %p [#%d] (from %d)" + "sk,egn: %p 
[#%d] (from %d)" + "sk,req: %p [#%d] (from %d)" + "sk: %p [#%d] (from %d)" + "tw: %p [#%d] (from %d)" + 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d" + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d" + +debug_nid: + + 0 1 "__lookup_nx_info(#%u): %p[#%u]" + "alloc_nx_info(%d) = %p" + "create_nx_info(%d) (dynamic rejected)" + "create_nx_info(%d) = %p (already there)" + "create_nx_info(%d) = %p (new)" + "dealloc_nx_info(%p)" + 1 2 "alloc_nx_info(%d)*" + "create_nx_info(%d)*" + 2 4 "get_nx_info(%p[#%d.%d])" + "put_nx_info(%p[#%d.%d])" + 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p" + "clr_nx_info(%p[#%d.%d])" + "init_nx_info(%p[#%d.%d])" + "release_nx_info(%p[#%d.%d.%d]) %p" + "set_nx_info(%p[#%d.%d])" + 4 10 "__hash_nx_info: %p[#%d]" + "__nx_dynamic_id: [#%d]" + "__unhash_nx_info: %p[#%d]" + 5 20 "moved task %p into nxi:%p[#%d]" + "nx_migrate_task(%p,%p[#%d.%d.%d])" + "task_get_nx_info(%p)" + 6 40 "nx_clear_persistent(%p[#%d])" + +debug_quota: + + 0 1 "quota_sync_dqh(%p,%d) discard inode %p" + 1 2 "quota_sync_dqh(%p,%d)" + "sync_dquots(%p,%d)" + "sync_dquots_dqh(%p,%d)" + 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)" + +debug_switch: + + 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]" + 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]" + 4 10 "%s: (%s %s) returned %s with %d" + +debug_tag: + + 7 80 "dx_parse_tag(»%s«): %d:#%d" + "dx_propagate_tag(%p[#%lu.%d]): %d,%d" + +debug_xid: + + 0 1 "__lookup_vx_info(#%u): %p[#%u]" + "alloc_vx_info(%d) = %p" + "alloc_vx_info(%d)*" + "create_vx_info(%d) (dynamic rejected)" + "create_vx_info(%d) = %p (already there)" + "create_vx_info(%d) = %p (new)" + "dealloc_vx_info(%p)" + "loc_vx_info(%d) = %p (found)" + "loc_vx_info(%d) = %p (new)" + "loc_vx_info(%d) = %p (not available)" + 1 2 "create_vx_info(%d)*" + "loc_vx_info(%d)*" + 2 4 "get_vx_info(%p[#%d.%d])" + "put_vx_info(%p[#%d.%d])" + 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p" + "clr_vx_info(%p[#%d.%d])" + "init_vx_info(%p[#%d.%d])" + "release_vx_info(%p[#%d.%d.%d]) %p" + "set_vx_info(%p[#%d.%d])" + 
"vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]" + 4 10 "__hash_vx_info: %p[#%d]" + "__unhash_vx_info: %p[#%d]" + "__vx_dynamic_id: [#%d]" + 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]" + "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]" + "moved task %p into vxi:%p[#%d]" + "task_get_vx_info(%p)" + "vx_migrate_task(%p,%p[#%d.%d])" + 6 40 "vx_clear_persistent(%p[#%d])" + "vx_exit_init(%p[#%d],%p[#%d,%d,%d])" + "vx_set_init(%p[#%d],%p[#%d,%d,%d])" + "vx_set_persistent(%p[#%d])" + "vx_set_reaper(%p[#%d],%p[#%d,%d])" + + +debug_limit: + + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s" + "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d" + + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s" + "vx_acc_pages[%5d,%s,%2d]: %5d += %5d" + "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d" diff -NurpP --minimal linux-2.6.19.1/Makefile linux-2.6.19.1-vs2.2.0-rc6/Makefile --- linux-2.6.19.1/Makefile 2006-12-13 07:46:36 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/Makefile 2006-12-21 21:50:14 +0100 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 19 -EXTRAVERSION = +EXTRAVERSION = -vs2.2.0-rc6-gentoo NAME=Avast! A bilge rat! 
# *DOCUMENTATION* diff -NurpP --minimal linux-2.6.19.1/arch/alpha/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/Kconfig --- linux-2.6.19.1/arch/alpha/Kconfig 2006-11-30 21:18:23 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/Kconfig 2006-11-08 04:57:40 +0100 @@ -632,6 +632,8 @@ source "arch/alpha/oprofile/Kconfig" source "arch/alpha/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/alpha/kernel/asm-offsets.c linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/asm-offsets.c --- linux-2.6.19.1/arch/alpha/kernel/asm-offsets.c 2006-02-15 13:54:10 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/asm-offsets.c 2006-11-08 04:57:50 +0100 @@ -36,6 +36,7 @@ void foo(void) DEFINE(PT_PTRACED, PT_PTRACED); DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(CLONE_KTHREAD, CLONE_KTHREAD); DEFINE(SIGCHLD, SIGCHLD); BLANK(); diff -NurpP --minimal linux-2.6.19.1/arch/alpha/kernel/entry.S linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/entry.S --- linux-2.6.19.1/arch/alpha/kernel/entry.S 2006-11-30 21:18:23 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/entry.S 2006-11-08 04:57:52 +0100 @@ -644,7 +644,7 @@ kernel_thread: stq $2, 152($sp) /* HAE */ /* Shuffle FLAGS to the front; add CLONE_VM. */ - ldi $1, CLONE_VM|CLONE_UNTRACED + ldi $1, CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD; or $18, $1, $16 bsr $26, sys_clone @@ -873,24 +873,15 @@ sys_getxgid: .globl sys_getxpid .ent sys_getxpid sys_getxpid: + lda $sp, -16($sp) + stq $26, 0($sp) .prologue 0 - ldq $2, TI_TASK($8) - /* See linux/kernel/timer.c sys_getppid for discussion - about this loop. 
*/ - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - ldl $0, TASK_TGID($2) -1: ldl $1, TASK_TGID($4) -#ifdef CONFIG_SMP - mov $4, $5 - mb - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - cmpeq $4, $5, $5 - beq $5, 1b -#endif - stq $1, 80($sp) + lda $16, 96($sp) + jsr $26, do_getxpid + ldq $26, 0($sp) + + lda $sp, 16($sp) ret .end sys_getxpid diff -NurpP --minimal linux-2.6.19.1/arch/alpha/kernel/osf_sys.c linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/osf_sys.c --- linux-2.6.19.1/arch/alpha/kernel/osf_sys.c 2006-11-30 21:18:23 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/osf_sys.c 2006-12-02 01:37:05 +0100 @@ -885,7 +885,7 @@ osf_gettimeofday(struct timeval32 __user { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19.1/arch/alpha/kernel/ptrace.c linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/ptrace.c --- linux-2.6.19.1/arch/alpha/kernel/ptrace.c 2006-04-09 13:49:39 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -283,6 +284,11 @@ do_sys_ptrace(long request, long pid, lo goto out_notsk; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) { + ret = -EPERM; + goto out; + } + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -NurpP --minimal linux-2.6.19.1/arch/alpha/kernel/semaphore.c linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/semaphore.c --- linux-2.6.19.1/arch/alpha/kernel/semaphore.c 2004-08-14 12:55:32 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/semaphore.c 2006-11-08 04:57:42 +0100 @@ -68,8 +68,8 @@ __down_failed(struct semaphore *sem) DECLARE_WAITQUEUE(wait, tsk); #ifdef CONFIG_DEBUG_SEMAPHORE - printk("%s(%d): down failed(%p)\n", - tsk->comm, tsk->pid, sem); + printk("%s(%d:#%u): down failed(%p)\n", + tsk->comm, tsk->pid, tsk->xid, sem); #endif tsk->state = 
TASK_UNINTERRUPTIBLE; @@ -97,8 +97,8 @@ __down_failed(struct semaphore *sem) wake_up(&sem->wait); #ifdef CONFIG_DEBUG_SEMAPHORE - printk("%s(%d): down acquired(%p)\n", - tsk->comm, tsk->pid, sem); + printk("%s(%d:#%u): down acquired(%p)\n", + tsk->comm, tsk->pid, tsk->xid, sem); #endif } @@ -110,8 +110,8 @@ __down_failed_interruptible(struct semap long ret = 0; #ifdef CONFIG_DEBUG_SEMAPHORE - printk("%s(%d): down failed(%p)\n", - tsk->comm, tsk->pid, sem); + printk("%s(%d:#%u): down failed(%p)\n", + tsk->comm, tsk->pid, tsk->xid, sem); #endif tsk->state = TASK_INTERRUPTIBLE; diff -NurpP --minimal linux-2.6.19.1/arch/alpha/kernel/systbls.S linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/systbls.S --- linux-2.6.19.1/arch/alpha/kernel/systbls.S 2006-11-30 21:18:23 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/systbls.S 2006-11-08 04:57:41 +0100 @@ -446,7 +446,7 @@ sys_call_table: .quad sys_stat64 /* 425 */ .quad sys_lstat64 .quad sys_fstat64 - .quad sys_ni_syscall /* sys_vserver */ + .quad sys_vserver /* sys_vserver */ .quad sys_ni_syscall /* sys_mbind */ .quad sys_ni_syscall /* sys_get_mempolicy */ .quad sys_ni_syscall /* sys_set_mempolicy */ diff -NurpP --minimal linux-2.6.19.1/arch/alpha/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/traps.c --- linux-2.6.19.1/arch/alpha/kernel/traps.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -182,7 +182,8 @@ die_if_kernel(char * str, struct pt_regs #ifdef CONFIG_SMP printk("CPU %d ", hard_smp_processor_id()); #endif - printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err); + printk("%s(%d[#%u]): %s %ld\n", current->comm, + current->pid, current->xid, str, err); dik_show_regs(regs, r9_15); dik_show_trace((unsigned long *)(regs+1)); dik_show_code((unsigned int *)regs->pc); diff -NurpP --minimal linux-2.6.19.1/arch/alpha/mm/init.c linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/mm/init.c --- linux-2.6.19.1/arch/alpha/mm/init.c 2006-11-30 
21:18:23 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/alpha/mm/init.c 2006-11-08 04:57:39 +0100 @@ -20,6 +20,7 @@ #include #include /* max_low_pfn */ #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/arch/arm/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/arm/Kconfig --- linux-2.6.19.1/arch/arm/Kconfig 2006-11-30 21:18:24 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/arm/Kconfig 2006-11-30 20:55:45 +0100 @@ -935,6 +935,8 @@ source "arch/arm/oprofile/Kconfig" source "arch/arm/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/arm/kernel/calls.S linux-2.6.19.1-vs2.2.0-rc6/arch/arm/kernel/calls.S --- linux-2.6.19.1/arch/arm/kernel/calls.S 2006-02-18 14:39:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/arm/kernel/calls.S 2006-11-08 04:57:41 +0100 @@ -322,7 +322,7 @@ /* 310 */ CALL(sys_request_key) CALL(sys_keyctl) CALL(ABI(sys_semtimedop, sys_oabi_semtimedop)) -/* vserver */ CALL(sys_ni_syscall) + CALL(sys_vserver) CALL(sys_ioprio_set) /* 315 */ CALL(sys_ioprio_get) CALL(sys_inotify_init) diff -NurpP --minimal linux-2.6.19.1/arch/arm/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/arm/kernel/process.c --- linux-2.6.19.1/arch/arm/kernel/process.c 2006-11-30 21:18:24 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/arm/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -246,7 +246,8 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); + printk("Pid: %d[#%u], comm: %20s\n", + current->pid, current->xid, current->comm); __show_regs(regs); __backtrace(); } @@ -469,7 +470,8 @@ pid_t kernel_thread(int (*fn)(void *), v regs.ARM_pc = (unsigned long)kernel_thread_helper; regs.ARM_cpsr = SVC_MODE; - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } 
EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19.1/arch/arm/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/arm/kernel/traps.c --- linux-2.6.19.1/arch/arm/kernel/traps.c 2006-11-30 21:18:24 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/arm/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -205,8 +205,8 @@ static void __die(const char *str, int e printk("Internal error: %s: %x [#%d]\n", str, err, ++die_counter); print_modules(); __show_regs(regs); - printk("Process %s (pid: %d, stack limit = 0x%p)\n", - tsk->comm, tsk->pid, thread + 1); + printk("Process %s (pid: %d:#%u, stack limit = 0x%p)\n", + tsk->comm, tsk->pid, tsk->xid, thread + 1); if (!user_mode(regs) || in_interrupt()) { dump_mem("Stack: ", regs->ARM_sp, diff -NurpP --minimal linux-2.6.19.1/arch/arm26/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/arm26/Kconfig --- linux-2.6.19.1/arch/arm26/Kconfig 2006-09-20 16:57:57 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/arm26/Kconfig 2006-11-08 04:57:40 +0100 @@ -234,6 +234,8 @@ source "drivers/usb/Kconfig" source "arch/arm26/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/arm26/kernel/calls.S linux-2.6.19.1-vs2.2.0-rc6/arch/arm26/kernel/calls.S --- linux-2.6.19.1/arch/arm26/kernel/calls.S 2005-03-02 12:38:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/arm26/kernel/calls.S 2006-11-08 04:57:41 +0100 @@ -257,6 +257,11 @@ __syscall_start: .long sys_lremovexattr .long sys_fremovexattr .long sys_tkill + + .rept 313 - (. 
- __syscall_start) / 4 + .long sys_ni_syscall + .endr + .long sys_vserver /* 313 */ __syscall_end: .rept NR_syscalls - (__syscall_end - __syscall_start) / 4 diff -NurpP --minimal linux-2.6.19.1/arch/arm26/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/arm26/kernel/process.c --- linux-2.6.19.1/arch/arm26/kernel/process.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/arm26/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -365,7 +365,8 @@ pid_t kernel_thread(int (*fn)(void *), v regs.ARM_r3 = (unsigned long)do_exit; regs.ARM_pc = (unsigned long)kernel_thread_helper | MODE_SVC26; - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19.1/arch/arm26/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/arm26/kernel/traps.c --- linux-2.6.19.1/arch/arm26/kernel/traps.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/arm26/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -185,8 +185,9 @@ NORET_TYPE void die(const char *str, str printk("Internal error: %s: %x\n", str, err); printk("CPU: %d\n", smp_processor_id()); show_regs(regs); - printk("Process %s (pid: %d, stack limit = 0x%p)\n", - current->comm, current->pid, end_of_stack(tsk)); + printk("Process %s (pid: %d[#%u], stack limit = 0x%p)\n", + current->comm, current->pid, + current->xid, end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { __dump_stack(tsk, (unsigned long)(regs + 1)); diff -NurpP --minimal linux-2.6.19.1/arch/cris/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/cris/Kconfig --- linux-2.6.19.1/arch/cris/Kconfig 2006-09-20 16:57:57 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/cris/Kconfig 2006-11-08 04:57:40 +0100 @@ -185,6 +185,8 @@ source "drivers/usb/Kconfig" source "arch/cris/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal 
linux-2.6.19.1/arch/cris/arch-v10/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/cris/arch-v10/kernel/process.c --- linux-2.6.19.1/arch/cris/arch-v10/kernel/process.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/cris/arch-v10/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -103,7 +103,8 @@ int kernel_thread(int (*fn)(void *), voi regs.dccr = 1 << I_DCCR_BITNR; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* setup the child's kernel stack with a pt_regs and switch_stack on it. diff -NurpP --minimal linux-2.6.19.1/arch/cris/arch-v32/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/cris/arch-v32/kernel/process.c --- linux-2.6.19.1/arch/cris/arch-v32/kernel/process.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/cris/arch-v32/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -120,7 +120,8 @@ kernel_thread(int (*fn)(void *), void * regs.ccs = 1 << (I_CCS_BITNR + CCS_SHIFT); /* Create the new process. 
*/ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19.1/arch/cris/kernel/irq.c linux-2.6.19.1-vs2.2.0-rc6/arch/cris/kernel/irq.c --- linux-2.6.19.1/arch/cris/kernel/irq.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/cris/kernel/irq.c 2006-11-30 18:26:05 +0100 @@ -92,6 +92,7 @@ skip: asmlinkage void do_IRQ(int irq, struct pt_regs * regs) { unsigned long sp; + irq_enter(); sp = rdsp(); if (unlikely((sp & (PAGE_SIZE - 1)) < (PAGE_SIZE/8))) { diff -NurpP --minimal linux-2.6.19.1/arch/frv/kernel/kernel_thread.S linux-2.6.19.1-vs2.2.0-rc6/arch/frv/kernel/kernel_thread.S --- linux-2.6.19.1/arch/frv/kernel/kernel_thread.S 2005-03-02 12:38:20 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/frv/kernel/kernel_thread.S 2006-11-08 04:57:50 +0100 @@ -13,6 +13,8 @@ #include #define CLONE_VM 0x00000100 /* set if VM shared between processes */ +#define CLONE_KTHREAD 0x10000000 /* kernel thread */ +#define CLONE_KT (CLONE_VM | CLONE_KTHREAD) /* kernel thread flags */ #define KERN_ERR "<3>" .section .rodata @@ -37,7 +39,7 @@ kernel_thread: # start by forking the current process, but with shared VM setlos.p #__NR_clone,gr7 ; syscall number - ori gr10,#CLONE_VM,gr8 ; first syscall arg [clone_flags] + ori gr10,#CLONE_KT,gr8 ; first syscall arg [clone_flags] sethi.p #0xe4e4,gr9 ; second syscall arg [newsp] setlo #0xe4e4,gr9 setlos.p #0,gr10 ; third syscall arg [parent_tidptr] diff -NurpP --minimal linux-2.6.19.1/arch/h8300/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/h8300/Kconfig --- linux-2.6.19.1/arch/h8300/Kconfig 2006-06-18 04:51:49 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/h8300/Kconfig 2006-11-08 04:57:40 +0100 @@ -199,6 +199,8 @@ source "fs/Kconfig" source "arch/h8300/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal 
linux-2.6.19.1/arch/h8300/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/h8300/kernel/process.c --- linux-2.6.19.1/arch/h8300/kernel/process.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/h8300/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -134,7 +134,7 @@ int kernel_thread(int (*fn)(void *), voi fs = get_fs(); set_fs (KERNEL_DS); - clone_arg = flags | CLONE_VM; + clone_arg = flags | CLONE_VM | CLONE_KTHREAD; __asm__("mov.l sp,er3\n\t" "sub.l er2,er2\n\t" "mov.l %2,er1\n\t" diff -NurpP --minimal linux-2.6.19.1/arch/i386/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/i386/Kconfig --- linux-2.6.19.1/arch/i386/Kconfig 2006-11-30 21:18:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/i386/Kconfig 2006-11-08 04:57:40 +0100 @@ -1153,6 +1153,8 @@ endmenu source "arch/i386/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/i386/kernel/irq.c linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/irq.c --- linux-2.6.19.1/arch/i386/kernel/irq.c 2006-11-30 21:18:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/irq.c 2006-11-30 18:26:37 +0100 @@ -84,7 +84,6 @@ fastcall unsigned int do_IRQ(struct pt_r } } #endif - #ifdef CONFIG_4KSTACKS curctx = (union irq_ctx *) current_thread_info(); @@ -124,7 +123,6 @@ fastcall unsigned int do_IRQ(struct pt_r } else #endif desc->handle_irq(irq, desc); - irq_exit(); set_irq_regs(old_regs); return 1; diff -NurpP --minimal linux-2.6.19.1/arch/i386/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/process.c --- linux-2.6.19.1/arch/i386/kernel/process.c 2006-11-30 21:18:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/process.c 2006-11-30 20:55:45 +0100 @@ -300,8 +300,10 @@ void show_regs(struct pt_regs * regs) unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); + 
printk("Pid: %d[#%u], comm: %20s\n", + current->pid, current->xid, current->comm); + printk("EIP: %04x:[<%08lx>] CPU: %d\n", + 0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); if (user_mode_vm(regs)) @@ -352,7 +354,8 @@ int kernel_thread(int (*fn)(void *), voi regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19.1/arch/i386/kernel/syscall_table.S linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/syscall_table.S --- linux-2.6.19.1/arch/i386/kernel/syscall_table.S 2006-11-30 21:18:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/syscall_table.S 2006-11-08 04:57:41 +0100 @@ -272,7 +272,7 @@ ENTRY(sys_call_table) .long sys_tgkill /* 270 */ .long sys_utimes .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ + .long sys_vserver .long sys_mbind .long sys_get_mempolicy .long sys_set_mempolicy diff -NurpP --minimal linux-2.6.19.1/arch/i386/kernel/sysenter.c linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/sysenter.c --- linux-2.6.19.1/arch/i386/kernel/sysenter.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/sysenter.c 2006-11-08 04:57:47 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -156,7 +157,7 @@ int arch_setup_additional_pages(struct l current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return = (void *)VDSO_SYM(&SYSENTER_RETURN); - mm->total_vm++; + vx_vmpages_inc(mm); up_fail: up_write(&mm->mmap_sem); return ret; diff -NurpP --minimal linux-2.6.19.1/arch/i386/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/traps.c --- linux-2.6.19.1/arch/i386/kernel/traps.c 2006-11-30 21:18:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/i386/kernel/traps.c 
2006-11-30 20:55:45 +0100 @@ -54,6 +54,8 @@ #include #include +#include +#include #include "mach_traps.h" @@ -371,8 +373,8 @@ void show_registers(struct pt_regs *regs regs->esi, regs->edi, regs->ebp, esp); printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", - TASK_COMM_LEN, current->comm, current->pid, + printk(KERN_EMERG "Process %.*s (pid: %d[#%u], ti=%p task=%p task.ti=%p)", + TASK_COMM_LEN, current->comm, current->pid, current->xid, current_thread_info(), current, current->thread_info); /* * When in-kernel, we also print out the stack and code at the @@ -461,6 +463,8 @@ void die(const char * str, struct pt_reg oops_enter(); + vxh_throw_oops(); + if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); spin_lock_irqsave(&die.lock, flags); @@ -497,9 +501,9 @@ void die(const char * str, struct pt_reg if (nl) printk("\n"); if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) != - NOTIFY_STOP) { + current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { show_registers(regs); + vxh_dump_history(); /* Executive summary in case the oops scrolled away */ esp = (unsigned long) (&regs->esp); savesegment(ss, ss); diff -NurpP --minimal linux-2.6.19.1/arch/ia64/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/Kconfig --- linux-2.6.19.1/arch/ia64/Kconfig 2006-11-30 21:18:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/Kconfig 2006-11-20 21:12:32 +0100 @@ -537,6 +537,8 @@ endmenu source "arch/ia64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/ia64/ia32/binfmt_elf32.c linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/ia32/binfmt_elf32.c --- linux-2.6.19.1/arch/ia64/ia32/binfmt_elf32.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/ia32/binfmt_elf32.c 2006-11-08 04:57:47 +0100 @@ -238,7 +238,8 @@ ia32_setup_arg_pages (struct linux_binpr 
kmem_cache_free(vm_area_cachep, mpnt); return ret; } - current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(current->mm, current->mm->total_vm - vma_pages(mpnt)); + current->mm->stack_vm = current->mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -NurpP --minimal linux-2.6.19.1/arch/ia64/ia32/ia32_entry.S linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/ia32/ia32_entry.S --- linux-2.6.19.1/arch/ia64/ia32/ia32_entry.S 2006-06-18 04:51:55 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/ia32/ia32_entry.S 2006-11-08 04:57:41 +0100 @@ -483,7 +483,7 @@ ia32_syscall_table: data8 sys_tgkill /* 270 */ data8 compat_sys_utimes data8 sys32_fadvise64_64 - data8 sys_ni_syscall + data8 sys32_vserver data8 sys_ni_syscall data8 sys_ni_syscall /* 275 */ data8 sys_ni_syscall diff -NurpP --minimal linux-2.6.19.1/arch/ia64/ia32/sys_ia32.c linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/ia32/sys_ia32.c --- linux-2.6.19.1/arch/ia64/ia32/sys_ia32.c 2006-11-30 21:18:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/ia32/sys_ia32.c 2006-11-08 04:57:44 +0100 @@ -1182,7 +1182,7 @@ sys32_gettimeofday (struct compat_timeva { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19.1/arch/ia64/kernel/asm-offsets.c linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/asm-offsets.c --- linux-2.6.19.1/arch/ia64/kernel/asm-offsets.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/asm-offsets.c 2006-11-08 04:57:50 +0100 @@ -191,6 +191,7 @@ void foo(void) /* for assembly files which can't include sched.h: */ DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); DEFINE(IA64_CLONE_VM, CLONE_VM); + DEFINE(IA64_CLONE_KTHREAD, CLONE_KTHREAD); BLANK(); DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, diff -NurpP --minimal linux-2.6.19.1/arch/ia64/kernel/entry.S linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/entry.S --- linux-2.6.19.1/arch/ia64/kernel/entry.S 2006-11-30 21:18:26 +0100 +++ 
linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/entry.S 2006-11-08 04:57:41 +0100 @@ -1576,7 +1576,7 @@ sys_call_table: data8 sys_mq_notify data8 sys_mq_getsetattr data8 sys_ni_syscall // reserved for kexec_load - data8 sys_ni_syscall // reserved for vserver + data8 sys_vserver data8 sys_waitid // 1270 data8 sys_add_key data8 sys_request_key diff -NurpP --minimal linux-2.6.19.1/arch/ia64/kernel/perfmon.c linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/perfmon.c --- linux-2.6.19.1/arch/ia64/kernel/perfmon.c 2006-11-30 21:18:27 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/perfmon.c 2006-11-08 04:57:47 +0100 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -2357,7 +2358,7 @@ pfm_smpl_buffer_alloc(struct task_struct */ insert_vm_struct(mm, vma); - mm->total_vm += size >> PAGE_SHIFT; + vx_vmpages_add(mm, size >> PAGE_SHIFT); vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, vma_pages(vma)); up_write(&task->mm->mmap_sem); diff -NurpP --minimal linux-2.6.19.1/arch/ia64/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/process.c --- linux-2.6.19.1/arch/ia64/kernel/process.c 2006-11-30 21:18:27 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -105,7 +105,8 @@ show_regs (struct pt_regs *regs) unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; print_modules(); - printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm); + printk("\nPid: %d[#%u], CPU %d, comm: %20s\n", + current->pid, current->xid, smp_processor_id(), current->comm); printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n", regs->cr_ipsr, regs->cr_ifs, ip, print_tainted()); print_symbol("ip is at %s\n", ip); @@ -688,7 +689,8 @@ kernel_thread (int (*fn)(void *), void * regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR); regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET; regs.sw.pr = (1 << PRED_KERNEL_STACK); - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 
0, &regs.pt, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs.pt, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19.1/arch/ia64/kernel/ptrace.c linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/ptrace.c --- linux-2.6.19.1/arch/ia64/kernel/ptrace.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -1442,6 +1443,9 @@ sys_ptrace (long request, pid_t pid, uns read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) + goto out_tsk; + ret = -EPERM; if (pid == 1) /* no messing around with init! */ goto out_tsk; diff -NurpP --minimal linux-2.6.19.1/arch/ia64/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/traps.c --- linux-2.6.19.1/arch/ia64/kernel/traps.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -106,8 +106,9 @@ die (const char *str, struct pt_regs *re put_cpu(); if (++die.lock_owner_depth < 3) { - printk("%s[%d]: %s %ld [%d]\n", - current->comm, current->pid, str, err, ++die_counter); + printk("%s[%d[#%u]]: %s %ld [%d]\n", + current->comm, current->pid, current->xid, + str, err, ++die_counter); (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); show_regs(regs); } else @@ -331,8 +332,9 @@ handle_fpu_swa (int fp_fault, struct pt_ last_time = jiffies; ++fpu_swa_count; printk(KERN_WARNING - "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", - current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); + "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n", + current->comm, current->pid, current->xid, + regs->cr_iip + ia64_psr(regs)->ri, isr); } exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr, diff -NurpP --minimal linux-2.6.19.1/arch/ia64/mm/fault.c 
linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/mm/fault.c --- linux-2.6.19.1/arch/ia64/mm/fault.c 2006-11-30 21:18:27 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/mm/fault.c 2006-11-08 04:57:40 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/arch/ia64/sn/kernel/xpc_main.c linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/sn/kernel/xpc_main.c --- linux-2.6.19.1/arch/ia64/sn/kernel/xpc_main.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ia64/sn/kernel/xpc_main.c 2006-11-08 04:57:40 +0100 @@ -108,6 +108,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 0644, NULL, &proc_dointvec_minmax, + NULL, &sysctl_intvec, NULL, &xpc_hb_min_interval, @@ -121,6 +122,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 0644, NULL, &proc_dointvec_minmax, + NULL, &sysctl_intvec, NULL, &xpc_hb_check_min_interval, @@ -145,6 +147,7 @@ static ctl_table xpc_sys_xpc_dir[] = { 0644, NULL, &proc_dointvec_minmax, + NULL, &sysctl_intvec, NULL, &xpc_disengage_request_min_timelimit, diff -NurpP --minimal linux-2.6.19.1/arch/m32r/kernel/irq.c linux-2.6.19.1-vs2.2.0-rc6/arch/m32r/kernel/irq.c --- linux-2.6.19.1/arch/m32r/kernel/irq.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m32r/kernel/irq.c 2006-11-30 18:26:50 +0100 @@ -78,6 +78,7 @@ skip: asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs) { struct pt_regs *old_regs; + old_regs = set_irq_regs(regs); irq_enter(); diff -NurpP --minimal linux-2.6.19.1/arch/m32r/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/m32r/kernel/process.c --- linux-2.6.19.1/arch/m32r/kernel/process.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m32r/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -211,8 +211,8 @@ int kernel_thread(int (*fn)(void *), voi regs.psw = M32R_PSW_BIE; /* Ok, create the new process. 
*/ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, - NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19.1/arch/m32r/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/m32r/kernel/traps.c --- linux-2.6.19.1/arch/m32r/kernel/traps.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m32r/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -195,8 +195,9 @@ static void show_registers(struct pt_reg } else { printk("SPI: %08lx\n", sp); } - printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)", - current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current); + printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)", + current->comm, current->pid, current->xid, + 0xffff & i, 4096+(unsigned long)current); /* * When in-kernel, we also print out the stack and code at the diff -NurpP --minimal linux-2.6.19.1/arch/m68k/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/m68k/Kconfig --- linux-2.6.19.1/arch/m68k/Kconfig 2006-11-30 21:18:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m68k/Kconfig 2006-11-08 04:57:40 +0100 @@ -654,6 +654,8 @@ source "fs/Kconfig" source "arch/m68k/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/m68k/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/m68k/kernel/process.c --- linux-2.6.19.1/arch/m68k/kernel/process.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m68k/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -159,7 +159,8 @@ int kernel_thread(int (*fn)(void *), voi { register long retval __asm__ ("d0"); - register long clone_arg __asm__ ("d1") = flags | CLONE_VM | CLONE_UNTRACED; + register long clone_arg __asm__ ("d1") = + flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD; retval = __NR_clone; __asm__ __volatile__ diff -NurpP --minimal linux-2.6.19.1/arch/m68k/kernel/ptrace.c 
linux-2.6.19.1-vs2.2.0-rc6/arch/m68k/kernel/ptrace.c --- linux-2.6.19.1/arch/m68k/kernel/ptrace.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m68k/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -279,6 +280,8 @@ long arch_ptrace(struct task_struct *chi ret = ptrace_request(child, request, addr, data); break; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) + goto out_tsk; return ret; out_eio: diff -NurpP --minimal linux-2.6.19.1/arch/m68k/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/m68k/kernel/traps.c --- linux-2.6.19.1/arch/m68k/kernel/traps.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m68k/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -899,8 +899,8 @@ void show_registers(struct pt_regs *regs printk("d4: %08lx d5: %08lx a0: %08lx a1: %08lx\n", regs->d4, regs->d5, regs->a0, regs->a1); - printk("Process %s (pid: %d, task=%p)\n", - current->comm, current->pid, current); + printk("Process %s (pid: %d[#%u], task=%p)\n", + current->comm, current->pid, current->xid, current); addr = (unsigned long)&fp->un; printk("Frame format=%X ", regs->format); switch (regs->format) { diff -NurpP --minimal linux-2.6.19.1/arch/m68knommu/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/m68knommu/Kconfig --- linux-2.6.19.1/arch/m68knommu/Kconfig 2006-11-30 21:18:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m68knommu/Kconfig 2006-11-08 04:57:40 +0100 @@ -663,6 +663,8 @@ source "fs/Kconfig" source "arch/m68knommu/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/m68knommu/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/m68knommu/kernel/process.c --- linux-2.6.19.1/arch/m68knommu/kernel/process.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m68knommu/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -122,7 +122,7 @@ void show_regs(struct pt_regs * regs) int kernel_thread(int 
(*fn)(void *), void * arg, unsigned long flags) { int retval; - long clone_arg = flags | CLONE_VM; + long clone_arg = flags | CLONE_VM | CLONE_KTHREAD; mm_segment_t fs; fs = get_fs(); diff -NurpP --minimal linux-2.6.19.1/arch/m68knommu/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/m68knommu/kernel/traps.c --- linux-2.6.19.1/arch/m68knommu/kernel/traps.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/m68knommu/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -80,8 +80,9 @@ void die_if_kernel(char *str, struct pt_ printk(KERN_EMERG "d4: %08lx d5: %08lx a0: %08lx a1: %08lx\n", fp->d4, fp->d5, fp->a0, fp->a1); - printk(KERN_EMERG "Process %s (pid: %d, stackpage=%08lx)\n", - current->comm, current->pid, PAGE_SIZE+(unsigned long)current); + printk(KERN_EMERG "Process %s (pid: %d[#%u], stackpage=%08lx)\n", + current->comm, current->pid, current->xid, + PAGE_SIZE+(unsigned long)current); show_stack(NULL, (unsigned long *)fp); do_exit(SIGSEGV); } diff -NurpP --minimal linux-2.6.19.1/arch/mips/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/mips/Kconfig --- linux-2.6.19.1/arch/mips/Kconfig 2006-11-30 21:18:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/mips/Kconfig 2006-11-08 21:52:07 +0100 @@ -2006,6 +2006,8 @@ source "arch/mips/oprofile/Kconfig" source "arch/mips/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/mips/kernel/linux32.c linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/linux32.c --- linux-2.6.19.1/arch/mips/kernel/linux32.c 2006-11-30 21:18:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/linux32.c 2006-12-02 01:37:05 +0100 @@ -300,7 +300,7 @@ sys32_gettimeofday(struct compat_timeval { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19.1/arch/mips/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/process.c --- linux-2.6.19.1/arch/mips/kernel/process.c 
2006-11-30 21:18:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -271,7 +271,8 @@ long kernel_thread(int (*fn)(void *), vo #endif /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19.1/arch/mips/kernel/ptrace.c linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/ptrace.c --- linux-2.6.19.1/arch/mips/kernel/ptrace.c 2006-11-30 21:18:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -172,6 +173,9 @@ long arch_ptrace(struct task_struct *chi { int ret; + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) + goto out; + switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. 
*/ diff -NurpP --minimal linux-2.6.19.1/arch/mips/kernel/scall32-o32.S linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/scall32-o32.S --- linux-2.6.19.1/arch/mips/kernel/scall32-o32.S 2006-11-30 21:18:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/scall32-o32.S 2006-11-08 04:57:41 +0100 @@ -619,7 +619,7 @@ einval: li v0, -EINVAL sys sys_mq_timedreceive 5 sys sys_mq_notify 2 /* 4275 */ sys sys_mq_getsetattr 3 - sys sys_ni_syscall 0 /* sys_vserver */ + sys sys_vserver 3 sys sys_waitid 5 sys sys_ni_syscall 0 /* available, was setaltroot */ sys sys_add_key 5 /* 4280 */ diff -NurpP --minimal linux-2.6.19.1/arch/mips/kernel/scall64-64.S linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/scall64-64.S --- linux-2.6.19.1/arch/mips/kernel/scall64-64.S 2006-11-30 21:18:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/scall64-64.S 2006-11-08 04:57:41 +0100 @@ -434,7 +434,7 @@ sys_call_table: PTR sys_mq_timedreceive PTR sys_mq_notify PTR sys_mq_getsetattr /* 5235 */ - PTR sys_ni_syscall /* sys_vserver */ + PTR sys_vserver PTR sys_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key diff -NurpP --minimal linux-2.6.19.1/arch/mips/kernel/scall64-n32.S linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/scall64-n32.S --- linux-2.6.19.1/arch/mips/kernel/scall64-n32.S 2006-11-30 21:18:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/scall64-n32.S 2006-11-08 04:57:41 +0100 @@ -360,7 +360,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_mq_timedreceive PTR compat_sys_mq_notify PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* 6240, sys_vserver */ + PTR sys32_vserver /* 6240 */ PTR sysn32_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key diff -NurpP --minimal linux-2.6.19.1/arch/mips/kernel/scall64-o32.S linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/scall64-o32.S --- linux-2.6.19.1/arch/mips/kernel/scall64-o32.S 2006-11-30 21:18:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/scall64-o32.S 2006-11-08 04:57:41 +0100 @@ -482,7 
+482,7 @@ sys_call_table: PTR compat_sys_mq_timedreceive PTR compat_sys_mq_notify /* 4275 */ PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* sys_vserver */ + PTR sys32_vserver PTR sys32_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key /* 4280 */ diff -NurpP --minimal linux-2.6.19.1/arch/mips/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/traps.c --- linux-2.6.19.1/arch/mips/kernel/traps.c 2006-11-30 21:18:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/mips/kernel/traps.c 2006-11-08 21:52:08 +0100 @@ -297,8 +297,9 @@ void show_registers(struct pt_regs *regs { show_regs(regs); print_modules(); - printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n", - current->comm, current->pid, current_thread_info(), current); + printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p)\n", + current->comm, current->pid, current->xid, + current_thread_info(), current); show_stacktrace(current, regs); show_code((unsigned int *) regs->cp0_epc); printk("\n"); diff -NurpP --minimal linux-2.6.19.1/arch/parisc/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/Kconfig --- linux-2.6.19.1/arch/parisc/Kconfig 2006-11-30 21:18:30 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/Kconfig 2006-11-08 04:57:40 +0100 @@ -257,6 +257,8 @@ source "arch/parisc/oprofile/Kconfig" source "arch/parisc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/parisc/kernel/entry.S linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/entry.S --- linux-2.6.19.1/arch/parisc/kernel/entry.S 2006-11-30 21:18:30 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/entry.S 2006-11-08 04:57:50 +0100 @@ -761,6 +761,7 @@ fault_vector_11: #define CLONE_VM 0x100 /* Must agree with */ #define CLONE_UNTRACED 0x00800000 +#define CLONE_KTHREAD 0x10000000 .export __kernel_thread, code .import do_fork diff -NurpP --minimal linux-2.6.19.1/arch/parisc/kernel/process.c 
linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/process.c --- linux-2.6.19.1/arch/parisc/kernel/process.c 2006-11-30 21:18:30 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -173,7 +173,7 @@ pid_t kernel_thread(int (*fn)(void *), v * kernel_thread can become a #define. */ - return __kernel_thread(fn, arg, flags); + return __kernel_thread(fn, arg, flags | CLONE_KTHREAD); } EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19.1/arch/parisc/kernel/sys_parisc32.c linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/sys_parisc32.c --- linux-2.6.19.1/arch/parisc/kernel/sys_parisc32.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/sys_parisc32.c 2006-11-08 04:57:44 +0100 @@ -204,11 +204,11 @@ static inline long get_ts32(struct times asmlinkage int sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) { - extern void do_gettimeofday(struct timeval *tv); + extern void vx_gettimeofday(struct timeval *tv); if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_compat_timeval(tv, &ktv)) return -EFAULT; } @@ -612,6 +612,7 @@ asmlinkage int sys32_sysinfo(struct sysi do { seq = read_seqbegin(&xtime_lock); + /* FIXME: requires vx virtualization */ val.uptime = jiffies / HZ; val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); diff -NurpP --minimal linux-2.6.19.1/arch/parisc/kernel/syscall_table.S linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/syscall_table.S --- linux-2.6.19.1/arch/parisc/kernel/syscall_table.S 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/syscall_table.S 2006-11-08 04:57:41 +0100 @@ -368,7 +368,7 @@ ENTRY_COMP(mbind) /* 260 */ ENTRY_COMP(get_mempolicy) ENTRY_COMP(set_mempolicy) - ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ + ENTRY_DIFF(vserver) ENTRY_SAME(add_key) ENTRY_SAME(request_key) /* 265 */ ENTRY_SAME(keyctl) diff -NurpP --minimal linux-2.6.19.1/arch/parisc/kernel/traps.c 
linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/traps.c --- linux-2.6.19.1/arch/parisc/kernel/traps.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -210,8 +210,9 @@ void die_if_kernel(char *str, struct pt_ if (err == 0) return; /* STFU */ - printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n", - current->comm, current->pid, str, err, regs->iaoq[0]); + printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n", + current->comm, current->pid, current->xid, + str, err, regs->iaoq[0]); #ifdef PRINT_USER_FAULTS /* XXX for debugging only */ show_regs(regs); @@ -242,8 +243,8 @@ void die_if_kernel(char *str, struct pt_ if (!console_drivers) pdc_console_restart(); - printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n", - current->comm, current->pid, str, err); + printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n", + current->comm, current->pid, current->xid, str, err); show_regs(regs); if (in_interrupt()) diff -NurpP --minimal linux-2.6.19.1/arch/parisc/mm/fault.c linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/mm/fault.c --- linux-2.6.19.1/arch/parisc/mm/fault.c 2006-06-18 04:52:15 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/parisc/mm/fault.c 2006-11-08 04:57:42 +0100 @@ -213,8 +213,9 @@ bad_area: #ifdef PRINT_USER_FAULTS printk(KERN_DEBUG "\n"); - printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n", - tsk->pid, tsk->comm, code, address); + printk(KERN_DEBUG "do_page_fault() pid=%d:#%u " + "command='%s' type=%lu address=0x%08lx\n", + tsk->pid, tsk->xid, tsk->comm, code, address); if (vma) { printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n", vma->vm_start, vma->vm_end); diff -NurpP --minimal linux-2.6.19.1/arch/powerpc/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/Kconfig --- linux-2.6.19.1/arch/powerpc/Kconfig 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/Kconfig 2006-11-20 21:12:32 +0100 @@ -1102,6 +1102,8 @@ endmenu source 
"arch/powerpc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" config KEYS_COMPAT diff -NurpP --minimal linux-2.6.19.1/arch/powerpc/kernel/asm-offsets.c linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/asm-offsets.c --- linux-2.6.19.1/arch/powerpc/kernel/asm-offsets.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/asm-offsets.c 2006-11-08 04:57:50 +0100 @@ -243,6 +243,7 @@ int main(void) DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(CLONE_KTHREAD, CLONE_KTHREAD); #ifndef CONFIG_PPC64 DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); diff -NurpP --minimal linux-2.6.19.1/arch/powerpc/kernel/irq.c linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/irq.c --- linux-2.6.19.1/arch/powerpc/kernel/irq.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/irq.c 2006-11-30 18:27:23 +0100 @@ -53,6 +53,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/arch/powerpc/kernel/misc_32.S linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/misc_32.S --- linux-2.6.19.1/arch/powerpc/kernel/misc_32.S 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/misc_32.S 2006-11-08 04:57:50 +0100 @@ -749,7 +749,7 @@ _GLOBAL(kernel_thread) mr r30,r3 /* function */ mr r31,r4 /* argument */ ori r3,r5,CLONE_VM /* flags */ - oris r3,r3,CLONE_UNTRACED>>16 + oris r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16 li r4,0 /* new sp (unused) */ li r0,__NR_clone sc diff -NurpP --minimal linux-2.6.19.1/arch/powerpc/kernel/misc_64.S linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/misc_64.S --- linux-2.6.19.1/arch/powerpc/kernel/misc_64.S 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/misc_64.S 2006-11-08 04:57:50 +0100 @@ -394,7 +394,7 @@ _GLOBAL(kernel_thread) mr r29,r3 mr r30,r4 ori r3,r5,CLONE_VM /* flags */ - oris r3,r3,(CLONE_UNTRACED>>16) + oris r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16 li r4,0 /* new sp (unused) */ 
li r0,__NR_clone sc diff -NurpP --minimal linux-2.6.19.1/arch/powerpc/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/process.c --- linux-2.6.19.1/arch/powerpc/kernel/process.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/process.c 2006-11-08 04:57:42 +0100 @@ -425,8 +425,9 @@ void show_regs(struct pt_regs * regs) trap = TRAP(regs); if (trap == 0x300 || trap == 0x600) printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); - printk("TASK = %p[%d] '%s' THREAD: %p", - current, current->pid, current->comm, task_thread_info(current)); + printk("TASK = %p[%d,#%u] '%s' THREAD: %p", + current, current->pid, current->xid, + current->comm, task_thread_info(current)); #ifdef CONFIG_SMP printk(" CPU: %d", smp_processor_id()); diff -NurpP --minimal linux-2.6.19.1/arch/powerpc/kernel/sys_ppc32.c linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/sys_ppc32.c --- linux-2.6.19.1/arch/powerpc/kernel/sys_ppc32.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/sys_ppc32.c 2006-11-08 04:57:44 +0100 @@ -275,7 +275,7 @@ asmlinkage long compat_sys_gettimeofday( { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19.1/arch/powerpc/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/traps.c --- linux-2.6.19.1/arch/powerpc/kernel/traps.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/traps.c 2006-11-08 21:52:08 +0100 @@ -888,8 +888,9 @@ void nonrecoverable_exception(struct pt_ void trace_syscall(struct pt_regs *regs) { - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", - current, current->pid, regs->nip, regs->link, regs->gpr[0], + printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, current->pid, current->xid, + regs->nip, regs->link, regs->gpr[0], regs->ccr&0x10000000?"Error=":"", regs->gpr[3], 
print_tainted()); } diff -NurpP --minimal linux-2.6.19.1/arch/powerpc/kernel/vdso.c linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/vdso.c --- linux-2.6.19.1/arch/powerpc/kernel/vdso.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/powerpc/kernel/vdso.c 2006-11-08 04:57:47 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -295,7 +296,7 @@ int arch_setup_additional_pages(struct l /* Put vDSO base into mm struct and account for memory usage */ current->mm->context.vdso_base = vdso_base; - mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + vx_vmpages_add(mm, (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); up_write(&mm->mmap_sem); return 0; diff -NurpP --minimal linux-2.6.19.1/arch/ppc/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/ppc/Kconfig --- linux-2.6.19.1/arch/ppc/Kconfig 2006-11-30 21:18:32 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ppc/Kconfig 2006-11-08 04:57:40 +0100 @@ -1421,6 +1421,8 @@ source "arch/powerpc/oprofile/Kconfig" source "arch/ppc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/ppc/kernel/asm-offsets.c linux-2.6.19.1-vs2.2.0-rc6/arch/ppc/kernel/asm-offsets.c --- linux-2.6.19.1/arch/ppc/kernel/asm-offsets.c 2006-09-20 16:58:01 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ppc/kernel/asm-offsets.c 2006-11-08 04:57:50 +0100 @@ -121,6 +121,7 @@ main(void) DEFINE(TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(CLONE_KTHREAD, CLONE_KTHREAD); DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); /* About the CPU features table */ diff -NurpP --minimal linux-2.6.19.1/arch/ppc/kernel/misc.S linux-2.6.19.1-vs2.2.0-rc6/arch/ppc/kernel/misc.S --- linux-2.6.19.1/arch/ppc/kernel/misc.S 2006-11-30 21:18:32 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ppc/kernel/misc.S 2006-11-08 04:57:50 +0100 @@ -848,7 +848,7 @@ _GLOBAL(kernel_thread) mr r30,r3 
/* function */ mr r31,r4 /* argument */ ori r3,r5,CLONE_VM /* flags */ - oris r3,r3,CLONE_UNTRACED>>16 + oris r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16 li r4,0 /* new sp (unused) */ li r0,__NR_clone sc diff -NurpP --minimal linux-2.6.19.1/arch/ppc/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/ppc/kernel/traps.c --- linux-2.6.19.1/arch/ppc/kernel/traps.c 2006-11-30 21:18:32 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/ppc/kernel/traps.c 2006-11-08 21:52:08 +0100 @@ -748,8 +748,9 @@ void nonrecoverable_exception(struct pt_ void trace_syscall(struct pt_regs *regs) { - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", - current, current->pid, regs->nip, regs->link, regs->gpr[0], + printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, current->pid, current->xid, + regs->nip, regs->link, regs->gpr[0], regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); } diff -NurpP --minimal linux-2.6.19.1/arch/s390/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/s390/Kconfig --- linux-2.6.19.1/arch/s390/Kconfig 2006-11-30 21:18:32 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/s390/Kconfig 2006-11-08 21:52:08 +0100 @@ -519,6 +519,8 @@ endmenu source "arch/s390/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/s390/kernel/compat_linux.c linux-2.6.19.1-vs2.2.0-rc6/arch/s390/kernel/compat_linux.c --- linux-2.6.19.1/arch/s390/kernel/compat_linux.c 2006-11-30 21:18:32 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/s390/kernel/compat_linux.c 2006-11-08 04:57:44 +0100 @@ -600,7 +600,7 @@ asmlinkage long sys32_gettimeofday(struc { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19.1/arch/s390/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/s390/kernel/process.c --- linux-2.6.19.1/arch/s390/kernel/process.c 2006-11-30 21:18:32 +0100 +++ 
linux-2.6.19.1-vs2.2.0-rc6/arch/s390/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -165,9 +165,9 @@ void show_regs(struct pt_regs *regs) struct task_struct *tsk = current; printk("CPU: %d %s\n", task_thread_info(tsk)->cpu, print_tainted()); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, (void *) tsk, - (void *) tsk->thread.ksp); + printk("Process %s (pid: %d[#%u], task: %p, ksp: %p)\n", + current->comm, current->pid, current->xid, + (void *) tsk, (void *) tsk->thread.ksp); show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ @@ -198,7 +198,7 @@ int kernel_thread(int (*fn)(void *), voi regs.orig_gpr2 = -1; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, 0, &regs, 0, NULL, NULL); } diff -NurpP --minimal linux-2.6.19.1/arch/s390/kernel/ptrace.c linux-2.6.19.1-vs2.2.0-rc6/arch/s390/kernel/ptrace.c --- linux-2.6.19.1/arch/s390/kernel/ptrace.c 2006-06-18 04:52:33 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/s390/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -723,7 +724,13 @@ sys_ptrace(long request, long pid, long goto out; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) { + ret = -EPERM; + goto out_tsk; + } + ret = do_ptrace(child, request, addr, data); +out_tsk: put_task_struct(child); out: unlock_kernel(); diff -NurpP --minimal linux-2.6.19.1/arch/s390/kernel/syscalls.S linux-2.6.19.1-vs2.2.0-rc6/arch/s390/kernel/syscalls.S --- linux-2.6.19.1/arch/s390/kernel/syscalls.S 2006-11-30 21:18:32 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/s390/kernel/syscalls.S 2006-11-08 04:57:41 +0100 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) 
SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) -NI_SYSCALL /* reserved for vserver */ +SYSCALL(sys_vserver,sys_vserver,sys32_vserver) SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) diff -NurpP --minimal linux-2.6.19.1/arch/sh/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/sh/Kconfig --- linux-2.6.19.1/arch/sh/Kconfig 2006-11-30 21:18:32 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sh/Kconfig 2006-11-08 04:57:40 +0100 @@ -627,6 +627,8 @@ source "arch/sh/oprofile/Kconfig" source "arch/sh/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/sh/kernel/irq.c linux-2.6.19.1-vs2.2.0-rc6/arch/sh/kernel/irq.c --- linux-2.6.19.1/arch/sh/kernel/irq.c 2006-11-30 21:18:34 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sh/kernel/irq.c 2006-11-30 18:27:47 +0100 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19.1/arch/sh/kernel/kgdb_stub.c linux-2.6.19.1-vs2.2.0-rc6/arch/sh/kernel/kgdb_stub.c --- linux-2.6.19.1/arch/sh/kernel/kgdb_stub.c 2006-11-30 21:18:34 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sh/kernel/kgdb_stub.c 2006-11-08 04:57:52 +0100 @@ -389,7 +389,7 @@ static struct task_struct *get_thread(in if (pid == PID_MAX) pid = 0; /* First check via PID */ - thread = find_task_by_pid(pid); + thread = find_task_by_real_pid(pid); if (thread) return thread; diff -NurpP --minimal linux-2.6.19.1/arch/sh/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/sh/kernel/process.c --- linux-2.6.19.1/arch/sh/kernel/process.c 2006-11-30 21:18:34 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sh/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -104,7 +104,8 @@ void machine_power_off(void) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid : %d, Comm: %20s\n", 
current->pid, current->comm); + printk("Pid : %d:#%u, Comm: %20s\n", + current->pid, current->xid, current->comm); print_symbol("PC is at %s\n", instruction_pointer(regs)); printk("PC : %08lx SP : %08lx SR : %08lx ", regs->pc, regs->regs[15], regs->sr); @@ -164,7 +165,8 @@ int kernel_thread(int (*fn)(void *), voi regs.sr = (1 << 30); /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19.1/arch/sh/kernel/vsyscall/vsyscall.c linux-2.6.19.1-vs2.2.0-rc6/arch/sh/kernel/vsyscall/vsyscall.c --- linux-2.6.19.1/arch/sh/kernel/vsyscall/vsyscall.c 2006-11-30 21:18:34 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sh/kernel/vsyscall/vsyscall.c 2006-12-02 01:37:05 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include /* * Should the kernel map a VDSO page into processes and pass its @@ -120,7 +121,7 @@ int arch_setup_additional_pages(struct l current->mm->context.vdso = (void *)addr; - mm->total_vm++; + vx_vmpages_inc(mm); up_fail: up_write(&mm->mmap_sem); return ret; diff -NurpP --minimal linux-2.6.19.1/arch/sh64/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/sh64/kernel/process.c --- linux-2.6.19.1/arch/sh64/kernel/process.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sh64/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -400,8 +400,8 @@ int kernel_thread(int (*fn)(void *), voi regs.pc = (unsigned long)kernel_thread_helper; regs.sr = (1 << 30); - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, - &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19.1/arch/sh64/mm/fault.c linux-2.6.19.1-vs2.2.0-rc6/arch/sh64/mm/fault.c --- linux-2.6.19.1/arch/sh64/mm/fault.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sh64/mm/fault.c 2006-11-08 04:57:42 +0100 @@ -82,7 
+82,7 @@ static inline void print_vma(struct vm_a static inline void print_task(struct task_struct *tsk) { - printk("Task pid %d\n", tsk->pid); + printk("Task pid %d:#%u\n", tsk->pid, tsk->xid); } static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address) diff -NurpP --minimal linux-2.6.19.1/arch/sparc/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/Kconfig --- linux-2.6.19.1/arch/sparc/Kconfig 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/Kconfig 2006-11-08 04:57:40 +0100 @@ -298,6 +298,8 @@ endmenu source "arch/sparc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/sparc/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/kernel/process.c --- linux-2.6.19.1/arch/sparc/kernel/process.c 2006-09-20 16:58:01 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -705,7 +705,8 @@ pid_t kernel_thread(int (*fn)(void *), v /* Notreached by child. 
*/ "1: mov %%o0, %0\n\t" : "=r" (retval) : - "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED), + "i" (__NR_clone), "r" (flags | + CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD), "i" (__NR_exit), "r" (fn), "r" (arg) : "g1", "g2", "g3", "o0", "o1", "memory", "cc"); return retval; diff -NurpP --minimal linux-2.6.19.1/arch/sparc/kernel/ptrace.c linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/kernel/ptrace.c --- linux-2.6.19.1/arch/sparc/kernel/ptrace.c 2006-04-09 13:49:44 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -299,6 +300,10 @@ asmlinkage void do_ptrace(struct pt_regs pt_error_return(regs, -ret); goto out; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { diff -NurpP --minimal linux-2.6.19.1/arch/sparc/kernel/systbls.S linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/kernel/systbls.S --- linux-2.6.19.1/arch/sparc/kernel/systbls.S 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/kernel/systbls.S 2006-11-08 21:52:08 +0100 @@ -71,7 +71,7 @@ sys_call_table: /*250*/ .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ 
.long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat diff -NurpP --minimal linux-2.6.19.1/arch/sparc/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/kernel/traps.c --- linux-2.6.19.1/arch/sparc/kernel/traps.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -99,7 +99,8 @@ void die_if_kernel(char *str, struct pt_ " /_| \\__/ |_\\\n" " \\__U_/\n"); - printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter); + printk("%s(%d[#%u]): %s [#%d]\n", current->comm, + current->pid, current->xid, str, ++die_counter); show_regs(regs); __SAVE; __SAVE; __SAVE; __SAVE; diff -NurpP --minimal linux-2.6.19.1/arch/sparc64/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/Kconfig --- linux-2.6.19.1/arch/sparc64/Kconfig 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/Kconfig 2006-11-08 04:57:40 +0100 @@ -431,6 +431,8 @@ endmenu source "arch/sparc64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/sparc64/kernel/binfmt_aout32.c linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/binfmt_aout32.c --- linux-2.6.19.1/arch/sparc64/kernel/binfmt_aout32.c 2006-06-18 04:52:34 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/binfmt_aout32.c 2006-11-08 04:57:40 +0100 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/arch/sparc64/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/process.c --- linux-2.6.19.1/arch/sparc64/kernel/process.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -696,7 +696,8 @@ pid_t kernel_thread(int (*fn)(void *), v /* Notreached by child. 
*/ "1:" : "=r" (retval) : - "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED), + "i" (__NR_clone), "r" (flags | + CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD), "i" (__NR_exit), "r" (fn), "r" (arg) : "g1", "g2", "g3", "o0", "o1", "memory", "cc"); return retval; diff -NurpP --minimal linux-2.6.19.1/arch/sparc64/kernel/ptrace.c linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/ptrace.c --- linux-2.6.19.1/arch/sparc64/kernel/ptrace.c 2006-06-18 04:52:35 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -212,6 +213,10 @@ asmlinkage void do_ptrace(struct pt_regs pt_error_return(regs, -ret); goto out; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { diff -NurpP --minimal linux-2.6.19.1/arch/sparc64/kernel/sys_sparc32.c linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/sys_sparc32.c --- linux-2.6.19.1/arch/sparc64/kernel/sys_sparc32.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/sys_sparc32.c 2006-11-08 04:57:44 +0100 @@ -793,7 +793,7 @@ asmlinkage long sys32_gettimeofday(struc { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19.1/arch/sparc64/kernel/systbls.S linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/systbls.S --- linux-2.6.19.1/arch/sparc64/kernel/systbls.S 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/systbls.S 2006-11-08 21:52:08 +0100 @@ -72,7 +72,7 @@ sys_call_table32: /*250*/ .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, 
sys32_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy + .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid /*280*/ .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat @@ -142,7 +142,7 @@ sys_call_table: /*250*/ .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat diff -NurpP --minimal linux-2.6.19.1/arch/sparc64/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/traps.c --- linux-2.6.19.1/arch/sparc64/kernel/traps.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/kernel/traps.c 2006-11-08 21:52:08 +0100 @@ -2223,7 +2223,8 @@ void die_if_kernel(char *str, struct pt_ " /_| \\__/ |_\\\n" " \\__U_/\n"); - printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter); + printk("%s(%d[#%u]): %s [#%d]\n", current->comm, + current->pid, current->xid, str, 
++die_counter); notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV); __asm__ __volatile__("flushw"); __show_regs(regs); diff -NurpP --minimal linux-2.6.19.1/arch/sparc64/solaris/fs.c linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/solaris/fs.c --- linux-2.6.19.1/arch/sparc64/solaris/fs.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/sparc64/solaris/fs.c 2006-11-08 04:57:52 +0100 @@ -368,7 +368,7 @@ static int report_statvfs(struct vfsmoun int j = strlen (p); if (j > 15) j = 15; - if (IS_RDONLY(inode)) i = 1; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1; if (mnt->mnt_flags & MNT_NOSUID) i |= 2; if (!sysv_valid_dev(inode->i_sb->s_dev)) return -EOVERFLOW; @@ -404,7 +404,7 @@ static int report_statvfs64(struct vfsmo int j = strlen (p); if (j > 15) j = 15; - if (IS_RDONLY(inode)) i = 1; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1; if (mnt->mnt_flags & MNT_NOSUID) i |= 2; if (!sysv_valid_dev(inode->i_sb->s_dev)) return -EOVERFLOW; diff -NurpP --minimal linux-2.6.19.1/arch/um/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/um/Kconfig --- linux-2.6.19.1/arch/um/Kconfig 2006-11-30 21:18:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/um/Kconfig 2006-11-08 04:57:40 +0100 @@ -306,6 +306,8 @@ source "drivers/connector/Kconfig" source "fs/Kconfig" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/um/kernel/irq.c linux-2.6.19.1-vs2.2.0-rc6/arch/um/kernel/irq.c --- linux-2.6.19.1/arch/um/kernel/irq.c 2006-11-30 21:18:36 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/um/kernel/irq.c 2006-11-30 18:28:41 +0100 @@ -357,6 +357,7 @@ void forward_interrupts(int pid) unsigned int do_IRQ(int irq, union uml_pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs); + irq_enter(); __do_IRQ(irq); irq_exit(); diff -NurpP --minimal linux-2.6.19.1/arch/um/kernel/syscall.c linux-2.6.19.1-vs2.2.0-rc6/arch/um/kernel/syscall.c --- linux-2.6.19.1/arch/um/kernel/syscall.c 2006-11-30 21:18:36 +0100 
+++ linux-2.6.19.1-vs2.2.0-rc6/arch/um/kernel/syscall.c 2006-12-02 01:37:05 +0100 @@ -15,6 +15,7 @@ #include "linux/unistd.h" #include "linux/slab.h" #include "linux/utime.h" + #include "asm/mman.h" #include "asm/uaccess.h" #include "kern_util.h" @@ -118,6 +119,7 @@ long sys_uname(struct old_utsname __user long sys_olduname(struct oldold_utsname __user * name) { long error; + struct new_utsname *ptr; if (!name) return -EFAULT; diff -NurpP --minimal linux-2.6.19.1/arch/v850/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/v850/Kconfig --- linux-2.6.19.1/arch/v850/Kconfig 2006-06-18 04:52:42 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/v850/Kconfig 2006-11-08 04:57:40 +0100 @@ -326,6 +326,8 @@ source "drivers/usb/Kconfig" source "arch/v850/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/v850/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/v850/kernel/process.c --- linux-2.6.19.1/arch/v850/kernel/process.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/v850/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -83,7 +83,7 @@ int kernel_thread (int (*fn)(void *), vo /* Clone this thread. Note that we don't pass the clone syscall's second argument -- it's ignored for calls from kernel mode (the child's SP is always set to the top of the kernel stack). 
*/ - arg0 = flags | CLONE_VM; + arg0 = flags | CLONE_VM | CLONE_KTHREAD; syscall = __NR_clone; asm volatile ("trap " SYSCALL_SHORT_TRAP : "=r" (ret), "=r" (syscall) diff -NurpP --minimal linux-2.6.19.1/arch/v850/kernel/ptrace.c linux-2.6.19.1-vs2.2.0-rc6/arch/v850/kernel/ptrace.c --- linux-2.6.19.1/arch/v850/kernel/ptrace.c 2006-04-09 13:49:44 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/v850/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -117,6 +118,11 @@ long arch_ptrace(struct task_struct *chi { int rval; + /* rval has no initializer; set it before the early exit */ + rval = -ESRCH; + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) + goto out; + switch (request) { unsigned long val, copied; diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/Kconfig linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/Kconfig --- linux-2.6.19.1/arch/x86_64/Kconfig 2006-11-30 21:18:36 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/Kconfig 2006-11-08 04:57:40 +0100 @@ -701,6 +701,8 @@ endmenu source "arch/x86_64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/ia32/ia32_aout.c linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/ia32_aout.c --- linux-2.6.19.1/arch/x86_64/ia32/ia32_aout.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/ia32_aout.c 2006-11-08 04:57:40 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/ia32_binfmt.c --- linux-2.6.19.1/arch/x86_64/ia32/ia32_binfmt.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/ia32_binfmt.c 2006-11-08 04:57:47 +0100 @@ -375,7 +375,8 @@ int ia32_setup_arg_pages(struct linux_bi kmem_cache_free(vm_area_cachep, mpnt); return ret; } - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 
; i < MAX_ARG_PAGES ; i++) { diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/ia32/ia32entry.S linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/ia32entry.S --- linux-2.6.19.1/arch/x86_64/ia32/ia32entry.S 2006-11-30 21:18:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/ia32entry.S 2006-11-08 04:57:41 +0100 @@ -672,7 +672,7 @@ ia32_sys_call_table: .quad sys_tgkill /* 270 */ .quad compat_sys_utimes .quad sys32_fadvise64_64 - .quad quiet_ni_syscall /* sys_vserver */ + .quad sys32_vserver .quad sys_mbind .quad compat_sys_get_mempolicy /* 275 */ .quad sys_set_mempolicy diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/ia32/sys_ia32.c linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/sys_ia32.c --- linux-2.6.19.1/arch/x86_64/ia32/sys_ia32.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/sys_ia32.c 2006-12-02 01:37:05 +0100 @@ -454,7 +454,7 @@ sys32_gettimeofday(struct compat_timeval { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/ia32/syscall32.c linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/syscall32.c --- linux-2.6.19.1/arch/x86_64/ia32/syscall32.c 2005-10-28 20:49:18 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/ia32/syscall32.c 2006-11-08 04:57:47 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -70,7 +71,7 @@ int syscall32_setup_pages(struct linux_b kmem_cache_free(vm_area_cachep, vma); return ret; } - mm->total_vm += npages; + vx_vmpages_add(mm, npages); up_write(&mm->mmap_sem); return 0; } diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/kernel/irq.c linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/kernel/irq.c --- linux-2.6.19.1/arch/x86_64/kernel/irq.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/kernel/irq.c 2006-11-30 18:28:58 +0100 @@ -123,7 +123,6 @@ asmlinkage unsigned int do_IRQ(struct pt else printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", 
__func__, smp_processor_id(), vector); - irq_exit(); set_irq_regs(old_regs); diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/kernel/process.c --- linux-2.6.19.1/arch/x86_64/kernel/process.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/kernel/process.c 2006-11-30 20:55:45 +0100 @@ -54,7 +54,8 @@ asmlinkage extern void ret_from_fork(void); -unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; +unsigned long kernel_thread_flags = + CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD; unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); @@ -301,8 +302,8 @@ void __show_regs(struct pt_regs * regs) printk("\n"); print_modules(); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), + printk("Pid: %d:#%u, comm: %.20s %s %s %.*s\n", + current->pid, current->xid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/kernel/traps.c linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/kernel/traps.c --- linux-2.6.19.1/arch/x86_64/kernel/traps.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/kernel/traps.c 2006-11-30 20:55:45 +0100 @@ -487,8 +487,9 @@ void show_registers(struct pt_regs *regs printk("CPU %d ", cpu); __show_regs(regs); - printk("Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); + printk("Process %s (pid: %d[#%u], threadinfo %p, task %p)\n", + cur->comm, cur->pid, cur->xid, + task_thread_info(cur), cur); /* * When in-kernel, we also print out the stack and code at the @@ -657,8 +658,8 @@ static void __kprobes do_trap(int trapnr if (user_mode(regs)) { if (exception_trace && unhandled_signal(tsk, signr)) printk(KERN_INFO - "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", - tsk->comm, tsk->pid, str, + "%s[%d:#%u] trap %s rip:%lx rsp:%lx 
error:%lx\n", + tsk->comm, tsk->pid, tsk->xid, str, regs->rip, regs->rsp, error_code); if (info) @@ -758,8 +759,8 @@ asmlinkage void __kprobes do_general_pro if (user_mode(regs)) { if (exception_trace && unhandled_signal(tsk, SIGSEGV)) printk(KERN_INFO - "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", - tsk->comm, tsk->pid, + "%s[%d:#%u] general protection rip:%lx rsp:%lx error:%lx\n", + tsk->comm, tsk->pid, tsk->xid, regs->rip, regs->rsp, error_code); force_sig(SIGSEGV, tsk); diff -NurpP --minimal linux-2.6.19.1/arch/x86_64/mm/fault.c linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/mm/fault.c --- linux-2.6.19.1/arch/x86_64/mm/fault.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/x86_64/mm/fault.c 2006-11-08 04:57:42 +0100 @@ -514,10 +514,10 @@ bad_area_nosemaphore: if (exception_trace && unhandled_signal(tsk, SIGSEGV)) { printk( - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + "%s%s[%d:#%u]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", tsk->pid > 1 ? 
KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->rip, - regs->rsp, error_code); + tsk->comm, tsk->pid, tsk->xid, address, + regs->rip, regs->rsp, error_code); } tsk->thread.cr2 = address; diff -NurpP --minimal linux-2.6.19.1/arch/xtensa/kernel/irq.c linux-2.6.19.1-vs2.2.0-rc6/arch/xtensa/kernel/irq.c --- linux-2.6.19.1/arch/xtensa/kernel/irq.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/xtensa/kernel/irq.c 2006-11-30 18:29:29 +0100 @@ -63,9 +63,7 @@ unsigned int do_IRQ(int irq, struct pt_ sp - sizeof(struct thread_info)); } #endif - __do_IRQ(irq, regs); - irq_exit(); return 1; diff -NurpP --minimal linux-2.6.19.1/arch/xtensa/kernel/process.c linux-2.6.19.1-vs2.2.0-rc6/arch/xtensa/kernel/process.c --- linux-2.6.19.1/arch/xtensa/kernel/process.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/arch/xtensa/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -206,7 +206,7 @@ int kernel_thread(int (*fn)(void *), voi :"=r" (retval) :"i" (__NR_clone), "i" (__NR_exit), "r" (arg), "r" (fn), - "r" (flags | CLONE_VM) + "r" (flags | CLONE_VM | CLONE_KTHREAD) : "a2", "a3", "a4", "a5", "a6" ); return retval; } diff -NurpP --minimal linux-2.6.19.1/block/cfq-iosched.c linux-2.6.19.1-vs2.2.0-rc6/block/cfq-iosched.c --- linux-2.6.19.1/block/cfq-iosched.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/block/cfq-iosched.c 2006-11-08 21:52:08 +0100 @@ -221,6 +221,8 @@ static int cfq_queue_empty(request_queue static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) { + if (task->xid) + return task->xid + (1 << 16); if (rw == READ || rw == WRITE_SYNC) return task->pid; diff -NurpP --minimal linux-2.6.19.1/drivers/block/Kconfig linux-2.6.19.1-vs2.2.0-rc6/drivers/block/Kconfig --- linux-2.6.19.1/drivers/block/Kconfig 2006-12-13 07:46:36 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/block/Kconfig 2006-12-13 07:46:51 +0100 @@ -317,6 +317,13 @@ config BLK_DEV_CRYPTOLOOP instead, which can be configured to be on-disk compatible with the 
cryptoloop device. +config BLK_DEV_VROOT + tristate "Virtual Root device support" + depends on QUOTACTL + ---help--- + Saying Y here will allow you to use quota/fs ioctls on a shared + partition within a virtual server without compromising security. + config BLK_DEV_NBD tristate "Network block device support" depends on NET diff -NurpP --minimal linux-2.6.19.1/drivers/block/Makefile linux-2.6.19.1-vs2.2.0-rc6/drivers/block/Makefile --- linux-2.6.19.1/drivers/block/Makefile 2006-06-18 04:52:46 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/block/Makefile 2006-11-08 04:57:51 +0100 @@ -29,4 +29,5 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryp obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o +obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o diff -NurpP --minimal linux-2.6.19.1/drivers/block/loop.c linux-2.6.19.1-vs2.2.0-rc6/drivers/block/loop.c --- linux-2.6.19.1/drivers/block/loop.c 2006-11-30 21:18:39 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/block/loop.c 2006-11-30 18:53:18 +0100 @@ -74,6 +74,7 @@ #include #include #include +#include #include @@ -795,6 +796,7 @@ static int loop_set_fd(struct loop_devic lo->lo_blocksize = lo_blocksize; lo->lo_device = bdev; lo->lo_flags = lo_flags; + lo->lo_xid = vx_current_xid(); lo->lo_backing_file = file; lo->transfer = transfer_none; lo->ioctl = NULL; @@ -935,7 +937,7 @@ loop_set_status(struct loop_device *lo, struct loop_func_table *xfer; if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid && - !capable(CAP_SYS_ADMIN)) + !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) return -EPERM; if (lo->lo_state != Lo_bound) return -ENXIO; @@ -1015,7 +1017,8 @@ loop_get_status(struct loop_device *lo, memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); info->lo_encrypt_type = lo->lo_encryption ? 
lo->lo_encryption->number : 0; - if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { + if (lo->lo_encrypt_key_size && + vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) { info->lo_encrypt_key_size = lo->lo_encrypt_key_size; memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, lo->lo_encrypt_key_size); @@ -1326,6 +1329,9 @@ static int lo_open(struct inode *inode, { struct loop_device *lo = inode->i_bdev->bd_disk->private_data; + if (!vx_check(lo->lo_xid, VS_WATCH_P|VS_IDENT)) + return -EACCES; + mutex_lock(&lo->lo_ctl_mutex); lo->lo_refcnt++; mutex_unlock(&lo->lo_ctl_mutex);
diff -NurpP --minimal linux-2.6.19.1/drivers/block/vroot.c linux-2.6.19.1-vs2.2.0-rc6/drivers/block/vroot.c
--- linux-2.6.19.1/drivers/block/vroot.c 1970-01-01 01:00:00 +0100
+++ linux-2.6.19.1-vs2.2.0-rc6/drivers/block/vroot.c 2006-11-30 19:40:17 +0100
@@ -0,0 +1,323 @@
+/*
+ * linux/drivers/block/vroot.c
+ *
+ * written by Herbert Pötzl, 9/11/2002
+ * ported to 2.6.10 by Herbert Pötzl, 30/12/2004
+ *
+ * based on the loop.c code by Theodore Ts'o.
+ *
+ * Copyright (C) 2002-2006 by Herbert Pötzl.
+ * Redistribution of this file is permitted under the
+ * GNU General Public License.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+
+static int max_vroot = 8;
+
+static struct vroot_device *vroot_dev;
+static struct gendisk **disks;
+
+
+/*
+ * Bind vroot device @vr to the block device behind descriptor @arg.
+ *
+ * Fails with -EBUSY if @vr is already bound, -EBADF if @arg is not an
+ * open descriptor and -EINVAL if it does not refer to a block device.
+ * On success a reference on the real device's inode is taken with
+ * __iget(); vroot_clr_dev() later releases the device with bdput().
+ * @vr_file and @bdev are not used here.
+ */
+static int vroot_set_dev(
+	struct vroot_device *vr,
+	struct file *vr_file,
+	struct block_device *bdev,
+	unsigned int arg)
+{
+	struct block_device *real_bdev;
+	struct file *file;
+	struct inode *inode;
+	int error;
+
+	error = -EBUSY;
+	if (vr->vr_state != Vr_unbound)
+		goto out;
+
+	error = -EBADF;
+	file = fget(arg);
+	if (!file)
+		goto out;
+
+	error = -EINVAL;
+	inode = file->f_dentry->d_inode;
+
+	if (!S_ISBLK(inode->i_mode))
+		goto out_fput;
+
+	real_bdev = inode->i_bdev;
+	vr->vr_device = real_bdev;
+	__iget(real_bdev->bd_inode);
+
+	vxdprintk(VXD_CBIT(misc, 0),
+		"vroot[%d]_set_dev: dev=" VXF_DEV,
+		vr->vr_number, VXD_DEV(real_bdev));
+
+	vr->vr_state = Vr_bound;
+	error = 0;
+
+ out_fput:
+	fput(file);
+ out:
+	return error;
+}
+
+/*
+ * Unbind vroot device @vr from its real block device.
+ *
+ * Returns -ENXIO if @vr is not bound and -EBUSY while vr_refcnt shows
+ * more openers than the one issuing the ioctl.  @vr_file and @bdev
+ * are not used here.
+ */
+static int vroot_clr_dev(
+	struct vroot_device *vr,
+	struct file *vr_file,
+	struct block_device *bdev)
+{
+	struct block_device *real_bdev;
+
+	if (vr->vr_state != Vr_bound)
+		return -ENXIO;
+	if (vr->vr_refcnt > 1)	/* we needed one fd for the ioctl */
+		return -EBUSY;
+
+	real_bdev = vr->vr_device;
+
+	vxdprintk(VXD_CBIT(misc, 0),
+		"vroot[%d]_clr_dev: dev=" VXF_DEV,
+		vr->vr_number, VXD_DEV(real_bdev));
+
+	bdput(real_bdev);
+	vr->vr_state = Vr_unbound;
+	vr->vr_device = NULL;
+	return 0;
+}
+
+
+/*
+ * ioctl entry point: VROOT_SET_DEV binds the device to the descriptor
+ * passed in @arg, VROOT_CLR_DEV unbinds it, anything else is -EINVAL.
+ * vr_ctl_mutex is held while the bind state is changed.
+ */
+static int vr_ioctl(struct inode * inode, struct file * file,
+	unsigned int cmd, unsigned long arg)
+{
+	struct vroot_device *vr = inode->i_bdev->bd_disk->private_data;
+	int err;
+
+	down(&vr->vr_ctl_mutex);
+	switch (cmd) {
+	case VROOT_SET_DEV:
+		err = vroot_set_dev(vr, file, inode->i_bdev, arg);
+		break;
+	case VROOT_CLR_DEV:
+		err = vroot_clr_dev(vr, file, inode->i_bdev);
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+	up(&vr->vr_ctl_mutex);
+	return err;
+}
+
+/* Count an opener of the vroot device (checked by vroot_clr_dev()). */
+static int vr_open(struct inode *inode, struct file *file)
+{
+	struct vroot_device *vr = inode->i_bdev->bd_disk->private_data;
+
+	down(&vr->vr_ctl_mutex);
+	vr->vr_refcnt++;
+	up(&vr->vr_ctl_mutex);
+	return 0;
+}
+
+/* Drop the opener count taken in vr_open(). */
+static int vr_release(struct inode *inode, struct file *file)
+{
+	struct vroot_device *vr = inode->i_bdev->bd_disk->private_data;
+
+	down(&vr->vr_ctl_mutex);
+	--vr->vr_refcnt;
+	up(&vr->vr_ctl_mutex);
+	return 0;
+}
+
+static struct block_device_operations vr_fops = {
+	.owner =	THIS_MODULE,
+	.open =		vr_open,
+	.release =	vr_release,
+	.ioctl =	vr_ioctl,
+};
+
+/*
+ * Translate vroot device @bdev into the block device it is bound to.
+ *
+ * Returns the real device with an inode reference taken via __iget(),
+ * or ERR_PTR(-ENXIO) if the minor lies outside the vroot_dev[] table
+ * or the device is not bound.
+ */
+struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
+{
+	struct inode *inode = bdev->bd_inode;
+	struct vroot_device *vr;
+	struct block_device *real_bdev;
+	int minor = iminor(inode);
+
+	/* vroot_dev[] only has max_vroot entries */
+	if (minor >= max_vroot)
+		return ERR_PTR(-ENXIO);
+
+	vr = &vroot_dev[minor];
+
+	/* check first: vr_device is NULL while the device is unbound */
+	if (vr->vr_state != Vr_bound)
+		return ERR_PTR(-ENXIO);
+
+	real_bdev = vr->vr_device;
+
+	vxdprintk(VXD_CBIT(misc, 0),
+		"vroot[%d]_get_real_bdev: dev=" VXF_DEV,
+		vr->vr_number, VXD_DEV(real_bdev));
+
+	__iget(real_bdev->bd_inode);
+	return real_bdev;
+}
+
+/*
+ * And now the modules code and kernel interface.
+ */
+
+module_param(max_vroot, int, 0);
+
+MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
+
+MODULE_AUTHOR ("Herbert Pötzl");
+MODULE_DESCRIPTION ("Virtual Root Device Mapper");
+
+
+/*
+ * Register the vroot major, allocate max_vroot devices and disks,
+ * register the get_real_bdev callback and add the disks.  On failure
+ * everything set up so far is released and the error is returned.
+ */
+int __init vroot_init(void)
+{
+	int err, i;
+
+	if (max_vroot < 1 || max_vroot > 256) {
+		max_vroot = MAX_VROOT_DEFAULT;
+		printk(KERN_WARNING "vroot: invalid max_vroot "
+			"(must be between 1 and 256), "
+			"using default (%d)\n", max_vroot);
+	}
+
+	if (register_blkdev(VROOT_MAJOR, "vroot"))
+		return -EIO;
+
+	err = -ENOMEM;
+	vroot_dev = kcalloc(max_vroot, sizeof(*vroot_dev), GFP_KERNEL);
+	if (!vroot_dev)
+		goto out_mem1;
+
+	disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
+	if (!disks)
+		goto out_mem2;
+
+	for (i = 0; i < max_vroot; i++) {
+		disks[i] = alloc_disk(1);
+		if (!disks[i])
+			goto out_mem3;
+	}
+
+	for (i = 0; i < max_vroot; i++) {
+		struct vroot_device *vr = &vroot_dev[i];
+		struct gendisk *disk = disks[i];
+
+		/* *vr is already zeroed by kcalloc() */
+		init_MUTEX(&vr->vr_ctl_mutex);
+		vr->vr_number = i;
+		disk->major = VROOT_MAJOR;
+		disk->first_minor = i;
+		disk->fops = &vr_fops;
+		sprintf(disk->disk_name, "vroot%d", i);
+		disk->private_data = vr;
+	}
+
+	err = register_vroot_grb(&__vroot_get_real_bdev);
+	if (err)
+		goto out_mem3;
+
+	for (i = 0; i < max_vroot; i++)
+		add_disk(disks[i]);
+	printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
+	return 0;
+
+out_mem3:
+	/* i is the number of disks allocated so far */
+	while (i--)
+		put_disk(disks[i]);
+	kfree(disks);
+out_mem2:
+	kfree(vroot_dev);
+out_mem1:
+	unregister_blkdev(VROOT_MAJOR, "vroot");
+	printk(KERN_ERR "vroot: initialization failed (%d)\n", err);
+	return err;
+}
+
+/* Undo vroot_init(): unhook the callback, remove the disks, free tables. */
+void vroot_exit(void)
+{
+	int i;
+
+	if (unregister_vroot_grb(&__vroot_get_real_bdev))
+		printk(KERN_WARNING "vroot: cannot unregister grb\n");
+
+	for (i = 0; i < max_vroot; i++) {
+		del_gendisk(disks[i]);
+		put_disk(disks[i]);
+	}
+	if (unregister_blkdev(VROOT_MAJOR, "vroot"))
+		printk(KERN_WARNING "vroot: cannot unregister blkdev\n");
+
+	kfree(disks);
+	kfree(vroot_dev);
+}
+
+module_init(vroot_init);
+module_exit(vroot_exit);
+
+#ifndef MODULE
+
+/* Parse the "max_vroot=" boot parameter for built-in kernels. */
+static int __init max_vroot_setup(char *str)
+{
+	max_vroot = simple_strtol(str, NULL, 0);
+	return 1;
+}
+
+__setup("max_vroot=", max_vroot_setup);
+
+#endif
+
diff -NurpP --minimal linux-2.6.19.1/drivers/char/random.c linux-2.6.19.1-vs2.2.0-rc6/drivers/char/random.c --- linux-2.6.19.1/drivers/char/random.c 2006-11-30 21:18:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/char/random.c 2006-11-08 04:57:40 +0100 @@ -1178,7 +1178,7 @@ static char sysctl_bootid[16]; static int proc_do_uuid(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table fake_table; + ctl_table fake_table = {0}; unsigned char buf[64], tmp_uuid[16], *uuid; uuid = table->data; diff -NurpP --minimal linux-2.6.19.1/drivers/char/sysrq.c linux-2.6.19.1-vs2.2.0-rc6/drivers/char/sysrq.c --- linux-2.6.19.1/drivers/char/sysrq.c 2006-11-30 21:18:41 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/char/sysrq.c 2006-12-02 01:37:05 +0100 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -260,6 +261,21 @@ static struct sysrq_key_op sysrq_unrt_op .enable_mask = SYSRQ_ENABLE_RTNICE, }; + +#ifdef CONFIG_VSERVER_DEBUG +static void sysrq_handle_vxinfo(int key, struct tty_struct *tty) +{ + dump_vx_info_inactive((key == 'x')?0:1); +} + +static struct sysrq_key_op sysrq_showvxinfo_op = { + .handler = sysrq_handle_vxinfo, + .help_msg = "conteXt", + .action_msg = "Show Context Info", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; +#endif + /* Key Operations table and lock */ static DEFINE_SPINLOCK(sysrq_key_table_lock); @@ -304,7 +320,11 @@ static struct sysrq_key_op *sysrq_key_ta /* May be assigned at init time by SMP VOYAGER */ NULL, /* v */ NULL, /* w */ +#ifdef CONFIG_VSERVER_DEBUG + 
&sysrq_showvxinfo_op, /* x */ +#else NULL, /* x */ +#endif NULL, /* y */ NULL /* z */ }; @@ -318,6 +338,8 @@ static int sysrq_key_table_key2index(int retval = key - '0'; else if ((key >= 'a') && (key <= 'z')) retval = key + 10 - 'a'; + else if ((key >= 'A') && (key <= 'Z')) + retval = key + 10 - 'A'; else retval = -1; return retval; diff -NurpP --minimal linux-2.6.19.1/drivers/char/tty_io.c linux-2.6.19.1-vs2.2.0-rc6/drivers/char/tty_io.c --- linux-2.6.19.1/drivers/char/tty_io.c 2006-11-30 21:18:41 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/char/tty_io.c 2006-11-08 04:57:52 +0100 @@ -103,6 +103,7 @@ #include #include +#include #undef TTY_DEBUG_HANGUP @@ -2941,13 +2942,16 @@ static int tiocsctty(struct tty_struct * static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { + pid_t pgrp; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; - return put_user(real_tty->pgrp, p); + + pgrp = vx_map_pid(real_tty->pgrp); + return put_user(pgrp, p); } /** @@ -2977,6 +2981,8 @@ static int tiocspgrp(struct tty_struct * return -ENOTTY; if (get_user(pgrp, p)) return -EFAULT; + + pgrp = vx_rmap_pid(pgrp); if (pgrp < 0) return -EINVAL; if (session_of_pgrp(pgrp) != current->signal->session) diff -NurpP --minimal linux-2.6.19.1/drivers/infiniband/core/uverbs_mem.c linux-2.6.19.1-vs2.2.0-rc6/drivers/infiniband/core/uverbs_mem.c --- linux-2.6.19.1/drivers/infiniband/core/uverbs_mem.c 2006-06-18 04:53:04 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/infiniband/core/uverbs_mem.c 2006-11-08 04:57:47 +0100 @@ -36,6 +36,7 @@ #include #include +#include #include "uverbs.h" @@ -161,7 +162,7 @@ out: if (ret < 0) __ib_umem_release(dev, mem, 0); else - current->mm->locked_vm = locked; + vx_vmlocked_sub(current->mm, current->mm->locked_vm - locked); up_write(&current->mm->mmap_sem); free_page((unsigned long) page_list); @@ -174,8 +175,8 @@ void 
ib_umem_release(struct ib_device *d __ib_umem_release(dev, umem, 1); down_write(&current->mm->mmap_sem); - current->mm->locked_vm -= - PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + vx_vmlocked_sub(current->mm, + PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT); up_write(&current->mm->mmap_sem); } @@ -184,7 +185,7 @@ static void ib_umem_account(void *work_p struct ib_umem_account_work *work = work_ptr; down_write(&work->mm->mmap_sem); - work->mm->locked_vm -= work->diff; + vx_vmlocked_sub(work->mm, work->diff); up_write(&work->mm->mmap_sem); mmput(work->mm); kfree(work); diff -NurpP --minimal linux-2.6.19.1/drivers/infiniband/hw/ipath/ipath_user_pages.c linux-2.6.19.1-vs2.2.0-rc6/drivers/infiniband/hw/ipath/ipath_user_pages.c --- linux-2.6.19.1/drivers/infiniband/hw/ipath/ipath_user_pages.c 2006-11-30 21:18:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/infiniband/hw/ipath/ipath_user_pages.c 2006-11-08 04:57:47 +0100 @@ -33,6 +33,7 @@ #include #include +#include #include "ipath_kernel.h" @@ -61,7 +62,8 @@ static int __get_user_pages(unsigned lon lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - if (num_pages > lock_limit) { + if (num_pages > lock_limit || + !vx_vmlocked_avail(current->mm, num_pages)) { ret = -ENOMEM; goto bail; } @@ -78,7 +80,7 @@ static int __get_user_pages(unsigned lon goto bail_release; } - current->mm->locked_vm += num_pages; + vx_vmlocked_add(current->mm, num_pages); ret = 0; goto bail; @@ -203,7 +205,7 @@ void ipath_release_user_pages(struct pag __ipath_release_user_pages(p, num_pages, 1); - current->mm->locked_vm -= num_pages; + vx_vmlocked_sub(current->mm, num_pages); up_write(&current->mm->mmap_sem); } @@ -219,7 +221,7 @@ static void user_pages_account(void *ptr struct ipath_user_pages_work *work = ptr; down_write(&work->mm->mmap_sem); - work->mm->locked_vm -= work->num_pages; + vx_vmlocked_sub(work->mm, work->num_pages); up_write(&work->mm->mmap_sem); mmput(work->mm); kfree(work); diff -NurpP --minimal 
linux-2.6.19.1/drivers/md/dm-ioctl.c linux-2.6.19.1-vs2.2.0-rc6/drivers/md/dm-ioctl.c --- linux-2.6.19.1/drivers/md/dm-ioctl.c 2006-11-30 21:18:46 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/md/dm-ioctl.c 2006-11-30 19:57:46 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -100,7 +101,8 @@ static struct hash_cell *__get_name_cell unsigned int h = hash_str(str); list_for_each_entry (hc, _name_buckets + h, name_list) - if (!strcmp(hc->name, str)) { + if (vx_check(dm_get_xid(hc->md), VS_WATCH_P|VS_IDENT) && + !strcmp(hc->name, str)) { dm_get(hc->md); return hc; } @@ -114,7 +116,8 @@ static struct hash_cell *__get_uuid_cell unsigned int h = hash_str(str); list_for_each_entry (hc, _uuid_buckets + h, uuid_list) - if (!strcmp(hc->uuid, str)) { + if (vx_check(dm_get_xid(hc->md), VS_WATCH_P|VS_IDENT) && + !strcmp(hc->uuid, str)) { dm_get(hc->md); return hc; } @@ -349,6 +352,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl static int remove_all(struct dm_ioctl *param, size_t param_size) { + if (!vx_check(0, VS_ADMIN)) + return -EPERM; + dm_hash_remove_all(1); param->data_size = 0; return 0; @@ -396,6 +402,8 @@ static int list_devices(struct dm_ioctl */ for (i = 0; i < NUM_BUCKETS; i++) { list_for_each_entry (hc, _name_buckets + i, name_list) { + if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P|VS_IDENT)) + continue; needed += sizeof(struct dm_name_list); needed += strlen(hc->name) + 1; needed += ALIGN_MASK; @@ -419,6 +427,8 @@ static int list_devices(struct dm_ioctl */ for (i = 0; i < NUM_BUCKETS; i++) { list_for_each_entry (hc, _name_buckets + i, name_list) { + if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P|VS_IDENT)) + continue; if (old_nl) old_nl->next = (uint32_t) ((void *) nl - (void *) old_nl); @@ -609,10 +619,11 @@ static struct hash_cell *__find_device_h if (!md) goto out; - mdptr = dm_get_mdptr(md); + if (vx_check(dm_get_xid(md), VS_WATCH_P|VS_IDENT)) + mdptr = dm_get_mdptr(md); + if (!mdptr) dm_put(md); - out: return mdptr; } @@ -1405,8 +1416,8 @@ 
static int ctl_ioctl(struct inode *inode ioctl_fn fn = NULL; size_t param_size; - /* only root can play with this */ - if (!capable(CAP_SYS_ADMIN)) + /* only root and certain contexts can play with this */ + if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER)) return -EACCES; if (_IOC_TYPE(command) != DM_IOCTL) diff -NurpP --minimal linux-2.6.19.1/drivers/md/dm.c linux-2.6.19.1-vs2.2.0-rc6/drivers/md/dm.c --- linux-2.6.19.1/drivers/md/dm.c 2006-11-30 21:18:46 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/md/dm.c 2006-11-30 18:53:18 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "core" @@ -75,6 +76,7 @@ struct mapped_device { rwlock_t map_lock; atomic_t holders; atomic_t open_count; + xid_t xid; unsigned long flags; @@ -220,6 +222,7 @@ static void __exit dm_exit(void) static int dm_blk_open(struct inode *inode, struct file *file) { struct mapped_device *md; + int ret = -ENXIO; spin_lock(&_minor_lock); @@ -228,18 +231,19 @@ static int dm_blk_open(struct inode *ino goto out; if (test_bit(DMF_FREEING, &md->flags) || - test_bit(DMF_DELETING, &md->flags)) { - md = NULL; + test_bit(DMF_DELETING, &md->flags)) + goto out; + + ret = -EACCES; + if (!vx_check(md->xid, VS_IDENT)) goto out; - } dm_get(md); atomic_inc(&md->open_count); - + ret = 0; out: spin_unlock(&_minor_lock); - - return md ? 
0 : -ENXIO; + return ret; } static int dm_blk_close(struct inode *inode, struct file *file) @@ -435,6 +439,14 @@ int dm_set_geometry(struct mapped_device return 0; } +/* + * Get the xid associated with a dm device + */ +xid_t dm_get_xid(struct mapped_device *md) +{ + return md->xid; +} + /*----------------------------------------------------------------- * CRUD START: * A more elegant soln is in the works that uses the queue @@ -952,6 +964,7 @@ static struct mapped_device *alloc_dev(i atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); atomic_set(&md->event_nr, 0); + md->xid = vx_current_xid(); md->queue = blk_alloc_queue(GFP_KERNEL); if (!md->queue) diff -NurpP --minimal linux-2.6.19.1/drivers/md/dm.h linux-2.6.19.1-vs2.2.0-rc6/drivers/md/dm.h --- linux-2.6.19.1/drivers/md/dm.h 2006-11-30 21:18:46 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/drivers/md/dm.h 2006-11-08 04:57:52 +0100 @@ -72,6 +72,8 @@ void dm_put_target_type(struct target_ty int dm_target_iterate(void (*iter_func)(struct target_type *tt, void *param), void *param); +xid_t dm_get_xid(struct mapped_device *md); + /*----------------------------------------------------------------- * Useful inlines. *---------------------------------------------------------------*/ diff -NurpP --minimal linux-2.6.19.1/fs/attr.c linux-2.6.19.1-vs2.2.0-rc6/fs/attr.c --- linux-2.6.19.1/fs/attr.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/attr.c 2006-11-30 19:40:56 +0100 @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include /* Taken over from the old code... 
*/ @@ -56,6 +59,30 @@ int inode_change_ok(struct inode *inode, if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) goto error; } + + /* Check for evil vserver activity */ + if (vx_check(0, VS_ADMIN)) + goto fine; + + if (IS_BARRIER(inode)) { + vxwprintk(1, "xid=%d messing with the barrier.", + vx_current_xid()); + goto error; + } + switch (inode->i_sb->s_magic) { + case PROC_SUPER_MAGIC: + /* maybe allow that in the future? */ + vxwprintk(1, "xid=%d messing with the procfs.", + vx_current_xid()); + goto error; + case DEVPTS_SUPER_MAGIC: + /* devpts is xid tagged */ + if (vx_check((xid_t)inode->i_tag, VS_IDENT)) + goto fine; + vxwprintk(1, "xid=%d messing with the devpts.", + vx_current_xid()); + goto error; + } fine: retval = 0; error: @@ -79,6 +106,8 @@ int inode_setattr(struct inode * inode, inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode)) + inode->i_tag = attr->ia_tag; if (ia_valid & ATTR_ATIME) inode->i_atime = timespec_trunc(attr->ia_atime, inode->i_sb->s_time_gran); @@ -153,7 +182,8 @@ int notify_change(struct dentry * dentry error = security_inode_setattr(dentry, attr); if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) error = DQUOT_TRANSFER(inode, attr) ? 
-EDQUOT : 0; if (!error) error = inode_setattr(inode, attr); diff -NurpP --minimal linux-2.6.19.1/fs/binfmt_aout.c linux-2.6.19.1-vs2.2.0-rc6/fs/binfmt_aout.c --- linux-2.6.19.1/fs/binfmt_aout.c 2006-11-30 21:19:18 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/binfmt_aout.c 2006-11-08 04:57:40 +0100 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/fs/binfmt_elf.c linux-2.6.19.1-vs2.2.0-rc6/fs/binfmt_elf.c --- linux-2.6.19.1/fs/binfmt_elf.c 2006-11-30 21:19:18 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/binfmt_elf.c 2006-12-02 01:37:05 +0100 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19.1/fs/binfmt_flat.c linux-2.6.19.1-vs2.2.0-rc6/fs/binfmt_flat.c --- linux-2.6.19.1/fs/binfmt_flat.c 2006-09-20 16:58:34 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/binfmt_flat.c 2006-11-08 04:57:40 +0100 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/fs/binfmt_som.c linux-2.6.19.1-vs2.2.0-rc6/fs/binfmt_som.c --- linux-2.6.19.1/fs/binfmt_som.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/binfmt_som.c 2006-11-08 04:57:40 +0100 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/fs/dcache.c linux-2.6.19.1-vs2.2.0-rc6/fs/dcache.c --- linux-2.6.19.1/fs/dcache.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/dcache.c 2006-11-08 04:57:48 +0100 @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" @@ -147,6 +148,7 @@ void dput(struct dentry *dentry) if (!dentry) return; + vx_dentry_dec(dentry); repeat: if (atomic_read(&dentry->d_count) == 1) might_sleep(); @@ -160,6 +162,8 @@ repeat: return; } + vx_dentry_dec(dentry); + /* * AV: ->d_delete() is _NOT_ allowed to block now. 
*/ @@ -270,6 +274,7 @@ static inline struct dentry * __dget_loc if (!list_empty(&dentry->d_lru)) { dentry_stat.nr_unused--; list_del_init(&dentry->d_lru); + vx_dentry_inc(dentry); } return dentry; } @@ -861,6 +866,9 @@ struct dentry *d_alloc(struct dentry * p struct dentry *dentry; char *dname; + if (!vx_dentry_avail(1)) + return NULL; + dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); if (!dentry) return NULL; @@ -909,6 +917,7 @@ struct dentry *d_alloc(struct dentry * p if (parent) list_add(&dentry->d_u.d_child, &parent->d_subdirs); dentry_stat.nr_dentry++; + vx_dentry_inc(dentry); spin_unlock(&dcache_lock); return dentry; @@ -1258,6 +1267,7 @@ struct dentry * __d_lookup(struct dentry if (!d_unhashed(dentry)) { atomic_inc(&dentry->d_count); + vx_dentry_inc(dentry); found = dentry; } spin_unlock(&dentry->d_lock); diff -NurpP --minimal linux-2.6.19.1/fs/devpts/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/devpts/inode.c --- linux-2.6.19.1/fs/devpts/inode.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/devpts/inode.c 2006-11-30 18:53:18 +0100 @@ -19,8 +19,22 @@ #include #include #include +#include -#define DEVPTS_SUPER_MAGIC 0x1cd1 + +static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + int ret = -EACCES; + + /* devpts is xid tagged */ + if (vx_check((xid_t)inode->i_tag, VS_WATCH_P|VS_IDENT)) + ret = generic_permission(inode, mask, NULL); + return ret; +} + +static struct inode_operations devpts_file_inode_operations = { + .permission = devpts_permission, +}; static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -91,6 +105,25 @@ static int devpts_remount(struct super_b return 0; } +static int devpts_filter(struct dentry *de) +{ + /* devpts is xid tagged */ + return vx_check((xid_t)de->d_inode->i_tag, VS_WATCH_P|VS_IDENT); +} + +static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + return dcache_readdir_filter(filp, dirent, filldir, devpts_filter); +} + +static struct 
file_operations devpts_dir_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .readdir = devpts_readdir, +}; + static struct super_operations devpts_sops = { .statfs = simple_statfs, .remount_fs = devpts_remount, @@ -116,8 +149,10 @@ devpts_fill_super(struct super_block *s, inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_fop = &devpts_dir_operations; inode->i_nlink = 2; + /* devpts is xid tagged */ + inode->i_tag = (tag_t)vx_current_xid(); devpts_root = s->s_root = d_alloc_root(inode); if (s->s_root) @@ -175,6 +210,9 @@ int devpts_pty_new(struct tty_struct *tt inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|config.mode, device); + /* devpts is xid tagged */ + inode->i_tag = (tag_t)vx_current_xid(); + inode->i_op = &devpts_file_inode_operations; inode->i_private = tty; dentry = get_node(number); diff -NurpP --minimal linux-2.6.19.1/fs/exec.c linux-2.6.19.1-vs2.2.0-rc6/fs/exec.c --- linux-2.6.19.1/fs/exec.c 2006-12-13 07:46:36 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/exec.c 2006-12-13 07:46:51 +0100 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -436,7 +437,8 @@ int setup_arg_pages(struct linux_binprm kmem_cache_free(vm_area_cachep, mpnt); return ret; } - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { @@ -1306,7 +1308,7 @@ static void format_corename(char *corena /* UNIX time of coredump */ case 't': { struct timeval tv; - do_gettimeofday(&tv); + vx_gettimeofday(&tv); rc = snprintf(out_ptr, out_end - out_ptr, "%lu", tv.tv_sec); if (rc > out_end - out_ptr) diff -NurpP --minimal 
linux-2.6.19.1/fs/ext2/balloc.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/balloc.c --- linux-2.6.19.1/fs/ext2/balloc.c 2006-09-20 16:58:34 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/balloc.c 2006-12-01 23:38:46 +0100 @@ -16,6 +16,8 @@ #include #include #include +#include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -102,12 +104,13 @@ static int reserve_blocks(struct super_b { struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = sbi->s_es; - unsigned free_blocks; - unsigned root_blocks; + unsigned long long free_blocks, root_blocks; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(es->s_r_blocks_count); + DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks); + if (free_blocks < count) count = free_blocks; @@ -258,6 +261,7 @@ do_more: } error_return: brelse(bitmap_bh); + DLIMIT_FREE_BLOCK(inode, freed); release_blocks(sb, freed); DQUOT_FREE_BLOCK(inode, freed); } @@ -361,6 +365,10 @@ int ext2_new_block(struct inode *inode, *err = -ENOSPC; goto out_dquot; } + if (DLIMIT_ALLOC_BLOCK(inode, es_alloc)) { + *err = -ENOSPC; + goto out_dlimit; + } ext2_debug ("goal=%lu.\n", goal); @@ -508,6 +516,8 @@ got_block: *err = 0; out_release: group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + DLIMIT_FREE_BLOCK(inode, es_alloc); +out_dlimit: release_blocks(sb, es_alloc); out_dquot: DQUOT_FREE_BLOCK(inode, dq_alloc); diff -NurpP --minimal linux-2.6.19.1/fs/ext2/ext2.h linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/ext2.h --- linux-2.6.19.1/fs/ext2/ext2.h 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/ext2.h 2006-11-08 04:57:46 +0100 @@ -166,6 +166,7 @@ extern const struct file_operations ext2 extern const struct address_space_operations ext2_aops; extern const struct address_space_operations ext2_aops_xip; extern const struct address_space_operations ext2_nobh_aops; +extern int ext2_sync_flags(struct inode *inode); /* namei.c */ extern struct 
inode_operations ext2_dir_inode_operations; diff -NurpP --minimal linux-2.6.19.1/fs/ext2/file.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/file.c --- linux-2.6.19.1/fs/ext2/file.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/file.c 2006-11-08 04:57:51 +0100 @@ -54,6 +54,7 @@ const struct file_operations ext2_file_o .release = ext2_release_file, .fsync = ext2_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -85,4 +86,5 @@ struct inode_operations ext2_file_inode_ #endif .setattr = ext2_setattr, .permission = ext2_permission, + .sync_flags = ext2_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ext2/ialloc.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/ialloc.c --- linux-2.6.19.1/fs/ext2/ialloc.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/ialloc.c 2006-11-08 04:57:50 +0100 @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -125,6 +127,7 @@ void ext2_free_inode (struct inode * ino ext2_xattr_delete_inode(inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); } es = EXT2_SB(sb)->s_es; @@ -464,6 +467,11 @@ struct inode *ext2_new_inode(struct inod if (!inode) return ERR_PTR(-ENOMEM); + inode->i_tag = dx_current_fstag(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto fail_dlim; + } ei = EXT2_I(inode); sbi = EXT2_SB(sb); es = sbi->s_es; @@ -577,7 +585,8 @@ got: inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; + ei->i_flags = EXT2_I(dir)->i_flags & + ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); /* dirsync is only applied to directories */ @@ -625,12 +634,15 @@ fail_free_drop: fail_drop: DQUOT_DROP(inode); 
+ DLIMIT_FREE_INODE(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); return ERR_PTR(err); fail: + DLIMIT_FREE_INODE(inode); +fail_dlim: make_bad_inode(inode); iput(inode); return ERR_PTR(err); diff -NurpP --minimal linux-2.6.19.1/fs/ext2/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/inode.c --- linux-2.6.19.1/fs/ext2/inode.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/inode.c 2006-11-30 18:53:18 +0100 @@ -31,6 +31,7 @@ #include #include #include +#include #include "ext2.h" #include "acl.h" #include "xip.h" @@ -913,7 +914,7 @@ void ext2_truncate (struct inode * inode return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return; ext2_discard_prealloc(inode); @@ -1042,25 +1043,70 @@ void ext2_set_inode_flags(struct inode * { unsigned int flags = EXT2_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & EXT2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT2_BARRIER_FL) + inode->i_flags |= S_BARRIER; + if (flags & EXT2_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT2_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT2_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; if (flags & EXT2_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT2_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; } +int ext2_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + + oldflags = EXT2_I(inode)->i_flags; + newflags = oldflags & ~(EXT2_APPEND_FL | + EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL | + EXT2_BARRIER_FL | EXT2_NOATIME_FL | + EXT2_SYNC_FL | EXT2_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= EXT2_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= EXT2_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + 
newflags |= EXT2_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= EXT2_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= EXT2_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= EXT2_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= EXT2_DIRSYNC_FL; + + if (oldflags ^ newflags) { + EXT2_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } + + return 0; +} + void ext2_read_inode (struct inode * inode) { struct ext2_inode_info *ei = EXT2_I(inode); ino_t ino = inode->i_ino; struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + uid_t uid; + gid_t gid; int n; #ifdef CONFIG_EXT2_FS_POSIX_ACL @@ -1071,12 +1117,17 @@ void ext2_read_inode (struct inode * ino goto bad_inode; inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if (!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_tag)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -1173,8 +1224,8 @@ static int ext2_update_inode(struct inod struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - uid_t uid = inode->i_uid; - gid_t gid = inode->i_gid; + uid_t uid = TAGINO_UID(DX_TAG(inode), 
inode->i_uid, inode->i_tag); + gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); int n; @@ -1209,6 +1260,9 @@ static int ext2_update_inode(struct inod raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_TAGGING_INTERN + raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(inode->i_size); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -1295,7 +1349,8 @@ int ext2_setattr(struct dentry *dentry, if (error) return error; if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || + (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) { error = DQUOT_TRANSFER(inode, iattr) ? -EDQUOT : 0; if (error) return error; diff -NurpP --minimal linux-2.6.19.1/fs/ext2/ioctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/ioctl.c --- linux-2.6.19.1/fs/ext2/ioctl.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/ioctl.c 2006-11-08 04:57:52 +0100 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,8 @@ int ext2_ioctl (struct inode * inode, st case EXT2_IOC_SETFLAGS: { unsigned int oldflags; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -52,7 +54,9 @@ int ext2_ioctl (struct inode * inode, st * * This test looks nicer. 
Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { + if ((oldflags & EXT2_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT2_APPEND_FL | + EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } @@ -71,7 +75,8 @@ int ext2_ioctl (struct inode * inode, st case EXT2_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(inode->i_generation, (int __user *) arg)) return -EFAULT; diff -NurpP --minimal linux-2.6.19.1/fs/ext2/namei.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/namei.c --- linux-2.6.19.1/fs/ext2/namei.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/namei.c 2006-11-30 18:53:18 +0100 @@ -31,6 +31,7 @@ */ #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -66,6 +67,7 @@ static struct dentry *ext2_lookup(struct inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); + dx_propagate_tag(nd, inode); } return d_splice_alias(inode, dentry); } @@ -391,6 +393,7 @@ struct inode_operations ext2_dir_inode_o #endif .setattr = ext2_setattr, .permission = ext2_permission, + .sync_flags = ext2_sync_flags, }; struct inode_operations ext2_special_inode_operations = { @@ -402,4 +405,5 @@ struct inode_operations ext2_special_ino #endif .setattr = ext2_setattr, .permission = ext2_permission, + .sync_flags = ext2_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ext2/super.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/super.c --- linux-2.6.19.1/fs/ext2/super.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/super.c 2006-12-06 05:50:27 +0100 @@ -324,7 +324,7 @@ enum { Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota + Opt_usrquota, 
Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid }; static match_table_t tokens = { @@ -352,6 +352,10 @@ static match_table_t tokens = { {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_xip, "xip"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, + {Opt_tag, "tagxid"}, {Opt_grpquota, "grpquota"}, {Opt_ignore, "noquota"}, {Opt_quota, "quota"}, @@ -420,6 +424,20 @@ static int parse_options (char * options case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + set_opt (sbi->s_mount_opt, TAGGED); + break; + case Opt_notag: + clear_opt (sbi->s_mount_opt, TAGGED); + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + set_opt (sbi->s_mount_opt, TAGGED); + break; +#endif case Opt_nocheck: clear_opt (sbi->s_mount_opt, CHECK); break; @@ -728,6 +746,8 @@ static int ext2_fill_super(struct super_ if (!parse_options ((char *) data, sbi)) goto failed_mount; + if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED) + sb->s_flags |= MS_TAGGED; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -1036,6 +1056,13 @@ static int ext2_remount (struct super_bl goto restore_opts; } + if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) && + !(sb->s_flags & MS_TAGGED)) { + printk("EXT2-fs: %s: tagging not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); diff -NurpP --minimal linux-2.6.19.1/fs/ext2/symlink.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/symlink.c --- linux-2.6.19.1/fs/ext2/symlink.c 2005-08-29 22:25:30 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/symlink.c 2006-11-08 04:57:46 +0100 @@ -38,6 +38,7 @@ struct inode_operations ext2_symlink_ino .listxattr = ext2_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext2_sync_flags, }; struct inode_operations ext2_fast_symlink_inode_operations = { @@ -49,4 +50,5 @@ struct inode_operations ext2_fast_symlin .listxattr = ext2_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext2_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ext2/xattr.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/xattr.c --- linux-2.6.19.1/fs/ext2/xattr.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext2/xattr.c 2006-11-08 04:57:50 +0100 @@ -60,6 +60,7 @@ #include #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -644,8 +645,15 @@ ext2_xattr_set2(struct inode *inode, str the inode. 
*/ ea_bdebug(new_bh, "reusing block"); + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, 1)) { + /* drop the buffer lock, as the quota failure path below does */ + unlock_buffer(new_bh); + goto cleanup; + } error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) { + DLIMIT_FREE_BLOCK(inode, 1); unlock_buffer(new_bh); goto cleanup; } @@ -739,6 +747,7 @@ ext2_xattr_set2(struct inode *inode, str le32_to_cpu(HDR(old_bh)->h_refcount) - 1); if (ce) mb_cache_entry_release(ce); + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); mark_buffer_dirty(old_bh); ea_bdebug(old_bh, "refcount now=%d", @@ -803,6 +812,7 @@ ext2_xattr_delete_inode(struct inode *in mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); } EXT2_I(inode)->i_file_acl = 0; diff -NurpP --minimal linux-2.6.19.1/fs/ext3/balloc.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/balloc.c --- linux-2.6.19.1/fs/ext3/balloc.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/balloc.c 2006-12-01 23:41:10 +0100 @@ -19,6 +19,8 @@ #include #include #include +#include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -613,8 +615,10 @@ void ext3_free_blocks(handle_t *handle, return; } ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); - if (dquot_freed_blocks) + if (dquot_freed_blocks) { + DLIMIT_FREE_BLOCK(inode, dquot_freed_blocks); DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + } return; } @@ -1349,18 +1353,33 @@ out: * * Check if filesystem has at least 1 free block available for allocation. 
*/ -static int ext3_has_free_blocks(struct ext3_sb_info *sbi) +static int ext3_has_free_blocks(struct super_block *sb) { - ext3_fsblk_t free_blocks, root_blocks; + struct ext3_sb_info *sbi = EXT3_SB(sb); + unsigned long long free_blocks, root_blocks; + int cond; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): free=%llu, root=%llu", + sb, free_blocks, root_blocks); + + DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks); + + cond = (free_blocks < root_blocks + 1 && + !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))); + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): %llu<%llu+1, %c, %u!=%u r=%d", + sb, free_blocks, root_blocks, + !capable(CAP_SYS_RESOURCE)?'1':'0', + sbi->s_resuid, current->fsuid, cond?0:1); + + return (cond ? 
0 : 1); } /** @@ -1377,7 +1396,7 @@ static int ext3_has_free_blocks(struct e */ int ext3_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3) + if (!ext3_has_free_blocks(sb) || (*retries)++ > 3) return 0; jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); @@ -1440,6 +1459,8 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h *errp = -EDQUOT; return 0; } + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto out_dlimit; sbi = EXT3_SB(sb); es = EXT3_SB(sb)->s_es; @@ -1456,7 +1477,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) my_rsv = &block_i->rsv_window_node; - if (!ext3_has_free_blocks(sbi)) { + if (!ext3_has_free_blocks(sb)) { *errp = -ENOSPC; goto out; } @@ -1650,6 +1671,9 @@ allocated: io_error: *errp = -EIO; out: + if (!performed_allocation) + DLIMIT_FREE_BLOCK(inode, 1); +out_dlimit: if (fatal) { *errp = fatal; ext3_std_error(sb, fatal); diff -NurpP --minimal linux-2.6.19.1/fs/ext3/file.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/file.c --- linux-2.6.19.1/fs/ext3/file.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/file.c 2006-11-08 04:57:51 +0100 @@ -121,6 +121,7 @@ const struct file_operations ext3_file_o .release = ext3_release_file, .fsync = ext3_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -135,5 +136,6 @@ struct inode_operations ext3_file_inode_ .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .sync_flags = ext3_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ext3/ialloc.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/ialloc.c --- linux-2.6.19.1/fs/ext3/ialloc.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/ialloc.c 2006-11-08 04:57:50 +0100 @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include @@ -127,6 +129,7 @@ void 
ext3_free_inode (handle_t *handle, ext3_xattr_delete_inode(handle, inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); is_directory = S_ISDIR(inode->i_mode); @@ -445,6 +448,12 @@ struct inode *ext3_new_inode(handle_t *h inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); + + inode->i_tag = dx_current_fstag(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto out_dlimit; + } ei = EXT3_I(inode); sbi = EXT3_SB(sb); @@ -566,7 +575,8 @@ got: ei->i_dir_start_lookup = 0; ei->i_disksize = 0; - ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; + ei->i_flags = EXT3_I(dir)->i_flags & + ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); /* dirsync only applies to directories */ @@ -621,6 +631,8 @@ got: fail: ext3_std_error(sb, err); out: + DLIMIT_FREE_INODE(inode); +out_dlimit: iput(inode); ret = ERR_PTR(err); really_out: @@ -632,6 +644,7 @@ fail_free_drop: fail_drop: DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); diff -NurpP --minimal linux-2.6.19.1/fs/ext3/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/inode.c --- linux-2.6.19.1/fs/ext3/inode.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/inode.c 2006-11-30 19:02:16 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -2246,7 +2247,7 @@ void ext3_truncate(struct inode *inode) return; if (ext3_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return; /* @@ -2568,19 +2569,77 @@ void ext3_set_inode_flags(struct inode * { unsigned int flags = EXT3_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & EXT3_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; 
+ if (flags & EXT3_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT3_BARRIER_FL) + inode->i_flags |= S_BARRIER; + if (flags & EXT3_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT3_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT3_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; if (flags & EXT3_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT3_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; } +int ext3_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + int err = 0; + + oldflags = EXT3_I(inode)->i_flags; + newflags = oldflags & ~(EXT3_APPEND_FL | + EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL | + EXT3_BARRIER_FL | EXT3_NOATIME_FL | + EXT3_SYNC_FL | EXT3_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= EXT3_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= EXT3_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= EXT3_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= EXT3_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= EXT3_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= EXT3_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= EXT3_DIRSYNC_FL; + + if (oldflags ^ newflags) { + handle_t *handle; + struct ext3_iloc iloc; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + EXT3_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + flags_err: + ext3_journal_stop(handle); + } + return err; +} + void ext3_read_inode(struct inode * inode) { struct ext3_iloc iloc; @@ -2588,6 +2647,8 @@ void ext3_read_inode(struct inode * inod struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh; int block; + uid_t uid; + gid_t gid; #ifdef CONFIG_EXT3_FS_POSIX_ACL ei->i_acl = EXT3_ACL_NOT_CACHED; @@ -2600,12 +2661,17 @@ void 
ext3_read_inode(struct inode * inod bh = iloc.bh; raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if(!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_tag)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -2729,6 +2795,8 @@ static int ext3_do_update_inode(handle_t struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; + uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); + gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); int err = 0, rc, block; /* For fields not not tracking in the in-memory inode, @@ -2738,29 +2806,32 @@ static int ext3_do_update_inode(handle_t raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); /* * Fix up interoperability with old kernels. 
Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ if(!ei->i_dtime) { raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); + cpu_to_le16(high_16_bits(uid)); raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); + cpu_to_le16(high_16_bits(gid)); } else { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); + cpu_to_le16(fs_high2lowuid(uid)); raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); + cpu_to_le16(fs_high2lowgid(gid)); raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_TAGGING_INTERN + raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -2913,7 +2984,8 @@ int ext3_setattr(struct dentry *dentry, return error; if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { handle_t *handle; /* (user+group)*(old+new) structure, inode write (sb, @@ -2935,6 +3007,8 @@ int ext3_setattr(struct dentry *dentry, inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) + inode->i_tag = attr->ia_tag; error = ext3_mark_inode_dirty(handle, inode); ext3_journal_stop(handle); } diff -NurpP --minimal linux-2.6.19.1/fs/ext3/ioctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/ioctl.c --- linux-2.6.19.1/fs/ext3/ioctl.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/ioctl.c 2006-11-30 19:09:12 +0100 @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include int ext3_ioctl (struct inode * inode, struct 
file * filp, unsigned int cmd, @@ -37,7 +39,8 @@ int ext3_ioctl (struct inode * inode, st unsigned int oldflags; unsigned int jflag; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -61,7 +64,9 @@ int ext3_ioctl (struct inode * inode, st * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { + if ((oldflags & EXT3_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT3_APPEND_FL | + EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) { mutex_unlock(&inode->i_mutex); return -EPERM; @@ -123,7 +128,8 @@ flags_err: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(generation, (int __user *) arg)) return -EFAULT; @@ -177,7 +183,8 @@ flags_err: if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -212,7 +219,8 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(n_blocks_count, (__u32 __user *)arg)) @@ -233,7 +241,8 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, @@ -248,6 +257,38 @@ flags_err: return err; } +#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_TAGGING_NONE) + case EXT3_IOC_SETTAG: { + handle_t *handle; + struct ext3_iloc iloc; + int tag; + int err; + + /* fixme: if stealth, return -ENOTTY 
*/ + if (!capable(CAP_CONTEXT)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if (!(inode->i_sb->s_flags & MS_TAGGED)) + return -ENOSYS; + if (get_user(tag, (int __user *) arg)) + return -EFAULT; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (!err) { + inode->i_tag = (tag & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + } + + /* the handle must be stopped on the error path too */ + ext3_journal_stop(handle); + return err; + } +#endif default: return -ENOTTY; diff -NurpP --minimal linux-2.6.19.1/fs/ext3/namei.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/namei.c --- linux-2.6.19.1/fs/ext3/namei.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/namei.c 2006-11-30 18:53:18 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include "namei.h" #include "xattr.h" @@ -1010,6 +1011,7 @@ static struct dentry *ext3_lookup(struct if (!inode) return ERR_PTR(-EACCES); + dx_propagate_tag(nd, inode); } return d_splice_alias(inode, dentry); } @@ -2383,6 +2385,7 @@ struct inode_operations ext3_dir_inode_o .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .sync_flags = ext3_sync_flags, }; struct inode_operations ext3_special_inode_operations = { @@ -2394,4 +2397,5 @@ struct inode_operations ext3_special_ino .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .sync_flags = ext3_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ext3/super.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/super.c --- linux-2.6.19.1/fs/ext3/super.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/super.c 2006-12-06 05:50:27 +0100 @@ -677,7 +677,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota + Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid }; static
match_table_t tokens = { @@ -727,6 +727,10 @@ static match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, + {Opt_tag, "tagxid"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -820,6 +824,20 @@ static int parse_options (char *options, case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + set_opt (sbi->s_mount_opt, TAGGED); + break; + case Opt_notag: + clear_opt (sbi->s_mount_opt, TAGGED); + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + set_opt (sbi->s_mount_opt, TAGGED); + break; +#endif case Opt_nocheck: clear_opt (sbi->s_mount_opt, CHECK); break; @@ -1482,6 +1500,9 @@ static int ext3_fill_super (struct super NULL, 0)) goto failed_mount; + if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGGED) + sb->s_flags |= MS_TAGGED; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -2297,6 +2318,12 @@ static int ext3_remount (struct super_bl if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) ext3_abort(sb, __FUNCTION__, "Abort forced by user"); + if ((sbi->s_mount_opt & EXT3_MOUNT_TAGGED) && + !(sb->s_flags & MS_TAGGED)) { + printk("EXT3-fs: %s: tagging not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); diff -NurpP --minimal linux-2.6.19.1/fs/ext3/symlink.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/symlink.c --- linux-2.6.19.1/fs/ext3/symlink.c 2005-08-29 22:25:30 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/symlink.c 2006-11-08 04:57:46 +0100 @@ -40,6 +40,7 @@ struct inode_operations ext3_symlink_ino .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext3_sync_flags, }; struct inode_operations ext3_fast_symlink_inode_operations = { @@ -51,4 +52,5 @@ struct inode_operations ext3_fast_symlin .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext3_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ext3/xattr.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/xattr.c --- linux-2.6.19.1/fs/ext3/xattr.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext3/xattr.c 2006-11-08 04:57:50 +0100 @@ -58,6 +58,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -495,6 +496,7 @@ ext3_xattr_release_block(handle_t *handl ext3_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); unlock_buffer(bh); ea_bdebug(bh, "refcount now=%d; releasing", @@ -763,11 +765,14 @@ inserted: if (new_bh == bs->bh) ea_bdebug(new_bh, "keeping"); else { + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto cleanup; /* The old block is released after updating the inode. 
*/ error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) - goto cleanup; + goto cleanup_dlimit; error = ext3_journal_get_write_access(handle, new_bh); if (error) @@ -844,6 +849,8 @@ cleanup: cleanup_dquot: DQUOT_FREE_BLOCK(inode, 1); +cleanup_dlimit: + DLIMIT_FREE_BLOCK(inode, 1); goto cleanup; bad_block: diff -NurpP --minimal linux-2.6.19.1/fs/ext4/balloc.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/balloc.c --- linux-2.6.19.1/fs/ext4/balloc.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/balloc.c 2006-12-02 01:51:51 +0100 @@ -19,6 +19,8 @@ #include #include #include +#include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -630,8 +632,10 @@ void ext4_free_blocks(handle_t *handle, return; } ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); - if (dquot_freed_blocks) + if (dquot_freed_blocks) { + DLIMIT_FREE_BLOCK(inode, dquot_freed_blocks); DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + } return; } @@ -1366,18 +1370,33 @@ out: * * Check if filesystem has at least 1 free block available for allocation. 
*/ -static int ext4_has_free_blocks(struct ext4_sb_info *sbi) +static int ext4_has_free_blocks(struct super_block *sb) { - ext4_fsblk_t free_blocks, root_blocks; + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t free_blocks, root_blocks; + int cond; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = ext4_r_blocks_count(sbi->s_es); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + + vxdprintk(VXD_CBIT(dlim, 3), + "ext4_has_free_blocks(%p): free=%llu, root=%llu", + sb, free_blocks, root_blocks); + + DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks); + + cond = (free_blocks < root_blocks + 1 && + !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))); + + vxdprintk(VXD_CBIT(dlim, 3), + "ext4_has_free_blocks(%p): %llu<%llu+1, %c, %u!=%u r=%d", + sb, free_blocks, root_blocks, + !capable(CAP_SYS_RESOURCE)?'1':'0', + sbi->s_resuid, current->fsuid, cond?0:1); + + return (cond ? 
0 : 1); } /** @@ -1394,7 +1413,7 @@ static int ext4_has_free_blocks(struct e */ int ext4_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3) + if (!ext4_has_free_blocks(sb) || (*retries)++ > 3) return 0; jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); @@ -1457,6 +1476,8 @@ ext4_fsblk_t ext4_new_blocks(handle_t *h *errp = -EDQUOT; return 0; } + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto out_dlimit; sbi = EXT4_SB(sb); es = EXT4_SB(sb)->s_es; @@ -1473,7 +1494,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *h if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) my_rsv = &block_i->rsv_window_node; - if (!ext4_has_free_blocks(sbi)) { + if (!ext4_has_free_blocks(sb)) { *errp = -ENOSPC; goto out; } @@ -1664,6 +1685,9 @@ allocated: io_error: *errp = -EIO; out: + if (!performed_allocation) + DLIMIT_FREE_BLOCK(inode, 1); +out_dlimit: if (fatal) { *errp = fatal; ext4_std_error(sb, fatal); diff -NurpP --minimal linux-2.6.19.1/fs/ext4/file.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/file.c --- linux-2.6.19.1/fs/ext4/file.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/file.c 2006-12-01 23:01:47 +0100 @@ -121,6 +121,7 @@ const struct file_operations ext4_file_o .release = ext4_release_file, .fsync = ext4_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -135,5 +136,6 @@ struct inode_operations ext4_file_inode_ .removexattr = generic_removexattr, #endif .permission = ext4_permission, + .sync_flags = ext4_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ext4/ialloc.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/ialloc.c --- linux-2.6.19.1/fs/ext4/ialloc.c 2006-11-30 21:19:20 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/ialloc.c 2006-12-01 23:07:02 +0100 @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include "xattr.h" @@ -127,6 
+129,7 @@ void ext4_free_inode (handle_t *handle, ext4_xattr_delete_inode(handle, inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); is_directory = S_ISDIR(inode->i_mode); @@ -448,6 +451,12 @@ struct inode *ext4_new_inode(handle_t *h inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); + + inode->i_tag = dx_current_fstag(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto out_dlimit; + } ei = EXT4_I(inode); sbi = EXT4_SB(sb); @@ -569,7 +578,8 @@ got: ei->i_dir_start_lookup = 0; ei->i_disksize = 0; - ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL; + ei->i_flags = EXT4_I(dir)->i_flags & + ~(EXT4_INDEX_FL|EXT4_IUNLINK_FL|EXT4_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL); /* dirsync only applies to directories */ @@ -635,6 +645,8 @@ got: fail: ext4_std_error(sb, err); out: + DLIMIT_FREE_INODE(inode); +out_dlimit: iput(inode); ret = ERR_PTR(err); really_out: @@ -646,6 +658,7 @@ fail_free_drop: fail_drop: DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); diff -NurpP --minimal linux-2.6.19.1/fs/ext4/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/inode.c --- linux-2.6.19.1/fs/ext4/inode.c 2006-11-30 21:19:20 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/inode.c 2006-12-01 23:01:47 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -2245,7 +2246,7 @@ void ext4_truncate(struct inode *inode) return; if (ext4_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return; /* @@ -2571,19 +2572,77 @@ void ext4_set_inode_flags(struct inode * { unsigned int flags = EXT4_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & EXT4_IMMUTABLE_FL) + inode->i_flags 
|= S_IMMUTABLE; + if (flags & EXT4_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT4_BARRIER_FL) + inode->i_flags |= S_BARRIER; + if (flags & EXT4_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT4_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT4_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; if (flags & EXT4_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; } +int ext4_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + int err = 0; + + oldflags = EXT4_I(inode)->i_flags; + newflags = oldflags & ~(EXT4_APPEND_FL | + EXT4_IMMUTABLE_FL | EXT4_IUNLINK_FL | + EXT4_BARRIER_FL | EXT4_NOATIME_FL | + EXT4_SYNC_FL | EXT4_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= EXT4_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= EXT4_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= EXT4_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= EXT4_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= EXT4_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= EXT4_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= EXT4_DIRSYNC_FL; + + if (oldflags ^ newflags) { + handle_t *handle; + struct ext4_iloc iloc; + + handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + EXT4_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + flags_err: + ext4_journal_stop(handle); + } + return err; +} + void ext4_read_inode(struct inode * inode) { struct ext4_iloc iloc; @@ -2591,6 +2650,8 @@ void ext4_read_inode(struct inode * inod struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh; int block; + uid_t uid; + gid_t gid; #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; @@ -2603,12 
+2664,17 @@ void ext4_read_inode(struct inode * inod bh = iloc.bh; raw_inode = ext4_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if(!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_tag)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -2736,6 +2802,8 @@ static int ext4_do_update_inode(handle_t struct ext4_inode *raw_inode = ext4_raw_inode(iloc); struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; + uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); + gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); int err = 0, rc, block; /* For fields not not tracking in the in-memory inode, @@ -2745,29 +2813,32 @@ static int ext4_do_update_inode(handle_t raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); /* * Fix up interoperability with old kernels. 
Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ if(!ei->i_dtime) { raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); + cpu_to_le16(high_16_bits(uid)); raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); + cpu_to_le16(high_16_bits(gid)); } else { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); + cpu_to_le16(fs_high2lowuid(uid)); raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); + cpu_to_le16(fs_high2lowgid(gid)); raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_TAGGING_INTERN + raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -2924,7 +2995,8 @@ int ext4_setattr(struct dentry *dentry, return error; if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { handle_t *handle; /* (user+group)*(old+new) structure, inode write (sb, @@ -2946,6 +3018,8 @@ int ext4_setattr(struct dentry *dentry, inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) + inode->i_tag = attr->ia_tag; error = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); } diff -NurpP --minimal linux-2.6.19.1/fs/ext4/ioctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/ioctl.c --- linux-2.6.19.1/fs/ext4/ioctl.c 2006-11-30 21:19:20 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/ioctl.c 2006-12-01 23:08:39 +0100 @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include int ext4_ioctl (struct inode * inode, struct 
file * filp, unsigned int cmd, @@ -37,7 +39,8 @@ int ext4_ioctl (struct inode * inode, st unsigned int oldflags; unsigned int jflag; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -61,7 +64,9 @@ int ext4_ioctl (struct inode * inode, st * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { + if ((oldflags & EXT4_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT4_APPEND_FL | + EXT4_IMMUTABLE_FL | EXT4_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) { mutex_unlock(&inode->i_mutex); return -EPERM; @@ -123,7 +128,8 @@ flags_err: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(generation, (int __user *) arg)) return -EFAULT; @@ -177,7 +183,8 @@ flags_err: if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -212,7 +219,8 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(n_blocks_count, (__u32 __user *)arg)) @@ -233,7 +241,8 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, @@ -248,6 +257,39 @@ flags_err: return err; } +#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_TAGGING_NONE) + case EXT4_IOC_SETTAG: { + handle_t *handle; + struct ext4_iloc iloc; + int tag; + int err; + + /* fixme: if stealth, return -ENOTTY 
*/ + if (!capable(CAP_CONTEXT)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if (!(inode->i_sb->s_flags & MS_TAGGED)) + return -ENOSYS; + if (get_user(tag, (int __user *) arg)) + return -EFAULT; + + handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (!err) { + inode->i_tag = (tag & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + } + + /* the handle must be stopped on the error path too */ + ext4_journal_stop(handle); + return err; + } +#endif + default: return -ENOTTY; } diff -NurpP --minimal linux-2.6.19.1/fs/ext4/namei.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/namei.c --- linux-2.6.19.1/fs/ext4/namei.c 2006-11-30 21:19:20 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/namei.c 2006-12-01 23:01:47 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include "namei.h" #include "xattr.h" @@ -1008,6 +1009,7 @@ static struct dentry *ext4_lookup(struct if (!inode) return ERR_PTR(-EACCES); + dx_propagate_tag(nd, inode); } return d_splice_alias(inode, dentry); } @@ -2381,6 +2383,7 @@ struct inode_operations ext4_dir_inode_o .removexattr = generic_removexattr, #endif .permission = ext4_permission, + .sync_flags = ext4_sync_flags, }; struct inode_operations ext4_special_inode_operations = { @@ -2392,4 +2395,5 @@ struct inode_operations ext4_special_ino .removexattr = generic_removexattr, #endif .permission = ext4_permission, + .sync_flags = ext4_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ext4/super.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/super.c --- linux-2.6.19.1/fs/ext4/super.c 2006-11-30 21:19:20 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/super.c 2006-12-06 06:37:26 +0100 @@ -728,7 +728,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota, Opt_extents, + Opt_grpquota, Opt_extents, Opt_tag,
Opt_notag, Opt_tagid }; static match_table_t tokens = { @@ -779,6 +779,10 @@ static match_table_t tokens = { {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, {Opt_extents, "extents"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, + {Opt_tag, "tagxid"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -872,6 +876,20 @@ static int parse_options (char *options, case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + set_opt (sbi->s_mount_opt, TAGGED); + break; + case Opt_notag: + clear_opt (sbi->s_mount_opt, TAGGED); + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + set_opt (sbi->s_mount_opt, TAGGED); + break; +#endif case Opt_nocheck: clear_opt (sbi->s_mount_opt, CHECK); break; @@ -1539,6 +1557,9 @@ static int ext4_fill_super (struct super NULL, 0)) goto failed_mount; + if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED) + sb->s_flags |= MS_TAGGED; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -2370,6 +2391,12 @@ static int ext4_remount (struct super_bl if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) ext4_abort(sb, __FUNCTION__, "Abort forced by user"); + if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) && + !(sb->s_flags & MS_TAGGED)) { + printk("EXT4-fs: %s: tagging not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); diff -NurpP --minimal linux-2.6.19.1/fs/ext4/symlink.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/symlink.c --- linux-2.6.19.1/fs/ext4/symlink.c 2006-11-30 21:19:20 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/symlink.c 2006-12-01 23:01:47 +0100 @@ -40,6 +40,7 @@ struct inode_operations ext4_symlink_ino .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext4_sync_flags, }; struct inode_operations ext4_fast_symlink_inode_operations = { @@ -51,4 +52,5 @@ struct inode_operations ext4_fast_symlin .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext4_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ext4/xattr.c linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/xattr.c --- linux-2.6.19.1/fs/ext4/xattr.c 2006-11-30 21:19:20 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ext4/xattr.c 2006-12-01 23:01:47 +0100 @@ -58,6 +58,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -495,6 +496,7 @@ ext4_xattr_release_block(handle_t *handl ext4_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); unlock_buffer(bh); ea_bdebug(bh, "refcount now=%d; releasing", @@ -763,11 +765,14 @@ inserted: if (new_bh == bs->bh) ea_bdebug(new_bh, "keeping"); else { + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto cleanup; /* The old block is released after updating the inode. 
*/ error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) - goto cleanup; + goto cleanup_dlimit; error = ext4_journal_get_write_access(handle, new_bh); if (error) @@ -844,6 +849,8 @@ cleanup: cleanup_dquot: DQUOT_FREE_BLOCK(inode, 1); +cleanup_dlimit: + DLIMIT_FREE_BLOCK(inode, 1); goto cleanup; bad_block: diff -NurpP --minimal linux-2.6.19.1/fs/fcntl.c linux-2.6.19.1-vs2.2.0-rc6/fs/fcntl.c --- linux-2.6.19.1/fs/fcntl.c 2006-11-30 21:19:23 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/fcntl.c 2006-11-08 04:57:48 +0100 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -85,6 +86,8 @@ repeat: error = -EMFILE; if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; + if (!vx_files_avail(1)) + goto out; error = expand_files(files, newfd); if (error < 0) @@ -125,6 +128,7 @@ static int dupfd(struct file *file, unsi FD_SET(fd, fdt->open_fds); FD_CLR(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); + vx_openfd_inc(fd); fd_install(fd, file); } else { spin_unlock(&files->file_lock); @@ -177,6 +181,9 @@ asmlinkage long sys_dup2(unsigned int ol if (tofree) filp_close(tofree, files); + else + vx_openfd_inc(newfd); /* fd was unused */ + err = newfd; out: return err; diff -NurpP --minimal linux-2.6.19.1/fs/file_table.c linux-2.6.19.1-vs2.2.0-rc6/fs/file_table.c --- linux-2.6.19.1/fs/file_table.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/file_table.c 2006-11-08 04:57:48 +0100 @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include @@ -120,6 +122,8 @@ struct file *get_empty_filp(void) f->f_gid = tsk->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ + f->f_xid = vx_current_xid(); + vx_files_inc(f); return f; over: @@ -175,6 +179,8 @@ void fastcall __fput(struct file *file) if (file->f_mode & FMODE_WRITE) put_write_access(inode); put_pid(file->f_owner.pid); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file->f_dentry = NULL; file->f_vfsmnt = NULL; @@ -240,6 +246,8 @@ void put_filp(struct file 
*file) { if (atomic_dec_and_test(&file->f_count)) { security_file_free(file); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file_free(file); } diff -NurpP --minimal linux-2.6.19.1/fs/gfs2/log.c linux-2.6.19.1-vs2.2.0-rc6/fs/gfs2/log.c --- linux-2.6.19.1/fs/gfs2/log.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/gfs2/log.c 2006-11-08 22:48:20 +0100 @@ -319,7 +319,7 @@ static u64 log_bmap(struct gfs2_sbd *sdp bh_map.b_size = 1 << inode->i_blkbits; error = gfs2_block_map(inode, lbn, 0, &bh_map); if (error || !bh_map.b_blocknr) - printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn); + printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, (unsigned long long)bh_map.b_blocknr, lbn); gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr); return bh_map.b_blocknr; diff -NurpP --minimal linux-2.6.19.1/fs/hfsplus/ioctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/hfsplus/ioctl.c --- linux-2.6.19.1/fs/hfsplus/ioctl.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/hfsplus/ioctl.c 2006-11-08 04:57:52 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "hfsplus_fs.h" @@ -35,7 +36,8 @@ int hfsplus_ioctl(struct inode *inode, s flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ return put_user(flags, (int __user *)arg); case HFSPLUS_IOC_EXT2_SETFLAGS: { - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) diff -NurpP --minimal linux-2.6.19.1/fs/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/inode.c --- linux-2.6.19.1/fs/inode.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/inode.c 2006-12-06 05:50:27 +0100 @@ -115,6 +115,9 @@ static struct inode *alloc_inode(struct struct address_space * const mapping = &inode->i_data; inode->i_sb = sb; + + /* essential because of inode slab reuse */ + inode->i_tag = 0; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; atomic_set(&inode->i_count, 1); @@ -233,6 +236,8 @@
void __iget(struct inode * inode) inodes_stat.nr_unused--; } +EXPORT_SYMBOL_GPL(__iget); + /** * clear_inode - clear an inode * @inode: inode to clear diff -NurpP --minimal linux-2.6.19.1/fs/ioctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/ioctl.c --- linux-2.6.19.1/fs/ioctl.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ioctl.c 2006-11-30 18:53:18 +0100 @@ -12,10 +12,19 @@ #include #include #include +#include +#include +#include #include #include + +#ifdef CONFIG_VSERVER_LEGACY +extern int vx_proc_ioctl(struct inode *, struct file *, + unsigned int, unsigned long); +#endif + static long do_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -146,6 +155,48 @@ int vfs_ioctl(struct file *filp, unsigne else error = -ENOTTY; break; +#ifdef CONFIG_VSERVER_LEGACY +#ifndef CONFIG_TAGGING_NONE + case FIOC_GETTAG: { + struct inode *inode = filp->f_dentry->d_inode; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (capable(CAP_CONTEXT)) + error = put_user(inode->i_tag, (int __user *) arg); + break; + } + case FIOC_SETTAG: { + struct inode *inode = filp->f_dentry->d_inode; + int tag; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -ENOSYS; + if (!(inode->i_sb->s_flags & MS_TAGGED)) + break; + error = -EFAULT; + if (get_user(tag, (int __user *) arg)) + break; + error = 0; + inode->i_tag = (tag & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + break; + } +#endif + case FIOC_GETXFLG: + case FIOC_SETXFLG: + error = -ENOTTY; + if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC) + error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + break; +#endif default: if (S_ISREG(filp->f_dentry->d_inode->i_mode)) error = file_ioctl(filp, cmd, arg); diff -NurpP --minimal linux-2.6.19.1/fs/ioprio.c linux-2.6.19.1-vs2.2.0-rc6/fs/ioprio.c --- linux-2.6.19.1/fs/ioprio.c 2006-11-30 21:19:25 +0100 +++ 
linux-2.6.19.1-vs2.2.0-rc6/fs/ioprio.c 2006-11-30 18:53:18 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include static int set_task_ioprio(struct task_struct *task, int ioprio) { @@ -109,7 +110,7 @@ asmlinkage long sys_ioprio_set(int which if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) break; @@ -197,7 +198,7 @@ asmlinkage long sys_ioprio_get(int which if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) break; diff -NurpP --minimal linux-2.6.19.1/fs/jfs/acl.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/acl.c --- linux-2.6.19.1/fs/jfs/acl.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/acl.c 2006-11-08 04:57:46 +0100 @@ -232,7 +232,8 @@ int jfs_setattr(struct dentry *dentry, s return rc; if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || + (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) { if (DQUOT_TRANSFER(inode, iattr)) return -EDQUOT; } diff -NurpP --minimal linux-2.6.19.1/fs/jfs/file.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/file.c --- linux-2.6.19.1/fs/jfs/file.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/file.c 2006-11-08 21:52:37 +0100 @@ -98,6 +98,7 @@ struct inode_operations jfs_file_inode_o .setattr = jfs_setattr, .permission = jfs_permission, #endif + .sync_flags = jfs_sync_flags, }; const struct file_operations jfs_file_operations = { @@ -109,6 +110,7 @@ const struct file_operations jfs_file_op .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .fsync = jfs_fsync, diff -NurpP --minimal linux-2.6.19.1/fs/jfs/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/inode.c --- 
linux-2.6.19.1/fs/jfs/inode.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/inode.c 2006-11-08 04:57:50 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -144,6 +145,7 @@ void jfs_delete_inode(struct inode *inod DQUOT_INIT(inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); } clear_inode(inode); diff -NurpP --minimal linux-2.6.19.1/fs/jfs/ioctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/ioctl.c --- linux-2.6.19.1/fs/jfs/ioctl.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/ioctl.c 2006-11-08 04:57:51 +0100 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -64,7 +65,8 @@ int jfs_ioctl(struct inode * inode, stru case JFS_IOC_SETFLAGS: { unsigned int oldflags; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -84,8 +86,8 @@ int jfs_ioctl(struct inode * inode, stru * the relevant capability. 
*/ if ((oldflags & JFS_IMMUTABLE_FL) || - ((flags ^ oldflags) & - (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { + ((flags ^ oldflags) & (JFS_APPEND_FL | + JFS_IMMUTABLE_FL | JFS_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } diff -NurpP --minimal linux-2.6.19.1/fs/jfs/jfs_dinode.h linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_dinode.h --- linux-2.6.19.1/fs/jfs/jfs_dinode.h 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_dinode.h 2006-11-08 04:57:51 +0100 @@ -162,9 +162,12 @@ struct dinode { #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */ #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */ -#define JFS_FL_USER_VISIBLE 0x03F80000 +#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define JFS_IUNLINK_FL 0x08000000 /* Immutable unlink */ + +#define JFS_FL_USER_VISIBLE 0x0FF80000 #define JFS_FL_USER_MODIFIABLE 0x03F80000 -#define JFS_FL_INHERIT 0x03C80000 +#define JFS_FL_INHERIT 0x0BC80000 /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */ #define JFS_IOC_GETFLAGS _IOR('f', 1, long) diff -NurpP --minimal linux-2.6.19.1/fs/jfs/jfs_dtree.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_dtree.c --- linux-2.6.19.1/fs/jfs/jfs_dtree.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_dtree.c 2006-11-08 04:57:50 +0100 @@ -102,6 +102,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -383,10 +384,10 @@ static u32 add_index(tid_t tid, struct i */ if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage)) goto clean_up; - if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { - DQUOT_FREE_BLOCK(ip, sbi->nbperpage); - goto clean_up; - } + if (DLIMIT_ALLOC_BLOCK(ip, sbi->nbperpage)) + goto clean_up_dquot; + if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) + goto clean_up_dlimit; /* * Save the table, we're going to overwrite it with the @@ -479,6 +480,12 @@ static u32 add_index(tid_t tid, struct i return index; + clean_up_dlimit: + DLIMIT_FREE_BLOCK(ip, sbi->nbperpage); + 
+ clean_up_dquot: + DQUOT_FREE_BLOCK(ip, sbi->nbperpage); + clean_up: jfs_ip->next_index--; @@ -952,6 +959,7 @@ static int dtSplitUp(tid_t tid, struct tlock *tlck; struct lv *lv; int quota_allocation = 0; + int dlimit_allocation = 0; /* get split page */ smp = split->mp; @@ -1036,6 +1044,12 @@ static int dtSplitUp(tid_t tid, } quota_allocation += n; + if (DLIMIT_ALLOC_BLOCK(ip, n)) { + rc = -ENOSPC; + goto extendOut; + } + dlimit_allocation += n; + if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, (s64) n, &nxaddr))) goto extendOut; @@ -1309,6 +1323,9 @@ static int dtSplitUp(tid_t tid, freeKeyName: kfree(key.name); + /* Rollback dlimit allocation */ + if (rc && dlimit_allocation) + DLIMIT_FREE_BLOCK(ip, dlimit_allocation); /* Rollback quota allocation */ if (rc && quota_allocation) DQUOT_FREE_BLOCK(ip, quota_allocation); @@ -1376,6 +1393,12 @@ static int dtSplitPage(tid_t tid, struct release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); @@ -1926,6 +1949,12 @@ static int dtSplitRoot(tid_t tid, release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } BT_MARK_DIRTY(rmp, ip); /* @@ -2292,6 +2321,8 @@ static int dtDeleteUp(tid_t tid, struct xlen = lengthPXD(&fp->header.self); + /* Free dlimit allocation. */ + DLIMIT_FREE_BLOCK(ip, xlen); /* Free quota allocation. 
*/ DQUOT_FREE_BLOCK(ip, xlen); @@ -2368,6 +2399,8 @@ static int dtDeleteUp(tid_t tid, struct xlen = lengthPXD(&p->header.self); + /* Free dlimit allocation */ + DLIMIT_FREE_BLOCK(ip, xlen); /* Free quota allocation */ DQUOT_FREE_BLOCK(ip, xlen); diff -NurpP --minimal linux-2.6.19.1/fs/jfs/jfs_extent.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_extent.c --- linux-2.6.19.1/fs/jfs/jfs_extent.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_extent.c 2006-11-08 04:57:50 +0100 @@ -18,6 +18,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_superblock.h" @@ -147,6 +148,14 @@ extAlloc(struct inode *ip, s64 xlen, s64 return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) { + DQUOT_FREE_BLOCK(ip, nxlen); + dbFree(ip, nxaddr, (s64) nxlen); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + return -ENOSPC; + } + /* determine the value of the extent flag */ xflag = abnr ? XAD_NOTRECORDED : 0; @@ -164,6 +173,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 */ if (rc) { dbFree(ip, nxaddr, nxlen); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); @@ -261,6 +271,13 @@ int extRealloc(struct inode *ip, s64 nxl mutex_unlock(&JFS_IP(ip)->commit_mutex); return -EDQUOT; } + /* Allocate blocks to dlimit. 
*/ + if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) { + DQUOT_FREE_BLOCK(ip, nxlen); + dbFree(ip, nxaddr, (s64) nxlen); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + return -ENOSPC; + } delta = nxlen - xlen; @@ -297,6 +314,7 @@ int extRealloc(struct inode *ip, s64 nxl /* extend the extent */ if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { dbFree(ip, xaddr + xlen, delta); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); goto exit; } @@ -308,6 +326,7 @@ int extRealloc(struct inode *ip, s64 nxl */ if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { dbFree(ip, nxaddr, nxlen); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); goto exit; } diff -NurpP --minimal linux-2.6.19.1/fs/jfs/jfs_filsys.h linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_filsys.h --- linux-2.6.19.1/fs/jfs/jfs_filsys.h 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_filsys.h 2006-11-08 04:57:46 +0100 @@ -84,6 +84,7 @@ #define JFS_DIR_INDEX 0x00200000 /* Persistant index for */ /* directory entries */ +#define JFS_TAGGED 0x00800000 /* Context Tagging */ /* * buffer cache configuration diff -NurpP --minimal linux-2.6.19.1/fs/jfs/jfs_imap.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_imap.c --- linux-2.6.19.1/fs/jfs/jfs_imap.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_imap.c 2006-11-30 18:53:18 +0100 @@ -45,6 +45,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" @@ -3075,6 +3076,8 @@ static int copy_from_dinode(struct dinod { struct jfs_inode_info *jfs_ip = JFS_IP(ip); struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + uid_t uid; + gid_t gid; jfs_ip->fileset = le32_to_cpu(dip->di_fileset); jfs_ip->mode2 = le32_to_cpu(dip->di_mode); @@ -3094,14 +3097,18 @@ static int copy_from_dinode(struct dinod } ip->i_nlink = le32_to_cpu(dip->di_nlink); - jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); + uid = le32_to_cpu(dip->di_uid); + gid = le32_to_cpu(dip->di_gid); + ip->i_tag = INOTAG_TAG(DX_TAG(ip), uid, gid, 0); + +
jfs_ip->saved_uid = INOTAG_UID(DX_TAG(ip), uid, gid); if (sbi->uid == -1) ip->i_uid = jfs_ip->saved_uid; else { ip->i_uid = sbi->uid; } - jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); + jfs_ip->saved_gid = INOTAG_GID(DX_TAG(ip), uid, gid); if (sbi->gid == -1) ip->i_gid = jfs_ip->saved_gid; else { @@ -3166,14 +3173,12 @@ static void copy_to_dinode(struct dinode dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - if (sbi->uid == -1) - dip->di_uid = cpu_to_le32(ip->i_uid); - else - dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); - if (sbi->gid == -1) - dip->di_gid = cpu_to_le32(ip->i_gid); - else - dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); + + dip->di_uid = cpu_to_le32(TAGINO_UID(DX_TAG(ip), + (sbi->uid == -1) ? ip->i_uid : jfs_ip->saved_uid, ip->i_tag)); + dip->di_gid = cpu_to_le32(TAGINO_GID(DX_TAG(ip), + (sbi->gid == -1) ? ip->i_gid : jfs_ip->saved_gid, ip->i_tag)); + /* * mode2 is only needed for storing the higher order bits. 
* Trust i_mode for the lower order ones diff -NurpP --minimal linux-2.6.19.1/fs/jfs/jfs_inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_inode.c --- linux-2.6.19.1/fs/jfs/jfs_inode.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_inode.c 2006-11-08 04:57:51 +0100 @@ -18,6 +18,8 @@ #include #include +#include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -30,19 +32,59 @@ void jfs_set_inode_flags(struct inode *i { unsigned int flags = JFS_IP(inode)->mode2; - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | - S_NOATIME | S_DIRSYNC | S_SYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); if (flags & JFS_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & JFS_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & JFS_BARRIER_FL) + inode->i_flags |= S_BARRIER; + + if (flags & JFS_SYNC_FL) + inode->i_flags |= S_SYNC; if (flags & JFS_APPEND_FL) inode->i_flags |= S_APPEND; if (flags & JFS_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & JFS_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; - if (flags & JFS_SYNC_FL) - inode->i_flags |= S_SYNC; +} + +int jfs_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + + oldflags = JFS_IP(inode)->mode2; + newflags = oldflags & ~(JFS_APPEND_FL | + JFS_IMMUTABLE_FL | JFS_IUNLINK_FL | + JFS_BARRIER_FL | JFS_NOATIME_FL | + JFS_SYNC_FL | JFS_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= JFS_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= JFS_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= JFS_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= JFS_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= JFS_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= JFS_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= JFS_DIRSYNC_FL; + + if (oldflags ^ newflags) { + JFS_IP(inode)->mode2 = newflags; + inode->i_ctime = CURRENT_TIME; + 
mark_inode_dirty(inode); + } + return 0; } /* @@ -90,10 +132,17 @@ struct inode *ialloc(struct inode *paren jfs_inode->saved_uid = inode->i_uid; jfs_inode->saved_gid = inode->i_gid; + inode->i_tag = dx_current_fstag(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + iput(inode); + return NULL; + } + /* * Allocate inode to quota. */ if (DQUOT_ALLOC_INODE(inode)) { + DLIMIT_FREE_INODE(inode); DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; diff -NurpP --minimal linux-2.6.19.1/fs/jfs/jfs_inode.h linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_inode.h --- linux-2.6.19.1/fs/jfs/jfs_inode.h 2006-11-30 21:19:25 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_inode.h 2006-11-08 04:57:51 +0100 @@ -31,6 +31,7 @@ extern void jfs_truncate(struct inode *) extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); +extern int jfs_sync_flags(struct inode *); extern void jfs_set_inode_flags(struct inode *); extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); diff -NurpP --minimal linux-2.6.19.1/fs/jfs/jfs_xtree.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_xtree.c --- linux-2.6.19.1/fs/jfs/jfs_xtree.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/jfs_xtree.c 2006-11-08 04:57:50 +0100 @@ -21,6 +21,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" @@ -841,7 +842,12 @@ int xtInsert(tid_t tid, /* transaction hint = 0; if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen))) goto out; + if ((rc = DLIMIT_ALLOC_BLOCK(ip, xlen))) { + DQUOT_FREE_BLOCK(ip, xlen); + goto out; + } if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { + DLIMIT_FREE_BLOCK(ip, xlen); DQUOT_FREE_BLOCK(ip, xlen); goto out; } @@ -871,6 +877,7 @@ int xtInsert(tid_t tid, /* transaction /* undo data extent allocation */ if (*xaddrp == 0) { dbFree(ip, xaddr, (s64) xlen); + DLIMIT_FREE_BLOCK(ip, xlen); DQUOT_FREE_BLOCK(ip, xlen); } return 
rc; @@ -1231,6 +1238,7 @@ xtSplitPage(tid_t tid, struct inode *ip, struct tlock *tlck; struct xtlock *sxtlck = NULL, *rxtlck = NULL; int quota_allocation = 0; + int dlimit_allocation = 0; smp = split->mp; sp = XT_PAGE(ip, smp); @@ -1250,6 +1258,13 @@ xtSplitPage(tid_t tid, struct inode *ip, quota_allocation += lengthPXD(pxd); + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + rc = -ENOSPC; + goto clean_up; + } + dlimit_allocation += lengthPXD(pxd); + /* * allocate the new right page for the split */ @@ -1451,6 +1466,9 @@ xtSplitPage(tid_t tid, struct inode *ip, clean_up: + /* Rollback dlimit allocation. */ + if (dlimit_allocation) + DLIMIT_FREE_BLOCK(ip, dlimit_allocation); /* Rollback quota allocation. */ if (quota_allocation) DQUOT_FREE_BLOCK(ip, quota_allocation); @@ -1515,6 +1533,12 @@ xtSplitRoot(tid_t tid, release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); @@ -3941,6 +3965,8 @@ s64 xtTruncate(tid_t tid, struct inode * else ip->i_size = newsize; + /* update dlimit allocation to reflect freed blocks */ + DLIMIT_FREE_BLOCK(ip, nfreed); /* update quota allocation to reflect freed blocks */ DQUOT_FREE_BLOCK(ip, nfreed); diff -NurpP --minimal linux-2.6.19.1/fs/jfs/namei.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/namei.c --- linux-2.6.19.1/fs/jfs/namei.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/namei.c 2006-11-30 18:53:18 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_inode.h" @@ -1461,6 +1462,7 @@ static struct dentry *jfs_lookup(struct return ERR_PTR(-EACCES); } + dx_propagate_tag(nd, ip); dentry = d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) @@ -1513,6 +1515,7 @@ struct inode_operations 
jfs_dir_inode_op .setattr = jfs_setattr, .permission = jfs_permission, #endif + .sync_flags = jfs_sync_flags, }; const struct file_operations jfs_dir_operations = { diff -NurpP --minimal linux-2.6.19.1/fs/jfs/super.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/super.c --- linux-2.6.19.1/fs/jfs/super.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/super.c 2006-12-06 05:50:27 +0100 @@ -194,7 +194,8 @@ static void jfs_put_super(struct super_b enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask + Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, + Opt_tag, Opt_notag, Opt_tagid }; static match_table_t tokens = { @@ -204,6 +205,10 @@ static match_table_t tokens = { {Opt_resize, "resize=%u"}, {Opt_resize_nosize, "resize"}, {Opt_errors, "errors=%s"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, + {Opt_tag, "tagxid"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, {Opt_usrquota, "usrquota"}, @@ -338,6 +343,20 @@ static int parse_options(char *options, } break; } +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + *flag |= JFS_TAGGED; + break; + case Opt_notag: + *flag &= ~JFS_TAGGED; + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + *flag |= JFS_TAGGED; + break; +#endif default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -368,6 +387,13 @@ static int jfs_remount(struct super_bloc if (!parse_options(data, sb, &newLVSize, &flag)) { return -EINVAL; } + + if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) { + printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } + if (newLVSize) { if (sb->s_flags & MS_RDONLY) { printk(KERN_ERR @@ -439,6 +465,9 @@ static int jfs_fill_super(struct super_b #ifdef CONFIG_JFS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif + /* map mount option tagxid */ + if (sbi->flag &
JFS_TAGGED) + sb->s_flags |= MS_TAGGED; if (newLVSize) { printk(KERN_ERR "resize option for remount only\n"); diff -NurpP --minimal linux-2.6.19.1/fs/jfs/xattr.c linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/xattr.c --- linux-2.6.19.1/fs/jfs/xattr.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/jfs/xattr.c 2006-11-08 21:52:09 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_dmap.h" @@ -263,9 +264,16 @@ static int ea_write(struct inode *ip, st if (DQUOT_ALLOC_BLOCK(ip, nblocks)) { return -EDQUOT; } + /* Allocate new blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, nblocks)) { + DQUOT_FREE_BLOCK(ip, nblocks); + return -ENOSPC; + } rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); if (rc) { + /*Rollback dlimit allocation. */ + DLIMIT_FREE_BLOCK(ip, nblocks); /*Rollback quota allocation. */ DQUOT_FREE_BLOCK(ip, nblocks); return rc; @@ -332,6 +340,8 @@ static int ea_write(struct inode *ip, st failed: + /* Rollback dlimit allocation. */ + DLIMIT_FREE_BLOCK(ip, nblocks); /* Rollback quota allocation. */ DQUOT_FREE_BLOCK(ip, nblocks); dbFree(ip, blkno, nblocks); @@ -468,6 +478,7 @@ static int ea_get(struct inode *inode, s s64 blkno; int rc; int quota_allocation = 0; + int dlimit_allocation = 0; /* When fsck.jfs clears a bad ea, it doesn't clear the size */ if (ji->ea.flag == 0) @@ -543,6 +554,12 @@ static int ea_get(struct inode *inode, s quota_allocation = blocks_needed; + /* Allocate new blocks to dlimit.
*/ + rc = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, blocks_needed)) + goto clean_up; + dlimit_allocation = blocks_needed; + rc = dbAlloc(inode, INOHINT(inode), (s64) blocks_needed, &blkno); if (rc) @@ -599,6 +616,9 @@ static int ea_get(struct inode *inode, s return ea_size; clean_up: + /* Rollback dlimit allocation */ + if (dlimit_allocation) + DLIMIT_FREE_BLOCK(inode, dlimit_allocation); /* Rollback quota allocation */ if (quota_allocation) DQUOT_FREE_BLOCK(inode, quota_allocation); @@ -675,8 +695,10 @@ static int ea_put(tid_t tid, struct inod } /* If old blocks exist, they must be removed from quota allocation. */ - if (old_blocks) + if (old_blocks) { + DLIMIT_FREE_BLOCK(inode, old_blocks); DQUOT_FREE_BLOCK(inode, old_blocks); + } inode->i_ctime = CURRENT_TIME; diff -NurpP --minimal linux-2.6.19.1/fs/libfs.c linux-2.6.19.1-vs2.2.0-rc6/fs/libfs.c --- linux-2.6.19.1/fs/libfs.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/libfs.c 2006-11-08 04:57:43 +0100 @@ -124,7 +124,8 @@ static inline unsigned char dt_type(stru * both impossible due to the lock on directory. 
*/ -int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) +static inline int do_dcache_readdir_filter(struct file * filp, + void * dirent, filldir_t filldir, int (*filter)(struct dentry *dentry)) { struct dentry *dentry = filp->f_dentry; struct dentry *cursor = filp->private_data; @@ -157,6 +158,8 @@ int dcache_readdir(struct file * filp, v next = list_entry(p, struct dentry, d_u.d_child); if (d_unhashed(next) || !next->d_inode) continue; + if (filter && !filter(next)) + continue; spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) @@ -172,6 +175,18 @@ int dcache_readdir(struct file * filp, v return 0; } +int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + return do_dcache_readdir_filter(filp, dirent, filldir, NULL); +} + +int dcache_readdir_filter(struct file * filp, void * dirent, filldir_t filldir, + int (*filter)(struct dentry *)) +{ + return do_dcache_readdir_filter(filp, dirent, filldir, filter); +} + + ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) { return -EISDIR; @@ -611,6 +626,7 @@ EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); EXPORT_SYMBOL(dcache_readdir); +EXPORT_SYMBOL(dcache_readdir_filter); EXPORT_SYMBOL(generic_read_dir); EXPORT_SYMBOL(get_sb_pseudo); EXPORT_SYMBOL(simple_commit_write); diff -NurpP --minimal linux-2.6.19.1/fs/locks.c linux-2.6.19.1-vs2.2.0-rc6/fs/locks.c --- linux-2.6.19.1/fs/locks.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/locks.c 2006-11-30 18:53:18 +0100 @@ -125,6 +125,8 @@ #include #include #include +#include +#include #include #include @@ -147,6 +149,8 @@ static kmem_cache_t *filelock_cache __re /* Allocate an empty lock structure. 
*/ static struct file_lock *locks_alloc_lock(void) { + if (!vx_locks_avail(1)) + return NULL; return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); } @@ -172,6 +176,7 @@ static void locks_free_lock(struct file_ BUG_ON(!list_empty(&fl->fl_block)); BUG_ON(!list_empty(&fl->fl_link)); + vx_locks_dec(fl); locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } @@ -191,6 +196,7 @@ void locks_init_lock(struct file_lock *f fl->fl_start = fl->fl_end = 0; fl->fl_ops = NULL; fl->fl_lmops = NULL; + fl->fl_xid = -1; } EXPORT_SYMBOL(locks_init_lock); @@ -248,6 +254,7 @@ void locks_copy_lock(struct file_lock *n new->fl_file = fl->fl_file; new->fl_ops = fl->fl_ops; new->fl_lmops = fl->fl_lmops; + new->fl_xid = fl->fl_xid; locks_copy_private(new, fl); } @@ -286,6 +293,11 @@ static int flock_make_lock(struct file * fl->fl_flags = FL_FLOCK; fl->fl_type = type; fl->fl_end = OFFSET_MAX; + + vxd_assert(filp->f_xid == vx_current_xid(), + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); + fl->fl_xid = filp->f_xid; + vx_locks_inc(fl); *lock = fl; return 0; @@ -451,6 +463,7 @@ static int lease_init(struct file *filp, fl->fl_owner = current->files; fl->fl_pid = current->tgid; + fl->fl_xid = vx_current_xid(); fl->fl_file = filp; fl->fl_flags = FL_LEASE; @@ -470,6 +483,11 @@ static int lease_alloc(struct file *filp if (fl == NULL) goto out; + fl->fl_xid = vx_current_xid(); + if (filp) + vxd_assert(filp->f_xid == fl->fl_xid, + "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid); + vx_locks_inc(fl); error = lease_init(filp, type, fl); if (error) { locks_free_lock(fl); @@ -790,6 +808,7 @@ find_conflict: if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); + vx_locks_inc(new_fl); locks_insert_lock(&inode->i_flock, new_fl); new_fl = NULL; error = 0; @@ -801,7 +820,8 @@ out: return error; } -static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock) +static int __posix_lock_file_conf(struct inode 
*inode, struct file_lock *request, + struct file_lock *conflock, xid_t xid) { struct file_lock *fl; struct file_lock *new_fl = NULL; @@ -811,6 +831,8 @@ static int __posix_lock_file_conf(struct struct file_lock **before; int error, added = 0; + vxd_assert(xid == vx_current_xid(), + "xid(%d) == current(%d)", xid, vx_current_xid()); /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. @@ -821,7 +843,15 @@ static int __posix_lock_file_conf(struct (request->fl_type != F_UNLCK || request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { new_fl = locks_alloc_lock(); + if (new_fl) { + new_fl->fl_xid = xid; + vx_locks_inc(new_fl); + } new_fl2 = locks_alloc_lock(); + if (new_fl2) { + new_fl2->fl_xid = xid; + vx_locks_inc(new_fl2); + } } lock_kernel(); @@ -1018,7 +1044,8 @@ static int __posix_lock_file_conf(struct */ int posix_lock_file(struct file *filp, struct file_lock *fl) { - return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, NULL); + return __posix_lock_file_conf(filp->f_dentry->d_inode, + fl, NULL, filp->f_xid); } EXPORT_SYMBOL(posix_lock_file); @@ -1033,7 +1060,8 @@ EXPORT_SYMBOL(posix_lock_file); int posix_lock_file_conf(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { - return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, conflock); + return __posix_lock_file_conf(filp->f_dentry->d_inode, + fl, conflock, filp->f_xid); } EXPORT_SYMBOL(posix_lock_file_conf); @@ -1123,7 +1151,7 @@ int locks_mandatory_area(int read_write, fl.fl_end = offset + count - 1; for (;;) { - error = __posix_lock_file_conf(inode, &fl, NULL); + error = __posix_lock_file_conf(inode, &fl, NULL, filp->f_xid); if (error != -EAGAIN) break; if (!(fl.fl_flags & FL_SLEEP)) @@ -1685,6 +1713,11 @@ int fcntl_setlk(unsigned int fd, struct if (file_lock == NULL) return -ENOLCK; + vxd_assert(filp->f_xid == vx_current_xid(), + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); + file_lock->fl_xid = filp->f_xid; + vx_locks_inc(file_lock); + /* * This
might block, so we do it before checking the inode. */ @@ -1828,6 +1861,11 @@ int fcntl_setlk64(unsigned int fd, struc if (file_lock == NULL) return -ENOLCK; + vxd_assert(filp->f_xid == vx_current_xid(), + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); + file_lock->fl_xid = filp->f_xid; + vx_locks_inc(file_lock); + /* * This might block, so we do it before checking the inode. */ @@ -2123,6 +2161,10 @@ int get_locks_status(char *buffer, char list_for_each(tmp, &file_lock_list) { struct list_head *btmp; struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + + if (!vx_check(fl->fl_xid, VS_WATCH_P|VS_IDENT)) + continue; + lock_get_status(q, fl, ++i, ""); move_lock_status(&q, &pos, offset); diff -NurpP --minimal linux-2.6.19.1/fs/namei.c linux-2.6.19.1-vs2.2.0-rc6/fs/namei.c --- linux-2.6.19.1/fs/namei.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/namei.c 2006-11-30 19:41:35 +0100 @@ -32,6 +32,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -225,6 +230,31 @@ int generic_permission(struct inode *ino return -EACCES; } +static inline int dx_barrier(struct inode *inode) +{ + if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN)) { + vxwprintk(1, "xid=%d did hit the barrier.", + vx_current_xid()); + return 1; + } + return 0; +} + +static inline int dx_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + if (dx_barrier(inode)) + return -EACCES; + if (inode->i_tag == 0) + return 0; + if (dx_check(inode->i_tag, DX_ADMIN|DX_WATCH|DX_IDENT)) + return 0; + + vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_tag, inode->i_ino, + vxd_cond_path(nd)); + return -EACCES; +} + int permission(struct inode *inode, int mask, struct nameidata *nd) { umode_t mode = inode->i_mode; @@ -235,14 +265,14 @@ int permission(struct inode *inode, int /* * Nobody gets write access to a read-only fs. 
*/ - if (IS_RDONLY(inode) && + if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; /* * Nobody gets write access to an immutable file. */ - if (IS_IMMUTABLE(inode)) + if (IS_IMMUTABLE(inode) && !IS_COW(inode)) return -EACCES; } @@ -256,6 +286,8 @@ int permission(struct inode *inode, int /* Ordinary permission routines do not understand MAY_APPEND. */ submask = mask & ~MAY_APPEND; + if ((retval = dx_permission(inode, mask, nd))) + return retval; if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); else @@ -431,6 +463,8 @@ static int exec_permission_lite(struct i { umode_t mode = inode->i_mode; + if (dx_barrier(inode)) + return -EACCES; if (inode->i_op && inode->i_op->permission) return -EAGAIN; @@ -736,7 +770,8 @@ static __always_inline void follow_dotdo if (nd->dentry == fs->root && nd->mnt == fs->rootmnt) { read_unlock(&fs->lock); - break; + /* FIXME: for sane '/' avoid follow_mount() */ + return; } read_unlock(&fs->lock); spin_lock(&dcache_lock); @@ -773,16 +808,34 @@ static int do_lookup(struct nameidata *n { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); + struct inode *inode; if (!dentry) goto need_lookup; if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; + inode = dentry->d_inode; + if (!inode) + goto done; + if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) { + struct proc_dir_entry *de = PDE(inode); + + if (de && !vx_hide_check(0, de->vx_flags)) + goto hidden; + } + if (!dx_check(inode->i_tag, DX_WATCH|DX_ADMIN|DX_HOSTID|DX_IDENT)) + goto hidden; done: path->mnt = mnt; path->dentry = dentry; __follow_mount(path); return 0; +hidden: + vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_tag, inode->i_ino, + vxd_path(dentry, mnt)); + dput(dentry); + return -ENOENT; need_lookup: dentry = real_lookup(nd->dentry, name, nd); @@ -1384,7 +1437,8 @@ 
static inline int check_sticky(struct in * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static int may_delete(struct inode *dir,struct dentry *victim,int isdir) +static int may_delete(struct inode *dir, struct dentry *victim, + int isdir, struct nameidata *nd) { int error; @@ -1394,13 +1448,13 @@ static int may_delete(struct inode *dir, BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(victim->d_name.name, victim->d_inode, dir); - error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); + error = permission(dir,MAY_WRITE | MAY_EXEC, nd); if (error) return error; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IXORUNLINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -1531,6 +1585,14 @@ int may_open(struct nameidata *nd, int a if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) return -EISDIR; +#ifdef CONFIG_VSERVER_COWBL + if (IS_COW(inode) && (flag & FMODE_WRITE)) { + if (IS_COW_LINK(inode)) + return -EMLINK; + inode->i_flags &= ~(S_IUNLINK|S_IMMUTABLE); + mark_inode_dirty(inode); + } +#endif error = vfs_permission(nd, acc_mode); if (error) return error; @@ -1547,7 +1609,8 @@ int may_open(struct nameidata *nd, int a return -EACCES; flag &= ~O_TRUNC; - } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) + } else if ((IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt)) + && (flag & FMODE_WRITE)) return -EROFS; /* * An append-only file must be opened in append mode for writing. 
@@ -1635,6 +1698,11 @@ int open_namei(int dfd, const char *path struct dentry *dir; int count = 0; +#ifdef CONFIG_VSERVER_COWBL + int rflag = flag; + int rmode = mode; +restart: +#endif acc_mode = ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ @@ -1728,6 +1796,22 @@ do_last: goto exit; ok: error = may_open(nd, acc_mode, flag); +#ifdef CONFIG_VSERVER_COWBL + if (error == -EMLINK) { + struct dentry *dentry; + dentry = cow_break_link(pathname); + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto exit; + } + dput(dentry); + release_open_intent(nd); + path_release(nd); + flag = rflag; + mode = rmode; + goto restart; + } +#endif if (error) goto exit; return 0; @@ -1839,9 +1923,10 @@ fail: } EXPORT_SYMBOL_GPL(lookup_create); -int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +int vfs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t dev, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1891,11 +1976,12 @@ asmlinkage long sys_mknodat(int dfd, con error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode, - new_decode_dev(dev)); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + new_decode_dev(dev), &nd); break; case S_IFIFO: case S_IFSOCK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + 0, &nd); break; case S_IFDIR: error = -EPERM; @@ -1918,9 +2004,10 @@ asmlinkage long sys_mknod(const char __u return sys_mknodat(AT_FDCWD, filename, mode, dev); } -int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +int vfs_mkdir(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1962,7 +2049,7 @@ asmlinkage long 
sys_mkdirat(int dfd, con if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; - error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); + error = vfs_mkdir(nd.dentry->d_inode, dentry, mode, &nd); dput(dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2006,9 +2093,10 @@ void dentry_unhash(struct dentry *dentry spin_unlock(&dcache_lock); } -int vfs_rmdir(struct inode *dir, struct dentry *dentry) +int vfs_rmdir(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 1); + int error = may_delete(dir, dentry, 1, nd); if (error) return error; @@ -2070,7 +2158,7 @@ static long do_rmdir(int dfd, const char error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit2; - error = vfs_rmdir(nd.dentry->d_inode, dentry); + error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd); dput(dentry); exit2: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2086,9 +2174,10 @@ asmlinkage long sys_rmdir(const char __u return do_rmdir(AT_FDCWD, pathname); } -int vfs_unlink(struct inode *dir, struct dentry *dentry) +int vfs_unlink(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 0); + int error = may_delete(dir, dentry, 0, nd); if (error) return error; @@ -2150,7 +2239,7 @@ static long do_unlinkat(int dfd, const c inode = dentry->d_inode; if (inode) atomic_inc(&inode->i_count); - error = vfs_unlink(nd.dentry->d_inode, dentry); + error = vfs_unlink(nd.dentry->d_inode, dentry, &nd); exit2: dput(dentry); } @@ -2185,9 +2274,10 @@ asmlinkage long sys_unlink(const char __ return do_unlinkat(AT_FDCWD, pathname); } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) +int vfs_symlink(struct inode *dir, struct dentry *dentry, + const char *oldname, int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -2231,7 +2321,7 @@ asmlinkage long 
sys_symlinkat(const char if (IS_ERR(dentry)) goto out_unlock; - error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); + error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO, &nd); dput(dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2248,7 +2338,8 @@ asmlinkage long sys_symlink(const char _ return sys_symlinkat(oldname, AT_FDCWD, newname); } -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) +int vfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry, struct nameidata *nd) { struct inode *inode = old_dentry->d_inode; int error; @@ -2256,7 +2347,7 @@ int vfs_link(struct dentry *old_dentry, if (!inode) return -ENOENT; - error = may_create(dir, new_dentry, NULL); + error = may_create(dir, new_dentry, nd); if (error) return error; @@ -2266,7 +2357,7 @@ int vfs_link(struct dentry *old_dentry, /* * A link to an append-only or immutable file cannot be created. */ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return -EPERM; if (!dir->i_op || !dir->i_op->link) return -EPERM; @@ -2326,7 +2417,7 @@ asmlinkage long sys_linkat(int olddfd, c error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out_unlock; - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); + error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry, &nd); dput(new_dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2458,14 +2549,14 @@ int vfs_rename(struct inode *old_dir, st if (old_dentry->d_inode == new_dentry->d_inode) return 0; - error = may_delete(old_dir, old_dentry, is_dir); + error = may_delete(old_dir, old_dentry, is_dir, NULL); if (error) return error; if (!new_dentry->d_inode) error = may_create(new_dir, new_dentry, NULL); else - error = may_delete(new_dir, new_dentry, is_dir); + error = may_delete(new_dir, new_dentry, is_dir, NULL); if (error) return error; @@ -2543,6 +2634,9 @@ static int do_rename(int 
olddfd, const c error = -EINVAL; if (old_dentry == trap) goto exit4; + error = -EROFS; + if (MNT_IS_RDONLY(newnd.mnt)) + goto exit4; new_dentry = lookup_hash(&newnd); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) @@ -2636,6 +2730,174 @@ int vfs_follow_link(struct nameidata *nd return __vfs_follow_link(nd, link); } + +#ifdef CONFIG_VSERVER_COWBL + +#include
+
+/*
+ * cow_break_link - give @pathname a private copy of its contents
+ * @pathname: path of the file to copy; resolved with LOOKUP_FOLLOW
+ *
+ * Creates a temporary file named like the resolved path plus one pad
+ * byte (counting down from '\251'), copies the old contents into it
+ * with vfs_sendfile() and renames it over the original.
+ *
+ * Returns the new dentry with a reference for the caller to dput(),
+ * or an ERR_PTR(): -ENOMEM if the path buffer cannot be allocated, the
+ * error of a failed lookup, create or open, and -EMLINK when no pad
+ * byte is left or the copy or rename fails.
+ *
+ * NOTE(review): the temporary file is not removed when the copy or
+ * the rename fails - confirm whether that is intended.
+ */
+struct dentry *cow_break_link(const char *pathname)
+{
+	int ret, mode, pathlen;
+	struct nameidata old_nd, dir_nd;
+	struct dentry *old_dentry, *new_dentry;
+	struct dentry *res = ERR_PTR(-EMLINK);
+	struct vfsmount *old_mnt, *new_mnt;
+	struct file *old_file;
+	struct file *new_file;
+	char *to, *path, pad='\251';
+	loff_t size;
+
+	vxdprintk(VXD_CBIT(misc, 1), "cow_break_link(»%s«)", pathname);
+	path = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!path)
+		return ERR_PTR(-ENOMEM);
+
+	ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
+	vxdprintk(VXD_CBIT(misc, 2), "path_lookup(old): %d", ret);
+	if (ret) {
+		/* old_nd is only valid, and only released, on success */
+		res = ERR_PTR(ret);
+		goto out_free_path;
+	}
+	old_dentry = old_nd.dentry;
+	old_mnt = old_nd.mnt;
+	mode = old_dentry->d_inode->i_mode;
+
+	to = d_path(old_dentry, old_mnt, path, PATH_MAX-2);
+	if (IS_ERR(to)) {
+		res = ERR_PTR(PTR_ERR(to));
+		goto out_rel_old;
+	}
+	pathlen = strlen(to);
+	vxdprintk(VXD_CBIT(misc, 2), "old path »%s«", to);
+
+	/* d_path() got PATH_MAX-2, so pad byte and terminator still fit */
+	to[pathlen+1] = 0;
+retry:
+	to[pathlen] = pad--;
+	if (pad <= '\240')
+		goto out_rel_old;
+
+	vxdprintk(VXD_CBIT(misc, 1), "temp copy »%s«", to);
+	ret = path_lookup(to,
+		LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, &dir_nd);
+	if (ret) {
+		res = ERR_PTR(ret);
+		goto out_rel_old;
+	}
+
+	/* this puppy downs the inode sem */
+	new_dentry = lookup_create(&dir_nd, 0);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"lookup_create(new): %p", new_dentry);
+	if (!new_dentry || IS_ERR(new_dentry)) {
+		/* errors come back as ERR_PTR() with i_mutex still held */
+		if (new_dentry)
+			res = new_dentry;
+		mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
+		path_release(&dir_nd);
+		goto out_rel_old;
+	}
+
+	ret = vfs_create(dir_nd.dentry->d_inode, new_dentry, mode, &dir_nd);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"vfs_create(new): %d", ret);
+	if (ret == -EEXIST) {
+		/* name is taken: drop everything, try the next pad byte */
+		mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
+		dput(new_dentry);
+		path_release(&dir_nd);
+		goto retry;
+	}
+	if (ret) {
+		res = ERR_PTR(ret);
+		goto out_rel_both;
+	}
+
+	new_mnt = dir_nd.mnt;
+
+	dget(old_dentry);
+	mntget(old_mnt);
+	/* this one cleans up the dentry in case of failure */
+	old_file = dentry_open(old_dentry, old_mnt, O_RDONLY);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"dentry_open(old): %p", old_file);
+	if (!old_file || IS_ERR(old_file)) {
+		if (old_file)
+			res = ERR_PTR(PTR_ERR(old_file));
+		goto out_rel_both;
+	}
+
+	dget(new_dentry);
+	mntget(new_mnt);
+	/* this one cleans up the dentry in case of failure */
+	new_file = dentry_open(new_dentry, new_mnt, O_WRONLY);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"dentry_open(new): %p", new_file);
+	if (!new_file || IS_ERR(new_file)) {
+		if (new_file)
+			res = ERR_PTR(PTR_ERR(new_file));
+		goto out_fput_old;
+	}
+
+	size = i_size_read(old_file->f_dentry->d_inode);
+	ret = vfs_sendfile(new_file, old_file, NULL, size, 0);
+	vxdprintk(VXD_CBIT(misc, 2), "vfs_sendfile: %d", ret);
+
+	if (ret < 0)
+		goto out_fput_both;
+
+	ret = vfs_rename(dir_nd.dentry->d_inode, new_dentry,
+		old_nd.dentry->d_parent->d_inode, old_dentry);
+	vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
+	if (!ret) {
+		/* extra reference, handed to the caller through res */
+		res = new_dentry;
+		dget(new_dentry);
+	}
+
+out_fput_both:
+	vxdprintk(VXD_CBIT(misc, 3),
+		"fput(new_file=%p[#%d])", new_file,
+		atomic_read(&new_file->f_count));
+	fput(new_file);
+
+out_fput_old:
+	vxdprintk(VXD_CBIT(misc, 3),
+		"fput(old_file=%p[#%d])", old_file,
+		atomic_read(&old_file->f_count));
+	fput(old_file);
+
+out_rel_both:
+	mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
+	dput(new_dentry);
+
+	path_release(&dir_nd);
+out_rel_old:
+	path_release(&old_nd);
+out_free_path:
+	kfree(path);
+	return res;
+}
+
+#endif + /* get the link contents into pagecache */ static char *page_getlink(struct dentry * dentry, struct page **ppage) { diff -NurpP --minimal linux-2.6.19.1/fs/namespace.c linux-2.6.19.1-vs2.2.0-rc6/fs/namespace.c --- linux-2.6.19.1/fs/namespace.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/namespace.c 2006-12-06 05:50:27 +0100 @@ -25,6 +25,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include "pnode.h" @@ -241,6 +245,7 @@ static struct vfsmount *clone_mnt(struct mnt->mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; + mnt->mnt_tag = old->mnt_tag; if (flag & CL_SLAVE) { 
list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -349,43 +354,85 @@ static inline void mangle(struct seq_fil seq_escape(m, s, " \t\n\\"); } +static int mnt_is_reachable(struct vfsmount *mnt) +{ /* returns 1 when mnt is the root of its namespace; otherwise walks mnt_parent upwards under dcache_lock until the top of the tree or current->fs->rootmnt is reached, and returns nonzero only if the walk ended at rootmnt and the last mountpoint crossed (or the root itself when no step was taken) passes is_subdir(point, root). NOTE(review): current->fs->rootmnt and current->fs->root are read with only dcache_lock held - confirm that no fs->lock is required here */ + struct vfsmount *root_mnt; + struct dentry *root, *point; + int ret; + + if (mnt == mnt->mnt_namespace->root) + return 1; + + spin_lock(&dcache_lock); + root_mnt = current->fs->rootmnt; + root = current->fs->root; + point = root; + + while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) { + point = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + } + + ret = (mnt == root_mnt) && is_subdir(point, root); + + spin_unlock(&dcache_lock); + + return ret; +} + static int show_vfsmnt(struct seq_file *m, void *v) { struct vfsmount *mnt = v; int err = 0; static struct proc_fs_info { - int flag; - char *str; + int s_flag; + int mnt_flag; + char *set_str; + char *unset_str; } fs_info[] = { - { MS_SYNCHRONOUS, ",sync" }, - { MS_DIRSYNC, ",dirsync" }, - { MS_MANDLOCK, ",mand" }, - { 0, NULL } - }; - static struct proc_fs_info mnt_info[] = { - { MNT_NOSUID, ",nosuid" }, - { MNT_NODEV, ",nodev" }, - { MNT_NOEXEC, ",noexec" }, - { MNT_NOATIME, ",noatime" }, - { MNT_NODIRATIME, ",nodiratime" }, - { 0, NULL } + { MS_RDONLY, MNT_RDONLY, "ro", "rw" }, + { MS_SYNCHRONOUS, 0, ",sync", NULL }, + { MS_DIRSYNC, 0, ",dirsync", NULL }, + { MS_MANDLOCK, 0, ",mand", NULL }, + { MS_TAGGED, 0, ",tag", NULL }, + { MS_NOATIME, MNT_NOATIME, ",noatime", NULL }, + { MS_NODIRATIME, MNT_NODIRATIME, ",nodiratime", NULL }, + { 0, MNT_NOSUID, ",nosuid", NULL }, + { 0, MNT_NODEV, ",nodev", NULL }, + { 0, MNT_NOEXEC, ",noexec", NULL }, + { 0, 0, NULL, NULL } }; - struct proc_fs_info *fs_infop; + struct proc_fs_info *p; + unsigned long s_flags = mnt->mnt_sb->s_flags; + int mnt_flags = mnt->mnt_flags; - mangle(m, mnt->mnt_devname ? 
mnt->mnt_devname : "none"); - seq_putc(m, ' '); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); - seq_putc(m, ' '); - mangle(m, mnt->mnt_sb->s_type->name); - seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); - for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_sb->s_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) + return 0; + + if (!vx_check(0, VS_ADMIN|VS_WATCH) && + mnt == current->fs->rootmnt) { + seq_puts(m, "/dev/root / "); + } else { + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + seq_putc(m, ' '); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); } - for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); + mangle(m, mnt->mnt_sb->s_type->name); + seq_putc(m, ' '); + for (p = fs_info; (p->s_flag | p->mnt_flag) ; p++) { + if ((s_flags & p->s_flag) || (mnt_flags & p->mnt_flag)) { + if (p->set_str) + seq_puts(m, p->set_str); + } else { + if (p->unset_str) + seq_puts(m, p->unset_str); + } } + if (mnt->mnt_flags & MNT_TAGID) + seq_printf(m, ",tag=%d", mnt->mnt_tag); if (mnt->mnt_sb->s_op->show_options) err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); @@ -404,17 +451,27 @@ static int show_vfsstat(struct seq_file struct vfsmount *mnt = v; int err = 0; - /* device */ - if (mnt->mnt_devname) { - seq_puts(m, "device "); - mangle(m, mnt->mnt_devname); - } else - seq_puts(m, "no device"); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) + return 0; - /* mount point */ - seq_puts(m, " mounted on "); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); - seq_putc(m, ' '); + if (!vx_check(0, VS_ADMIN|VS_WATCH) && + mnt == current->fs->rootmnt) { + seq_puts(m, "device /dev/root mounted on / "); + } else { + /* device */ + if (mnt->mnt_devname) { + seq_puts(m, "device "); 
+ mangle(m, mnt->mnt_devname); + } else + seq_puts(m, "no device"); + + /* mount point */ + seq_puts(m, " mounted on "); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + } /* file system type */ seq_puts(m, "with fstype "); @@ -644,7 +701,7 @@ asmlinkage long sys_umount(char __user * goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) goto dput_and_out; retval = do_umount(nd.mnt, flags); @@ -668,7 +725,7 @@ asmlinkage long sys_oldumount(char __use static int mount_is_safe(struct nameidata *nd) { - if (capable(CAP_SYS_ADMIN)) + if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) return 0; return -EPERM; #ifdef notyet @@ -897,11 +954,13 @@ static int do_change_type(struct nameida /* * do loopback mount. */ -static int do_loopback(struct nameidata *nd, char *old_name, int recurse) +static int do_loopback(struct nameidata *nd, char *old_name, tag_t tag, + unsigned long flags, int mnt_flags) { struct nameidata old_nd; struct vfsmount *mnt = NULL; int err = mount_is_safe(nd); + int recurse = flags & MS_REC; if (err) return err; if (!old_name || !*old_name) @@ -927,6 +986,12 @@ static int do_loopback(struct nameidata if (!mnt) goto out; + mnt->mnt_flags = mnt_flags; + if (flags & MS_TAGID) { + mnt->mnt_tag = tag; + mnt->mnt_flags |= MNT_TAGID; + } + err = graft_tree(mnt, nd); if (err) { LIST_HEAD(umount_list); @@ -935,6 +1000,7 @@ static int do_loopback(struct nameidata spin_unlock(&vfsmount_lock); release_mounts(&umount_list); } + mnt->mnt_flags = mnt_flags; out: up_write(&namespace_sem); @@ -948,12 +1014,12 @@ out: * on it - tough luck. 
*/ static int do_remount(struct nameidata *nd, int flags, int mnt_flags, - void *data) + void *data, xid_t xid) { int err; struct super_block *sb = nd->mnt->mnt_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT)) return -EPERM; if (!check_mnt(nd->mnt)) @@ -987,7 +1053,7 @@ static int do_move_mount(struct nameidat struct nameidata old_nd, parent_nd; struct vfsmount *p; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -1067,7 +1133,7 @@ static int do_new_mount(struct nameidata return -EINVAL; /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) return -EPERM; mnt = do_kern_mount(type, flags, name, data); @@ -1379,6 +1445,7 @@ long do_mount(char *dev_name, char *dir_ struct nameidata nd; int retval = 0; int mnt_flags = 0; + tag_t tag = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -1394,7 +1461,19 @@ long do_mount(char *dev_name, char *dir_ if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; +#ifdef CONFIG_PROPAGATE + retval = dx_parse_tag(data_page, &tag, 1); + if (retval) { + mnt_flags |= MNT_TAGID; + /* bind and re-mounts get the tag flag */ + if (flags & (MS_BIND|MS_REMOUNT)) + flags |= MS_TAGID; + } +#endif + /* Separate the per-mountpoint flags */ + if (flags & MS_RDONLY) + mnt_flags |= MNT_RDONLY; if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) @@ -1406,6 +1485,8 @@ long do_mount(char *dev_name, char *dir_ if (flags & MS_NODIRATIME) mnt_flags |= MNT_NODIRATIME; + if (!capable(CAP_SYS_ADMIN)) + mnt_flags |= MNT_NODEV; flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_NOATIME | MS_NODIRATIME); @@ -1420,9 +1501,9 @@ long do_mount(char *dev_name, char *dir_ if (flags & MS_REMOUNT) retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); + data_page, tag); else if (flags & MS_BIND) - retval = 
do_loopback(&nd, dev_name, flags & MS_REC); + retval = do_loopback(&nd, dev_name, tag, flags, mnt_flags); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&nd, flags); else if (flags & MS_MOVE) @@ -1520,7 +1601,7 @@ int copy_namespace(int flags, struct tas if (!(flags & CLONE_NEWNS)) return 0; - if (!capable(CAP_SYS_ADMIN)) { + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) { err = -EPERM; goto out; } diff -NurpP --minimal linux-2.6.19.1/fs/nfs/client.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/client.c --- linux-2.6.19.1/fs/nfs/client.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/client.c 2006-11-08 04:57:47 +0100 @@ -520,6 +520,9 @@ static int nfs_init_server_rpcclient(str if (server->flags & NFS4_MOUNT_INTR) server->client->cl_intr = 1; + server->client->cl_tag = 0; + if (server->flags & NFS_MOUNT_TAGGED) + server->client->cl_tag = 1; return 0; } @@ -676,6 +679,10 @@ static void nfs_server_set_fsinfo(struct server->acdirmin = server->acdirmax = 0; } + /* FIXME: needs fsinfo + if (server->flags & NFS_MOUNT_TAGGED) + sb->s_flags |= MS_TAGGED; */ + server->maxfilesize = fsinfo->maxfilesize; /* We're airborne Set socket buffersize */ diff -NurpP --minimal linux-2.6.19.1/fs/nfs/dir.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/dir.c --- linux-2.6.19.1/fs/nfs/dir.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/dir.c 2006-11-30 19:09:28 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -933,6 +934,7 @@ static struct dentry *nfs_lookup(struct if (IS_ERR(res)) goto out_unlock; + dx_propagate_tag(nd, inode); no_entry: res = d_materialise_unique(dentry, inode); if (res != NULL) { @@ -975,7 +977,8 @@ static int is_atomic_open(struct inode * if (nd->flags & LOOKUP_DIRECTORY) return 0; /* Are we trying to write to a read only partition? 
*/ - if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + if ((IS_RDONLY(dir) || MNT_IS_RDONLY(nd->mnt)) && + (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) return 0; return 1; } diff -NurpP --minimal linux-2.6.19.1/fs/nfs/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/inode.c --- linux-2.6.19.1/fs/nfs/inode.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/inode.c 2006-11-30 18:53:18 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -279,8 +280,10 @@ nfs_fhget(struct super_block *sb, struct nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0); + /* maybe fattr->xid someday */ if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* * report the blocks in 512byte units @@ -369,6 +372,8 @@ void nfs_setattr_update_inode(struct ino inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) + inode->i_tag = attr->ia_tag; spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); @@ -778,6 +783,9 @@ static int nfs_check_inode_attributes(st struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; int data_unstable; + uid_t uid; + gid_t gid; + tag_t tag; /* Has the inode gone and changed behind our back? 
*/ @@ -805,10 +813,15 @@ static int nfs_check_inode_attributes(st if (cur_size != new_isize && nfsi->npages == 0) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid); + gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid); + tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0); + /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) - || inode->i_uid != fattr->uid - || inode->i_gid != fattr->gid) + || inode->i_uid != uid + || inode->i_gid != gid + || inode->i_tag != tag) nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ @@ -898,6 +911,9 @@ static int nfs_update_inode(struct inode loff_t cur_isize, new_isize; unsigned int invalid = 0; int data_stable; + uid_t uid; + gid_t gid; + tag_t tag; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino, @@ -970,15 +986,21 @@ static int nfs_update_inode(struct inode } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid); + gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid); + tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0); + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || - inode->i_uid != fattr->uid || - inode->i_gid != fattr->gid) + inode->i_uid != uid || + inode->i_gid != gid || + inode->i_tag != tag) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = uid; + inode->i_gid = gid; + inode->i_tag = tag; if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* diff -NurpP --minimal linux-2.6.19.1/fs/nfs/nfs3xdr.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/nfs3xdr.c --- linux-2.6.19.1/fs/nfs/nfs3xdr.c 2006-11-30 21:19:26 
+0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/nfs3xdr.c 2006-11-30 18:53:18 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -178,7 +179,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_f } static inline __be32 * -xdr_encode_sattr(__be32 *p, struct iattr *attr) +xdr_encode_sattr(__be32 *p, struct iattr *attr, int tag) { if (attr->ia_valid & ATTR_MODE) { *p++ = xdr_one; @@ -186,15 +187,17 @@ xdr_encode_sattr(__be32 *p, struct iattr } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_UID) { + if (attr->ia_valid & ATTR_UID || + (tag && (attr->ia_valid & ATTR_TAG))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_uid); + *p++ = htonl(TAGINO_UID(tag, attr->ia_uid, attr->ia_tag)); } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_GID) { + if (attr->ia_valid & ATTR_GID || + (tag && (attr->ia_valid & ATTR_TAG))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_gid); + *p++ = htonl(TAGINO_GID(tag, attr->ia_gid, attr->ia_tag)); } else { *p++ = xdr_zero; } @@ -279,7 +282,8 @@ static int nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); *p++ = htonl(args->guard); if (args->guard) p = xdr_encode_time3(p, &args->guardtime); @@ -370,7 +374,8 @@ nfs3_xdr_createargs(struct rpc_rqst *req *p++ = args->verifier[0]; *p++ = args->verifier[1]; } else - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -384,7 +389,8 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req, { p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ 
-397,7 +403,8 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *re { p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); *p++ = htonl(args->pathlen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); @@ -415,7 +422,8 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); *p++ = htonl(args->type); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); if (args->type == NF3CHR || args->type == NF3BLK) { *p++ = htonl(MAJOR(args->rdev)); *p++ = htonl(MINOR(args->rdev)); diff -NurpP --minimal linux-2.6.19.1/fs/nfs/nfsroot.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/nfsroot.c --- linux-2.6.19.1/fs/nfs/nfsroot.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/nfsroot.c 2006-12-02 01:37:05 +0100 @@ -118,12 +118,12 @@ static int mount_port __initdata = 0; / enum { /* Options that take integer arguments */ Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin, - Opt_acregmax, Opt_acdirmin, Opt_acdirmax, + Opt_acregmax, Opt_acdirmin, Opt_acdirmax, Opt_tagid, /* Options that take no arguments */ Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, - Opt_acl, Opt_noacl, + Opt_acl, Opt_noacl, Opt_tag, Opt_notag, /* Error token */ Opt_err }; @@ -160,6 +160,10 @@ static match_table_t __initdata tokens = {Opt_tcp, "tcp"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, + {Opt_tag, "tagxid"}, {Opt_err, NULL} }; @@ -274,6 +278,20 @@ static int __init root_nfs_parse(char *n case Opt_noacl: nfs_data.flags |= NFS_MOUNT_NOACL; break; +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + nfs_data.flags |= NFS_MOUNT_TAGGED; + 
break; + case Opt_notag: + nfs_data.flags &= ~NFS_MOUNT_TAGGED; + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + nfs_data.flags |= NFS_MOUNT_TAGGED; + break; +#endif default: printk(KERN_WARNING "Root-NFS: unknown " "option: %s\n", p); diff -NurpP --minimal linux-2.6.19.1/fs/nfs/super.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/super.c --- linux-2.6.19.1/fs/nfs/super.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfs/super.c 2006-11-30 18:53:18 +0100 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -290,6 +291,7 @@ static void nfs_show_mount_options(struc { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, + { NFS_MOUNT_TAGGED, ",tag", "" }, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; diff -NurpP --minimal linux-2.6.19.1/fs/nfsd/auth.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/auth.c --- linux-2.6.19.1/fs/nfsd/auth.c 2006-06-18 04:54:42 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/auth.c 2006-11-30 18:53:18 +0100 @@ -9,6 +9,7 @@ #include #include #include +#include #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) @@ -41,19 +42,22 @@ int nfsd_setuser(struct svc_rqst *rqstp, get_group_info(cred.cr_group_info); if (cred.cr_uid != (uid_t) -1) - current->fsuid = cred.cr_uid; + current->fsuid = INOTAG_UID(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid); else current->fsuid = exp->ex_anon_uid; if (cred.cr_gid != (gid_t) -1) - current->fsgid = cred.cr_gid; + current->fsgid = INOTAG_GID(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid); else current->fsgid = exp->ex_anon_gid; + /* this desperately needs a tag :) */ + current->xid = (xid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0); + if (!cred.cr_group_info) return -ENOMEM; ret = set_current_groups(cred.cr_group_info); put_group_info(cred.cr_group_info); - if ((cred.cr_uid)) { + if (INOTAG_UID(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid)) { cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; } else { 
cap_t(current->cap_effective) |= (CAP_NFSD_MASK & diff -NurpP --minimal linux-2.6.19.1/fs/nfsd/nfs3xdr.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/nfs3xdr.c --- linux-2.6.19.1/fs/nfsd/nfs3xdr.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/nfs3xdr.c 2006-11-30 18:53:18 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -111,6 +112,8 @@ static inline __be32 * decode_sattr3(__be32 *p, struct iattr *iap) { u32 tmp; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -120,12 +123,15 @@ decode_sattr3(__be32 *p, struct iattr *i } if (*p++) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = ntohl(*p++); + uid = ntohl(*p++); } if (*p++) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = ntohl(*p++); + gid = ntohl(*p++); } + iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid); + iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid); + iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0); if (*p++) { u64 newsize; @@ -163,8 +169,10 @@ encode_fattr3(struct svc_rqst *rqstp, __ *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag))); if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { diff -NurpP --minimal linux-2.6.19.1/fs/nfsd/nfs4recover.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/nfs4recover.c --- linux-2.6.19.1/fs/nfsd/nfs4recover.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/nfs4recover.c 2006-11-08 21:53:01 +0100 @@ -156,7 +156,7 @@ nfsd4_create_clid_dir(struct nfs4_client dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); goto out_put; } - status = vfs_mkdir(rec_dir.dentry->d_inode, 
dentry, S_IRWXU); + status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU, NULL); out_put: dput(dentry); out_unlock: @@ -260,7 +260,7 @@ nfsd4_remove_clid_file(struct dentry *di return -EINVAL; } mutex_lock(&dir->d_inode->i_mutex); - status = vfs_unlink(dir->d_inode, dentry); + status = vfs_unlink(dir->d_inode, dentry, NULL); mutex_unlock(&dir->d_inode->i_mutex); return status; } @@ -275,7 +275,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, * a kernel from the future.... */ nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - status = vfs_rmdir(dir->d_inode, dentry); + status = vfs_rmdir(dir->d_inode, dentry, NULL); mutex_unlock(&dir->d_inode->i_mutex); return status; } diff -NurpP --minimal linux-2.6.19.1/fs/nfsd/nfs4xdr.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/nfs4xdr.c --- linux-2.6.19.1/fs/nfsd/nfs4xdr.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/nfs4xdr.c 2006-11-30 18:53:18 +0100 @@ -57,6 +57,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -1727,14 +1728,18 @@ out_acl: WRITE32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); + status = nfsd4_encode_user(rqstp, + TAGINO_UID(DX_TAG(dentry->d_inode), + stat.uid, stat.tag), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); + status = nfsd4_encode_group(rqstp, + TAGINO_GID(DX_TAG(dentry->d_inode), + stat.gid, stat.tag), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) diff -NurpP --minimal linux-2.6.19.1/fs/nfsd/nfsxdr.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/nfsxdr.c --- linux-2.6.19.1/fs/nfsd/nfsxdr.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/nfsxdr.c 2006-11-30 18:53:18 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY 
NFSDDBG_XDR @@ -102,6 +103,8 @@ static inline __be32 * decode_sattr(__be32 *p, struct iattr *iap) { u32 tmp, tmp1; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -115,12 +118,15 @@ decode_sattr(__be32 *p, struct iattr *ia } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = tmp; + uid = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = tmp; + gid = tmp; } + iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid); + iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid); + iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0); if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; iap->ia_size = tmp; @@ -164,8 +170,10 @@ encode_fattr(struct svc_rqst *rqstp, __b *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag))); if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); diff -NurpP --minimal linux-2.6.19.1/fs/nfsd/vfs.c linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/vfs.c --- linux-2.6.19.1/fs/nfsd/vfs.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/nfsd/vfs.c 2006-11-20 21:12:32 +0100 @@ -1183,13 +1183,13 @@ nfsd_create(struct svc_rqst *rqstp, stru host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: - host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); + host_err = vfs_mkdir(dirp, dchild, iap->ia_mode, NULL); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); + host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev, NULL); break; default: printk("nfsd: bad file type %o in nfsd_create\n", type); @@ -1474,11 +1474,13 @@ 
nfsd_symlink(struct svc_rqst *rqstp, str else { strncpy(path_alloced, path, plen); path_alloced[plen] = 0; - host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, + path_alloced, mode, NULL); kfree(path_alloced); } } else - host_err = vfs_symlink(dentry->d_inode, dnew, path, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, + path, mode, NULL); if (!host_err) { if (EX_ISSYNC(fhp->fh_export)) @@ -1537,7 +1539,7 @@ nfsd_link(struct svc_rqst *rqstp, struct dold = tfhp->fh_dentry; dest = dold->d_inode; - host_err = vfs_link(dold, dirp, dnew); + host_err = vfs_link(dold, dirp, dnew, NULL); if (!host_err) { if (EX_ISSYNC(ffhp->fh_export)) { err = nfserrno(nfsd_sync_dir(ddir)); @@ -1702,9 +1704,9 @@ nfsd_unlink(struct svc_rqst *rqstp, stru host_err = -EPERM; } else #endif - host_err = vfs_unlink(dirp, rdentry); + host_err = vfs_unlink(dirp, rdentry, NULL); } else { /* It's RMDIR */ - host_err = vfs_rmdir(dirp, rdentry); + host_err = vfs_rmdir(dirp, rdentry, NULL); } dput(rdentry); @@ -1815,7 +1817,8 @@ nfsd_permission(struct svc_export *exp, */ if (!(acc & MAY_LOCAL_ACCESS)) if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { - if (EX_RDONLY(exp) || IS_RDONLY(inode)) + if (EX_RDONLY(exp) || IS_RDONLY(inode) + || MNT_IS_RDONLY(exp->ex_mnt)) return nfserr_rofs; if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) return nfserr_perm; diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/dlm/dlmfs.c linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/dlm/dlmfs.c --- linux-2.6.19.1/fs/ocfs2/dlm/dlmfs.c 2006-11-30 21:19:27 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/dlm/dlmfs.c 2006-12-02 01:14:52 +0100 @@ -44,6 +44,7 @@ #include #include #include +#include #include @@ -335,6 +336,7 @@ static struct inode *dlmfs_get_root_inod inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; + inode->i_tag = dx_current_fstag(sb); inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; @@ -361,6 +363,7 @@ static struct inode *dlmfs_get_inode(str inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; + inode->i_tag = dx_current_fstag(sb); inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/dlmglue.c linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/dlmglue.c --- linux-2.6.19.1/fs/ocfs2/dlmglue.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/dlmglue.c 2006-12-02 01:19:57 +0100 @@ -1326,6 +1326,7 @@ static void __ocfs2_stuff_meta_lvb(struc lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); lvb->lvb_iuid = cpu_to_be32(inode->i_uid); lvb->lvb_igid = cpu_to_be32(inode->i_gid); + lvb->lvb_itag = cpu_to_be16(inode->i_tag); lvb->lvb_imode = cpu_to_be16(inode->i_mode); lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); lvb->lvb_iatime_packed = @@ -1379,6 +1380,7 @@ static void ocfs2_refresh_inode_from_lvb inode->i_uid = be32_to_cpu(lvb->lvb_iuid); inode->i_gid = be32_to_cpu(lvb->lvb_igid); + inode->i_tag = be16_to_cpu(lvb->lvb_itag); inode->i_mode = be16_to_cpu(lvb->lvb_imode); inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); ocfs2_unpack_timespec(&inode->i_atime, diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/dlmglue.h linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/dlmglue.h --- linux-2.6.19.1/fs/ocfs2/dlmglue.h 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/dlmglue.h 2006-12-02 01:14:52 +0100 @@ -34,7 +34,7 @@ struct ocfs2_meta_lvb { __u8 lvb_version; __u8 lvb_reserved0; - __be16 lvb_reserved1; + __be16 lvb_itag; __be32 lvb_iclusters; __be32 lvb_iuid; __be32 lvb_igid; diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/file.c linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/file.c --- linux-2.6.19.1/fs/ocfs2/file.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/file.c 2006-11-08 04:57:52 +0100 @@ -800,13 +800,15 @@ 
int ocfs2_setattr(struct dentry *dentry, mlog(0, "uid change: %d\n", attr->ia_uid); if (attr->ia_valid & ATTR_GID) mlog(0, "gid change: %d\n", attr->ia_gid); + if (attr->ia_valid & ATTR_TAG) + mlog(0, "tag change: %d\n", attr->ia_tag); if (attr->ia_valid & ATTR_SIZE) mlog(0, "size change...\n"); if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) mlog(0, "time change...\n"); #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ - | ATTR_GID | ATTR_UID | ATTR_MODE) + | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE) if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) { mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid); return 0; @@ -1220,6 +1222,7 @@ bail: struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, + .sync_flags = ocfs2_sync_flags, }; struct inode_operations ocfs2_special_file_iops = { diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/inode.c --- linux-2.6.19.1/fs/ocfs2/inode.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/inode.c 2006-12-02 01:35:31 +0100 @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -43,6 +44,7 @@ #include "file.h" #include "heartbeat.h" #include "inode.h" +#include "ioctl.h" #include "journal.h" #include "namei.h" #include "suballoc.h" @@ -78,6 +80,10 @@ void ocfs2_set_inode_flags(struct inode if (flags & OCFS2_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & OCFS2_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & OCFS2_BARRIER_FL) + inode->i_flags |= S_BARRIER; if (flags & OCFS2_SYNC_FL) inode->i_flags |= S_SYNC; @@ -89,6 +95,39 @@ void ocfs2_set_inode_flags(struct inode inode->i_flags |= S_DIRSYNC; } +int ocfs2_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + + oldflags = OCFS2_I(inode)->ip_flags; + newflags = oldflags & ~(OCFS2_APPEND_FL | + OCFS2_IMMUTABLE_FL | OCFS2_IUNLINK_FL | + OCFS2_BARRIER_FL | OCFS2_NOATIME_FL | + OCFS2_SYNC_FL | 
OCFS2_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= OCFS2_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= OCFS2_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= OCFS2_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= OCFS2_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= OCFS2_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= OCFS2_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= OCFS2_DIRSYNC_FL; + + if (oldflags ^ newflags) + return ocfs2_set_inode_attr(inode, + newflags, OCFS2_FL_MASK); + return 0; +} + struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, u64 blkno, int delete_vote) @@ -236,6 +275,8 @@ int ocfs2_populate_inode(struct inode *i struct super_block *sb; struct ocfs2_super *osb; int status = -EINVAL; + uid_t uid; + gid_t gid; mlog_entry("(0x%p, size:%llu)\n", inode, (unsigned long long)fe->i_size); @@ -267,8 +308,12 @@ int ocfs2_populate_inode(struct inode *i inode->i_generation = le32_to_cpu(fe->i_generation); inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); inode->i_mode = le16_to_cpu(fe->i_mode); - inode->i_uid = le32_to_cpu(fe->i_uid); - inode->i_gid = le32_to_cpu(fe->i_gid); + uid = le32_to_cpu(fe->i_uid); + gid = le32_to_cpu(fe->i_gid); + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, + /* le16_to_cpu(raw_inode->i_raw_tag)i */ 0); /* Fast symlinks will have i_size but no allocated clusters. 
*/ if (S_ISLNK(inode->i_mode) && !fe->i_clusters) @@ -1228,8 +1273,11 @@ int ocfs2_mark_inode_dirty(struct ocfs2_ fe->i_size = cpu_to_le64(i_size_read(inode)); fe->i_links_count = cpu_to_le16(inode->i_nlink); - fe->i_uid = cpu_to_le32(inode->i_uid); - fe->i_gid = cpu_to_le32(inode->i_gid); + fe->i_uid = cpu_to_le32(TAGINO_UID(DX_TAG(inode), + inode->i_uid, inode->i_tag)); + fe->i_gid = cpu_to_le32(TAGINO_GID(DX_TAG(inode), + inode->i_gid, inode->i_tag)); + /* i_tag = = cpu_to_le16(inode->i_tag); */ fe->i_mode = cpu_to_le16(inode->i_mode); fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec); fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); @@ -1257,15 +1305,24 @@ leave: void ocfs2_refresh_inode(struct inode *inode, struct ocfs2_dinode *fe) { + uid_t uid; + gid_t gid; + spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); + /* OCFS2_I(inode)->ip_flags &= ~OCFS2_FL_MASK; + OCFS2_I(inode)->ip_flags |= le32_to_cpu(fe->i_flags) & OCFS2_FL_MASK; */ ocfs2_set_inode_flags(inode); i_size_write(inode, le64_to_cpu(fe->i_size)); inode->i_nlink = le16_to_cpu(fe->i_links_count); - inode->i_uid = le32_to_cpu(fe->i_uid); - inode->i_gid = le32_to_cpu(fe->i_gid); + uid = le32_to_cpu(fe->i_uid); + gid = le32_to_cpu(fe->i_gid); + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, + /* le16_to_cpu(raw_inode->i_raw_tag)i */ 0); inode->i_mode = le16_to_cpu(fe->i_mode); if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) inode->i_blocks = 0; diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/inode.h linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/inode.h --- linux-2.6.19.1/fs/ocfs2/inode.h 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/inode.h 2006-11-08 04:57:46 +0100 @@ -150,5 +150,6 @@ int ocfs2_aio_read(struct file *file, st int ocfs2_aio_write(struct file *file, struct 
kiocb *req, struct iocb *iocb); void ocfs2_set_inode_flags(struct inode *inode); +int ocfs2_sync_flags(struct inode *inode); #endif /* OCFS2_INODE_H */ diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/ioctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/ioctl.c --- linux-2.6.19.1/fs/ocfs2/ioctl.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/ioctl.c 2006-12-02 01:28:36 +0100 @@ -38,7 +38,7 @@ static int ocfs2_get_inode_attr(struct i return status; } -static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, +int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, unsigned mask) { struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode); diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/ioctl.h linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/ioctl.h --- linux-2.6.19.1/fs/ocfs2/ioctl.h 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/ioctl.h 2006-12-02 01:29:36 +0100 @@ -10,6 +10,9 @@ #ifndef OCFS2_IOCTL_H #define OCFS2_IOCTL_H +int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, + unsigned mask); + int ocfs2_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg); diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/namei.c linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/namei.c --- linux-2.6.19.1/fs/ocfs2/namei.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/namei.c 2006-11-08 04:57:52 +0100 @@ -40,6 +40,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_NAMEI #include @@ -497,6 +498,9 @@ static int ocfs2_mknod_locked(struct ocf u64 fe_blkno = 0; u16 suballoc_bit; struct inode *inode = NULL; + uid_t uid; + gid_t gid; + tag_t tag; mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, (unsigned long)dev, dentry->d_name.len, @@ -556,13 +560,19 @@ static int ocfs2_mknod_locked(struct ocf fe->i_blkno = cpu_to_le64(fe_blkno); fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); - fe->i_uid = cpu_to_le32(current->fsuid); + + tag = 
dx_current_fstag(osb->sb); + uid = current->fsuid; if (dir->i_mode & S_ISGID) { - fe->i_gid = cpu_to_le32(dir->i_gid); + gid = dir->i_gid; if (S_ISDIR(mode)) mode |= S_ISGID; } else - fe->i_gid = cpu_to_le32(current->fsgid); + gid = current->fsgid; + + fe->i_uid = cpu_to_le32(TAGINO_UID(DX_TAG(inode), uid, tag)); + fe->i_gid = cpu_to_le32(TAGINO_GID(DX_TAG(inode), gid, tag)); + inode->i_tag = tag; fe->i_mode = cpu_to_le16(mode); if (S_ISCHR(mode) || S_ISBLK(mode)) fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); @@ -2300,4 +2310,5 @@ struct inode_operations ocfs2_dir_iops = .rename = ocfs2_rename, .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, + .sync_flags = ocfs2_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/ocfs2.h linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/ocfs2.h --- linux-2.6.19.1/fs/ocfs2/ocfs2.h 2006-09-20 16:58:35 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/ocfs2.h 2006-11-08 04:57:52 +0100 @@ -174,6 +174,7 @@ enum ocfs2_mount_options OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ + OCFS2_MOUNT_TAGGED = 1 << 8, /* use tagging */ }; #define OCFS2_OSB_SOFT_RO 0x0001 diff -NurpP --minimal linux-2.6.19.1/fs/ocfs2/ocfs2_fs.h linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/ocfs2_fs.h --- linux-2.6.19.1/fs/ocfs2/ocfs2_fs.h 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/ocfs2_fs.h 2006-12-02 01:33:58 +0100 @@ -125,8 +125,12 @@ #define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ #define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ +#define OCFS2_BARRIER_FL (0x04000000) /* Barrier for chroot() */ +#define OCFS2_IUNLINK_FL (0x08000000) /* Immutable unlink */ + #define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ #define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ +#define OCFS2_FL_MASK (0x0F0100FF) /* * ioctl commands diff -NurpP --minimal 
linux-2.6.19.1/fs/ocfs2/super.c linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/super.c --- linux-2.6.19.1/fs/ocfs2/super.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/ocfs2/super.c 2006-11-08 04:57:52 +0100 @@ -141,6 +141,7 @@ enum { Opt_hb_local, Opt_data_ordered, Opt_data_writeback, + Opt_tag, Opt_notag, Opt_tagid, Opt_err, }; @@ -154,6 +155,10 @@ static match_table_t tokens = { {Opt_hb_local, OCFS2_HB_LOCAL}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, + {Opt_tag, "tag"}, + {Opt_tag, "tagxid"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, {Opt_err, NULL} }; @@ -362,6 +367,14 @@ static int ocfs2_remount(struct super_bl goto out; } + printk("ocfs2_remount: %lx,%lx\n", osb->s_mount_opt, sb->s_flags); + if ((parsed_options & OCFS2_MOUNT_TAGGED) && + !(sb->s_flags & MS_TAGGED)) { + ret = -EINVAL; + mlog(ML_ERROR, "Cannot change tagging on remount\n"); + goto out; + } + if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != (parsed_options & OCFS2_MOUNT_HB_LOCAL)) { ret = -EINVAL; @@ -635,6 +648,9 @@ static int ocfs2_fill_super(struct super ocfs2_complete_mount_recovery(osb); + if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED) + sb->s_flags |= MS_TAGGED; + printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %d, slot %d) " "with %s data mode.\n", osb->dev_str, osb->node_num, osb->slot_num, @@ -747,6 +763,20 @@ static int ocfs2_parse_options(struct su case Opt_data_writeback: *mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; break; +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + *mount_opt |= OCFS2_MOUNT_TAGGED; + break; + case Opt_notag: + *mount_opt &= ~OCFS2_MOUNT_TAGGED; + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + *mount_opt |= OCFS2_MOUNT_TAGGED; + break; +#endif default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " diff -NurpP --minimal linux-2.6.19.1/fs/open.c linux-2.6.19.1-vs2.2.0-rc6/fs/open.c --- linux-2.6.19.1/fs/open.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/open.c 
2006-11-30 18:53:18 +0100 @@ -27,22 +27,31 @@ #include #include #include +#include +#include +#include +#include +#include int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { int retval = -ENODEV; if (dentry) { + struct super_block *sb = dentry->d_sb; + retval = -ENOSYS; - if (dentry->d_sb->s_op->statfs) { + if (sb->s_op->statfs) { memset(buf, 0, sizeof(*buf)); retval = security_sb_statfs(dentry); if (retval) return retval; - retval = dentry->d_sb->s_op->statfs(dentry, buf); + retval = sb->s_op->statfs(dentry, buf); if (retval == 0 && buf->f_frsize == 0) buf->f_frsize = buf->f_bsize; } + if (!vx_check(0, VS_ADMIN|VS_WATCH)) + vx_vsi_statfs(sb, buf); } return retval; } @@ -246,7 +255,7 @@ static long do_sys_truncate(const char _ goto dput_and_out; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; error = -EPERM; @@ -395,7 +404,7 @@ asmlinkage long sys_faccessat(int dfd, c special_file(nd.dentry->d_inode->i_mode)) goto out_path_release; - if(IS_RDONLY(nd.dentry->d_inode)) + if(IS_RDONLY(nd.dentry->d_inode) || MNT_IS_RDONLY(nd.mnt)) res = -EROFS; out_path_release: @@ -509,7 +518,7 @@ asmlinkage long sys_fchmod(unsigned int audit_inode(NULL, inode); err = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(file->f_vfsmnt)) goto out_putf; err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -539,11 +548,11 @@ asmlinkage long sys_fchmodat(int dfd, co error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); if (error) goto out; - inode = nd.dentry->d_inode; - error = -EROFS; - if (IS_RDONLY(inode)) + error = cow_check_and_break(&nd); + if (error) goto dput_and_out; + inode = nd.dentry->d_inode; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -568,7 +577,8 @@ asmlinkage long sys_chmod(const char __u return sys_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +static int chown_common(struct dentry 
*dentry, struct vfsmount *mnt, + uid_t user, gid_t group) { struct inode * inode; int error; @@ -580,7 +590,7 @@ static int chown_common(struct dentry * goto out; } error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) goto out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -588,11 +598,11 @@ static int chown_common(struct dentry * newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { newattrs.ia_valid |= ATTR_UID; - newattrs.ia_uid = user; + newattrs.ia_uid = dx_map_uid(user); } if (group != (gid_t) -1) { newattrs.ia_valid |= ATTR_GID; - newattrs.ia_gid = group; + newattrs.ia_gid = dx_map_gid(group); } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; @@ -611,7 +621,11 @@ asmlinkage long sys_chown(const char __u error = user_path_walk(filename, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); +#ifdef CONFIG_VSERVER_COWBL + error = cow_check_and_break(&nd); + if (!error) +#endif + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); out: return error; @@ -631,7 +645,11 @@ asmlinkage long sys_fchownat(int dfd, co error = __user_walk_fd(dfd, filename, follow, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); +#ifdef CONFIG_VSERVER_COWBL + error = cow_check_and_break(&nd); + if (!error) +#endif + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); out: return error; @@ -645,7 +663,11 @@ asmlinkage long sys_lchown(const char __ error = user_path_walk_link(filename, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); +#ifdef CONFIG_VSERVER_COWBL + error = cow_check_and_break(&nd); + if (!error) +#endif + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); out: return error; @@ -664,7 +686,7 @@ asmlinkage long sys_fchown(unsigned int dentry = file->f_dentry; audit_inode(NULL, dentry->d_inode); - error = chown_common(dentry, user, group); + error = 
chown_common(dentry, file->f_vfsmnt, user, group); fput(file); out: return error; @@ -892,6 +914,7 @@ repeat: FD_SET(fd, fdt->open_fds); FD_CLR(fd, fdt->close_on_exec); files->next_fd = fd + 1; + vx_openfd_inc(fd); #if 1 /* Sanity check */ if (fdt->fd[fd] != NULL) { @@ -914,6 +937,7 @@ static void __put_unused_fd(struct files __FD_CLR(fd, fdt->open_fds); if (fd < files->next_fd) files->next_fd = fd; + vx_openfd_dec(fd); } void fastcall put_unused_fd(unsigned int fd) diff -NurpP --minimal linux-2.6.19.1/fs/proc/array.c linux-2.6.19.1-vs2.2.0-rc6/fs/proc/array.c --- linux-2.6.19.1/fs/proc/array.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/proc/array.c 2006-12-13 08:27:08 +0100 @@ -75,6 +75,8 @@ #include #include #include +#include +#include #include #include @@ -134,8 +136,9 @@ static const char *task_state_array[] = "D (disk sleep)", /* 2 */ "T (stopped)", /* 4 */ "T (tracing stop)", /* 8 */ - "Z (zombie)", /* 16 */ - "X (dead)" /* 32 */ + "H (on hold)", /* 16 */ + "Z (zombie)", /* 32 */ + "X (dead)", /* 64 */ }; static inline const char * get_task_state(struct task_struct *tsk) @@ -144,7 +147,8 @@ static inline const char * get_task_stat TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE | TASK_STOPPED | - TASK_TRACED)) | + TASK_TRACED | + TASK_ONHOLD)) | (tsk->exit_state & (EXIT_ZOMBIE | EXIT_DEAD)); const char **p = &task_state_array[0]; @@ -161,8 +165,16 @@ static inline char * task_state(struct t struct group_info *group_info; int g; struct fdtable *fdt = NULL; + pid_t pid, ptgid, tppid, tgid; rcu_read_lock(); + tgid = vx_map_tgid(p->tgid); + pid = vx_map_pid(p->pid); + ptgid = vx_map_pid(pid_alive(p) ? + rcu_dereference(p->real_parent)->tgid : 0); + tppid = vx_map_pid(pid_alive(p) && p->ptrace ? 
+ rcu_dereference(p->parent)->pid : 0); + buffer += sprintf(buffer, "State:\t%s\n" "SleepAVG:\t%lu%%\n" @@ -174,9 +186,7 @@ static inline char * task_state(struct t "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), (p->sleep_avg/1024)*100/(1020000000/1024), - p->tgid, p->pid, - pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, - pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, + tgid, pid, (pid > 1) ? ptgid : 0, tppid, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); @@ -283,17 +293,26 @@ static inline char * task_sig(struct tas static inline char *task_cap(struct task_struct *p, char *buffer) { - return buffer + sprintf(buffer, "CapInh:\t%016x\n" - "CapPrm:\t%016x\n" - "CapEff:\t%016x\n", - cap_t(p->cap_inheritable), - cap_t(p->cap_permitted), - cap_t(p->cap_effective)); + struct vx_info *vxi = p->vx_info; + + return buffer + sprintf(buffer, + "CapInh:\t%016x\n" + "CapPrm:\t%016x\n" + "CapEff:\t%016x\n", + (unsigned)vx_info_mbcap(vxi, p->cap_inheritable), + (unsigned)vx_info_mbcap(vxi, p->cap_permitted), + (unsigned)vx_info_mbcap(vxi, p->cap_effective)); } int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; +#ifdef CONFIG_VSERVER_LEGACY + struct vx_info *vxi; +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + struct nx_info *nxi; +#endif struct mm_struct *mm = get_task_mm(task); buffer = task_name(task, buffer); @@ -306,6 +325,46 @@ int proc_pid_status(struct task_struct * buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); buffer = cpuset_task_status_allowed(task, buffer); + + if (task_vx_flags(task, VXF_HIDE_VINFO, 0)) + goto skip; +#ifdef CONFIG_VSERVER_LEGACY + buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task)); + vxi = task_get_vx_info(task); + if (vxi) { + buffer += sprintf (buffer,"ctxflags: %08llx\n" + ,(unsigned long long)vxi->vx_flags); + buffer += sprintf (buffer,"initpid: %d\n" + ,vxi->vx_initpid); + } else { + buffer += sprintf (buffer,"ctxflags: none\n"); + 
buffer += sprintf (buffer,"initpid: none\n"); + } + put_vx_info(vxi); +#else + buffer += sprintf (buffer,"VxID: %d\n", vx_task_xid(task)); +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + nxi = task_get_nx_info(task); + if (nxi) { + int i; + + buffer += sprintf (buffer,"ipv4root:"); + for (i=0; i < nxi->nbipv4; i++){ + buffer += sprintf (buffer," %08x/%08x" + ,nxi->ipv4[i] + ,nxi->mask[i]); + } + *buffer++ = '\n'; + buffer += sprintf (buffer,"ipv4root_bcast: %08x\n" + ,nxi->v4_bcast); + } else { + buffer += sprintf (buffer,"ipv4root: 0\n"); + buffer += sprintf (buffer,"ipv4root_bcast: 0\n"); + } + put_nx_info(nxi); +#endif +skip: #if defined(CONFIG_S390) buffer = task_show_regs(task, buffer); #endif @@ -320,7 +379,7 @@ static int do_task_stat(struct task_stru sigset_t sigign, sigcatch; char state; int res; - pid_t ppid = 0, pgid = -1, sid = -1; + pid_t pid = 0, ppid = 0, pgid = -1, sid = -1; int num_threads = 0; struct mm_struct *mm; unsigned long long start_time; @@ -389,8 +448,10 @@ static int do_task_stat(struct task_stru } sid = sig->session; - pgid = process_group(task); - ppid = rcu_dereference(task->real_parent)->tgid; + pid = vx_info_map_pid(task->vx_info, task->pid); + pgid = vx_info_map_pid(task->vx_info, process_group(task)); + ppid = (pid > 1) ? 
vx_info_map_tgid(task->vx_info, + rcu_dereference(task->real_parent)->tgid) : 0; unlock_task_sighand(task, &flags); } @@ -418,10 +479,21 @@ static int do_task_stat(struct task_stru /* convert nsec -> ticks */ start_time = nsec_to_clock_t(start_time); + /* fixup start time for virt uptime */ + if (vx_flags(VXF_VIRT_UPTIME, 0)) { + unsigned long long bias = + current->vx_info->cvirt.bias_clock; + + if (start_time > bias) + start_time -= bias; + else + start_time = 0; + } + res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n", - task->pid, + pid, tcomm, state, ppid, diff -NurpP --minimal linux-2.6.19.1/fs/proc/base.c linux-2.6.19.1-vs2.2.0-rc6/fs/proc/base.c --- linux-2.6.19.1/fs/proc/base.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/proc/base.c 2006-12-04 06:53:21 +0100 @@ -73,6 +73,9 @@ #include #include #include +#include +#include + #include "internal.h" /* NOTE: @@ -971,6 +974,8 @@ static struct inode *proc_pid_make_inode inode->i_uid = task->euid; inode->i_gid = task->egid; } + /* procfs is xid tagged */ + inode->i_tag = (tag_t)vx_task_xid(task); security_task_to_inode(task, inode); out: @@ -1023,7 +1028,13 @@ static int pid_revalidate(struct dentry { struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); + int ret = 0; + if (task) { + if (!vx_proc_task_visible(task)) + goto out_put; + + ret = 1; if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { inode->i_uid = task->euid; @@ -1034,11 +1045,11 @@ static int pid_revalidate(struct dentry } inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); + out_put: put_task_struct(task); - return 1; } d_drop(dentry); - return 0; + return ret; } static int pid_delete_dentry(struct dentry * dentry) @@ -1404,6 +1415,13 @@ static struct dentry *proc_pident_lookup if (!task) goto out_no_task; + /* FIXME: 
maybe we can come up with a generic approach? */ + if (task_vx_flags(task, VXF_HIDE_VINFO, 0) && + (dentry->d_name.len == 5) && + (!memcmp(dentry->d_name.name, "vinfo", 5) || + !memcmp(dentry->d_name.name, "ninfo", 5))) + goto out; + /* * Yes, it does not scale. And it should not. Don't add * new entries into /proc/<tgid>/ without very good reasons. @@ -1608,14 +1626,14 @@ static int proc_self_readlink(struct den int buflen) { char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -1709,7 +1727,7 @@ out_iput: static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) { struct dentry *error; - struct task_struct *task = get_proc_task(dir); + struct task_struct *task = get_proc_task_real(dir); struct pid_entry *p, *last; error = ERR_PTR(-ENOENT); @@ -1749,6 +1767,9 @@ static int proc_base_fill_cache(struct f static struct file_operations proc_task_operations; static struct inode_operations proc_task_inode_operations; +extern int proc_pid_vx_info(struct task_struct *, char *); +extern int proc_pid_nx_info(struct task_struct *, char *); + static struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, task), DIR("fd", S_IRUSR|S_IXUSR, fd), @@ -1786,6 +1807,8 @@ static struct pid_entry tgid_base_stuff[ #ifdef CONFIG_CPUSETS REG("cpuset", S_IRUGO, cpuset), #endif + INF("vinfo", S_IRUGO, pid_vx_info), + INF("ninfo", S_IRUGO, pid_nx_info), INF("oom_score", S_IRUGO, oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL @@ -1927,7 +1950,7 @@ struct dentry *proc_pid_lookup(struct in goto out; rcu_read_lock(); - task = find_task_by_pid(tgid); + task = vx_find_proc_task_by_pid(tgid); if 
(task) get_task_struct(task); rcu_read_unlock(); @@ -1991,7 +2014,7 @@ static int proc_pid_fill_cache(struct fi int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; - struct task_struct *reaper = get_proc_task(filp->f_dentry->d_inode); + struct task_struct *reaper = get_proc_task_real(filp->f_dentry->d_inode); struct task_struct *task; int tgid; @@ -2010,7 +2033,10 @@ int proc_pid_readdir(struct file * filp, put_task_struct(task), task = next_tgid(tgid + 1)) { tgid = task->pid; filp->f_pos = tgid + TGID_OFFSET; - if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) { + if (!vx_proc_task_visible(task)) + continue; + if (proc_pid_fill_cache(filp, dirent, filldir, task, + vx_map_tgid(tgid)) < 0) { put_task_struct(task); goto out; } @@ -2131,9 +2157,11 @@ static struct dentry *proc_task_lookup(s tid = name_to_int(dentry); if (tid == ~0U) goto out; + if (vx_current_initpid(tid)) + goto out; rcu_read_lock(); - task = find_task_by_pid(tid); + task = vx_find_proc_task_by_pid(tid); if (task) get_task_struct(task); rcu_read_unlock(); @@ -2268,7 +2296,10 @@ static int proc_task_readdir(struct file for (task = first_tid(leader, tid, pos - 2); task; task = next_tid(task), pos++) { - tid = task->pid; + tid = vx_map_pid(task->pid); + /* FIXME: could go away now! 
*/ + if (!vx_proc_task_visible(task)) + continue; if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { /* returning this tgid failed, save it as the first * pid for the next readir call */ diff -NurpP --minimal linux-2.6.19.1/fs/proc/generic.c linux-2.6.19.1-vs2.2.0-rc6/fs/proc/generic.c --- linux-2.6.19.1/fs/proc/generic.c 2006-06-18 04:54:45 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/proc/generic.c 2006-11-08 04:57:41 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "internal.h" @@ -395,12 +396,17 @@ struct dentry *proc_lookup(struct inode for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) continue; + if (!vx_hide_check(0, de->vx_flags)) + continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino = de->low_ino; spin_unlock(&proc_subdir_lock); error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); + /* generic proc entries belong to the host (inode is NULL on failure) */ + if (inode) + inode->i_tag = 0; spin_lock(&proc_subdir_lock); break; } @@ -476,12 +482,15 @@ int proc_readdir(struct file * filp, } do { + if (!vx_hide_check(0, de->vx_flags)) + goto skip; /* filldir passes info to user space */ spin_unlock(&proc_subdir_lock); if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) goto out; spin_lock(&proc_subdir_lock); + skip: filp->f_pos++; de = de->next; } while (de); @@ -604,6 +613,7 @@ static struct proc_dir_entry *proc_creat ent->namelen = len; ent->mode = mode; ent->nlink = nlink; + ent->vx_flags = IATTR_PROC_DEFAULT; out: return ent; } @@ -624,7 +634,8 @@ struct proc_dir_entry *proc_symlink(cons kfree(ent->data); kfree(ent); ent = NULL; - } + } else + ent->vx_flags = IATTR_PROC_SYMLINK; } else { kfree(ent); ent = NULL; diff -NurpP --minimal linux-2.6.19.1/fs/proc/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/proc/inode.c --- linux-2.6.19.1/fs/proc/inode.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/proc/inode.c 2006-11-08 04:57:41 +0100 @@ -168,6 
+168,8 @@ struct inode *proc_get_inode(struct supe inode->i_uid = de->uid; inode->i_gid = de->gid; } + if (de->vx_flags) + PROC_I(inode)->vx_flags = de->vx_flags; if (de->size) inode->i_size = de->size; if (de->nlink) diff -NurpP --minimal linux-2.6.19.1/fs/proc/internal.h linux-2.6.19.1-vs2.2.0-rc6/fs/proc/internal.h --- linux-2.6.19.1/fs/proc/internal.h 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/proc/internal.h 2006-12-04 02:53:55 +0100 @@ -10,6 +10,7 @@ */ #include +#include struct vmalloc_info { unsigned long used; @@ -56,11 +57,16 @@ static inline struct pid *proc_pid(struc return PROC_I(inode)->pid; } -static inline struct task_struct *get_proc_task(struct inode *inode) +static inline struct task_struct *get_proc_task_real(struct inode *inode) { return get_pid_task(proc_pid(inode), PIDTYPE_PID); } +static inline struct task_struct *get_proc_task(struct inode *inode) +{ + return vx_get_proc_task(inode, proc_pid(inode)); +} + static inline int proc_fd(struct inode *inode) { return PROC_I(inode)->fd; diff -NurpP --minimal linux-2.6.19.1/fs/proc/proc_misc.c linux-2.6.19.1-vs2.2.0-rc6/fs/proc/proc_misc.c --- linux-2.6.19.1/fs/proc/proc_misc.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/proc/proc_misc.c 2006-12-04 07:16:28 +0100 @@ -53,6 +53,8 @@ #include #include "internal.h" +#include + #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) /* @@ -82,17 +84,32 @@ static int proc_calc_metrics(char *page, static int loadavg_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { + unsigned int running, threads; int a, b, c; int len; - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); - len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + if (vx_flags(VXF_VIRT_LOAD, 0)) { + struct vx_info *vxi = current->vx_info; + + a = vxi->cvirt.load[0] + (FIXED_1/200); + b = vxi->cvirt.load[1] + (FIXED_1/200); + c = 
vxi->cvirt.load[2] + (FIXED_1/200); + + running = atomic_read(&vxi->cvirt.nr_running); + threads = atomic_read(&vxi->cvirt.nr_threads); + } else { + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + + running = nr_running(); + threads = nr_threads; + } + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n", LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, init_pspace.last_pid); + running, threads, init_pspace.last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } @@ -106,6 +123,9 @@ static int uptime_read_proc(char *page, do_posix_clock_monotonic_gettime(&uptime); cputime_to_timespec(idletime, &idle); + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&uptime, &idle); + len = sprintf(page,"%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, (uptime.tv_nsec / (NSEC_PER_SEC / 100)), @@ -142,7 +162,7 @@ static int meminfo_read_proc(char *page, cached = global_page_state(NR_FILE_PAGES) - total_swapcache_pages - i.bufferram; - if (cached < 0) + if (cached < 0 || vx_flags(VXF_VIRT_MEM, 0)) cached = 0; get_vmalloc_info(&vmi); @@ -252,8 +272,8 @@ static int version_read_proc(char *page, { int len; - strcpy(page, linux_banner); - len = strlen(page); + len = sprintf(page, linux_banner, + utsname()->release, utsname()->version); return proc_calc_metrics(page, start, off, count, eof, len); } diff -NurpP --minimal linux-2.6.19.1/fs/proc/root.c linux-2.6.19.1-vs2.2.0-rc6/fs/proc/root.c --- linux-2.6.19.1/fs/proc/root.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/proc/root.c 2006-11-08 04:57:41 +0100 @@ -25,6 +25,9 @@ struct proc_dir_entry *proc_net, *proc_n #ifdef CONFIG_SYSCTL struct proc_dir_entry *proc_sys_root; #endif +struct proc_dir_entry *proc_virtual; + +extern void proc_vx_init(void); static int proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) @@ -89,6 
+92,7 @@ void __init proc_root_init(void) proc_device_tree_init(); #endif proc_bus = proc_mkdir("bus", NULL); + proc_vx_init(); } static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat diff -NurpP --minimal linux-2.6.19.1/fs/quota.c linux-2.6.19.1-vs2.2.0-rc6/fs/quota.c --- linux-2.6.19.1/fs/quota.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/quota.c 2006-12-06 06:00:16 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include /* Check validity of generic quotactl commands */ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) @@ -81,11 +82,11 @@ static int generic_quotactl_valid(struct if (cmd == Q_GETQUOTA) { if (((type == USRQUOTA && current->euid != id) || (type == GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; } else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO) - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; return 0; @@ -132,10 +133,10 @@ static int xqm_quotactl_valid(struct sup if (cmd == Q_XGETQUOTA) { if (((type == XQM_USRQUOTA && current->euid != id) || (type == XQM_GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) { - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; } @@ -365,6 +366,47 @@ static inline struct super_block *quotac #endif } +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE) + +#include +#include +#include +#include +#include + +static vroot_grb_func *vroot_get_real_bdev = NULL; + +/* taken around every read-modify-write of vroot_get_real_bdev */ +static DEFINE_SPINLOCK(vroot_grb_lock); + +int register_vroot_grb(vroot_grb_func *func) { + int ret = -EBUSY; + + spin_lock(&vroot_grb_lock); + if (!vroot_get_real_bdev) { + vroot_get_real_bdev = func; + ret = 0; + } + 
spin_unlock(&vroot_grb_lock); + return ret; +} +EXPORT_SYMBOL(register_vroot_grb); + +int unregister_vroot_grb(vroot_grb_func *func) { + int ret = -EINVAL; + + spin_lock(&vroot_grb_lock); + if (vroot_get_real_bdev) { + vroot_get_real_bdev = NULL; + ret = 0; + } + spin_unlock(&vroot_grb_lock); + return ret; +} +EXPORT_SYMBOL(unregister_vroot_grb); + +#endif + /* * This is the system call interface. This communicates with * the user-level programs. Currently this only supports diskquota diff -NurpP --minimal linux-2.6.19.1/fs/read_write.c linux-2.6.19.1-vs2.2.0-rc6/fs/read_write.c --- linux-2.6.19.1/fs/read_write.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/read_write.c 2006-11-08 04:57:51 +0100 @@ -703,12 +703,77 @@ sys_writev(unsigned long fd, const struc return ret; } +ssize_t vfs_sendfile(struct file *out_file, struct file *in_file, loff_t *ppos, + size_t count, loff_t max) +{ + struct inode * in_inode, * out_inode; + loff_t pos; + ssize_t ret; + + /* verify in_file */ + in_inode = in_file->f_dentry->d_inode; + if (!in_inode) + return -EINVAL; + if (!in_file->f_op || !in_file->f_op->sendfile) + return -EINVAL; + + if (!ppos) + ppos = &in_file->f_pos; + else + if (!(in_file->f_mode & FMODE_PREAD)) + return -ESPIPE; + + ret = rw_verify_area(READ, in_file, ppos, count); + if (ret < 0) + return ret; + count = ret; + + /* verify out_file */ + out_inode = out_file->f_dentry->d_inode; + if (!out_inode) + return -EINVAL; + if (!out_file->f_op || !out_file->f_op->sendpage) + return -EINVAL; + + ret = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); + if (ret < 0) + return ret; + count = ret; + + ret = security_file_permission (out_file, MAY_WRITE); + if (ret) + return ret; + + if (!max) + max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); + + pos = *ppos; + if (unlikely(pos < 0)) + return -EINVAL; + if (unlikely(pos + count > max)) { + if (pos >= max) + return -EOVERFLOW; + count = max - pos; + } + + ret = 
in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + + if (ret > 0) { + current->rchar += ret; + current->wchar += ret; + } + + if (*ppos > max) + return -EOVERFLOW; + return ret; +} + +EXPORT_SYMBOL(vfs_sendfile); + static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) { struct file * in_file, * out_file; - struct inode * in_inode, * out_inode; - loff_t pos; ssize_t retval; int fput_needed_in, fput_needed_out; @@ -721,22 +786,6 @@ static ssize_t do_sendfile(int out_fd, i goto out; if (!(in_file->f_mode & FMODE_READ)) goto fput_in; - retval = -EINVAL; - in_inode = in_file->f_dentry->d_inode; - if (!in_inode) - goto fput_in; - if (!in_file->f_op || !in_file->f_op->sendfile) - goto fput_in; - retval = -ESPIPE; - if (!ppos) - ppos = &in_file->f_pos; - else - if (!(in_file->f_mode & FMODE_PREAD)) - goto fput_in; - retval = rw_verify_area(READ, in_file, ppos, count); - if (retval < 0) - goto fput_in; - count = retval; retval = security_file_permission (in_file, MAY_READ); if (retval) @@ -751,45 +800,12 @@ static ssize_t do_sendfile(int out_fd, i goto fput_in; if (!(out_file->f_mode & FMODE_WRITE)) goto fput_out; - retval = -EINVAL; - if (!out_file->f_op || !out_file->f_op->sendpage) - goto fput_out; - out_inode = out_file->f_dentry->d_inode; - retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); - if (retval < 0) - goto fput_out; - count = retval; - - retval = security_file_permission (out_file, MAY_WRITE); - if (retval) - goto fput_out; - - if (!max) - max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); - - pos = *ppos; - retval = -EINVAL; - if (unlikely(pos < 0)) - goto fput_out; - if (unlikely(pos + count > max)) { - retval = -EOVERFLOW; - if (pos >= max) - goto fput_out; - count = max - pos; - } - retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + retval = vfs_sendfile(out_file, in_file, ppos, count, max); - if (retval > 0) { - current->rchar += 
retval; - current->wchar += retval; - } current->syscr++; current->syscw++; - if (*ppos > max) - retval = -EOVERFLOW; - fput_out: fput_light(out_file, fput_needed_out); fput_in: diff -NurpP --minimal linux-2.6.19.1/fs/reiserfs/bitmap.c linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/bitmap.c --- linux-2.6.19.1/fs/reiserfs/bitmap.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/bitmap.c 2006-11-08 04:57:50 +0100 @@ -13,6 +13,7 @@ #include #include #include +#include #define PREALLOCATION_SIZE 9 @@ -425,8 +426,10 @@ static void _reiserfs_free_block(struct set_sb_free_blocks(rs, sb_free_blocks(rs) + 1); journal_mark_dirty(th, s, sbh); - if (for_unformatted) + if (for_unformatted) { + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK_NODIRTY(inode, 1); + } } void reiserfs_free_block(struct reiserfs_transaction_handle *th, @@ -1034,6 +1037,7 @@ static inline int blocknrs_and_prealloc_ b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; int passno = 0; int nr_allocated = 0; + int blocks; determine_prealloc_size(hint); if (!hint->formatted_node) { @@ -1043,19 +1047,30 @@ static inline int blocknrs_and_prealloc_ "reiserquota: allocating %d blocks id=%u", amount_needed, hint->inode->i_uid); #endif - quota_ret = - DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed); - if (quota_ret) /* Quota exceeded? 
*/ + quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, + amount_needed); + if (quota_ret) return QUOTA_EXCEEDED; + if (DLIMIT_ALLOC_BLOCK(hint->inode, amount_needed)) { + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, + amount_needed); + return NO_DISK_SPACE; + } + if (hint->preallocate && hint->prealloc_size) { #ifdef REISERQUOTA_DEBUG reiserfs_debug(s, REISERFS_DEBUG_CODE, "reiserquota: allocating (prealloc) %d blocks id=%u", hint->prealloc_size, hint->inode->i_uid); #endif - quota_ret = - DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, - hint->prealloc_size); + quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, + hint->prealloc_size); + if (!quota_ret && + DLIMIT_ALLOC_BLOCK(hint->inode, hint->prealloc_size)) { + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, + hint->prealloc_size); + quota_ret = 1; + } if (quota_ret) hint->preallocate = hint->prealloc_size = 0; } @@ -1087,7 +1102,10 @@ static inline int blocknrs_and_prealloc_ nr_allocated, hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */ + /* Free not allocated blocks */ + blocks = amount_needed + hint->prealloc_size - nr_allocated; + DLIMIT_FREE_BLOCK(hint->inode, blocks); + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks); } while (nr_allocated--) reiserfs_free_block(hint->th, hint->inode, @@ -1118,10 +1136,10 @@ static inline int blocknrs_and_prealloc_ REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + - hint->prealloc_size - nr_allocated - - REISERFS_I(hint->inode)-> - i_prealloc_count); + blocks = amount_needed + hint->prealloc_size - nr_allocated - + REISERFS_I(hint->inode)->i_prealloc_count; + DLIMIT_FREE_BLOCK(hint->inode, blocks); + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks); } return CARRY_ON; diff -NurpP --minimal linux-2.6.19.1/fs/reiserfs/file.c linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/file.c --- linux-2.6.19.1/fs/reiserfs/file.c 2006-11-30 
21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/file.c 2006-11-30 20:55:45 +0100 @@ -1575,6 +1575,7 @@ const struct file_operations reiserfs_fi .release = reiserfs_file_release, .fsync = reiserfs_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .splice_read = generic_file_splice_read, @@ -1589,4 +1590,5 @@ struct inode_operations reiserfs_file_in .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/reiserfs/inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/inode.c --- linux-2.6.19.1/fs/reiserfs/inode.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/inode.c 2006-11-30 18:53:18 +0100 @@ -16,6 +16,8 @@ #include #include #include +#include +#include static int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); @@ -50,6 +52,7 @@ void reiserfs_delete_inode(struct inode * stat data deletion */ if (!err) DQUOT_FREE_INODE(inode); + DLIMIT_FREE_INODE(inode); if (journal_end(&th, inode->i_sb, jbegin_count)) goto out; @@ -1114,6 +1117,8 @@ static void init_inode(struct inode *ino struct buffer_head *bh; struct item_head *ih; __u32 rdev; + uid_t uid; + gid_t gid; //int version = ITEM_VERSION_1; bh = PATH_PLAST_BUFFER(path); @@ -1136,12 +1141,13 @@ static void init_inode(struct inode *ino (struct stat_data_v1 *)B_I_PITEM(bh, ih); unsigned long blocks; + uid = sd_v1_uid(sd); + gid = sd_v1_gid(sd); + set_inode_item_key_version(inode, KEY_FORMAT_3_5); set_inode_sd_version(inode, STAT_DATA_V1); inode->i_mode = sd_v1_mode(sd); inode->i_nlink = sd_v1_nlink(sd); - inode->i_uid = sd_v1_uid(sd); - inode->i_gid = sd_v1_gid(sd); inode->i_size = sd_v1_size(sd); inode->i_atime.tv_sec = sd_v1_atime(sd); inode->i_mtime.tv_sec = sd_v1_mtime(sd); @@ -1183,11 +1189,12 @@ static void init_inode(struct 
inode *ino // (directories and symlinks) struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); + uid = sd_v2_uid(sd); + gid = sd_v2_gid(sd); + inode->i_mode = sd_v2_mode(sd); inode->i_nlink = sd_v2_nlink(sd); - inode->i_uid = sd_v2_uid(sd); inode->i_size = sd_v2_size(sd); - inode->i_gid = sd_v2_gid(sd); inode->i_mtime.tv_sec = sd_v2_mtime(sd); inode->i_atime.tv_sec = sd_v2_atime(sd); inode->i_ctime.tv_sec = sd_v2_ctime(sd); @@ -1217,6 +1224,10 @@ static void init_inode(struct inode *ino sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); } + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0); + pathrelse(path); if (S_ISREG(inode->i_mode)) { inode->i_op = &reiserfs_file_inode_operations; @@ -1239,13 +1250,15 @@ static void init_inode(struct inode *ino static void inode2sd(void *sd, struct inode *inode, loff_t size) { struct stat_data *sd_v2 = (struct stat_data *)sd; + uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); + gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); __u16 flags; + set_sd_v2_uid(sd_v2, uid); + set_sd_v2_gid(sd_v2, gid); set_sd_v2_mode(sd_v2, inode->i_mode); set_sd_v2_nlink(sd_v2, inode->i_nlink); - set_sd_v2_uid(sd_v2, inode->i_uid); set_sd_v2_size(sd_v2, size); - set_sd_v2_gid(sd_v2, inode->i_gid); set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); @@ -1776,6 +1789,10 @@ int reiserfs_new_inode(struct reiserfs_t BUG_ON(!th->t_trans_id); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto out_bad_dlimit; + } if (DQUOT_ALLOC_INODE(inode)) { err = -EDQUOT; goto out_end_trans; @@ -1960,6 +1977,9 @@ int reiserfs_new_inode(struct reiserfs_t DQUOT_FREE_INODE(inode); out_end_trans: + DLIMIT_FREE_INODE(inode); + + out_bad_dlimit: journal_end(th, th->t_super, th->t_blocks_allocated); /* Drop can be outside and it needs 
more credits so it's better to have it outside */ DQUOT_DROP(inode); @@ -2699,6 +2719,14 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (sd_attrs & REISERFS_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (sd_attrs & REISERFS_BARRIER_FL) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (sd_attrs & REISERFS_APPEND_FL) inode->i_flags |= S_APPEND; else @@ -2721,6 +2749,14 @@ void i_attrs_to_sd_attrs(struct inode *i *sd_attrs |= REISERFS_IMMUTABLE_FL; else *sd_attrs &= ~REISERFS_IMMUTABLE_FL; + if (inode->i_flags & S_IUNLINK) + *sd_attrs |= REISERFS_IUNLINK_FL; + else + *sd_attrs &= ~REISERFS_IUNLINK_FL; + if (inode->i_flags & S_BARRIER) + *sd_attrs |= REISERFS_BARRIER_FL; + else + *sd_attrs &= ~REISERFS_BARRIER_FL; if (inode->i_flags & S_SYNC) *sd_attrs |= REISERFS_SYNC_FL; else @@ -2900,6 +2936,22 @@ static ssize_t reiserfs_direct_IO(int rw reiserfs_get_blocks_direct_io, NULL); } +int reiserfs_sync_flags(struct inode *inode) +{ + u16 oldflags, newflags; + + oldflags = REISERFS_I(inode)->i_attrs; + newflags = oldflags; + i_attrs_to_sd_attrs(inode, &newflags); + + if (oldflags ^ newflags) { + REISERFS_I(inode)->i_attrs = newflags; + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + } + return 0; +} + int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -2949,9 +3001,11 @@ int reiserfs_setattr(struct dentry *dent } error = inode_change_ok(inode, attr); + if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { error = reiserfs_chown_xattrs(inode, attr); if (!error) { @@ -2981,6 +3035,9 @@ int reiserfs_setattr(struct dentry *dent inode->i_uid = attr->ia_uid; if (attr->ia_valid & 
ATTR_GID) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_TAG) && + IS_TAGGED(inode)) + inode->i_tag = attr->ia_tag; mark_inode_dirty(inode); error = journal_end(&th, inode->i_sb, jbegin_count); diff -NurpP --minimal linux-2.6.19.1/fs/reiserfs/ioctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/ioctl.c --- linux-2.6.19.1/fs/reiserfs/ioctl.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/ioctl.c 2006-11-08 04:57:52 +0100 @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -24,7 +25,7 @@ static int reiserfs_unpack(struct inode int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { - unsigned int flags; + unsigned int flags, oldflags; switch (cmd) { case REISERFS_IOC_UNPACK: @@ -43,12 +44,14 @@ int reiserfs_ioctl(struct inode *inode, flags = REISERFS_I(inode)->i_attrs; i_attrs_to_sd_attrs(inode, (__u16 *) & flags); + flags &= REISERFS_FL_USER_VISIBLE; return put_user(flags, (int __user *)arg); case REISERFS_IOC_SETFLAGS:{ if (!reiserfs_attrs(inode->i_sb)) return -ENOTTY; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) @@ -58,10 +61,12 @@ int reiserfs_ioctl(struct inode *inode, if (get_user(flags, (int __user *)arg)) return -EFAULT; - if (((flags ^ REISERFS_I(inode)-> - i_attrs) & (REISERFS_IMMUTABLE_FL | - REISERFS_APPEND_FL)) - && !capable(CAP_LINUX_IMMUTABLE)) + oldflags = REISERFS_I(inode) -> i_attrs; + if (((oldflags & REISERFS_IMMUTABLE_FL) || + ((flags ^ oldflags) & + (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | + REISERFS_APPEND_FL))) && + !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; if ((flags & REISERFS_NOTAIL_FL) && @@ -72,6 +77,9 @@ int reiserfs_ioctl(struct inode *inode, if (result) return result; } + + flags = flags & REISERFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE; sd_attrs_to_i_attrs(flags, inode); REISERFS_I(inode)->i_attrs = flags; 
inode->i_ctime = CURRENT_TIME_SEC; @@ -83,7 +91,8 @@ int reiserfs_ioctl(struct inode *inode, case REISERFS_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(inode->i_generation, (int __user *)arg)) return -EFAULT; diff -NurpP --minimal linux-2.6.19.1/fs/reiserfs/namei.c linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/namei.c --- linux-2.6.19.1/fs/reiserfs/namei.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/namei.c 2006-11-08 04:57:47 +0100 @@ -18,6 +18,7 @@ #include #include #include +#include #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); @@ -361,6 +362,7 @@ static struct dentry *reiserfs_lookup(st reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); } + dx_propagate_tag(nd, inode); /* Propogate the priv_object flag so we know we're in the priv tree */ if (is_reiserfs_priv_object(dir)) @@ -596,6 +598,7 @@ static int new_inode_init(struct inode * } else { inode->i_gid = current->fsgid; } + inode->i_tag = dx_current_fstag(inode->i_sb); DQUOT_INIT(inode); return 0; } @@ -1542,6 +1545,7 @@ struct inode_operations reiserfs_dir_ino .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; /* @@ -1558,6 +1562,7 @@ struct inode_operations reiserfs_symlink .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; @@ -1571,5 +1576,6 @@ struct inode_operations reiserfs_special .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/reiserfs/stree.c 
linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/stree.c --- linux-2.6.19.1/fs/reiserfs/stree.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/stree.c 2006-11-08 04:57:50 +0100 @@ -56,6 +56,7 @@ #include #include #include +#include /* Does the buffer contain a disk block which is in the tree. */ inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh) @@ -1297,6 +1298,7 @@ int reiserfs_delete_item(struct reiserfs "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); #endif + DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes); DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); /* Return deleted body length */ @@ -1385,6 +1387,7 @@ void reiserfs_delete_solid_item(struct r #endif DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); + DLIMIT_FREE_SPACE(inode, quota_cut_bytes); } break; } @@ -1738,6 +1741,7 @@ int reiserfs_cut_from_item(struct reiser "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?'); #endif + DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes); DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); return n_ret_value; } @@ -1979,6 +1983,11 @@ int reiserfs_paste_into_item(struct reis pathrelse(p_s_search_path); return -EDQUOT; } + if (DLIMIT_ALLOC_SPACE(inode, n_pasted_size)) { + DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); + pathrelse(p_s_search_path); + return -ENOSPC; + } init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); #ifdef DISPLACE_NEW_PACKING_LOCALITIES @@ -2031,6 +2040,7 @@ int reiserfs_paste_into_item(struct reis n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); #endif + DLIMIT_FREE_SPACE(inode, n_pasted_size); DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); return retval; } @@ -2068,6 +2078,11 @@ int reiserfs_insert_item(struct reiserfs pathrelse(p_s_path); return -EDQUOT; } + if (DLIMIT_ALLOC_SPACE(inode, quota_bytes)) { + DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); + 
pathrelse(p_s_path); + return -ENOSPC; + } } init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); @@ -2115,7 +2130,9 @@ int reiserfs_insert_item(struct reiserfs "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); #endif - if (inode) + if (inode) { + DLIMIT_FREE_SPACE(inode, quota_bytes); DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); + } return retval; } diff -NurpP --minimal linux-2.6.19.1/fs/reiserfs/super.c linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/super.c --- linux-2.6.19.1/fs/reiserfs/super.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/super.c 2006-12-06 05:50:27 +0100 @@ -885,6 +885,14 @@ static int reiserfs_parse_options(struct {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, #endif +#ifndef CONFIG_TAGGING_NONE + {"tagxid",.setmask = 1 << REISERFS_TAGGED}, + {"tag",.setmask = 1 << REISERFS_TAGGED}, + {"notag",.clrmask = 1 << REISERFS_TAGGED}, +#endif +#ifdef CONFIG_PROPAGATE + {"tag",.arg_required = 'T',.values = NULL}, +#endif #ifdef CONFIG_REISERFS_FS_POSIX_ACL {"acl",.setmask = 1 << REISERFS_POSIXACL}, {"noacl",.clrmask = 1 << REISERFS_POSIXACL}, @@ -1146,6 +1154,12 @@ static int reiserfs_remount(struct super return -EINVAL; } + if ((mount_options & (1 << REISERFS_TAGGED)) && + !(s->s_flags & MS_TAGGED)) { + reiserfs_warning(s, "reiserfs: tagging not permitted on remount."); + return -EINVAL; + } + handle_attrs(s); /* Add options that are safe here */ @@ -1594,6 +1608,10 @@ static int reiserfs_fill_super(struct su goto error; } + /* map mount option tagxid */ + if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGGED)) + s->s_flags |= MS_TAGGED; + rs = SB_DISK_SUPER_BLOCK(s); /* Let's do basic sanity check to verify that underlying device is not smaller than the filesystem. 
If the check fails then abort and scream, diff -NurpP --minimal linux-2.6.19.1/fs/reiserfs/xattr.c linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/xattr.c --- linux-2.6.19.1/fs/reiserfs/xattr.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/reiserfs/xattr.c 2006-11-08 04:57:52 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -823,7 +824,7 @@ int reiserfs_delete_xattrs(struct inode if (dir->d_inode->i_nlink <= 2) { root = get_xa_root(inode->i_sb); reiserfs_write_lock_xattrs(inode->i_sb); - err = vfs_rmdir(root->d_inode, dir); + err = vfs_rmdir(root->d_inode, dir, NULL); reiserfs_write_unlock_xattrs(inode->i_sb); dput(root); } else { diff -NurpP --minimal linux-2.6.19.1/fs/stat.c linux-2.6.19.1-vs2.2.0-rc6/fs/stat.c --- linux-2.6.19.1/fs/stat.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/stat.c 2006-11-08 04:57:46 +0100 @@ -27,6 +27,7 @@ void generic_fillattr(struct inode *inod stat->nlink = inode->i_nlink; stat->uid = inode->i_uid; stat->gid = inode->i_gid; + stat->tag = inode->i_tag; stat->rdev = inode->i_rdev; stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; diff -NurpP --minimal linux-2.6.19.1/fs/super.c linux-2.6.19.1-vs2.2.0-rc6/fs/super.c --- linux-2.6.19.1/fs/super.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/super.c 2006-12-06 06:19:04 +0100 @@ -37,6 +37,9 @@ #include #include #include +#include +#include +#include #include @@ -853,6 +856,7 @@ struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct vfsmount *mnt; + struct super_block *sb; char *secdata = NULL; int error; @@ -878,7 +882,14 @@ vfs_kern_mount(struct file_system_type * if (error < 0) goto out_free_secdata; - error = security_sb_kern_mount(mnt->mnt_sb, secdata); + sb = mnt->mnt_sb; + error = -EPERM; + if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) && !sb->s_bdev && + (sb->s_magic != PROC_SUPER_MAGIC) && + (sb->s_magic != DEVPTS_SUPER_MAGIC)) + goto 
out_sb; + + error = security_sb_kern_mount(sb, secdata); if (error) goto out_sb; @@ -906,9 +917,17 @@ do_kern_mount(const char *fstype, int fl { struct file_system_type *type = get_fs_type(fstype); struct vfsmount *mnt; + if (!type) return ERR_PTR(-ENODEV); + + mnt = ERR_PTR(-EPERM); + if ((type->fs_flags & FS_BINARY_MOUNTDATA) && + !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT)) + goto out_put; + mnt = vfs_kern_mount(type, flags, name, data); +out_put: put_filesystem(type); return mnt; } diff -NurpP --minimal linux-2.6.19.1/fs/sysfs/mount.c linux-2.6.19.1-vs2.2.0-rc6/fs/sysfs/mount.c --- linux-2.6.19.1/fs/sysfs/mount.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/sysfs/mount.c 2006-11-08 04:57:53 +0100 @@ -11,8 +11,6 @@ #include "sysfs.h" -/* Random magic number */ -#define SYSFS_MAGIC 0x62656572 struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; @@ -38,7 +36,7 @@ static int sysfs_fill_super(struct super sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = SYSFS_MAGIC; + sb->s_magic = SYSFS_SUPER_MAGIC; sb->s_op = &sysfs_ops; sb->s_time_gran = 1; sysfs_sb = sb; diff -NurpP --minimal linux-2.6.19.1/fs/utimes.c linux-2.6.19.1-vs2.2.0-rc6/fs/utimes.c --- linux-2.6.19.1/fs/utimes.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/utimes.c 2006-11-08 22:44:42 +0100 @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include @@ -32,7 +34,7 @@ asmlinkage long sys_utime(char __user * inode = nd.dentry->d_inode; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; /* Don't worry, the checks are done in inode_change_ok() */ @@ -83,14 +85,13 @@ long do_utimes(int dfd, char __user *fil struct iattr newattrs; error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); - if (error) goto out; - inode = nd.dentry->d_inode; - error = -EROFS; - if (IS_RDONLY(inode)) + error = cow_check_and_break(&nd); + if (error) goto 
dput_and_out; + inode = nd.dentry->d_inode; /* Don't worry, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; diff -NurpP --minimal linux-2.6.19.1/fs/xattr.c linux-2.6.19.1-vs2.2.0-rc6/fs/xattr.c --- linux-2.6.19.1/fs/xattr.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xattr.c 2006-11-08 21:52:09 +0100 @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -195,7 +196,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); */ static long setxattr(struct dentry *d, char __user *name, void __user *value, - size_t size, int flags) + size_t size, int flags, struct vfsmount *mnt) { int error; void *kvalue = NULL; @@ -222,6 +223,9 @@ setxattr(struct dentry *d, char __user * } } + if (MNT_IS_RDONLY(mnt)) + return -EROFS; + error = vfs_setxattr(d, kname, kvalue, size, flags); kfree(kvalue); return error; @@ -237,7 +241,7 @@ sys_setxattr(char __user *path, char __u error = user_path_walk(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); + error = setxattr(nd.dentry, name, value, size, flags, nd.mnt); path_release(&nd); return error; } @@ -252,7 +256,7 @@ sys_lsetxattr(char __user *path, char __ error = user_path_walk_link(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); + error = setxattr(nd.dentry, name, value, size, flags, nd.mnt); path_release(&nd); return error; } @@ -270,7 +274,7 @@ sys_fsetxattr(int fd, char __user *name, return error; dentry = f->f_dentry; audit_inode(NULL, dentry->d_inode); - error = setxattr(dentry, name, value, size, flags); + error = setxattr(dentry, name, value, size, flags, f->f_vfsmnt); fput(f); return error; } @@ -432,7 +436,7 @@ sys_flistxattr(int fd, char __user *list * Extended attribute REMOVE operations */ static long -removexattr(struct dentry *d, char __user *name) +removexattr(struct dentry *d, char __user *name, struct vfsmount *mnt) { int error; char kname[XATTR_NAME_MAX + 1]; @@ 
-443,6 +447,9 @@ removexattr(struct dentry *d, char __use if (error < 0) return error; + if (MNT_IS_RDONLY(mnt)) + return -EROFS; + return vfs_removexattr(d, kname); } @@ -455,7 +462,7 @@ sys_removexattr(char __user *path, char error = user_path_walk(path, &nd); if (error) return error; - error = removexattr(nd.dentry, name); + error = removexattr(nd.dentry, name, nd.mnt); path_release(&nd); return error; } @@ -469,7 +476,7 @@ sys_lremovexattr(char __user *path, char error = user_path_walk_link(path, &nd); if (error) return error; - error = removexattr(nd.dentry, name); + error = removexattr(nd.dentry, name, nd.mnt); path_release(&nd); return error; } @@ -486,7 +493,7 @@ sys_fremovexattr(int fd, char __user *na return error; dentry = f->f_dentry; audit_inode(NULL, dentry->d_inode); - error = removexattr(dentry, name); + error = removexattr(dentry, name, f->f_vfsmnt); fput(f); return error; } diff -NurpP --minimal linux-2.6.19.1/fs/xfs/linux-2.6/xfs_file.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_file.c --- linux-2.6.19.1/fs/xfs/linux-2.6/xfs_file.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_file.c 2006-11-08 04:57:51 +0100 @@ -453,6 +453,7 @@ const struct file_operations xfs_file_op .aio_read = xfs_file_aio_read, .aio_write = xfs_file_aio_write, .sendfile = xfs_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = xfs_file_splice_read, .splice_write = xfs_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, @@ -476,6 +477,7 @@ const struct file_operations xfs_invis_f .aio_read = xfs_file_aio_read_invis, .aio_write = xfs_file_aio_write_invis, .sendfile = xfs_file_sendfile_invis, + .sendpage = generic_file_sendpage, .splice_read = xfs_file_splice_read_invis, .splice_write = xfs_file_splice_write_invis, .unlocked_ioctl = xfs_file_ioctl_invis, diff -NurpP --minimal linux-2.6.19.1/fs/xfs/linux-2.6/xfs_ioctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_ioctl.c --- linux-2.6.19.1/fs/xfs/linux-2.6/xfs_ioctl.c 
2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_ioctl.c 2006-11-20 21:12:32 +0100 @@ -1100,6 +1100,8 @@ xfs_ioc_fsgeometry( #define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ #define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ #define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ +#define LINUX_XFLAG_BARRIER 0x04000000 /* chroot() barrier */ +#define LINUX_XFLAG_IUNLINK 0x08000000 /* immutable unlink */ STATIC unsigned int xfs_merge_ioc_xflags( @@ -1140,6 +1142,10 @@ xfs_di2lxflags( if (di_flags & XFS_DIFLAG_IMMUTABLE) flags |= LINUX_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_IUNLINK) + flags |= LINUX_XFLAG_IUNLINK; + if (di_flags & XFS_DIFLAG_BARRIER) + flags |= LINUX_XFLAG_BARRIER; if (di_flags & XFS_DIFLAG_APPEND) flags |= LINUX_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) diff -NurpP --minimal linux-2.6.19.1/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_iops.c --- linux-2.6.19.1/fs/xfs/linux-2.6/xfs_iops.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_iops.c 2006-11-30 18:53:18 +0100 @@ -53,6 +53,7 @@ #include #include #include +#include /* * Get a XFS inode from a given vnode. 
@@ -402,6 +403,7 @@ xfs_vn_lookup( d_add(dentry, NULL); return NULL; } + dx_propagate_tag(nd, vn_to_inode(cvp)); return d_splice_alias(vn_to_inode(cvp), dentry); } @@ -659,6 +661,10 @@ xfs_vn_setattr( int flags = 0; int error; + error = inode_change_ok(inode, attr); + if (error) + return error; + if (ia_valid & ATTR_UID) { vattr.va_mask |= XFS_AT_UID; vattr.va_uid = attr->ia_uid; @@ -667,6 +673,10 @@ xfs_vn_setattr( vattr.va_mask |= XFS_AT_GID; vattr.va_gid = attr->ia_gid; } + if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode)) { + vattr.va_mask |= XFS_AT_TAG; + vattr.va_tag = attr->ia_tag; + } if (ia_valid & ATTR_SIZE) { vattr.va_mask |= XFS_AT_SIZE; vattr.va_size = attr->ia_size; @@ -712,6 +722,42 @@ xfs_vn_truncate( } STATIC int +xfs_vn_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + int flags = 0; + int error; + bhv_vattr_t vattr; + bhv_vnode_t *vp = vn_from_inode(inode); + + memset(&vattr, 0, sizeof vattr); + + vattr.va_mask = XFS_AT_XFLAGS; + error = bhv_vop_getattr(vp, &vattr, 0, NULL); + + if (error) + return error; + oldflags = vattr.va_xflags; + newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE | + XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER); + + if (IS_IMMUTABLE(inode)) + newflags |= XFS_XFLAG_IMMUTABLE; + if (IS_IUNLINK(inode)) + newflags |= XFS_XFLAG_IUNLINK; + if (IS_BARRIER(inode)) + newflags |= XFS_XFLAG_BARRIER; + + if (oldflags ^ newflags) { + vattr.va_xflags = newflags; + vattr.va_mask |= XFS_AT_XFLAGS; + error = bhv_vop_setattr(vp, &vattr, flags, NULL); + } + vn_revalidate(vp); + return error; +} + +STATIC int xfs_vn_setxattr( struct dentry *dentry, const char *name, @@ -824,6 +870,7 @@ struct inode_operations xfs_inode_operat .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, .removexattr = xfs_vn_removexattr, + .sync_flags = xfs_vn_sync_flags, }; struct inode_operations xfs_dir_inode_operations = { @@ -843,6 +890,7 @@ struct inode_operations xfs_dir_inode_op .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, 
.removexattr = xfs_vn_removexattr, + .sync_flags = xfs_vn_sync_flags, }; struct inode_operations xfs_symlink_inode_operations = { @@ -856,4 +904,5 @@ struct inode_operations xfs_symlink_inod .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, .removexattr = xfs_vn_removexattr, + .sync_flags = xfs_vn_sync_flags, }; diff -NurpP --minimal linux-2.6.19.1/fs/xfs/linux-2.6/xfs_linux.h linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_linux.h --- linux-2.6.19.1/fs/xfs/linux-2.6/xfs_linux.h 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_linux.h 2006-11-08 04:57:47 +0100 @@ -139,6 +139,7 @@ BUFFER_FNS(PrivateStart, unwritten); #define current_pid() (current->pid) #define current_fsuid(cred) (current->fsuid) #define current_fsgid(cred) (current->fsgid) +#define current_fstag(cred,vp) (dx_current_fstag(vn_to_inode(vp)->i_sb)) #define current_test_flags(f) (current->flags & (f)) #define current_set_flags_nested(sp, f) \ (*(sp) = current->flags, current->flags |= (f)) diff -NurpP --minimal linux-2.6.19.1/fs/xfs/linux-2.6/xfs_super.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_super.c --- linux-2.6.19.1/fs/xfs/linux-2.6/xfs_super.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_super.c 2006-12-06 05:50:27 +0100 @@ -158,6 +158,7 @@ xfs_revalidate_inode( inode->i_nlink = ip->i_d.di_nlink; inode->i_uid = ip->i_d.di_uid; inode->i_gid = ip->i_d.di_gid; + inode->i_tag = ip->i_d.di_tag; switch (inode->i_mode & S_IFMT) { case S_IFBLK: @@ -185,6 +186,14 @@ xfs_revalidate_inode( inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) inode->i_flags |= S_APPEND; else @@ -708,6 +717,12 @@ xfs_fs_remount( int error; error = 
bhv_vfs_parseargs(vfsp, options, args, 1); + if ((args->flags2 & XFSMNT2_TAGGED) && + !(sb->s_flags & MS_TAGGED)) { + printk("XFS: %s: tagging not permitted on remount.\n", + sb->s_id); + error = EINVAL; + } if (!error) error = bhv_vfs_mntupdate(vfsp, flags, args); kmem_free(args, sizeof(*args)); diff -NurpP --minimal linux-2.6.19.1/fs/xfs/linux-2.6/xfs_sysctl.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_sysctl.c --- linux-2.6.19.1/fs/xfs/linux-2.6/xfs_sysctl.c 2006-09-20 16:58:39 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_sysctl.c 2006-11-08 04:57:40 +0100 @@ -57,79 +57,79 @@ xfs_stats_clear_proc_handler( STATIC ctl_table xfs_table[] = { {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max}, {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max}, {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max}, {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.panic_mask.min, &xfs_params.panic_mask.max}, {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.error_level.min, &xfs_params.error_level.max}, {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, 
&xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max}, {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max}, {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max}, {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max}, {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max}, {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max}, {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max}, {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.rotorstep.min, &xfs_params.rotorstep.max}, {XFS_INHERIT_NODFRG, "inherit_nodefrag", &xfs_params.inherit_nodfrg.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nodfrg.min, &xfs_params.inherit_nodfrg.max}, /* please keep this the last entry */ #ifdef CONFIG_PROC_FS {XFS_STATS_CLEAR,
"stats_clear", &xfs_params.stats_clear.val, sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.stats_clear.min, &xfs_params.stats_clear.max}, #endif /* CONFIG_PROC_FS */ diff -NurpP --minimal linux-2.6.19.1/fs/xfs/linux-2.6/xfs_vnode.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_vnode.c --- linux-2.6.19.1/fs/xfs/linux-2.6/xfs_vnode.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_vnode.c 2006-11-08 04:57:46 +0100 @@ -119,6 +119,7 @@ vn_revalidate_core( inode->i_nlink = vap->va_nlink; inode->i_uid = vap->va_uid; inode->i_gid = vap->va_gid; + inode->i_tag = vap->va_tag; inode->i_blocks = vap->va_nblocks; inode->i_mtime = vap->va_mtime; inode->i_ctime = vap->va_ctime; @@ -126,6 +127,14 @@ vn_revalidate_core( inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else diff -NurpP --minimal linux-2.6.19.1/fs/xfs/linux-2.6/xfs_vnode.h linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_vnode.h --- linux-2.6.19.1/fs/xfs/linux-2.6/xfs_vnode.h 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/linux-2.6/xfs_vnode.h 2006-11-08 04:57:46 +0100 @@ -350,6 +350,7 @@ typedef struct bhv_vattr { xfs_nlink_t va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ + tag_t va_tag; /* context tag */ xfs_ino_t va_nodeid; /* file id */ xfs_off_t va_size; /* file size in bytes */ u_long va_blocksize; /* blocksize preferred for i/o */ @@ -398,13 +399,15 @@ typedef struct bhv_vattr { #define XFS_AT_PROJID 0x04000000 #define XFS_AT_SIZE_NOPERM 0x08000000 #define XFS_AT_GENCOUNT 0x10000000 +#define XFS_AT_TAG 0x20000000
#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\ XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\ - XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT) + XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT|\ + XFS_AT_TAG) #define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ diff -NurpP --minimal linux-2.6.19.1/fs/xfs/quota/xfs_qm_syscalls.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/quota/xfs_qm_syscalls.c --- linux-2.6.19.1/fs/xfs/quota/xfs_qm_syscalls.c 2006-09-20 16:58:40 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/quota/xfs_qm_syscalls.c 2006-12-09 04:18:17 +0100 @@ -17,6 +17,7 @@ */ #include +#include #include "xfs.h" #include "xfs_fs.h" @@ -213,7 +214,7 @@ xfs_qm_scall_quotaoff( xfs_qoff_logitem_t *qoffstart; int nculprits; - if (!force && !capable(CAP_SYS_ADMIN)) + if (!force && !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); /* * No file system can have quotas enabled on disk but not in core.
@@ -382,7 +383,7 @@ xfs_qm_scall_trunc_qfiles( int error; xfs_inode_t *qip; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); error = 0; if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) { @@ -427,7 +428,7 @@ xfs_qm_scall_quotaon( uint accflags; __int64_t sbflags; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); @@ -598,7 +599,7 @@ xfs_qm_scall_setqlim( int error; xfs_qcnt_t hard, soft; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); if ((newlim->d_fieldmask & diff -NurpP --minimal linux-2.6.19.1/fs/xfs/xfs_clnt.h linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_clnt.h --- linux-2.6.19.1/fs/xfs/xfs_clnt.h 2006-06-18 04:54:50 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_clnt.h 2006-11-08 04:57:46 +0100 @@ -99,5 +99,7 @@ struct xfs_mount_args { */ #define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred * I/O size in stat(2) */ +#define XFSMNT2_TAGGED 0x80000000 /* context tagging */ + #endif /* __XFS_CLNT_H__ */ diff -NurpP --minimal linux-2.6.19.1/fs/xfs/xfs_dinode.h linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_dinode.h --- linux-2.6.19.1/fs/xfs/xfs_dinode.h 2006-09-20 16:58:40 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_dinode.h 2006-11-08 04:57:46 +0100 @@ -53,7 +53,8 @@ typedef struct xfs_dinode_core __uint32_t di_gid; /* owner's group id */ __uint32_t di_nlink; /* number of links to file */ __uint16_t di_projid; /* owner's project id */ - __uint8_t di_pad[8]; /* unused, zeroed space */ + __uint16_t di_tag; /* context tagging */ + __uint8_t di_pad[6]; /* unused, zeroed space */ __uint16_t di_flushiter; /* incremented on flush */ xfs_timestamp_t di_atime; /* time last accessed */ xfs_timestamp_t di_mtime; /* time last modified */ @@ -257,6 +258,9 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size 
allocator hint */ #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ +#define XFS_DIFLAG_BARRIER_BIT 14 /* chroot() barrier */ +#define XFS_DIFLAG_IUNLINK_BIT 15 /* immutable unlink */ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) @@ -271,12 +275,15 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) +#define XFS_DIFLAG_BARRIER (1 << XFS_DIFLAG_BARRIER_BIT) +#define XFS_DIFLAG_IUNLINK (1 << XFS_DIFLAG_IUNLINK_BIT) #define XFS_DIFLAG_ANY \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ - XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG) + XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_BARRIER | \ + XFS_DIFLAG_IUNLINK) #endif /* __XFS_DINODE_H__ */ diff -NurpP --minimal linux-2.6.19.1/fs/xfs/xfs_fs.h linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_fs.h --- linux-2.6.19.1/fs/xfs/xfs_fs.h 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_fs.h 2006-11-08 04:57:46 +0100 @@ -66,6 +66,8 @@ struct fsxattr { #define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ #define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ #define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ +#define XFS_XFLAG_BARRIER 0x00004000 /* chroot() barrier */ +#define XFS_XFLAG_IUNLINK 0x00008000 /* immutable unlink */ #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* @@ -294,7 +296,8 @@ typedef struct xfs_bstat { __s32 bs_extents; /* 
number of extents */ __u32 bs_gen; /* generation count */ __u16 bs_projid; /* project id */ - unsigned char bs_pad[14]; /* pad space, unused */ + __u16 bs_tag; /* context tagging */ + unsigned char bs_pad[12]; /* pad space, unused */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ diff -NurpP --minimal linux-2.6.19.1/fs/xfs/xfs_inode.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_inode.c --- linux-2.6.19.1/fs/xfs/xfs_inode.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_inode.c 2006-11-30 20:55:45 +0100 @@ -50,6 +50,7 @@ #include "xfs_mac.h" #include "xfs_acl.h" +#include kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; @@ -736,20 +737,35 @@ xfs_xlate_dinode_core( xfs_dinode_core_t *buf_core = (xfs_dinode_core_t *)buf; xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; xfs_arch_t arch = ARCH_CONVERT; + uint32_t uid = 0, gid = 0; + uint16_t tag = 0; ASSERT(dir); + if (dir < 0) { + tag = mem_core->di_tag; + /* FIXME: supposed to use superblock flag */ + uid = TAGINO_UID(1, mem_core->di_uid, tag); + gid = TAGINO_GID(1, mem_core->di_gid, tag); + tag = TAGINO_TAG(1, tag); + } + INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch); INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch); INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch); INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); - INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch); - INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch); + INT_XLATE(buf_core->di_uid, uid, dir, arch); + INT_XLATE(buf_core->di_gid, gid, dir, arch); + INT_XLATE(buf_core->di_tag, tag, dir, arch); INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); if (dir > 0) { + /* FIXME: supposed to use superblock flag */ + mem_core->di_uid = 
INOTAG_UID(1, uid, gid); + mem_core->di_gid = INOTAG_GID(1, uid, gid); + mem_core->di_tag = INOTAG_TAG(1, uid, gid, tag); memcpy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad)); } else { @@ -797,6 +813,10 @@ _xfs_dic2xflags( flags |= XFS_XFLAG_PREALLOC; if (di_flags & XFS_DIFLAG_IMMUTABLE) flags |= XFS_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_IUNLINK) + flags |= XFS_XFLAG_IUNLINK; + if (di_flags & XFS_DIFLAG_BARRIER) + flags |= XFS_XFLAG_BARRIER; if (di_flags & XFS_DIFLAG_APPEND) flags |= XFS_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) @@ -1128,6 +1148,7 @@ xfs_ialloc( ASSERT(ip->i_d.di_nlink == nlink); ip->i_d.di_uid = current_fsuid(cr); ip->i_d.di_gid = current_fsgid(cr); + ip->i_d.di_tag = current_fstag(cr, vp); ip->i_d.di_projid = prid; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); diff -NurpP --minimal linux-2.6.19.1/fs/xfs/xfs_itable.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_itable.c --- linux-2.6.19.1/fs/xfs/xfs_itable.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_itable.c 2006-11-08 04:57:46 +0100 @@ -89,6 +89,7 @@ xfs_bulkstat_one_iget( buf->bs_mode = dic->di_mode; buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; + buf->bs_tag = dic->di_tag; buf->bs_size = dic->di_size; vn_atime_to_bstime(vp, &buf->bs_atime); buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; @@ -163,6 +164,7 @@ xfs_bulkstat_one_dinode( buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT); buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT); buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT); + buf->bs_tag = INT_GET(dic->di_tag, ARCH_CONVERT); buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT); buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT); buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT); diff -NurpP --minimal linux-2.6.19.1/fs/xfs/xfs_mount.h linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_mount.h --- linux-2.6.19.1/fs/xfs/xfs_mount.h 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_mount.h 
2006-11-08 04:57:46 +0100 @@ -460,6 +460,7 @@ typedef struct xfs_mount { #define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock counters */ +#define XFS_MOUNT_TAGGED (1ULL << 31) /* context tagging */ /* * Default minimum read and write sizes. diff -NurpP --minimal linux-2.6.19.1/fs/xfs/xfs_vfsops.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_vfsops.c --- linux-2.6.19.1/fs/xfs/xfs_vfsops.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_vfsops.c 2006-11-08 04:57:47 +0100 @@ -300,6 +300,8 @@ xfs_start_flags( if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + if (ap->flags2 & XFSMNT2_TAGGED) + mp->m_flags |= XFS_MOUNT_TAGGED; /* * no recovery flag requires a read-only mount @@ -394,6 +396,8 @@ xfs_finish_flags( return XFS_ERROR(EINVAL); } + if (ap->flags2 & XFSMNT2_TAGGED) + vfs->vfs_super->s_flags |= MS_TAGGED; return 0; } @@ -1645,6 +1649,9 @@ xfs_vget( * in stat(). */ #define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ #define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ +#define MNTOPT_TAGXID "tagxid" /* context tagging for inodes */ +#define MNTOPT_TAGGED "tag" /* context tagging for inodes */ +#define MNTOPT_NOTAGTAG "notag" /* do not use context tagging */ STATIC unsigned long suffix_strtoul(char *s, char **endp, unsigned int base) @@ -1831,6 +1838,19 @@ xfs_parseargs( args->flags |= XFSMNT_ATTR2; } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { args->flags &= ~XFSMNT_ATTR2; +#ifndef CONFIG_TAGGING_NONE + } else if (!strcmp(this_char, MNTOPT_TAGGED)) { + args->flags2 |= XFSMNT2_TAGGED; + } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) { + args->flags2 &= ~XFSMNT2_TAGGED; + } else if (!strcmp(this_char, MNTOPT_TAGXID)) { + args->flags2 |= XFSMNT2_TAGGED; +#endif +#ifdef CONFIG_PROPAGATE + } else if (!strcmp(this_char, MNTOPT_TAGGED)) { + /* use value */ + args->flags2 |= XFSMNT2_TAGGED; +#endif } else if (!strcmp(this_char, "osyncisdsync")) { /* no-op, this is now 
the default */ cmn_err(CE_WARN, diff -NurpP --minimal linux-2.6.19.1/fs/xfs/xfs_vnodeops.c linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_vnodeops.c --- linux-2.6.19.1/fs/xfs/xfs_vnodeops.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/fs/xfs/xfs_vnodeops.c 2006-12-06 05:50:27 +0100 @@ -160,6 +160,7 @@ xfs_getattr( vap->va_mode = ip->i_d.di_mode; vap->va_uid = ip->i_d.di_uid; vap->va_gid = ip->i_d.di_gid; + vap->va_tag = ip->i_d.di_tag; vap->va_projid = ip->i_d.di_projid; /* @@ -260,6 +261,7 @@ xfs_setattr( uint commit_flags=0; uid_t uid=0, iuid=0; gid_t gid=0, igid=0; + tag_t tag=0, itag=0; int timeflags = 0; bhv_vnode_t *vp; xfs_prid_t projid=0, iprojid=0; @@ -316,6 +318,7 @@ xfs_setattr( (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { uint qflags = 0; + /* FIXME: handle tagging? */ if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { uid = vap->va_uid; qflags |= XFS_QMOPT_UQUOTA; @@ -395,6 +398,8 @@ xfs_setattr( if (mask & (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| XFS_AT_GID|XFS_AT_PROJID)) { + /* FIXME: handle tagging? */ + /* * CAP_FOWNER overrides the following restrictions: * @@ -443,7 +448,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) { /* * These IDs could have changed since we last looked at them. * But, we're assured that if the ownership did change @@ -451,10 +456,12 @@ xfs_setattr( * would have changed also. */ iuid = ip->i_d.di_uid; - iprojid = ip->i_d.di_projid; igid = ip->i_d.di_gid; - gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + itag = ip->i_d.di_tag; + iprojid = ip->i_d.di_projid; uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; + gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + tag = (mask & XFS_AT_TAG) ? vap->va_tag : itag; projid = (mask & XFS_AT_PROJID) ? 
(xfs_prid_t)vap->va_projid : iprojid; @@ -482,6 +489,7 @@ xfs_setattr( if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { + /* FIXME: handle tagging? */ ASSERT(tp); code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? @@ -707,7 +715,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) { /* * CAP_FSETID overrides the following restrictions: * @@ -723,6 +731,9 @@ xfs_setattr( * Change the ownerships and register quota modifications * in the transaction. */ + if (itag != tag) { + ip->i_d.di_tag = tag; + } if (iuid != uid) { if (XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & XFS_AT_UID); @@ -803,6 +814,10 @@ xfs_setattr( di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) di_flags |= XFS_DIFLAG_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + di_flags |= XFS_DIFLAG_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + di_flags |= XFS_DIFLAG_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) di_flags |= XFS_DIFLAG_APPEND; if (vap->va_xflags & XFS_XFLAG_SYNC) diff -NurpP --minimal linux-2.6.19.1/include/asm-arm/tlb.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-arm/tlb.h --- linux-2.6.19.1/include/asm-arm/tlb.h 2006-06-18 04:54:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-arm/tlb.h 2006-11-08 04:57:40 +0100 @@ -28,6 +28,7 @@ #else /* !CONFIG_MMU */ #include +#include /* * TLB handling. This allows us to remove pages from the page diff -NurpP --minimal linux-2.6.19.1/include/asm-arm26/tlb.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-arm26/tlb.h --- linux-2.6.19.1/include/asm-arm26/tlb.h 2006-01-03 17:30:02 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-arm26/tlb.h 2006-11-08 04:57:40 +0100 @@ -3,6 +3,7 @@ #include #include +#include /* * TLB handling. 
This allows us to remove pages from the page diff -NurpP --minimal linux-2.6.19.1/include/asm-arm26/unistd.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-arm26/unistd.h --- linux-2.6.19.1/include/asm-arm26/unistd.h 2006-11-30 21:19:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-arm26/unistd.h 2006-11-08 04:57:41 +0100 @@ -302,6 +302,8 @@ #define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) #define __NR_waitid (__NR_SYSCALL_BASE+280) +#define __NR_vserver (__NR_SYSCALL_BASE+313) + /* * The following SWIs are ARM private. FIXME - make appropriate for arm26 */ diff -NurpP --minimal linux-2.6.19.1/include/asm-generic/tlb.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-generic/tlb.h --- linux-2.6.19.1/include/asm-generic/tlb.h 2006-11-30 21:19:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-generic/tlb.h 2006-11-08 04:57:40 +0100 @@ -14,6 +14,7 @@ #define _ASM_GENERIC__TLB_H #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/include/asm-i386/elf.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-i386/elf.h --- linux-2.6.19.1/include/asm-i386/elf.h 2006-11-30 21:19:31 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-i386/elf.h 2006-11-08 04:57:53 +0100 @@ -75,7 +75,7 @@ typedef struct user_fxsr_struct elf_fpxr the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. 
*/ -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +#define ELF_ET_DYN_BASE ((TASK_UNMAPPED_BASE) * 2) /* regs is struct pt_regs, pr_reg is elf_gregset_t (which is now struct_user_regs, they are different) */ diff -NurpP --minimal linux-2.6.19.1/include/asm-ia64/tlb.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-ia64/tlb.h --- linux-2.6.19.1/include/asm-ia64/tlb.h 2006-09-20 16:58:40 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-ia64/tlb.h 2006-11-08 04:57:40 +0100 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/include/asm-powerpc/systbl.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-powerpc/systbl.h --- linux-2.6.19.1/include/asm-powerpc/systbl.h 2006-11-30 21:19:33 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-powerpc/systbl.h 2006-11-20 21:12:32 +0100 @@ -260,7 +260,7 @@ COMPAT_SYS_SPU(fstatfs64) SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64) PPC_SYS_SPU(rtas) OLDSYS(debug_setcontext) -SYSCALL(ni_syscall) +SYSX(sys_vserver, sys32_vserver, sys_vserver) COMPAT_SYS(migrate_pages) COMPAT_SYS(mbind) COMPAT_SYS(get_mempolicy) diff -NurpP --minimal linux-2.6.19.1/include/asm-powerpc/unistd.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-powerpc/unistd.h --- linux-2.6.19.1/include/asm-powerpc/unistd.h 2006-11-30 21:19:33 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-powerpc/unistd.h 2006-11-20 21:12:32 +0100 @@ -275,7 +275,7 @@ #endif #define __NR_rtas 255 #define __NR_sys_debug_setcontext 256 -/* Number 257 is reserved for vserver */ +#define __NR_vserver 257 #define __NR_migrate_pages 258 #define __NR_mbind 259 #define __NR_get_mempolicy 260 diff -NurpP --minimal linux-2.6.19.1/include/asm-s390/unistd.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-s390/unistd.h --- linux-2.6.19.1/include/asm-s390/unistd.h 2006-11-30 21:19:33 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-s390/unistd.h 2006-11-08 04:57:41 +0100 @@ -202,7 +202,7 @@ #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres 
(__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) -/* Number 263 is reserved for vserver */ +#define __NR_vserver 263 #define __NR_statfs64 265 #define __NR_fstatfs64 266 #define __NR_remap_file_pages 267 diff -NurpP --minimal linux-2.6.19.1/include/asm-sparc/unistd.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-sparc/unistd.h --- linux-2.6.19.1/include/asm-sparc/unistd.h 2006-11-30 21:19:34 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-sparc/unistd.h 2006-11-08 21:52:09 +0100 @@ -283,7 +283,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff -NurpP --minimal linux-2.6.19.1/include/asm-sparc64/tlb.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-sparc64/tlb.h --- linux-2.6.19.1/include/asm-sparc64/tlb.h 2006-09-20 16:58:43 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-sparc64/tlb.h 2006-11-08 04:57:40 +0100 @@ -2,6 +2,7 @@ #define _SPARC64_TLB_H #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19.1/include/asm-sparc64/unistd.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-sparc64/unistd.h --- linux-2.6.19.1/include/asm-sparc64/unistd.h 2006-11-30 21:19:35 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-sparc64/unistd.h 2006-11-08 21:52:09 +0100 @@ -285,7 +285,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff -NurpP --minimal linux-2.6.19.1/include/asm-x86_64/unistd.h linux-2.6.19.1-vs2.2.0-rc6/include/asm-x86_64/unistd.h --- linux-2.6.19.1/include/asm-x86_64/unistd.h 2006-11-30 21:19:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/asm-x86_64/unistd.h 2006-11-08 04:57:41 +0100 @@ -532,7 +532,7 @@ 
__SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_utimes 235 __SYSCALL(__NR_utimes, sys_utimes) #define __NR_vserver 236 -__SYSCALL(__NR_vserver, sys_ni_syscall) +__SYSCALL(__NR_vserver, sys_vserver) #define __NR_mbind 237 __SYSCALL(__NR_mbind, sys_mbind) #define __NR_set_mempolicy 238 diff -NurpP --minimal linux-2.6.19.1/include/linux/Kbuild linux-2.6.19.1-vs2.2.0-rc6/include/linux/Kbuild --- linux-2.6.19.1/include/linux/Kbuild 2006-11-30 21:19:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/Kbuild 2006-11-08 04:57:49 +0100 @@ -345,3 +345,6 @@ unifdef-y += xfrm.h unifdef-y += zftape.h objhdr-y += version.h + +header-y += vserver/ + diff -NurpP --minimal linux-2.6.19.1/include/linux/capability.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/capability.h --- linux-2.6.19.1/include/linux/capability.h 2006-06-18 04:55:15 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/capability.h 2006-11-08 04:57:40 +0100 @@ -235,6 +235,7 @@ typedef __u32 kernel_cap_t; arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ /* Allow setting zone reclaim policy */ +/* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 @@ -288,6 +289,11 @@ typedef __u32 kernel_cap_t; #define CAP_AUDIT_CONTROL 30 +/* Allow context manipulations */ +/* Allow changing context info on files */ + +#define CAP_CONTEXT 31 + #ifdef __KERNEL__ /* * Bounding set diff -NurpP --minimal linux-2.6.19.1/include/linux/devpts_fs.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/devpts_fs.h --- linux-2.6.19.1/include/linux/devpts_fs.h 2004-08-14 12:55:59 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/devpts_fs.h 2006-11-08 04:57:53 +0100 @@ -30,5 +30,7 @@ static inline void devpts_pty_kill(int n #endif +#define DEVPTS_SUPER_MAGIC 0x00001cd1 + #endif /* _LINUX_DEVPTS_FS_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/ext2_fs.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/ext2_fs.h --- linux-2.6.19.1/include/linux/ext2_fs.h 2006-11-30 21:19:37 +0100 +++ 
linux-2.6.19.1-vs2.2.0-rc6/include/linux/ext2_fs.h 2006-11-08 04:57:46 +0100 @@ -188,6 +188,8 @@ struct ext2_group_desc #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ +#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */ +#define EXT2_IUNLINK_FL FS_IUNLINK_FL /* Immutable unlink */ #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ @@ -244,7 +246,7 @@ struct ext2_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_tag; /* Context Tag */ __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -276,6 +278,7 @@ struct ext2_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_tag osd2.linux2.l_i_tag #define i_reserved2 osd2.linux2.l_i_reserved2 #endif @@ -317,8 +320,9 @@ struct ext2_inode { #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ -#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ -#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ +#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ +#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ +#define EXT2_MOUNT_TAGGED (1<<24) /* Enable Context Tags */ #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt diff -NurpP --minimal linux-2.6.19.1/include/linux/ext3_fs.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/ext3_fs.h --- linux-2.6.19.1/include/linux/ext3_fs.h 2006-11-30 21:19:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/ext3_fs.h 2006-11-08 04:57:46 +0100 @@ -177,10 +177,20 @@ struct 
ext3_group_desc #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT3_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#endif +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_IOC_SETTAG FIOC_SETTAGJ +#endif /* * Inode dynamic state flags @@ -296,7 +306,7 @@ struct ext3_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_tag; /* Context Tag */ __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -330,6 +340,7 @@ struct ext3_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_tag osd2.linux2.l_i_tag #define i_reserved2 osd2.linux2.l_i_reserved2 #elif defined(__GNU__) @@ -384,6 +395,7 @@ struct ext3_inode { #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT3_MOUNT_TAGGED (1<<24) /* Enable Context Tags */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -812,6 +824,7 @@ struct buffer_head * ext3_bread (handle_ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, int create, int 
extend_disksize); +extern int ext3_sync_flags(struct inode *inode); extern void ext3_read_inode (struct inode *); extern int ext3_write_inode (struct inode *, int); diff -NurpP --minimal linux-2.6.19.1/include/linux/ext4_fs.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/ext4_fs.h --- linux-2.6.19.1/include/linux/ext4_fs.h 2006-11-30 21:19:37 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/ext4_fs.h 2006-12-01 23:14:55 +0100 @@ -189,11 +189,21 @@ struct ext4_group_desc #define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT4_IUNLINK_FL 0x08000000 /* Immutable unlink */ +#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define EXT4_FL_USER_VISIBLE 0x080BDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#endif +#ifdef CONFIG_VSERVER_LEGACY +#define EXT4_IOC_SETTAG FIOC_SETTAGJ +#endif /* * Inode dynamic state flags @@ -312,7 +322,8 @@ struct ext4_inode { __le16 l_i_file_acl_high; __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ - __u32 l_i_reserved2; + __u16 l_i_tag; /* Context Tag */ + __u16 l_i_reserved2; } linux2; struct { __u8 h_i_frag; /* Fragment number */ @@ -344,6 +355,7 @@ struct ext4_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_tag osd2.linux2.l_i_tag #define i_reserved2 osd2.linux2.l_i_reserved2 #elif defined(__GNU__) @@ -400,6 +412,7 @@ 
struct ext4_inode { #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ #define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ +#define EXT4_MOUNT_TAGGED (1<<24) /* Enable Context Tags */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -843,6 +856,7 @@ struct buffer_head * ext4_bread (handle_ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, int create, int extend_disksize); +extern int ext4_sync_flags(struct inode *inode); extern void ext4_read_inode (struct inode *); extern int ext4_write_inode (struct inode *, int); diff -NurpP --minimal linux-2.6.19.1/include/linux/fs.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/fs.h --- linux-2.6.19.1/include/linux/fs.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/fs.h 2006-12-06 06:00:54 +0100 @@ -120,6 +120,8 @@ extern int dir_notify_enable; #define MS_PRIVATE (1<<18) /* change to private */ #define MS_SLAVE (1<<19) /* change to slave */ #define MS_SHARED (1<<20) /* change to shared */ +#define MS_TAGGED (1<<24) /* use generic inode tagging */ +#define MS_TAGID (1<<25) /* use specific tag for this mount */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -146,6 +148,8 @@ extern int dir_notify_enable; #define S_NOCMTIME 128 /* Do not update file c/mtime */ #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ #define S_PRIVATE 512 /* Inode is fs-internal */ +#define S_BARRIER 1024 /* Barrier for chroot() */ +#define S_IUNLINK 2048 /* Immutable unlink */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -162,23 +166,35 @@ extern int dir_notify_enable; */ #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) -#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) +#define IS_RDONLY(inode) __IS_FLG(inode, MS_RDONLY) #define 
IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) +#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_IUNLINK(inode) ((inode)->i_flags & S_IUNLINK) +#define IS_IXORUNLINK(inode) ((IS_IUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) +#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER)) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) +#ifdef CONFIG_VSERVER_COWBL +# define IS_COW(inode) (IS_IUNLINK(inode) && IS_IMMUTABLE(inode)) +# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1)) +#else +# define IS_COW(inode) (0) +# define IS_COW_LINK(inode) (0) +#endif + /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. 
*/ @@ -252,11 +268,17 @@ extern int dir_notify_enable; #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ +#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define FS_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define FS_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define FS_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - +#endif #define SYNC_FILE_RANGE_WAIT_BEFORE 1 #define SYNC_FILE_RANGE_WRITE 2 @@ -322,6 +344,7 @@ typedef void (dio_iodone_t)(struct kiocb #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 #define ATTR_FILE 8192 +#define ATTR_TAG 16384 /* * This is the Inode Attributes structure, used for notify_change(). It @@ -337,6 +360,7 @@ struct iattr { umode_t ia_mode; uid_t ia_uid; gid_t ia_gid; + tag_t ia_tag; loff_t ia_size; struct timespec ia_atime; struct timespec ia_mtime; @@ -350,6 +374,9 @@ struct iattr { struct file *ia_file; }; +#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */ +#define ATTR_FLAG_IUNLINK 1024 /* Immutable unlink */ + /* * Includes for diskquotas. 
*/ @@ -547,6 +574,7 @@ struct inode { unsigned int i_nlink; uid_t i_uid; gid_t i_gid; + tag_t i_tag; dev_t i_rdev; loff_t i_size; struct timespec i_atime; @@ -735,6 +763,7 @@ struct file { loff_t f_pos; struct fown_struct f_owner; unsigned int f_uid, f_gid; + xid_t f_xid; struct file_ra_state f_ra; unsigned long f_version; @@ -817,6 +846,7 @@ struct file_lock { unsigned char fl_type; loff_t fl_start; loff_t fl_end; + xid_t fl_xid; struct fasync_struct * fl_fasync; /* for lease break notifications */ unsigned long fl_break_time; /* for nonblocking lease breaks */ @@ -1013,12 +1043,12 @@ static inline void unlock_super(struct s */ extern int vfs_permission(struct nameidata *, int); extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); -extern int vfs_mkdir(struct inode *, struct dentry *, int); -extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); -extern int vfs_symlink(struct inode *, struct dentry *, const char *, int); -extern int vfs_link(struct dentry *, struct inode *, struct dentry *); -extern int vfs_rmdir(struct inode *, struct dentry *); -extern int vfs_unlink(struct inode *, struct dentry *); +extern int vfs_mkdir(struct inode *, struct dentry *, int, struct nameidata *); +extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t, struct nameidata *); +extern int vfs_symlink(struct inode *, struct dentry *, const char *, int, struct nameidata *); +extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct nameidata *); +extern int vfs_rmdir(struct inode *, struct dentry *, struct nameidata *); +extern int vfs_unlink(struct inode *, struct dentry *, struct nameidata *); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); /* @@ -1158,6 +1188,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + int (*sync_flags) 
(struct inode *); }; struct seq_file; @@ -1173,6 +1204,7 @@ extern ssize_t vfs_readv(struct file *, unsigned long, loff_t *); extern ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long, loff_t *); +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t); /* * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called @@ -1751,6 +1783,7 @@ extern ssize_t generic_file_buffered_wri extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); +extern ssize_t generic_file_sendpage(struct file *, struct page *, int, size_t, loff_t *, int); extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); @@ -1884,6 +1917,7 @@ extern int dcache_dir_open(struct inode extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, void *, filldir_t); +extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *)); extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_link(struct dentry *, struct inode *, struct dentry *); diff -NurpP --minimal linux-2.6.19.1/include/linux/init_task.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/init_task.h --- linux-2.6.19.1/include/linux/init_task.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/init_task.h 2006-11-08 04:57:40 +0100 @@ -140,6 +140,10 @@ extern struct group_info init_groups; .pi_lock = SPIN_LOCK_UNLOCKED, \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ + .xid = 0, \ + .vx_info = NULL, \ + .nid = 0, \ + .nx_info = NULL, \ } diff 
-NurpP --minimal linux-2.6.19.1/include/linux/ipc.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/ipc.h --- linux-2.6.19.1/include/linux/ipc.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/ipc.h 2006-11-08 20:20:37 +0100 @@ -63,6 +63,7 @@ struct kern_ipc_perm key_t key; uid_t uid; gid_t gid; + xid_t xid; uid_t cuid; gid_t cgid; mode_t mode; diff -NurpP --minimal linux-2.6.19.1/include/linux/loop.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/loop.h --- linux-2.6.19.1/include/linux/loop.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/loop.h 2006-11-08 04:57:52 +0100 @@ -45,6 +45,7 @@ struct loop_device { struct loop_func_table *lo_encryption; __u32 lo_init[2]; uid_t lo_key_owner; /* Who set the key */ + xid_t lo_xid; int (*ioctl)(struct loop_device *, int cmd, unsigned long arg); diff -NurpP --minimal linux-2.6.19.1/include/linux/major.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/major.h --- linux-2.6.19.1/include/linux/major.h 2006-06-18 04:55:19 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/major.h 2006-11-08 04:57:51 +0100 @@ -15,6 +15,7 @@ #define HD_MAJOR IDE0_MAJOR #define PTY_SLAVE_MAJOR 3 #define TTY_MAJOR 4 +#define VROOT_MAJOR 4 #define TTYAUX_MAJOR 5 #define LP_MAJOR 6 #define VCS_MAJOR 7 diff -NurpP --minimal linux-2.6.19.1/include/linux/mount.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/mount.h --- linux-2.6.19.1/include/linux/mount.h 2006-09-20 16:58:44 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/mount.h 2006-11-08 04:57:52 +0100 @@ -27,12 +27,16 @@ struct namespace; #define MNT_NOEXEC 0x04 #define MNT_NOATIME 0x08 #define MNT_NODIRATIME 0x10 +#define MNT_RDONLY 0x20 + +#define MNT_IS_RDONLY(m) ((m) && ((m)->mnt_flags & MNT_RDONLY)) #define MNT_SHRINKABLE 0x100 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ #define MNT_PNODE_MASK 0x3000 /* propogation flag mask */ +#define MNT_TAGID 0x8000 struct vfsmount { 
struct list_head mnt_hash; @@ -54,6 +58,7 @@ struct vfsmount { struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ struct namespace *mnt_namespace; /* containing namespace */ int mnt_pinned; + tag_t mnt_tag; /* tagging used for vfsmount */ }; static inline struct vfsmount *mntget(struct vfsmount *mnt) diff -NurpP --minimal linux-2.6.19.1/include/linux/net.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/net.h --- linux-2.6.19.1/include/linux/net.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/net.h 2006-11-08 04:57:42 +0100 @@ -63,6 +63,7 @@ typedef enum { #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 +#define SOCK_USER_SOCKET 5 #ifndef ARCH_HAS_SOCKET_TYPES /** diff -NurpP --minimal linux-2.6.19.1/include/linux/nfs_mount.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/nfs_mount.h --- linux-2.6.19.1/include/linux/nfs_mount.h 2005-08-29 22:25:42 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/nfs_mount.h 2006-11-08 04:57:47 +0100 @@ -61,6 +61,7 @@ struct nfs_mount_data { #define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ +#define NFS_MOUNT_TAGGED 0x8000 /* context tagging */ #define NFS_MOUNT_FLAGMASK 0xFFFF #endif diff -NurpP --minimal linux-2.6.19.1/include/linux/nsproxy.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/nsproxy.h --- linux-2.6.19.1/include/linux/nsproxy.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/nsproxy.h 2006-11-30 20:55:45 +0100 @@ -51,4 +51,10 @@ static inline void exit_task_namespaces( put_nsproxy(ns); } } + +static inline void get_nsproxy(struct nsproxy *ns) +{ + atomic_inc(&ns->count); +} + #endif diff -NurpP --minimal linux-2.6.19.1/include/linux/percpu.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/percpu.h --- linux-2.6.19.1/include/linux/percpu.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/percpu.h 2006-11-08 04:57:40 +0100 @@ -11,7 +11,7 @@ 
/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM -#define PERCPU_ENOUGH_ROOM 32768 +#define PERCPU_ENOUGH_ROOM 65536 #endif /* diff -NurpP --minimal linux-2.6.19.1/include/linux/pid.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/pid.h --- linux-2.6.19.1/include/linux/pid.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/pid.h 2006-11-08 04:57:52 +0100 @@ -8,7 +8,8 @@ enum pid_type PIDTYPE_PID, PIDTYPE_PGID, PIDTYPE_SID, - PIDTYPE_MAX + PIDTYPE_MAX, + PIDTYPE_REALPID }; /* diff -NurpP --minimal linux-2.6.19.1/include/linux/proc_fs.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/proc_fs.h --- linux-2.6.19.1/include/linux/proc_fs.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/proc_fs.h 2006-12-09 03:44:31 +0100 @@ -54,6 +54,7 @@ struct proc_dir_entry { nlink_t nlink; uid_t uid; gid_t gid; + int vx_flags; loff_t size; struct inode_operations * proc_iops; const struct file_operations * proc_fops; @@ -247,10 +248,14 @@ extern void kclist_add(struct kcore_list union proc_op { int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); int (*proc_read)(struct task_struct *task, char *page); + int (*proc_vs_read)(char *page); + int (*proc_vxi_read)(struct vx_info *vxi, char *page); + int (*proc_nxi_read)(struct nx_info *nxi, char *page); }; struct proc_inode { struct pid *pid; + int vx_flags; int fd; union proc_op op; struct proc_dir_entry *pde; diff -NurpP --minimal linux-2.6.19.1/include/linux/reiserfs_fs.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/reiserfs_fs.h --- linux-2.6.19.1/include/linux/reiserfs_fs.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/reiserfs_fs.h 2006-11-08 04:57:46 +0100 @@ -821,6 +821,10 @@ struct stat_data_v1 { #define REISERFS_COMPR_FL FS_COMPR_FL #define REISERFS_NOTAIL_FL FS_NOTAIL_FL +/* unfortunately reiserfs sdattr is only 16 bit */ +#define REISERFS_BARRIER_FL (FS_BARRIER_FL >> 16) +#define REISERFS_IUNLINK_FL 
(FS_IUNLINK_FL >> 16) + /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ REISERFS_SYNC_FL | \ @@ -830,6 +834,14 @@ struct stat_data_v1 { REISERFS_COMPR_FL | \ REISERFS_NOTAIL_FL ) +#ifdef CONFIG_VSERVER_LEGACY +#define REISERFS_FL_USER_VISIBLE (REISERFS_IUNLINK_FL|0x80FF) +#define REISERFS_FL_USER_MODIFIABLE (REISERFS_IUNLINK_FL|0x80FF) +#else +#define REISERFS_FL_USER_VISIBLE 0x80FF +#define REISERFS_FL_USER_MODIFIABLE 0x80FF +#endif + /* Stat Data on disk (reiserfs version of UFS disk inode minus the address blocks) */ struct stat_data { @@ -1901,6 +1913,7 @@ static inline void reiserfs_update_sd(st void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); +int reiserfs_sync_flags(struct inode *inode); /* namei.c */ void set_de_name_and_namelen(struct reiserfs_dir_entry *de); diff -NurpP --minimal linux-2.6.19.1/include/linux/reiserfs_fs_sb.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/reiserfs_fs_sb.h --- linux-2.6.19.1/include/linux/reiserfs_fs_sb.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/reiserfs_fs_sb.h 2006-11-08 04:57:46 +0100 @@ -456,6 +456,7 @@ enum reiserfs_mount_options { REISERFS_POSIXACL, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, + REISERFS_TAGGED, /* Actions on error */ REISERFS_ERROR_PANIC, diff -NurpP --minimal linux-2.6.19.1/include/linux/sched.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/sched.h --- linux-2.6.19.1/include/linux/sched.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/sched.h 2006-12-13 08:26:09 +0100 @@ -26,6 +26,7 @@ #define CLONE_STOPPED 0x02000000 /* Start in stopped state */ #define CLONE_NEWUTS 0x04000000 /* New utsname group? 
*/ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ +#define CLONE_KTHREAD 0x10000000 /* clone a kernel thread */ /* * Scheduling policies @@ -54,6 +55,7 @@ struct sched_param { #include #include #include +// #include #include #include @@ -92,7 +94,7 @@ struct futex_pi_state; * List of flags we want to share for kernel threads, * if only because they are not used by them anyway. */ -#define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND) +#define CLONE_KERNEL (CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_KTHREAD) /* * These are the constant used to fake the fixed-point load-average @@ -144,12 +146,13 @@ extern unsigned long weighted_cpuload(co #define TASK_UNINTERRUPTIBLE 2 #define TASK_STOPPED 4 #define TASK_TRACED 8 +#define TASK_ONHOLD 16 /* in tsk->exit_state */ -#define EXIT_ZOMBIE 16 -#define EXIT_DEAD 32 +#define EXIT_ZOMBIE 32 +#define EXIT_DEAD 64 /* in tsk->state again */ -#define TASK_NONINTERACTIVE 64 -#define TASK_DEAD 128 +#define TASK_NONINTERACTIVE 128 +#define TASK_DEAD 256 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -264,27 +267,30 @@ extern void arch_unmap_area_topdown(stru * The mm counters are not protected by its page_table_lock, * so must be incremented atomically. 
*/ -#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value) -#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member)) -#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member) -#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) -#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) typedef atomic_long_t mm_counter_t; +#define __set_mm_counter(mm, member, value) \ + atomic_long_set(&(mm)->_##member, value) +#define get_mm_counter(mm, member) \ + ((unsigned long)atomic_long_read(&(mm)->_##member)) #else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ /* * The mm counters are protected by its page_table_lock, * so can be incremented directly. */ -#define set_mm_counter(mm, member, value) (mm)->_##member = (value) -#define get_mm_counter(mm, member) ((mm)->_##member) -#define add_mm_counter(mm, member, value) (mm)->_##member += (value) -#define inc_mm_counter(mm, member) (mm)->_##member++ -#define dec_mm_counter(mm, member) (mm)->_##member-- typedef unsigned long mm_counter_t; +#define __set_mm_counter(mm, member, value) (mm)->_##member = (value) +#define get_mm_counter(mm, member) ((mm)->_##member) #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +#define set_mm_counter(mm, member, value) \ + vx_ ## member ## pages_sub((mm), (get_mm_counter(mm, member) - value)) +#define add_mm_counter(mm, member, value) \ + vx_ ## member ## pages_add((mm), (value)) +#define inc_mm_counter(mm, member) vx_ ## member ## pages_inc((mm)) +#define dec_mm_counter(mm, member) vx_ ## member ## pages_dec((mm)) + #define get_mm_rss(mm) \ (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) #define update_hiwater_rss(mm) do { \ @@ -343,6 +349,7 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; + struct vx_info *mm_vx_info; /* Token based thrashing protection. 
*/ unsigned long swap_token_time; @@ -532,9 +539,10 @@ struct user_struct { /* Hash table maintenance information */ struct list_head uidhash_list; uid_t uid; + xid_t xid; }; -extern struct user_struct *find_user(uid_t); +extern struct user_struct *find_user(xid_t, uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -925,6 +933,14 @@ struct task_struct { void *security; struct audit_context *audit_context; + +/* vserver context data */ + struct vx_info *vx_info; + struct nx_info *nx_info; + + xid_t xid; + nid_t nid; + seccomp_t seccomp; /* Thread group tracking */ @@ -1221,13 +1237,18 @@ extern struct task_struct init_task; extern struct mm_struct init_mm; -#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) + +#define find_task_by_real_pid(nr) \ + find_task_by_pid_type(PIDTYPE_REALPID, nr) +#define find_task_by_pid(nr) \ + find_task_by_pid_type(PIDTYPE_PID, nr) + extern struct task_struct *find_task_by_pid_type(int type, int pid); extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. 
*/ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(xid_t, uid_t); static inline struct user_struct *get_uid(struct user_struct *u) { atomic_inc(&u->__count); diff -NurpP --minimal linux-2.6.19.1/include/linux/shmem_fs.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/shmem_fs.h --- linux-2.6.19.1/include/linux/shmem_fs.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/shmem_fs.h 2006-11-08 04:57:53 +0100 @@ -8,6 +8,9 @@ #define SHMEM_NR_DIRECT 16 +#define TMPFS_SUPER_MAGIC 0x01021994 + + struct shmem_inode_info { spinlock_t lock; unsigned long flags; diff -NurpP --minimal linux-2.6.19.1/include/linux/stat.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/stat.h --- linux-2.6.19.1/include/linux/stat.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/stat.h 2006-11-08 04:57:46 +0100 @@ -63,6 +63,7 @@ struct kstat { unsigned int nlink; uid_t uid; gid_t gid; + tag_t tag; dev_t rdev; loff_t size; struct timespec atime; diff -NurpP --minimal linux-2.6.19.1/include/linux/sunrpc/auth.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/sunrpc/auth.h --- linux-2.6.19.1/include/linux/sunrpc/auth.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/sunrpc/auth.h 2006-11-08 04:57:47 +0100 @@ -24,6 +24,7 @@ struct auth_cred { uid_t uid; gid_t gid; + tag_t tag; struct group_info *group_info; }; diff -NurpP --minimal linux-2.6.19.1/include/linux/sunrpc/clnt.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/sunrpc/clnt.h --- linux-2.6.19.1/include/linux/sunrpc/clnt.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/sunrpc/clnt.h 2006-11-08 04:57:47 +0100 @@ -42,7 +42,8 @@ struct rpc_clnt { cl_intr : 1,/* interruptible */ cl_autobind : 1,/* use getport() */ cl_oneshot : 1,/* dispose after use */ - cl_dead : 1;/* abandoned */ + cl_dead : 1,/* abandoned */ + cl_tag : 1;/* context tagging */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ diff -NurpP --minimal 
linux-2.6.19.1/include/linux/syscalls.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/syscalls.h --- linux-2.6.19.1/include/linux/syscalls.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/syscalls.h 2006-11-08 04:57:51 +0100 @@ -294,6 +294,8 @@ asmlinkage long sys_symlink(const char _ asmlinkage long sys_unlink(const char __user *pathname); asmlinkage long sys_rename(const char __user *oldname, const char __user *newname); +asmlinkage long sys_copyfile(const char __user *from, const char __user *to, + umode_t mode); asmlinkage long sys_chmod(const char __user *filename, mode_t mode); asmlinkage long sys_fchmod(unsigned int fd, mode_t mode); diff -NurpP --minimal linux-2.6.19.1/include/linux/sysctl.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/sysctl.h --- linux-2.6.19.1/include/linux/sysctl.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/sysctl.h 2006-11-08 21:52:09 +0100 @@ -101,6 +101,7 @@ enum KERN_CAP_BSET=14, /* int: capability bounding set */ KERN_PANIC=15, /* int: panic timeout */ KERN_REALROOTDEV=16, /* real root device to mount after initrd */ + KERN_VSHELPER=17, /* string: path to vshelper policy agent */ KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ @@ -932,6 +933,9 @@ typedef int ctl_handler (ctl_table *tabl typedef int proc_handler (ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos); +typedef int virt_handler (struct ctl_table *ctl, int write, xid_t xid, + void **datap, size_t *lenp); + extern int proc_dostring(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); extern int proc_dointvec(ctl_table *, int, struct file *, @@ -1016,6 +1020,7 @@ struct ctl_table mode_t mode; ctl_table *child; proc_handler *proc_handler; /* Callback for text formatting */ + virt_handler *virt_handler; /* Context virtualization */ ctl_handler *strategy; /* Callback function for all r/w */ struct proc_dir_entry 
*de; /* /proc control block */ void *extra1; diff -NurpP --minimal linux-2.6.19.1/include/linux/sysfs.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/sysfs.h --- linux-2.6.19.1/include/linux/sysfs.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/sysfs.h 2006-11-08 04:57:53 +0100 @@ -13,6 +13,8 @@ #include #include +#define SYSFS_SUPER_MAGIC 0x62656572 + struct kobject; struct module; diff -NurpP --minimal linux-2.6.19.1/include/linux/time.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/time.h --- linux-2.6.19.1/include/linux/time.h 2006-09-20 16:58:44 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/time.h 2006-11-08 04:57:40 +0100 @@ -174,6 +174,9 @@ static inline void timespec_add_ns(struc } a->tv_nsec = ns; } + +#include + #endif /* __KERNEL__ */ #define NFDBITS __NFDBITS diff -NurpP --minimal linux-2.6.19.1/include/linux/types.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/types.h --- linux-2.6.19.1/include/linux/types.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/types.h 2006-11-08 04:57:40 +0100 @@ -39,6 +39,9 @@ typedef __kernel_uid32_t uid_t; typedef __kernel_gid32_t gid_t; typedef __kernel_uid16_t uid16_t; typedef __kernel_gid16_t gid16_t; +typedef unsigned int xid_t; +typedef unsigned int nid_t; +typedef unsigned int tag_t; #ifdef CONFIG_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vroot.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vroot.h --- linux-2.6.19.1/include/linux/vroot.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vroot.h 2006-11-08 04:57:51 +0100 @@ -0,0 +1,51 @@ + +/* + * include/linux/vroot.h + * + * written by Herbert Pötzl, 9/11/2002 + * ported to 2.6 by Herbert Pötzl, 30/12/2004 + * + * Copyright (C) 2002-2005 by Herbert Pötzl. + * Redistribution of this file is permitted under the + * GNU General Public License. 
+ */ + +#ifndef _LINUX_VROOT_H +#define _LINUX_VROOT_H + + +#ifdef __KERNEL__ + +/* Possible states of device */ +enum { + Vr_unbound, + Vr_bound, +}; + +struct vroot_device { + int vr_number; + int vr_refcnt; + + struct semaphore vr_ctl_mutex; + struct block_device *vr_device; + int vr_state; +}; + + +typedef struct block_device *(vroot_grb_func)(struct block_device *); + +extern int register_vroot_grb(vroot_grb_func *); +extern int unregister_vroot_grb(vroot_grb_func *); + +#endif /* __KERNEL__ */ + +#define MAX_VROOT_DEFAULT 8 + +/* + * IOCTL commands --- we will commandeer 0x56 ('V') + */ + +#define VROOT_SET_DEV 0x5600 +#define VROOT_CLR_DEV 0x5601 + +#endif /* _LINUX_VROOT_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_base.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_base.h --- linux-2.6.19.1/include/linux/vs_base.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_base.h 2006-11-30 19:39:09 +0100 @@ -0,0 +1,9 @@ +#ifndef _VS_BASE_H +#define _VS_BASE_H + +#include "vserver/base.h" +#include "vserver/debug.h" + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_context.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_context.h --- linux-2.6.19.1/include/linux/vs_context.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_context.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,244 @@ +#ifndef _VS_CONTEXT_H +#define _VS_CONTEXT_H + +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/history.h" +#include "vserver/debug.h" + + +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__,__HERE__) + +static inline struct vx_info *__get_vx_info(struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (!vxi) + return NULL; + + vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_get_vx_info(vxi, _here); + + atomic_inc(&vxi->vx_usecnt); + return 
vxi; +} + + +extern void free_vx_info(struct vx_info *); + +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__,__HERE__) + +static inline void __put_vx_info(struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (!vxi) + return; + + vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_put_vx_info(vxi, _here); + + if (atomic_dec_and_test(&vxi->vx_usecnt)) + free_vx_info(vxi); +} + + +#define init_vx_info(p,i) __init_vx_info(p,i,__FILE__,__LINE__,__HERE__) + +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (vxi) { + vxlprintk(VXD_CBIT(xid, 3), + "init_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_init_vx_info(vxi, vxp, _here); + + atomic_inc(&vxi->vx_usecnt); + } + *vxp = vxi; +} + + +#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__,__HERE__) + +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxo; + + if (!vxi) + return; + + vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_set_vx_info(vxi, vxp, _here); + + atomic_inc(&vxi->vx_usecnt); + vxo = xchg(vxp, vxi); + BUG_ON(vxo); +} + + +#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__,__HERE__) + +static inline void __clr_vx_info(struct vx_info **vxp, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxo; + + vxo = xchg(vxp, NULL); + if (!vxo) + return; + + vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])", + vxo, vxo?vxo->vx_id:0, + vxo?atomic_read(&vxo->vx_usecnt):0, + _file, _line); + __vxh_clr_vx_info(vxo, vxp, _here); + + if (atomic_dec_and_test(&vxo->vx_usecnt)) + free_vx_info(vxo); +} + + +#define claim_vx_info(v,p) \ + 
__claim_vx_info(v,p,__FILE__,__LINE__,__HERE__) + +static inline void __claim_vx_info(struct vx_info *vxi, + struct task_struct *task, + const char *_file, int _line, void *_here) +{ + vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_tasks):0, + task, _file, _line); + __vxh_claim_vx_info(vxi, task, _here); + + atomic_inc(&vxi->vx_tasks); +} + + +extern void unhash_vx_info(struct vx_info *); + +#define release_vx_info(v,p) \ + __release_vx_info(v,p,__FILE__,__LINE__,__HERE__) + +static inline void __release_vx_info(struct vx_info *vxi, + struct task_struct *task, + const char *_file, int _line, void *_here) +{ + vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_tasks):0, + task, _file, _line); + __vxh_release_vx_info(vxi, task, _here); + + might_sleep(); + + if (atomic_dec_and_test(&vxi->vx_tasks)) + unhash_vx_info(vxi); +} + + +#define task_get_vx_info(p) \ + __task_get_vx_info(p,__FILE__,__LINE__,__HERE__) + +static inline struct vx_info *__task_get_vx_info(struct task_struct *p, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxi; + + task_lock(p); + vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)", + p, _file, _line); + vxi = __get_vx_info(p->vx_info, _file, _line, _here); + task_unlock(p); + return vxi; +} + + +static inline void __wakeup_vx_info(struct vx_info *vxi) +{ + if (waitqueue_active(&vxi->vx_wait)) + wake_up_interruptible(&vxi->vx_wait); +} + + +#define enter_vx_info(v,s) __enter_vx_info(v,s,__FILE__,__LINE__) + +static inline void __enter_vx_info(struct vx_info *vxi, + struct vx_info_save *vxis, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]", + vxi, vxi ? 
vxi->vx_id : 0, vxis, current, + current->xid, current->vx_info, _file, _line); + vxis->vxi = xchg(&current->vx_info, vxi); + vxis->xid = current->xid; + current->xid = vxi ? vxi->vx_id : 0; +} + +#define leave_vx_info(s) __leave_vx_info(s,__FILE__,__LINE__) + +static inline void __leave_vx_info(struct vx_info_save *vxis, + const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]", + vxis, vxis->xid, vxis->vxi, current, + current->xid, current->vx_info, _file, _line); + (void)xchg(&current->vx_info, vxis->vxi); + current->xid = vxis->xid; +} + +/* stash current's vx_info/xid in vxis and replace them with NULL/0 */ +static inline void __enter_vx_admin(struct vx_info_save *vxis) +{ + vxis->vxi = xchg(&current->vx_info, NULL); + vxis->xid = xchg(&current->xid, (xid_t)0); +} + +static inline void __leave_vx_admin(struct vx_info_save *vxis) +{ + (void)xchg(&current->xid, vxis->xid); + (void)xchg(&current->vx_info, vxis->vxi); +} + +extern void exit_vx_info(struct task_struct *, int); +extern void exit_vx_info_early(struct task_struct *, int); + +/* reaper for p: the context's vx_reaper, or the global child_reaper when p has no context or is itself the vx_reaper */ +static inline +struct task_struct *vx_child_reaper(struct task_struct *p) +{ + struct vx_info *vxi = p->vx_info; + struct task_struct *reaper = child_reaper; + + if (!vxi) + goto out; + + BUG_ON(!p->vx_info->vx_reaper); + + /* child reaper for the guest reaper */ + if (vxi->vx_reaper == p) + goto out; + + reaper = vxi->vx_reaper; +out: + vxdprintk(VXD_CBIT(xid, 3), + "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]\n", + p, p->xid, p->pid, reaper, reaper->xid, reaper->pid); + return reaper; +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_cowbl.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_cowbl.h --- linux-2.6.19.1/include/linux/vs_cowbl.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_cowbl.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,44 @@ +#ifndef _VS_COWBL_H +#define _VS_COWBL_H + +#include +#include +#include + +extern struct dentry *cow_break_link(const char *pathname); + +static inline int
cow_check_and_break(struct nameidata *nd) +{ + struct inode *inode = nd->dentry->d_inode; + int error = 0; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt)) + return -EROFS; + if (IS_COW(inode)) { + if (IS_COW_LINK(inode)) { + struct dentry *new_dentry, *old_dentry = nd->dentry; + char *path, *buf; + + buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!buf) { + return -ENOMEM; + } + path = d_path(nd->dentry, nd->mnt, buf, PATH_MAX); + new_dentry = cow_break_link(path); + kfree(buf); + if (!IS_ERR(new_dentry)) { + nd->dentry = new_dentry; + dput(old_dentry); + } else + error = PTR_ERR(new_dentry); + } else { + inode->i_flags &= ~(S_IUNLINK|S_IMMUTABLE); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } + } + return error; +} + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_cvirt.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_cvirt.h --- linux-2.6.19.1/include/linux/vs_cvirt.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_cvirt.h 2006-11-30 19:12:40 +0100 @@ -0,0 +1,49 @@ +#ifndef _VS_CVIRT_H +#define _VS_CVIRT_H + +#include "vserver/cvirt.h" +#include "vserver/context.h" +#include "vserver/base.h" +#include "vserver/debug.h" + + +static inline void vx_activate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_inc(&vxi->cvirt.nr_running); + } +} + +static inline void vx_deactivate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_dec(&vxi->cvirt.nr_running); + } +} + +static inline void vx_uninterruptible_inc(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) + atomic_inc(&vxi->cvirt.nr_uninterruptible); +} + +static inline void vx_uninterruptible_dec(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) + atomic_dec(&vxi->cvirt.nr_uninterruptible); +} + + +#else +#warning duplicate inclusion +#endif diff 
-NurpP --minimal linux-2.6.19.1/include/linux/vs_dlimit.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_dlimit.h --- linux-2.6.19.1/include/linux/vs_dlimit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_dlimit.h 2006-12-01 23:37:57 +0100 @@ -0,0 +1,214 @@ +#ifndef _VS_DLIMIT_H +#define _VS_DLIMIT_H + +#include "vserver/dlimit.h" +#include "vserver/base.h" +#include "vserver/debug.h" + +/* take a dl_usecnt reference; a NULL dli is passed through untouched */ +#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__) + +static inline struct dl_info *__get_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return NULL; + vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_tag:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + atomic_inc(&dli->dl_usecnt); + return dli; +} + +/* free through an RCU callback; expands to a plain expression, the caller supplies the ';' */ +#define free_dl_info(i) \ + call_rcu(&(i)->dl_rcu, rcu_free_dl_info) + +#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__) + +static inline void __put_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return; + vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_tag:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&dli->dl_usecnt)) + free_dl_info(dli); +} + +/* debug marker: '*' when a dl_info was found, ' ' otherwise */ +#define __dlimit_char(d) ((d)?'*':' ') + +static inline int __dl_alloc_space(struct super_block *sb, + tag_t tag, dlsize_t nr, const char *_file, int _line) +{ + struct dl_info *dli = NULL; + int ret = 0; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, tag); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_space_used + nr > dli->dl_space_total); + if (!ret) + dli->dl_space_used += nr; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "ALLOC (%p,#%d)%c %lld bytes (%d)", + sb, tag, __dlimit_char(dli), (long long)nr, + ret, _file, _line); + return ret; +} + +static inline void __dl_free_space(struct super_block *sb, + tag_t tag, dlsize_t nr, const char *_file, int _line) +{ +
struct dl_info *dli = NULL; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, tag); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_space_used > nr) + dli->dl_space_used -= nr; + else + dli->dl_space_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "FREE (%p,#%d)%c %lld bytes", + sb, tag, __dlimit_char(dli), (long long)nr, + _file, _line); +} + +static inline int __dl_alloc_inode(struct super_block *sb, + tag_t tag, const char *_file, int _line) +{ + struct dl_info *dli; + int ret = 0; /* nonzero: inode limit already reached, nothing was charged */ + + dli = locate_dl_info(sb, tag); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_inodes_used >= dli->dl_inodes_total); + if (!ret) + dli->dl_inodes_used++; +#if 0 + else + vxwprintk("DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d", + sb, tag, + dli->dl_inodes_used, dli->dl_inodes_total, + _file, _line); +#endif + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "ALLOC (%p,#%d)%c inode (%d)", + sb, tag, __dlimit_char(dli), ret, _file, _line); + return ret; +} + +static inline void __dl_free_inode(struct super_block *sb, + tag_t tag, const char *_file, int _line) +{ + struct dl_info *dli; + + dli = locate_dl_info(sb, tag); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_used > 1) + dli->dl_inodes_used--; + else + dli->dl_inodes_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "FREE (%p,#%d)%c inode", + sb, tag, __dlimit_char(dli), _file, _line); +} + +static inline void __dl_adjust_block(struct super_block *sb, tag_t tag, + unsigned long long *free_blocks, unsigned long long *root_blocks, + const char *_file, int _line) +{ + struct dl_info *dli; + uint64_t broot, bfree; + + dli = locate_dl_info(sb, tag); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + broot = (dli->dl_space_total - + (dli->dl_space_total >> 10) * dli->dl_nrlmult) + >> sb->s_blocksize_bits; + bfree =
(dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + spin_unlock(&dli->dl_lock); + /* free_blocks/root_blocks may be NULL (see the checks below), so the debug print must not dereference them blindly */ + vxlprintk(VXD_CBIT(dlim, 2), + "ADJUST: %lld,%lld on %lld,%lld [mult=%d]", + (long long)bfree, (long long)broot, + free_blocks ? (long long)*free_blocks : 0LL, root_blocks ? (long long)*root_blocks : 0LL, dli->dl_nrlmult, + _file, _line); + if (free_blocks) { + if (*free_blocks > bfree) + *free_blocks = bfree; + } + if (root_blocks) { + if (*root_blocks > broot) + *root_blocks = broot; + } + put_dl_info(dli); +} + +#define DLIMIT_ALLOC_SPACE(in, bytes) \ + __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_SPACE(in, bytes) \ + __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ + __FILE__, __LINE__ ) + +#define DLIMIT_ALLOC_BLOCK(in, nr) \ + __dl_alloc_space((in)->i_sb, (in)->i_tag, \ + ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_BLOCK(in, nr) \ + __dl_free_space((in)->i_sb, (in)->i_tag, \ + ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \ + __FILE__, __LINE__ ) + + +#define DLIMIT_ALLOC_INODE(in) \ + __dl_alloc_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ ) + +#define DLIMIT_FREE_INODE(in) \ + __dl_free_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ ) + + +#define DLIMIT_ADJUST_BLOCK(sb, tag, fb, rb) \ + __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ ) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_limit.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_limit.h --- linux-2.6.19.1/include/linux/vs_limit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_limit.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,140 @@ +#ifndef _VS_LIMIT_H +#define _VS_LIMIT_H + +#include "vserver/limit.h" +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/debug.h" +#include "vserver/context.h" +#include "vserver/limit_int.h" + + +#define vx_acc_cres(v,d,p,r) \ + __vx_acc_cres(v, r, d, p, __FILE__, __LINE__) +
+#define vx_acc_cres_cond(x,d,p,r) \ + __vx_acc_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \ + r, d, p, __FILE__, __LINE__) + + +#define vx_add_cres(v,a,p,r) \ + __vx_add_cres(v, r, a, p, __FILE__, __LINE__) +#define vx_sub_cres(v,a,p,r) vx_add_cres(v,-(a),p,r) + +#define vx_add_cres_cond(x,a,p,r) \ + __vx_add_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \ + r, a, p, __FILE__, __LINE__) +#define vx_sub_cres_cond(x,a,p,r) vx_add_cres_cond(x,-(a),p,r) + + +/* process and file limits */ + +#define vx_nproc_inc(p) \ + vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC) + +#define vx_nproc_dec(p) \ + vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC) + +#define vx_files_inc(f) \ + vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE) + +#define vx_files_dec(f) \ + vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE) + +#define vx_locks_inc(l) \ + vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS) + +#define vx_locks_dec(l) \ + vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS) + +#define vx_openfd_inc(f) \ + vx_acc_cres(current->vx_info, 1, (void *)(long)(f), VLIMIT_OPENFD) + +#define vx_openfd_dec(f) \ + vx_acc_cres(current->vx_info,-1, (void *)(long)(f), VLIMIT_OPENFD) + + +#define vx_cres_avail(v,n,r) \ + __vx_cres_avail(v, r, n, __FILE__, __LINE__) + + +#define vx_nproc_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_NPROC) + +#define vx_files_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_NOFILE) + +#define vx_locks_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_LOCKS) + +#define vx_openfd_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_OPENFD) + + +/* dentry limits */ + +#define vx_dentry_inc(d) do { \ + if (atomic_read(&d->d_count) == 1) \ + vx_acc_cres(current->vx_info, 1, d, VLIMIT_DENTRY); \ + } while (0) + +#define vx_dentry_dec(d) do { \ + if (atomic_read(&d->d_count) == 0) \ + vx_acc_cres(current->vx_info,-1, d, VLIMIT_DENTRY); \ + } while (0) + +#define vx_dentry_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_DENTRY) 
+ + +/* socket limits */ + +#define vx_sock_inc(s) \ + vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK) + +#define vx_sock_dec(s) \ + vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK) + +#define vx_sock_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_NSOCK) + + +/* ipc resource limits */ + +#define vx_ipcmsg_add(v,u,a) \ + vx_add_cres(v, a, u, RLIMIT_MSGQUEUE) + +#define vx_ipcmsg_sub(v,u,a) \ + vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE) + +#define vx_ipcmsg_avail(v,a) \ + vx_cres_avail(v, a, RLIMIT_MSGQUEUE) + + +#define vx_ipcshm_add(v,k,a) \ + vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) + +#define vx_ipcshm_sub(v,k,a) \ + vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) + +#define vx_ipcshm_avail(v,a) \ + vx_cres_avail(v, a, VLIMIT_SHMEM) + + +#define vx_semary_inc(a) \ + vx_acc_cres(current->vx_info, 1, a, VLIMIT_SEMARY) + +#define vx_semary_dec(a) \ + vx_acc_cres(current->vx_info,-1, a, VLIMIT_SEMARY) + + +#define vx_nsems_add(a,n) \ + vx_add_cres(current->vx_info, n, a, VLIMIT_NSEMS) + +#define vx_nsems_sub(a,n) \ + vx_sub_cres(current->vx_info, n, a, VLIMIT_NSEMS) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_memory.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_memory.h --- linux-2.6.19.1/include/linux/vs_memory.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_memory.h 2006-11-30 19:31:41 +0100 @@ -0,0 +1,159 @@ +#ifndef _VS_MEMORY_H +#define _VS_MEMORY_H + +#include "vserver/limit.h" +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/debug.h" +#include "vserver/context.h" +#include "vserver/limit_int.h" + + +#define __acc_add_long(a,v) (*(v) += (a)) +#define __acc_inc_long(v) (++*(v)) +#define __acc_dec_long(v) (--*(v)) + +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS +#define __acc_add_atomic(a,v) atomic_long_add(a,v) +#define __acc_inc_atomic(v) atomic_long_inc(v) +#define __acc_dec_atomic(v) atomic_long_dec(v) +#else /* NR_CPUS < 
CONFIG_SPLIT_PTLOCK_CPUS */ +#define __acc_add_atomic(a,v) __acc_add_long(a,v) +#define __acc_inc_atomic(v) __acc_inc_long(v) +#define __acc_dec_atomic(v) __acc_dec_long(v) +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ + + +#define vx_acc_page(m,d,v,r) do { \ + if ((d) > 0) \ + __acc_inc_long(&(m->v)); \ + else \ + __acc_dec_long(&(m->v)); \ + __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__); \ +} while (0) + +#define vx_acc_page_atomic(m,d,v,r) do { \ + if ((d) > 0) \ + __acc_inc_atomic(&(m->v)); \ + else \ + __acc_dec_atomic(&(m->v)); \ + __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__); \ +} while (0) + + +#define vx_acc_pages(m,p,v,r) do { \ + unsigned long __p = (p); \ + __acc_add_long(__p, &(m->v)); \ + __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__); \ +} while (0) + +#define vx_acc_pages_atomic(m,p,v,r) do { \ + unsigned long __p = (p); \ + __acc_add_atomic(__p, &(m->v)); \ + __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__); \ +} while (0) + + + +#define vx_acc_vmpage(m,d) \ + vx_acc_page(m, d, total_vm, RLIMIT_AS) +#define vx_acc_vmlpage(m,d) \ + vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_file_rsspage(m,d) \ + vx_acc_page_atomic(m, d, _file_rss, VLIMIT_MAPPED) +#define vx_acc_anon_rsspage(m,d) \ + vx_acc_page_atomic(m, d, _anon_rss, VLIMIT_ANON) + +#define vx_acc_vmpages(m,p) \ + vx_acc_pages(m, p, total_vm, RLIMIT_AS) +#define vx_acc_vmlpages(m,p) \ + vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_file_rsspages(m,p) \ + vx_acc_pages_atomic(m, p, _file_rss, VLIMIT_MAPPED) +#define vx_acc_anon_rsspages(m,p) \ + vx_acc_pages_atomic(m, p, _anon_rss, VLIMIT_ANON) + +#define vx_pages_add(s,r,p) __vx_add_cres(s, r, p, 0, __FILE__, __LINE__) +#define vx_pages_sub(s,r,p) vx_pages_add(s, r, -(p)) + +#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1) +#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1) +#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p) +#define vx_vmpages_sub(m,p) 
vx_acc_vmpages(m,-(p)) + +#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1) +#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1) +#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p) +#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p)) + +#define vx_file_rsspages_inc(m) vx_acc_file_rsspage(m, 1) +#define vx_file_rsspages_dec(m) vx_acc_file_rsspage(m,-1) +#define vx_file_rsspages_add(m,p) vx_acc_file_rsspages(m, p) +#define vx_file_rsspages_sub(m,p) vx_acc_file_rsspages(m,-(p)) + +#define vx_anon_rsspages_inc(m) vx_acc_anon_rsspage(m, 1) +#define vx_anon_rsspages_dec(m) vx_acc_anon_rsspage(m,-1) +#define vx_anon_rsspages_add(m,p) vx_acc_anon_rsspages(m, p) +#define vx_anon_rsspages_sub(m,p) vx_acc_anon_rsspages(m,-(p)) + + +#define vx_pages_avail(m,p,r) \ + __vx_cres_avail((m)->mm_vx_info, r, p, __FILE__, __LINE__) + +#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS) +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK) +#define vx_anon_avail(m,p) vx_pages_avail(m, p, VLIMIT_ANON) +#define vx_mapped_avail(m,p) vx_pages_avail(m, p, VLIMIT_MAPPED) + +#define vx_rss_avail(m,p) \ + __vx_cres_array_avail((m)->mm_vx_info, VLA_RSS, p, __FILE__, __LINE__) + + +enum { + VXPT_UNKNOWN = 0, + VXPT_ANON, + VXPT_NONE, + VXPT_FILE, + VXPT_SWAP, + VXPT_WRITE +}; + +#if 0 +#define vx_page_fault(mm,vma,type,ret) +#else + +static inline +void __vx_page_fault(struct mm_struct *mm, + struct vm_area_struct *vma, int type, int ret) +{ + struct vx_info *vxi = mm->mm_vx_info; + int what; +/* + static char *page_type[6] = + { "UNKNOWN", "ANON","NONE", "FILE", "SWAP", "WRITE" }; + static char *page_what[4] = + { "FAULT_OOM", "FAULT_SIGBUS", "FAULT_MINOR", "FAULT_MAJOR" }; +*/ + + if (!vxi) + return; + + what = (ret & 0x3); + +/* printk("[%d] page[%d][%d] %2x %s %s\n", vxi->vx_id, + type, what, ret, page_type[type], page_what[what]); +*/ + if (ret & VM_FAULT_WRITE) + what |= 0x4; + atomic_inc(&vxi->cacct.page[type][what]); +} + +#define vx_page_fault(mm,vma,type,ret) 
__vx_page_fault(mm,vma,type,ret) +#endif + + +extern unsigned long vx_badness(struct task_struct *task, struct mm_struct *mm); + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_network.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_network.h --- linux-2.6.19.1/include/linux/vs_network.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_network.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,183 @@ +#ifndef _NX_VS_NETWORK_H +#define _NX_VS_NETWORK_H + +#include "vserver/context.h" +#include "vserver/network.h" +#include "vserver/base.h" +#include "vserver/debug.h" + + +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__) + +static inline struct nx_info *__get_nx_info(struct nx_info *nxi, + const char *_file, int _line) +{ + if (!nxi) + return NULL; + + vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + return nxi; +} + + +extern void free_nx_info(struct nx_info *); + +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) + +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) +{ + if (!nxi) + return; + + vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + if (atomic_dec_and_test(&nxi->nx_usecnt)) + free_nx_info(nxi); +} + + +#define init_nx_info(p,i) __init_nx_info(p,i,__FILE__,__LINE__) + +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + if (nxi) { + vxlprintk(VXD_CBIT(nid, 3), + "init_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + } + *nxp = nxi; +} + + +#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__) + +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char 
*_file, int _line) +{ + struct nx_info *nxo; + + if (!nxi) + return; + + vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + nxo = xchg(nxp, nxi); + BUG_ON(nxo); +} +/* clr_nx_info(): swap NULL into *nxp with xchg() and drop the reference the old pointer held; frees the nx_info when that was the last reference */ +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__) + +static inline void __clr_nx_info(struct nx_info **nxp, + const char *_file, int _line) +{ + struct nx_info *nxo; + + nxo = xchg(nxp, NULL); + if (!nxo) + return; + + vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])", + nxo, nxo?nxo->nx_id:0, + nxo?atomic_read(&nxo->nx_usecnt):0, + _file, _line); + + if (atomic_dec_and_test(&nxo->nx_usecnt)) + free_nx_info(nxo); +} + +/* claim_nx_info(): count one more task in nxi->nx_tasks. NOTE(review): the log call tolerates nxi == NULL but atomic_inc() dereferences it unconditionally, so callers must pass a valid nxi */ +#define claim_nx_info(v,p) __claim_nx_info(v,p,__FILE__,__LINE__) + +static inline void __claim_nx_info(struct nx_info *nxi, + struct task_struct *task, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_tasks):0, + task, _file, _line); + + atomic_inc(&nxi->nx_tasks); +} + + +extern void unhash_nx_info(struct nx_info *); +/* release_nx_info(): drop one task from nx_tasks and call unhash_nx_info() when the count reaches zero; annotated with might_sleep(); nxi must not be NULL here either */ +#define release_nx_info(v,p) __release_nx_info(v,p,__FILE__,__LINE__) + +static inline void __release_nx_info(struct nx_info *nxi, + struct task_struct *task, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_tasks):0, + task, _file, _line); + + might_sleep(); + + if (atomic_dec_and_test(&nxi->nx_tasks)) + unhash_nx_info(nxi); +} + +/* task_get_nx_info(): return p->nx_info with a usage reference taken while holding task_lock(p); NULL when the task has no network context */ +#define task_get_nx_info(i) __task_get_nx_info(i,__FILE__,__LINE__) + +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct nx_info *nxi; + + task_lock(p); + vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)", + p, _file, _line); + nxi = __get_nx_info(p->nx_info,
_file, _line); + task_unlock(p); + return nxi; +} + + + +/* addr_in_nx_info(): returns 1 when nxi is NULL, when the context has addresses and ipv4[0] is 0 (every addr then matches), or when addr equals one of the nbipv4 entries of ipv4[]; returns 0 otherwise */ +static inline int addr_in_nx_info(struct nx_info *nxi, uint32_t addr) +{ + int n,i; + + if (!nxi) + return 1; + + n = nxi->nbipv4; + if (n && (nxi->ipv4[0] == 0)) + return 1; + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == addr) + return 1; + } + return 0; +} +/* exit_nx_info(): release the task's claim on its network context, if it has one */ +static inline void exit_nx_info(struct task_struct *p) +{ + if (p->nx_info) + release_nx_info(p->nx_info, p); +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_pid.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_pid.h --- linux-2.6.19.1/include/linux/vs_pid.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_pid.h 2006-12-04 05:21:53 +0100 @@ -0,0 +1,108 @@ +#ifndef _VS_PID_H +#define _VS_PID_H + +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/debug.h" + + +/* pid faking stuff */ + +/* forward mapping: when VXF_INFO_INIT is set in the context, pid 0 stays 0 and the context's vx_initpid is reported as 1; every other pid, and every pid without the flag, is returned unchanged */ +#define vx_info_map_pid(v,p) \ + __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p) +#define vx_map_pid(p) vx_info_map_pid(current->vx_info, p) +#define vx_map_tgid(p) vx_map_pid(p) + +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_map_tgid: %p/%llx: %d -> %d", + vxi, (long long)vxi->vx_flags, pid, + (pid && pid == vxi->vx_initpid)?1:pid, + func, file, line); + if (pid == 0) + return 0; + if (pid == vxi->vx_initpid) + return 1; + } + return pid; +} +/* reverse mapping: when VXF_INFO_INIT is set, pid 1 maps back to vx_initpid (if one is recorded) and the raw vx_initpid itself maps to ~0U, which is -1 once converted to the int return type */ +#define vx_info_rmap_pid(v,p) \ + __vx_info_rmap_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p) +#define vx_rmap_tgid(p) vx_rmap_pid(p) + +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_rmap_tgid: %p/%llx: %d -> %d", +
vxi, (long long)vxi->vx_flags, pid, + (pid == 1)?vxi->vx_initpid:pid, + func, file, line); + if ((pid == 1) && vxi->vx_initpid) + return vxi->vx_initpid; + if (pid == vxi->vx_initpid) + return ~0U; + } + return pid; +} + + +#define VXF_FAKE_INIT (VXF_INFO_INIT|VXF_STATE_INIT) +/* vx_proc_task_visible(): 1 if task may be shown to current, else 0. pid 1 is shown when current's context has both VXF_FAKE_INIT bits set; any task is shown when vx_check() passes for its xid with VS_WATCH|VS_IDENT */ +static inline +int vx_proc_task_visible(struct task_struct *task) +{ + if ((task->pid == 1) && + !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT)) + /* show a blend through init */ + goto visible; + if (vx_check(vx_task_xid(task), VS_WATCH|VS_IDENT)) + goto visible; + return 0; +visible: + return 1; +} +/* look up a task with find_task_by_pid(); returns NULL (after a debug message) when the task is not visible to current */ +static inline +struct task_struct *vx_find_proc_task_by_pid(int pid) +{ + struct task_struct *task = find_task_by_pid(pid); + + if (task && !vx_proc_task_visible(task)) { + vxdprintk(VXD_CBIT(misc, 6), + "dropping task (find) %p[#%u,%u] for %p[#%u,%u]", + task, task->xid, task->pid, + current, current->xid, current->pid); + task = NULL; + } + return task; +} +/* same visibility filter on top of get_pid_task(); a hidden task is handed back to put_task_struct() before NULL is returned; the inode argument is not used */ +static inline +struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid) +{ + struct task_struct *task = get_pid_task(pid, PIDTYPE_PID); + + if (task && !vx_proc_task_visible(task)) { + vxdprintk(VXD_CBIT(misc, 6), + "dropping task (get) %p[#%u,%u] for %p[#%u,%u]", + task, task->xid, task->pid, + current, current->xid, current->pid); + put_task_struct(task); + task = NULL; + } + return task; +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_sched.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_sched.h --- linux-2.6.19.1/include/linux/vs_sched.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_sched.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,109 @@ +#ifndef _VS_SCHED_H +#define _VS_SCHED_H + +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/sched.h" + + +#define VAVAVOOM_RATIO 50 + +#define MAX_PRIO_BIAS 20 +#define MIN_PRIO_BIAS -20 + + +#ifdef CONFIG_VSERVER_HARDCPU + +/* + * effective_prio - return the priority that is
based on the static + * priority but is modified by bonuses/penalties. + * + * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] + * into a -4 ... 0 ... +4 bonus/penalty range. + * + * Additionally, we scale another amount based on the number of + * CPU tokens currently held by the context, if the process is + * part of a context (and the appropriate SCHED flag is set). + * This ranges from -5 ... 0 ... +15, quadratically. + * + * So, the total bonus is -9 .. 0 .. +19 + * We use ~50% of the full 0...39 priority range so that: + * + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. + * unless that context is far exceeding its CPU allocation. + * + * Both properties are important to certain workloads. + */ +static inline +int vx_effective_vavavoom(struct _vx_sched_pc *sched_pc, int max_prio) +{ + int vavavoom, max; +/* NOTE(review): with tokens >= 0 and tokens_max == 0 the division by max below would divide by zero; presumably tokens_max is never 0 for a configured context -- confirm */ + /* lots of tokens = lots of vavavoom + * no tokens = no vavavoom */ + if ((vavavoom = sched_pc->tokens) >= 0) { + max = sched_pc->tokens_max; + vavavoom = max - vavavoom; + max = max * max; + vavavoom = max_prio * VAVAVOOM_RATIO / 100 + * (vavavoom*vavavoom - (max >> 2)) / max; + return vavavoom; + } + return 0; +} + +/* vx_adjust_prio(): when VXF_SCHED_PRIO is set, add the token based vavavoom of this cpu (also stored in vxi->sched.vavavoom); always add the context's prio_bias; a task without vx_info gets prio back unchanged */ +static inline +int vx_adjust_prio(struct task_struct *p, int prio, int max_user) +{ + struct vx_info *vxi = p->vx_info; + + if (!vxi) + return prio; + + if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) { + struct _vx_sched_pc *sched_pc = &vx_cpu(vxi, sched_pc); + int vavavoom = vx_effective_vavavoom(sched_pc, max_user); + + vxi->sched.vavavoom = vavavoom; + prio += vavavoom; + } + prio += vxi->sched.prio_bias; + return prio; +} + +#else /* !CONFIG_VSERVER_HARDCPU */ +/* without CONFIG_VSERVER_HARDCPU only the context's prio_bias is applied; max_user is not used */ +static inline +int vx_adjust_prio(struct task_struct *p, int prio, int max_user) +{ + struct vx_info *vxi = p->vx_info; + + if (vxi) + prio += vxi->sched.prio_bias; + return prio; +} + +#endif /* CONFIG_VSERVER_HARDCPU */ + +/* vx_account_user(): add cputime to user_ticks in the context's sched_pc for the current cpu; nice is not used; NULL vxi is ignored */ +static inline void vx_account_user(struct vx_info *vxi, + cputime_t
cputime, int nice) +{ + if (!vxi) + return; + vx_cpu(vxi, sched_pc).user_ticks += cputime; +} +/* vx_account_system(): add cputime to sys_ticks in the context's sched_pc for the current cpu; idle is not used; NULL vxi is ignored */ +static inline void vx_account_system(struct vx_info *vxi, + cputime_t cputime, int idle) +{ + if (!vxi) + return; + vx_cpu(vxi, sched_pc).sys_ticks += cputime; +} + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_socket.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_socket.h --- linux-2.6.19.1/include/linux/vs_socket.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_socket.h 2006-11-30 19:13:42 +0100 @@ -0,0 +1,67 @@ +#ifndef _VS_SOCKET_H +#define _VS_SOCKET_H + +#include "vserver/debug.h" +#include "vserver/base.h" +#include "vserver/cacct.h" +#include "vserver/context.h" + + +/* socket accounting */ + +#include <linux/socket.h> +/* vx_sock_type(): map a protocol family (PF_*) to its VXA_SOCK_* accounting slot; every family not listed shares VXA_SOCK_OTHER */ +static inline int vx_sock_type(int family) +{ + switch (family) { + case PF_UNSPEC: + return VXA_SOCK_UNSPEC; + case PF_UNIX: + return VXA_SOCK_UNIX; + case PF_INET: + return VXA_SOCK_INET; + case PF_INET6: + return VXA_SOCK_INET6; + case PF_PACKET: + return VXA_SOCK_PACKET; + default: + return VXA_SOCK_OTHER; + } +} +/* vx_acc_sock(): bump count and add size to total in cacct.sock[type][pos]; pos is 0 for recv, 1 for send, 2 for fail (see the wrappers below); file and line are accepted but not used; NULL vxi is ignored */ +#define vx_acc_sock(v,f,p,s) \ + __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__) + +static inline void __vx_acc_sock(struct vx_info *vxi, + int family, int pos, int size, const char *file, int line) +{ + if (vxi) { + int type = vx_sock_type(family); + + atomic_inc(&vxi->cacct.sock[type][pos].count); + atomic_add(size, &vxi->cacct.sock[type][pos].total); + } +} + +#define vx_sock_recv(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s)) +#define vx_sock_send(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s)) +#define vx_sock_fail(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s)) + +/* reset a socket's context (xid / vx_info) and network context (nid / nx_info) association */ +#define sock_vx_init(s) do { \ + (s)->sk_xid = 0; \ + (s)->sk_vx_info = NULL; \ + } while (0) + +#define sock_nx_init(s) do { \ + (s)->sk_nid = 0; \ + (s)->sk_nx_info = NULL; \ + } while (0) + + +#else +#warning duplicate inclusion +#endif diff
-NurpP --minimal linux-2.6.19.1/include/linux/vs_tag.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_tag.h --- linux-2.6.19.1/include/linux/vs_tag.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_tag.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,44 @@ +#ifndef _VS_TAG_H +#define _VS_TAG_H + +#include +/* NOTE(review): the #include above has no header name; it looks like it was lost when this text was extracted -- restore it from the original patch */ +/* check conditions */ + +#define DX_ADMIN 0x0001 +#define DX_WATCH 0x0002 +#define DX_HOSTID 0x0008 + +#define DX_IDENT 0x0010 + +#define DX_ARG_MASK 0x0010 + +/* the tag of a task is read from its xid field; dx_check() compares against current's tag */ +#define dx_task_tag(t) ((t)->xid) + +#define dx_current_tag() dx_task_tag(current) + +#define dx_check(c,m) __dx_check(dx_current_tag(),c,m) + +#define dx_weak_check(c,m) ((m) ? dx_check(c,m) : 1) + + +/* + * check current context for ADMIN/WATCH and + * optionally against supplied argument + */ +static inline int __dx_check(tag_t cid, tag_t id, unsigned int mode) +{ + if (mode & DX_ARG_MASK) { + if ((mode & DX_IDENT) && + (id == cid)) + return 1; + } + return (((mode & DX_ADMIN) && (cid == 0)) || + ((mode & DX_WATCH) && (cid == 1)) || + ((mode & DX_HOSTID) && (id == 0))); +} + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vs_time.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_time.h --- linux-2.6.19.1/include/linux/vs_time.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vs_time.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,19 @@ +#ifndef _VS_TIME_H +#define _VS_TIME_H + + +/* time faking stuff */ +/* without CONFIG_VSERVER_VTIME the vx_ calls expand directly to do_gettimeofday() / do_settimeofday() */ +#ifdef CONFIG_VSERVER_VTIME + +extern void vx_gettimeofday(struct timeval *tv); +extern int vx_settimeofday(struct timespec *ts); + +#else +#define vx_gettimeofday(t) do_gettimeofday(t) +#define vx_settimeofday(t) do_settimeofday(t) +#endif + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/Kbuild linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/Kbuild --- linux-2.6.19.1/include/linux/vserver/Kbuild 1970-01-01 01:00:00 +0100 +++
linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/Kbuild 2006-12-06 21:03:02 +0100 @@ -0,0 +1,9 @@ + +unifdef-y += context_cmd.h network_cmd.h space_cmd.h \ + cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \ + inode_cmd.h sched_cmd.h signal_cmd.h debug_cmd.h + +unifdef-y += switch.h network.h monitor.h + +unifdef-y += legacy.h + diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/base.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/base.h --- linux-2.6.19.1/include/linux/vserver/base.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/base.h 2006-11-30 19:29:45 +0100 @@ -0,0 +1,237 @@ +#ifndef _VX_BASE_H +#define _VX_BASE_H + + +/* context state changes */ + +enum { + VSC_STARTUP = 1, + VSC_SHUTDOWN, + + VSC_NETUP, + VSC_NETDOWN, +}; + + +#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ +/* with dynamic ids disabled MIN_D_CONTEXT lies above MAX_S_CONTEXT, so the VS_DYNAMIC range tested in __vs_check() is empty */ +#ifdef CONFIG_VSERVER_DYNAMIC_IDS +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ +#else +#define MIN_D_CONTEXT 65536 +#endif + +/* check conditions */ + +#define VS_ADMIN 0x0001 +#define VS_WATCH 0x0002 +#define VS_HIDE 0x0004 +#define VS_HOSTID 0x0008 + +#define VS_IDENT 0x0010 +#define VS_EQUIV 0x0020 +#define VS_PARENT 0x0040 +#define VS_CHILD 0x0080 + +#define VS_ARG_MASK 0x00F0 + +#define VS_DYNAMIC 0x0100 +#define VS_STATIC 0x0200 + +#define VS_ATR_MASK 0x0F00 + +#ifdef CONFIG_VSERVER_PRIVACY +#define VS_ADMIN_P (0) +#define VS_WATCH_P (0) +#else +#define VS_ADMIN_P VS_ADMIN +#define VS_WATCH_P VS_WATCH +#endif +/* execution context conditions: __vs_check() tests them with in_irq(), in_softirq() and in_interrupt() */ +#define VS_HARDIRQ 0x1000 +#define VS_SOFTIRQ 0x2000 +#define VS_IRQ 0x4000 + +#define VS_IRQ_MASK 0xF000 + +#include <linux/hardirq.h> + +/* + * check current context for ADMIN/WATCH and + * optionally against supplied argument + */ +static inline int __vs_check(int cid, int id, unsigned int mode) +{ + if (mode & VS_ARG_MASK) { + if ((mode & VS_IDENT) && + (id == cid)) + return 1; + } + if (mode & VS_ATR_MASK) { + if ((mode & VS_DYNAMIC) && + (id >= MIN_D_CONTEXT) && + (id <= MAX_S_CONTEXT)) + return 1; + if ((mode &
VS_STATIC) && + (id > 1) && (id < MIN_D_CONTEXT)) + return 1; + } + if (mode & VS_IRQ_MASK) { + if ((mode & VS_IRQ) && unlikely(in_interrupt())) + return 1; + if ((mode & VS_HARDIRQ) && unlikely(in_irq())) + return 1; + if ((mode & VS_SOFTIRQ) && unlikely(in_softirq())) + return 1; + } + return (((mode & VS_ADMIN) && (cid == 0)) || + ((mode & VS_WATCH) && (cid == 1)) || + ((mode & VS_HOSTID) && (id == 0))); +} + +#define vx_task_xid(t) ((t)->xid) + +#define vx_current_xid() vx_task_xid(current) + +#define current_vx_info() (current->vx_info) + +/* vx_check(): test id c against current's xid; VS_IRQ is always or-ed into the mode, so the check also succeeds whenever in_interrupt() is true */ +#define vx_check(c,m) __vs_check(vx_current_xid(),c,(m)|VS_IRQ) + +#define vx_weak_check(c,m) ((m) ? vx_check(c,m) : 1) + + +#define nx_task_nid(t) ((t)->nid) + +#define nx_current_nid() nx_task_nid(current) + +#define current_nx_info() (current->nx_info) + +/* nx_check(): the same test against current's nid, without the implicit VS_IRQ */ +#define nx_check(c,m) __vs_check(nx_current_nid(),c,m) + +#define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1) + + + +/* generic flag merging */ +/* vs_check_flags: zero exactly when (v & m) == f. vs_mask_flags: bits inside m come from f, the rest from v. vs_mask_mask: bits of v inside m survive only if also set in f */ +#define vs_check_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define vs_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m))) + +#define vs_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m))) + +#define vs_check_bit(v,n) ((v) & (1LL << (n))) + + +/* context flags */ + +#define __vx_flags(v) ((v) ? (v)->vx_flags : 0) + +#define vx_current_flags() __vx_flags(current->vx_info) + +#define vx_info_flags(v,m,f) \ + vs_check_flags(__vx_flags(v),(m),(f)) + +#define task_vx_flags(t,m,f) \ + ((t) && vx_info_flags((t)->vx_info, (m), (f))) + +#define vx_flags(m,f) vx_info_flags(current->vx_info,(m),(f)) + + +/* context caps */ + +#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0) + +#define vx_current_ccaps() __vx_ccaps(current->vx_info) + +#define vx_info_ccaps(v,c) (__vx_ccaps(v) & (c)) + +#define vx_ccaps(c) vx_info_ccaps(current->vx_info,(c)) + + + +/* network flags */ + +#define __nx_flags(v) ((v) ?
(v)->nx_flags : 0) + +#define nx_current_flags() __nx_flags(current->nx_info) + +#define nx_info_flags(v,m,f) \ + vs_check_flags(__nx_flags(v),(m),(f)) + +#define task_nx_flags(t,m,f) \ + ((t) && nx_info_flags((t)->nx_info, (m), (f))) + +#define nx_flags(m,f) nx_info_flags(current->nx_info,(m),(f)) + + +/* network caps */ + +#define __nx_ncaps(v) ((v) ? (v)->nx_ncaps : 0) + +#define nx_current_ncaps() __nx_ncaps(current->nx_info) + +#define nx_info_ncaps(v,c) (__nx_ncaps(v) & (c)) + +#define nx_ncaps(c) nx_info_ncaps(current->nx_info,(c)) + + +/* context mask capabilities */ +/* __vx_mcaps: vx_ccaps shifted right by 32, or all ones when there is no context */ +#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 ) + +#define vx_info_mcaps(v,c) (__vx_mcaps(v) & (c)) + +#define vx_mcaps(c) vx_info_mcaps(current->vx_info,(c)) + + +/* context bcap mask */ + +#define __vx_bcaps(v) ((v) ? (v)->vx_bcaps : ~0 ) + +#define vx_current_bcaps() __vx_bcaps(current->vx_info) + +#define vx_info_bcaps(v,c) (__vx_bcaps(v) & (c)) + +#define vx_bcaps(c) vx_info_bcaps(current->vx_info,(c)) + + +#define vx_info_cap_bset(v) ((v) ? (v)->vx_cap_bset : cap_bset) + +#define vx_current_cap_bset() vx_info_cap_bset(current->vx_info) + +/* __vx_info_mbcap: while VXF_STATE_SETUP is set the caps b pass through unmasked; once it is clear (or without a context) they are masked with the context's bcaps */ +#define __vx_info_mbcap(v,b) \ + (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \ + vx_info_bcaps(v, b) : (b)) + +#define vx_info_mbcap(v,b) __vx_info_mbcap(v,cap_t(b)) + +#define task_vx_mbcap(t,b) \ + vx_info_mbcap((t)->vx_info, (t)->b) + +#define vx_mbcap(b) task_vx_mbcap(current,b) + +#define vx_cap_raised(v,c,f) (vx_info_mbcap(v,c) & CAP_TO_MASK(f)) +/* vx_capable(): true if capable(b), or if b is raised in current's effective set and current's context holds ccap c */ +#define vx_capable(b,c) (capable(b) || \ + (cap_raised(current->cap_effective,b) && vx_ccaps(c))) + + +#define vx_current_initpid(n) \ + (current->vx_info && \ + (current->vx_info->vx_initpid == (n))) + + +#define __vx_state(v) ((v) ? ((v)->vx_state) : 0) + +#define vx_info_state(v,m) (__vx_state(v) & (m)) + + +#define __nx_state(v) ((v) ?
((v)->nx_state) : 0) + +#define nx_info_state(v,m) (__nx_state(v) & (m)) + +#endif diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/cacct.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cacct.h --- linux-2.6.19.1/include/linux/vserver/cacct.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cacct.h 2006-11-08 04:57:44 +0100 @@ -0,0 +1,15 @@ +#ifndef _VX_CACCT_H +#define _VX_CACCT_H + +/* socket accounting slots; VXA_SOCK_SIZE sizes the first dimension of _vx_cacct.sock[][] */ +enum sock_acc_field { + VXA_SOCK_UNSPEC = 0, + VXA_SOCK_UNIX, + VXA_SOCK_INET, + VXA_SOCK_INET6, + VXA_SOCK_PACKET, + VXA_SOCK_OTHER, + VXA_SOCK_SIZE /* array size */ +}; + +#endif /* _VX_CACCT_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/cacct_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cacct_cmd.h --- linux-2.6.19.1/include/linux/vserver/cacct_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cacct_cmd.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,23 @@ +#ifndef _VX_CACCT_CMD_H +#define _VX_CACCT_CMD_H + + +/* socket accounting statistics command */ + +#define VCMD_sock_stat VC_CMD(VSTAT, 5, 0) +/* count[] and total[] have three entries each, presumably one per accounting position of _vx_cacct.sock[][3] -- confirm in vc_sock_stat() */ +struct vcmd_sock_stat_v0 { + uint32_t field; + uint32_t count[3]; + uint64_t total[3]; +}; + + +#ifdef __KERNEL__ + +#include +/* NOTE(review): the #include above has no header name (apparently lost in extraction) -- restore it from the original patch */ +extern int vc_sock_stat(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_CACCT_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/cacct_def.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cacct_def.h --- linux-2.6.19.1/include/linux/vserver/cacct_def.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cacct_def.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,43 @@ +#ifndef _VX_CACCT_DEF_H +#define _VX_CACCT_DEF_H + +#include +#include +/* NOTE(review): both #include lines above have lost their header names; atomic_t and VXA_SOCK_SIZE must come from them */ +/* one accounting cell: number of events and their accumulated size */ +struct _vx_sock_acc { + atomic_t count; + atomic_t total; +}; + +/* context sub struct */ + +struct _vx_cacct { + struct _vx_sock_acc sock[VXA_SOCK_SIZE][3]; + atomic_t slab[8]; + atomic_t page[6][8]; +}; + +#ifdef CONFIG_VSERVER_DEBUG +/* debug helper: print every socket accounting cell with printk() */ +static inline void
__dump_vx_cacct(struct _vx_cacct *cacct) +{ + int i,j; +/* one output row per socket type (first index of sock[][]), three count/total pairs per row */ + printk("\t_vx_cacct:"); + for (i=0; i<VXA_SOCK_SIZE; i++) { + struct _vx_sock_acc *ptr = cacct->sock[i]; + + printk("\t [%d] =", i); + for (j=0; j<3; j++) { + printk(" [%d] = %8d, %8d", j, + atomic_read(&ptr[j].count), + atomic_read(&ptr[j].total)); + } + printk("\n"); + } +} + +#endif + +#endif /* _VX_CACCT_DEF_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/cacct_int.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cacct_int.h --- linux-2.6.19.1/include/linux/vserver/cacct_int.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cacct_int.h 2006-11-08 04:57:44 +0100 @@ -0,0 +1,21 @@ +#ifndef _VX_CACCT_INT_H +#define _VX_CACCT_INT_H + + +#ifdef __KERNEL__ +/* accessors: read the count / total of accounting cell sock[type][pos] with atomic_read() */ +static inline +unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].count); +} + + +static inline +unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].total); +} + +#endif /* __KERNEL__ */ +#endif /* _VX_CACCT_INT_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/context.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/context.h --- linux-2.6.19.1/include/linux/vserver/context.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/context.h 2006-12-09 03:44:12 +0100 @@ -0,0 +1,172 @@ +#ifndef _VX_CONTEXT_H +#define _VX_CONTEXT_H + +#include +#include + +/* NOTE(review): the two #include lines above have lost their header names (apparently in extraction) -- restore them from the original patch */ +#define VX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ + +/* context flags */ + +#define VXF_INFO_SCHED 0x00000002 +#define VXF_INFO_NPROC 0x00000004 +#define VXF_INFO_PRIVATE 0x00000008 + +#define VXF_INFO_INIT 0x00000010 +#define VXF_INFO_HIDE 0x00000020 +#define VXF_INFO_ULIMIT 0x00000040 +#define VXF_INFO_NSPACE 0x00000080 + +#define VXF_SCHED_HARD 0x00000100 +#define VXF_SCHED_PRIO 0x00000200 +#define VXF_SCHED_PAUSE 0x00000400 + +#define VXF_VIRT_MEM 0x00010000 +#define
VXF_VIRT_UPTIME 0x00020000 +#define VXF_VIRT_CPU 0x00040000 +#define VXF_VIRT_LOAD 0x00080000 +#define VXF_VIRT_TIME 0x00100000 + +#define VXF_HIDE_MOUNT 0x01000000 +#define VXF_HIDE_NETIF 0x02000000 +#define VXF_HIDE_VINFO 0x04000000 + +#define VXF_STATE_SETUP (1ULL<<32) +#define VXF_STATE_INIT (1ULL<<33) +#define VXF_STATE_ADMIN (1ULL<<34) + +#define VXF_SC_HELPER (1ULL<<36) +#define VXF_REBOOT_KILL (1ULL<<37) +#define VXF_PERSISTENT (1ULL<<38) + +#define VXF_FORK_RSS (1ULL<<48) +#define VXF_PROLIFIC (1ULL<<49) + +#define VXF_IGNEG_NICE (1ULL<<52) +/* VXF_ONE_TIME covers bits 32..34, which is the same value as VXF_INIT_SET (the three VXF_STATE_* bits) */ +#define VXF_ONE_TIME (0x0007ULL<<32) + +#define VXF_INIT_SET (VXF_STATE_SETUP|VXF_STATE_INIT|VXF_STATE_ADMIN) + + +/* context migration */ + +#define VXM_SET_INIT 0x00000001 +#define VXM_SET_REAPER 0x00000002 + +/* context caps */ + +#define VXC_CAP_MASK 0x00000000 + +#define VXC_SET_UTSNAME 0x00000001 +#define VXC_SET_RLIMIT 0x00000002 + +#define VXC_RAW_ICMP 0x00000100 +#define VXC_SYSLOG 0x00001000 + +#define VXC_SECURE_MOUNT 0x00010000 +#define VXC_SECURE_REMOUNT 0x00020000 +#define VXC_BINARY_MOUNT 0x00040000 + +#define VXC_QUOTA_CTL 0x00100000 +#define VXC_ADMIN_MAPPER 0x00200000 +#define VXC_ADMIN_CLOOP 0x00400000 + + +#ifdef __KERNEL__ + +#include +#include +#include +/* NOTE(review): the three #include lines above have lost their header names (apparently in extraction) -- restore them from the original patch */ +#include "limit_def.h" +#include "sched_def.h" +#include "cvirt_def.h" +#include "cacct_def.h" +/* per-cpu part of a context; reached through vx_per_cpu() / vx_cpu() */ +struct _vx_info_pc { + struct _vx_sched_pc sched_pc; + struct _vx_cvirt_pc cvirt_pc; +}; +/* the context descriptor; see the per-field comments */ +struct vx_info { + struct hlist_node vx_hlist; /* linked list of contexts */ + xid_t vx_id; /* context id */ + atomic_t vx_usecnt; /* usage count */ + atomic_t vx_tasks; /* tasks count */ + struct vx_info *vx_parent; /* parent context */ + int vx_state; /* context state */ + + unsigned long vx_nsmask; /* assignment mask */ + struct nsproxy *vx_nsproxy; /* private namespace */ + struct fs_struct *vx_fs; /* private namespace fs */ + + uint64_t vx_flags; /* context flags */ + uint64_t vx_bcaps; /* bounding caps (system) */ + uint64_t vx_ccaps; /* context caps
(vserver) */ + kernel_cap_t vx_cap_bset; /* the guest's bset */ + + struct task_struct *vx_reaper; /* guest reaper process */ + pid_t vx_initpid; /* PID of guest init */ + + struct _vx_limit limit; /* vserver limits */ + struct _vx_sched sched; /* vserver scheduler */ + struct _vx_cvirt cvirt; /* virtual/bias stuff */ + struct _vx_cacct cacct; /* context accounting */ + +#ifndef CONFIG_SMP + struct _vx_info_pc info_pc; /* per cpu data */ +#else + struct _vx_info_pc *ptr_pc; /* per cpu array */ +#endif + + wait_queue_head_t vx_wait; /* context exit waitqueue */ + int reboot_cmd; /* last sys_reboot() cmd */ + int exit_code; /* last process exit code */ + + char vx_name[65]; /* vserver name */ +}; +/* per-cpu data access: the embedded info_pc without CONFIG_SMP (id is ignored), per_cpu_ptr() into ptr_pc with it */ +#ifndef CONFIG_SMP +#define vx_ptr_pc(vxi) (&(vxi)->info_pc) +#define vx_per_cpu(vxi, v, id) vx_ptr_pc(vxi)->v +#else +#define vx_ptr_pc(vxi) ((vxi)->ptr_pc) +#define vx_per_cpu(vxi, v, id) per_cpu_ptr(vx_ptr_pc(vxi), id)->v +#endif +/* vx_cpu(): per-cpu member v for the cpu given by smp_processor_id() */ +#define vx_cpu(vxi, v) vx_per_cpu(vxi, v, smp_processor_id()) + + +struct vx_info_save { + struct vx_info *vxi; + xid_t xid; +}; + + +/* status flags */ + +#define VXS_HASHED 0x0001 +#define VXS_PAUSED 0x0010 +#define VXS_SHUTDOWN 0x0100 +#define VXS_HELPER 0x1000 +#define VXS_RELEASED 0x8000 + + +extern void claim_vx_info(struct vx_info *, struct task_struct *); +extern void release_vx_info(struct vx_info *, struct task_struct *); + +extern struct vx_info *lookup_vx_info(int); +extern struct vx_info *lookup_or_create_vx_info(int); + +extern int get_xid_list(int, unsigned int *, int); +extern int xid_is_hashed(xid_t); + +extern int vx_migrate_task(struct task_struct *, struct vx_info *, int); + +extern long vs_state_change(struct vx_info *, unsigned int); + + +#endif /* __KERNEL__ */ +#endif /* _VX_CONTEXT_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/context_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/context_cmd.h --- linux-2.6.19.1/include/linux/vserver/context_cmd.h 1970-01-01 01:00:00 +0100 +++
linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/context_cmd.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,123 @@ +#ifndef _VX_CONTEXT_CMD_H +#define _VX_CONTEXT_CMD_H + + +/* vinfo commands */ + +#define VCMD_task_xid VC_CMD(VINFO, 1, 0) + +#ifdef __KERNEL__ +extern int vc_task_xid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_vx_info VC_CMD(VINFO, 5, 0) +/* presumably mirrors vx_info.vx_id and vx_info.vx_initpid -- confirm in vc_vx_info() */ +struct vcmd_vx_info_v0 { + uint32_t xid; + uint32_t initpid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_vx_info(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_ctx_stat VC_CMD(VSTAT, 0, 0) +/* presumably snapshots of vx_info.vx_usecnt and vx_info.vx_tasks -- confirm in vc_ctx_stat() */ +struct vcmd_ctx_stat_v0 { + uint32_t usecnt; + uint32_t tasks; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_ctx_stat(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ + +/* context commands */ + +#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0) +#define VCMD_ctx_create VC_CMD(VPROC, 1, 1) + +struct vcmd_ctx_create { + uint64_t flagword; +}; + +#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0) +#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1) + +struct vcmd_ctx_migrate { + uint64_t flagword; +}; + +#ifdef __KERNEL__ +extern int vc_ctx_create(uint32_t, void __user *); +extern int vc_ctx_migrate(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ + + +/* flag commands */ + +#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0) +#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0) + +struct vcmd_ctx_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_cflags(struct vx_info *, void __user *); +extern int vc_set_cflags(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ + + +/* context caps commands */ + +#define VCMD_get_ccaps_v0 VC_CMD(FLAGS, 3, 0) +#define VCMD_set_ccaps_v0 VC_CMD(FLAGS, 4, 0) +/* the v0 layout carries bcaps next to ccaps/cmask; the v1 layout below has only ccaps/cmask, and bcaps has its own get/set_bcaps commands */ +struct vcmd_ctx_caps_v0 { + uint64_t bcaps; + uint64_t ccaps; + uint64_t cmask; +}; + +#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 1) +#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 1) + +struct vcmd_ctx_caps_v1 { +
uint64_t ccaps; + uint64_t cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ccaps_v0(struct vx_info *, void __user *); +extern int vc_set_ccaps_v0(struct vx_info *, void __user *); +extern int vc_get_ccaps(struct vx_info *, void __user *); +extern int vc_set_ccaps(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ + + +/* bcaps commands */ + +#define VCMD_get_bcaps VC_CMD(FLAGS, 9, 0) +#define VCMD_set_bcaps VC_CMD(FLAGS,10, 0) +/* a capability value plus a mask; presumably bmask selects which bits of bcaps a set operation changes -- confirm in vc_set_bcaps() */ +struct vcmd_bcaps { + uint64_t bcaps; + uint64_t bmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_bcaps(struct vx_info *, void __user *); +extern int vc_set_bcaps(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_CONTEXT_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/cvirt.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cvirt.h --- linux-2.6.19.1/include/linux/vserver/cvirt.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cvirt.h 2006-11-14 02:23:11 +0100 @@ -0,0 +1,20 @@ +#ifndef _VX_CVIRT_H +#define _VX_CVIRT_H + + +#ifdef __KERNEL__ +/* forward declarations only: the prototypes below use these types solely through pointers */ +struct timespec; + +void vx_vsi_uptime(struct timespec *, struct timespec *); + + +struct vx_info; + +void vx_update_load(struct vx_info *); + + +int vx_do_syslog(int, char __user *, int); + +#endif /* __KERNEL__ */ +#endif /* _VX_CVIRT_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/cvirt_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cvirt_cmd.h --- linux-2.6.19.1/include/linux/vserver/cvirt_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cvirt_cmd.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,53 @@ +#ifndef _VX_CVIRT_CMD_H +#define _VX_CVIRT_CMD_H + + +/* virtual host info name commands */ + +#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0) +#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0) +/* field presumably takes a value of enum vhi_name_field (below); name is a 65 byte buffer */ +struct vcmd_vhi_name_v0 { + uint32_t field; + char name[65]; +}; + + +enum vhi_name_field { + VHIN_CONTEXT=0, + VHIN_SYSNAME, + VHIN_NODENAME, + VHIN_RELEASE, +
VHIN_VERSION, + VHIN_MACHINE, + VHIN_DOMAINNAME, +}; + + +#ifdef __KERNEL__ + +#include +/* NOTE(review): the #include above has no header name (apparently lost in extraction) -- restore it from the original patch */ +extern int vc_set_vhi_name(struct vx_info *, void __user *); +extern int vc_get_vhi_name(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_virt_stat VC_CMD(VSTAT, 3, 0) +/* several field names match counters in struct _vx_cvirt (nr_threads, nr_running, nr_uninterruptible, nr_onhold, load[3]); presumably filled from there by vc_virt_stat() -- confirm */ +struct vcmd_virt_stat_v0 { + uint64_t offset; + uint64_t uptime; + uint32_t nr_threads; + uint32_t nr_running; + uint32_t nr_uninterruptible; + uint32_t nr_onhold; + uint32_t nr_forks; + uint32_t load[3]; +}; + +#ifdef __KERNEL__ +extern int vc_virt_stat(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_CVIRT_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/cvirt_def.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cvirt_def.h --- linux-2.6.19.1/include/linux/vserver/cvirt_def.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/cvirt_def.h 2006-11-14 00:04:36 +0100 @@ -0,0 +1,82 @@ +#ifndef _VX_CVIRT_DEF_H +#define _VX_CVIRT_DEF_H + +#include +#include +#include +#include +#include +#include + +/* NOTE(review): the six #include lines above have lost their header names (apparently in extraction) -- restore them from the original patch */ +struct _vx_usage_stat { + uint64_t user; + uint64_t nice; + uint64_t system; + uint64_t softirq; + uint64_t irq; + uint64_t idle; + uint64_t iowait; +}; +/* log buffer state embedded in struct _vx_cvirt: wait queue, lock, positions and a 1024 byte log_buf */ +struct _vx_syslog { + wait_queue_head_t log_wait; + spinlock_t logbuf_lock; /* lock for the log buffer */ + + unsigned long log_start; /* next char to be read by syslog() */ + unsigned long con_start; /* next char to be sent to consoles */ + unsigned long log_end; /* most-recently-written-char + 1 */ + unsigned long logged_chars; /* #chars since last read+clear operation */ + + char log_buf[1024]; +}; + + +/* context sub struct */ + +struct _vx_cvirt { +// int max_threads; /* maximum allowed threads */ + atomic_t nr_threads; /* number of current threads */ + atomic_t nr_running; /* number of running threads */ + atomic_t nr_uninterruptible; /* number of uninterruptible threads */ + + atomic_t nr_onhold; /* processes on hold */ + uint32_t onhold_last; /* jiffies
when put on hold */ + + struct timeval bias_tv; /* time offset to the host */ + struct timespec bias_idle; + struct timespec bias_uptime; /* context creation point */ + uint64_t bias_clock; /* offset in clock_t */ + + spinlock_t load_lock; /* lock for the load averages */ + atomic_t load_updates; /* nr of load updates done so far */ + uint32_t load_last; /* last time load was calculated */ + uint32_t load[3]; /* load averages 1,5,15 */ + + atomic_t total_forks; /* number of forks so far */ + + struct _vx_syslog syslog; +}; +/* per-cpu part of the virtualization data: cpu usage statistics */ +struct _vx_cvirt_pc { + struct _vx_usage_stat cpustat; +}; + + +#ifdef CONFIG_VSERVER_DEBUG +/* debug helper: print the four thread counters and total_forks with printk() */ +static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt) +{ + printk("\t_vx_cvirt:\n"); + printk("\t threads: %4d, %4d, %4d, %4d\n", + atomic_read(&cvirt->nr_threads), + atomic_read(&cvirt->nr_running), + atomic_read(&cvirt->nr_uninterruptible), + atomic_read(&cvirt->nr_onhold)); + /* add rest here */ + printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks)); +} + +#endif + +#endif /* _VX_CVIRT_DEF_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/debug.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/debug.h --- linux-2.6.19.1/include/linux/vserver/debug.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/debug.h 2006-11-08 04:57:48 +0100 @@ -0,0 +1,112 @@ +#ifndef _VX_DEBUG_H +#define _VX_DEBUG_H + +/* tests on the debug word vx_debug_<n>: VXD_CBIT checks bit m, VXD_CMIN checks word > m, VXD_MASK masks the word with m */ +#define VXD_CBIT(n,m) (vx_debug_ ## n & (1 << (m))) +#define VXD_CMIN(n,m) (vx_debug_ ## n > (m)) +#define VXD_MASK(n,m) (vx_debug_ ## n & (m)) +/* VXD_QUAD expands to the four bytes of a 32-bit value, least significant byte first, as arguments for VXF_QUAD */ +#define VXD_QPOS(v,p) (((uint32_t)(v) >> ((p)*8)) & 0xFF) +#define VXD_QUAD(v) VXD_QPOS(v,0), VXD_QPOS(v,1), \ + VXD_QPOS(v,2), VXD_QPOS(v,3) +#define VXF_QUAD "%u.%u.%u.%u" + +#define VXD_DEV(d) (d), (d)->bd_inode->i_ino, \ + imajor((d)->bd_inode), iminor((d)->bd_inode) +#define VXF_DEV "%p[%lu,%d:%d]" + + +#define __FUNC__ __func__ + + +#ifdef CONFIG_VSERVER_DEBUG + +extern unsigned int vx_debug_switch; +extern unsigned int vx_debug_xid; +extern
unsigned int vx_debug_nid; +extern unsigned int vx_debug_tag; +extern unsigned int vx_debug_net; +extern unsigned int vx_debug_limit; +extern unsigned int vx_debug_cres; +extern unsigned int vx_debug_dlim; +extern unsigned int vx_debug_quota; +extern unsigned int vx_debug_cvirt; +extern unsigned int vx_debug_misc; + + +#define VX_LOGLEVEL "vxD: " +#define VX_WARNLEVEL KERN_WARNING "vxW: " + +#define vxdprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f "\n" , ##x); \ + } while (0) + +#define vxlprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f " @%s:%d\n", x); \ + } while (0) + +#define vxfprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \ + } while (0) + + +#define vxwprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_WARNLEVEL f "\n" , ##x); \ + } while (0) + + +#define vxd_path(d,m) \ + ({ static char _buffer[PATH_MAX]; \ + d_path((d), (m), _buffer, sizeof(_buffer)); }) + +#define vxd_cond_path(n) \ + ((n) ? vxd_path((n)->dentry, (n)->mnt) : "" ) + + +struct vx_info; + +void dump_vx_info(struct vx_info *, int); +void dump_vx_info_inactive(int); + +#else /* CONFIG_VSERVER_DEBUG */ + +#define vx_debug_switch 0 +#define vx_debug_xid 0 +#define vx_debug_nid 0 +#define vx_debug_tag 0 +#define vx_debug_net 0 +#define vx_debug_limit 0 +#define vx_debug_cres 0 +#define vx_debug_dlim 0 +#define vx_debug_cvirt 0 + +#define vxdprintk(x...) do { } while (0) +#define vxlprintk(x...) do { } while (0) +#define vxfprintk(x...) do { } while (0) +#define vxwprintk(x...) do { } while (0) + +#define vxd_path "" +#define vxd_cond_path vxd_path + +#endif /* CONFIG_VSERVER_DEBUG */ + + +#ifdef CONFIG_VSERVER_DEBUG +#define vxd_assert_lock(l) assert_spin_locked(l) +#define vxd_assert(c,f,x...) vxlprintk(!(c), \ + "assertion [" f "] failed.", ##x, __FILE__, __LINE__) +#else +#define vxd_assert_lock(l) do { } while (0) +#define vxd_assert(c,f,x...) 
do { } while (0) +#endif + + +#endif /* _VX_DEBUG_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/debug_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/debug_cmd.h --- linux-2.6.19.1/include/linux/vserver/debug_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/debug_cmd.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,58 @@ +#ifndef _VX_DEBUG_CMD_H +#define _VX_DEBUG_CMD_H + + +/* debug commands */ + +#define VCMD_dump_history VC_CMD(DEBUG, 1, 0) + +#define VCMD_read_history VC_CMD(DEBUG, 5, 0) +#define VCMD_read_monitor VC_CMD(DEBUG, 6, 0) + +struct vcmd_read_history_v0 { + uint32_t index; + uint32_t count; + char __user *data; +}; + +struct vcmd_read_monitor_v0 { + uint32_t index; + uint32_t count; + char __user *data; +}; + + +#ifdef __KERNEL__ + +#ifdef CONFIG_COMPAT + +#include + +struct vcmd_read_history_v0_x32 { + uint32_t index; + uint32_t count; + compat_uptr_t data_ptr; +}; + +struct vcmd_read_monitor_v0_x32 { + uint32_t index; + uint32_t count; + compat_uptr_t data_ptr; +}; + +#endif /* CONFIG_COMPAT */ + +extern int vc_dump_history(uint32_t); + +extern int vc_read_history(uint32_t, void __user *); +extern int vc_read_monitor(uint32_t, void __user *); + +#ifdef CONFIG_COMPAT + +extern int vc_read_history_x32(uint32_t, void __user *); +extern int vc_read_monitor_x32(uint32_t, void __user *); + +#endif /* CONFIG_COMPAT */ + +#endif /* __KERNEL__ */ +#endif /* _VX_DEBUG_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/dlimit.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/dlimit.h --- linux-2.6.19.1/include/linux/vserver/dlimit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/dlimit.h 2006-11-08 04:57:50 +0100 @@ -0,0 +1,53 @@ +#ifndef _VX_DLIMIT_H +#define _VX_DLIMIT_H + +#include "switch.h" + + +#ifdef __KERNEL__ + +/* keep in sync with CDLIM_INFINITY */ + +#define DLIM_INFINITY (~0ULL) + +#include + +struct super_block; + +struct dl_info { + struct 
hlist_node dl_hlist; /* linked list of contexts */ + struct rcu_head dl_rcu; /* the rcu head */ + tag_t dl_tag; /* context tag */ + atomic_t dl_usecnt; /* usage count */ + atomic_t dl_refcnt; /* reference count */ + + struct super_block *dl_sb; /* associated superblock */ + + spinlock_t dl_lock; /* protect the values */ + + unsigned long long dl_space_used; /* used space in bytes */ + unsigned long long dl_space_total; /* maximum space in bytes */ + unsigned long dl_inodes_used; /* used inodes */ + unsigned long dl_inodes_total; /* maximum inodes */ + + unsigned int dl_nrlmult; /* non root limit mult */ +}; + +struct rcu_head; + +extern void rcu_free_dl_info(struct rcu_head *); +extern void unhash_dl_info(struct dl_info *); + +extern struct dl_info *locate_dl_info(struct super_block *, tag_t); + + +struct kstatfs; + +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *); + +typedef uint64_t dlsize_t; + +#endif /* __KERNEL__ */ +#else /* _VX_DLIMIT_H */ +#warning duplicate inclusion +#endif /* _VX_DLIMIT_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/dlimit_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/dlimit_cmd.h --- linux-2.6.19.1/include/linux/vserver/dlimit_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/dlimit_cmd.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,74 @@ +#ifndef _VX_DLIMIT_CMD_H +#define _VX_DLIMIT_CMD_H + + +/* dlimit vserver commands */ + +#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0) +#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0) + +#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0) +#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0) + +struct vcmd_ctx_dlimit_base_v0 { + const char __user *name; + uint32_t flags; +}; + +struct vcmd_ctx_dlimit_v0 { + const char __user *name; + uint32_t space_used; /* used space in kbytes */ + uint32_t space_total; /* maximum space in kbytes */ + uint32_t inodes_used; /* used inodes */ + uint32_t inodes_total; /* maximum inodes */ + uint32_t reserved; /* 
reserved for root in % */ + uint32_t flags; +}; + +#define CDLIM_UNSET ((uint32_t)0UL) +#define CDLIM_INFINITY ((uint32_t)~0UL) +#define CDLIM_KEEP ((uint32_t)~1UL) + +#ifdef __KERNEL__ + +#ifdef CONFIG_COMPAT + +#include + +struct vcmd_ctx_dlimit_base_v0_x32 { + compat_uptr_t name_ptr; + uint32_t flags; +}; + +struct vcmd_ctx_dlimit_v0_x32 { + compat_uptr_t name_ptr; + uint32_t space_used; /* used space in kbytes */ + uint32_t space_total; /* maximum space in kbytes */ + uint32_t inodes_used; /* used inodes */ + uint32_t inodes_total; /* maximum inodes */ + uint32_t reserved; /* reserved for root in % */ + uint32_t flags; +}; + +#endif /* CONFIG_COMPAT */ + +#include + +extern int vc_add_dlimit(uint32_t, void __user *); +extern int vc_rem_dlimit(uint32_t, void __user *); + +extern int vc_set_dlimit(uint32_t, void __user *); +extern int vc_get_dlimit(uint32_t, void __user *); + +#ifdef CONFIG_COMPAT + +extern int vc_add_dlimit_x32(uint32_t, void __user *); +extern int vc_rem_dlimit_x32(uint32_t, void __user *); + +extern int vc_set_dlimit_x32(uint32_t, void __user *); +extern int vc_get_dlimit_x32(uint32_t, void __user *); + +#endif /* CONFIG_COMPAT */ + +#endif /* __KERNEL__ */ +#endif /* _VX_DLIMIT_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/global.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/global.h --- linux-2.6.19.1/include/linux/vserver/global.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/global.h 2006-12-06 21:03:02 +0100 @@ -0,0 +1,11 @@ +#ifndef _VX_GLOBAL_H +#define _VX_GLOBAL_H + + +extern atomic_t vx_global_ctotal; +extern atomic_t vx_global_cactive; + +extern atomic_t nx_global_ctotal; +extern atomic_t nx_global_cactive; + +#endif /* _VX_GLOBAL_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/history.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/history.h --- linux-2.6.19.1/include/linux/vserver/history.h 1970-01-01 01:00:00 +0100 +++ 
linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/history.h 2006-11-30 19:59:44 +0100 @@ -0,0 +1,197 @@ +#ifndef _VX_HISTORY_H +#define _VX_HISTORY_H + + +enum { + VXH_UNUSED=0, + VXH_THROW_OOPS=1, + + VXH_GET_VX_INFO, + VXH_PUT_VX_INFO, + VXH_INIT_VX_INFO, + VXH_SET_VX_INFO, + VXH_CLR_VX_INFO, + VXH_CLAIM_VX_INFO, + VXH_RELEASE_VX_INFO, + VXH_ALLOC_VX_INFO, + VXH_DEALLOC_VX_INFO, + VXH_HASH_VX_INFO, + VXH_UNHASH_VX_INFO, + VXH_LOC_VX_INFO, + VXH_LOOKUP_VX_INFO, + VXH_CREATE_VX_INFO, +}; + +struct _vxhe_vxi { + struct vx_info *ptr; + unsigned xid; + unsigned usecnt; + unsigned tasks; +}; + +struct _vxhe_set_clr { + void *data; +}; + +struct _vxhe_loc_lookup { + unsigned arg; +}; + +struct _vx_hist_entry { + void *loc; + unsigned short seq; + unsigned short type; + struct _vxhe_vxi vxi; + union { + struct _vxhe_set_clr sc; + struct _vxhe_loc_lookup ll; + }; +}; + +#ifdef CONFIG_VSERVER_HISTORY + +extern unsigned volatile int vxh_active; + +struct _vx_hist_entry *vxh_advance(void *loc); + + +static inline +void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi) +{ + entry->vxi.ptr = vxi; + if (vxi) { + entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt); + entry->vxi.tasks = atomic_read(&vxi->vx_tasks); + entry->vxi.xid = vxi->vx_id; + } +} + + +#define __HERE__ current_text_addr() + +#define __VXH_BODY(__type, __data, __here) \ + struct _vx_hist_entry *entry; \ + \ + preempt_disable(); \ + entry = vxh_advance(__here); \ + __data; \ + entry->type = __type; \ + preempt_enable(); + + + /* pass vxi only */ + +#define __VXH_SMPL \ + __vxh_copy_vxi(entry, vxi) + +static inline +void __vxh_smpl(struct vx_info *vxi, int __type, void *__here) +{ + __VXH_BODY(__type, __VXH_SMPL, __here) +} + + /* pass vxi and data (void *) */ + +#define __VXH_DATA \ + __vxh_copy_vxi(entry, vxi); \ + entry->sc.data = data + +static inline +void __vxh_data(struct vx_info *vxi, void *data, + int __type, void *__here) +{ + __VXH_BODY(__type, __VXH_DATA, __here) +} + + /* pass vxi and 
arg (long) */ + +#define __VXH_LONG \ + __vxh_copy_vxi(entry, vxi); \ + entry->ll.arg = arg + +static inline +void __vxh_long(struct vx_info *vxi, long arg, + int __type, void *__here) +{ + __VXH_BODY(__type, __VXH_LONG, __here) +} + + +static inline +void __vxh_throw_oops(void *__here) +{ + __VXH_BODY(VXH_THROW_OOPS, {}, __here); + /* prevent further acquisition */ + vxh_active = 0; +} + + +#define vxh_throw_oops() __vxh_throw_oops(__HERE__); + +#define __vxh_get_vx_info(v,h) __vxh_smpl(v, VXH_GET_VX_INFO, h); +#define __vxh_put_vx_info(v,h) __vxh_smpl(v, VXH_PUT_VX_INFO, h); + +#define __vxh_init_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_INIT_VX_INFO, h); +#define __vxh_set_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_SET_VX_INFO, h); +#define __vxh_clr_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_CLR_VX_INFO, h); + +#define __vxh_claim_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_CLAIM_VX_INFO, h); +#define __vxh_release_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_RELEASE_VX_INFO, h); + +#define vxh_alloc_vx_info(v) \ + __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__); +#define vxh_dealloc_vx_info(v) \ + __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__); + +#define vxh_hash_vx_info(v) \ + __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__); +#define vxh_unhash_vx_info(v) \ + __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__); + +#define vxh_loc_vx_info(v,l) \ + __vxh_long(v,l, VXH_LOC_VX_INFO, __HERE__); +#define vxh_lookup_vx_info(v,l) \ + __vxh_long(v,l, VXH_LOOKUP_VX_INFO, __HERE__); +#define vxh_create_vx_info(v,l) \ + __vxh_long(v,l, VXH_CREATE_VX_INFO, __HERE__); + +extern void vxh_dump_history(void); + + +#else /* CONFIG_VSERVER_HISTORY */ + +#define __HERE__ 0 + +#define vxh_throw_oops() do { } while (0) + +#define __vxh_get_vx_info(v,h) do { } while (0) +#define __vxh_put_vx_info(v,h) do { } while (0) + +#define __vxh_init_vx_info(v,d,h) do { } while (0) +#define __vxh_set_vx_info(v,d,h) do { } while (0) +#define __vxh_clr_vx_info(v,d,h) do { } while (0) + +#define __vxh_claim_vx_info(v,d,h) do { } while 
(0) +#define __vxh_release_vx_info(v,d,h) do { } while (0) + +#define vxh_alloc_vx_info(v) do { } while (0) +#define vxh_dealloc_vx_info(v) do { } while (0) + +#define vxh_hash_vx_info(v) do { } while (0) +#define vxh_unhash_vx_info(v) do { } while (0) + +#define vxh_loc_vx_info(a,v) do { } while (0) +#define vxh_lookup_vx_info(a,v) do { } while (0) +#define vxh_create_vx_info(a,v) do { } while (0) + +#define vxh_dump_history() do { } while (0) + + +#endif /* CONFIG_VSERVER_HISTORY */ + +#endif /* _VX_HISTORY_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/inode.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/inode.h --- linux-2.6.19.1/include/linux/vserver/inode.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/inode.h 2006-11-08 04:57:45 +0100 @@ -0,0 +1,38 @@ +#ifndef _VX_INODE_H +#define _VX_INODE_H + + +#define IATTR_TAG 0x01000000 + +#define IATTR_ADMIN 0x00000001 +#define IATTR_WATCH 0x00000002 +#define IATTR_HIDE 0x00000004 +#define IATTR_FLAGS 0x00000007 + +#define IATTR_BARRIER 0x00010000 +#define IATTR_IUNLINK 0x00020000 +#define IATTR_IMMUTABLE 0x00040000 + +#ifdef __KERNEL__ + + +#ifdef CONFIG_VSERVER_PROC_SECURE +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#else +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#endif + +#define vx_hide_check(c,m) (((m) & IATTR_HIDE) ? 
vx_check(c,m) : 1) + +#endif /* __KERNEL__ */ + +/* inode ioctls */ + +#define FIOC_GETXFLG _IOR('x', 5, long) +#define FIOC_SETXFLG _IOW('x', 6, long) + +#else /* _VX_INODE_H */ +#warning duplicate inclusion +#endif /* _VX_INODE_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/inode_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/inode_cmd.h --- linux-2.6.19.1/include/linux/vserver/inode_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/inode_cmd.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,61 @@ +#ifndef _VX_INODE_CMD_H +#define _VX_INODE_CMD_H + + +/* inode vserver commands */ + +#define VCMD_get_iattr_v0 VC_CMD(INODE, 1, 0) +#define VCMD_set_iattr_v0 VC_CMD(INODE, 2, 0) + +#define VCMD_get_iattr VC_CMD(INODE, 1, 1) +#define VCMD_set_iattr VC_CMD(INODE, 2, 1) + +struct vcmd_ctx_iattr_v0 { + /* device handle in id */ + uint64_t ino; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + +struct vcmd_ctx_iattr_v1 { + const char __user *name; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + + +#ifdef __KERNEL__ + + +#ifdef CONFIG_COMPAT + +#include + +struct vcmd_ctx_iattr_v1_x32 { + compat_uptr_t name_ptr; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + +#endif /* CONFIG_COMPAT */ + +#include + +extern int vc_get_iattr_v0(uint32_t, void __user *); +extern int vc_set_iattr_v0(uint32_t, void __user *); + +extern int vc_get_iattr(uint32_t, void __user *); +extern int vc_set_iattr(uint32_t, void __user *); + +#ifdef CONFIG_COMPAT + +extern int vc_get_iattr_x32(uint32_t, void __user *); +extern int vc_set_iattr_x32(uint32_t, void __user *); + +#endif /* CONFIG_COMPAT */ + +#endif /* __KERNEL__ */ +#endif /* _VX_INODE_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/legacy.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/legacy.h --- linux-2.6.19.1/include/linux/vserver/legacy.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/legacy.h 2006-11-08 
04:57:49 +0100 @@ -0,0 +1,49 @@ +#ifndef _VX_LEGACY_H +#define _VX_LEGACY_H + +#include "switch.h" + + +/* compatibility vserver commands */ + +#define VCMD_new_s_context VC_CMD(COMPAT, 1, 1) +#define VCMD_set_ipv4root VC_CMD(COMPAT, 2, 3) + +#define VCMD_create_context VC_CMD(VSETUP, 1, 0) + +/* compatibility vserver arguments */ + +struct vcmd_new_s_context_v1 { + uint32_t remove_cap; + uint32_t flags; +}; + +struct vcmd_set_ipv4root_v3 { + /* number of pairs in id */ + uint32_t broadcast; + struct { + uint32_t ip; + uint32_t mask; + } nx_mask_pair[NB_IPV4ROOT]; +}; + + +#define VX_INFO_LOCK 1 /* Can't request a new vx_id */ +#define VX_INFO_NPROC 4 /* Limit number of processes in a context */ +#define VX_INFO_PRIVATE 8 /* No one can join this security context */ +#define VX_INFO_INIT 16 /* This process wants to become the */ + /* logical process 1 of the security */ + /* context */ +#define VX_INFO_HIDEINFO 32 /* Hide some information in /proc */ +#define VX_INFO_ULIMIT 64 /* Use ulimit of the current process */ + /* to become the global limits */ + /* of the context */ +#define VX_INFO_NAMESPACE 128 /* save private namespace */ + + +#ifdef __KERNEL__ +extern int vc_new_s_context(uint32_t, void __user *); +extern int vc_set_ipv4root(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_LEGACY_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/limit.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/limit.h --- linux-2.6.19.1/include/linux/vserver/limit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/limit.h 2006-11-30 19:31:41 +0100 @@ -0,0 +1,68 @@ +#ifndef _VX_LIMIT_H +#define _VX_LIMIT_H + + +#define VLIMIT_NSOCK 16 +#define VLIMIT_OPENFD 17 +#define VLIMIT_ANON 18 +#define VLIMIT_SHMEM 19 +#define VLIMIT_SEMARY 20 +#define VLIMIT_NSEMS 21 +#define VLIMIT_DENTRY 22 +#define VLIMIT_MAPPED 23 + + +#ifdef __KERNEL__ + +#define VLIM_NOCHECK ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS)) + +/* keep in sync
with CRLIM_INFINITY */ + +#define VLIM_INFINITY (~0ULL) + +#ifndef RLIM_INFINITY +#warning RLIM_INFINITY is undefined +#endif + +#define __rlim_val(l,r,v) ((l)->res[(r)].v) + +#define __rlim_soft(l,r) __rlim_val(l,r,soft) +#define __rlim_hard(l,r) __rlim_val(l,r,hard) + +#define __rlim_rcur(l,r) __rlim_val(l,r,rcur) +#define __rlim_rmin(l,r) __rlim_val(l,r,rmin) +#define __rlim_rmax(l,r) __rlim_val(l,r,rmax) + +#define __rlim_lhit(l,r) __rlim_val(l,r,lhit) +#define __rlim_hit(l,r) atomic_inc(&__rlim_lhit(l,r)) + +typedef atomic_long_t rlim_atomic_t; +typedef unsigned long rlim_t; + +#define __rlim_get(l,r) atomic_long_read(&__rlim_rcur(l,r)) +#define __rlim_set(l,r,v) atomic_long_set(&__rlim_rcur(l,r), v) +#define __rlim_inc(l,r) atomic_long_inc(&__rlim_rcur(l,r)) +#define __rlim_dec(l,r) atomic_long_dec(&__rlim_rcur(l,r)) +#define __rlim_add(l,r,v) atomic_long_add(v, &__rlim_rcur(l,r)) +#define __rlim_sub(l,r,v) atomic_long_sub(v, &__rlim_rcur(l,r)) + + +#if (RLIM_INFINITY == VLIM_INFINITY) +#define VX_VLIM(r) ((long long)(long)(r)) +#define VX_RLIM(v) ((rlim_t)(v)) +#else +#define VX_VLIM(r) (((r) == RLIM_INFINITY) \ + ? VLIM_INFINITY : (long long)(r)) +#define VX_RLIM(v) (((v) == VLIM_INFINITY) \ + ? 
RLIM_INFINITY : (rlim_t)(v)) +#endif + +struct sysinfo; + +void vx_vsi_meminfo(struct sysinfo *); +void vx_vsi_swapinfo(struct sysinfo *); + +#define NUM_LIMITS 24 + +#endif /* __KERNEL__ */ +#endif /* _VX_LIMIT_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/limit_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/limit_cmd.h --- linux-2.6.19.1/include/linux/vserver/limit_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/limit_cmd.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,69 @@ +#ifndef _VX_LIMIT_CMD_H +#define _VX_LIMIT_CMD_H + + +/* rlimit vserver commands */ + +#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0) +#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0) +#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0) +#define VCMD_reset_minmax VC_CMD(RLIMIT, 9, 0) + +struct vcmd_ctx_rlimit_v0 { + uint32_t id; + uint64_t minimum; + uint64_t softlimit; + uint64_t maximum; +}; + +struct vcmd_ctx_rlimit_mask_v0 { + uint32_t minimum; + uint32_t softlimit; + uint32_t maximum; +}; + +#define VCMD_rlimit_stat VC_CMD(VSTAT, 1, 0) + +struct vcmd_rlimit_stat_v0 { + uint32_t id; + uint32_t hits; + uint64_t value; + uint64_t minimum; + uint64_t maximum; +}; + +#define CRLIM_UNSET (0ULL) +#define CRLIM_INFINITY (~0ULL) +#define CRLIM_KEEP (~1ULL) + +#ifdef __KERNEL__ + +#ifdef CONFIG_IA32_EMULATION + +struct vcmd_ctx_rlimit_v0_x32 { + uint32_t id; + uint64_t minimum; + uint64_t softlimit; + uint64_t maximum; +} __attribute__ ((aligned (4))); + +#endif /* CONFIG_IA32_EMULATION */ + +#include + +extern int vc_get_rlimit_mask(uint32_t, void __user *); +extern int vc_get_rlimit(struct vx_info *, void __user *); +extern int vc_set_rlimit(struct vx_info *, void __user *); +extern int vc_reset_minmax(struct vx_info *, void __user *); + +extern int vc_rlimit_stat(struct vx_info *, void __user *); + +#ifdef CONFIG_IA32_EMULATION + +extern int vc_get_rlimit_x32(struct vx_info *, void __user *); +extern int vc_set_rlimit_x32(struct vx_info *, void 
__user *); + +#endif /* CONFIG_IA32_EMULATION */ + +#endif /* __KERNEL__ */ +#endif /* _VX_LIMIT_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/limit_def.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/limit_def.h --- linux-2.6.19.1/include/linux/vserver/limit_def.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/limit_def.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,47 @@ +#ifndef _VX_LIMIT_DEF_H +#define _VX_LIMIT_DEF_H + +#include +#include + +#include "limit.h" + + +struct _vx_res_limit { + rlim_t soft; /* Context soft limit */ + rlim_t hard; /* Context hard limit */ + + rlim_atomic_t rcur; /* Current value */ + rlim_t rmin; /* Context minimum */ + rlim_t rmax; /* Context maximum */ + + atomic_t lhit; /* Limit hits */ +}; + +/* context sub struct */ + +struct _vx_limit { + struct _vx_res_limit res[NUM_LIMITS]; +}; + +#ifdef CONFIG_VSERVER_DEBUG + +static inline void __dump_vx_limit(struct _vx_limit *limit) +{ + int i; + + printk("\t_vx_limit:"); + for (i=0; ivx_id : -1), vlimit_name[res], res, + (vxi ? (long)__rlim_get(&vxi->limit, res) : 0), + (dir > 0) ? "++" : "--", _data, _file, _line); + if (!vxi) + return; + + if (dir > 0) + __rlim_inc(&vxi->limit, res); + else + __rlim_dec(&vxi->limit, res); +} + +static inline void __vx_add_cres(struct vx_info *vxi, + int res, int amount, void *_data, char *_file, int _line) +{ + if (VXD_RCRES_COND(res)) + vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)", + (vxi ? vxi->vx_id : -1), vlimit_name[res], res, + (vxi ? 
(long)__rlim_get(&vxi->limit, res) : 0), + amount, _data, _file, _line); + if (amount == 0) + return; + if (!vxi) + return; + __rlim_add(&vxi->limit, res, amount); +} + +static inline +int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value) +{ + int cond = (value > __rlim_rmax(limit, res)); + + if (cond) + __rlim_rmax(limit, res) = value; + return cond; +} + +static inline +int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value) +{ + int cond = (value < __rlim_rmin(limit, res)); + + if (cond) + __rlim_rmin(limit, res) = value; + return cond; +} + +static inline +void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value) +{ + if (!__vx_cres_adjust_max(limit, res, value)) + __vx_cres_adjust_min(limit, res, value); +} + + +/* return values: + +1 ... no limit hit + -1 ... over soft limit + 0 ... over hard limit */ + +static inline int __vx_cres_avail(struct vx_info *vxi, + int res, int num, char *_file, int _line) +{ + struct _vx_limit *limit; + rlim_t value; + + if (VXD_RLIMIT_COND(res)) + vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d", + (vxi ? vxi->vx_id : -1), vlimit_name[res], res, + (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1), + (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1), + (vxi ? 
(long)__rlim_get(&vxi->limit, res) : 0), + num, _file, _line); + if (!vxi) + return 1; + + limit = &vxi->limit; + value = __rlim_get(limit, res); + + if (!__vx_cres_adjust_max(limit, res, value)) + __vx_cres_adjust_min(limit, res, value); + + if (num == 0) + return 1; + + if (__rlim_soft(limit, res) == RLIM_INFINITY) + return -1; + if (value + num <= __rlim_soft(limit, res)) + return -1; + + if (__rlim_hard(limit, res) == RLIM_INFINITY) + return 1; + if (value + num <= __rlim_hard(limit, res)) + return 1; + + __rlim_hit(limit, res); + return 0; +} + + +static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 }; + +static inline +rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array) +{ + rlim_t value, sum = 0; + int res; + + while ((res = *array++)) { + value = __rlim_get(limit, res); + __vx_cres_fixup(limit, res, value); + sum += value; + } + return sum; +} + +static inline +rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array) +{ + rlim_t value = __vx_cres_array_sum(limit, array + 1); + int res = *array; + + if (value == __rlim_get(limit, res)) + return value; + + __rlim_set(limit, res, value); + /* now adjust min/max */ + if (!__vx_cres_adjust_max(limit, res, value)) + __vx_cres_adjust_min(limit, res, value); + + return value; +} + +static inline int __vx_cres_array_avail(struct vx_info *vxi, + const int *array, int num, char *_file, int _line) +{ + struct _vx_limit *limit; + rlim_t value = 0; + int res; + + if (num == 0) + return 1; + if (!vxi) + return 1; + + limit = &vxi->limit; + res = *array; + value = __vx_cres_array_sum(limit, array+1); + + __rlim_set(limit, res, value); + __vx_cres_fixup(limit, res, value); + + return __vx_cres_avail(vxi, res, num, _file, _line); +} + + +#endif /* __KERNEL__ */ +#endif /* _VX_LIMIT_INT_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/monitor.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/monitor.h --- linux-2.6.19.1/include/linux/vserver/monitor.h 
1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/monitor.h 2006-11-08 04:57:48 +0100 @@ -0,0 +1,95 @@ +#ifndef _VX_MONITOR_H +#define _VX_MONITOR_H + + +enum { + VXM_UNUSED = 0, + + VXM_SYNC = 0x10, + + VXM_UPDATE = 0x20, + VXM_UPDATE_1, + VXM_UPDATE_2, + + VXM_RQINFO_1 = 0x24, + VXM_RQINFO_2, + + VXM_ACTIVATE = 0x40, + VXM_DEACTIVATE, + VXM_IDLE, + + VXM_HOLD = 0x44, + VXM_UNHOLD, + + VXM_MIGRATE = 0x48, + VXM_RESCHED, + + /* all other bits are flags */ + VXM_SCHED = 0x80, +}; + +struct _vxm_update_1 { + uint32_t tokens_max; + uint32_t fill_rate; + uint32_t interval; +}; + +struct _vxm_update_2 { + uint32_t tokens_min; + uint32_t fill_rate; + uint32_t interval; +}; + +struct _vxm_rqinfo_1 { + uint16_t running; + uint16_t onhold; + uint16_t iowait; + uint16_t uintr; + uint32_t idle_tokens; +}; + +struct _vxm_rqinfo_2 { + uint32_t norm_time; + uint32_t idle_time; + uint32_t idle_skip; +}; + +struct _vxm_sched { + uint32_t tokens; + uint32_t norm_time; + uint32_t idle_time; +}; + +struct _vxm_task { + uint16_t pid; + uint16_t state; +}; + +struct _vxm_event { + uint32_t jif; + union { + uint32_t seq; + uint32_t sec; + }; + union { + uint32_t tokens; + uint32_t nsec; + struct _vxm_task tsk; + }; +}; + +struct _vx_mon_entry { + uint16_t type; + uint16_t xid; + union { + struct _vxm_event ev; + struct _vxm_sched sd; + struct _vxm_update_1 u1; + struct _vxm_update_2 u2; + struct _vxm_rqinfo_1 q1; + struct _vxm_rqinfo_2 q2; + }; +}; + + +#endif /* _VX_MONITOR_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/network.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/network.h --- linux-2.6.19.1/include/linux/vserver/network.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/network.h 2006-12-06 21:03:02 +0100 @@ -0,0 +1,142 @@ +#ifndef _VX_NETWORK_H +#define _VX_NETWORK_H + +#include + + +#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ + +#define NX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic 
context */ + +#define NB_IPV4ROOT 16 + + +/* network flags */ + +#define NXF_INFO_PRIVATE 0x00000008 + +#define NXF_STATE_SETUP (1ULL<<32) +#define NXF_STATE_ADMIN (1ULL<<34) + +#define NXF_SC_HELPER (1ULL<<36) +#define NXF_PERSISTENT (1ULL<<38) + +#define NXF_ONE_TIME (0x0005ULL<<32) + +#define NXF_INIT_SET (NXF_STATE_ADMIN) + + +/* address types */ + +#define NXA_TYPE_IPV4 1 +#define NXA_TYPE_IPV6 2 + +#define NXA_MOD_BCAST (1<<8) + +#define NXA_TYPE_ANY ((uint16_t)-1) + + +#ifdef __KERNEL__ + +#include +#include +#include +#include + + +struct nx_info { + struct hlist_node nx_hlist; /* linked list of nxinfos */ + nid_t nx_id; /* vnet id */ + atomic_t nx_usecnt; /* usage count */ + atomic_t nx_tasks; /* tasks count */ + int nx_state; /* context state */ + + uint64_t nx_flags; /* network flag word */ + uint64_t nx_ncaps; /* network capabilities */ + + int nbipv4; + __u32 ipv4[NB_IPV4ROOT]; /* Process can only bind to these IPs */ + /* The first one is used to connect */ + /* and for bind any service */ + /* The other must be used explicitly */ + __u32 mask[NB_IPV4ROOT]; /* Netmask for each ipv4 */ + /* Used to select the proper source */ + /* address for sockets */ + __u32 v4_bcast; /* Broadcast address to receive UDP */ + + char nx_name[65]; /* network context name */ +}; + + +/* status flags */ + +#define NXS_HASHED 0x0001 +#define NXS_SHUTDOWN 0x0100 +#define NXS_RELEASED 0x8000 + +/* check conditions */ + +#define NX_ADMIN 0x0001 +#define NX_WATCH 0x0002 +#define NX_BLEND 0x0004 +#define NX_HOSTID 0x0008 + +#define NX_IDENT 0x0010 +#define NX_EQUIV 0x0020 +#define NX_PARENT 0x0040 +#define NX_CHILD 0x0080 + +#define NX_ARG_MASK 0x00F0 + +#define NX_DYNAMIC 0x0100 +#define NX_STATIC 0x0200 + +#define NX_ATR_MASK 0x0F00 + + +extern struct nx_info *lookup_nx_info(int); + +extern int get_nid_list(int, unsigned int *, int); +extern int nid_is_hashed(nid_t); + +extern int nx_migrate_task(struct task_struct *, struct nx_info *); + +extern long vs_net_change(struct
nx_info *, unsigned int); + +struct in_ifaddr; +struct net_device; + +#ifdef CONFIG_INET +int ifa_in_nx_info(struct in_ifaddr *, struct nx_info *); +int dev_in_nx_info(struct net_device *, struct nx_info *); + +#else /* CONFIG_INET */ +static inline +int ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n) +{ + return 1; +} + +static inline +int dev_in_nx_info(struct net_device *d, struct nx_info *n) +{ + return 1; +} +#endif /* CONFIG_INET */ + +struct sock; + +#ifdef CONFIG_INET +int nx_addr_conflict(struct nx_info *, uint32_t, struct sock *); +#else /* CONFIG_INET */ +static inline +int nx_addr_conflict(struct nx_info *n, uint32_t a, struct sock *s) +{ + return 1; +} +#endif /* CONFIG_INET */ + +#endif /* __KERNEL__ */ +#else /* _VX_NETWORK_H */ +#warning duplicate inclusion +#endif /* _VX_NETWORK_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/network_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/network_cmd.h --- linux-2.6.19.1/include/linux/vserver/network_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/network_cmd.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,89 @@ +#ifndef _VX_NETWORK_CMD_H +#define _VX_NETWORK_CMD_H + + +/* vinfo commands */ + +#define VCMD_task_nid VC_CMD(VINFO, 2, 0) + +#ifdef __KERNEL__ +extern int vc_task_nid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_nx_info VC_CMD(VINFO, 6, 0) + +struct vcmd_nx_info_v0 { + uint32_t nid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_nx_info(struct nx_info *, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0) +#define VCMD_net_create VC_CMD(VNET, 1, 1) + +struct vcmd_net_create { + uint64_t flagword; +}; + +#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0) + +#define VCMD_net_add VC_CMD(NETALT, 1, 0) +#define VCMD_net_remove VC_CMD(NETALT, 2, 0) + +struct vcmd_net_addr_v0 { + uint16_t type; + uint16_t count; + uint32_t ip[4]; + uint32_t mask[4]; + /* more to come */ +}; 
+ + +#ifdef __KERNEL__ +extern int vc_net_create(uint32_t, void __user *); +extern int vc_net_migrate(struct nx_info *, void __user *); + +extern int vc_net_add(struct nx_info *, void __user *); +extern int vc_net_remove(struct nx_info *, void __user *); + +#endif /* __KERNEL__ */ + + +/* flag commands */ + +#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0) +#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0) + +struct vcmd_net_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_nflags(struct nx_info *, void __user *); +extern int vc_set_nflags(struct nx_info *, void __user *); + +#endif /* __KERNEL__ */ + + +/* network caps commands */ + +#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0) +#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0) + +struct vcmd_net_caps_v0 { + uint64_t ncaps; + uint64_t cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ncaps(struct nx_info *, void __user *); +extern int vc_set_ncaps(struct nx_info *, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_NETWORK_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/sched.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/sched.h --- linux-2.6.19.1/include/linux/vserver/sched.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/sched.h 2006-11-08 04:57:45 +0100 @@ -0,0 +1,26 @@ +#ifndef _VX_SCHED_H +#define _VX_SCHED_H + + +#ifdef __KERNEL__ + +struct timespec; + +void vx_vsi_uptime(struct timespec *, struct timespec *); + + +struct vx_info; + +void vx_update_load(struct vx_info *); + + +int vx_tokens_recalc(struct _vx_sched_pc *, + unsigned long *, unsigned long *, int [2]); + +void vx_update_sched_param(struct _vx_sched *sched, + struct _vx_sched_pc *sched_pc); + +#endif /* __KERNEL__ */ +#else /* _VX_SCHED_H */ +#warning duplicate inclusion +#endif /* _VX_SCHED_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/sched_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/sched_cmd.h ---
linux-2.6.19.1/include/linux/vserver/sched_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/sched_cmd.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,72 @@ +#ifndef _VX_SCHED_CMD_H +#define _VX_SCHED_CMD_H + + +/* sched vserver commands */ + +#define VCMD_set_sched_v2 VC_CMD(SCHED, 1, 2) +#define VCMD_set_sched_v3 VC_CMD(SCHED, 1, 3) +#define VCMD_set_sched VC_CMD(SCHED, 1, 4) + +struct vcmd_set_sched_v2 { + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + uint64_t cpu_mask; +}; + +struct vcmd_set_sched_v3 { + uint32_t set_mask; + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + int32_t priority_bias; +}; + +struct vcmd_set_sched_v4 { + uint32_t set_mask; + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + int32_t prio_bias; + int32_t cpu_id; + int32_t bucket_id; +}; + + +#define VXSM_FILL_RATE 0x0001 +#define VXSM_INTERVAL 0x0002 +#define VXSM_FILL_RATE2 0x0004 +#define VXSM_INTERVAL2 0x0008 +#define VXSM_TOKENS 0x0010 +#define VXSM_TOKENS_MIN 0x0020 +#define VXSM_TOKENS_MAX 0x0040 +#define VXSM_PRIO_BIAS 0x0100 + +#define VXSM_IDLE_TIME 0x0200 +#define VXSM_FORCE 0x0400 + +#define VXSM_V3_MASK 0x0173 +#define VXSM_SET_MASK 0x01FF + +#define VXSM_CPU_ID 0x1000 +#define VXSM_BUCKET_ID 0x2000 + +#define SCHED_KEEP (-2) /* only for v2 */ + +#ifdef __KERNEL__ + +#include + +extern int vc_set_sched_v2(struct vx_info *, void __user *); +extern int vc_set_sched_v3(struct vx_info *, void __user *); +extern int vc_set_sched(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_SCHED_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/sched_def.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/sched_def.h --- linux-2.6.19.1/include/linux/vserver/sched_def.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/sched_def.h 
2006-11-08 04:57:49 +0100 @@ -0,0 +1,67 @@ +#ifndef _VX_SCHED_DEF_H +#define _VX_SCHED_DEF_H + +#include +#include +#include +#include +#include + + +/* context sub struct */ + +struct _vx_sched { + spinlock_t tokens_lock; /* lock for token bucket */ + + int tokens; /* number of CPU tokens */ + int fill_rate[2]; /* Fill rate: add X tokens... */ + int interval[2]; /* Divisor: per Y jiffies */ + int tokens_min; /* Limit: minimum for unhold */ + int tokens_max; /* Limit: no more than N tokens */ + + unsigned update_mask; /* which features should be updated */ + cpumask_t update; /* CPUs which should update */ + + int prio_bias; /* bias offset for priority */ + int vavavoom; /* last calculated vavavoom */ +}; + +struct _vx_sched_pc { + int tokens; /* number of CPU tokens */ + int flags; /* bucket flags */ + + int fill_rate[2]; /* Fill rate: add X tokens... */ + int interval[2]; /* Divisor: per Y jiffies */ + int tokens_min; /* Limit: minimum for unhold */ + int tokens_max; /* Limit: no more than N tokens */ + + unsigned long norm_time; /* last time accounted */ + unsigned long idle_time; /* non linear time for fair sched */ + unsigned long token_time; /* token time for accounting */ + unsigned long onhold; /* jiffies when put on hold */ + + uint64_t user_ticks; /* token tick events */ + uint64_t sys_ticks; /* token tick events */ + uint64_t hold_ticks; /* token ticks paused */ +}; + + +#define VXSF_ONHOLD 0x0001 +#define VXSF_IDLE_TIME 0x0100 + +#ifdef CONFIG_VSERVER_DEBUG + +static inline void __dump_vx_sched(struct _vx_sched *sched) +{ + printk("\t_vx_sched:\n"); + printk("\t tokens: %4d/%4d, %4d/%4d, %4d, %4d\n", + sched->fill_rate[0], sched->interval[0], + sched->fill_rate[1], sched->interval[1], + sched->tokens_min, sched->tokens_max); + printk("\t priority = %4d, %4d\n", + sched->prio_bias, sched->vavavoom); +} + +#endif + +#endif /* _VX_SCHED_DEF_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/signal.h 
linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/signal.h --- linux-2.6.19.1/include/linux/vserver/signal.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/signal.h 2006-11-08 04:57:45 +0100 @@ -0,0 +1,14 @@ +#ifndef _VX_SIGNAL_H +#define _VX_SIGNAL_H + + +#ifdef __KERNEL__ + +struct vx_info; + +int vx_info_kill(struct vx_info *, int, int); + +#endif /* __KERNEL__ */ +#else /* _VX_SIGNAL_H */ +#warning duplicate inclusion +#endif /* _VX_SIGNAL_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/signal_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/signal_cmd.h --- linux-2.6.19.1/include/linux/vserver/signal_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/signal_cmd.h 2006-12-05 17:36:09 +0100 @@ -0,0 +1,43 @@ +#ifndef _VX_SIGNAL_CMD_H +#define _VX_SIGNAL_CMD_H + + +/* signalling vserver commands */ + +#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0) +#define VCMD_wait_exit VC_CMD(EVENT, 99, 0) + +struct vcmd_ctx_kill_v0 { + int32_t pid; + int32_t sig; +}; + +struct vcmd_wait_exit_v0 { + int32_t reboot_cmd; + int32_t exit_code; +}; + +#ifdef __KERNEL__ + +extern int vc_ctx_kill(struct vx_info *, void __user *); +extern int vc_wait_exit(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ + +/* process alteration commands */ + +#define VCMD_get_pflags VC_CMD(PROCALT, 5, 0) +#define VCMD_set_pflags VC_CMD(PROCALT, 6, 0) + +struct vcmd_pflags_v0 { + uint32_t flagword; + uint32_t mask; +}; + +#ifdef __KERNEL__ + +extern int vc_get_pflags(uint32_t pid, void __user *); +extern int vc_set_pflags(uint32_t pid, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_SIGNAL_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/space.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/space.h --- linux-2.6.19.1/include/linux/vserver/space.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/space.h 2006-12-05 18:57:02 +0100 @@ -0,0 +1,13 @@ 
+#ifndef _VX_SPACE_H +#define _VX_SPACE_H + + +#include + +struct vx_info; + +int vx_set_space(struct vx_info *vxi, unsigned long mask); + +#else /* _VX_SPACE_H */ +#warning duplicate inclusion +#endif /* _VX_SPACE_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/space_cmd.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/space_cmd.h --- linux-2.6.19.1/include/linux/vserver/space_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/space_cmd.h 2006-12-05 18:14:49 +0100 @@ -0,0 +1,26 @@ +#ifndef _VX_SPACE_CMD_H +#define _VX_SPACE_CMD_H + + +#define VCMD_enter_space_v0 VC_CMD(PROCALT, 1, 0) +#define VCMD_enter_space VC_CMD(PROCALT, 1, 1) + +#define VCMD_set_space_v0 VC_CMD(PROCALT, 3, 0) +#define VCMD_set_space VC_CMD(PROCALT, 3, 1) + +#define VCMD_get_space_mask VC_CMD(PROCALT, 4, 0) + + +struct vcmd_space_mask { + uint64_t mask; +}; + + +#ifdef __KERNEL__ + +extern int vc_enter_space(struct vx_info *, void __user *); +extern int vc_set_space(struct vx_info *, void __user *); +extern int vc_get_space_mask(struct vx_info *, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_SPACE_CMD_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/switch.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/switch.h --- linux-2.6.19.1/include/linux/vserver/switch.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/switch.h 2006-11-08 04:57:42 +0100 @@ -0,0 +1,100 @@ +#ifndef _VX_SWITCH_H +#define _VX_SWITCH_H + +#include + + +#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) +#define VC_COMMAND(c) (((c) >> 16) & 0xFF) +#define VC_VERSION(c) ((c) & 0xFFF) + +#define VC_CMD(c,i,v) ((((VC_CAT_ ## c) & 0x3F) << 24) \ + | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) + +/* + + Syscall Matrix V2.8 + + |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL| + |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | | + |INFO |SETUP | |MOVE | | | | | | + 
-------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICES| | + HOST | 00| 01| 02| 03| 04| 05| | 06| 07| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | | + PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + MEMORY | | | | | | | |SWAP | | + | 16| 17| 18| 19| 20| 21| | 22| 23| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | | + | 24| 25| 26| 27| 28| 29| | 30| 31| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + DISK | | | | |DLIMIT | | |INODE | | + VFS | 32| 33| 34| 35| 36| 37| | 38| 39| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + OTHER |VSTAT | | | | | | |VINFO | | + | 40| 41| 42| 43| 44| 45| | 46| 47| + =======+=======+=======+=======+=======+=======+=======+ +=======+=======+ + SPECIAL|EVENT | | | |FLAGS | | | | | + | 48| 49| 50| 51| 52| 53| | 54| 55| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT | + | 56| 57| 58| 59| 60|TEST 61| | 62| 63| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + +*/ + +#define VC_CAT_VERSION 0 + +#define VC_CAT_VSETUP 1 +#define VC_CAT_VHOST 2 + +#define VC_CAT_VPROC 9 +#define VC_CAT_PROCALT 10 +#define VC_CAT_PROCMIG 11 +#define VC_CAT_PROCTRL 12 + +#define VC_CAT_SCHED 14 + +#define VC_CAT_VNET 25 +#define VC_CAT_NETALT 26 +#define VC_CAT_NETMIG 27 +#define VC_CAT_NETCTRL 28 + +#define VC_CAT_DLIMIT 36 +#define VC_CAT_INODE 38 + +#define VC_CAT_VSTAT 40 +#define VC_CAT_VINFO 46 +#define VC_CAT_EVENT 48 + +#define VC_CAT_FLAGS 52 +#define VC_CAT_DEBUG 56 +#define VC_CAT_RLIMIT 60 + +#define VC_CAT_SYSTEST 61 +#define VC_CAT_COMPAT 63 + +/* interface version */ + 
+#define VCI_VERSION 0x00020102 +#define VCI_LEGACY_VERSION 0x000100FF + +/* query version */ + +#define VCMD_get_version VC_CMD(VERSION, 0, 0) +#define VCMD_get_vci VC_CMD(VERSION, 1, 0) + + +#ifdef __KERNEL__ + +#include + + +#else /* __KERNEL__ */ +#define __user +#endif /* __KERNEL__ */ + +#endif /* _VX_SWITCH_H */ diff -NurpP --minimal linux-2.6.19.1/include/linux/vserver/tag.h linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/tag.h --- linux-2.6.19.1/include/linux/vserver/tag.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/linux/vserver/tag.h 2006-11-08 04:57:46 +0100 @@ -0,0 +1,153 @@ +#ifndef _DX_TAG_H +#define _DX_TAG_H + + +#define DX_TAG(in) (IS_TAGGED(in)) + + +#ifdef CONFIG_DX_TAG_NFSD +#define DX_TAG_NFSD 1 +#else +#define DX_TAG_NFSD 0 +#endif + + +#ifdef CONFIG_TAGGING_NONE + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOTAG_TAG(cond, uid, gid, tag) (0) + +#define TAGINO_UID(cond, uid, tag) (uid) +#define TAGINO_GID(cond, gid, tag) (gid) + +#endif + + +#ifdef CONFIG_TAGGING_GID16 + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0x0000FFFF + +#define INOTAG_TAG(cond, uid, gid, tag) \ + ((cond) ? (((gid) >> 16) & 0xFFFF) : 0) + +#define TAGINO_UID(cond, uid, tag) (uid) +#define TAGINO_GID(cond, gid, tag) \ + ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid)) + +#endif + + +#ifdef CONFIG_TAGGING_ID24 + +#define MAX_UID 0x00FFFFFF +#define MAX_GID 0x00FFFFFF + +#define INOTAG_TAG(cond, uid, gid, tag) \ + ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0) + +#define TAGINO_UID(cond, uid, tag) \ + ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid)) +#define TAGINO_GID(cond, gid, tag) \ + ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid)) + +#endif + + +#ifdef CONFIG_TAGGING_UID16 + +#define MAX_UID 0x0000FFFF +#define MAX_GID 0xFFFFFFFF + +#define INOTAG_TAG(cond, uid, gid, tag) \ + ((cond) ? 
(((uid) >> 16) & 0xFFFF) : 0) + +#define TAGINO_UID(cond, uid, tag) \ + ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid)) +#define TAGINO_GID(cond, gid, tag) (gid) + +#endif + + +#ifdef CONFIG_TAGGING_INTERN + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOTAG_TAG(cond, uid, gid, tag) \ + ((cond) ? (tag) : 0) + +#define TAGINO_UID(cond, uid, tag) (uid) +#define TAGINO_GID(cond, gid, tag) (gid) + +#endif + + +#ifdef CONFIG_TAGGING_RUNTIME + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOTAG_TAG(cond, uid, gid, tag) (0) + +#define TAGINO_UID(cond, uid, tag) (uid) +#define TAGINO_GID(cond, gid, tag) (gid) + +#endif + + +#ifndef CONFIG_TAGGING_NONE +#define dx_current_fstag(sb) \ + ((sb)->s_flags & MS_TAGGED ? dx_current_tag(): 0) +#else +#define dx_current_fstag(sb) (0) +#endif + +#ifndef CONFIG_TAGGING_INTERN +#define TAGINO_TAG(cond, tag) (0) +#else +#define TAGINO_TAG(cond, tag) ((cond) ? (tag) : 0) +#endif + +#define INOTAG_UID(cond, uid, gid) \ + ((cond) ? ((uid) & MAX_UID) : (uid)) +#define INOTAG_GID(cond, uid, gid) \ + ((cond) ? 
((gid) & MAX_GID) : (gid)) + + +static inline uid_t dx_map_uid(uid_t uid) +{ + if ((uid > MAX_UID) && (uid != -1)) + uid = -2; + return (uid & MAX_UID); +} + +static inline gid_t dx_map_gid(gid_t gid) +{ + if ((gid > MAX_GID) && (gid != -1)) + gid = -2; + return (gid & MAX_GID); +} + + +#ifdef CONFIG_VSERVER_LEGACY +#define FIOC_GETTAG _IOR('x', 1, long) +#define FIOC_SETTAG _IOW('x', 2, long) +#define FIOC_SETTAGJ _IOW('x', 3, long) +#endif + +#ifdef CONFIG_PROPAGATE + +int dx_parse_tag(char *string, tag_t *tag, int remove); + +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode); + +#define dx_propagate_tag(n,i) __dx_propagate_tag(n,i) + +#else +#define dx_propagate_tag(n,i) do { } while (0) +#endif + +#endif /* _DX_TAG_H */ diff -NurpP --minimal linux-2.6.19.1/include/net/af_unix.h linux-2.6.19.1-vs2.2.0-rc6/include/net/af_unix.h --- linux-2.6.19.1/include/net/af_unix.h 2006-09-20 16:58:44 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/include/net/af_unix.h 2006-12-04 05:06:17 +0100 @@ -4,6 +4,7 @@ #include #include #include +#include #include extern void unix_inflight(struct file *fp); @@ -17,9 +18,9 @@ extern spinlock_t unix_table_lock; extern atomic_t unix_tot_inflight; -static inline struct sock *first_unix_socket(int *i) +static inline struct sock *next_unix_socket_table(int *i) { - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { + for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { if (!hlist_empty(&unix_socket_table[*i])) return __sk_head(&unix_socket_table[*i]); } @@ -28,16 +29,19 @@ static inline struct sock *first_unix_so static inline struct sock *next_unix_socket(int *i, struct sock *s) { - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. 
*/ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; + do { + if (s) + s = sk_next(s); + if (!s) + s = next_unix_socket_table(i); + } while (s && !nx_check(s->sk_nid, VS_WATCH_P|VS_IDENT)); + return s; +} + +static inline struct sock *first_unix_socket(int *i) +{ + *i = -1; /* next_unix_socket_table() pre-increments *i, so start one before bucket 0 or that bucket is never scanned */ + return next_unix_socket(i, NULL); } #define forall_unix_sockets(i, s) \ diff -NurpP --minimal linux-2.6.19.1/include/net/inet_hashtables.h linux-2.6.19.1-vs2.2.0-rc6/include/net/inet_hashtables.h --- linux-2.6.19.1/include/net/inet_hashtables.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/net/inet_hashtables.h 2006-11-08 04:57:42 +0100 @@ -271,6 +271,26 @@ static inline int inet_iif(const struct return ((struct rtable *)skb->dst)->rt_iif; } +/* + * Check if a given address matches for an inet socket + * + * nxi: the socket's nx_info if any + * addr: to be verified address + * saddr: socket addresses + */ +static inline int inet_addr_match ( + struct nx_info *nxi, + uint32_t addr, + uint32_t saddr) +{ + if (addr && (saddr == addr)) + return 1; + if (!saddr) + return addr_in_nx_info(nxi, addr); + return 0; +} + + extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, diff -NurpP --minimal linux-2.6.19.1/include/net/inet_sock.h linux-2.6.19.1-vs2.2.0-rc6/include/net/inet_sock.h --- linux-2.6.19.1/include/net/inet_sock.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/net/inet_sock.h 2006-11-08 04:57:42 +0100 @@ -112,6 +112,7 @@ struct inet_sock { /* Socket demultiplex comparisons on incoming packets. 
*/ __be32 daddr; __be32 rcv_saddr; + __be32 rcv_saddr2; /* Second bound ipv4 addr, for ipv4root */ __be16 dport; __u16 num; __be32 saddr; diff -NurpP --minimal linux-2.6.19.1/include/net/inet_timewait_sock.h linux-2.6.19.1-vs2.2.0-rc6/include/net/inet_timewait_sock.h --- linux-2.6.19.1/include/net/inet_timewait_sock.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/net/inet_timewait_sock.h 2006-11-08 04:57:42 +0100 @@ -115,6 +115,10 @@ struct inet_timewait_sock { #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot +#define tw_xid __tw_common.skc_xid +#define tw_vx_info __tw_common.skc_vx_info +#define tw_nid __tw_common.skc_nid +#define tw_nx_info __tw_common.skc_nx_info volatile unsigned char tw_substate; /* 3 bits hole, try to pack */ unsigned char tw_rcv_wscale; diff -NurpP --minimal linux-2.6.19.1/include/net/route.h linux-2.6.19.1-vs2.2.0-rc6/include/net/route.h --- linux-2.6.19.1/include/net/route.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/net/route.h 2006-12-02 02:59:41 +0100 @@ -27,12 +27,16 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. 
@@ -144,6 +148,59 @@ static inline char rt_tos2priority(u8 to return ip_tos2prio[IPTOS_TOS(tos)>>1]; } +#define IPI_LOOPBACK htonl(INADDR_LOOPBACK) +/* Select or validate fl->fl4_src against the addresses in nxi (no-op when nxi->ipv4[0] is 0); returns 0, a route lookup error, or -EPERM when a preset source is not listed in nxi->ipv4[] */ +static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl) +{ + int err; + int i, n = nxi->nbipv4; + u32 ipv4root = nxi->ipv4[0]; + + if (ipv4root == 0) + return 0; + + if (fl->fl4_src == 0) { + if (n > 1) { + u32 foundsrc; + + err = __ip_route_output_key(rp, fl); + if (err) { + fl->fl4_src = ipv4root; + err = __ip_route_output_key(rp, fl); + } + if (err) + return err; + + foundsrc = (*rp)->rt_src; + ip_rt_put(*rp); + + for (i=0; i<n; i++) { + u32 mask = nxi->mask[i]; + u32 ipv4 = nxi->ipv4[i]; + u32 net4 = ipv4 & mask; + + if (foundsrc == ipv4) { + fl->fl4_src = ipv4; + break; + } + if (!fl->fl4_src && (foundsrc & mask) == net4) + fl->fl4_src = ipv4; + } + } + if (fl->fl4_src == 0) + fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK) + ? IPI_LOOPBACK : ipv4root; + } else { + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == fl->fl4_src) + break; + } + if (i == n) + return -EPERM; + } + return 0; +} + static inline int ip_route_connect(struct rtable **rp, __be32 dst, __be32 src, u32 tos, int oif, u8 protocol, __be16 sport, __be16 dport, struct sock *sk) @@ -158,7 +215,27 @@ static inline int ip_route_connect(struc .dport = dport } } }; int err; - if (!dst || !src) { + struct nx_info *nx_info = current->nx_info; + + if (sk) /* sk is treated as optional here, so the debug print below must not dereference it unguarded */ + nx_info = sk->sk_nx_info; + vxdprintk(VXD_CBIT(net, 4), + "ip_route_connect(%p) %p,%p;%lx", + sk, nx_info, sk ? sk->sk_socket : NULL, + (sk && sk->sk_socket ? sk->sk_socket->flags : 0)); + + if (nx_info) { + err = ip_find_src(nx_info, rp, &fl); + if (err) + return err; + if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VS_ADMIN)) + fl.fl4_dst = nx_info->ipv4[0]; +#ifdef CONFIG_VSERVER_REMAP_SADDR + if (fl.fl4_src == IPI_LOOPBACK && !vx_check(0, VS_ADMIN)) + fl.fl4_src = nx_info->ipv4[0]; +#endif + } + if (!fl.fl4_dst || !fl.fl4_src) { err = __ip_route_output_key(rp, &fl); if (err) return err; diff -NurpP --minimal linux-2.6.19.1/include/net/sock.h 
linux-2.6.19.1-vs2.2.0-rc6/include/net/sock.h --- linux-2.6.19.1/include/net/sock.h 2006-11-30 21:19:41 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/include/net/sock.h 2006-11-30 20:55:45 +0100 @@ -118,6 +118,10 @@ struct sock_common { atomic_t skc_refcnt; unsigned int skc_hash; struct proto *skc_prot; + xid_t skc_xid; + struct vx_info *skc_vx_info; + nid_t skc_nid; + struct nx_info *skc_nx_info; }; /** @@ -194,6 +198,10 @@ struct sock { #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot +#define sk_xid __sk_common.skc_xid +#define sk_vx_info __sk_common.skc_vx_info +#define sk_nid __sk_common.skc_nid +#define sk_nx_info __sk_common.skc_nx_info unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; diff -NurpP --minimal linux-2.6.19.1/init/Makefile linux-2.6.19.1-vs2.2.0-rc6/init/Makefile --- linux-2.6.19.1/init/Makefile 2006-09-20 16:58:44 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/init/Makefile 2006-12-06 21:03:02 +0100 @@ -15,6 +15,7 @@ clean-files := ../include/linux/compile. 
# dependencies on generated files need to be listed explicitly +$(obj)/main.o: include/linux/compile.h $(obj)/version.o: include/linux/compile.h # compile.h changes depending on hostname, generation number, etc, diff -NurpP --minimal linux-2.6.19.1/init/main.c linux-2.6.19.1-vs2.2.0-rc6/init/main.c --- linux-2.6.19.1/init/main.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/init/main.c 2006-12-05 18:20:49 +0100 @@ -49,6 +49,8 @@ #include #include #include +#include +#include #include #include @@ -501,7 +503,7 @@ asmlinkage void __init start_kernel(void boot_cpu_init(); page_address_init(); printk(KERN_NOTICE); - printk(linux_banner); + printk(linux_banner, UTS_RELEASE, UTS_VERSION); setup_arch(&command_line); unwind_setup(); setup_per_cpu_areas(); diff -NurpP --minimal linux-2.6.19.1/init/version.c linux-2.6.19.1-vs2.2.0-rc6/init/version.c --- linux-2.6.19.1/init/version.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/init/version.c 2006-12-04 07:14:19 +0100 @@ -35,5 +35,6 @@ struct uts_namespace init_uts_ns = { EXPORT_SYMBOL_GPL(init_uts_ns); const char linux_banner[] = - "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" - LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; + "Linux version %s (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") %s\n"; + diff -NurpP --minimal linux-2.6.19.1/ipc/mqueue.c linux-2.6.19.1-vs2.2.0-rc6/ipc/mqueue.c --- linux-2.6.19.1/ipc/mqueue.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/ipc/mqueue.c 2006-11-08 04:57:52 +0100 @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include "util.h" @@ -151,17 +153,20 @@ static struct inode *mqueue_get_inode(st spin_lock(&mq_lock); if (u->mq_bytes + mq_bytes < u->mq_bytes || u->mq_bytes + mq_bytes > - p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) { + p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur || + !vx_ipcmsg_avail(p->vx_info, mq_bytes)) { spin_unlock(&mq_lock); goto out_inode; } u->mq_bytes += mq_bytes; + 
vx_ipcmsg_add(p->vx_info, u, mq_bytes); spin_unlock(&mq_lock); info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); if (!info->messages) { spin_lock(&mq_lock); u->mq_bytes -= mq_bytes; + vx_ipcmsg_sub(p->vx_info, u, mq_bytes); spin_unlock(&mq_lock); goto out_inode; } @@ -259,10 +264,14 @@ static void mqueue_delete_inode(struct i (info->attr.mq_maxmsg * info->attr.mq_msgsize)); user = info->user; if (user) { + struct vx_info *vxi = lookup_vx_info(user->xid); + spin_lock(&mq_lock); user->mq_bytes -= mq_bytes; + vx_ipcmsg_sub(vxi, user, mq_bytes); queues_count--; spin_unlock(&mq_lock); + put_vx_info(vxi); free_uid(user); } } @@ -747,7 +756,7 @@ asmlinkage long sys_mq_unlink(const char if (inode) atomic_inc(&inode->i_count); - err = vfs_unlink(dentry->d_parent->d_inode, dentry); + err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL); out_err: dput(dentry); diff -NurpP --minimal linux-2.6.19.1/ipc/msg.c linux-2.6.19.1-vs2.2.0-rc6/ipc/msg.c --- linux-2.6.19.1/ipc/msg.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/ipc/msg.c 2006-11-30 18:53:18 +0100 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -149,6 +150,7 @@ static int newque (struct ipc_namespace msq->q_perm.mode = msgflg & S_IRWXUGO; msq->q_perm.key = key; + msq->q_perm.xid = vx_current_xid(); msq->q_perm.security = NULL; retval = security_msg_queue_alloc(msq); @@ -903,6 +905,9 @@ static int sysvipc_msg_proc_show(struct { struct msg_queue *msq = it; + if (!vx_check(msq->q_perm.xid, VS_WATCH_P|VS_IDENT)) + return 0; + return seq_printf(s, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", msq->q_perm.key, diff -NurpP --minimal linux-2.6.19.1/ipc/sem.c linux-2.6.19.1-vs2.2.0-rc6/ipc/sem.c --- linux-2.6.19.1/ipc/sem.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/ipc/sem.c 2006-11-30 18:53:18 +0100 @@ -83,6 +83,8 @@ #include #include #include +#include +#include #include #include "util.h" @@ -230,6 +232,7 @@ static int newary (struct 
ipc_namespace sma->sem_perm.mode = (semflg & S_IRWXUGO); sma->sem_perm.key = key; + sma->sem_perm.xid = vx_current_xid(); sma->sem_perm.security = NULL; retval = security_sem_alloc(sma); @@ -245,6 +248,9 @@ static int newary (struct ipc_namespace return -ENOSPC; } ns->used_sems += nsems; + /* FIXME: obsoleted? */ + vx_semary_inc(sma); + vx_nsems_add(sma, nsems); sma->sem_id = sem_buildid(ns, id, sma->sem_perm.seq); sma->sem_base = (struct sem *) &sma[1]; @@ -526,6 +532,9 @@ static void freeary (struct ipc_namespac sem_unlock(sma); ns->used_sems -= sma->sem_nsems; + /* FIXME: obsoleted? */ + vx_nsems_sub(sma, sma->sem_nsems); + vx_semary_dec(sma); size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem); security_sem_free(sma); ipc_rcu_putref(sma); @@ -1403,6 +1412,9 @@ static int sysvipc_sem_proc_show(struct { struct sem_array *sma = it; + if (!vx_check(sma->sem_perm.xid, VS_WATCH_P|VS_IDENT)) + return 0; + return seq_printf(s, "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", sma->sem_perm.key, diff -NurpP --minimal linux-2.6.19.1/ipc/shm.c linux-2.6.19.1-vs2.2.0-rc6/ipc/shm.c --- linux-2.6.19.1/ipc/shm.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/ipc/shm.c 2006-11-30 18:53:18 +0100 @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include @@ -181,7 +183,12 @@ static void shm_open(struct vm_area_stru */ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) { - ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid); + int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + + vx_ipcshm_sub(vxi, shp, numpages); + ns->shm_tot -= numpages; + shm_rmid(ns, shp->id); shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) @@ -191,6 +198,7 @@ static void shm_destroy(struct ipc_names shp->mlock_user); fput (shp->shm_file); security_shm_free(shp); + put_vx_info(vxi); ipc_rcu_putref(shp); } @@ -282,11 +290,15 @@ static int newseg (struct 
ipc_namespace if (ns->shm_tot + numpages >= ns->shm_ctlall) return -ENOSPC; + if (!vx_ipcshm_avail(current->vx_info, numpages)) + return -ENOSPC; + shp = ipc_rcu_alloc(sizeof(*shp)); if (!shp) return -ENOMEM; shp->shm_perm.key = key; + shp->shm_perm.xid = vx_current_xid(); shp->shm_perm.mode = (shmflg & S_IRWXUGO); shp->mlock_user = NULL; @@ -339,6 +351,7 @@ static int newseg (struct ipc_namespace file->f_op = &shm_file_operations; ns->shm_tot += numpages; + vx_ipcshm_add(current->vx_info, key, numpages); shm_unlock(shp); return shp->id; @@ -993,6 +1006,9 @@ static int sysvipc_shm_proc_show(struct #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" + if (!vx_check(shp->shm_perm.xid, VS_WATCH_P|VS_IDENT)) + return 0; + if (sizeof(size_t) <= sizeof(int)) format = SMALL_STRING; else diff -NurpP --minimal linux-2.6.19.1/ipc/util.c linux-2.6.19.1-vs2.2.0-rc6/ipc/util.c --- linux-2.6.19.1/ipc/util.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/ipc/util.c 2006-11-30 18:53:18 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -261,7 +262,9 @@ int ipc_findkey(struct ipc_ids* ids, key */ for (id = 0; id <= max_id; id++) { p = ids->entries->p[id]; - if(p==NULL) + if (p==NULL) + continue; + if (!vx_check(p->xid, VS_WATCH_P|VS_IDENT)) continue; if (key == p->key) return id; @@ -574,6 +577,9 @@ int ipcperms (struct kern_ipc_perm *ipcp if (unlikely((err = audit_ipc_obj(ipcp)))) return err; + + if (!vx_check(ipcp->xid, VS_WATCH_P|VS_IDENT)) /* maybe just VS_IDENT? 
*/ + return -1; requested_mode = (flag >> 6) | (flag >> 3) | flag; granted_mode = ipcp->mode; if (current->euid == ipcp->cuid || current->euid == ipcp->uid) diff -NurpP --minimal linux-2.6.19.1/kernel/Makefile linux-2.6.19.1-vs2.2.0-rc6/kernel/Makefile --- linux-2.6.19.1/kernel/Makefile 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/Makefile 2006-11-08 23:23:13 +0100 @@ -10,6 +10,8 @@ obj-y = sched.o fork.o exec_domain.o kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o +obj-y += vserver/ + obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o diff -NurpP --minimal linux-2.6.19.1/kernel/capability.c linux-2.6.19.1-vs2.2.0-rc6/kernel/capability.c --- linux-2.6.19.1/kernel/capability.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/capability.c 2006-11-30 19:37:57 +0100 @@ -12,6 +12,7 @@ #include #include #include +#include #include unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ @@ -244,8 +245,12 @@ int __capable(struct task_struct *t, int } EXPORT_SYMBOL(__capable); +#include int capable(int cap) { + /* here for now so we don't require task locking */ + if (vs_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap)) + return 0; return __capable(current, cap); } EXPORT_SYMBOL(capable); diff -NurpP --minimal linux-2.6.19.1/kernel/compat.c linux-2.6.19.1-vs2.2.0-rc6/kernel/compat.c --- linux-2.6.19.1/kernel/compat.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/compat.c 2006-11-08 21:52:09 +0100 @@ -846,7 +846,7 @@ asmlinkage long compat_sys_time(compat_t compat_time_t i; struct timeval tv; - do_gettimeofday(&tv); + vx_gettimeofday(&tv); i = tv.tv_sec; if (tloc) { @@ -870,7 +870,7 @@ asmlinkage long compat_sys_stime(compat_ if (err) return err; - do_settimeofday(&tv); + vx_settimeofday(&tv); return 0; } diff -NurpP --minimal linux-2.6.19.1/kernel/exit.c 
linux-2.6.19.1-vs2.2.0-rc6/kernel/exit.c --- linux-2.6.19.1/kernel/exit.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/exit.c 2006-11-08 04:57:53 +0100 @@ -41,6 +41,9 @@ #include /* for audit_free() */ #include #include +#include +#include +#include #include #include @@ -437,9 +440,11 @@ static void close_files(struct files_str struct file * file = xchg(&fdt->fd[i], NULL); if (file) filp_close(file, files); + vx_openfd_dec(i); } i++; set >>= 1; + cond_resched(); } } } @@ -592,6 +597,11 @@ static void exit_mm(struct task_struct * static inline void choose_new_parent(struct task_struct *p, struct task_struct *reaper) { + /* check for reaper context */ + vxwprintk((p->xid != reaper->xid) && (reaper != child_reaper), + "rogue reaper: %p[%d,#%u] <> %p[%d,#%u]", + p, p->pid, p->xid, reaper, reaper->pid, reaper->xid); + /* * Make sure we're not reparenting to ourselves and that * the parent is not a zombie. @@ -674,7 +684,7 @@ forget_original_parent(struct task_struc do { reaper = next_thread(reaper); if (reaper == father) { - reaper = child_reaper; + reaper = vx_child_reaper(father); break; } } while (reaper->exit_state); @@ -698,7 +708,7 @@ forget_original_parent(struct task_struc if (father == p->real_parent) { /* reparent with a reaper, real father it's us */ - choose_new_parent(p, reaper); + choose_new_parent(p, vx_child_reaper(p)); reparent_thread(p, father, 0); } else { /* reparent ptraced task to its real parent */ @@ -934,6 +944,8 @@ fastcall NORET_TYPE void do_exit(long co tsk->exit_code = code; proc_exit_connector(tsk); + /* needs to stay before exit_notify() */ + exit_vx_info_early(tsk, code); exit_notify(tsk); exit_task_namespaces(tsk); #ifdef CONFIG_NUMA @@ -959,6 +971,10 @@ fastcall NORET_TYPE void do_exit(long co if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); + /* needs to stay after exit_notify() */ + exit_vx_info(tsk, code); + exit_nx_info(tsk); + preempt_disable(); /* causes final put_task_struct in finish_task_switch(). 
*/ tsk->state = TASK_DEAD; diff -NurpP --minimal linux-2.6.19.1/kernel/fork.c linux-2.6.19.1-vs2.2.0-rc6/kernel/fork.c --- linux-2.6.19.1/kernel/fork.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/fork.c 2006-11-30 20:55:45 +0100 @@ -48,6 +48,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -107,6 +111,8 @@ void free_task(struct task_struct *tsk) { free_thread_info(tsk->thread_info); rt_mutex_debug_task_free(tsk); + clr_vx_info(&tsk->vx_info); + clr_nx_info(&tsk->nx_info); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -214,6 +220,8 @@ static inline int dup_mmap(struct mm_str mm->free_area_cache = oldmm->mmap_base; mm->cached_hole_size = ~0UL; mm->map_count = 0; + __set_mm_counter(mm, file_rss, 0); + __set_mm_counter(mm, anon_rss, 0); cpus_clear(mm->cpu_vm_mask); mm->mm_rb = RB_ROOT; rb_link = &mm->mm_rb.rb_node; @@ -225,7 +233,7 @@ static inline int dup_mmap(struct mm_str if (mpnt->vm_flags & VM_DONTCOPY) { long pages = vma_pages(mpnt); - mm->total_vm -= pages; + vx_vmpages_sub(mm, pages); vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, -pages); continue; @@ -332,8 +340,6 @@ static struct mm_struct * mm_init(struct INIT_LIST_HEAD(&mm->mmlist); mm->core_waiters = 0; mm->nr_ptes = 0; - set_mm_counter(mm, file_rss, 0); - set_mm_counter(mm, anon_rss, 0); spin_lock_init(&mm->page_table_lock); rwlock_init(&mm->ioctx_list_lock); mm->ioctx_list = NULL; @@ -342,6 +348,7 @@ static struct mm_struct * mm_init(struct if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + set_vx_info(&mm->mm_vx_info, current->vx_info); return mm; } free_mm(mm); @@ -373,6 +380,7 @@ void fastcall __mmdrop(struct mm_struct BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + clr_vx_info(&mm->mm_vx_info); free_mm(mm); } @@ -478,6 +486,7 @@ static struct mm_struct *dup_mm(struct t goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); + mm->mm_vx_info = NULL; if (!mm_init(mm)) goto fail_nomem; @@ -505,6 +514,7 @@ 
fail_nocontext: * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() */ + clr_vx_info(&mm->mm_vx_info); mm_free_pgd(mm); free_mm(mm); return NULL; @@ -700,6 +710,8 @@ static struct files_struct *dup_fd(struc struct file *f = *old_fds++; if (f) { get_file(f); + /* FIXME: sum it first for check and performance */ + vx_openfd_inc(open_files - i); } else { /* * The fd may be claimed in the fd bitmap but not yet @@ -955,6 +967,8 @@ static struct task_struct *copy_process( { int retval; struct task_struct *p = NULL; + struct vx_info *vxi; + struct nx_info *nxi; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -989,12 +1003,30 @@ static struct task_struct *copy_process( DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + init_vx_info(&p->vx_info, current->vx_info); + init_nx_info(&p->nx_info, current->nx_info); + + /* check vserver memory */ + if (p->mm && !(clone_flags & CLONE_VM)) { + if (vx_vmpages_avail(p->mm, p->mm->total_vm)) + vx_pages_add(p->vx_info, RLIMIT_AS, p->mm->total_vm); + else + goto bad_fork_free; + } + if (p->mm && vx_flags(VXF_FORK_RSS, 0)) { + if (!vx_rss_avail(p->mm, get_mm_counter(p->mm, file_rss))) + goto bad_fork_cleanup_vm; + } + retval = -EAGAIN; + if (!vx_nproc_avail(1)) + goto bad_fork_cleanup_vm; + if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) - goto bad_fork_free; + goto bad_fork_cleanup_vm; } atomic_inc(&p->user->__count); @@ -1256,6 +1288,18 @@ static struct task_struct *copy_process( total_forks++; spin_unlock(&current->sighand->siglock); + + /* p is copy of current */ + vxi = p->vx_info; + if (vxi) { + claim_vx_info(vxi, p); + atomic_inc(&vxi->cvirt.nr_threads); + atomic_inc(&vxi->cvirt.total_forks); + vx_nproc_inc(p); + } + nxi = p->nx_info; + if (nxi) + claim_nx_info(nxi, p);
write_unlock_irq(&tasklist_lock); proc_fork_connector(p); return p; @@ -1297,6 +1341,9 @@ bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); +bad_fork_cleanup_vm: + if (p->mm && !(clone_flags & CLONE_VM)) + vx_pages_sub(p->vx_info, RLIMIT_AS, p->mm->total_vm); bad_fork_free: free_task(p); fork_out: @@ -1357,6 +1404,19 @@ long do_fork(unsigned long clone_flags, if (!pid) return -EAGAIN; + + /* kernel threads are host only */ + if ((clone_flags & CLONE_KTHREAD) && !vx_check(0, VS_ADMIN)) { + vxwprintk(1, "xid=%d tried to spawn a kernel thread.", + vx_current_xid()); + free_pid(pid); + return -EPERM; + } + + /* fake ipc/uts on namespace */ + if (clone_flags & CLONE_NEWNS) + clone_flags |= CLONE_NEWUTS|CLONE_NEWIPC; + nr = pid->nr; if (unlikely(current->ptrace)) { trace = fork_traceflag (clone_flags); diff -NurpP --minimal linux-2.6.19.1/kernel/kthread.c linux-2.6.19.1-vs2.2.0-rc6/kernel/kthread.c --- linux-2.6.19.1/kernel/kthread.c 2006-09-20 16:58:44 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/kthread.c 2006-11-08 04:57:52 +0100 @@ -123,7 +123,7 @@ static void keventd_create_kthread(void } else { wait_for_completion(&create->started); read_lock(&tasklist_lock); - create->result = find_task_by_pid(pid); + create->result = find_task_by_real_pid(pid); read_unlock(&tasklist_lock); } complete(&create->done); diff -NurpP --minimal linux-2.6.19.1/kernel/nsproxy.c linux-2.6.19.1-vs2.2.0-rc6/kernel/nsproxy.c --- linux-2.6.19.1/kernel/nsproxy.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/nsproxy.c 2006-11-14 02:14:48 +0100 @@ -22,11 +22,6 @@ struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); -static inline void get_nsproxy(struct nsproxy *ns) -{ - atomic_inc(&ns->count); -} - void get_task_namespaces(struct task_struct *tsk) { struct nsproxy *ns = tsk->nsproxy; diff -NurpP --minimal linux-2.6.19.1/kernel/pid.c linux-2.6.19.1-vs2.2.0-rc6/kernel/pid.c --- linux-2.6.19.1/kernel/pid.c 2006-11-30 
21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/pid.c 2006-12-04 01:47:02 +0100 @@ -27,6 +27,7 @@ #include #include #include +#include #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) static struct hlist_head *pid_hash; @@ -299,6 +300,10 @@ struct task_struct * fastcall pid_task(s */ struct task_struct *find_task_by_pid_type(int type, int nr) { + if (type == PIDTYPE_PID) + nr = vx_rmap_pid(nr); + else if (type == PIDTYPE_REALPID) + type = PIDTYPE_PID; return pid_task(find_pid(nr), type); } diff -NurpP --minimal linux-2.6.19.1/kernel/posix-timers.c linux-2.6.19.1-vs2.2.0-rc6/kernel/posix-timers.c --- linux-2.6.19.1/kernel/posix-timers.c 2006-11-30 21:19:43 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/posix-timers.c 2006-11-08 04:57:52 +0100 @@ -48,6 +48,7 @@ #include #include #include +#include /* * Management arrays for POSIX timers. Timers are kept in slab memory @@ -298,6 +299,10 @@ void do_schedule_next_timer(struct sigin int posix_timer_event(struct k_itimer *timr,int si_private) { + struct vx_info_save vxis; + int ret; + + enter_vx_info(task_get_vx_info(timr->it_process), &vxis); memset(&timr->sigq->info, 0, sizeof(siginfo_t)); timr->sigq->info.si_sys_private = si_private; /* Send signal to the process that owns this timer.*/ @@ -310,11 +315,11 @@ int posix_timer_event(struct k_itimer *t if (timr->it_sigev_notify & SIGEV_THREAD_ID) { struct task_struct *leader; - int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, - timr->it_process); + ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, + timr->it_process); if (likely(ret >= 0)) - return ret; + goto out; timr->it_sigev_notify = SIGEV_SIGNAL; leader = timr->it_process->group_leader; @@ -322,8 +327,12 @@ int posix_timer_event(struct k_itimer *t timr->it_process = leader; } - return send_group_sigqueue(timr->it_sigev_signo, timr->sigq, - timr->it_process); + ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq, + timr->it_process); +out: + leave_vx_info(&vxis); + 
put_vx_info(vxis.vxi); + return ret; } EXPORT_SYMBOL_GPL(posix_timer_event); @@ -372,7 +381,7 @@ static struct task_struct * good_sigeven struct task_struct *rtn = current->group_leader; if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || + (!(rtn = find_task_by_real_pid(event->sigev_notify_thread_id)) || rtn->tgid != current->tgid || (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) return NULL; diff -NurpP --minimal linux-2.6.19.1/kernel/printk.c linux-2.6.19.1-vs2.2.0-rc6/kernel/printk.c --- linux-2.6.19.1/kernel/printk.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/printk.c 2006-11-30 19:14:49 +0100 @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -185,18 +186,13 @@ int do_syslog(int type, char __user *buf unsigned long i, j, limit, count; int do_clear = 0; char c; - int error = 0; + int error; error = security_syslog(type); if (error) return error; - switch (type) { - case 0: /* Close log */ - break; - case 1: /* Open log */ - break; - case 2: /* Read from log */ + if ((type >= 2) && (type <= 4)) { error = -EINVAL; if (!buf || len < 0) goto out; @@ -207,6 +203,16 @@ int do_syslog(int type, char __user *buf error = -EFAULT; goto out; } + } + if (!vx_check(0, VS_ADMIN|VS_WATCH)) + return vx_do_syslog(type, buf, len); + + switch (type) { + case 0: /* Close log */ + break; + case 1: /* Open log */ + break; + case 2: /* Read from log */ error = wait_event_interruptible(log_wait, (log_start - log_end)); if (error) @@ -231,16 +237,6 @@ int do_syslog(int type, char __user *buf do_clear = 1; /* FALL THRU */ case 3: /* Read last kernel messages */ - error = -EINVAL; - if (!buf || len < 0) - goto out; - error = 0; - if (!len) - goto out; - if (!access_ok(VERIFY_WRITE, buf, len)) { - error = -EFAULT; - goto out; - } count = len; if (count > log_buf_len) count = log_buf_len; diff -NurpP --minimal linux-2.6.19.1/kernel/ptrace.c linux-2.6.19.1-vs2.2.0-rc6/kernel/ptrace.c 
--- linux-2.6.19.1/kernel/ptrace.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/ptrace.c 2006-11-30 19:38:18 +0100 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -144,6 +145,11 @@ static int may_attach(struct task_struct dumpable = task->mm->dumpable; if (!dumpable && !capable(CAP_SYS_PTRACE)) return -EPERM; + if (!vx_check(task->xid, VS_ADMIN_P|VS_IDENT)) + return -EPERM; + if (!vx_check(task->xid, VS_IDENT) && + !task_vx_flags(task, VXF_STATE_ADMIN, 0)) + return -EACCES; return security_ptrace(current, task); } @@ -468,6 +474,10 @@ asmlinkage long sys_ptrace(long request, goto out; } + ret = -EPERM; + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) + goto out_put_task_struct; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_put_task_struct; diff -NurpP --minimal linux-2.6.19.1/kernel/sched.c linux-2.6.19.1-vs2.2.0-rc6/kernel/sched.c --- linux-2.6.19.1/kernel/sched.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/sched.c 2006-11-30 18:53:18 +0100 @@ -55,6 +55,8 @@ #include #include +#include +#include /* * Convert user-nice values [ -20 ... 0 ... 
19 ] @@ -243,6 +245,16 @@ struct rq { struct task_struct *migration_thread; struct list_head migration_queue; #endif + unsigned long norm_time; + unsigned long idle_time; +#ifdef CONFIG_VSERVER_IDLETIME + int idle_skip; +#endif +#ifdef CONFIG_VSERVER_HARDCPU + struct list_head hold_queue; + unsigned long nr_onhold; + int idle_tokens; +#endif #ifdef CONFIG_SCHEDSTATS /* latency stats */ @@ -672,6 +684,7 @@ sched_info_switch(struct task_struct *pr */ static void dequeue_task(struct task_struct *p, struct prio_array *array) { + BUG_ON(p->state & TASK_ONHOLD); array->nr_active--; list_del(&p->run_list); if (list_empty(array->queue + p->prio)) @@ -680,6 +693,7 @@ static void dequeue_task(struct task_str static void enqueue_task(struct task_struct *p, struct prio_array *array) { + BUG_ON(p->state & TASK_ONHOLD); sched_info_queued(p); list_add_tail(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); @@ -693,12 +707,14 @@ static void enqueue_task(struct task_str */ static void requeue_task(struct task_struct *p, struct prio_array *array) { + BUG_ON(p->state & TASK_ONHOLD); list_move_tail(&p->run_list, array->queue + p->prio); } static inline void enqueue_task_head(struct task_struct *p, struct prio_array *array) { + BUG_ON(p->state & TASK_ONHOLD); list_add(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); array->nr_active++; @@ -727,6 +743,10 @@ static inline int __normal_prio(struct t bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; prio = p->static_prio - bonus; + + /* adjust effective priority */ + prio = vx_adjust_prio(p, prio, MAX_USER_PRIO); + if (prio < MAX_RT_PRIO) prio = MAX_RT_PRIO; if (prio > MAX_PRIO-1) @@ -836,6 +856,9 @@ static int effective_prio(struct task_st return p->prio; } +#include "sched_mon.h" + + /* * __activate_task - move a task to the runqueue. 
*/ @@ -845,6 +868,7 @@ static void __activate_task(struct task_ if (batch_task(p)) target = rq->expired; + vxm_activate_task(p, rq); enqueue_task(p, target); inc_nr_running(p, rq); } @@ -854,6 +878,7 @@ static void __activate_task(struct task_ */ static inline void __activate_idle_task(struct task_struct *p, struct rq *rq) { + vxm_activate_idle(p, rq); enqueue_task_head(p, rq->active); inc_nr_running(p, rq); } @@ -975,19 +1000,30 @@ static void activate_task(struct task_st } p->timestamp = now; + vx_activate_task(p); __activate_task(p, rq); } /* * deactivate_task - remove a task from the runqueue. */ -static void deactivate_task(struct task_struct *p, struct rq *rq) +static void __deactivate_task(struct task_struct *p, struct rq *rq) { dec_nr_running(p, rq); dequeue_task(p, p->array); + vxm_deactivate_task(p, rq); p->array = NULL; } +static inline +void deactivate_task(struct task_struct *p, struct rq *rq) +{ + vx_deactivate_task(p); + __deactivate_task(p, rq); +} + +#include "sched_hard.h" + /* * resched_task - mark a task 'to be rescheduled now'. * @@ -1063,6 +1099,7 @@ migrate_task(struct task_struct *p, int { struct rq *rq = task_rq(p); + vxm_migrate_task(p, rq, dest_cpu); /* * If the task is not on a runqueue (and not running), then * it is sufficient to simply update the task's cpu field. @@ -1391,6 +1428,12 @@ static int try_to_wake_up(struct task_st rq = task_rq_lock(p, &flags); old_state = p->state; + + /* we need to unhold suspended tasks */ + if (old_state & TASK_ONHOLD) { + vx_unhold_task(p, rq); + old_state = p->state; + } if (!(old_state & state)) goto out; @@ -1496,6 +1539,7 @@ out_activate: #endif /* CONFIG_SMP */ if (old_state == TASK_UNINTERRUPTIBLE) { rq->nr_uninterruptible--; + vx_uninterruptible_dec(p); /* * Tasks on involuntary sleep don't earn * sleep_avg beyond just interactive state. 
@@ -1642,6 +1686,7 @@ void fastcall wake_up_new_task(struct ta p->prio = effective_prio(p); + vx_activate_task(p); if (likely(cpu == this_cpu)) { if (!(clone_flags & CLONE_VM)) { /* @@ -1653,6 +1698,7 @@ void fastcall wake_up_new_task(struct ta __activate_task(p, rq); else { p->prio = current->prio; + BUG_ON(p->state & TASK_ONHOLD); p->normal_prio = current->normal_prio; list_add_tail(&p->run_list, &current->run_list); p->array = current->array; @@ -2973,13 +3019,16 @@ static inline int expired_starving(struc void account_user_time(struct task_struct *p, cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + struct vx_info *vxi = p->vx_info; /* p is _always_ current */ cputime64_t tmp; + int nice = (TASK_NICE(p) > 0); p->utime = cputime_add(p->utime, cputime); + vx_account_user(vxi, cputime, nice); /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); - if (TASK_NICE(p) > 0) + if (nice) cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); @@ -2995,10 +3044,12 @@ void account_system_time(struct task_str cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + struct vx_info *vxi = p->vx_info; /* p is _always_ current */ struct rq *rq = this_rq(); cputime64_t tmp; p->stime = cputime_add(p->stime, cputime); + vx_account_system(vxi, cputime, (p == rq->idle)); /* Add system time to cpustat.
*/ tmp = cputime_to_cputime64(cputime); @@ -3052,12 +3103,14 @@ void scheduler_tick(void) struct rq *rq = cpu_rq(cpu); update_cpu_clock(p, rq, now); + vxm_sync(now, cpu); rq->timestamp_last_tick = now; if (p == rq->idle) { if (wake_priority_sleeper(rq)) goto out; + vx_idle_resched(rq); rebalance_tick(cpu, rq, SCHED_IDLE); return; } @@ -3090,7 +3143,7 @@ void scheduler_tick(void) } goto out_unlock; } - if (!--p->time_slice) { + if (vx_need_resched(p, --p->time_slice, cpu)) { dequeue_task(p, rq->active); set_tsk_need_resched(p); p->prio = effective_prio(p); @@ -3377,14 +3430,25 @@ need_resched_nonpreemptible: unlikely(signal_pending(prev)))) prev->state = TASK_RUNNING; else { - if (prev->state == TASK_UNINTERRUPTIBLE) + if (prev->state == TASK_UNINTERRUPTIBLE) { rq->nr_uninterruptible++; + vx_uninterruptible_inc(prev); + } deactivate_task(prev, rq); } } cpu = smp_processor_id(); + vx_set_rq_time(rq, jiffies); +try_unhold: + vx_try_unhold(rq, cpu); +pick_next: + if (unlikely(!rq->nr_running)) { + /* can we skip idle time? */ + if (vx_try_skip(rq, cpu)) + goto try_unhold; + idle_balance(cpu, rq); if (!rq->nr_running) { next = rq->idle; @@ -3411,6 +3475,10 @@ need_resched_nonpreemptible: queue = array->queue + idx; next = list_entry(queue->next, struct task_struct, run_list); + /* check before we schedule this context */ + if (!vx_schedule(next, rq, cpu)) + goto pick_next; + if (!rt_task(next) && interactive_sleep(next->sleep_type)) { unsigned long long delta = now - next->timestamp; if (unlikely((long long)(now - next->timestamp) < 0)) @@ -4013,7 +4081,7 @@ asmlinkage long sys_nice(int increment) nice = 19; if (increment < 0 && !can_nice(current, nice)) - return -EPERM; + return vx_flags(VXF_IGNEG_NICE, 0) ? 
0 : -EPERM; retval = security_task_setnice(current, nice); if (retval) @@ -4186,6 +4254,7 @@ recheck: oldprio = p->prio; __setscheduler(p, policy, param->sched_priority); if (array) { + vx_activate_task(p); __activate_task(p, rq); /* * Reschedule if we are currently running on this runqueue and @@ -4976,6 +5045,7 @@ static int __migrate_task(struct task_st p->timestamp = p->timestamp - rq_src->timestamp_last_tick + rq_dest->timestamp_last_tick; deactivate_task(p, rq_src); + vx_activate_task(p); __activate_task(p, rq_dest); if (TASK_PREEMPTS_CURR(p, rq_dest)) resched_task(rq_dest->curr); @@ -6819,7 +6889,10 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->migration_queue); #endif atomic_set(&rq->nr_iowait, 0); - +#ifdef CONFIG_VSERVER_HARDCPU + INIT_LIST_HEAD(&rq->hold_queue); + rq->nr_onhold = 0; +#endif for (j = 0; j < 2; j++) { array = rq->arrays + j; for (k = 0; k < MAX_PRIO; k++) { @@ -6895,6 +6968,7 @@ void normalize_rt_tasks(void) deactivate_task(p, task_rq(p)); __setscheduler(p, SCHED_NORMAL, 0); if (array) { + vx_activate_task(p); __activate_task(p, task_rq(p)); resched_task(rq->curr); } diff -NurpP --minimal linux-2.6.19.1/kernel/sched_hard.h linux-2.6.19.1-vs2.2.0-rc6/kernel/sched_hard.h --- linux-2.6.19.1/kernel/sched_hard.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/sched_hard.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,324 @@ + +#ifdef CONFIG_VSERVER_IDLELIMIT + +/* + * vx_idle_resched - reschedule after maxidle + */ +static inline +void vx_idle_resched(struct rq *rq) +{ + /* maybe have a better criterion for paused */ + if (!--rq->idle_tokens && !list_empty(&rq->hold_queue)) + set_need_resched(); +} + +#else /* !CONFIG_VSERVER_IDLELIMIT */ + +#define vx_idle_resched(rq) + +#endif /* CONFIG_VSERVER_IDLELIMIT */ + + + +#ifdef CONFIG_VSERVER_IDLETIME + +#define vx_set_rq_min_skip(rq, min) \ + (rq)->idle_skip = (min) + +#define vx_save_min_skip(ret, min, val) \ + __vx_save_min_skip(ret, min, val) + +static inline +void 
__vx_save_min_skip(int ret, int *min, int val) +{ + if (ret > -2) + return; + if ((*min > val) || !*min) + *min = val; +} + +static inline +int vx_try_skip(struct rq *rq, int cpu) +{ + /* artificially advance time */ + if (rq->idle_skip > 0) { + vxdprintk(list_empty(&rq->hold_queue), + "hold queue empty on cpu %d", cpu); + rq->idle_time += rq->idle_skip; + vxm_idle_skip(rq, cpu); + return 1; + } + return 0; +} + +#else /* !CONFIG_VSERVER_IDLETIME */ + +#define vx_set_rq_min_skip(rq, min) \ + ({ int dummy = (min); dummy; }) + +#define vx_save_min_skip(ret, min, val) + +static inline +int vx_try_skip(struct rq *rq, int cpu) +{ + return 0; +} + +#endif /* CONFIG_VSERVER_IDLETIME */ + + + +#ifdef CONFIG_VSERVER_HARDCPU + +#define vx_set_rq_max_idle(rq, max) \ + (rq)->idle_tokens = (max) + +#define vx_save_max_idle(ret, min, val) \ + __vx_save_max_idle(ret, min, val) + +static inline +void __vx_save_max_idle(int ret, int *min, int val) +{ + if (*min > val) + *min = val; +} + + +/* + * vx_hold_task - put a task on the hold queue + */ +static inline +void vx_hold_task(struct task_struct *p, struct rq *rq) +{ + __deactivate_task(p, rq); + p->state |= TASK_ONHOLD; + /* a new one on hold */ + rq->nr_onhold++; + vxm_hold_task(p, rq); + list_add_tail(&p->run_list, &rq->hold_queue); +} + +/* + * vx_unhold_task - put a task back to the runqueue + */ +static inline +void vx_unhold_task(struct task_struct *p, struct rq *rq) +{ + list_del(&p->run_list); + /* one less waiting */ + rq->nr_onhold--; + p->state &= ~TASK_ONHOLD; + enqueue_task(p, rq->expired); + inc_nr_running(p, rq); + vxm_unhold_task(p, rq); + + if (p->static_prio < rq->best_expired_prio) + rq->best_expired_prio = p->static_prio; +} + +unsigned long nr_onhold(void) +{ + unsigned long i, sum = 0; + + for_each_online_cpu(i) + sum += cpu_rq(i)->nr_onhold; + + return sum; +} + + + +static inline +int __vx_tokens_avail(struct _vx_sched_pc *sched_pc) +{ + return sched_pc->tokens; +} + +static inline +void 
__vx_consume_token(struct _vx_sched_pc *sched_pc) +{ + sched_pc->tokens--; +} + +static inline +int vx_need_resched(struct task_struct *p, int slice, int cpu) +{ + struct vx_info *vxi = p->vx_info; + + if (vx_info_flags(vxi, VXF_SCHED_HARD|VXF_SCHED_PRIO, 0)) { + struct _vx_sched_pc *sched_pc = + &vx_per_cpu(vxi, sched_pc, cpu); + int tokens; + + /* maybe we can simplify that to decrement + the token counter unconditional? */ + + if ((tokens = __vx_tokens_avail(sched_pc)) > 0) + __vx_consume_token(sched_pc); + + /* for tokens > 0, one token was consumed */ + if (tokens < 2) + slice = 0; + } + vxm_need_resched(p, slice, cpu); + return (slice == 0); +} + +/* vx_set_rq_time - store 'time' in the runqueue's norm_time field */ +#define vx_set_rq_time(rq, time) do { \ + (rq)->norm_time = (time); \ +} while (0) + + +static inline +void vx_try_unhold(struct rq *rq, int cpu) +{ + struct vx_info *vxi = NULL; + struct list_head *l, *n; + int maxidle = HZ; + int minskip = 0; + + /* nothing to do? what about pause? */ + if (list_empty(&rq->hold_queue)) + return; + + list_for_each_safe(l, n, &rq->hold_queue) { + int ret, delta_min[2]; + struct _vx_sched_pc *sched_pc; + struct task_struct *p; + + p = list_entry(l, struct task_struct, run_list); + /* don't bother with same context */ + if (vxi == p->vx_info) + continue; + + vxi = p->vx_info; + /* ignore paused contexts */ + if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0)) + continue; + + sched_pc = &vx_per_cpu(vxi, sched_pc, cpu); + + /* recalc tokens */ + vxm_sched_info(sched_pc, vxi, cpu); + ret = vx_tokens_recalc(sched_pc, + &rq->norm_time, &rq->idle_time, delta_min); + vxm_tokens_recalc(sched_pc, rq, vxi, cpu); + + if (ret > 0) { + /* we found a runnable context */ + vx_unhold_task(p, rq); + break; + } + vx_save_max_idle(ret, &maxidle, delta_min[0]); + vx_save_min_skip(ret, &minskip, delta_min[1]); + } + vx_set_rq_max_idle(rq, maxidle); + vx_set_rq_min_skip(rq, minskip); + vxm_rq_max_min(rq, cpu); +} + + +static inline +int vx_schedule(struct task_struct *next, struct rq *rq, int cpu) +{ + struct vx_info
*vxi = next->vx_info; + struct _vx_sched_pc *sched_pc; + int delta_min[2]; + int flags, ret; + + if (!vxi) + return 1; + + flags = vxi->vx_flags; + + if (unlikely(vs_check_flags(flags , VXF_SCHED_PAUSE, 0))) + goto put_on_hold; + if (!vs_check_flags(flags , VXF_SCHED_HARD|VXF_SCHED_PRIO, 0)) + return 1; + + sched_pc = &vx_per_cpu(vxi, sched_pc, cpu); +#ifdef CONFIG_SMP + /* update scheduler params */ + if (cpu_isset(cpu, vxi->sched.update)) { + vx_update_sched_param(&vxi->sched, sched_pc); + vxm_update_sched(sched_pc, vxi, cpu); + cpu_clear(cpu, vxi->sched.update); + } +#endif + vxm_sched_info(sched_pc, vxi, cpu); + ret = vx_tokens_recalc(sched_pc, + &rq->norm_time, &rq->idle_time, delta_min); + vxm_tokens_recalc(sched_pc, rq, vxi, cpu); + + if (!vs_check_flags(flags , VXF_SCHED_HARD, 0)) + return 1; + + if (unlikely(ret < 0)) { + vx_save_max_idle(ret, &rq->idle_tokens, delta_min[0]); + vx_save_min_skip(ret, &rq->idle_skip, delta_min[1]); + vxm_rq_max_min(rq, cpu); + put_on_hold: + vx_hold_task(next, rq); + return 0; + } + return 1; +} + + +#else /* CONFIG_VSERVER_HARDCPU */ + +static inline +void vx_hold_task(struct task_struct *p, struct rq *rq) +{ + return; +} + +static inline +void vx_unhold_task(struct task_struct *p, struct rq *rq) +{ + return; +} + +unsigned long nr_onhold(void) +{ + return 0; +} + + +static inline +int vx_need_resched(struct task_struct *p, int slice, int cpu) +{ + return (slice == 0); +} + + +#define vx_set_rq_time(rq, time) + +static inline +void vx_try_unhold(struct rq *rq, int cpu) +{ + return; +} + +static inline +int vx_schedule(struct task_struct *next, struct rq *rq, int cpu) +{ + struct vx_info *vxi = next->vx_info; + struct _vx_sched_pc *sched_pc; + int delta_min[2]; + int ret; + + if (!vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) + return 1; + + sched_pc = &vx_per_cpu(vxi, sched_pc, cpu); + vxm_sched_info(sched_pc, vxi, cpu); + ret = vx_tokens_recalc(sched_pc, + &rq->norm_time, &rq->idle_time, delta_min); + vxm_tokens_recalc(sched_pc, 
rq, vxi, cpu); + return 1; +} + +#endif /* CONFIG_VSERVER_HARDCPU */ + diff -NurpP --minimal linux-2.6.19.1/kernel/sched_mon.h linux-2.6.19.1-vs2.2.0-rc6/kernel/sched_mon.h --- linux-2.6.19.1/kernel/sched_mon.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/sched_mon.h 2006-11-08 04:57:48 +0100 @@ -0,0 +1,200 @@ + +#include + +#ifdef CONFIG_VSERVER_MONITOR + +#ifdef CONFIG_VSERVER_HARDCPU +#define HARDCPU(x) (x) +#else +#define HARDCPU(x) (0) +#endif + +#ifdef CONFIG_VSERVER_IDLETIME +#define IDLETIME(x) (x) +#else +#define IDLETIME(x) (0) +#endif + +struct _vx_mon_entry *vxm_advance(int cpu); + + +static inline +void __vxm_basic(struct _vx_mon_entry *entry, xid_t xid, int type) +{ + entry->type = type; + entry->xid = xid; +} + +static inline +void __vxm_sync(int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + __vxm_basic(entry, 0, VXM_SYNC); + entry->ev.sec = xtime.tv_sec; + entry->ev.nsec = xtime.tv_nsec; +} + +static inline +void __vxm_task(struct task_struct *p, int type) +{ + struct _vx_mon_entry *entry = vxm_advance(task_cpu(p)); + + __vxm_basic(entry, p->xid, type); + entry->ev.tsk.pid = p->pid; + entry->ev.tsk.state = p->state; +} + +static inline +void __vxm_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + __vxm_basic(entry, vxi->vx_id, (VXM_SCHED | s->flags)); + entry->sd.tokens = s->tokens; + entry->sd.norm_time = s->norm_time; + entry->sd.idle_time = s->idle_time; +} + +static inline +void __vxm_rqinfo1(struct rq *q, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + entry->type = VXM_RQINFO_1; + entry->xid = ((unsigned long)q >> 16) & 0xffff; + entry->q1.running = q->nr_running; + entry->q1.onhold = HARDCPU(q->nr_onhold); + entry->q1.iowait = atomic_read(&q->nr_iowait); + entry->q1.uintr = q->nr_uninterruptible; + entry->q1.idle_tokens = IDLETIME(q->idle_tokens); +} + +static inline +void __vxm_rqinfo2(struct rq *q, int cpu) +{ + struct 
_vx_mon_entry *entry = vxm_advance(cpu); + + entry->type = VXM_RQINFO_2; + entry->xid = (unsigned long)q & 0xffff; + entry->q2.norm_time = q->norm_time; + entry->q2.idle_time = q->idle_time; + entry->q2.idle_skip = IDLETIME(q->idle_skip); +} + +static inline +void __vxm_update(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + __vxm_basic(entry, vxi->vx_id, VXM_UPDATE); + entry->ev.tokens = s->tokens; +} + +static inline +void __vxm_update1(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_1); + entry->u1.tokens_max = s->tokens_max; + entry->u1.fill_rate = s->fill_rate[0]; + entry->u1.interval = s->interval[0]; +} + +static inline +void __vxm_update2(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_2); + entry->u2.tokens_min = s->tokens_min; + entry->u2.fill_rate = s->fill_rate[1]; + entry->u2.interval = s->interval[1]; +} + + +#define vxm_activate_task(p,q) __vxm_task(p, VXM_ACTIVATE) +#define vxm_activate_idle(p,q) __vxm_task(p, VXM_IDLE) +#define vxm_deactivate_task(p,q) __vxm_task(p, VXM_DEACTIVATE) +#define vxm_hold_task(p,q) __vxm_task(p, VXM_HOLD) +#define vxm_unhold_task(p,q) __vxm_task(p, VXM_UNHOLD) + +static inline +void vxm_migrate_task(struct task_struct *p, struct rq *rq, int dest) +{ + __vxm_task(p, VXM_MIGRATE); + __vxm_rqinfo1(rq, task_cpu(p)); + __vxm_rqinfo2(rq, task_cpu(p)); +} + +static inline +void vxm_idle_skip(struct rq *rq, int cpu) +{ + __vxm_rqinfo1(rq, cpu); + __vxm_rqinfo2(rq, cpu); +} + +static inline +void vxm_need_resched(struct task_struct *p, int slice, int cpu) +{ + if (slice) + return; + + __vxm_task(p, VXM_RESCHED); +} + +static inline +void vxm_sync(unsigned long now, int cpu) +{ + if (!CONFIG_VSERVER_MONITOR_SYNC || + (now % CONFIG_VSERVER_MONITOR_SYNC)) + return; 
+ + __vxm_sync(cpu); +} + +#define vxm_sched_info(s,v,c) __vxm_sched(s,v,c) + +static inline +void vxm_tokens_recalc(struct _vx_sched_pc *s, struct rq *rq, + struct vx_info *vxi, int cpu) +{ + __vxm_sched(s, vxi, cpu); + __vxm_rqinfo2(rq, cpu); +} + +static inline +void vxm_update_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + __vxm_sched(s, vxi, cpu); + __vxm_update(s, vxi, cpu); + __vxm_update1(s, vxi, cpu); + __vxm_update2(s, vxi, cpu); +} + +static inline +void vxm_rq_max_min(struct rq *rq, int cpu) +{ + __vxm_rqinfo1(rq, cpu); + __vxm_rqinfo2(rq, cpu); +} + +#else /* CONFIG_VSERVER_MONITOR */ + +#define vxm_activate_task(t,q) do { } while (0) +#define vxm_activate_idle(t,q) do { } while (0) +#define vxm_deactivate_task(t,q) do { } while (0) +#define vxm_hold_task(t,q) do { } while (0) +#define vxm_unhold_task(t,q) do { } while (0) +#define vxm_migrate_task(t,q,d) do { } while (0) +#define vxm_idle_skip(q,c) do { } while (0) +#define vxm_need_resched(t,s,c) do { } while (0) +#define vxm_sync(s,c) do { } while (0) +#define vxm_sched_info(s,v,c) do { } while (0) +#define vxm_tokens_recalc(s,q,v,c) do { } while (0) +#define vxm_update_sched(s,v,c) do { } while (0) +#define vxm_rq_max_min(q,c) do { } while (0) + +#endif /* CONFIG_VSERVER_MONITOR */ + diff -NurpP --minimal linux-2.6.19.1/kernel/signal.c linux-2.6.19.1-vs2.2.0-rc6/kernel/signal.c --- linux-2.6.19.1/kernel/signal.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/signal.c 2006-11-30 19:43:08 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -577,17 +578,30 @@ static int check_kill_permission(int sig struct task_struct *t) { int error = -EINVAL; + if (!valid_signal(sig)) return error; + + if ((info != SEND_SIG_NOINFO) && + (is_si_special(info) || !SI_FROMUSER(info))) + goto skip; + error = -EPERM; - if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) - && ((sig != SIGCONT) || + if (((sig != SIGCONT) || 
(current->signal->session != t->signal->session)) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) && !capable(CAP_KILL)) return error; + error = -ESRCH; + if (!vx_check(vx_task_xid(t), VS_WATCH_P|VS_IDENT)) { + vxwprintk(current->xid, + "signal xid mismatch %p[#%u,%u] xid=#%u\n", + t, vx_task_xid(t), t->pid, current->xid); + return error; + } +skip: error = security_task_kill(t, info, sig, 0); if (!error) audit_signal_info(sig, t); /* Let audit system see the signal */ @@ -1125,7 +1139,7 @@ int kill_pid_info(int sig, struct siginf } p = pid_task(pid, PIDTYPE_PID); error = -ESRCH; - if (p) + if (p && vx_check(vx_task_xid(p), VS_IDENT)) error = group_send_sig_info(sig, info, p); if (unlikely(acquired_tasklist_lock)) read_unlock(&tasklist_lock); @@ -1197,7 +1211,8 @@ static int kill_something_info(int sig, read_lock(&tasklist_lock); for_each_process(p) { - if (p->pid > 1 && p->tgid != current->tgid) { + if (vx_check(vx_task_xid(p), VS_ADMIN_P|VS_IDENT) && + p->pid > 1 && p->tgid != current->tgid) { int err = group_send_sig_info(sig, info, p); ++count; if (err != -EPERM) @@ -1881,6 +1896,11 @@ relock: if (current == child_reaper) continue; + /* virtual init is protected against user signals */ + if ((info->si_code == SI_USER) && + vx_current_initpid(current->pid)) + continue; + if (sig_kernel_stop(signr)) { /* * The default action is to stop all threads in diff -NurpP --minimal linux-2.6.19.1/kernel/softirq.c linux-2.6.19.1-vs2.2.0-rc6/kernel/softirq.c --- linux-2.6.19.1/kernel/softirq.c 2006-12-13 07:46:36 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/softirq.c 2006-12-13 07:46:51 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include /* diff -NurpP --minimal linux-2.6.19.1/kernel/sys.c linux-2.6.19.1-vs2.2.0-rc6/kernel/sys.c --- linux-2.6.19.1/kernel/sys.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/sys.c 2006-12-02 01:37:05 +0100 @@ -10,6 +10,7 @@ #include #include 
#include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include #include #include @@ -569,7 +571,10 @@ static int set_one_prio(struct task_stru goto out; } if (niceval < task_nice(p) && !can_nice(p, niceval)) { - error = -EACCES; + if (vx_flags(VXF_IGNEG_NICE, 0)) + error = 0; + else + error = -EACCES; goto out; } no_nice = security_task_setnice(p, niceval); @@ -621,7 +626,8 @@ asmlinkage long sys_setpriority(int whic if (!who) who = current->uid; else - if ((who != current->uid) && !(user = find_user(who))) + if ((who != current->uid) && + !(user = find_user(vx_current_xid(), who))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) @@ -679,7 +685,8 @@ asmlinkage long sys_getpriority(int whic if (!who) who = current->uid; else - if ((who != current->uid) && !(user = find_user(who))) + if ((who != current->uid) && + !(user = find_user(vx_current_xid(), who))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) @@ -792,6 +799,9 @@ void kernel_power_off(void) machine_power_off(); } EXPORT_SYMBOL_GPL(kernel_power_off); + +long vs_reboot(unsigned int, void __user *); + /* * Reboot system call: for obvious reasons only root may call it, * and even root needs to set up some magic numbers in the registers @@ -822,6 +832,9 @@ asmlinkage long sys_reboot(int magic1, i if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) cmd = LINUX_REBOOT_CMD_HALT; + if (!vx_check(0, VS_ADMIN|VS_WATCH)) + return vs_reboot(cmd, arg); + lock_kernel(); switch (cmd) { case LINUX_REBOOT_CMD_RESTART: @@ -1001,7 +1014,7 @@ static int set_user(uid_t new_ruid, int { struct user_struct *new_user; - new_user = alloc_uid(new_ruid); + new_user = alloc_uid(vx_current_xid(), new_ruid); if (!new_user) return -EAGAIN; @@ -1356,15 +1369,18 @@ asmlinkage long sys_setpgid(pid_t pid, p { struct task_struct *p; struct task_struct *group_leader = current->group_leader; + pid_t rpgid; int err = -EINVAL; if (!pid) - pid = 
group_leader->pid; + pid = vx_map_pid(group_leader->pid); if (!pgid) pgid = pid; if (pgid < 0) return -EINVAL; + rpgid = vx_rmap_pid(pgid); + /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. -DaveM */ @@ -1399,22 +1415,22 @@ asmlinkage long sys_setpgid(pid_t pid, p if (pgid != pid) { struct task_struct *p; - do_each_task_pid(pgid, PIDTYPE_PGID, p) { + do_each_task_pid(rpgid, PIDTYPE_PGID, p) { if (p->signal->session == group_leader->signal->session) goto ok_pgid; - } while_each_task_pid(pgid, PIDTYPE_PGID, p); + } while_each_task_pid(rpgid, PIDTYPE_PGID, p); goto out; } ok_pgid: - err = security_task_setpgid(p, pgid); + err = security_task_setpgid(p, rpgid); if (err) goto out; - if (process_group(p) != pgid) { + if (process_group(p) != rpgid) { detach_pid(p, PIDTYPE_PGID); - p->signal->pgrp = pgid; - attach_pid(p, PIDTYPE_PGID, pgid); + p->signal->pgrp = rpgid; + attach_pid(p, PIDTYPE_PGID, rpgid); } err = 0; @@ -1427,7 +1443,7 @@ out: asmlinkage long sys_getpgid(pid_t pid) { if (!pid) - return process_group(current); + return vx_rmap_pid(process_group(current)); else { int retval; struct task_struct *p; @@ -1439,7 +1455,7 @@ asmlinkage long sys_getpgid(pid_t pid) if (p) { retval = security_task_getpgid(p); if (!retval) - retval = process_group(p); + retval = vx_rmap_pid(process_group(p)); } read_unlock(&tasklist_lock); return retval; @@ -1789,7 +1805,7 @@ asmlinkage long sys_sethostname(char __u int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -1834,7 +1850,7 @@ asmlinkage long sys_setdomainname(char _ int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -1901,7 +1917,7 @@ asmlinkage long sys_setrlimit(unsigned i return 
-EINVAL; old_rlim = current->signal->rlim + resource; if ((new_rlim.rlim_max > old_rlim->rlim_max) && - !capable(CAP_SYS_RESOURCE)) + !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT)) return -EPERM; if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) return -EPERM; diff -NurpP --minimal linux-2.6.19.1/kernel/sysctl.c linux-2.6.19.1-vs2.2.0-rc6/kernel/sysctl.c --- linux-2.6.19.1/kernel/sysctl.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/sysctl.c 2006-12-02 01:37:05 +0100 @@ -87,6 +87,7 @@ static int ngroups_max = NGROUPS_MAX; #ifdef CONFIG_KMOD extern char modprobe_path[]; #endif +extern char vshelper_path[]; #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif @@ -456,6 +457,15 @@ static ctl_table kern_table[] = { .strategy = &sysctl_string, }, #endif + { + .ctl_name = KERN_VSHELPER, + .procname = "vshelper", + .data = &vshelper_path, + .maxlen = 256, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, #ifdef CONFIG_CHR_DEV_SG { .ctl_name = KERN_SG_BIG_BUFF, diff -NurpP --minimal linux-2.6.19.1/kernel/time.c linux-2.6.19.1-vs2.2.0-rc6/kernel/time.c --- linux-2.6.19.1/kernel/time.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/time.c 2006-11-08 04:57:44 +0100 @@ -61,7 +61,7 @@ asmlinkage long sys_time(time_t __user * time_t i; struct timeval tv; - do_gettimeofday(&tv); + vx_gettimeofday(&tv); i = tv.tv_sec; if (tloc) { @@ -92,7 +92,7 @@ asmlinkage long sys_stime(time_t __user if (err) return err; - do_settimeofday(&tv); + vx_settimeofday(&tv); return 0; } @@ -102,7 +102,7 @@ asmlinkage long sys_gettimeofday(struct { if (likely(tv != NULL)) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (copy_to_user(tv, &ktv, sizeof(ktv))) return -EFAULT; } @@ -176,7 +176,7 @@ int do_sys_settimeofday(struct timespec /* SMP safe, again the code in arch/foo/time.c should * globally block out interrupts when it runs. 
*/ - return do_settimeofday(tv); + return vx_settimeofday(tv); } return 0; } @@ -359,7 +359,7 @@ void getnstimeofday(struct timespec *tv) { struct timeval x; - do_gettimeofday(&x); + vx_gettimeofday(&x); tv->tv_sec = x.tv_sec; tv->tv_nsec = x.tv_usec * NSEC_PER_USEC; } diff -NurpP --minimal linux-2.6.19.1/kernel/timer.c linux-2.6.19.1-vs2.2.0-rc6/kernel/timer.c --- linux-2.6.19.1/kernel/timer.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/timer.c 2006-11-08 06:18:54 +0100 @@ -34,6 +34,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -1082,12 +1086,6 @@ asmlinkage unsigned long sys_alarm(unsig #endif -#ifndef __alpha__ - -/* - * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this - * should be moved into arch/i386 instead? - */ /** * sys_getpid - return the thread group id of the current process @@ -1100,7 +1098,7 @@ asmlinkage unsigned long sys_alarm(unsig */ asmlinkage long sys_getpid(void) { - return current->tgid; + return vx_map_tgid(current->tgid); } /* @@ -1116,10 +1114,23 @@ asmlinkage long sys_getppid(void) rcu_read_lock(); pid = rcu_dereference(current->real_parent)->tgid; rcu_read_unlock(); + return vx_map_pid(pid); +} - return pid; +#ifdef __alpha__ + +/* + * The Alpha uses getxpid, getxuid, and getxgid instead. + */ + +asmlinkage long do_getxpid(long *ppid) +{ + *ppid = sys_getppid(); + return sys_getpid(); } +#else /* _alpha_ */ + asmlinkage long sys_getuid(void) { /* Only we change this so SMP safe */ @@ -1281,6 +1292,8 @@ asmlinkage long sys_sysinfo(struct sysin tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; tp.tv_sec++; } + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&tp, NULL); val.uptime = tp.tv_sec + (tp.tv_nsec ? 
1 : 0); val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); diff -NurpP --minimal linux-2.6.19.1/kernel/user.c linux-2.6.19.1-vs2.2.0-rc6/kernel/user.c --- linux-2.6.19.1/kernel/user.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/user.c 2006-11-08 21:52:09 +0100 @@ -23,8 +23,8 @@ #define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) #define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) -#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) -#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) +#define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) +#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid))) static kmem_cache_t *uid_cachep; static struct list_head uidhash_table[UIDHASH_SZ]; @@ -66,7 +66,7 @@ static inline void uid_hash_remove(struc list_del(&up->uidhash_list); } -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) +static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent) { struct list_head *up; @@ -75,7 +75,7 @@ static inline struct user_struct *uid_ha user = list_entry(up, struct user_struct, uidhash_list); - if(user->uid == uid) { + if(user->uid == uid && user->xid == xid) { atomic_inc(&user->__count); return user; } @@ -90,13 +90,13 @@ static inline struct user_struct *uid_ha * * If the user_struct could not be found, return NULL. 
*/ -struct user_struct *find_user(uid_t uid) +struct user_struct *find_user(xid_t xid, uid_t uid) { struct user_struct *ret; unsigned long flags; spin_lock_irqsave(&uidhash_lock, flags); - ret = uid_hash_find(uid, uidhashentry(uid)); + ret = uid_hash_find(xid, uid, uidhashentry(xid, uid)); spin_unlock_irqrestore(&uidhash_lock, flags); return ret; } @@ -120,13 +120,13 @@ void free_uid(struct user_struct *up) } } -struct user_struct * alloc_uid(uid_t uid) +struct user_struct * alloc_uid(xid_t xid, uid_t uid) { - struct list_head *hashent = uidhashentry(uid); + struct list_head *hashent = uidhashentry(xid, uid); struct user_struct *up; spin_lock_irq(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); spin_unlock_irq(&uidhash_lock); if (!up) { @@ -136,6 +136,7 @@ struct user_struct * alloc_uid(uid_t uid if (!new) return NULL; new->uid = uid; + new->xid = xid; atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); atomic_set(&new->files, 0); @@ -158,7 +159,7 @@ struct user_struct * alloc_uid(uid_t uid * on adding the same user already.. 
*/ spin_lock_irq(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); if (up) { key_put(new->uid_keyring); key_put(new->session_keyring); @@ -215,7 +216,7 @@ static int __init uid_cache_init(void) /* Insert the root user immediately (init already runs as root) */ spin_lock_irq(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(0)); + uid_hash_insert(&root_user, uidhashentry(0,0)); spin_unlock_irq(&uidhash_lock); return 0; diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/Kconfig linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/Kconfig --- linux-2.6.19.1/kernel/vserver/Kconfig 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/Kconfig 2006-12-19 18:42:14 +0100 @@ -0,0 +1,269 @@ +# +# Linux VServer configuration +# + +menu "Linux VServer" + +config VSERVER_LEGACY + bool "Enable Legacy Kernel API" + default n + help + This enables the legacy API used in vs1.xx, maintaining + compatibility with older vserver tools, and guest images + that are configured using the legacy method. + +config VSERVER_LEGACY_VERSION + bool "Show a Legacy Version ID" + depends on VSERVER_LEGACY + default n + help + This shows a special legacy version to very old tools + which do not handle the current version correctly. + + This will probably disable some features of newer tools + so better avoid it, unless you really, really need it + for backwards compatibility. + +config VSERVER_DYNAMIC_IDS + bool "Enable dynamic context IDs" + depends on EXPERIMENTAL && VSERVER_LEGACY + default n + help + This enables support of in kernel dynamic context IDs, + which is deprecated and will probably be removed in the + next release. + +config VSERVER_LEGACYNET + bool "Enable Legacy Networking Kernel API" + default n + help + This enables the legacy networking API which is used + by older tools (pre 0.30.210) to set up the network + context (chbind). 
+ +config VSERVER_REMAP_SADDR + bool "Remap Source IP Address" + depends on EXPERIMENTAL + default n + help + This allows remapping the source IP address of 'local' + connections from 127.0.0.1 to the first assigned + guest IP. + +config VSERVER_COWBL + bool "Enable COW Immutable Link Breaking" + depends on EXPERIMENTAL + default y + help + This enables the COW (Copy-On-Write) link break code. + It allows you to treat unified files like normal files + when writing to them (which will implicitly break the + link and create a copy of the unified file) + +config VSERVER_VTIME + bool "Enable Virtualized Guest Time" + depends on EXPERIMENTAL + default n + help + This enables per guest time offsets to allow for + adjusting the system clock individually per guest. + This adds some overhead to the time functions and + therefore should not be enabled without good reason. + +config VSERVER_PROC_SECURE + bool "Enable Proc Security" + depends on PROC_FS + default y + help + This configures ProcFS security to initially hide + non-process entries for all contexts except the main and + spectator context (i.e. for all guests), which is a secure + default. + + (note: on 1.2x the entries were visible by default) + +config VSERVER_HARDCPU + bool "Enable Hard CPU Limits" + depends on EXPERIMENTAL + default n + help + Activate the Hard CPU Limits + + This will compile in code that allows the Token Bucket + Scheduler to put processes on hold when a context's + tokens are depleted (provided that its per-context + sched_hard flag is set). + + Processes belonging to that context will not be able + to consume CPU resources again until a per-context + configured minimum of tokens has been reached.
+ +config VSERVER_IDLETIME + bool "Avoid idle CPUs by skipping Time" + depends on VSERVER_HARDCPU + default n + help + This option allows the scheduler to artificially + advance time (per cpu) when otherwise the idle + task would be scheduled, thus keeping the cpu + busy and sharing the available resources among + certain contexts. + +config VSERVER_IDLELIMIT + bool "Limit the IDLE task" + depends on VSERVER_HARDCPU + default n + help + Limit the idle slices, so that the next context + will be scheduled as soon as possible. + + This might improve interactivity and latency, but + will also marginally increase scheduling overhead. + +choice + prompt "Persistent Inode Tagging" + default TAGGING_ID24 + help + This adds persistent context information to filesystems + mounted with the tagxid option. Tagging is a requirement + for per-context disk limits and per-context quota. + + +config TAGGING_NONE + bool "Disabled" + help + do not store per-context information in inodes. + +config TAGGING_UID16 + bool "UID16/GID32" + help + reduces UID to 16 bit, but leaves GID at 32 bit. + +config TAGGING_GID16 + bool "UID32/GID16" + help + reduces GID to 16 bit, but leaves UID at 32 bit. + +config TAGGING_ID24 + bool "UID24/GID24" + help + uses the upper 8bit from UID and GID for XID tagging + which leaves 24bit for UID/GID each, which should be + more than sufficient for normal use. + +config TAGGING_INTERN + bool "UID32/GID32" + help + this uses otherwise reserved inode fields in the on + disk representation, which limits the use to a few + filesystems (currently ext2 and ext3) + +config TAGGING_RUNTIME + bool "Runtime" + depends on EXPERIMENTAL + help + inodes are tagged when first accessed, this doesn't + require any persistent information, but might give + funny results for mixed access. + +endchoice + +config TAG_NFSD + bool "Tag NFSD User Auth and Files" + default n + help + Enable this if you do want the in-kernel NFS + Server to use the tagging specified above.
+ (will require patched clients too) + +config PROPAGATE + bool "Enable Inode Tag Propagation" + default n + depends on EXPERIMENTAL + help + This allows for the tagid= mount option to specify + a tagid which is to be used for the entire mount + tree. + +config VSERVER_PRIVACY + bool "Honor Privacy Aspects of Guests" + default y + help + When enabled, most context checks will disallow + access to structures assigned to a specific context, + like ptys or loop devices. + +config VSERVER_DEBUG + bool "VServer Debugging Code" + default n + help + Set this to yes if you want to be able to activate + debugging output at runtime. It adds a probably small + overhead to all vserver related functions and + increases the kernel size by about 20k. + +config VSERVER_HISTORY + bool "VServer History Tracing" + depends on VSERVER_DEBUG + default n + help + Set this to yes if you want to record the history of + linux-vserver activities, so they can be replayed in + the event of a kernel panic or oops. + +config VSERVER_HISTORY_SIZE + int "Per-CPU History Size (32-65536)" + depends on VSERVER_HISTORY + range 32 65536 + default 64 + help + This allows you to specify the number of entries in + the per-CPU history buffer. + +config VSERVER_MONITOR + bool "VServer Scheduling Monitor" + depends on VSERVER_DEBUG + default n + help + Set this to yes if you want to record the scheduling + decisions, so that they can be relayed to userspace + for detailed analysis. + +config VSERVER_MONITOR_SIZE + int "Per-CPU Monitor Queue Size (32-65536)" + depends on VSERVER_MONITOR + range 32 65536 + default 1024 + help + This allows you to specify the number of entries in + the per-CPU scheduling monitor buffer. + +config VSERVER_MONITOR_SYNC + int "Per-CPU Monitor Sync Interval (0-65536)" + depends on VSERVER_MONITOR + range 0 65536 + default 256 + help + This allows you to specify the interval in ticks + when a time sync entry is inserted. 
+ +endmenu + + +config VSERVER + bool + default y + select UTS_NS + select IPC_NS + +config VSERVER_SECURITY + bool + depends on SECURITY + default y + select SECURITY_CAPABILITIES + +config VSERVER_NGNET + bool + depends on EXPERIMENTAL && !VSERVER_LEGACYNET + default y + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/Makefile linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/Makefile --- linux-2.6.19.1/kernel/vserver/Makefile 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/Makefile 2006-12-07 23:58:43 +0100 @@ -0,0 +1,17 @@ +# +# Makefile for the Linux vserver routines. +# + + +obj-y += vserver.o + +vserver-y := switch.o context.o space.o sched.o network.o inode.o \ + limit.o cvirt.o cacct.o signal.o helper.o init.o dlimit.o + +vserver-$(CONFIG_PROC_FS) += proc.o +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o +vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o +vserver-$(CONFIG_VSERVER_LEGACYNET) += legacynet.o +vserver-$(CONFIG_VSERVER_HISTORY) += history.o +vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/cacct.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cacct.c --- linux-2.6.19.1/kernel/vserver/cacct.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cacct.c 2006-11-30 18:53:18 +0100 @@ -0,0 +1,44 @@ +/* + * linux/kernel/vserver/cacct.c + * + * Virtual Server: Context Accounting + * + * Copyright (C) 2006 Herbert Pötzl + * + * V0.01 added accounting stats + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +int vc_sock_stat(struct vx_info *vxi, void __user *data) +{ + struct vcmd_sock_stat_v0 vc_data; + int j, field; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + field = vc_data.field; + if ((field < 0) || (field >= VXA_SOCK_SIZE)) + return -EINVAL; + + for (j=0; j<3; j++) { + vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j); + vc_data.total[j] = vx_sock_total(&vxi->cacct, field, 
j); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/cacct_init.h linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cacct_init.h --- linux-2.6.19.1/kernel/vserver/cacct_init.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cacct_init.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,25 @@ + +/* vx_info_init_cacct(): reset the socket, slab and page accounting counters in *cacct to zero */ +static inline void vx_info_init_cacct(struct _vx_cacct *cacct) +{ + int i,j; + + /* socket counters: one count/total pair per socket type and per slot (3 slots, as in vc_sock_stat) */ + for (i=0; i<VXA_SOCK_SIZE; i++) { + for (j=0; j<3; j++) { + atomic_set(&cacct->sock[i][j].count, 0); + atomic_set(&cacct->sock[i][j].total, 0); + } + } + for (i=0; i<8; i++) + atomic_set(&cacct->slab[i], 0); + for (i=0; i<5; i++) + for (j=0; j<4; j++) + atomic_set(&cacct->page[i][j], 0); +} + +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct) +{ + return; +} + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/cacct_proc.h linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cacct_proc.h --- linux-2.6.19.1/kernel/vserver/cacct_proc.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cacct_proc.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,58 @@ +#ifndef _VX_CACCT_PROC_H +#define _VX_CACCT_PROC_H + +#include + + +#define VX_SOCKA_TOP \ + "Type\t recv #/bytes\t\t send #/bytes\t\t fail #/bytes\n" + +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer) +{ + int i,j, length = 0; + static char *type[VXA_SOCK_SIZE] = { + "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER" }; + + length += sprintf(buffer + length, VX_SOCKA_TOP); + for (i=0; islab[1]) + ,atomic_read(&cacct->slab[4]) + ,atomic_read(&cacct->slab[0]) + ,atomic_read(&cacct->slab[2]) + ); + + length += sprintf(buffer + length, "\n"); + for (i=0; i<5; i++) { + length += sprintf(buffer + length, + "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n" + ,i + ,atomic_read(&cacct->page[i][0]) + ,atomic_read(&cacct->page[i][1]) + ,atomic_read(&cacct->page[i][2]) + ,atomic_read(&cacct->page[i][3]) + ,atomic_read(&cacct->page[i][4]) + ,atomic_read(&cacct->page[i][5]) + ,atomic_read(&cacct->page[i][6]) +
,atomic_read(&cacct->page[i][6]) + ,atomic_read(&cacct->page[i][7]) + ); + } + + return length; +} + +#endif /* _VX_CACCT_PROC_H */ diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/context.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/context.c --- linux-2.6.19.1/kernel/vserver/context.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/context.c 2006-12-10 23:00:38 +0100 @@ -0,0 +1,1091 @@ +/* + * linux/kernel/vserver/context.c + * + * Virtual Server: Context Support + * + * Copyright (C) 2003-2006 Herbert Pötzl + * + * V0.01 context helper + * V0.02 vx_ctx_kill syscall command + * V0.03 replaced context_info calls + * V0.04 redesign of struct (de)alloc + * V0.05 rlimit basic implementation + * V0.06 task_xid and info commands + * V0.07 context flags and caps + * V0.08 switch to RCU based hash + * V0.09 revert to non RCU for now + * V0.10 and back to working RCU hash + * V0.11 and back to locking again + * V0.12 referenced context store + * V0.13 separate per cpu data + * V0.14 changed vcmds to vxi arg + * V0.15 added context stat + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "cvirt_init.h" +#include "cacct_init.h" +#include "limit_init.h" +#include "sched_init.h" + + +atomic_t vx_global_ctotal = ATOMIC_INIT(0); +atomic_t vx_global_cactive = ATOMIC_INIT(0); + + +/* now inactive context structures */ + +static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT; + +static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED; + + +/* __alloc_vx_info() + + * allocate an initialized vx_info struct + * doesn't make it visible (hash) */ + +static struct vx_info *__alloc_vx_info(xid_t xid) +{ + struct vx_info *new = NULL; + int cpu; + + vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); + + /* would this benefit from a slab cache? 
*/ + new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct vx_info)); +#ifdef CONFIG_SMP + new->ptr_pc = alloc_percpu(struct _vx_info_pc); + if (!new->ptr_pc) + goto error; +#endif + new->vx_id = xid; + INIT_HLIST_NODE(&new->vx_hlist); + atomic_set(&new->vx_usecnt, 0); + atomic_set(&new->vx_tasks, 0); + new->vx_parent = NULL; + new->vx_state = 0; + init_waitqueue_head(&new->vx_wait); + + /* prepare reaper */ + get_task_struct(child_reaper); + new->vx_reaper = child_reaper; + + /* rest of init goes here */ + vx_info_init_limit(&new->limit); + vx_info_init_sched(&new->sched); + vx_info_init_cvirt(&new->cvirt); + vx_info_init_cacct(&new->cacct); + + /* per cpu data structures */ + for_each_possible_cpu(cpu) { + vx_info_init_sched_pc( + &vx_per_cpu(new, sched_pc, cpu), cpu); + vx_info_init_cvirt_pc( + &vx_per_cpu(new, cvirt_pc, cpu), cpu); + } + + new->vx_flags = VXF_INIT_SET; + new->vx_bcaps = CAP_INIT_EFF_SET; + new->vx_ccaps = 0; + new->vx_cap_bset = cap_bset; + + new->reboot_cmd = 0; + new->exit_code = 0; + + vxdprintk(VXD_CBIT(xid, 0), + "alloc_vx_info(%d) = %p", xid, new); + vxh_alloc_vx_info(new); + atomic_inc(&vx_global_ctotal); + return new; +#ifdef CONFIG_SMP +error: + kfree(new); + return 0; +#endif +} + +/* __dealloc_vx_info() + + * final disposal of vx_info */ + +static void __dealloc_vx_info(struct vx_info *vxi) +{ + int cpu; + + vxdprintk(VXD_CBIT(xid, 0), + "dealloc_vx_info(%p)", vxi); + vxh_dealloc_vx_info(vxi); + + vxi->vx_id = -1; + + vx_info_exit_limit(&vxi->limit); + vx_info_exit_sched(&vxi->sched); + vx_info_exit_cvirt(&vxi->cvirt); + vx_info_exit_cacct(&vxi->cacct); + + for_each_possible_cpu(cpu) { + vx_info_exit_sched_pc( + &vx_per_cpu(vxi, sched_pc, cpu), cpu); + vx_info_exit_cvirt_pc( + &vx_per_cpu(vxi, cvirt_pc, cpu), cpu); + } + + vxi->vx_state |= VXS_RELEASED; + +#ifdef CONFIG_SMP + free_percpu(vxi->ptr_pc); +#endif + kfree(vxi); + atomic_dec(&vx_global_ctotal); +} + +static void 
__shutdown_vx_info(struct vx_info *vxi) +{ + struct nsproxy *nsproxy; + struct fs_struct *fs; + + might_sleep(); + + vxi->vx_state |= VXS_SHUTDOWN; + vs_state_change(vxi, VSC_SHUTDOWN); + + nsproxy = xchg(&vxi->vx_nsproxy, NULL); + if (nsproxy) + put_nsproxy(nsproxy); + + fs = xchg(&vxi->vx_fs, NULL); + if (fs) + put_fs_struct(fs); +} + +/* exported stuff */ + +void free_vx_info(struct vx_info *vxi) +{ + unsigned long flags; + + /* context shutdown is mandatory */ + BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN)); + + BUG_ON(atomic_read(&vxi->vx_usecnt)); + BUG_ON(atomic_read(&vxi->vx_tasks)); + + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + + BUG_ON(vxi->vx_nsproxy); + BUG_ON(vxi->vx_fs); + + spin_lock_irqsave(&vx_info_inactive_lock, flags); + hlist_del(&vxi->vx_hlist); + spin_unlock_irqrestore(&vx_info_inactive_lock, flags); + + __dealloc_vx_info(vxi); +} + + +/* hash table for vx_info hash */ + +#define VX_HASH_SIZE 13 + +static struct hlist_head vx_info_hash[VX_HASH_SIZE] = + { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT }; + +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(xid_t xid) +{ + return (xid % VX_HASH_SIZE); +} + + + +/* __hash_vx_info() + + * add the vxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_vx_info(struct vx_info *vxi) +{ + struct hlist_head *head; + + vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxh_hash_vx_info(vxi); + + /* context must not be hashed */ + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + + vxi->vx_state |= VXS_HASHED; + head = &vx_info_hash[__hashval(vxi->vx_id)]; + hlist_add_head(&vxi->vx_hlist, head); + atomic_inc(&vx_global_cactive); +} + +/* __unhash_vx_info() + + * remove the vxi from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_vx_info(struct vx_info *vxi) +{ + unsigned long flags; + + 
vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxh_unhash_vx_info(vxi); + + /* context must be hashed */ + BUG_ON(!vx_info_state(vxi, VXS_HASHED)); + + vxi->vx_state &= ~VXS_HASHED; + hlist_del_init(&vxi->vx_hlist); + spin_lock_irqsave(&vx_info_inactive_lock, flags); + hlist_add_head(&vxi->vx_hlist, &vx_info_inactive); + spin_unlock_irqrestore(&vx_info_inactive_lock, flags); + atomic_dec(&vx_global_cactive); +} + + +/* __lookup_vx_info() + + * requires the hash_lock to be held + * doesn't increment the vx_refcnt */ + +static inline struct vx_info *__lookup_vx_info(xid_t xid) +{ + struct hlist_head *head = &vx_info_hash[__hashval(xid)]; + struct hlist_node *pos; + struct vx_info *vxi; + + vxd_assert_lock(&vx_info_hash_lock); + hlist_for_each(pos, head) { + vxi = hlist_entry(pos, struct vx_info, vx_hlist); + + if (vxi->vx_id == xid) + goto found; + } + vxi = NULL; +found: + vxdprintk(VXD_CBIT(xid, 0), + "__lookup_vx_info(#%u): %p[#%u]", + xid, vxi, vxi?vxi->vx_id:0); + vxh_lookup_vx_info(vxi, xid); + return vxi; +} + + +/* __vx_dynamic_id() + + * find unused dynamic xid + * requires the hash_lock to be held */ + +static inline xid_t __vx_dynamic_id(void) +{ + static xid_t seq = MAX_S_CONTEXT; + xid_t barrier = seq; + + vxd_assert_lock(&vx_info_hash_lock); + do { + if (++seq > MAX_S_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__lookup_vx_info(seq)) { + vxdprintk(VXD_CBIT(xid, 4), + "__vx_dynamic_id: [#%d]", seq); + return seq; + } + } while (barrier != seq); + return 0; +} + +#ifdef CONFIG_VSERVER_LEGACY + +/* __loc_vx_info() + + * locate or create the requested context + * get() it and if new hash it */ + +static struct vx_info * __loc_vx_info(int id, int *err) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id); + + if (!(new = __alloc_vx_info(id))) { + *err = -ENOMEM; + return NULL; + } + + /* required to make dynamic xids unique */ + 
spin_lock(&vx_info_hash_lock); + + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { +#ifdef CONFIG_VSERVER_DYNAMIC_IDS + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + goto out_unlock; + } + new->vx_id = id; +#else + printk(KERN_ERR "dynamic contexts disabled.\n"); + goto out_unlock; +#endif + } + /* existing context requested */ + else if ((vxi = __lookup_vx_info(id))) { + /* context in setup is not available */ + if (vxi->vx_flags & VXF_STATE_SETUP) { + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (not available)", id, vxi); + vxi = NULL; + *err = -EBUSY; + } else { + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (found)", id, vxi); + get_vx_info(vxi); + *err = 0; + } + goto out_unlock; + } + + /* new context requested */ + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (new)", id, new); + __hash_vx_info(get_vx_info(new)); + vxi = new, new = NULL; + *err = 1; + +out_unlock: + spin_unlock(&vx_info_hash_lock); + vxh_loc_vx_info(vxi, id); + if (new) + __dealloc_vx_info(new); + return vxi; +} + +#endif + +/* __create_vx_info() + + * create the requested context + * get() and hash it */ + +static struct vx_info * __create_vx_info(int id) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id); + + if (!(new = __alloc_vx_info(id))) + return ERR_PTR(-ENOMEM); + + /* required to make dynamic xids unique */ + spin_lock(&vx_info_hash_lock); + + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { +#ifdef CONFIG_VSERVER_DYNAMIC_IDS + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + vxi = ERR_PTR(-EAGAIN); + goto out_unlock; + } + new->vx_id = id; +#else + printk(KERN_ERR "dynamic contexts disabled.\n"); + vxi = ERR_PTR(-EINVAL); + goto out_unlock; +#endif + } + /* static context requested */ + else if ((vxi = __lookup_vx_info(id))) { + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (already there)", 
id, vxi); + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0)) + vxi = ERR_PTR(-EBUSY); + else + vxi = ERR_PTR(-EEXIST); + goto out_unlock; + } +#ifdef CONFIG_VSERVER_DYNAMIC_IDS + /* dynamic xid creation blocker */ + else if (id >= MIN_D_CONTEXT) { + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) (dynamic rejected)", id); + vxi = ERR_PTR(-EINVAL); + goto out_unlock; + } +#endif + + /* new context */ + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (new)", id, new); + __hash_vx_info(get_vx_info(new)); + vxi = new, new = NULL; + +out_unlock: + spin_unlock(&vx_info_hash_lock); + vxh_create_vx_info(IS_ERR(vxi)?NULL:vxi, id); + if (new) + __dealloc_vx_info(new); + return vxi; +} + + +/* exported stuff */ + + +void unhash_vx_info(struct vx_info *vxi) +{ + __shutdown_vx_info(vxi); + spin_lock(&vx_info_hash_lock); + __unhash_vx_info(vxi); + spin_unlock(&vx_info_hash_lock); + __wakeup_vx_info(vxi); +} + + +/* lookup_vx_info() + + * search for a vx_info and get() it + * negative id means current */ + +struct vx_info *lookup_vx_info(int id) +{ + struct vx_info *vxi = NULL; + + if (id < 0) { + vxi = get_vx_info(current->vx_info); + } else if (id > 1) { + spin_lock(&vx_info_hash_lock); + vxi = get_vx_info(__lookup_vx_info(id)); + spin_unlock(&vx_info_hash_lock); + } + return vxi; +} + +/* xid_is_hashed() + + * verify that xid is still hashed */ + +int xid_is_hashed(xid_t xid) +{ + int hashed; + + spin_lock(&vx_info_hash_lock); + hashed = (__lookup_vx_info(xid) != NULL); + spin_unlock(&vx_info_hash_lock); + return hashed; +} + +#ifdef CONFIG_VSERVER_LEGACY + +struct vx_info *lookup_or_create_vx_info(int id) +{ + int err; + + return __loc_vx_info(id, &err); +} + +#endif + +#ifdef CONFIG_PROC_FS + +/* get_xid_list() + + * get a subset of hashed xids for proc + * assumes size is at least one */ + +int get_xid_list(int index, unsigned int *xids, int size) +{ + int hindex, nr_xids = 0; + + /* only show current and children */ + if (!vx_check(0, VS_ADMIN|VS_WATCH)) { + if (index 
> 0)
+			return 0;
+		xids[nr_xids] = vx_current_xid();
+		return 1;
+	}
+
+	for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
+		struct hlist_head *head = &vx_info_hash[hindex];
+		struct hlist_node *pos;
+
+		spin_lock(&vx_info_hash_lock);
+		hlist_for_each(pos, head) {
+			struct vx_info *vxi;
+
+			if (--index > 0)	/* skip while the decremented index is positive */
+				continue;
+
+			vxi = hlist_entry(pos, struct vx_info, vx_hlist);
+			xids[nr_xids] = vxi->vx_id;
+			if (++nr_xids >= size) {	/* xids[] is full */
+				spin_unlock(&vx_info_hash_lock);
+				goto out;
+			}
+		}
+		/* keep the lock time short */
+		spin_unlock(&vx_info_hash_lock);
+	}
+out:
+	return nr_xids;
+}
+#endif
+
+#ifdef CONFIG_VSERVER_DEBUG
+/* debug helper: calls dump_vx_info() for each entry on the vx_info_inactive list */
+void dump_vx_info_inactive(int level)
+{
+	struct hlist_node *entry, *next;
+
+	hlist_for_each_safe(entry, next, &vx_info_inactive) {
+		struct vx_info *vxi =
+			list_entry(entry, struct vx_info, vx_hlist);
+
+		dump_vx_info(vxi, level);
+	}
+}
+
+#endif
+/*	vx_migrate_user()
+
+	* make p use the user_struct that alloc_uid() returns
+	* for (vxi->vx_id, p->uid), moving the process count
+	* returns 0, -EACCES for a VXF_INFO_PRIVATE context,
+	* or -ENOMEM if alloc_uid() fails; BUG()s on NULL args	*/
+int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
+{
+	struct user_struct *new_user, *old_user;
+
+	if (!p || !vxi)
+		BUG();
+
+	if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
+		return -EACCES;
+
+	new_user = alloc_uid(vxi->vx_id, p->uid);
+	if (!new_user)
+		return -ENOMEM;
+
+	old_user = p->user;
+	if (new_user != old_user) {
+		atomic_inc(&new_user->processes);
+		atomic_dec(&old_user->processes);
+		p->user = new_user;
+	}
+	free_uid(old_user);	/* called in both cases, also when the user did not change */
+	return 0;
+}
+/* AND p's effective, inheritable and permitted caps with vxi->vx_cap_bset */
+void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
+{
+	p->cap_effective &= vxi->vx_cap_bset;
+	p->cap_inheritable &= vxi->vx_cap_bset;
+	p->cap_permitted &= vxi->vx_cap_bset;
+}
+
+
+#include	/* NOTE(review): header name missing, apparently lost in extraction - restore from the original patch */
+/* sums the set bits of tsk's open_fds bitmap while holding files->file_lock */
+static int vx_openfd_task(struct task_struct *tsk)
+{
+	struct files_struct *files = tsk->files;
+	struct fdtable *fdt;
+	const unsigned long *bptr;
+	int count, total;
+
+	/* no rcu_read_lock() because of spin_lock() */
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	bptr = fdt->open_fds->fds_bits;
+	count = fdt->max_fds / (sizeof(unsigned long) * 8);	/* bitmap length in longs */
+	for (total = 0; count > 0; count--) {
+		if (*bptr)
+			total += hweight_long(*bptr);
+		bptr++;
+	}
+	spin_unlock(&files->file_lock);
+	return total;
+}
+
+
+/* for *space compatibility */
+
+asmlinkage long sys_unshare(unsigned long);
+
+/*
+ *	migrate task to new context
+ *	gets vxi, puts old_vxi on change
+ *	optionally unshares namespaces (hack)
+ *
+ *	returns 0 on success and when p already is in vxi,
+ *	-EACCES if vxi is VXF_INFO_PRIVATE and not in VXF_STATE_SETUP,
+ *	otherwise the error from vx_migrate_user() or sys_unshare()
+ *	BUG()s if p or vxi is NULL
+ */
+
+int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
+{
+	struct vx_info *old_vxi;
+	int ret = 0;
+
+	if (!p || !vxi)
+		BUG();
+
+	vxdprintk(VXD_CBIT(xid, 5),
+		"vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
+		vxi->vx_id, atomic_read(&vxi->vx_usecnt));
+
+	if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
+		!vx_info_flags(vxi, VXF_STATE_SETUP, 0))
+		return -EACCES;
+
+	old_vxi = task_get_vx_info(p);
+	if (old_vxi == vxi)	/* nothing to move */
+		goto out;
+
+	if (!(ret = vx_migrate_user(p, vxi))) {
+		int openfd;
+
+		task_lock(p);
+		openfd = vx_openfd_task(p);
+		/* take p out of the old context's counters, if it had a context */
+		if (old_vxi) {
+			atomic_dec(&old_vxi->cvirt.nr_threads);
+			atomic_dec(&old_vxi->cvirt.nr_running);
+			__rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
+			/* FIXME: what about the struct files here? */
+			__rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
+			/* account for the executable */
+			__rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
+		}
+		atomic_inc(&vxi->cvirt.nr_threads);	/* and add it to the new context's counters */
+		atomic_inc(&vxi->cvirt.nr_running);
+		__rlim_inc(&vxi->limit, RLIMIT_NPROC);
+		/* FIXME: what about the struct files here?
*/
+		__rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
+		/* account for the executable */
+		__rlim_inc(&vxi->limit, VLIMIT_DENTRY);
+
+		if (old_vxi) {
+			release_vx_info(old_vxi, p);
+			clr_vx_info(&p->vx_info);
+		}
+		claim_vx_info(vxi, p);
+		set_vx_info(&p->vx_info, vxi);
+		p->xid = vxi->vx_id;
+
+		vxdprintk(VXD_CBIT(xid, 5),
+			"moved task %p into vxi:%p[#%d]",
+			p, vxi, vxi->vx_id);
+
+		vx_mask_cap_bset(vxi, p);
+		task_unlock(p);
+
+		/* hack for *spaces to provide compatibility */
+		if (unshare) {
+			ret = sys_unshare(CLONE_NEWUTS|CLONE_NEWIPC);
+			vx_set_space(vxi, CLONE_NEWUTS|CLONE_NEWIPC);	/* NOTE(review): also runs when sys_unshare() failed - confirm this is intended */
+		}
+	}
+out:
+	put_vx_info(old_vxi);
+	return ret;
+}
+
+int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
+{
+	struct task_struct *old_reaper;
+
+	if (!vxi)
+		return -EINVAL;
+
+	vxdprintk(VXD_CBIT(xid, 6),
+		"vx_set_reaper(%p[#%d],%p[#%d,%d])",
+		vxi, vxi->vx_id, p, p->xid, p->pid);
+
+	old_reaper = vxi->vx_reaper;
+	if (old_reaper == p)	/* already the reaper, nothing to do */
+		return 0;
+
+	/* set new child reaper: take a task reference on p for
+	   vxi->vx_reaper, then drop the one held on the old reaper */
+	get_task_struct(p);
+	vxi->vx_reaper = p;
+	put_task_struct(old_reaper);
+	return 0;
+}
+
+int vx_set_init(struct vx_info *vxi, struct task_struct *p)
+{
+	if (!vxi)
+		return -EINVAL;
+
+	vxdprintk(VXD_CBIT(xid, 6),
+		"vx_set_init(%p[#%d],%p[#%d,%d,%d])",
+		vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
+	/* clear VXF_STATE_INIT and record p's tgid as the context's init pid */
+	vxi->vx_flags &= ~VXF_STATE_INIT;
+	vxi->vx_initpid = p->tgid;
+	return 0;
+}
+
+void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
+{
+	vxdprintk(VXD_CBIT(xid, 6),
+		"vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
+		vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
+	/* store the exit code in the context and reset its init pid to 0 */
+	vxi->exit_code = code;
+	vxi->vx_initpid = 0;
+}
+
+/* vx_set_persistent(): get_vx_info() on vxi plus claim_vx_info() for current */
+void vx_set_persistent(struct vx_info *vxi)
+{
+	vxdprintk(VXD_CBIT(xid, 6),
+		"vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
+
+	get_vx_info(vxi);
+	claim_vx_info(vxi, current);
+}
+
+void vx_clear_persistent(struct vx_info *vxi)
+{
+	vxdprintk(VXD_CBIT(xid, 6),
+		"vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
+	/* reverse of vx_set_persistent(): release_vx_info(), then put_vx_info() */
+	release_vx_info(vxi, current);
+
put_vx_info(vxi);
+}
+
+void vx_update_persistent(struct vx_info *vxi)
+{
+	if (vx_info_flags(vxi, VXF_PERSISTENT, 0))	/* follow the VXF_PERSISTENT check */
+		vx_set_persistent(vxi);
+	else
+		vx_clear_persistent(vxi);
+}
+
+
+/* task must be current or locked
+   if p has a context: decrements its nr_threads, calls vx_nproc_dec(p),
+   stores code as the context's exit_code and releases p's claim on it */
+
+void exit_vx_info(struct task_struct *p, int code)
+{
+	struct vx_info *vxi = p->vx_info;
+
+	if (vxi) {
+		atomic_dec(&vxi->cvirt.nr_threads);
+		vx_nproc_dec(p);
+
+		vxi->exit_code = code;
+		release_vx_info(vxi, p);
+	}
+}
+
+void exit_vx_info_early(struct task_struct *p, int code)
+{
+	struct vx_info *vxi = p->vx_info;
+
+	if (vxi) {
+		if (vxi->vx_initpid == p->tgid)	/* p belongs to the context's init */
+			vx_exit_init(vxi, p, code);
+		if (vxi->vx_reaper == p)	/* p was the reaper: hand over to child_reaper */
+			vx_set_reaper(vxi, child_reaper);
+	}
+}
+
+
+/* vserver syscall commands below here */
+
+/* task xid and vx_info functions */
+
+#include	/* NOTE(review): header name missing, apparently lost in extraction - restore from the original patch */
+
+/*	vc_task_xid()
+
+	* id != 0: returns the xid of the task with that pid,
+	*   -ESRCH if there is none, -EPERM without
+	*   VS_ADMIN|VS_WATCH
+	* id == 0: returns vx_current_xid()
+	* data is not used					*/
+int vc_task_xid(uint32_t id, void __user *data)
+{
+	xid_t xid;
+
+	if (id) {
+		struct task_struct *tsk;
+
+		if (!vx_check(0, VS_ADMIN|VS_WATCH))
+			return -EPERM;
+
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_real_pid(id);
+		xid = (tsk) ?
tsk->xid : -ESRCH;
+		read_unlock(&tasklist_lock);
+	}
+	else
+		xid = vx_current_xid();
+	return xid;
+}
+
+
+/*
+ *	vc_vx_info(): copy vxi's context id and init pid to user space
+ *	returns 0, or -EFAULT if the copy fails
+ */
+int vc_vx_info(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_vx_info_v0 vc_data;
+
+	vc_data.xid = vxi->vx_id;
+	vc_data.initpid = vxi->vx_initpid;
+
+	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+		return -EFAULT;
+	return 0;
+}
+
+
+/*
+ *	vc_ctx_stat(): copy vxi's use count and task count to user space
+ *	returns 0, or -EFAULT if the copy fails
+ */
+int vc_ctx_stat(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_ctx_stat_v0 vc_data;
+
+	vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
+	vc_data.tasks = atomic_read(&vxi->vx_tasks);
+
+	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+		return -EFAULT;
+	return 0;
+}
+
+
+/* context functions */
+
+/*
+ *	vc_ctx_create(): create context xid and migrate current into it
+ *
+ *	data may be NULL; the flagword then defaults to VXF_INIT_SET and
+ *	vx_migrate_task() is asked to unshare (its last argument is !data)
+ *
+ *	xid must be >= 2 and either <= MAX_S_CONTEXT or VX_DYNAMIC_ID
+ *
+ *	returns the id of the new context on success, -EFAULT, -EINVAL,
+ *	the error from __create_vx_info(), -ENOEXEC if the VSC_STARTUP
+ *	state change fails, or the error from vx_migrate_task()
+ */
+int vc_ctx_create(uint32_t xid, void __user *data)
+{
+	struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
+	struct vx_info *new_vxi;
+	int ret;
+
+	if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	if ((xid > MAX_S_CONTEXT) && (xid != VX_DYNAMIC_ID))
+		return -EINVAL;
+	if (xid < 2)
+		return -EINVAL;
+
+	new_vxi = __create_vx_info(xid);
+	if (IS_ERR(new_vxi))
+		return PTR_ERR(new_vxi);
+
+	/* initial flags */
+	new_vxi->vx_flags = vc_data.flagword;
+
+	/* get a reference for persistent contexts */
+	if ((vc_data.flagword & VXF_PERSISTENT))
+		vx_set_persistent(new_vxi);
+
+	ret = -ENOEXEC;
+	if (vs_state_change(new_vxi, VSC_STARTUP))
+		goto out_unhash;
+	ret = vx_migrate_task(current, new_vxi, (!data));
+	if (!ret) {
+		/* return context id on success */
+		ret = new_vxi->vx_id;
+		goto out;
+	}
+out_unhash:
+	/* prepare for context disposal */
+	new_vxi->vx_state |= VXS_SHUTDOWN;
+	if ((vc_data.flagword & VXF_PERSISTENT))
+		vx_clear_persistent(new_vxi);
+	/* the context is already visible in the hash, so take it out
+	   under vx_info_hash_lock like unhash_vx_info() does; lookups
+	   walk the hash chains under the same lock */
+	spin_lock(&vx_info_hash_lock);
+	__unhash_vx_info(new_vxi);
+	spin_unlock(&vx_info_hash_lock);
+out:
+	put_vx_info(new_vxi);
+	return ret;
+}
+
+
+/*
+ *	vc_ctx_migrate(): migrate current into vxi, optionally making it
+ *	the context's init (VXM_SET_INIT) and/or reaper (VXM_SET_REAPER)
+ *	data may be NULL, which means no flags
+ */
+int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
+	int ret;
+
+	if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	ret =
vx_migrate_task(current, vxi, 0);
+	if (ret)
+		return ret;
+	if (vc_data.flagword & VXM_SET_INIT)
+		ret = vx_set_init(vxi, current);
+	if (ret)
+		return ret;
+	if (vc_data.flagword & VXM_SET_REAPER)
+		ret = vx_set_reaper(vxi, current);
+	return ret;
+}
+
+
+/*
+ *	vc_get_cflags(): report vxi's flag word together with a mask
+ *	to user space; returns 0, or -EFAULT if the copy fails
+ */
+int vc_get_cflags(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_ctx_flags_v0 vc_data;
+
+	vc_data.flagword = vxi->vx_flags;
+
+	/* special STATE flag handling */
+	/* ~0ULL rather than ~0UL: the mask is handled as a 64 bit value
+	   (see vc_set_cflags() below), and unsigned long has only 32 bits
+	   on 32 bit machines, which left the upper half of the mask clear */
+	vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
+
+	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ *	vc_set_cflags(): update vxi's flags from a user supplied
+ *	flagword/mask pair
+ *
+ *	trigger holds the masked flags whose value actually changes.
+ *	if vxi is the caller's own context, a change of VXF_STATE_SETUP
+ *	applies the capability bounding set to current, and a change of
+ *	VXF_STATE_INIT makes current the context's init and reaper.
+ *	a change of VXF_PERSISTENT updates the persistent reference.
+ *
+ *	returns 0, -EFAULT, or the error from vx_set_init() or
+ *	vx_set_reaper()
+ */
+int vc_set_cflags(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_ctx_flags_v0 vc_data;
+	uint64_t mask, trigger;
+
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	/* special STATE flag handling */
+	mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
+	trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
+
+	if (vxi == current->vx_info) {
+		if (trigger & VXF_STATE_SETUP)
+			vx_mask_cap_bset(vxi, current);
+		if (trigger & VXF_STATE_INIT) {
+			int ret;
+
+			ret = vx_set_init(vxi, current);
+			if (ret)
+				return ret;
+			ret = vx_set_reaper(vxi, current);
+			if (ret)
+				return ret;
+		}
+	}
+
+	vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
+		vc_data.flagword, mask);
+	if (trigger & VXF_PERSISTENT)
+		vx_update_persistent(vxi);
+
+	return 0;
+}
+
+/*
+ *	do_get_caps(): store vxi's bcaps and/or ccaps through the given
+ *	pointers; a NULL pointer skips that value.  always returns 0
+ */
+static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
+{
+	if (bcaps)
+		*bcaps = vxi->vx_bcaps;
+	if (ccaps)
+		*ccaps = vxi->vx_ccaps;
+
+	return 0;
+}
+
+/*
+ *	vc_get_ccaps_v0(): report bcaps, ccaps and an all-ones cmask
+ *	to user space (v0 layout); returns 0 or -EFAULT
+ */
+int vc_get_ccaps_v0(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_ctx_caps_v0 vc_data;
+	int ret;
+
+	ret = do_get_caps(vxi, &vc_data.bcaps, &vc_data.ccaps);
+	if (ret)
+		return ret;
+	/* all bits set; ~0UL would leave the upper 32 bits of a 64 bit
+	   field clear on 32 bit machines (NOTE(review): cmask is
+	   presumably uint64_t like bcaps/ccaps - ~0ULL is correct for
+	   a narrower field as well) */
+	vc_data.cmask = ~0ULL;
+
+	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+		return -EFAULT;
+	return 0;
+}
+
+int vc_get_ccaps(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_ctx_caps_v1 vc_data;
+	int ret;
+
+	ret =
do_get_caps(vxi, NULL, &vc_data.ccaps); + if (ret) + return ret; + vc_data.cmask = ~0UL; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +static int do_set_caps(struct vx_info *vxi, + uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask) +{ + vxi->vx_bcaps = vs_mask_flags(vxi->vx_bcaps, bcaps, bmask); + vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask); + + return 0; +} + +int vc_set_ccaps_v0(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_caps_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* simulate old &= behaviour for bcaps */ + return do_set_caps(vxi, 0, ~vc_data.bcaps, + vc_data.ccaps, vc_data.cmask); +} + +int vc_set_ccaps(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_caps_v1 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask); +} + +int vc_get_bcaps(struct vx_info *vxi, void __user *data) +{ + struct vcmd_bcaps vc_data; + int ret; + + ret = do_get_caps(vxi, &vc_data.bcaps, NULL); + if (ret) + return ret; + vc_data.bmask = ~0UL; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_bcaps(struct vx_info *vxi, void __user *data) +{ + struct vcmd_bcaps vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0); +} + +#include + +EXPORT_SYMBOL_GPL(free_vx_info); + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/cvirt.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cvirt.c --- linux-2.6.19.1/kernel/vserver/cvirt.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cvirt.c 2006-11-14 02:30:02 +0100 @@ -0,0 +1,305 @@ +/* + * linux/kernel/vserver/cvirt.c + * + * Virtual Server: Context Virtualization + * + * Copyright (C) 2004-2006 Herbert Pötzl + * + * V0.01 broken out from 
limit.c + * V0.02 added utsname stuff + * V0.03 changed vcmds to vxi arg + * + */ + +#include +#include +#include +#include +#include +#include +#include +//#include + +#include +#include + + +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle) +{ + struct vx_info *vxi = current->vx_info; + + set_normalized_timespec(uptime, + uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec, + uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec); + if (!idle) + return; + set_normalized_timespec(idle, + idle->tv_sec - vxi->cvirt.bias_idle.tv_sec, + idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec); + return; +} + +uint64_t vx_idle_jiffies(void) +{ + return init_task.utime + init_task.stime; +} + + + +static inline uint32_t __update_loadavg(uint32_t load, + int wsize, int delta, int n) +{ + unsigned long long calc, prev; + + /* just set it to n */ + if (unlikely(delta >= wsize)) + return (n << FSHIFT); + + calc = delta * n; + calc <<= FSHIFT; + prev = (wsize - delta); + prev *= load; + calc += prev; + do_div(calc, wsize); + return calc; +} + + +void vx_update_load(struct vx_info *vxi) +{ + uint32_t now, last, delta; + unsigned int nr_running, nr_uninterruptible; + unsigned int total; + unsigned long flags; + + spin_lock_irqsave(&vxi->cvirt.load_lock, flags); + + now = jiffies; + last = vxi->cvirt.load_last; + delta = now - last; + + if (delta < 5*HZ) + goto out; + + nr_running = atomic_read(&vxi->cvirt.nr_running); + nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible); + total = nr_running + nr_uninterruptible; + + vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0], + 60*HZ, delta, total); + vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1], + 5*60*HZ, delta, total); + vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2], + 15*60*HZ, delta, total); + + vxi->cvirt.load_last = now; +out: + atomic_inc(&vxi->cvirt.load_updates); + spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags); +} + + +/* + * Commands to do_syslog: + * + * 0 -- Close the log. 
Currently a NOP.
+ * 1 -- Open the log. Currently a NOP.
+ * 2 -- Read from the log.
+ * 3 -- Read all messages remaining in the ring buffer.
+ * 4 -- Read and clear all messages remaining in the ring buffer
+ * 5 -- Clear ring buffer.
+ * 6 -- Disable printk's to console
+ * 7 -- Enable printk's to console
+ * 8 -- Set level of messages printed to console
+ * 9 -- Return number of unread characters in the log buffer
+ * 10 -- Return size of the log buffer
+ *
+ * What this per-context version does with them:
+ * type 2 waits (interruptibly) on log->log_wait and then only takes
+ * and releases logbuf_lock; types 3, 4, 5, 9 and 10 return 0 at once;
+ * types 0, 1, 6, 7 and 8 leave the switch with error == 0; any other
+ * type yields -EINVAL.  -EINVAL is also returned when current has no
+ * vx_info.  buf and len are not used.
+ */
+int vx_do_syslog(int type, char __user *buf, int len)
+{
+	int error = 0;
+	int do_clear = 0;	/* NOTE(review): only ever assigned, never read here */
+	struct vx_info *vxi = current->vx_info;
+	struct _vx_syslog *log;
+
+	if (!vxi)
+		return -EINVAL;
+	log = &vxi->cvirt.syslog;
+
+	switch (type) {
+	case 0:		/* Close log */
+	case 1:		/* Open log */
+		break;
+	case 2:		/* Read from log */
+		error = wait_event_interruptible(log->log_wait,
+			(log->log_start - log->log_end));
+		if (error)
+			break;
+		spin_lock_irq(&log->logbuf_lock);	/* nothing is done under the lock */
+		spin_unlock_irq(&log->logbuf_lock);
+		break;
+	case 4:		/* Read/clear last kernel messages */
+		do_clear = 1;
+		/* fall through */
+	case 3:		/* Read last kernel messages */
+		return 0;
+
+	case 5:		/* Clear ring buffer */
+		return 0;
+
+	case 6:		/* Disable logging to console */
+	case 7:		/* Enable logging to console */
+	case 8:		/* Set level of messages printed to console */
+		break;
+
+	case 9:		/* Number of chars in the log buffer */
+		return 0;
+	case 10:	/* Size of the log buffer */
+		return 0;
+	default:
+		error = -EINVAL;
+		break;
+	}
+	return error;
+}
+
+
+/* virtual host info names */
+/*	vx_vhi_name()
+
+	* map a VHIN_* id to the matching name buffer:
+	* VHIN_CONTEXT is vxi->vx_name, the others are fields
+	* of the uts namespace found via vxi->vx_nsproxy
+	* returns NULL for an unknown id or if the nsproxy
+	* or its uts namespace is missing			*/
+static char * vx_vhi_name(struct vx_info *vxi, int id)
+{
+	struct nsproxy *nsproxy;
+	struct uts_namespace *uts;
+
+
+	if (id == VHIN_CONTEXT)
+		return vxi->vx_name;
+
+	nsproxy = vxi->vx_nsproxy;
+	if (!nsproxy)
+		return NULL;
+
+	uts = nsproxy->uts_ns;
+	if (!uts)
+		return NULL;
+
+	switch (id) {
+	case VHIN_SYSNAME:
+		return uts->name.sysname;
+	case VHIN_NODENAME:
+		return uts->name.nodename;
+	case VHIN_RELEASE:
+		return uts->name.release;
+
case VHIN_VERSION: + return uts->name.version; + case VHIN_MACHINE: + return uts->name.machine; + case VHIN_DOMAINNAME: + return uts->name.domainname; + default: + return NULL; + } + return NULL; +} + +int vc_set_vhi_name(struct vx_info *vxi, void __user *data) +{ + struct vcmd_vhi_name_v0 vc_data; + char *name; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + name = vx_vhi_name(vxi, vc_data.field); + if (!name) + return -EINVAL; + + memcpy(name, vc_data.name, 65); + return 0; +} + +int vc_get_vhi_name(struct vx_info *vxi, void __user *data) +{ + struct vcmd_vhi_name_v0 vc_data; + char *name; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + name = vx_vhi_name(vxi, vc_data.field); + if (!name) + return -EINVAL; + + memcpy(vc_data.name, name, 65); + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +int vc_virt_stat(struct vx_info *vxi, void __user *data) +{ + struct vcmd_virt_stat_v0 vc_data; + struct _vx_cvirt *cvirt = &vxi->cvirt; + struct timespec uptime; + + do_posix_clock_monotonic_gettime(&uptime); + set_normalized_timespec(&uptime, + uptime.tv_sec - cvirt->bias_uptime.tv_sec, + uptime.tv_nsec - cvirt->bias_uptime.tv_nsec); + + vc_data.offset = timeval_to_ns(&cvirt->bias_tv); + vc_data.uptime = timespec_to_ns(&uptime); + vc_data.nr_threads = atomic_read(&cvirt->nr_threads); + vc_data.nr_running = atomic_read(&cvirt->nr_running); + vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible); + vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold); + vc_data.nr_forks = atomic_read(&cvirt->total_forks); + vc_data.load[0] = cvirt->load[0]; + vc_data.load[1] = cvirt->load[1]; + vc_data.load[2] = cvirt->load[2]; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +#ifdef CONFIG_VSERVER_VTIME + +/* virtualized time base */ + +void vx_gettimeofday(struct timeval *tv) +{ + do_gettimeofday(tv); + if (!vx_flags(VXF_VIRT_TIME, 
0))
+		return;
+	/* add the current context's bias_tv, then bring tv_usec back into range */
+	tv->tv_sec += current->vx_info->cvirt.bias_tv.tv_sec;
+	tv->tv_usec += current->vx_info->cvirt.bias_tv.tv_usec;
+
+	if (tv->tv_usec >= USEC_PER_SEC) {
+		tv->tv_sec++;
+		tv->tv_usec -= USEC_PER_SEC;
+	} else if (tv->tv_usec < 0) {
+		tv->tv_sec--;
+		tv->tv_usec += USEC_PER_SEC;
+	}
+}
+/*	vx_settimeofday()
+
+	* if the vx_flags(VXF_VIRT_TIME, 0) check fails the
+	* request goes to do_settimeofday(); otherwise only
+	* the difference between *ts and the real time is
+	* stored in the current context's bias_tv, and 0 is
+	* returned (tv_usec is stored without normalizing)	*/
+int vx_settimeofday(struct timespec *ts)
+{
+	struct timeval tv;
+
+	if (!vx_flags(VXF_VIRT_TIME, 0))
+		return do_settimeofday(ts);
+
+	do_gettimeofday(&tv);
+	current->vx_info->cvirt.bias_tv.tv_sec =
+		ts->tv_sec - tv.tv_sec;
+	current->vx_info->cvirt.bias_tv.tv_usec =
+		(ts->tv_nsec/NSEC_PER_USEC) - tv.tv_usec;
+	return 0;
+}
+
+#endif
+
diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/cvirt_init.h linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cvirt_init.h
--- linux-2.6.19.1/kernel/vserver/cvirt_init.h	1970-01-01 01:00:00 +0100
+++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cvirt_init.h	2006-11-14 02:11:41 +0100
@@ -0,0 +1,71 @@
+
+
+extern uint64_t vx_idle_jiffies(void);
+/*	vx_info_init_cvirt()
+
+	* set up a context's cvirt data: the uptime, clock and
+	* idle biases are taken from the current monotonic time
+	* and vx_idle_jiffies(), bias_tv and all counters and
+	* load values start at zero, locks and the syslog wait
+	* queue are initialized				*/
+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
+{
+	uint64_t idle_jiffies = vx_idle_jiffies();
+	uint64_t nsuptime;
+
+	do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
+	nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
+		* NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;	/* bias_uptime in nanoseconds */
+	cvirt->bias_clock = nsec_to_clock_t(nsuptime);
+	cvirt->bias_tv.tv_sec = 0;
+	cvirt->bias_tv.tv_usec = 0;
+
+	jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
+	atomic_set(&cvirt->nr_threads, 0);
+	atomic_set(&cvirt->nr_running, 0);
+	atomic_set(&cvirt->nr_uninterruptible, 0);
+	atomic_set(&cvirt->nr_onhold, 0);
+
+	spin_lock_init(&cvirt->load_lock);
+	cvirt->load_last = jiffies;
+	atomic_set(&cvirt->load_updates, 0);
+	cvirt->load[0] = 0;
+	cvirt->load[1] = 0;
+	cvirt->load[2] = 0;
+	atomic_set(&cvirt->total_forks, 0);
+
+	spin_lock_init(&cvirt->syslog.logbuf_lock);
+	init_waitqueue_head(&cvirt->syslog.log_wait);
+	cvirt->syslog.log_start = 0;
+	cvirt->syslog.log_end = 0;
+
cvirt->syslog.con_start = 0;
+	cvirt->syslog.logged_chars = 0;
+}
+/* per-cpu init: currently does nothing (the only statement is commented out) */
+static inline
+void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
+{
+	// cvirt_pc->cpustat = { 0 };
+}
+/*	vx_info_exit_cvirt()
+
+	* with CONFIG_VSERVER_DEBUG each of the four task
+	* counters is handed to vxwprintk() as its condition
+	* (presumably it warns when the counter is nonzero -
+	* TODO confirm against vxwprintk); otherwise a no-op	*/
+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
+{
+#ifdef CONFIG_VSERVER_DEBUG
+	int value;
+
+	vxwprintk((value = atomic_read(&cvirt->nr_threads)),
+		"!!! cvirt: %p[nr_threads] = %d on exit.",
+		cvirt, value);
+	vxwprintk((value = atomic_read(&cvirt->nr_running)),
+		"!!! cvirt: %p[nr_running] = %d on exit.",
+		cvirt, value);
+	vxwprintk((value = atomic_read(&cvirt->nr_uninterruptible)),
+		"!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
+		cvirt, value);
+	vxwprintk((value = atomic_read(&cvirt->nr_onhold)),
+		"!!! cvirt: %p[nr_onhold] = %d on exit.",
+		cvirt, value);
+#endif
+	return;
+}
+/* per-cpu exit: nothing to do */
+static inline
+void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
+{
+	return;
+}
+
diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/cvirt_proc.h linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cvirt_proc.h
--- linux-2.6.19.1/kernel/vserver/cvirt_proc.h	1970-01-01 01:00:00 +0100
+++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/cvirt_proc.h	2006-11-14 03:08:43 +0100
@@ -0,0 +1,138 @@
+#ifndef _VX_CVIRT_PROC_H
+#define _VX_CVIRT_PROC_H
+
+#include
+#include
+#include
+#include
+
+/*	vx_info_proc_nsproxy()
+
+	* format the nsproxy pointers, the namespace root
+	* path, the uts names and the ipc settings as text
+	* into buffer; sections whose namespace pointer is
+	* NULL are skipped, a NULL nsproxy prints nothing
+	* returns the number of characters written
+	* NOTE(review): the sprintf() calls are unbounded and
+	* the d_path() result is printed without an IS_ERR()
+	* check - confirm buffer size and error handling	*/
+static inline
+int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer)
+{
+	struct namespace *ns;
+	struct uts_namespace *uts;
+	struct ipc_namespace *ipc;
+	struct vfsmount *mnt;
+	char *path, *root;
+	int length = 0;
+
+	if (!nsproxy)
+		goto out;
+
+	length += sprintf(buffer + length,
+		"NSProxy:\t%p [%p,%p,%p]\n",
+		nsproxy, nsproxy->namespace,
+		nsproxy->uts_ns, nsproxy->ipc_ns);
+
+	ns = nsproxy->namespace;
+	if (!ns)
+		goto skip_ns;
+
+	path = kmalloc(PATH_MAX, GFP_KERNEL);	/* scratch buffer for d_path() */
+	if (!path)
+		goto skip_ns;
+
+	mnt = ns->root;
+	root = d_path(mnt->mnt_root, mnt->mnt_parent, path, PATH_MAX-2);
+	length += sprintf(buffer + length,
+		"Namespace:\t%p [#%u]\n"
+		"RootPath:\t%s\n"
+
,ns , atomic_read(&ns->count)
+		,root);
+	kfree(path);
+skip_ns:
+
+	uts = nsproxy->uts_ns;
+	if (!uts)
+		goto skip_uts;
+	/* each name is printed with a precision of __NEW_UTS_LEN characters */
+	length += sprintf(buffer + length,
+		"SysName:\t%.*s\n"
+		"NodeName:\t%.*s\n"
+		"Release:\t%.*s\n"
+		"Version:\t%.*s\n"
+		"Machine:\t%.*s\n"
+		"DomainName:\t%.*s\n"
+		,__NEW_UTS_LEN, uts->name.sysname
+		,__NEW_UTS_LEN, uts->name.nodename
+		,__NEW_UTS_LEN, uts->name.release
+		,__NEW_UTS_LEN, uts->name.version
+		,__NEW_UTS_LEN, uts->name.machine
+		,__NEW_UTS_LEN, uts->name.domainname
+		);
+skip_uts:
+
+	ipc = nsproxy->ipc_ns;
+	if (!ipc)
+		goto skip_ipc;
+
+	length += sprintf(buffer + length,
+		"SEMS:\t\t%d %d %d %d %d\n"
+		"MSG:\t\t%d %d %d\n"
+		"SHM:\t\t%lu %lu %d %d\n"
+		,ipc->sem_ctls[0], ipc->sem_ctls[1]
+		,ipc->sem_ctls[2], ipc->sem_ctls[3]
+		,ipc->used_sems
+		,ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni
+		,(unsigned long)ipc->shm_ctlmax
+		,(unsigned long)ipc->shm_ctlall
+		,ipc->shm_ctlmni, ipc->shm_tot
+		);
+skip_ipc:
+
+out:
+	return length;
+}
+
+
+#include
+/* LOAD_INT: part of x above FSHIFT bits; LOAD_FRAC: the bits below FIXED_1, scaled by 100 */
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+/*	vx_info_proc_cvirt()
+
+	* format the uptime bias, the task counters and the
+	* three load values of a context as text into buffer
+	* returns the number of characters written
+	* the sprintf() calls are unbounded			*/
+static inline
+int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
+{
+	int length = 0;
+	int a, b, c;
+
+	length += sprintf(buffer + length,
+		"BiasUptime:\t%lu.%02lu\n",
+		(unsigned long)cvirt->bias_uptime.tv_sec,
+		(cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
+	/* FIXED_1/200 is half of a hundredth, so the two printed decimals are rounded */
+	a = cvirt->load[0] + (FIXED_1/200);
+	b = cvirt->load[1] + (FIXED_1/200);
+	c = cvirt->load[2] + (FIXED_1/200);
+	length += sprintf(buffer + length,
+		"nr_threads:\t%d\n"
+		"nr_running:\t%d\n"
+		"nr_unintr:\t%d\n"
+		"nr_onhold:\t%d\n"
+		"load_updates:\t%d\n"
+		"loadavg:\t%d.%02d %d.%02d %d.%02d\n"
+		"total_forks:\t%d\n"
+		,atomic_read(&cvirt->nr_threads)
+		,atomic_read(&cvirt->nr_running)
+		,atomic_read(&cvirt->nr_uninterruptible)
+		,atomic_read(&cvirt->nr_onhold)
+		,atomic_read(&cvirt->load_updates)
+		,LOAD_INT(a), LOAD_FRAC(a)
+		,LOAD_INT(b), LOAD_FRAC(b)
+		,LOAD_INT(c), LOAD_FRAC(c)
+
,atomic_read(&cvirt->total_forks)
+		);
+
+	return length;
+}
+
+static inline
+int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
+	char *buffer, int cpu)
+{
+	int length = 0;	/* nothing is written for the per-cpu part */
+	return length;
+}
+
+#endif /* _VX_CVIRT_PROC_H */
diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/debug.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/debug.c
--- linux-2.6.19.1/kernel/vserver/debug.c	1970-01-01 01:00:00 +0100
+++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/debug.c	2006-11-30 18:53:18 +0100
@@ -0,0 +1,35 @@
+/*
+ * kernel/vserver/debug.c
+ *
+ * Copyright (C) 2005 Herbert Pötzl
+ *
+ * V0.01 vx_info dump support
+ *
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+
+/*	dump_vx_info()
+
+	* print vxi's address, id, use count, task count and
+	* state; for level > 0 also call the limit, sched,
+	* cvirt and cacct dump helpers; ends with a "---"
+	* separator line					*/
+void dump_vx_info(struct vx_info *vxi, int level)
+{
+	printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,
+		atomic_read(&vxi->vx_usecnt),
+		atomic_read(&vxi->vx_tasks),
+		vxi->vx_state);
+	if (level > 0) {
+		__dump_vx_limit(&vxi->limit);
+		__dump_vx_sched(&vxi->sched);
+		__dump_vx_cvirt(&vxi->cvirt);
+		__dump_vx_cacct(&vxi->cacct);
+	}
+	printk("---\n");
+}
+
+
+EXPORT_SYMBOL_GPL(dump_vx_info);
+
diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/dlimit.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/dlimit.c
--- linux-2.6.19.1/kernel/vserver/dlimit.c	1970-01-01 01:00:00 +0100
+++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/dlimit.c	2006-11-08 04:57:50 +0100
@@ -0,0 +1,527 @@
+/*
+ * linux/kernel/vserver/dlimit.c
+ *
+ * Virtual Server: Context Disk Limits
+ *
+ * Copyright (C) 2004-2005 Herbert Pötzl
+ *
+ * V0.01 initial version
+ * V0.02 compat32 splitup
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+/*	__alloc_dl_info()
+
+	* allocate an initialized dl_info struct
+	* doesn't make it visible (hash)
+	* returns a null pointer if the allocation fails	*/
+
+static struct dl_info *__alloc_dl_info(struct super_block *sb, tag_t tag)
+{
+	struct dl_info *new = NULL;
+
+	vxdprintk(VXD_CBIT(dlim, 5),
+		"alloc_dl_info(%p,%d)*", sb, tag);
+
+	/* would this
benefit from a slab cache? */ + new = kmalloc(sizeof(struct dl_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct dl_info)); + new->dl_tag = tag; + new->dl_sb = sb; + INIT_RCU_HEAD(&new->dl_rcu); + INIT_HLIST_NODE(&new->dl_hlist); + spin_lock_init(&new->dl_lock); + atomic_set(&new->dl_refcnt, 0); + atomic_set(&new->dl_usecnt, 0); + + /* rest of init goes here */ + + vxdprintk(VXD_CBIT(dlim, 4), + "alloc_dl_info(%p,%d) = %p", sb, tag, new); + return new; +} + +/* __dealloc_dl_info() + + * final disposal of dl_info */ + +static void __dealloc_dl_info(struct dl_info *dli) +{ + vxdprintk(VXD_CBIT(dlim, 4), + "dealloc_dl_info(%p)", dli); + + dli->dl_hlist.next = LIST_POISON1; + dli->dl_tag = -1; + dli->dl_sb = 0; + + BUG_ON(atomic_read(&dli->dl_usecnt)); + BUG_ON(atomic_read(&dli->dl_refcnt)); + + kfree(dli); +} + + +/* hash table for dl_info hash */ + +#define DL_HASH_SIZE 13 + +struct hlist_head dl_info_hash[DL_HASH_SIZE]; + +static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(struct super_block *sb, tag_t tag) +{ + return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE); +} + + + +/* __hash_dl_info() + + * add the dli to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_dl_info(struct dl_info *dli) +{ + struct hlist_head *head; + + vxdprintk(VXD_CBIT(dlim, 6), + "__hash_dl_info: %p[#%d]", dli, dli->dl_tag); + get_dl_info(dli); + head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)]; + hlist_add_head_rcu(&dli->dl_hlist, head); +} + +/* __unhash_dl_info() + + * remove the dli from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_dl_info(struct dl_info *dli) +{ + vxdprintk(VXD_CBIT(dlim, 6), + "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag); + hlist_del_rcu(&dli->dl_hlist); + put_dl_info(dli); +} + + +/* __lookup_dl_info() + + * requires the rcu_read_lock() + * doesn't increment the dl_refcnt */ + +static inline 
struct dl_info *__lookup_dl_info(struct super_block *sb, tag_t tag) +{ + struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)]; + struct hlist_node *pos; + struct dl_info *dli; + + hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) { + + if (dli->dl_tag == tag && dli->dl_sb == sb) { + return dli; + } + } + return NULL; +} + + +struct dl_info *locate_dl_info(struct super_block *sb, tag_t tag) +{ + struct dl_info *dli; + + rcu_read_lock(); + dli = get_dl_info(__lookup_dl_info(sb, tag)); + vxdprintk(VXD_CBIT(dlim, 7), + "locate_dl_info(%p,#%d) = %p", sb, tag, dli); + rcu_read_unlock(); + return dli; +} + +void rcu_free_dl_info(struct rcu_head *head) +{ + struct dl_info *dli = container_of(head, struct dl_info, dl_rcu); + int usecnt, refcnt; + + BUG_ON(!dli || !head); + + usecnt = atomic_read(&dli->dl_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&dli->dl_refcnt); + BUG_ON(refcnt < 0); + + vxdprintk(VXD_CBIT(dlim, 3), + "rcu_free_dl_info(%p)", dli); + if (!usecnt) + __dealloc_dl_info(dli); + else + printk("!!! 
rcu didn't free\n"); +} + + + + +static int do_addrem_dlimit(uint32_t id, const char __user *name, + uint32_t flags, int add) +{ + struct nameidata nd; + int ret; + + ret = user_path_walk_link(name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + if (add) { + dli = __alloc_dl_info(sb, id); + spin_lock(&dl_info_hash_lock); + + ret = -EEXIST; + if (__lookup_dl_info(sb, id)) + goto out_unlock; + __hash_dl_info(dli); + dli = NULL; + } else { + spin_lock(&dl_info_hash_lock); + dli = __lookup_dl_info(sb, id); + + ret = -ESRCH; + if (!dli) + goto out_unlock; + __unhash_dl_info(dli); + } + ret = 0; + out_unlock: + spin_unlock(&dl_info_hash_lock); + if (add && dli) + __dealloc_dl_info(dli); + out_release: + path_release(&nd); + } + return ret; +} + +int vc_add_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1); +} + +int vc_rem_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0); +} + +#ifdef CONFIG_COMPAT + +int vc_add_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0_x32 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, + compat_ptr(vc_data.name_ptr), vc_data.flags, 1); +} + +int vc_rem_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0_x32 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, + compat_ptr(vc_data.name_ptr), vc_data.flags, 0); +} + +#endif /* CONFIG_COMPAT */ + + +static inline +int 
do_set_dlimit(uint32_t id, const char __user *name, + uint32_t space_used, uint32_t space_total, + uint32_t inodes_used, uint32_t inodes_total, + uint32_t reserved, uint32_t flags) +{ + struct nameidata nd; + int ret; + + ret = user_path_walk_link(name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + if ((reserved != CDLIM_KEEP && + reserved > 100) || + (inodes_used != CDLIM_KEEP && + inodes_used > inodes_total) || + (space_used != CDLIM_KEEP && + space_used > space_total)) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + + if (inodes_used != CDLIM_KEEP) + dli->dl_inodes_used = inodes_used; + if (inodes_total != CDLIM_KEEP) + dli->dl_inodes_total = inodes_total; + if (space_used != CDLIM_KEEP) { + dli->dl_space_used = space_used; + dli->dl_space_used <<= 10; + } + if (space_total == CDLIM_INFINITY) + dli->dl_space_total = DLIM_INFINITY; + else if (space_total != CDLIM_KEEP) { + dli->dl_space_total = space_total; + dli->dl_space_total <<= 10; + } + if (reserved != CDLIM_KEEP) + dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100; + + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = 0; + + out_release: + path_release(&nd); + } + return ret; +} + +int vc_set_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_dlimit(id, vc_data.name, + vc_data.space_used, vc_data.space_total, + vc_data.inodes_used, vc_data.inodes_total, + vc_data.reserved, vc_data.flags); +} + +#ifdef CONFIG_COMPAT + +int vc_set_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0_x32 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_dlimit(id, 
compat_ptr(vc_data.name_ptr), + vc_data.space_used, vc_data.space_total, + vc_data.inodes_used, vc_data.inodes_total, + vc_data.reserved, vc_data.flags); +} + +#endif /* CONFIG_COMPAT */ + + +static inline +int do_get_dlimit(uint32_t id, const char __user *name, + uint32_t *space_used, uint32_t *space_total, + uint32_t *inodes_used, uint32_t *inodes_total, + uint32_t *reserved, uint32_t *flags) +{ + struct nameidata nd; + int ret; + + ret = user_path_walk_link(name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + *inodes_used = dli->dl_inodes_used; + *inodes_total = dli->dl_inodes_total; + *space_used = dli->dl_space_used >> 10; + if (dli->dl_space_total == DLIM_INFINITY) + *space_total = CDLIM_INFINITY; + else + *space_total = dli->dl_space_total >> 10; + + *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10); + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = -EFAULT; + + ret = 0; + out_release: + path_release(&nd); + } + return ret; +} + + +int vc_get_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0 vc_data; + int ret; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_dlimit(id, vc_data.name, + &vc_data.space_used, &vc_data.space_total, + &vc_data.inodes_used, &vc_data.inodes_total, + &vc_data.reserved, &vc_data.flags); + if (ret) + return ret; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +#ifdef CONFIG_COMPAT + +int vc_get_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0_x32 vc_data; + int ret; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr), + &vc_data.space_used, 
&vc_data.space_total, + &vc_data.inodes_used, &vc_data.inodes_total, + &vc_data.reserved, &vc_data.flags); + if (ret) + return ret; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +#endif /* CONFIG_COMPAT */ + + +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct dl_info *dli; + __u64 blimit, bfree, bavail; + __u32 ifree; + + dli = locate_dl_info(sb, dx_current_tag()); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY) + goto no_ilim; + + /* reduce max inodes available to limit */ + if (buf->f_files > dli->dl_inodes_total) + buf->f_files = dli->dl_inodes_total; + + ifree = dli->dl_inodes_total - dli->dl_inodes_used; + /* reduce free inodes to min */ + if (ifree < buf->f_ffree) + buf->f_ffree = ifree; + +no_ilim: + if (dli->dl_space_total == DLIM_INFINITY) + goto no_blim; + + blimit = dli->dl_space_total >> sb->s_blocksize_bits; + + if (dli->dl_space_total < dli->dl_space_used) + bfree = 0; + else + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + + bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult); + if (bavail < dli->dl_space_used) + bavail = 0; + else + bavail = (bavail - dli->dl_space_used) + >> sb->s_blocksize_bits; + + /* reduce max space available to limit */ + if (buf->f_blocks > blimit) + buf->f_blocks = blimit; + + /* reduce free space to min */ + if (bfree < buf->f_bfree) + buf->f_bfree = bfree; + + /* reduce avail space to min */ + if (bavail < buf->f_bavail) + buf->f_bavail = bavail; + +no_blim: + spin_unlock(&dli->dl_lock); + put_dl_info(dli); + + return; +} + +#include + +EXPORT_SYMBOL_GPL(locate_dl_info); +EXPORT_SYMBOL_GPL(rcu_free_dl_info); + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/helper.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/helper.c --- linux-2.6.19.1/kernel/vserver/helper.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/helper.c 2006-11-08 
04:57:47 +0100 @@ -0,0 +1,208 @@ +/* + * linux/kernel/vserver/helper.c + * + * Virtual Context Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic helper + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +char vshelper_path[255] = "/sbin/vshelper"; + + +static int do_vshelper(char *name, char *argv[], char *envp[], int sync) +{ + int ret; + + if ((ret = call_usermodehelper(name, argv, envp, sync))) { + printk( KERN_WARNING + "%s: (%s %s) returned %s with %d\n", + name, argv[1], argv[2], + sync?"sync":"async", ret); + } + vxdprintk(VXD_CBIT(switch, 4), + "%s: (%s %s) returned %s with %d", + name, argv[1], argv[2], sync?"sync":"async", ret); + return ret; +} + +/* + * vshelper path is set via /proc/sys + * invoked by vserver sys_reboot(), with + * the following arguments + * + * argv [0] = vshelper_path; + * argv [1] = action: "restart", "halt", "poweroff", ... + * argv [2] = context identifier + * + * envp [*] = type-specific parameters + */ + +long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg) +{ + char id_buf[8], cmd_buf[16]; + char uid_buf[16], pid_buf[16]; + int ret; + + char *argv[] = {vshelper_path, NULL, id_buf, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + uid_buf, pid_buf, cmd_buf, 0}; + + if (vx_info_state(vxi, VXS_HELPER)) + return -EAGAIN; + vxi->vx_state |= VXS_HELPER; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id); + + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid); + snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid); + + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + argv[1] = "restart"; + break; + + case LINUX_REBOOT_CMD_HALT: + argv[1] = "halt"; + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + argv[1] = "poweroff"; + break; + + case LINUX_REBOOT_CMD_SW_SUSPEND: + argv[1] = "swsusp"; + break; + + default: + 
vxi->vx_state &= ~VXS_HELPER; + return 0; + } + +#ifndef CONFIG_VSERVER_LEGACY + ret = do_vshelper(vshelper_path, argv, envp, 1); +#else + ret = do_vshelper(vshelper_path, argv, envp, 0); +#endif + vxi->vx_state &= ~VXS_HELPER; + __wakeup_vx_info(vxi); + return (ret) ? -EPERM : 0; +} + + +long vs_reboot(unsigned int cmd, void __user * arg) +{ + struct vx_info *vxi = current->vx_info; + long ret = 0; + + vxdprintk(VXD_CBIT(misc, 5), + "vs_reboot(%p[#%d],%d)", + vxi, vxi?vxi->vx_id:0, cmd); + + ret = vs_reboot_helper(vxi, cmd, arg); + if (ret) + return ret; + + vxi->reboot_cmd = cmd; + if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) { + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + case LINUX_REBOOT_CMD_HALT: + case LINUX_REBOOT_CMD_POWER_OFF: + vx_info_kill(vxi, 0, SIGKILL); + vx_info_kill(vxi, 1, SIGKILL); + default: + break; + } + } + return 0; +} + + +/* + * argv [0] = vshelper_path; + * argv [1] = action: "startup", "shutdown" + * argv [2] = context identifier + * + * envp [*] = type-specific parameters + */ + +long vs_state_change(struct vx_info *vxi, unsigned int cmd) +{ + char id_buf[8], cmd_buf[16]; + char *argv[] = {vshelper_path, NULL, id_buf, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; + + if (!vx_info_flags(vxi, VXF_SC_HELPER, 0)) + return 0; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id); + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + + switch (cmd) { + case VSC_STARTUP: + argv[1] = "startup"; + break; + case VSC_SHUTDOWN: + argv[1] = "shutdown"; + break; + default: + return 0; + } + + return do_vshelper(vshelper_path, argv, envp, 1); +} + + +/* + * argv [0] = vshelper_path; + * argv [1] = action: "netup", "netdown" + * argv [2] = context identifier + * + * envp [*] = type-specific parameters + */ + +long vs_net_change(struct nx_info *nxi, unsigned int cmd) +{ + char id_buf[8], cmd_buf[16]; + char *argv[] = {vshelper_path, NULL, id_buf, 0}; + char *envp[] = {"HOME=/", 
"TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; + + if (!nx_info_flags(nxi, NXF_SC_HELPER, 0)) + return 0; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", nxi->nx_id); + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + + switch (cmd) { + case VSC_NETUP: + argv[1] = "netup"; + break; + case VSC_NETDOWN: + argv[1] = "netdown"; + break; + default: + return 0; + } + + return do_vshelper(vshelper_path, argv, envp, 1); +} + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/history.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/history.c --- linux-2.6.19.1/kernel/vserver/history.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/history.c 2006-11-30 18:53:18 +0100 @@ -0,0 +1,264 @@ +/* + * kernel/vserver/history.c + * + * Virtual Context History Backtrace + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic structure + * V0.02 hash/unhash and trace + * V0.03 preemption fixes + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + + +#ifdef CONFIG_VSERVER_HISTORY +#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE +#else +#define VXH_SIZE 64 +#endif + +struct _vx_history { + unsigned int counter; + + struct _vx_hist_entry entry[VXH_SIZE+1]; +}; + + +DEFINE_PER_CPU(struct _vx_history, vx_history_buffer); + +unsigned volatile int vxh_active = 1; + +static atomic_t sequence = ATOMIC_INIT(0); + + +/* vxh_advance() + + * requires disabled preemption */ + +struct _vx_hist_entry *vxh_advance(void *loc) +{ + unsigned int cpu = smp_processor_id(); + struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu); + struct _vx_hist_entry *entry; + unsigned int index; + + index = vxh_active ? 
(hist->counter++ % VXH_SIZE) : VXH_SIZE; + entry = &hist->entry[index]; + + entry->seq = atomic_inc_return(&sequence); + entry->loc = loc; + return entry; +} + +EXPORT_SYMBOL_GPL(vxh_advance); + + +#define VXH_LOC_FMTS "(#%04x,*%d):%p" + +#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc + + +#define VXH_VXI_FMTS "%p[#%d,%d.%d]" + +#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \ + (e)->vxi.ptr?(e)->vxi.xid:0, \ + (e)->vxi.ptr?(e)->vxi.usecnt:0, \ + (e)->vxi.ptr?(e)->vxi.tasks:0 + +void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu) +{ + switch (e->type) { + case VXH_THROW_OOPS: + printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e)); + break; + + case VXH_GET_VX_INFO: + case VXH_PUT_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_GET_VX_INFO)?"get":"put", + VXH_VXI_ARGS(e)); + break; + + case VXH_INIT_VX_INFO: + case VXH_SET_VX_INFO: + case VXH_CLR_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", + VXH_LOC_ARGS(e), + (e->type==VXH_INIT_VX_INFO)?"init": + ((e->type==VXH_SET_VX_INFO)?"set":"clr"), + VXH_VXI_ARGS(e), e->sc.data); + break; + + case VXH_CLAIM_VX_INFO: + case VXH_RELEASE_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", + VXH_LOC_ARGS(e), + (e->type==VXH_CLAIM_VX_INFO)?"claim":"release", + VXH_VXI_ARGS(e), e->sc.data); + break; + + case VXH_ALLOC_VX_INFO: + case VXH_DEALLOC_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_ALLOC_VX_INFO)?"alloc":"dealloc", + VXH_VXI_ARGS(e)); + break; + + case VXH_HASH_VX_INFO: + case VXH_UNHASH_VX_INFO: + printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_HASH_VX_INFO)?"hash":"unhash", + VXH_VXI_ARGS(e)); + break; + + case VXH_LOC_VX_INFO: + case VXH_LOOKUP_VX_INFO: + case VXH_CREATE_VX_INFO: + printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_CREATE_VX_INFO)?"create": + 
((e->type==VXH_LOC_VX_INFO)?"loc":"lookup"), + e->ll.arg, VXH_VXI_ARGS(e)); + break; + } +} + +static void __vxh_dump_history(void) +{ + unsigned int i, cpu; + + printk("History:\tSEQ: %8x\tNR_CPUS: %d\n", + atomic_read(&sequence), NR_CPUS); + + for (i=0; i < VXH_SIZE; i++) { + for_each_online_cpu(cpu) { + struct _vx_history *hist = + &per_cpu(vx_history_buffer, cpu); + unsigned int index = (hist->counter-i) % VXH_SIZE; + struct _vx_hist_entry *entry = &hist->entry[index]; + + vxh_dump_entry(entry, cpu); + } + } +} + +void vxh_dump_history(void) +{ + vxh_active = 0; +#ifdef CONFIG_SMP + local_irq_enable(); + smp_send_stop(); + local_irq_disable(); +#endif + __vxh_dump_history(); +} + + +/* vserver syscall commands below here */ + + +int vc_dump_history(uint32_t id) +{ + vxh_active = 0; + __vxh_dump_history(); + vxh_active = 1; + + return 0; +} + + +int do_read_history(struct __user _vx_hist_entry *data, + int cpu, uint32_t *index, uint32_t *count) +{ + int pos, ret = 0; + struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu); + int end = hist->counter; + int start = end - VXH_SIZE + 2; + int idx = *index; + + /* special case: get current pos */ + if (!*count) { + *index = end; + return 0; + } + + /* have we lost some data? */ + if (idx < start) + idx = start; + + for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) { + struct _vx_hist_entry *entry = + &hist->entry[idx % VXH_SIZE]; + + /* send entry to userspace */ + ret = copy_to_user (&data[pos], entry, sizeof(*entry)); + if (ret) + break; + } + /* save new index and count */ + *index = idx; + *count = pos; + return ret ? 
ret : (*index < end); +} + +int vc_read_history(uint32_t id, void __user *data) +{ + struct vcmd_read_history_v0 vc_data; + int ret; + + if (id >= NR_CPUS) + return -EINVAL; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data, + id, &vc_data.index, &vc_data.count); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT + +int vc_read_history_x32(uint32_t id, void __user *data) +{ + struct vcmd_read_history_v0_x32 vc_data; + int ret; + + if (id >= NR_CPUS) + return -EINVAL; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_read_history((struct __user _vx_hist_entry *) + compat_ptr(vc_data.data_ptr), + id, &vc_data.index, &vc_data.count); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return ret; +} + +#endif /* CONFIG_COMPAT */ + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/init.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/init.c --- linux-2.6.19.1/kernel/vserver/init.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/init.c 2006-12-10 23:00:38 +0100 @@ -0,0 +1,47 @@ +/* + * linux/kernel/init.c + * + * Virtual Server Init + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include + +int vserver_register_sysctl(void); +void vserver_unregister_sysctl(void); + + +static int __init init_vserver(void) +{ + int ret = 0; + +#ifdef CONFIG_VSERVER_DEBUG + vserver_register_sysctl(); +#endif + return ret; +} + + +static void __exit exit_vserver(void) +{ + +#ifdef CONFIG_VSERVER_DEBUG + vserver_unregister_sysctl(); +#endif + return; +} + +/* FIXME: GFP_ZONETYPES gone +long vx_slab[GFP_ZONETYPES]; */ +long vx_area; + + +module_init(init_vserver); +module_exit(exit_vserver); + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/inode.c 
linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/inode.c --- linux-2.6.19.1/kernel/vserver/inode.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/inode.c 2006-11-30 19:42:07 +0100 @@ -0,0 +1,369 @@ +/* + * linux/kernel/vserver/inode.c + * + * Virtual Server: File System Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 separated from vcontext V0.05 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask) +{ + struct proc_dir_entry *entry; + + if (!in || !in->i_sb) + return -ESRCH; + + *flags = IATTR_TAG + | (IS_BARRIER(in) ? IATTR_BARRIER : 0) + | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0) + | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0); + *mask = IATTR_IUNLINK | IATTR_IMMUTABLE; + + if (S_ISDIR(in->i_mode)) + *mask |= IATTR_BARRIER; + + if (IS_TAGGED(in)) { + *tag = in->i_tag; + *mask |= IATTR_TAG; + } + + switch (in->i_sb->s_magic) { + case PROC_SUPER_MAGIC: + entry = PROC_I(in)->pde; + + /* check for specific inodes? 
*/ + if (entry) + *mask |= IATTR_FLAGS; + if (entry) + *flags |= (entry->vx_flags & IATTR_FLAGS); + else + *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS); + break; + + case DEVPTS_SUPER_MAGIC: + *tag = in->i_tag; + *mask |= IATTR_TAG; + break; + + default: + break; + } + return 0; +} + +int vc_get_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data = { .xid = -1 }; + int ret; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_get_iattr(nd.dentry->d_inode, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT + +int vc_get_iattr_x32(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1_x32 vc_data = { .xid = -1 }; + int ret; + + if (!vx_check(0, VS_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd); + if (!ret) { + ret = __vc_get_iattr(nd.dentry->d_inode, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#endif /* CONFIG_COMPAT */ + + +static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask) +{ + struct inode *in = de->d_inode; + int error = 0, is_proc = 0, has_tag = 0; + struct iattr attr = { 0 }; + + if (!in || !in->i_sb) + return -ESRCH; + + is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC); + if ((*mask & IATTR_FLAGS) && !is_proc) + return -EINVAL; + + has_tag = IS_TAGGED(in) || + (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC); + if ((*mask & IATTR_TAG) && !has_tag) + return -EINVAL; + + mutex_lock(&in->i_mutex); + if (*mask & IATTR_TAG) { + 
attr.ia_tag = *tag; + attr.ia_valid |= ATTR_TAG; + } + + if (*mask & IATTR_FLAGS) { + struct proc_dir_entry *entry = PROC_I(in)->pde; + unsigned int iflags = PROC_I(in)->vx_flags; + + iflags = (iflags & ~(*mask & IATTR_FLAGS)) + | (*flags & IATTR_FLAGS); + PROC_I(in)->vx_flags = iflags; + if (entry) + entry->vx_flags = iflags; + } + + if (*mask & (IATTR_BARRIER | IATTR_IUNLINK | IATTR_IMMUTABLE)) { + if (*mask & IATTR_IMMUTABLE) { + if (*flags & IATTR_IMMUTABLE) + in->i_flags |= S_IMMUTABLE; + else + in->i_flags &= ~S_IMMUTABLE; + } + if (*mask & IATTR_IUNLINK) { + if (*flags & IATTR_IUNLINK) + in->i_flags |= S_IUNLINK; + else + in->i_flags &= ~S_IUNLINK; + } + if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) { + if (*flags & IATTR_BARRIER) + in->i_flags |= S_BARRIER; + else + in->i_flags &= ~S_BARRIER; + } + if (in->i_op && in->i_op->sync_flags) { + error = in->i_op->sync_flags(in); + if (error) + goto out; + } + } + + if (attr.ia_valid) { + if (in->i_op && in->i_op->setattr) + error = in->i_op->setattr(de, &attr); + else { + error = inode_change_ok(in, &attr); + if (!error) + error = inode_setattr(in, &attr); + } + } + +out: + mutex_unlock(&in->i_mutex); + return error; +} + +int vc_set_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data; + int ret; + + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_set_iattr(nd.dentry, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT + +int vc_set_iattr_x32(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1_x32 vc_data; + int ret; + + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; 
+ + ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd); + if (!ret) { + ret = __vc_set_iattr(nd.dentry, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_VSERVER_LEGACY + +#define PROC_DYNAMIC_FIRST 0xF0000000UL + +int vx_proc_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + struct proc_dir_entry *entry; + int error = 0; + int flags; + + if (inode->i_ino < PROC_DYNAMIC_FIRST) + return -ENOTTY; + + entry = PROC_I(inode)->pde; + if (!entry) + return -ENOTTY; + + switch(cmd) { + case FIOC_GETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + flags = entry->vx_flags; + if (capable(CAP_CONTEXT)) + error = put_user(flags, (int __user *) arg); + break; + } + case FIOC_SETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -EFAULT; + if (get_user(flags, (int __user *) arg)) + break; + error = 0; + entry->vx_flags = flags; + break; + } + default: + return -ENOTTY; + } + return error; +} +#endif /* CONFIG_VSERVER_LEGACY */ + +#ifdef CONFIG_PROPAGATE + +int dx_parse_tag(char *string, tag_t *tag, int remove) +{ + static match_table_t tokens = { + {1, "tagid=%u"}, + {0, NULL} + }; + substring_t args[MAX_OPT_ARGS]; + int token, option = 0; + + if (!string) + return 0; + + token = match_token(string, tokens, args); + if (token && tag && !match_int(args, &option)) + *tag = option; + + vxdprintk(VXD_CBIT(tag, 7), + "dx_parse_tag(»%s«): %d:#%d", + string, token, option); + + if ((token == 1) && remove) { + char *p = strstr(string, "tagid="); + char *q = p; + + if (p) { + while (*q != '\0' && *q != ',') + q++; + while (*q) + *p++ = *q++; + while (*p) + *p++ = '\0'; + } + } + return token; +} + +void __dx_propagate_tag(struct nameidata *nd, 
struct inode *inode) +{ + tag_t new_tag = 0; + struct vfsmount *mnt; + int propagate; + + if (!nd) + return; + mnt = nd->mnt; + if (!mnt) + return; + + propagate = (mnt->mnt_flags & MNT_TAGID); + if (propagate) + new_tag = mnt->mnt_tag; + + vxdprintk(VXD_CBIT(tag, 7), + "dx_propagate_tag(%p[#%lu.%d]): %d,%d", + inode, inode->i_ino, inode->i_tag, + new_tag, (propagate)?1:0); + + if (propagate) + inode->i_tag = new_tag; +} + +#include + +EXPORT_SYMBOL_GPL(__dx_propagate_tag); + +#endif /* CONFIG_PROPAGATE */ + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/legacy.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/legacy.c --- linux-2.6.19.1/kernel/vserver/legacy.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/legacy.c 2006-12-05 18:18:31 +0100 @@ -0,0 +1,115 @@ +/* + * linux/kernel/vserver/legacy.c + * + * Virtual Server: Legacy Funtions + * + * Copyright (C) 2001-2003 Jacques Gelinas + * Copyright (C) 2003-2006 Herbert Pötzl + * + * V0.01 broken out from vcontext.c V0.05 + * V0.02 updated to spaces *sigh* + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +extern int vx_set_init(struct vx_info *, struct task_struct *); + +static int vx_set_initpid(struct vx_info *vxi, int pid) +{ + struct task_struct *init; + + init = find_task_by_real_pid(pid); + if (!init) + return -ESRCH; + return vx_set_init(vxi, init); +} + +int vc_new_s_context(uint32_t ctx, void __user *data) +{ + int ret = -ENOMEM; + struct vcmd_new_s_context_v1 vc_data; + struct vx_info *new_vxi; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* legacy hack, will be removed soon */ + if (ctx == -2) { + /* assign flags and initpid */ + if (!current->vx_info) + return -EINVAL; + ret = 0; + if (vc_data.flags & VX_INFO_INIT) + ret = vx_set_initpid(current->vx_info, current->tgid); + if (ret == 0) { + /* We keep the same vx_id, but lower the capabilities */ + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); 
+ ret = vx_current_xid(); + current->vx_info->vx_flags |= vc_data.flags; + } + return ret; + } + + if (!vx_check(0, VS_ADMIN) || !capable(CAP_SYS_ADMIN) + /* might make sense in the future, or not ... */ + || vx_flags(VX_INFO_PRIVATE, 0)) + return -EPERM; + + /* ugly hack for Spectator */ + if (ctx == 1) { + current->xid = 1; + return 0; + } + + if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) || + (ctx == 0)) + return -EINVAL; + + if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT)) + new_vxi = lookup_or_create_vx_info(ctx); + else + new_vxi = lookup_vx_info(ctx); + + if (!new_vxi) + return -EINVAL; + + ret = -EPERM; + if (!vx_info_flags(new_vxi, VXF_STATE_SETUP, 0) && + vx_info_flags(new_vxi, VX_INFO_PRIVATE, 0)) + goto out_put; + + new_vxi->vx_flags &= ~VXF_STATE_SETUP; + + ret = vx_migrate_task(current, new_vxi, 1); + if (ret == 0) { + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); + new_vxi->vx_flags |= vc_data.flags; + if (vc_data.flags & VX_INFO_INIT) + vx_set_initpid(new_vxi, current->tgid); + /* FIXME: nsproxy + if (vc_data.flags & VX_INFO_NAMESPACE) + vx_set_namespace(new_vxi, + current->namespace, current->fs); */ + if (vc_data.flags & VX_INFO_NPROC) + __rlim_set(&new_vxi->limit, RLIMIT_NPROC, + current->signal->rlim[RLIMIT_NPROC].rlim_max); + + /* tweak some defaults for legacy */ + new_vxi->vx_flags |= (VXF_HIDE_NETIF|VXF_INFO_INIT); + ret = new_vxi->vx_id; + } +out_put: + put_vx_info(new_vxi); + return ret; +} + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/legacynet.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/legacynet.c --- linux-2.6.19.1/kernel/vserver/legacynet.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/legacynet.c 2006-11-08 04:57:49 +0100 @@ -0,0 +1,84 @@ + +/* + * linux/kernel/vserver/legacynet.c + * + * Virtual Server: Legacy Network Funtions + * + * Copyright (C) 2001-2003 Jacques Gelinas + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from legacy.c + * + */ + +#include 
+#include +#include +#include +#include +#include + +#include +#include + + +extern struct nx_info *create_nx_info(void); + +/* set ipv4 root (syscall) */ + +int vc_set_ipv4root(uint32_t nbip, void __user *data) +{ + int i, err = -EPERM; + struct vcmd_set_ipv4root_v3 vc_data; + struct nx_info *new_nxi, *nxi = current->nx_info; + + if (nbip < 0 || nbip > NB_IPV4ROOT) + return -EINVAL; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN)) + /* We are allowed to change everything */ + err = 0; + else if (nxi) { + int found = 0; + + /* We are allowed to select a subset of the currently + installed IP numbers. No new one are allowed + We can't change the broadcast address though */ + for (i=0; inbipv4; j++) { + if (nxip == nxi->ipv4[j]) { + found++; + break; + } + } + } + if ((found == nbip) && + (vc_data.broadcast == nxi->v4_bcast)) + err = 0; + } + if (err) + return err; + + new_nxi = create_nx_info(); + if (IS_ERR(new_nxi)) + return -EINVAL; + + new_nxi->nbipv4 = nbip; + for (i=0; iipv4[i] = vc_data.nx_mask_pair[i].ip; + new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask; + } + new_nxi->v4_bcast = vc_data.broadcast; + if (nxi) + printk("!!! 
switching nx_info %p->%p\n", nxi, new_nxi); + + nx_migrate_task(current, new_nxi); + put_nx_info(new_nxi); + return 0; +} + + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/limit.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/limit.c --- linux-2.6.19.1/kernel/vserver/limit.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/limit.c 2006-11-30 19:32:53 +0100 @@ -0,0 +1,317 @@ +/* + * linux/kernel/vserver/limit.c + * + * Virtual Server: Context Limits + * + * Copyright (C) 2004-2006 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * V0.02 changed vcmds to vxi arg + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +const char *vlimit_name[NUM_LIMITS] = { + [RLIMIT_CPU] = "CPU", + [RLIMIT_RSS] = "RSS", + [RLIMIT_NPROC] = "NPROC", + [RLIMIT_NOFILE] = "NOFILE", + [RLIMIT_MEMLOCK] = "VML", + [RLIMIT_AS] = "VM", + [RLIMIT_LOCKS] = "LOCKS", + [RLIMIT_SIGPENDING] = "SIGP", + [RLIMIT_MSGQUEUE] = "MSGQ", + + [VLIMIT_NSOCK] = "NSOCK", + [VLIMIT_OPENFD] = "OPENFD", + [VLIMIT_ANON] = "ANON", + [VLIMIT_SHMEM] = "SHMEM", + [VLIMIT_DENTRY] = "DENTRY", +}; + +EXPORT_SYMBOL_GPL(vlimit_name); + +#define MASK_ENTRY(x) (1 << (x)) + +const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = { + /* minimum */ + 0 + , /* softlimit */ + MASK_ENTRY( RLIMIT_RSS ) | + MASK_ENTRY( VLIMIT_ANON ) | + 0 + , /* maximum */ + MASK_ENTRY( RLIMIT_RSS ) | + MASK_ENTRY( RLIMIT_NPROC ) | + MASK_ENTRY( RLIMIT_NOFILE ) | + MASK_ENTRY( RLIMIT_MEMLOCK ) | + MASK_ENTRY( RLIMIT_AS ) | + MASK_ENTRY( RLIMIT_LOCKS ) | + MASK_ENTRY( RLIMIT_MSGQUEUE ) | + + MASK_ENTRY( VLIMIT_NSOCK ) | + MASK_ENTRY( VLIMIT_OPENFD ) | + MASK_ENTRY( VLIMIT_ANON ) | + MASK_ENTRY( VLIMIT_SHMEM ) | + MASK_ENTRY( VLIMIT_DENTRY ) | + 0 +}; + /* accounting only */ +uint32_t account_mask = + MASK_ENTRY( VLIMIT_SEMARY ) | + MASK_ENTRY( VLIMIT_NSEMS ) | + 0; + + +static int is_valid_vlimit(int id) +{ + uint32_t mask = vlimit_mask.minimum | + vlimit_mask.softlimit | 
vlimit_mask.maximum; + return mask & (1 << id); +} + +static int is_accounted_vlimit(int id) +{ + if (is_valid_vlimit(id)) + return 1; + return account_mask & (1 << id); +} + + +static inline uint64_t vc_get_soft(struct vx_info *vxi, int id) +{ + rlim_t limit = __rlim_soft(&vxi->limit, id); + return VX_VLIM(limit); +} + +static inline uint64_t vc_get_hard(struct vx_info *vxi, int id) +{ + rlim_t limit = __rlim_hard(&vxi->limit, id); + return VX_VLIM(limit); +} + +static int do_get_rlimit(struct vx_info *vxi, uint32_t id, + uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum) +{ + if (!is_valid_vlimit(id)) + return -EINVAL; + + if (minimum) + *minimum = CRLIM_UNSET; + if (softlimit) + *softlimit = vc_get_soft(vxi, id); + if (maximum) + *maximum = vc_get_hard(vxi, id); + return 0; +} + +int vc_get_rlimit(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_rlimit_v0 vc_data; + int ret; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_rlimit(vxi, vc_data.id, + &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum); + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +static int do_set_rlimit(struct vx_info *vxi, uint32_t id, + uint64_t minimum, uint64_t softlimit, uint64_t maximum) +{ + if (!is_valid_vlimit(id)) + return -EINVAL; + + if (maximum != CRLIM_KEEP) + __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum); + if (softlimit != CRLIM_KEEP) + __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit); + + /* clamp soft limit */ + if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id)) + __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id); + + return 0; +} + +int vc_set_rlimit(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_rlimit_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_rlimit(vxi, vc_data.id, + vc_data.minimum, vc_data.softlimit, vc_data.maximum); +} + +#ifdef 
CONFIG_IA32_EMULATION + +int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_rlimit_v0_x32 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_rlimit(vxi, vc_data.id, + vc_data.minimum, vc_data.softlimit, vc_data.maximum); +} + +int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_rlimit_v0_x32 vc_data; + int ret; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_rlimit(vxi, vc_data.id, + &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum); + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +#endif /* CONFIG_IA32_EMULATION */ + + +int vc_get_rlimit_mask(uint32_t id, void __user *data) +{ + if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask))) + return -EFAULT; + return 0; +} + + +static inline void vx_reset_minmax(struct _vx_limit *limit) +{ + rlim_t value; + int lim; + + for (lim=0; limlimit); + return 0; +} + + +int vc_rlimit_stat(struct vx_info *vxi, void __user *data) +{ + struct vcmd_rlimit_stat_v0 vc_data; + struct _vx_limit *limit = &vxi->limit; + int id; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + id = vc_data.id; + if (!is_accounted_vlimit(id)) + return -EINVAL; + + vc_data.hits = atomic_read(&__rlim_lhit(limit, id)); + vc_data.value = __rlim_get(limit, id); + vc_data.minimum = __rlim_rmin(limit, id); + vc_data.maximum = __rlim_rmax(limit, id); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +void vx_vsi_meminfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long totalram, freeram; + rlim_t v; + + /* we blindly accept the max */ + v = __rlim_soft(&vxi->limit, RLIMIT_RSS); + totalram = (v != RLIM_INFINITY) ? 
v : val->totalram; + + /* total minus used equals free */ + v = __vx_cres_array_fixup(&vxi->limit, VLA_RSS); + freeram = (v < totalram) ? totalram - v : 0; + + val->totalram = totalram; + val->freeram = freeram; + val->bufferram = 0; + val->totalhigh = 0; + val->freehigh = 0; + return; +} + +void vx_vsi_swapinfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long totalswap, freeswap; + rlim_t v, w; + + v = __rlim_soft(&vxi->limit, RLIMIT_RSS); + if (v == RLIM_INFINITY) { + val->freeswap = val->totalswap; + return; + } + + /* we blindly accept the max */ + w = __rlim_hard(&vxi->limit, RLIMIT_RSS); + totalswap = (w != RLIM_INFINITY) ? (w - v) : val->totalswap; + + /* currently 'used' swap */ + w = __vx_cres_array_fixup(&vxi->limit, VLA_RSS); + w -= (w > v) ? v : w; + + /* total minus used equals free */ + freeswap = (w < totalswap) ? totalswap - w : 0; + + val->totalswap = totalswap; + val->freeswap = freeswap; + return; +} + + +unsigned long vx_badness(struct task_struct *task, struct mm_struct *mm) +{ + struct vx_info *vxi = mm->mm_vx_info; + unsigned long points; + rlim_t v, w; + + if (!vxi) + return 0; + + v = __vx_cres_array_fixup(&vxi->limit, VLA_RSS); + w = __rlim_soft(&vxi->limit, RLIMIT_RSS); + points = (v > w) ? 
(v - w) : 0; + + return points; +} + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/limit_init.h linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/limit_init.h --- linux-2.6.19.1/kernel/vserver/limit_init.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/limit_init.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,33 @@ + + +static inline void vx_info_init_limit(struct _vx_limit *limit) +{ + int lim; + + for (lim=0; lim + +static inline void vx_limit_fixup(struct _vx_limit *limit) +{ + rlim_t value; + int res; + + /* complex resources first */ + __vx_cres_array_fixup(limit, VLA_RSS); + + for (res=0; res __rlim_hard(limit, res)) + __rlim_rmax(limit, res) = __rlim_hard(limit, res); + } +} + + +#define VX_LIMIT_FMT ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n" +#define VX_LIMIT_TOP \ + "Limit\t current\t min/max\t\t soft/hard\t\thits\n" + +#define VX_LIMIT_ARG(r) \ + ,(unsigned long)__rlim_get(limit, r) \ + ,(unsigned long)__rlim_rmin(limit, r) \ + ,(unsigned long)__rlim_rmax(limit, r) \ + ,VX_VLIM(__rlim_soft(limit, r)) \ + ,VX_VLIM(__rlim_hard(limit, r)) \ + ,atomic_read(&__rlim_lhit(limit, r)) + +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer) +{ + vx_limit_fixup(limit); + return sprintf(buffer, VX_LIMIT_TOP + "PROC" VX_LIMIT_FMT + "VM" VX_LIMIT_FMT + "VML" VX_LIMIT_FMT + "RSS" VX_LIMIT_FMT + "ANON" VX_LIMIT_FMT + "RMAP" VX_LIMIT_FMT + "FILES" VX_LIMIT_FMT + "OFD" VX_LIMIT_FMT + "LOCKS" VX_LIMIT_FMT + "SOCK" VX_LIMIT_FMT + "MSGQ" VX_LIMIT_FMT + "SHM" VX_LIMIT_FMT + "SEMA" VX_LIMIT_FMT + "SEMS" VX_LIMIT_FMT + "DENT" VX_LIMIT_FMT + VX_LIMIT_ARG(RLIMIT_NPROC) + VX_LIMIT_ARG(RLIMIT_AS) + VX_LIMIT_ARG(RLIMIT_MEMLOCK) + VX_LIMIT_ARG(RLIMIT_RSS) + VX_LIMIT_ARG(VLIMIT_ANON) + VX_LIMIT_ARG(VLIMIT_MAPPED) + VX_LIMIT_ARG(RLIMIT_NOFILE) + VX_LIMIT_ARG(VLIMIT_OPENFD) + VX_LIMIT_ARG(RLIMIT_LOCKS) + VX_LIMIT_ARG(VLIMIT_NSOCK) + VX_LIMIT_ARG(RLIMIT_MSGQUEUE) + VX_LIMIT_ARG(VLIMIT_SHMEM) + VX_LIMIT_ARG(VLIMIT_SEMARY) + VX_LIMIT_ARG(VLIMIT_NSEMS) + 
VX_LIMIT_ARG(VLIMIT_DENTRY) + ); +} + +#endif /* _VX_LIMIT_PROC_H */ + + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/monitor.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/monitor.c --- linux-2.6.19.1/kernel/vserver/monitor.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/monitor.c 2006-11-08 04:57:48 +0100 @@ -0,0 +1,142 @@ +/* + * kernel/vserver/monitor.c + * + * Virtual Context Scheduler Monitor + * + * Copyright (C) 2006 Herbert Pötzl + * + * V0.01 basic design + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + + +#ifdef CONFIG_VSERVER_MONITOR +#define VXM_SIZE CONFIG_VSERVER_MONITOR_SIZE +#else +#define VXM_SIZE 64 +#endif + +struct _vx_monitor { + unsigned int counter; + + struct _vx_mon_entry entry[VXM_SIZE+1]; +}; + + +DEFINE_PER_CPU(struct _vx_monitor, vx_monitor_buffer); + +unsigned volatile int vxm_active = 1; + +static atomic_t sequence = ATOMIC_INIT(0); + + +/* vxm_advance() + + * requires disabled preemption */ + +struct _vx_mon_entry *vxm_advance(int cpu) +{ + struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu); + struct _vx_mon_entry *entry; + unsigned int index; + + index = vxm_active ? (mon->counter++ % VXM_SIZE) : VXM_SIZE; + entry = &mon->entry[index]; + + entry->ev.seq = atomic_inc_return(&sequence); + entry->ev.jif = jiffies; + return entry; +} + +EXPORT_SYMBOL_GPL(vxm_advance); + + +int do_read_monitor(struct __user _vx_mon_entry *data, + int cpu, uint32_t *index, uint32_t *count) +{ + int pos, ret = 0; + struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu); + int end = mon->counter; + int start = end - VXM_SIZE + 2; + int idx = *index; + + /* special case: get current pos */ + if (!*count) { + *index = end; + return 0; + } + + /* have we lost some data? 
*/ + if (idx < start) + idx = start; + + for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) { + struct _vx_mon_entry *entry = + &mon->entry[idx % VXM_SIZE]; + + /* send entry to userspace */ + ret = copy_to_user (&data[pos], entry, sizeof(*entry)); + if (ret) + break; + } + /* save new index and count */ + *index = idx; + *count = pos; + return ret ? ret : (*index < end); +} + +int vc_read_monitor(uint32_t id, void __user *data) +{ + struct vcmd_read_monitor_v0 vc_data; + int ret; + + if (id >= NR_CPUS) + return -EINVAL; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_read_monitor((struct __user _vx_mon_entry *)vc_data.data, + id, &vc_data.index, &vc_data.count); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT + +int vc_read_monitor_x32(uint32_t id, void __user *data) +{ + struct vcmd_read_monitor_v0_x32 vc_data; + int ret; + + if (id >= NR_CPUS) + return -EINVAL; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_read_monitor((struct __user _vx_mon_entry *) + compat_ptr(vc_data.data_ptr), + id, &vc_data.index, &vc_data.count); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return ret; +} + +#endif /* CONFIG_COMPAT */ + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/network.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/network.c --- linux-2.6.19.1/kernel/vserver/network.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/network.c 2006-12-06 21:03:02 +0100 @@ -0,0 +1,763 @@ +/* + * linux/kernel/vserver/network.c + * + * Virtual Server: Network Support + * + * Copyright (C) 2003-2006 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * V0.02 cleaned up implementation + * V0.03 added equiv nx commands + * V0.04 switch to RCU based hash + * V0.05 and back to locking again + * V0.06 changed vcmds to nxi arg + * + */ + +#include +#include +#include + 
+#include +#include +#include + + +atomic_t nx_global_ctotal = ATOMIC_INIT(0); +atomic_t nx_global_cactive = ATOMIC_INIT(0); + + +/* __alloc_nx_info() + + * allocate an initialized nx_info struct + * doesn't make it visible (hash) */ + +static struct nx_info *__alloc_nx_info(nid_t nid) +{ + struct nx_info *new = NULL; + + vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid); + + /* would this benefit from a slab cache? */ + new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct nx_info)); + new->nx_id = nid; + INIT_HLIST_NODE(&new->nx_hlist); + atomic_set(&new->nx_usecnt, 0); + atomic_set(&new->nx_tasks, 0); + new->nx_state = 0; + + new->nx_flags = NXF_INIT_SET; + + /* rest of init goes here */ + + vxdprintk(VXD_CBIT(nid, 0), + "alloc_nx_info(%d) = %p", nid, new); + atomic_inc(&nx_global_ctotal); + return new; +} + +/* __dealloc_nx_info() + + * final disposal of nx_info */ + +static void __dealloc_nx_info(struct nx_info *nxi) +{ + vxdprintk(VXD_CBIT(nid, 0), + "dealloc_nx_info(%p)", nxi); + + nxi->nx_hlist.next = LIST_POISON1; + nxi->nx_id = -1; + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_tasks)); + + nxi->nx_state |= NXS_RELEASED; + kfree(nxi); + atomic_dec(&nx_global_ctotal); +} + +static void __shutdown_nx_info(struct nx_info *nxi) +{ + nxi->nx_state |= NXS_SHUTDOWN; + vs_net_change(nxi, VSC_NETDOWN); +} + +/* exported stuff */ + +void free_nx_info(struct nx_info *nxi) +{ + /* context shutdown is mandatory */ + BUG_ON(nxi->nx_state != NXS_SHUTDOWN); + + /* context must not be hashed */ + BUG_ON(nxi->nx_state & NXS_HASHED); + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_tasks)); + + __dealloc_nx_info(nxi); +} + + +/* hash table for nx_info hash */ + +#define NX_HASH_SIZE 13 + +struct hlist_head nx_info_hash[NX_HASH_SIZE]; + +static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(nid_t nid) +{ + return (nid % 
NX_HASH_SIZE); +} + + + +/* __hash_nx_info() + + * add the nxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_nx_info(struct nx_info *nxi) +{ + struct hlist_head *head; + + vxd_assert_lock(&nx_info_hash_lock); + vxdprintk(VXD_CBIT(nid, 4), + "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id); + + /* context must not be hashed */ + BUG_ON(nx_info_state(nxi, NXS_HASHED)); + + nxi->nx_state |= NXS_HASHED; + head = &nx_info_hash[__hashval(nxi->nx_id)]; + hlist_add_head(&nxi->nx_hlist, head); + atomic_inc(&nx_global_cactive); +} + +/* __unhash_nx_info() + + * remove the nxi from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_nx_info(struct nx_info *nxi) +{ + vxd_assert_lock(&nx_info_hash_lock); + vxdprintk(VXD_CBIT(nid, 4), + "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id); + + /* context must be hashed */ + BUG_ON(!nx_info_state(nxi, NXS_HASHED)); + + nxi->nx_state &= ~NXS_HASHED; + hlist_del(&nxi->nx_hlist); + atomic_dec(&nx_global_cactive); +} + + +/* __lookup_nx_info() + + * requires the hash_lock to be held + * doesn't increment the nx_refcnt */ + +static inline struct nx_info *__lookup_nx_info(nid_t nid) +{ + struct hlist_head *head = &nx_info_hash[__hashval(nid)]; + struct hlist_node *pos; + struct nx_info *nxi; + + vxd_assert_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { + nxi = hlist_entry(pos, struct nx_info, nx_hlist); + + if (nxi->nx_id == nid) + goto found; + } + nxi = NULL; +found: + vxdprintk(VXD_CBIT(nid, 0), + "__lookup_nx_info(#%u): %p[#%u]", + nid, nxi, nxi?nxi->nx_id:0); + return nxi; +} + + +/* __nx_dynamic_id() + + * find unused dynamic nid + * requires the hash_lock to be held */ + +static inline nid_t __nx_dynamic_id(void) +{ + static nid_t seq = MAX_N_CONTEXT; + nid_t barrier = seq; + + vxd_assert_lock(&nx_info_hash_lock); + do { + if (++seq > MAX_N_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__lookup_nx_info(seq)) { + vxdprintk(VXD_CBIT(nid, 4), + 
"__nx_dynamic_id: [#%d]", seq); + return seq; + } + } while (barrier != seq); + return 0; +} + +/* __create_nx_info() + + * create the requested context + * get() and hash it */ + +static struct nx_info * __create_nx_info(int id) +{ + struct nx_info *new, *nxi = NULL; + + vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id); + + if (!(new = __alloc_nx_info(id))) + return ERR_PTR(-ENOMEM); + + /* required to make dynamic xids unique */ + spin_lock(&nx_info_hash_lock); + + /* dynamic context requested */ + if (id == NX_DYNAMIC_ID) { +#ifdef CONFIG_VSERVER_DYNAMIC_IDS + id = __nx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + nxi = ERR_PTR(-EAGAIN); + goto out_unlock; + } + new->nx_id = id; +#else + printk(KERN_ERR "dynamic contexts disabled.\n"); + nxi = ERR_PTR(-EINVAL); + goto out_unlock; +#endif + } + /* static context requested */ + else if ((nxi = __lookup_nx_info(id))) { + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) = %p (already there)", id, nxi); + if (nx_info_flags(nxi, NXF_STATE_SETUP, 0)) + nxi = ERR_PTR(-EBUSY); + else + nxi = ERR_PTR(-EEXIST); + goto out_unlock; + } + /* dynamic nid creation blocker */ + else if (id >= MIN_D_CONTEXT) { + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) (dynamic rejected)", id); + nxi = ERR_PTR(-EINVAL); + goto out_unlock; + } + + /* new context */ + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) = %p (new)", id, new); + __hash_nx_info(get_nx_info(new)); + nxi = new, new = NULL; + +out_unlock: + spin_unlock(&nx_info_hash_lock); + if (new) + __dealloc_nx_info(new); + return nxi; +} + + + +/* exported stuff */ + + +void unhash_nx_info(struct nx_info *nxi) +{ + __shutdown_nx_info(nxi); + spin_lock(&nx_info_hash_lock); + __unhash_nx_info(nxi); + spin_unlock(&nx_info_hash_lock); +} + +#ifdef CONFIG_VSERVER_LEGACYNET + +struct nx_info *create_nx_info(void) +{ + return __create_nx_info(NX_DYNAMIC_ID); +} + +#endif + +/* lookup_nx_info() + + * search for a nx_info and get() it + * 
negative id means current */ + +struct nx_info *lookup_nx_info(int id) +{ + struct nx_info *nxi = NULL; + + if (id < 0) { + nxi = get_nx_info(current->nx_info); + } else if (id > 1) { + spin_lock(&nx_info_hash_lock); + nxi = get_nx_info(__lookup_nx_info(id)); + spin_unlock(&nx_info_hash_lock); + } + return nxi; +} + +/* nid_is_hashed() + + * verify that nid is still hashed */ + +int nid_is_hashed(nid_t nid) +{ + int hashed; + + spin_lock(&nx_info_hash_lock); + hashed = (__lookup_nx_info(nid) != NULL); + spin_unlock(&nx_info_hash_lock); + return hashed; +} + + +#ifdef CONFIG_PROC_FS + +/* get_nid_list() + + * get a subset of hashed nids for proc + * assumes size is at least one */ + +int get_nid_list(int index, unsigned int *nids, int size) +{ + int hindex, nr_nids = 0; + + /* only show current and children */ + if (!nx_check(0, VS_ADMIN|VS_WATCH)) { + if (index > 0) + return 0; + nids[nr_nids] = nx_current_nid(); + return 1; + } + + for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) { + struct hlist_head *head = &nx_info_hash[hindex]; + struct hlist_node *pos; + + spin_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { + struct nx_info *nxi; + + if (--index > 0) + continue; + + nxi = hlist_entry(pos, struct nx_info, nx_hlist); + nids[nr_nids] = nxi->nx_id; + if (++nr_nids >= size) { + spin_unlock(&nx_info_hash_lock); + goto out; + } + } + /* keep the lock time short */ + spin_unlock(&nx_info_hash_lock); + } +out: + return nr_nids; +} +#endif + + +/* + * migrate task to new network + * gets nxi, puts old_nxi on change + */ + +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) +{ + struct nx_info *old_nxi; + int ret = 0; + + if (!p || !nxi) + BUG(); + + vxdprintk(VXD_CBIT(nid, 5), + "nx_migrate_task(%p,%p[#%d.%d.%d])", + p, nxi, nxi->nx_id, + atomic_read(&nxi->nx_usecnt), + atomic_read(&nxi->nx_tasks)); + + if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) && + !nx_info_flags(nxi, NXF_STATE_SETUP, 0)) + return -EACCES; + + /* maybe disallow this 
completely? */ + old_nxi = task_get_nx_info(p); + if (old_nxi == nxi) + goto out; + + task_lock(p); + if (old_nxi) + clr_nx_info(&p->nx_info); + claim_nx_info(nxi, p); + set_nx_info(&p->nx_info, nxi); + p->nid = nxi->nx_id; + task_unlock(p); + + vxdprintk(VXD_CBIT(nid, 5), + "moved task %p into nxi:%p[#%d]", + p, nxi, nxi->nx_id); + + if (old_nxi) + release_nx_info(old_nxi, p); + ret = 0; +out: + put_nx_info(old_nxi); + return ret; +} + + +#ifdef CONFIG_INET + +#include +#include + +int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) +{ + if (!nxi) + return 1; + if (!ifa) + return 0; + return addr_in_nx_info(nxi, ifa->ifa_local); +} + +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) +{ + struct in_device *in_dev; + struct in_ifaddr **ifap; + struct in_ifaddr *ifa; + int ret = 0; + + if (!nxi) + return 1; + + in_dev = in_dev_get(dev); + if (!in_dev) + goto out; + + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) { + if (addr_in_nx_info(nxi, ifa->ifa_local)) { + ret = 1; + break; + } + } + in_dev_put(in_dev); +out: + return ret; +} + +/* + * check if address is covered by socket + * + * sk: the socket to check against + * addr: the address in question (must be != 0) + */ +static inline int __addr_in_socket(struct sock *sk, uint32_t addr) +{ + struct nx_info *nxi = sk->sk_nx_info; + uint32_t saddr = inet_rcv_saddr(sk); + + vxdprintk(VXD_CBIT(net, 5), + "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx", + sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0)); + + if (saddr) { + /* direct address match */ + return (saddr == addr); + } else if (nxi) { + /* match against nx_info */ + return addr_in_nx_info(nxi, addr); + } else { + /* unrestricted any socket */ + return 1; + } +} + + +int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk) +{ + vxdprintk(VXD_CBIT(net, 2), + "nx_addr_conflict(%p,%p) %d.%d,%d.%d", + nxi, sk, VXD_QUAD(addr)); + + if 
(addr) { + /* check real address */ + return __addr_in_socket(sk, addr); + } else if (nxi) { + /* check against nx_info */ + int i, n = nxi->nbipv4; + + for (i=0; iipv4[i])) + return 1; + return 0; + } else { + /* check against any */ + return 1; + } +} + +#endif /* CONFIG_INET */ + +void nx_set_persistent(struct nx_info *nxi) +{ + get_nx_info(nxi); + claim_nx_info(nxi, current); +} + +void nx_clear_persistent(struct nx_info *nxi) +{ + vxdprintk(VXD_CBIT(nid, 6), + "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id); + + release_nx_info(nxi, current); + put_nx_info(nxi); +} + +void nx_update_persistent(struct nx_info *nxi) +{ + if (nx_info_flags(nxi, NXF_PERSISTENT, 0)) + nx_set_persistent(nxi); + else + nx_clear_persistent(nxi); +} + +/* vserver syscall commands below here */ + +/* taks nid and nx_info functions */ + +#include + + +int vc_task_nid(uint32_t id, void __user *data) +{ + nid_t nid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VS_ADMIN|VS_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + nid = (tsk) ? 
tsk->nid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + nid = nx_current_nid(); + return nid; +} + + +int vc_nx_info(struct nx_info *nxi, void __user *data) +{ + struct vcmd_nx_info_v0 vc_data; + + vc_data.nid = nxi->nx_id; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +/* network functions */ + +int vc_net_create(uint32_t nid, void __user *data) +{ + struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET }; + struct nx_info *new_nxi; + int ret; + + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if ((nid > MAX_S_CONTEXT) && (nid != NX_DYNAMIC_ID)) + return -EINVAL; + if (nid < 2) + return -EINVAL; + + new_nxi = __create_nx_info(nid); + if (IS_ERR(new_nxi)) + return PTR_ERR(new_nxi); + + /* initial flags */ + new_nxi->nx_flags = vc_data.flagword; + + /* get a reference for persistent contexts */ + if ((vc_data.flagword & NXF_PERSISTENT)) + nx_set_persistent(new_nxi); + + ret = -ENOEXEC; + if (vs_net_change(new_nxi, VSC_NETUP)) + goto out_unhash; + ret = nx_migrate_task(current, new_nxi); + if (!ret) { + /* return context id on success */ + ret = new_nxi->nx_id; + goto out; + } +out_unhash: + /* prepare for context disposal */ + new_nxi->nx_state |= NXS_SHUTDOWN; + if ((vc_data.flagword & NXF_PERSISTENT)) + nx_clear_persistent(new_nxi); + __unhash_nx_info(new_nxi); +out: + put_nx_info(new_nxi); + return ret; +} + + +int vc_net_migrate(struct nx_info *nxi, void __user *data) +{ + return nx_migrate_task(current, nxi); +} + +int vc_net_add(struct nx_info *nxi, void __user *data) +{ + struct vcmd_net_addr_v0 vc_data; + int index, pos, ret = 0; + + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + switch (vc_data.type) { + case NXA_TYPE_IPV4: + if ((vc_data.count < 1) || (vc_data.count > 4)) + return -EINVAL; + break; + + default: + break; + } + + switch (vc_data.type) { + case NXA_TYPE_IPV4: + index = 0; + while ((index < vc_data.count) && + 
((pos = nxi->nbipv4) < NB_IPV4ROOT)) { + nxi->ipv4[pos] = vc_data.ip[index]; + nxi->mask[pos] = vc_data.mask[index]; + index++; + nxi->nbipv4++; + } + ret = index; + break; + + case NXA_TYPE_IPV4|NXA_MOD_BCAST: + nxi->v4_bcast = vc_data.ip[0]; + ret = 1; + break; + + default: + ret = -EINVAL; + break; + } + return ret; +} + +int vc_net_remove(struct nx_info * nxi, void __user *data) +{ + struct vcmd_net_addr_v0 vc_data; + + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + switch (vc_data.type) { + case NXA_TYPE_ANY: + nxi->nbipv4 = 0; + break; + + default: + return -EINVAL; + } + return 0; +} + +int vc_get_nflags(struct nx_info *nxi, void __user *data) +{ + struct vcmd_net_flags_v0 vc_data; + + vc_data.flagword = nxi->nx_flags; + + /* special STATE flag handling */ + vc_data.mask = vs_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_nflags(struct nx_info *nxi, void __user *data) +{ + struct vcmd_net_flags_v0 vc_data; + uint64_t mask, trigger; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* special STATE flag handling */ + mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME); + trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword); + + nxi->nx_flags = vs_mask_flags(nxi->nx_flags, + vc_data.flagword, mask); + if (trigger & NXF_PERSISTENT) + nx_update_persistent(nxi); + + return 0; +} + +int vc_get_ncaps(struct nx_info *nxi, void __user *data) +{ + struct vcmd_net_caps_v0 vc_data; + + vc_data.ncaps = nxi->nx_ncaps; + vc_data.cmask = ~0UL; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_ncaps(struct nx_info *nxi, void __user *data) +{ + struct vcmd_net_caps_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps, + vc_data.ncaps, vc_data.cmask); + 
return 0; +} + + +#include + +EXPORT_SYMBOL_GPL(free_nx_info); +EXPORT_SYMBOL_GPL(unhash_nx_info); + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/proc.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/proc.c --- linux-2.6.19.1/kernel/vserver/proc.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/proc.c 2006-12-10 00:14:21 +0100 @@ -0,0 +1,1006 @@ +/* + * linux/kernel/vserver/proc.c + * + * Virtual Context Support + * + * Copyright (C) 2003-2006 Herbert Pötzl + * + * V0.01 basic structure + * V0.02 adaptation vs1.3.0 + * V0.03 proc permissions + * V0.04 locking/generic + * V0.05 next generation procfs + * V0.06 inode validation + * V0.07 generic rewrite vid + * V0.08 remove inode type + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "cvirt_proc.h" +#include "cacct_proc.h" +#include "limit_proc.h" +#include "sched_proc.h" +#include "vci_config.h" + +static struct proc_dir_entry *proc_virtual; + +static struct proc_dir_entry *proc_virtnet; + + + +// #define PROC_VID_MASK 0x60 + + +/* first the actual feeds */ + + +static int proc_vci(char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + "VCISyscall:\t%d\n" + "VCIKernel:\t%08x\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF + ,__NR_vserver + ,vci_kernel_config() + ); +} + +static int proc_virtual_info(char *buffer) +{ + return proc_vci(buffer); +} + +static int proc_virtual_status(char *buffer) +{ + return sprintf(buffer, + "#CTotal:\t%d\n" + "#CActive:\t%d\n" + ,atomic_read(&vx_global_ctotal) + ,atomic_read(&vx_global_cactive) + ); +} + + +int proc_vxi_info (struct vx_info *vxi, char *buffer) +{ + int length; + + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + "Init:\t%d\n" + ,vxi->vx_id + ,vxi + ,vxi->vx_initpid + ); + return length; +} + +int proc_vxi_status (struct vx_info *vxi, char *buffer) +{ + int length; + + length = sprintf(buffer, + "UseCnt:\t%d\n" + "Tasks:\t%d\n" + 
"Flags:\t%016llx\n" + "BCaps:\t%016llx\n" + "CCaps:\t%016llx\n" + "Spaces:\t%08lx\n" +// "Ticks:\t%d\n" + ,atomic_read(&vxi->vx_usecnt) + ,atomic_read(&vxi->vx_tasks) + ,(unsigned long long)vxi->vx_flags + ,(unsigned long long)vxi->vx_bcaps + ,(unsigned long long)vxi->vx_ccaps + ,vxi->vx_nsmask +// ,atomic_read(&vxi->limit.ticks) + ); + return length; +} + +int proc_vxi_limit (struct vx_info *vxi, char *buffer) +{ + return vx_info_proc_limit(&vxi->limit, buffer); +} + +int proc_vxi_sched (struct vx_info *vxi, char *buffer) +{ + int cpu, length; + + length = vx_info_proc_sched(&vxi->sched, buffer); + for_each_online_cpu(cpu) { + length += vx_info_proc_sched_pc( + &vx_per_cpu(vxi, sched_pc, cpu), + buffer + length, cpu); + } + return length; +} + +int proc_vxi_nsproxy (struct vx_info *vxi, char *buffer) +{ + return vx_info_proc_nsproxy(vxi->vx_nsproxy, buffer); +} + +int proc_vxi_cvirt (struct vx_info *vxi, char *buffer) +{ + int cpu, length; + + vx_update_load(vxi); + length = vx_info_proc_cvirt(&vxi->cvirt, buffer); + for_each_online_cpu(cpu) { + length += vx_info_proc_cvirt_pc( + &vx_per_cpu(vxi, cvirt_pc, cpu), + buffer + length, cpu); + } + return length; +} + +int proc_vxi_cacct (struct vx_info *vxi, char *buffer) +{ + return vx_info_proc_cacct(&vxi->cacct, buffer); +} + + +static int proc_virtnet_info(char *buffer) +{ + return proc_vci(buffer); +} + +static int proc_virtnet_status(char *buffer) +{ + return sprintf(buffer, + "#CTotal:\t%d\n" + "#CActive:\t%d\n" + ,atomic_read(&nx_global_ctotal) + ,atomic_read(&nx_global_cactive) + ); +} + +int proc_nxi_info (struct nx_info *nxi, char *buffer) +{ + int length, i; + + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + ,nxi->nx_id + ,nxi + ); + for (i=0; inbipv4; i++) { + length += sprintf(buffer + length, + "%d:\t" NIPQUAD_FMT "/" NIPQUAD_FMT "\n", i, + NIPQUAD(nxi->ipv4[i]), NIPQUAD(nxi->mask[i])); + } + return length; +} + +int proc_nxi_status (struct nx_info *nxi, char *buffer) +{ + int length; + + 
length = sprintf(buffer, + "UseCnt:\t%d\n" + "Tasks:\t%d\n" + ,atomic_read(&nxi->nx_usecnt) + ,atomic_read(&nxi->nx_tasks) + ); + return length; +} + + + +/* here the inode helpers */ + +struct vs_entry { + int len; + char *name; + mode_t mode; + struct inode_operations *iop; + struct file_operations *fop; + union proc_op op; +}; + +static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p) +{ + struct inode *inode = new_inode(sb); + + if (!inode) + goto out; + + inode->i_mode = p->mode; + if (p->iop) + inode->i_op = p->iop; + if (p->fop) + inode->i_fop = p->fop; + + inode->i_nlink = (p->mode & S_IFDIR) ? 2 : 1; + inode->i_flags |= S_IMMUTABLE; + + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_tag = 0; +out: + return inode; +} + +static struct dentry *vs_proc_instantiate(struct inode *dir, + struct dentry *dentry, int id, void *ptr) +{ + struct vs_entry *p = ptr; + struct inode *inode = vs_proc_make_inode(dir->i_sb, p); + struct dentry *error = ERR_PTR(-EINVAL); + + if (!inode) + goto out; + + PROC_I(inode)->op = p->op; + PROC_I(inode)->fd = id; + d_add(dentry, inode); + error = NULL; +out: + return error; +} + +/* Lookups */ + +typedef struct dentry *instantiate_t(struct inode *, struct dentry *, int, void *); + +/* + * Fill a directory entry. + * + * If possible create the dcache entry and derive our inode number and + * file type from dcache entry. + * + * Since all of the proc inode numbers are dynamically generated, the inode + * numbers do not exist until the inode is cache. This means creating the + * the dcache entry in readdir is necessary to keep the inode numbers + * reported by readdir in sync with the inode numbers reported + * by stat. 
+ */ +static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, + char *name, int len, instantiate_t instantiate, int id, void *ptr) +{ + struct dentry *child, *dir = filp->f_dentry; + struct inode *inode; + struct qstr qname; + ino_t ino = 0; + unsigned type = DT_UNKNOWN; + + qname.name = name; + qname.len = len; + qname.hash = full_name_hash(name, len); + + child = d_lookup(dir, &qname); + if (!child) { + struct dentry *new; + new = d_alloc(dir, &qname); + if (new) { + child = instantiate(dir->d_inode, new, id, ptr); + if (child) + dput(new); + else + child = new; + } + } + if (!child || IS_ERR(child) || !child->d_inode) + goto end_instantiate; + inode = child->d_inode; + if (inode) { + ino = inode->i_ino; + type = inode->i_mode >> 12; + } + dput(child); +end_instantiate: + if (!ino) + ino = find_inode_number(dir, &qname); + if (!ino) + ino = 1; + return filldir(dirent, name, len, filp->f_pos, ino, type); +} + + + +/* get and revalidate vx_info/xid */ + +static inline +struct vx_info *get_proc_vx_info(struct inode *inode) +{ + return lookup_vx_info(PROC_I(inode)->fd); +} + +static int proc_xid_revalidate(struct dentry * dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + xid_t xid = PROC_I(inode)->fd; + + if (!xid || xid_is_hashed(xid)) + return 1; + d_drop(dentry); + return 0; +} + + +/* get and revalidate nx_info/nid */ + +static int proc_nid_revalidate(struct dentry * dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + nid_t nid = PROC_I(inode)->fd; + + if (!nid || nid_is_hashed(nid)) + return 1; + d_drop(dentry); + return 0; +} + + + +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) + +static ssize_t proc_vs_info_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode; + unsigned long page; + ssize_t length = 0; + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + + /* fade that out as soon as stable */ + 
WARN_ON(PROC_I(inode)->fd); + + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + BUG_ON(!PROC_I(inode)->op.proc_vs_read); + length = PROC_I(inode)->op.proc_vs_read((char*)page); + + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, + (char *)page, length); + + free_page(page); + return length; +} + +static ssize_t proc_vx_info_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode; + struct vx_info *vxi = NULL; + xid_t xid = PROC_I(inode)->fd; + unsigned long page; + ssize_t length = 0; + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + + /* fade that out as soon as stable */ + WARN_ON(!xid); + vxi = lookup_vx_info(xid); + if (!vxi) + goto out; + + length = -ENOMEM; + if (!(page = __get_free_page(GFP_KERNEL))) + goto out_put; + + BUG_ON(!PROC_I(inode)->op.proc_vxi_read); + length = PROC_I(inode)->op.proc_vxi_read(vxi, (char*)page); + + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, + (char *)page, length); + + free_page(page); +out_put: + put_vx_info(vxi); +out: + return length; +} + +static ssize_t proc_nx_info_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode; + struct nx_info *nxi = NULL; + nid_t nid = PROC_I(inode)->fd; + unsigned long page; + ssize_t length = 0; + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + + /* fade that out as soon as stable */ + WARN_ON(!nid); + nxi = lookup_nx_info(nid); + if (!nxi) + goto out; + + length = -ENOMEM; + if (!(page = __get_free_page(GFP_KERNEL))) + goto out_put; + + BUG_ON(!PROC_I(inode)->op.proc_nxi_read); + length = PROC_I(inode)->op.proc_nxi_read(nxi, (char*)page); + + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, + (char *)page, length); + + free_page(page); +out_put: + put_nx_info(nxi); +out: + return length; +} + + + +/* here comes the lower level */ + + +#define 
NOD(NAME, MODE, IOP, FOP, OP) { \ + .len = sizeof(NAME) - 1, \ + .name = (NAME), \ + .mode = MODE, \ + .iop = IOP, \ + .fop = FOP, \ + .op = OP, \ +} + + +#define DIR(NAME, MODE, OTYPE) \ + NOD(NAME, (S_IFDIR|(MODE)), \ + &proc_##OTYPE##_inode_operations, \ + &proc_##OTYPE##_file_operations, { } ) + +#define INF(NAME, MODE, OTYPE) \ + NOD(NAME, (S_IFREG|(MODE)), NULL, \ + &proc_vs_info_file_operations, \ + { .proc_vs_read = &proc_##OTYPE } ) + +#define VINF(NAME, MODE, OTYPE) \ + NOD(NAME, (S_IFREG|(MODE)), NULL, \ + &proc_vx_info_file_operations, \ + { .proc_vxi_read = &proc_##OTYPE } ) + +#define NINF(NAME, MODE, OTYPE) \ + NOD(NAME, (S_IFREG|(MODE)), NULL, \ + &proc_nx_info_file_operations, \ + { .proc_nxi_read = &proc_##OTYPE } ) + + +static struct file_operations proc_vs_info_file_operations = { + .read = proc_vs_info_read, +}; + +static struct file_operations proc_vx_info_file_operations = { + .read = proc_vx_info_read, +}; + +static struct dentry_operations proc_xid_dentry_operations = { + .d_revalidate = proc_xid_revalidate, +}; + +static struct vs_entry vx_base_stuff[] = { + VINF("info", S_IRUGO, vxi_info), + VINF("status", S_IRUGO, vxi_status), + VINF("limit", S_IRUGO, vxi_limit), + VINF("sched", S_IRUGO, vxi_sched), + VINF("nsproxy", S_IRUGO, vxi_nsproxy), + VINF("cvirt", S_IRUGO, vxi_cvirt), + VINF("cacct", S_IRUGO, vxi_cacct), + {} +}; + + + + +static struct dentry *proc_xid_instantiate(struct inode *dir, + struct dentry *dentry, int id, void *ptr) +{ + dentry->d_op = &proc_xid_dentry_operations; + return vs_proc_instantiate(dir, dentry, id, ptr); +} + +static struct dentry *proc_xid_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct vs_entry *p = vx_base_stuff; + struct dentry *error = ERR_PTR(-ENOENT); + + for (; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (!p->name) + goto out; + + error = proc_xid_instantiate(dir, dentry, 
PROC_I(dir)->fd, p); +out: + return error; +} + +static int proc_xid_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + struct vs_entry *p = vx_base_stuff; + int size = sizeof(vx_base_stuff)/sizeof(struct vs_entry); + int pos, index; + u64 ino; + + pos = filp->f_pos; + switch (pos) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + default: + index = pos - 2; + if (index >= size) + goto out; + for (p += index; p->name; p++) { + if (proc_fill_cache(filp, dirent, filldir, p->name, p->len, + vs_proc_instantiate, PROC_I(inode)->fd, p)) + goto out; + pos++; + } + } +out: + filp->f_pos = pos; + return 1; +} + + + +static struct file_operations proc_nx_info_file_operations = { + .read = proc_nx_info_read, +}; + +static struct dentry_operations proc_nid_dentry_operations = { + .d_revalidate = proc_nid_revalidate, +}; + +static struct vs_entry nx_base_stuff[] = { + NINF("info", S_IRUGO, nxi_info), + NINF("status", S_IRUGO, nxi_status), + {} +}; + + +static struct dentry *proc_nid_instantiate(struct inode *dir, + struct dentry *dentry, int id, void *ptr) +{ + dentry->d_op = &proc_nid_dentry_operations; + return vs_proc_instantiate(dir, dentry, id, ptr); +} + +static struct dentry *proc_nid_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct vs_entry *p = nx_base_stuff; + struct dentry *error = ERR_PTR(-ENOENT); + + for (; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (!p->name) + goto out; + + error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p); +out: + return error; +} + +static int proc_nid_readdir(struct file * filp, + void * dirent, 
filldir_t filldir) +{ + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + struct vs_entry *p = nx_base_stuff; + int size = sizeof(nx_base_stuff)/sizeof(struct vs_entry); + int pos, index; + u64 ino; + + pos = filp->f_pos; + switch (pos) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + default: + index = pos - 2; + if (index >= size) + goto out; + for (p += index; p->name; p++) { + if (proc_fill_cache(filp, dirent, filldir, p->name, p->len, + vs_proc_instantiate, PROC_I(inode)->fd, p)) + goto out; + pos++; + } + } +out: + filp->f_pos = pos; + return 1; +} + + +#define MAX_MULBY10 ((~0U-9)/10) + +static inline int atovid(const char *str, int len) +{ + int vid, c; + + vid = 0; + while (len-- > 0) { + c = *str - '0'; + str++; + if (c > 9) + return -1; + if (vid >= MAX_MULBY10) + return -1; + vid *= 10; + vid += c; + if (!vid) + return -1; + } + return vid; +} + +/* now the upper level (virtual) */ + + +static struct file_operations proc_xid_file_operations = { + .read = generic_read_dir, + .readdir = proc_xid_readdir, +}; + +static struct inode_operations proc_xid_inode_operations = { + .lookup = proc_xid_lookup, +}; + +static struct vs_entry vx_virtual_stuff[] = { + INF("info", S_IRUGO, virtual_info), + INF("status", S_IRUGO, virtual_status), + DIR(NULL, S_IRUGO|S_IXUGO, xid), +}; + + +static struct dentry *proc_virtual_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct vs_entry *p = vx_virtual_stuff; + struct dentry *error = ERR_PTR(-ENOENT); + int id = 0; + + for (; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (p->name) + goto instantiate; + + id = atovid(dentry->d_name.name, dentry->d_name.len); + 
if ((id < 0) || !xid_is_hashed(id)) + goto out; + +instantiate: + error = proc_xid_instantiate(dir, dentry, id, p); +out: + return error; +} + +static struct file_operations proc_nid_file_operations = { + .read = generic_read_dir, + .readdir = proc_nid_readdir, +}; + +static struct inode_operations proc_nid_inode_operations = { + .lookup = proc_nid_lookup, +}; + +static struct vs_entry nx_virtnet_stuff[] = { + INF("info", S_IRUGO, virtnet_info), + INF("status", S_IRUGO, virtnet_status), + DIR(NULL, S_IRUGO|S_IXUGO, nid), +}; + + +static struct dentry *proc_virtnet_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct vs_entry *p = nx_virtnet_stuff; + struct dentry *error = ERR_PTR(-ENOENT); + int id = 0; + + for (; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (p->name) + goto instantiate; + + id = atovid(dentry->d_name.name, dentry->d_name.len); + if ((id < 0) || !nid_is_hashed(id)) + goto out; + +instantiate: + error = proc_nid_instantiate(dir, dentry, id, p); +out: + return error; +} + + + +#define PROC_NUMBUF 10 +#define PROC_MAXVIDS 32 + +int proc_virtual_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + struct vs_entry *p = vx_virtual_stuff; + int size = sizeof(vx_virtual_stuff)/sizeof(struct vs_entry); + int pos, index; + unsigned int xid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr_xids, i; + u64 ino; + + pos = filp->f_pos; + switch (pos) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + default: + index = pos - 2; + if (index >= size) + goto entries; + for (p += index; p->name; p++) { + if 
(proc_fill_cache(filp, dirent, filldir, p->name, p->len, + vs_proc_instantiate, 0, p)) + goto out; + pos++; + } + entries: + index = pos - size; + p = &vx_virtual_stuff[size-1]; + nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS); + for (i = 0; i < nr_xids; i++) { + int n, xid = xid_array[i]; + unsigned int j = PROC_NUMBUF; + + n = xid; + do buf[--j] = '0' + (n % 10); while (n /= 10); + + if (proc_fill_cache(filp, dirent, filldir, buf+j, PROC_NUMBUF-j, + vs_proc_instantiate, xid, p)) + goto out; + pos++; + } + } +out: + filp->f_pos = pos; + return 0; +} + + +static struct file_operations proc_virtual_dir_operations = { + .read = generic_read_dir, + .readdir = proc_virtual_readdir, +}; + +static struct inode_operations proc_virtual_dir_inode_operations = { + .lookup = proc_virtual_lookup, +}; + + + + + +int proc_virtnet_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + struct vs_entry *p = nx_virtnet_stuff; + int size = sizeof(nx_virtnet_stuff)/sizeof(struct vs_entry); + int pos, index; + unsigned int nid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr_nids, i; + u64 ino; + + pos = filp->f_pos; + switch (pos) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + default: + index = pos - 2; + if (index >= size) + goto entries; + for (p += index; p->name; p++) { + if (proc_fill_cache(filp, dirent, filldir, p->name, p->len, + vs_proc_instantiate, 0, p)) + goto out; + pos++; + } + entries: + index = pos - size; + p = &nx_virtnet_stuff[size-1]; + nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS); + for (i = 0; i < nr_nids; i++) { + int n, nid = nid_array[i]; + unsigned int j = PROC_NUMBUF; + + n = nid; + do buf[--j] = '0' + (n % 10); 
while (n /= 10); + + if (proc_fill_cache(filp, dirent, filldir, buf+j, PROC_NUMBUF-j, + vs_proc_instantiate, nid, p)) + goto out; + pos++; + } + } +out: + filp->f_pos = pos; + return 0; +} + + +static struct file_operations proc_virtnet_dir_operations = { + .read = generic_read_dir, + .readdir = proc_virtnet_readdir, +}; + +static struct inode_operations proc_virtnet_dir_inode_operations = { + .lookup = proc_virtnet_lookup, +}; + + + +void proc_vx_init(void) +{ + struct proc_dir_entry *ent; + + ent = proc_mkdir("virtual", 0); + if (ent) { + ent->proc_fops = &proc_virtual_dir_operations; + ent->proc_iops = &proc_virtual_dir_inode_operations; + } + proc_virtual = ent; + + ent = proc_mkdir("virtnet", 0); + if (ent) { + ent->proc_fops = &proc_virtnet_dir_operations; + ent->proc_iops = &proc_virtnet_dir_inode_operations; + } + proc_virtnet = ent; +} + + + + +/* per pid info */ + + +int proc_pid_vx_info(struct task_struct *p, char *buffer) +{ + struct vx_info *vxi; + char * orig = buffer; + + buffer += sprintf (buffer,"XID:\t%d\n", vx_task_xid(p)); + + vxi = task_get_vx_info(p); + if (!vxi) + goto out; + + buffer += sprintf (buffer,"BCaps:\t%016llx\n" + ,(unsigned long long)vxi->vx_bcaps); + buffer += sprintf (buffer,"CCaps:\t%016llx\n" + ,(unsigned long long)vxi->vx_ccaps); + buffer += sprintf (buffer,"CFlags:\t%016llx\n" + ,(unsigned long long)vxi->vx_flags); + buffer += sprintf (buffer,"CIPid:\t%d\n" + ,vxi->vx_initpid); + + put_vx_info(vxi); +out: + return buffer - orig; +} + + +int proc_pid_nx_info(struct task_struct *p, char *buffer) +{ + struct nx_info *nxi; + char * orig = buffer; + int i; + + buffer += sprintf (buffer,"NID:\t%d\n", nx_task_nid(p)); + + nxi = task_get_nx_info(p); + if (!nxi) + goto out; + + for (i=0; inbipv4; i++){ + buffer += sprintf (buffer, + "V4Root[%d]:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i + ,NIPQUAD(nxi->ipv4[i]) + ,NIPQUAD(nxi->mask[i])); + } + buffer += sprintf (buffer, + "V4Root[bcast]:\t%d.%d.%d.%d\n" + ,NIPQUAD(nxi->v4_bcast)); + + 
put_nx_info(nxi); +out: + return buffer - orig; +} + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/sched.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/sched.c --- linux-2.6.19.1/kernel/vserver/sched.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/sched.c 2006-11-08 04:57:45 +0100 @@ -0,0 +1,318 @@ +/* + * linux/kernel/vserver/sched.c + * + * Virtual Server: Scheduler Support + * + * Copyright (C) 2004-2006 Herbert Pötzl + * + * V0.01 adapted Sam Vilains version to 2.6.3 + * V0.02 removed legacy interface + * V0.03 changed vcmds to vxi arg + * + */ + +#include +#include +#include +#include + +#include +#include + +#define vxd_check_range(val, min, max) do { \ + vxlprintk((valmax), \ + "check_range(%ld,%ld,%ld)", \ + (long)val, (long)min, (long)max, \ + __FILE__, __LINE__); \ + } while (0) + + +void vx_update_sched_param(struct _vx_sched *sched, + struct _vx_sched_pc *sched_pc) +{ + unsigned int set_mask = sched->update_mask; + + if (set_mask & VXSM_FILL_RATE) + sched_pc->fill_rate[0] = sched->fill_rate[0]; + if (set_mask & VXSM_INTERVAL) + sched_pc->interval[0] = sched->interval[0]; + if (set_mask & VXSM_FILL_RATE2) + sched_pc->fill_rate[1] = sched->fill_rate[1]; + if (set_mask & VXSM_INTERVAL2) + sched_pc->interval[1] = sched->interval[1]; + if (set_mask & VXSM_TOKENS) + sched_pc->tokens = sched->tokens; + if (set_mask & VXSM_TOKENS_MIN) + sched_pc->tokens_min = sched->tokens_min; + if (set_mask & VXSM_TOKENS_MAX) + sched_pc->tokens_max = sched->tokens_max; + + if (set_mask & VXSM_IDLE_TIME) + sched_pc->flags |= VXSF_IDLE_TIME; + else + sched_pc->flags &= ~VXSF_IDLE_TIME; + + /* reset time */ + sched_pc->norm_time = jiffies; +} + + +/* + * recalculate the context's scheduling tokens + * + * ret > 0 : number of tokens available + * ret < 0 : on hold, check delta_min[] + * -1 only jiffies + * -2 also idle time + * + */ +int vx_tokens_recalc(struct _vx_sched_pc *sched_pc, + unsigned long *norm_time, unsigned long *idle_time, int 
delta_min[2]) +{ + long delta; + long tokens = 0; + int flags = sched_pc->flags; + + /* how much time did pass? */ + delta = *norm_time - sched_pc->norm_time; + vxd_check_range(delta, 0, INT_MAX); + + if (delta >= sched_pc->interval[0]) { + long tokens, integral; + + /* calc integral token part */ + tokens = delta / sched_pc->interval[0]; + integral = tokens * sched_pc->interval[0]; + tokens *= sched_pc->fill_rate[0]; +#ifdef CONFIG_VSERVER_HARDCPU + delta_min[0] = delta - integral; + vxd_check_range(delta_min[0], 0, sched_pc->interval[0]); +#endif + /* advance time */ + sched_pc->norm_time += delta; + + /* add tokens */ + sched_pc->tokens += tokens; + sched_pc->token_time += tokens; + } + else + delta_min[0] = delta; + +#ifdef CONFIG_VSERVER_IDLETIME + if (!(flags & VXSF_IDLE_TIME)) + goto skip_idle; + + /* how much was the idle skip? */ + delta = *idle_time - sched_pc->idle_time; + vxd_check_range(delta, 0, INT_MAX); + + if (delta >= sched_pc->interval[1]) { + long tokens, integral; + + /* calc fair share token part */ + tokens = delta / sched_pc->interval[1]; + integral = tokens * sched_pc->interval[1]; + tokens *= sched_pc->fill_rate[1]; + delta_min[1] = delta - integral; + vxd_check_range(delta_min[1], 0, sched_pc->interval[1]); + + /* advance idle time */ + sched_pc->idle_time += integral; + + /* add tokens */ + sched_pc->tokens += tokens; + sched_pc->token_time += tokens; + } + else + delta_min[1] = delta; +skip_idle: +#endif + + /* clip at maximum */ + if (sched_pc->tokens > sched_pc->tokens_max) + sched_pc->tokens = sched_pc->tokens_max; + tokens = sched_pc->tokens; + + if ((flags & VXSF_ONHOLD)) { + /* can we unhold? */ + if (tokens >= sched_pc->tokens_min) { + flags &= ~VXSF_ONHOLD; + sched_pc->hold_ticks += + *norm_time - sched_pc->onhold; + } + else + goto on_hold; + } else { + /* put on hold? 
*/ + if (tokens <= 0) { + flags |= VXSF_ONHOLD; + sched_pc->onhold = *norm_time; + goto on_hold; + } + } + sched_pc->flags = flags; + return tokens; + +on_hold: + tokens = sched_pc->tokens_min - tokens; + sched_pc->flags = flags; + BUG_ON(tokens < 0); + +#ifdef CONFIG_VSERVER_HARDCPU + /* next interval? */ + if (!sched_pc->fill_rate[0]) + delta_min[0] = HZ; + else if (tokens > sched_pc->fill_rate[0]) + delta_min[0] += sched_pc->interval[0] * + tokens / sched_pc->fill_rate[0]; + else + delta_min[0] = sched_pc->interval[0] - delta_min[0]; + vxd_check_range(delta_min[0], 0, INT_MAX); + +#ifdef CONFIG_VSERVER_IDLETIME + if (!(flags & VXSF_IDLE_TIME)) + return -1; + + /* next interval? */ + if (!sched_pc->fill_rate[1]) + delta_min[1] = HZ; + else if (tokens > sched_pc->fill_rate[1]) + delta_min[1] += sched_pc->interval[1] * + tokens / sched_pc->fill_rate[1]; + else + delta_min[1] = sched_pc->interval[1] - delta_min[1]; + vxd_check_range(delta_min[1], 0, INT_MAX); + + return -2; +#else + return -1; +#endif /* CONFIG_VSERVER_IDLETIME */ +#else + return 0; +#endif /* CONFIG_VSERVER_HARDCPU */ +} + + +static int do_set_sched(struct vx_info *vxi, struct vcmd_set_sched_v4 *data) +{ + unsigned int set_mask = data->set_mask; + unsigned int update_mask; + + /* Sanity check data values */ + if (data->fill_rate < 0) + data->fill_rate = 1; + if (data->interval <= 0) + data->interval = HZ; + if (data->tokens_max <= 0) + data->tokens_max = HZ; + if (data->tokens_min < 0) + data->tokens_min = data->fill_rate*3; + if (data->tokens_min >= data->tokens_max) + data->tokens_min = data->tokens_max; + + if (data->prio_bias > MAX_PRIO_BIAS) + data->prio_bias = MAX_PRIO_BIAS; + if (data->prio_bias < MIN_PRIO_BIAS) + data->prio_bias = MIN_PRIO_BIAS; + + spin_lock(&vxi->sched.tokens_lock); + + if (set_mask & VXSM_FILL_RATE) + vxi->sched.fill_rate[0] = data->fill_rate; + if (set_mask & VXSM_INTERVAL) + vxi->sched.interval[0] = data->interval; + if (set_mask & VXSM_FILL_RATE2) + 
vxi->sched.fill_rate[1] = data->fill_rate; + if (set_mask & VXSM_INTERVAL2) + vxi->sched.interval[1] = data->interval; + if (set_mask & VXSM_TOKENS) + vxi->sched.tokens = data->tokens; + if (set_mask & VXSM_TOKENS_MIN) + vxi->sched.tokens_min = data->tokens_min; + if (set_mask & VXSM_TOKENS_MAX) + vxi->sched.tokens_max = data->tokens_max; + if (set_mask & VXSM_PRIO_BIAS) + vxi->sched.prio_bias = data->prio_bias; + + update_mask = vxi->sched.update_mask & VXSM_SET_MASK; + update_mask |= (set_mask & (VXSM_SET_MASK|VXSM_IDLE_TIME)); + vxi->sched.update_mask = update_mask; +#ifdef CONFIG_SMP + rmb(); + if (set_mask & VXSM_CPU_ID) + vxi->sched.update = cpumask_of_cpu(data->cpu_id); + else + vxi->sched.update = CPU_MASK_ALL; + /* forced reload? */ + if (set_mask & VXSM_FORCE) { + int cpu; + + for_each_possible_cpu(cpu) + vx_update_sched_param(&vxi->sched, + &vx_per_cpu(vxi, sched_pc, cpu)); + } +#else + /* on UP we update immediately */ + vx_update_sched_param(&vxi->sched, + &vx_per_cpu(vxi, sched_pc, 0)); +#endif + + spin_unlock(&vxi->sched.tokens_lock); + return 0; +} + + +#ifdef CONFIG_VSERVER_LEGACY + +#define COPY_MASK_V2(name, mask) \ + if (vc_data.name != SCHED_KEEP) { \ + vc_data_v4.name = vc_data.name; \ + vc_data_v4.set_mask |= mask; \ + } + +int vc_set_sched_v2(struct vx_info *vxi, void __user *data) +{ + struct vcmd_set_sched_v2 vc_data; + struct vcmd_set_sched_v4 vc_data_v4 = { .set_mask = 0 }; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + COPY_MASK_V2(fill_rate, VXSM_FILL_RATE); + COPY_MASK_V2(interval, VXSM_INTERVAL); + COPY_MASK_V2(tokens, VXSM_TOKENS); + COPY_MASK_V2(tokens_min, VXSM_TOKENS_MIN); + COPY_MASK_V2(tokens_max, VXSM_TOKENS_MAX); + vc_data_v4.bucket_id = 0; + + do_set_sched(vxi, &vc_data_v4); + return 0; +} +#endif + +int vc_set_sched_v3(struct vx_info *vxi, void __user *data) +{ + struct vcmd_set_sched_v3 vc_data; + struct vcmd_set_sched_v4 vc_data_v4; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) 
+ return -EFAULT; + + /* structures are binary compatible */ + memcpy(&vc_data_v4, &vc_data, sizeof(vc_data)); + vc_data_v4.set_mask &= VXSM_V3_MASK; + vc_data_v4.bucket_id = 0; + + return do_set_sched(vxi, &vc_data_v4); +} + +int vc_set_sched(struct vx_info *vxi, void __user *data) +{ + struct vcmd_set_sched_v4 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_sched(vxi, &vc_data); +} + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/sched_init.h linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/sched_init.h --- linux-2.6.19.1/kernel/vserver/sched_init.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/sched_init.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,49 @@ + +static inline void vx_info_init_sched(struct _vx_sched *sched) +{ + static struct lock_class_key tokens_lock_key; + + /* scheduling; hard code starting values as constants */ + sched->fill_rate[0] = 1; + sched->interval[0] = 4; + sched->fill_rate[1] = 1; + sched->interval[1] = 8; + sched->tokens = HZ >> 2; + sched->tokens_min = HZ >> 4; + sched->tokens_max = HZ >> 1; + sched->tokens_lock = SPIN_LOCK_UNLOCKED; + sched->prio_bias = 0; + sched->vavavoom = 0; + + lockdep_set_class(&sched->tokens_lock, &tokens_lock_key); +} + +static inline +void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu) +{ + sched_pc->fill_rate[0] = 1; + sched_pc->interval[0] = 4; + sched_pc->fill_rate[1] = 1; + sched_pc->interval[1] = 8; + sched_pc->tokens = HZ >> 2; + sched_pc->tokens_min = HZ >> 4; + sched_pc->tokens_max = HZ >> 1; + sched_pc->token_time = 0; + sched_pc->idle_time = 0; + sched_pc->norm_time = jiffies; + + sched_pc->user_ticks = 0; + sched_pc->sys_ticks = 0; + sched_pc->hold_ticks = 0; +} + +static inline void vx_info_exit_sched(struct _vx_sched *sched) +{ + return; +} + +static inline +void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu) +{ + return; +} diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/sched_proc.h 
linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/sched_proc.h --- linux-2.6.19.1/kernel/vserver/sched_proc.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/sched_proc.h 2006-11-08 04:57:49 +0100 @@ -0,0 +1,59 @@ +#ifndef _VX_SCHED_PROC_H +#define _VX_SCHED_PROC_H + + +static inline +int vx_info_proc_sched(struct _vx_sched *sched, char *buffer) +{ + int length = 0; + + length += sprintf(buffer, + "FillRate:\t%8d,%d\n" + "Interval:\t%8d,%d\n" + "TokensMin:\t%8d\n" + "TokensMax:\t%8d\n" + "PrioBias:\t%8d\n" + "VaVaVoom:\t%8d\n" + ,sched->fill_rate[0] + ,sched->fill_rate[1] + ,sched->interval[0] + ,sched->interval[1] + ,sched->tokens_min + ,sched->tokens_max + ,sched->prio_bias + ,sched->vavavoom + ); + return length; +} + +static inline +int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc, + char *buffer, int cpu) +{ + int length = 0; + + length += sprintf(buffer + length, + "cpu %d: %lld %lld %lld %ld %ld" + ,cpu + ,(unsigned long long)sched_pc->user_ticks + ,(unsigned long long)sched_pc->sys_ticks + ,(unsigned long long)sched_pc->hold_ticks + ,sched_pc->token_time + ,sched_pc->idle_time + ); + length += sprintf(buffer + length, + " %c%c %d %d %d %d/%d %d/%d\n" + ,(sched_pc->flags & VXSF_ONHOLD) ? 'H' : 'R' + ,(sched_pc->flags & VXSF_IDLE_TIME) ? 
'I' : '-' + ,sched_pc->tokens + ,sched_pc->tokens_min + ,sched_pc->tokens_max + ,sched_pc->fill_rate[0] + ,sched_pc->interval[0] + ,sched_pc->fill_rate[1] + ,sched_pc->interval[1] + ); + return length; +} + +#endif /* _VX_SCHED_PROC_H */ diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/signal.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/signal.c --- linux-2.6.19.1/kernel/vserver/signal.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/signal.c 2006-11-08 04:57:45 +0100 @@ -0,0 +1,136 @@ +/* + * linux/kernel/vserver/signal.c + * + * Virtual Server: Signal Support + * + * Copyright (C) 2003-2006 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * V0.02 changed vcmds to vxi arg + * + */ + +#include + +#include +#include + +#include +#include + + +int vx_info_kill(struct vx_info *vxi, int pid, int sig) +{ + int retval, count=0; + struct task_struct *p; + unsigned long priv = 0; + + retval = -ESRCH; + vxdprintk(VXD_CBIT(misc, 4), + "vx_info_kill(%p[#%d],%d,%d)*", + vxi, vxi->vx_id, pid, sig); + read_lock(&tasklist_lock); + switch (pid) { + case 0: + priv = 1; + case -1: + for_each_process(p) { + int err = 0; + + if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 || + (pid && vxi->vx_initpid == p->pid)) + continue; + + err = group_send_sig_info(sig, (void*)priv, p); + ++count; + if (err != -EPERM) + retval = err; + } + break; + + case 1: + if (vxi->vx_initpid) { + pid = vxi->vx_initpid; + /* for now, only SIGINT to private init ... */ + if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) && + /* ... 
as long as there are tasks left */ + (atomic_read(&vxi->vx_tasks) > 1)) + sig = SIGINT; + priv = 1; + } + /* fallthrough */ + default: + p = find_task_by_real_pid(pid); + if (p) { + if (vx_task_xid(p) == vxi->vx_id) + retval = group_send_sig_info(sig, + (void*)priv, p); + } + break; + } + read_unlock(&tasklist_lock); + vxdprintk(VXD_CBIT(misc, 4), + "vx_info_kill(%p[#%d],%d,%d) = %d", + vxi, vxi->vx_id, pid, sig, retval); + return retval; +} + +int vc_ctx_kill(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_kill_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* special check to allow guest shutdown */ + if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) && + /* forbid killall pid=0 when init is present */ + (((vc_data.pid < 1) && vxi->vx_initpid) || + (vc_data.pid > 1))) + return -EACCES; + + return vx_info_kill(vxi, vc_data.pid, vc_data.sig); +} + + +static int __wait_exit(struct vx_info *vxi) +{ + DECLARE_WAITQUEUE(wait, current); + int ret = 0; + + add_wait_queue(&vxi->vx_wait, &wait); + +wait: + set_current_state(TASK_INTERRUPTIBLE); /* re-arm on every pass: schedule() returns in TASK_RUNNING */ + if (vx_info_state(vxi, + VXS_SHUTDOWN|VXS_HASHED|VXS_HELPER) == VXS_SHUTDOWN) + goto out; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + goto out; + } + schedule(); + goto wait; + +out: + set_current_state(TASK_RUNNING); + remove_wait_queue(&vxi->vx_wait, &wait); + return ret; +} + + + +int vc_wait_exit(struct vx_info *vxi, void __user *data) +{ + struct vcmd_wait_exit_v0 vc_data; + int ret; + + ret = __wait_exit(vxi); + vc_data.reboot_cmd = vxi->reboot_cmd; + vc_data.exit_code = vxi->exit_code; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/space.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/space.c --- linux-2.6.19.1/kernel/vserver/space.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/space.c 2006-12-17 19:29:27 +0100 @@ -0,0 +1,223 @@ +/* + * 
linux/kernel/vserver/space.c + * + * Virtual Server: Context Space Support + * + * Copyright (C) 2003-2006 Herbert Pötzl + * + * V0.01 broken out from context.c 0.07 + * V0.02 added task locking for namespace + * V0.03 broken out vx_enter_namespace + * V0.04 added *space support and commands + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* namespace functions */ + +#include + +const struct vcmd_space_mask space_mask = { + .mask = CLONE_NEWNS | + CLONE_NEWUTS | + CLONE_NEWIPC | + CLONE_FS +}; + + +/* + * build a new nsproxy mix + * assumes that both proxies are 'const' + * does not touch nsproxy refcounts + */ + +struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy, + struct nsproxy *new_nsproxy, unsigned long mask) +{ + struct namespace *old_ns; + struct uts_namespace *old_uts; + struct ipc_namespace *old_ipc; + struct nsproxy *nsproxy; + + old_ns = old_nsproxy->namespace; + old_uts = old_nsproxy->uts_ns; + old_ipc = old_nsproxy->ipc_ns; + + nsproxy = dup_namespaces(old_nsproxy); + if (!nsproxy) + goto out; + + if (mask & CLONE_NEWNS) { + nsproxy->namespace = new_nsproxy->namespace; + if (nsproxy->namespace) + get_namespace(nsproxy->namespace); + } else + old_ns = NULL; + + if (mask & CLONE_NEWUTS) { + nsproxy->uts_ns = new_nsproxy->uts_ns; + if (nsproxy->uts_ns) + get_uts_ns(nsproxy->uts_ns); + } else + old_uts = NULL; + + if (mask & CLONE_NEWIPC) { + nsproxy->ipc_ns = new_nsproxy->ipc_ns; + if (nsproxy->ipc_ns) + get_ipc_ns(nsproxy->ipc_ns); + } else + old_ipc = NULL; + + if (old_ns) + put_namespace(old_ns); + if (old_uts) + put_uts_ns(old_uts); + if (old_ipc) + put_ipc_ns(old_ipc); +out: + return nsproxy; +} + +static inline +void __vs_merge_nsproxy(struct nsproxy **ptr, + struct nsproxy *nsproxy, unsigned long mask) +{ + struct nsproxy *old = *ptr; + struct nsproxy null_proxy = { .namespace = NULL }; + + BUG_ON(!nsproxy); + + if (mask) + *ptr = vs_mix_nsproxy(old ? 
old : &null_proxy, + nsproxy, mask); + else { + *ptr = nsproxy; + get_nsproxy(nsproxy); + } + if (old) + put_nsproxy(old); +} + +static inline +void __vs_merge_fs(struct fs_struct **ptr, struct fs_struct *fs) +{ + struct fs_struct *old = *ptr; + + *ptr = fs; + atomic_inc(&fs->count); /* NOTE(review): both callers pass a fresh copy_fs_struct() result and never put it; check this extra ref is not leaked */ + if (old) + put_fs_struct(old); +} + + +int vx_enter_space(struct vx_info *vxi, unsigned long mask) +{ + struct fs_struct *fs = NULL; + struct nsproxy *nsproxy; + + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) + return -EACCES; + + if (!mask) + mask = vxi->vx_nsmask; + + if ((mask & vxi->vx_nsmask) != mask) + return -EINVAL; + + nsproxy = vxi->vx_nsproxy; + if ((mask & CLONE_FS)) { + BUG_ON(!vxi->vx_fs); + fs = copy_fs_struct(vxi->vx_fs); + if (!fs) + return -ENOMEM; + } + + task_lock(current); + if (nsproxy) + __vs_merge_nsproxy(&current->nsproxy, nsproxy, mask); + if (fs) + __vs_merge_fs(&current->fs, fs); + task_unlock(current); + return 0; +} + + +int vx_set_space(struct vx_info *vxi, unsigned long mask) +{ + struct fs_struct *fs, *fs_copy = NULL; + struct nsproxy *nsproxy; + int ret; + + if (!mask) + mask = space_mask.mask; + + if ((mask & space_mask.mask) != mask) + return -EINVAL; + + task_lock(current); + fs = current->fs; + atomic_inc(&fs->count); + nsproxy = current->nsproxy; + get_nsproxy(nsproxy); + task_unlock(current); + + ret = -ENOMEM; + if ((mask & CLONE_FS)) { + fs_copy = copy_fs_struct(fs); + if (!fs_copy) + goto out_put; + } + + if (nsproxy) + __vs_merge_nsproxy(&vxi->vx_nsproxy, nsproxy, mask); + if (fs_copy) + __vs_merge_fs(&vxi->vx_fs, fs_copy); + vxi->vx_nsmask |= mask; + + ret = 0; +out_put: + put_fs_struct(fs); + put_nsproxy(nsproxy); + return ret; +} + + +int vc_enter_space(struct vx_info *vxi, void __user *data) +{ + struct vcmd_space_mask vc_data = { .mask = 0 }; + + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return vx_enter_space(vxi, vc_data.mask); +} + +int vc_set_space(struct vx_info *vxi, void __user *data) +{ + struct 
vcmd_space_mask vc_data = { .mask = 0 }; + + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return vx_set_space(vxi, vc_data.mask); +} + +int vc_get_space_mask(struct vx_info *vxi, void __user *data) +{ + if (copy_to_user(data, &space_mask, sizeof(space_mask))) + return -EFAULT; + return 0; +} + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/switch.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/switch.c --- linux-2.6.19.1/kernel/vserver/switch.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/switch.c 2006-12-05 18:15:48 +0100 @@ -0,0 +1,504 @@ +/* + * linux/kernel/vserver/switch.c + * + * Virtual Server: Syscall Switch + * + * Copyright (C) 2003-2006 Herbert Pötzl + * + * V0.01 syscall switch + * V0.02 added signal to context + * V0.03 added rlimit functions + * V0.04 added iattr, task/xid functions + * V0.05 added debug/history stuff + * V0.06 added compat32 layer + * V0.07 vcmd args and perms + * V0.08 added status commands + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +static inline +int vc_get_version(uint32_t id) +{ +#ifdef CONFIG_VSERVER_LEGACY_VERSION + if (id == 63) + return VCI_LEGACY_VERSION; +#endif + return VCI_VERSION; +} + +#include "vci_config.h" + +static inline +int vc_get_vci(uint32_t id) +{ + return vci_kernel_config(); +} + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +#ifdef CONFIG_COMPAT +#define __COMPAT(name, id, data, compat) \ + (compat) ? 
name ## _x32 (id, data) : name (id, data) +#else +#define __COMPAT(name, id, data, compat) \ + name (id, data) +#endif + + +static inline +long do_vcmd(uint32_t cmd, uint32_t id, + struct vx_info *vxi, struct nx_info *nxi, + void __user *data, int compat) +{ + switch (cmd) { + + case VCMD_get_version: + return vc_get_version(id); + case VCMD_get_vci: + return vc_get_vci(id); + + case VCMD_task_xid: + return vc_task_xid(id, data); + case VCMD_vx_info: + return vc_vx_info(vxi, data); + + case VCMD_task_nid: + return vc_task_nid(id, data); + case VCMD_nx_info: + return vc_nx_info(nxi, data); + + case VCMD_set_space_v0: + /* this is version 1 */ + case VCMD_set_space: + return vc_set_space(vxi, data); + + case VCMD_get_space_mask: + return vc_get_space_mask(vxi, data); + +#ifdef CONFIG_IA32_EMULATION + case VCMD_get_rlimit: + return __COMPAT(vc_get_rlimit, vxi, data, compat); + case VCMD_set_rlimit: + return __COMPAT(vc_set_rlimit, vxi, data, compat); +#else + case VCMD_get_rlimit: + return vc_get_rlimit(vxi, data); + case VCMD_set_rlimit: + return vc_set_rlimit(vxi, data); +#endif + case VCMD_get_rlimit_mask: + return vc_get_rlimit_mask(id, data); + case VCMD_reset_minmax: + return vc_reset_minmax(vxi, data); + + case VCMD_get_vhi_name: + return vc_get_vhi_name(vxi, data); + case VCMD_set_vhi_name: + return vc_set_vhi_name(vxi, data); + + case VCMD_ctx_stat: + return vc_ctx_stat(vxi, data); + case VCMD_virt_stat: + return vc_virt_stat(vxi, data); + case VCMD_sock_stat: + return vc_sock_stat(vxi, data); + case VCMD_rlimit_stat: + return vc_rlimit_stat(vxi, data); + + case VCMD_set_cflags: + return vc_set_cflags(vxi, data); + case VCMD_get_cflags: + return vc_get_cflags(vxi, data); + + case VCMD_set_ccaps_v0: + return vc_set_ccaps_v0(vxi, data); + /* this is version 1 */ + case VCMD_set_ccaps: + return vc_set_ccaps(vxi, data); + case VCMD_get_ccaps_v0: + return vc_get_ccaps_v0(vxi, data); + /* this is version 1 */ + case VCMD_get_ccaps: + return vc_get_ccaps(vxi, data); 
+ case VCMD_set_bcaps: + return vc_set_bcaps(vxi, data); + case VCMD_get_bcaps: + return vc_get_bcaps(vxi, data); + + case VCMD_set_nflags: + return vc_set_nflags(nxi, data); + case VCMD_get_nflags: + return vc_get_nflags(nxi, data); + + case VCMD_set_ncaps: + return vc_set_ncaps(nxi, data); + case VCMD_get_ncaps: + return vc_get_ncaps(nxi, data); + +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_set_sched_v2: + return vc_set_sched_v2(vxi, data); +#endif + case VCMD_set_sched_v3: + return vc_set_sched_v3(vxi, data); + /* this is version 4 */ + case VCMD_set_sched: + return vc_set_sched(vxi, data); + + case VCMD_add_dlimit: + return __COMPAT(vc_add_dlimit, id, data, compat); + case VCMD_rem_dlimit: + return __COMPAT(vc_rem_dlimit, id, data, compat); + case VCMD_set_dlimit: + return __COMPAT(vc_set_dlimit, id, data, compat); + case VCMD_get_dlimit: + return __COMPAT(vc_get_dlimit, id, data, compat); + + case VCMD_ctx_kill: + return vc_ctx_kill(vxi, data); + + case VCMD_wait_exit: + return vc_wait_exit(vxi, data); + +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_create_context: + return vc_ctx_create(id, NULL); +#endif + + case VCMD_get_iattr: + return __COMPAT(vc_get_iattr, id, data, compat); + case VCMD_set_iattr: + return __COMPAT(vc_set_iattr, id, data, compat); + + case VCMD_enter_space_v0: + return vc_enter_space(vxi, NULL); + /* this is version 1 */ + case VCMD_enter_space: + return vc_enter_space(vxi, data); + + case VCMD_ctx_create_v0: + return vc_ctx_create(id, NULL); + case VCMD_ctx_create: + return vc_ctx_create(id, data); + case VCMD_ctx_migrate_v0: + return vc_ctx_migrate(vxi, NULL); + case VCMD_ctx_migrate: + return vc_ctx_migrate(vxi, data); + + case VCMD_net_create_v0: + return vc_net_create(id, NULL); + case VCMD_net_create: + return vc_net_create(id, data); + case VCMD_net_migrate: + return vc_net_migrate(nxi, data); + case VCMD_net_add: + return vc_net_add(nxi, data); + case VCMD_net_remove: + return vc_net_remove(nxi, data); + +#ifdef CONFIG_VSERVER_HISTORY 
+ case VCMD_dump_history: + return vc_dump_history(id); + case VCMD_read_history: + return __COMPAT(vc_read_history, id, data, compat); +#endif +#ifdef CONFIG_VSERVER_MONITOR + case VCMD_read_monitor: + return __COMPAT(vc_read_monitor, id, data, compat); +#endif +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_new_s_context: + return vc_new_s_context(id, data); +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + case VCMD_set_ipv4root: + return vc_set_ipv4root(id, data); +#endif + default: + vxwprintk(1, "unimplemented VCMD_%02d_%d[%d]", + VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd)); + } + return -ENOSYS; +} + + +#define __VCMD(vcmd, _perm, _args, _flags) \ + case VCMD_ ## vcmd: perm = _perm; \ + args = _args; flags = _flags; break + + +#define VCA_NONE 0x00 +#define VCA_VXI 0x01 +#define VCA_NXI 0x02 + +#define VCF_NONE 0x00 +#define VCF_INFO 0x01 +#define VCF_ADMIN 0x02 +#define VCF_ARES 0x06 /* includes admin */ +#define VCF_SETUP 0x08 + + +static inline +long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat) +{ + long ret; + int permit = -1, state = 0; + int perm = -1, args = 0, flags = 0; + struct vx_info *vxi = NULL; + struct nx_info *nxi = NULL; + + switch (cmd) { + /* unpriviledged commands */ + __VCMD(get_version, 0, VCA_NONE, 0); + __VCMD(get_vci, 0, VCA_NONE, 0); + __VCMD(get_rlimit_mask, 0, VCA_NONE, 0); + __VCMD(get_space_mask, 0, VCA_NONE, 0); + + /* info commands */ + __VCMD(task_xid, 2, VCA_NONE, 0); + __VCMD(reset_minmax, 2, VCA_VXI, 0); + __VCMD(vx_info, 3, VCA_VXI, VCF_INFO); + __VCMD(get_bcaps, 3, VCA_VXI, VCF_INFO); + __VCMD(get_ccaps_v0, 3, VCA_VXI, VCF_INFO); + __VCMD(get_ccaps, 3, VCA_VXI, VCF_INFO); + __VCMD(get_cflags, 3, VCA_VXI, VCF_INFO); + __VCMD(get_vhi_name, 3, VCA_VXI, VCF_INFO); + __VCMD(get_rlimit, 3, VCA_VXI, VCF_INFO); + + __VCMD(ctx_stat, 3, VCA_VXI, VCF_INFO); + __VCMD(virt_stat, 3, VCA_VXI, VCF_INFO); + __VCMD(sock_stat, 3, VCA_VXI, VCF_INFO); + __VCMD(rlimit_stat, 3, VCA_VXI, VCF_INFO); + + __VCMD(task_nid, 2, 
VCA_NONE, 0); + __VCMD(nx_info, 3, VCA_NXI, VCF_INFO); + __VCMD(get_ncaps, 3, VCA_NXI, VCF_INFO); + __VCMD(get_nflags, 3, VCA_NXI, VCF_INFO); + + __VCMD(get_iattr, 2, VCA_NONE, 0); + __VCMD(get_dlimit, 3, VCA_NONE, VCF_INFO); + + /* lower admin commands */ + __VCMD(wait_exit, 4, VCA_VXI, VCF_INFO); + __VCMD(ctx_create_v0, 5, VCA_NONE, 0); + __VCMD(ctx_create, 5, VCA_NONE, 0); + __VCMD(ctx_migrate_v0, 5, VCA_VXI, VCF_ADMIN); + __VCMD(ctx_migrate, 5, VCA_VXI, VCF_ADMIN); + __VCMD(enter_space_v0, 5, VCA_VXI, VCF_ADMIN); + __VCMD(enter_space, 5, VCA_VXI, VCF_ADMIN); + + __VCMD(net_create_v0, 5, VCA_NONE, 0); + __VCMD(net_create, 5, VCA_NONE, 0); + __VCMD(net_migrate, 5, VCA_NXI, VCF_ADMIN); + + /* higher admin commands */ + __VCMD(ctx_kill, 6, VCA_VXI, VCF_ARES); + __VCMD(set_space_v0, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + __VCMD(set_space, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + + __VCMD(set_ccaps_v0, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + __VCMD(set_ccaps, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + __VCMD(set_bcaps, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + __VCMD(set_cflags, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + + __VCMD(set_vhi_name, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + __VCMD(set_rlimit, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + __VCMD(set_sched, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + __VCMD(set_sched_v2, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + __VCMD(set_sched_v3, 7, VCA_VXI, VCF_ARES|VCF_SETUP); + + __VCMD(set_ncaps, 7, VCA_NXI, VCF_ARES|VCF_SETUP); + __VCMD(set_nflags, 7, VCA_NXI, VCF_ARES|VCF_SETUP); + __VCMD(net_add, 8, VCA_NXI, VCF_ARES|VCF_SETUP); + __VCMD(net_remove, 8, VCA_NXI, VCF_ARES|VCF_SETUP); + + __VCMD(set_iattr, 7, VCA_NONE, 0); + __VCMD(set_dlimit, 7, VCA_NONE, VCF_ARES); + __VCMD(add_dlimit, 8, VCA_NONE, VCF_ARES); + __VCMD(rem_dlimit, 8, VCA_NONE, VCF_ARES); + + /* debug level admin commands */ +#ifdef CONFIG_VSERVER_HISTORY + __VCMD(dump_history, 9, VCA_NONE, 0); + __VCMD(read_history, 9, VCA_NONE, 0); +#endif +#ifdef CONFIG_VSERVER_MONITOR + __VCMD(read_monitor, 9, VCA_NONE, 0); +#endif + 
+ /* legacy commands */ +#ifdef CONFIG_VSERVER_LEGACY + __VCMD(create_context, 5, VCA_NONE, 0); + __VCMD(new_s_context, 5, VCA_NONE, 0); +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + __VCMD(set_ipv4root, 5, VCA_NONE, 0); +#endif + default: + perm = -1; + } + + vxdprintk(VXD_CBIT(switch, 0), + "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), id, data, compat, + perm, args, flags); + + ret = -ENOSYS; + if (perm < 0) + goto out; + + state = 1; +#ifdef CONFIG_VSERVER_LEGACY + if (!capable(CAP_CONTEXT) && + /* dirty hack for capremove */ + !(cmd==VCMD_new_s_context && id==-2)) + goto out; +#else + if (!capable(CAP_CONTEXT)) + goto out; +#endif + + state = 2; + /* moved here from the individual commands */ + ret = -EPERM; + if ((perm > 1) && !capable(CAP_SYS_ADMIN)) + goto out; + + state = 3; + /* vcmd involves resource management */ + ret = -EPERM; + if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE)) + goto out; + + state = 4; + /* various legacy exceptions */ + switch (cmd) { +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_set_cflags: + case VCMD_set_ccaps_v0: + ret = 0; + if (vx_check(0, VS_WATCH)) + goto out; + break; + + case VCMD_ctx_create_v0: +#endif + /* will go away when admin is a cap */ + case VCMD_ctx_migrate_v0: + case VCMD_ctx_migrate: + if (id == 1) { + current->xid = 1; + ret = 1; + goto out; + } + break; + + /* legacy special casing */ + case VCMD_set_space_v0: + id = -1; + break; + } + + /* vcmds are fine by default */ + permit = 1; + + /* admin type vcmds require admin ... */ + if (flags & VCF_ADMIN) + permit = vx_check(0, VS_ADMIN) ? 1 : 0; + + /* ... but setup type vcmds override that */ + if (!permit && (flags & VCF_SETUP)) + permit = vx_flags(VXF_STATE_SETUP, 0) ? 
2 : 0; + + state = 5; + ret = -EPERM; + if (!permit) + goto out; + + state = 6; + ret = -ESRCH; + if (args & VCA_VXI) { + vxi = lookup_vx_info(id); + if (!vxi) + goto out; + + if ((flags & VCF_ADMIN) && + /* special case kill for shutdown */ + (cmd != VCMD_ctx_kill) && + /* can context be administrated? */ + !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) { + ret = -EACCES; + goto out_vxi; + } + } + state = 7; + if (args & VCA_NXI) { + nxi = lookup_nx_info(id); + if (!nxi) + goto out_vxi; + + if ((flags & VCF_ADMIN) && + /* can context be administrated? */ + !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) { + ret = -EACCES; + goto out_nxi; + } + } + + state = 8; + ret = do_vcmd(cmd, id, vxi, nxi, data, compat); + +out_nxi: + if (args & VCA_NXI) + put_nx_info(nxi); +out_vxi: + if (args & VCA_VXI) + put_vx_info(vxi); +out: + vxdprintk(VXD_CBIT(switch, 1), + "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), ret, ret, state, permit); + return ret; +} + +asmlinkage long +sys_vserver(uint32_t cmd, uint32_t id, void __user *data) +{ + return do_vserver(cmd, id, data, 0); +} + +#ifdef CONFIG_COMPAT + +asmlinkage long +sys32_vserver(uint32_t cmd, uint32_t id, void __user *data) +{ + return do_vserver(cmd, id, data, 1); +} + +#endif /* CONFIG_COMPAT */ diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/sysctl.c linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/sysctl.c --- linux-2.6.19.1/kernel/vserver/sysctl.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/sysctl.c 2006-11-08 04:57:40 +0100 @@ -0,0 +1,242 @@ +/* + * kernel/vserver/sysctl.c + * + * Virtual Context Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#define CTL_VSERVER 4242 /* unused? 
*/ + +enum { + CTL_DEBUG_ERROR = 0, + CTL_DEBUG_SWITCH = 1, + CTL_DEBUG_XID, + CTL_DEBUG_NID, + CTL_DEBUG_TAG, + CTL_DEBUG_NET, + CTL_DEBUG_LIMIT, + CTL_DEBUG_CRES, + CTL_DEBUG_DLIM, + CTL_DEBUG_QUOTA, + CTL_DEBUG_CVIRT, + CTL_DEBUG_MISC, +}; + + +unsigned int vx_debug_switch = 0; +unsigned int vx_debug_xid = 0; +unsigned int vx_debug_nid = 0; +unsigned int vx_debug_tag = 0; +unsigned int vx_debug_net = 0; +unsigned int vx_debug_limit = 0; +unsigned int vx_debug_cres = 0; +unsigned int vx_debug_dlim = 0; +unsigned int vx_debug_quota = 0; +unsigned int vx_debug_cvirt = 0; +unsigned int vx_debug_misc = 0; + + +static struct ctl_table_header *vserver_table_header; +static ctl_table vserver_table[]; + + +void vserver_register_sysctl(void) +{ + if (!vserver_table_header) { + vserver_table_header = register_sysctl_table(vserver_table, 1); + } + +} + +void vserver_unregister_sysctl(void) +{ + if (vserver_table_header) { + unregister_sysctl_table(vserver_table_header); + vserver_table_header = NULL; + } +} + + +static int proc_dodebug(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char tmpbuf[20], *p, c; + unsigned int value; + size_t left, len; + + if ((*ppos && !write) || !*lenp) { + *lenp = 0; + return 0; + } + + left = *lenp; + + if (write) { + if (!access_ok(VERIFY_READ, buffer, left)) + return -EFAULT; + p = (char *) buffer; + while (left && __get_user(c, p) >= 0 && isspace(c)) + left--, p++; + if (!left) + goto done; + + if (left > sizeof(tmpbuf) - 1) + return -EINVAL; + if (copy_from_user(tmpbuf, p, left)) + return -EFAULT; + tmpbuf[left] = '\0'; + + for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--) + value = 10 * value + (*p - '0'); + if (*p && !isspace(*p)) + return -EINVAL; + while (left && isspace(*p)) + left--, p++; + *(unsigned int *) table->data = value; + } else { /* read: print as unsigned so the text can be written back through the digit-only parser above */ + if (!access_ok(VERIFY_WRITE, buffer, left)) + return -EFAULT; + len = sprintf(tmpbuf, "%u", *(unsigned int *) 
table->data); + if (len > left) + len = left; + if (__copy_to_user(buffer, tmpbuf, len)) + return -EFAULT; + if ((left -= len) > 0) { + if (put_user('\n', (char *)buffer + len)) + return -EFAULT; + left--; + } + } + +done: + *lenp -= left; + *ppos += *lenp; + return 0; +} + + +#define CTL_ENTRY(ctl, name) \ + { \ + .ctl_name = ctl, \ + .procname = #name, \ + .data = &vx_##name, \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_dodebug \ + } + +static ctl_table debug_table[] = { + CTL_ENTRY (CTL_DEBUG_SWITCH, debug_switch), + CTL_ENTRY (CTL_DEBUG_XID, debug_xid), + CTL_ENTRY (CTL_DEBUG_NID, debug_nid), + CTL_ENTRY (CTL_DEBUG_TAG, debug_tag), + CTL_ENTRY (CTL_DEBUG_NET, debug_net), + CTL_ENTRY (CTL_DEBUG_LIMIT, debug_limit), + CTL_ENTRY (CTL_DEBUG_CRES, debug_cres), + CTL_ENTRY (CTL_DEBUG_DLIM, debug_dlim), + CTL_ENTRY (CTL_DEBUG_QUOTA, debug_quota), + CTL_ENTRY (CTL_DEBUG_CVIRT, debug_cvirt), + CTL_ENTRY (CTL_DEBUG_MISC, debug_misc), + { .ctl_name = 0 } +}; + +static ctl_table vserver_table[] = { + { + .ctl_name = CTL_VSERVER, + .procname = "vserver", + .mode = 0555, + .child = debug_table + }, + { .ctl_name = 0 } +}; + + +static match_table_t tokens = { + { CTL_DEBUG_SWITCH, "switch=%x" }, + { CTL_DEBUG_XID, "xid=%x" }, + { CTL_DEBUG_NID, "nid=%x" }, + { CTL_DEBUG_TAG, "tag=%x" }, + { CTL_DEBUG_NET, "net=%x" }, + { CTL_DEBUG_LIMIT, "limit=%x" }, + { CTL_DEBUG_CRES, "cres=%x" }, + { CTL_DEBUG_DLIM, "dlim=%x" }, + { CTL_DEBUG_QUOTA, "quota=%x" }, + { CTL_DEBUG_CVIRT, "cvirt=%x" }, + { CTL_DEBUG_MISC, "misc=%x" }, + { CTL_DEBUG_ERROR, NULL } +}; + +#define HANDLE_CASE(id, name, val) \ + case CTL_DEBUG_ ## id: \ + vx_debug_ ## name = val; \ + printk("vs_debug_" #name "=0x%x\n", val); \ + break + + +static int __init vs_debug_setup(char *str) +{ + char *p; + int token; + + printk("vs_debug_setup(%s)\n", str); + while ((p = strsep(&str, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + unsigned int value; + + if (!*p) + continue; + + token = 
match_token(p, tokens, args); + value = (token>0)?simple_strtoul(args[0].from, NULL, 0):0; + + switch (token) { + HANDLE_CASE(SWITCH, switch, value); + HANDLE_CASE(XID, xid, value); + HANDLE_CASE(NID, nid, value); + HANDLE_CASE(TAG, tag, value); + HANDLE_CASE(NET, net, value); + HANDLE_CASE(LIMIT, limit, value); + HANDLE_CASE(CRES, cres, value); + HANDLE_CASE(DLIM, dlim, value); + HANDLE_CASE(QUOTA, quota, value); + HANDLE_CASE(CVIRT, cvirt, value); + HANDLE_CASE(MISC, misc, value); + default: + return -EINVAL; + break; + } + } + return 1; +} + +__setup("vsdebug=", vs_debug_setup); + + + +EXPORT_SYMBOL_GPL(vx_debug_switch); +EXPORT_SYMBOL_GPL(vx_debug_xid); +EXPORT_SYMBOL_GPL(vx_debug_nid); +EXPORT_SYMBOL_GPL(vx_debug_net); +EXPORT_SYMBOL_GPL(vx_debug_limit); +EXPORT_SYMBOL_GPL(vx_debug_cres); +EXPORT_SYMBOL_GPL(vx_debug_dlim); +EXPORT_SYMBOL_GPL(vx_debug_quota); +EXPORT_SYMBOL_GPL(vx_debug_cvirt); +EXPORT_SYMBOL_GPL(vx_debug_misc); + diff -NurpP --minimal linux-2.6.19.1/kernel/vserver/vci_config.h linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/vci_config.h --- linux-2.6.19.1/kernel/vserver/vci_config.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/kernel/vserver/vci_config.h 2006-12-09 03:51:07 +0100 @@ -0,0 +1,86 @@ + +enum { + VCI_KCBIT_NO_DYNAMIC = 0, + VCI_KCBIT_LEGACY = 1, + VCI_KCBIT_LEGACYNET = 2, + VCI_KCBIT_NGNET = 3, + + VCI_KCBIT_PROC_SECURE = 4, + VCI_KCBIT_HARDCPU = 5, + VCI_KCBIT_IDLELIMIT = 6, + VCI_KCBIT_IDLETIME = 7, + + VCI_KCBIT_COWBL = 8, + VCI_KCBIT_FULLCOWBL = 9, + VCI_KCBIT_SPACES = 10, + + VCI_KCBIT_LEGACY_VERSION = 15, + VCI_KCBIT_DEBUG = 16, + VCI_KCBIT_HISTORY = 20, + VCI_KCBIT_TAGGED = 24, +}; + + +static inline uint32_t vci_kernel_config(void) +{ + return + /* various legacy options */ +#ifndef CONFIG_VSERVER_DYNAMIC_IDS + (1 << VCI_KCBIT_NO_DYNAMIC) | +#endif +#ifdef CONFIG_VSERVER_LEGACY + (1 << VCI_KCBIT_LEGACY) | +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + (1 << VCI_KCBIT_LEGACYNET) | +#endif +#ifdef 
CONFIG_VSERVER_LEGACY_VERSION + (1 << VCI_KCBIT_LEGACY_VERSION) | +#endif + + /* configured features */ +#ifdef CONFIG_VSERVER_PROC_SECURE + (1 << VCI_KCBIT_PROC_SECURE) | +#endif +#ifdef CONFIG_VSERVER_HARDCPU + (1 << VCI_KCBIT_HARDCPU) | +#endif +#ifdef CONFIG_VSERVER_IDLELIMIT + (1 << VCI_KCBIT_IDLELIMIT) | +#endif +#ifdef CONFIG_VSERVER_IDLETIME + (1 << VCI_KCBIT_IDLETIME) | +#endif +#ifdef CONFIG_VSERVER_COWBL + (1 << VCI_KCBIT_COWBL) | + (1 << VCI_KCBIT_FULLCOWBL) | +#endif + (1 << VCI_KCBIT_SPACES) | + + /* debug options */ +#ifdef CONFIG_VSERVER_DEBUG + (1 << VCI_KCBIT_DEBUG) | +#endif +#ifdef CONFIG_VSERVER_HISTORY + (1 << VCI_KCBIT_HISTORY) | +#endif + + /* inode context tagging */ +#if defined(CONFIG_TAGGING_NONE) + (0 << VCI_KCBIT_TAGGED) | +#elif defined(CONFIG_TAGGING_UID16) + (1 << VCI_KCBIT_TAGGED) | +#elif defined(CONFIG_TAGGING_GID16) + (2 << VCI_KCBIT_TAGGED) | +#elif defined(CONFIG_TAGGING_ID24) + (3 << VCI_KCBIT_TAGGED) | +#elif defined(CONFIG_TAGGING_INTERN) + (4 << VCI_KCBIT_TAGGED) | +#elif defined(CONFIG_TAGGING_RUNTIME) + (5 << VCI_KCBIT_TAGGED) | +#else + (7 << VCI_KCBIT_TAGGED) | +#endif + 0; +} + diff -NurpP --minimal linux-2.6.19.1/mm/filemap.c linux-2.6.19.1-vs2.2.0-rc6/mm/filemap.c --- linux-2.6.19.1/mm/filemap.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/filemap.c 2006-11-08 22:42:35 +0100 @@ -1236,6 +1236,31 @@ int file_send_actor(read_descriptor_t * return written; } +/* FIXME: It would be as simple as this, if we had a (void __user*) to write. + * We already have a kernel buffer, so it should be even simpler, right? ;) + * + * Yes, sorta. After duplicating the complete path of generic_file_write(), + * at least some special cases could be removed, so the copy is simpler than + * the original. But it remains a copy, so overall complexity increases. 
+ */ +static ssize_t +generic_kernel_file_write(struct file *, const char *, size_t, loff_t *); + +ssize_t generic_file_sendpage(struct file *file, struct page *page, + int offset, size_t size, loff_t *ppos, int more) +{ + ssize_t ret; + char *kaddr; + + kaddr = kmap(page); + ret = generic_kernel_file_write(file, kaddr + offset, size, ppos); + kunmap(page); + + return ret; +} + +EXPORT_SYMBOL(generic_file_sendpage); + ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos, size_t count, read_actor_t actor, void *target) { @@ -1913,6 +1938,19 @@ int remove_suid(struct dentry *dentry) } EXPORT_SYMBOL(remove_suid); +static inline size_t +filemap_copy_from_kernel(struct page *page, unsigned long offset, + const char *buf, unsigned bytes) +{ + char *kaddr; + + kaddr = kmap(page); + memcpy(kaddr + offset, buf, bytes); + kunmap(page); + + return bytes; +} + size_t __filemap_copy_from_user_iovec_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) @@ -2219,6 +2257,175 @@ zero_length_segment: } EXPORT_SYMBOL(generic_file_buffered_write); +static inline void +filemap_set_next_kvec(const struct kvec **iovp, size_t *basep, size_t bytes) +{ + const struct kvec *iov = *iovp; + size_t base = *basep; + + while (bytes) { + int copy = min(bytes, iov->iov_len - base); + + bytes -= copy; + base += copy; + if (iov->iov_len == base) { + iov++; + base = 0; + } + } + *iovp = iov; + *basep = base; +} + +/* + * TODO: + * This largely tries to copy generic_file_aio_write_nolock(), although it + * doesn't have to be nearly as generic. A real cleanup should either + * merge this into generic_file_aio_write_nolock() as well or keep it special + * and remove as much code as possible. 
+ */ +static ssize_t +generic_kernel_file_aio_write_nolock(struct kiocb *iocb, const struct kvec*iov, + unsigned long nr_segs, loff_t *ppos) +{ + struct file *file = iocb->ki_filp; + struct address_space * mapping = file->f_mapping; + const struct address_space_operations *a_ops = mapping->a_ops; + size_t ocount; /* original count */ + size_t count; /* after file limit checks */ + struct inode *inode = mapping->host; + long status = 0; + loff_t pos; + struct page *page; + struct page *cached_page = NULL; + const int isblk = S_ISBLK(inode->i_mode); + ssize_t written; + ssize_t err; + size_t bytes; + struct pagevec lru_pvec; + const struct kvec *cur_iov = iov; /* current kvec */ + size_t iov_base = 0; /* offset in the current kvec */ + unsigned long seg; + char *buf; + + ocount = 0; + for (seg = 0; seg < nr_segs; seg++) { + const struct kvec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + ocount += iv->iov_len; + if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) + return -EINVAL; + } + + count = ocount; + pos = *ppos; + pagevec_init(&lru_pvec, 0); + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = mapping->backing_dev_info; + written = 0; + + err = generic_write_checks(file, &pos, &count, isblk); + if (err) + goto out; + + + if (count == 0) + goto out; + + remove_suid(file->f_dentry); + file_update_time(file); + + /* There is no sane reason to use O_DIRECT */ + BUG_ON(file->f_flags & O_DIRECT); + + buf = iov->iov_base; + do { + unsigned long index; + unsigned long offset; + size_t copied; + + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + index = pos >> PAGE_CACHE_SHIFT; + bytes = PAGE_CACHE_SIZE - offset; + if (bytes > count) + bytes = count; + + page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); + if (!page) { + status = -ENOMEM; + break; + } + + status = a_ops->prepare_write(file, page, offset, offset+bytes); + if 
(unlikely(status)) { + loff_t isize = i_size_read(inode); + /* + * prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. + */ + unlock_page(page); + page_cache_release(page); + if (pos + bytes > isize) + vmtruncate(inode, isize); + break; + } + + BUG_ON(nr_segs != 1); + copied = filemap_copy_from_kernel(page, offset, buf, bytes); + + flush_dcache_page(page); + status = a_ops->commit_write(file, page, offset, offset+bytes); + if (likely(copied > 0)) { + if (!status) + status = copied; + + if (status >= 0) { + written += status; + count -= status; + pos += status; + buf += status; + if (unlikely(nr_segs > 1)) + filemap_set_next_kvec(&cur_iov, + &iov_base, status); + } + } + if (unlikely(copied != bytes)) + if (status >= 0) + status = -EFAULT; + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + if (status < 0) + break; + balance_dirty_pages_ratelimited(mapping); + cond_resched(); + } while (count); + *ppos = pos; + + if (cached_page) + page_cache_release(cached_page); + + /* + * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC + */ + if (status >= 0) { + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) + status = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + } + + err = written ? written : status; +out: + pagevec_lru_add(&lru_pvec); + current->backing_dev_info = 0; + return err; +} + static ssize_t __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) @@ -2335,6 +2542,36 @@ out: return written ? 
written : err; } +static ssize_t +generic_kernel_file_write_nolock(struct file *file, const struct kvec *iov, + unsigned long nr_segs, loff_t *ppos) +{ + struct kiocb kiocb; + ssize_t ret; + + init_sync_kiocb(&kiocb, file); + ret = generic_kernel_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); + if (ret == -EIOCBQUEUED) + ret = wait_on_sync_kiocb(&kiocb); + return ret; +} + +static ssize_t generic_kernel_file_write(struct file *file, const char *buf, + size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_mapping->host; + ssize_t err; + struct kvec local_iov = { .iov_base = (char *) buf, + .iov_len = count }; + + mutex_lock(&inode->i_mutex); + err = generic_kernel_file_write_nolock(file, &local_iov, 1, ppos); + mutex_unlock(&inode->i_mutex); + + return err; +} + + ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { diff -NurpP --minimal linux-2.6.19.1/mm/filemap_xip.c linux-2.6.19.1-vs2.2.0-rc6/mm/filemap_xip.c --- linux-2.6.19.1/mm/filemap_xip.c 2006-09-20 16:58:44 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/filemap_xip.c 2006-11-08 04:57:40 +0100 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "filemap.h" diff -NurpP --minimal linux-2.6.19.1/mm/fremap.c linux-2.6.19.1-vs2.2.0-rc6/mm/fremap.c --- linux-2.6.19.1/mm/fremap.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/fremap.c 2006-11-30 19:31:41 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -74,6 +75,8 @@ int install_page(struct mm_struct *mm, s err = -ENOMEM; if (page_mapcount(page) > INT_MAX/2) goto unlock; + if (!vx_rss_avail(mm, 1)) + goto unlock; if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) inc_mm_counter(mm, file_rss); diff -NurpP --minimal linux-2.6.19.1/mm/hugetlb.c linux-2.6.19.1-vs2.2.0-rc6/mm/hugetlb.c --- linux-2.6.19.1/mm/hugetlb.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/hugetlb.c 2006-11-08 04:57:40 +0100 @@ -19,6 +19,7 @@ 
#include #include +#include #include "internal.h" const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; diff -NurpP --minimal linux-2.6.19.1/mm/memory.c linux-2.6.19.1-vs2.2.0-rc6/mm/memory.c --- linux-2.6.19.1/mm/memory.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/memory.c 2006-11-30 19:31:41 +0100 @@ -498,6 +498,9 @@ static int copy_pte_range(struct mm_stru int progress = 0; int rss[2]; + if (!vx_rss_avail(dst_mm, ((end - addr)/PAGE_SIZE + 1))) + return -ENOMEM; + again: rss[1] = rss[0] = 0; dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); @@ -2011,6 +2014,11 @@ static int do_swap_page(struct mm_struct grab_swap_token(); } + if (!vx_rss_avail(mm, 1)) { + ret = VM_FAULT_OOM; + goto out; + } + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); mark_page_accessed(page); lock_page(page); @@ -2083,6 +2091,8 @@ static int do_anonymous_page(struct mm_s /* Allocate our own private page. */ pte_unmap(page_table); + if (!vx_rss_avail(mm, 1)) + goto oom; if (unlikely(anon_vma_prepare(vma))) goto oom; page = alloc_zeroed_user_highpage(vma, address); @@ -2156,6 +2166,9 @@ static int do_no_page(struct mm_struct * pte_unmap(page_table); BUG_ON(vma->vm_flags & VM_PFNMAP); + if (!vx_rss_avail(mm, 1)) + return VM_FAULT_OOM; + if (vma->vm_file) { mapping = vma->vm_file->f_mapping; sequence = mapping->truncate_count; @@ -2380,6 +2393,7 @@ static inline int handle_pte_fault(struc pte_t entry; pte_t old_entry; spinlock_t *ptl; + int ret, type = VXPT_UNKNOWN; old_entry = entry = *pte; if (!pte_present(entry)) { @@ -2408,9 +2422,12 @@ static inline int handle_pte_fault(struc if (unlikely(!pte_same(*pte, entry))) goto unlock; if (write_access) { - if (!pte_write(entry)) - return do_wp_page(mm, vma, address, + if (!pte_write(entry)) { + ret = do_wp_page(mm, vma, address, pte, pmd, ptl, entry); + type = VXPT_WRITE; + goto out; + } entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); @@ -2430,7 +2447,10 @@ static inline int handle_pte_fault(struc } 
unlock: pte_unmap_unlock(pte, ptl); - return VM_FAULT_MINOR; + ret = VM_FAULT_MINOR; +out: + vx_page_fault(mm, vma, type, ret); + return ret; } /* diff -NurpP --minimal linux-2.6.19.1/mm/mlock.c linux-2.6.19.1-vs2.2.0-rc6/mm/mlock.c --- linux-2.6.19.1/mm/mlock.c 2006-04-09 13:49:58 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/mlock.c 2006-11-08 04:57:47 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, @@ -65,7 +66,7 @@ success: ret = make_pages_present(start, end); } - vma->vm_mm->locked_vm -= pages; + vx_vmlocked_sub(vma->vm_mm, pages); out: if (ret == -ENOMEM) ret = -EAGAIN; @@ -123,7 +124,7 @@ static int do_mlock(unsigned long start, asmlinkage long sys_mlock(unsigned long start, size_t len) { - unsigned long locked; + unsigned long locked, grow; unsigned long lock_limit; int error = -ENOMEM; @@ -134,8 +135,10 @@ asmlinkage long sys_mlock(unsigned long len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; - locked = len >> PAGE_SHIFT; - locked += current->mm->locked_vm; + grow = len >> PAGE_SHIFT; + if (!vx_vmlocked_avail(current->mm, grow)) + goto out; + locked = current->mm->locked_vm + grow; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; @@ -143,6 +146,7 @@ asmlinkage long sys_mlock(unsigned long /* check against resource limits */ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = do_mlock(start, len, 1); +out: up_write(&current->mm->mmap_sem); return error; } @@ -202,6 +206,8 @@ asmlinkage long sys_mlockall(int flags) lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; + if (!vx_vmlocked_avail(current->mm, current->mm->total_vm)) + goto out; if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); diff -NurpP --minimal linux-2.6.19.1/mm/mmap.c linux-2.6.19.1-vs2.2.0-rc6/mm/mmap.c --- linux-2.6.19.1/mm/mmap.c 2006-11-30 21:19:44 +0100 +++ 
linux-2.6.19.1-vs2.2.0-rc6/mm/mmap.c 2006-11-20 21:12:32 +0100 @@ -1141,10 +1141,10 @@ munmap_back: kmem_cache_free(vm_area_cachep, vma); } out: - mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } if (flags & MAP_POPULATE) { @@ -1504,9 +1504,9 @@ static int acct_stack_growth(struct vm_a return -ENOMEM; /* Ok, everything looks good - let it rip */ - mm->total_vm += grow; + vx_vmpages_add(mm, grow); if (vma->vm_flags & VM_LOCKED) - mm->locked_vm += grow; + vx_vmlocked_add(mm, grow); vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow); return 0; } @@ -1659,9 +1659,9 @@ static void remove_vma_list(struct mm_st do { long nrpages = vma_pages(vma); - mm->total_vm -= nrpages; + vx_vmpages_sub(mm, nrpages); if (vma->vm_flags & VM_LOCKED) - mm->locked_vm -= nrpages; + vx_vmlocked_sub(mm, nrpages); vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); vma = remove_vma(vma); } while (vma); @@ -1900,6 +1900,8 @@ unsigned long do_brk(unsigned long addr, lock_limit >>= PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; + if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT)) + return -ENOMEM; } /* @@ -1926,7 +1928,8 @@ unsigned long do_brk(unsigned long addr, if (mm->map_count > sysctl_max_map_count) return -ENOMEM; - if (security_vm_enough_memory(len >> PAGE_SHIFT)) + if (security_vm_enough_memory(len >> PAGE_SHIFT) || + !vx_vmpages_avail(mm, len >> PAGE_SHIFT)) return -ENOMEM; /* Can we just expand an old private anonymous mapping? 
*/ @@ -1952,9 +1955,9 @@ unsigned long do_brk(unsigned long addr, (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; vma_link(mm, vma, prev, rb_link, rb_parent); out: - mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); if (flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } return addr; @@ -1980,6 +1983,11 @@ void exit_mmap(struct mm_struct *mm) free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); tlb_finish_mmu(tlb, 0, end); + set_mm_counter(mm, file_rss, 0); + set_mm_counter(mm, anon_rss, 0); + vx_vmpages_sub(mm, mm->total_vm); + vx_vmlocked_sub(mm, mm->locked_vm); + /* * Walk the list again, actually closing and freeing it, * with preemption enabled, without holding any MM locks. @@ -2019,7 +2027,8 @@ int insert_vm_struct(struct mm_struct * if (__vma && __vma->vm_start < vma->vm_end) return -ENOMEM; if ((vma->vm_flags & VM_ACCOUNT) && - security_vm_enough_memory(vma_pages(vma))) + (security_vm_enough_memory(vma_pages(vma)) || + !vx_vmpages_avail(mm, vma_pages(vma)))) return -ENOMEM; vma_link(mm, vma, prev, rb_link, rb_parent); return 0; @@ -2092,5 +2101,7 @@ int may_expand_vm(struct mm_struct *mm, if (cur + npages > lim) return 0; + if (!vx_vmpages_avail(mm, npages)) + return 0; return 1; } diff -NurpP --minimal linux-2.6.19.1/mm/mremap.c linux-2.6.19.1-vs2.2.0-rc6/mm/mremap.c --- linux-2.6.19.1/mm/mremap.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/mremap.c 2006-11-08 04:57:47 +0100 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -213,7 +214,7 @@ static unsigned long move_vma(struct vm_ * If this were a serious issue, we'd add a flag to do_munmap(). 
*/ hiwater_vm = mm->hiwater_vm; - mm->total_vm += new_len >> PAGE_SHIFT; + vx_vmpages_add(mm, new_len >> PAGE_SHIFT); vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT); if (do_munmap(mm, old_addr, old_len) < 0) { @@ -231,7 +232,7 @@ static unsigned long move_vma(struct vm_ } if (vm_flags & VM_LOCKED) { - mm->locked_vm += new_len >> PAGE_SHIFT; + vx_vmlocked_add(mm, new_len >> PAGE_SHIFT); if (new_len > old_len) make_pages_present(new_addr + old_len, new_addr + new_len); @@ -338,6 +339,9 @@ unsigned long do_mremap(unsigned long ad ret = -EAGAIN; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) goto out; + if (!vx_vmlocked_avail(current->mm, + (new_len - old_len) >> PAGE_SHIFT)) + goto out; } if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) { ret = -ENOMEM; @@ -366,10 +370,10 @@ unsigned long do_mremap(unsigned long ad vma_adjust(vma, vma->vm_start, addr + new_len, vma->vm_pgoff, NULL); - mm->total_vm += pages; + vx_vmpages_add(mm, pages); vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages); if (vma->vm_flags & VM_LOCKED) { - mm->locked_vm += pages; + vx_vmlocked_add(mm, pages); make_pages_present(addr + old_len, addr + new_len); } diff -NurpP --minimal linux-2.6.19.1/mm/nommu.c linux-2.6.19.1-vs2.2.0-rc6/mm/nommu.c --- linux-2.6.19.1/mm/nommu.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/nommu.c 2006-11-08 04:57:47 +0100 @@ -921,7 +921,7 @@ unsigned long do_mmap_pgoff(struct file realalloc += kobjsize(vma); askedalloc += sizeof(*vma); - current->mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(current->mm, len >> PAGE_SHIFT); add_nommu_vma(vma); @@ -1046,7 +1046,7 @@ int do_munmap(struct mm_struct *mm, unsi kfree(vml); update_hiwater_vm(mm); - mm->total_vm -= len >> PAGE_SHIFT; + vx_vmpages_sub(mm, len >> PAGE_SHIFT); #ifdef DEBUG show_process_blocks(); @@ -1078,7 +1078,7 @@ void exit_mmap(struct mm_struct * mm) printk("Exit_mmap:\n"); #endif - mm->total_vm = 0; + vx_vmpages_sub(mm, mm->total_vm); while ((tmp = 
mm->context.vmlist)) { mm->context.vmlist = tmp->next; diff -NurpP --minimal linux-2.6.19.1/mm/oom_kill.c linux-2.6.19.1-vs2.2.0-rc6/mm/oom_kill.c --- linux-2.6.19.1/mm/oom_kill.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/oom_kill.c 2006-11-30 19:33:42 +0100 @@ -24,6 +24,7 @@ #include #include #include +#include int sysctl_panic_on_oom; /* #define DEBUG */ @@ -72,6 +73,12 @@ unsigned long badness(struct task_struct points = mm->total_vm; /* + * add points for context badness + */ + + points += vx_badness(p, mm); + + /* * After this unlock we can no longer dereference local variable `mm' */ task_unlock(p); @@ -154,8 +161,8 @@ unsigned long badness(struct task_struct } #ifdef DEBUG - printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n", - p->pid, p->comm, points); + printk(KERN_DEBUG "OOMkill: task %d:#%u (%s) got %d points\n", + p->pid, p->xid, p->comm, points); #endif return points; } @@ -279,8 +286,8 @@ static void __oom_kill_task(struct task_ } if (message) { - printk(KERN_ERR "%s: Killed process %d (%s).\n", - message, p->pid, p->comm); + printk(KERN_ERR "%s: Killed process %d:#%u (%s).\n", + message, p->pid, p->xid, p->comm); } /* @@ -341,8 +348,8 @@ static int oom_kill_process(struct task_ return 0; } - printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li" - " and children.\n", p->pid, p->comm, points); + printk(KERN_ERR "Out of Memory: Kill process %d:#%u (%s) score %li" + " and children.\n", p->pid, p->xid, p->comm, points); /* Try to kill a child first */ list_for_each(tsk, &p->children) { c = list_entry(tsk, struct task_struct, sibling); diff -NurpP --minimal linux-2.6.19.1/mm/page_alloc.c linux-2.6.19.1-vs2.2.0-rc6/mm/page_alloc.c --- linux-2.6.19.1/mm/page_alloc.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/page_alloc.c 2006-11-30 20:55:45 +0100 @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include @@ -1274,6 +1276,9 @@ void si_meminfo(struct sysinfo *val) val->totalhigh = 
totalhigh_pages; val->freehigh = nr_free_highpages(); val->mem_unit = PAGE_SIZE; + + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_meminfo(val); } EXPORT_SYMBOL(si_meminfo); @@ -1293,6 +1298,9 @@ void si_meminfo_node(struct sysinfo *val val->freehigh = 0; #endif val->mem_unit = PAGE_SIZE; + + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_meminfo(val); } #endif diff -NurpP --minimal linux-2.6.19.1/mm/rmap.c linux-2.6.19.1-vs2.2.0-rc6/mm/rmap.c --- linux-2.6.19.1/mm/rmap.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/rmap.c 2006-11-08 04:57:40 +0100 @@ -47,6 +47,7 @@ #include #include #include +#include #include diff -NurpP --minimal linux-2.6.19.1/mm/shmem.c linux-2.6.19.1-vs2.2.0-rc6/mm/shmem.c --- linux-2.6.19.1/mm/shmem.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/shmem.c 2006-11-08 04:57:53 +0100 @@ -55,7 +55,6 @@ #include /* This magic number is used in glibc for posix shared memory */ -#define TMPFS_MAGIC 0x01021994 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) @@ -1658,7 +1657,7 @@ static int shmem_statfs(struct dentry *d { struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); - buf->f_type = TMPFS_MAGIC; + buf->f_type = TMPFS_SUPER_MAGIC; buf->f_bsize = PAGE_CACHE_SIZE; buf->f_namelen = NAME_MAX; spin_lock(&sbinfo->stat_lock); @@ -2232,7 +2231,7 @@ static int shmem_fill_super(struct super sb->s_maxbytes = SHMEM_MAX_BYTES; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = TMPFS_MAGIC; + sb->s_magic = TMPFS_SUPER_MAGIC; sb->s_op = &shmem_ops; sb->s_time_gran = 1; #ifdef CONFIG_TMPFS_POSIX_ACL diff -NurpP --minimal linux-2.6.19.1/mm/slab.c linux-2.6.19.1-vs2.2.0-rc6/mm/slab.c --- linux-2.6.19.1/mm/slab.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/slab.c 2006-11-08 21:52:09 +0100 @@ -499,6 +499,8 @@ struct kmem_cache { #define STATS_INC_FREEMISS(x) do { } while (0) #endif +#include "slab_vs.h" + #if 
DEBUG /* @@ -3109,6 +3111,8 @@ static __always_inline void *__cache_all */ if (NUMA_BUILD && !objp) objp = __cache_alloc_node(cachep, flags, numa_node_id()); + + vx_slab_alloc(cachep, flags); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); @@ -3202,6 +3206,7 @@ retry: obj = slab_get_obj(cachep, slabp, nodeid); check_slabp(cachep, slabp); + vx_slab_alloc(cachep, flags); l3->free_objects--; /* move slabp to correct slabp list: */ list_del(&slabp->list); @@ -3339,6 +3344,7 @@ static inline void __cache_free(struct k check_irq_off(); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); + vx_slab_free(cachep); if (cache_free_alien(cachep, objp)) return; diff -NurpP --minimal linux-2.6.19.1/mm/slab_vs.h linux-2.6.19.1-vs2.2.0-rc6/mm/slab_vs.h --- linux-2.6.19.1/mm/slab_vs.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/slab_vs.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,27 @@ + +#include + +#include + +static inline +void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags) +{ + int what = gfp_zone(cachep->gfpflags); + + if (!current->vx_info) + return; + + atomic_add(cachep->buffer_size, &current->vx_info->cacct.slab[what]); +} + +static inline +void vx_slab_free(struct kmem_cache *cachep) +{ + int what = gfp_zone(cachep->gfpflags); + + if (!current->vx_info) + return; + + atomic_sub(cachep->buffer_size, &current->vx_info->cacct.slab[what]); +} + diff -NurpP --minimal linux-2.6.19.1/mm/swapfile.c linux-2.6.19.1-vs2.2.0-rc6/mm/swapfile.c --- linux-2.6.19.1/mm/swapfile.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/mm/swapfile.c 2006-11-08 06:23:28 +0100 @@ -31,6 +31,8 @@ #include #include #include +#include +#include DEFINE_SPINLOCK(swap_lock); unsigned int nr_swapfiles; @@ -1667,6 +1669,8 @@ void si_swapinfo(struct sysinfo *val) val->freeswap = nr_swap_pages + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; spin_unlock(&swap_lock); + if (vx_flags(VXF_VIRT_MEM, 0)) + 
vx_vsi_swapinfo(val); } /* diff -NurpP --minimal linux-2.6.19.1/net/core/dev.c linux-2.6.19.1-vs2.2.0-rc6/net/core/dev.c --- linux-2.6.19.1/net/core/dev.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/core/dev.c 2006-11-30 19:15:58 +0100 @@ -117,6 +117,8 @@ #include #include #include +#include /* remove with NXF_HIDE_NETIF */ +#include /* * The list of packet types we will receive (as opposed to discard) @@ -2051,6 +2053,9 @@ static int dev_ifconf(char __user *arg) total = 0; for (dev = dev_base; dev; dev = dev->next) { + if (vx_flags(VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, current->nx_info)) + continue; for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2111,6 +2116,10 @@ void dev_seq_stop(struct seq_file *seq, static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { + struct nx_info *nxi = current->nx_info; + + if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi)) + return; if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); diff -NurpP --minimal linux-2.6.19.1/net/core/rtnetlink.c linux-2.6.19.1-vs2.2.0-rc6/net/core/rtnetlink.c --- linux-2.6.19.1/net/core/rtnetlink.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/core/rtnetlink.c 2006-11-30 19:16:31 +0100 @@ -36,6 +36,7 @@ #include #include #include +#include /* remove with NXF_HIDE_NETIF */ #include #include @@ -359,6 +360,9 @@ static int rtnl_dump_ifinfo(struct sk_bu for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; + if (vx_info_flags(skb->sk->sk_vx_info, VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, skb->sk->sk_nx_info)) + continue; if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) diff -NurpP --minimal linux-2.6.19.1/net/core/sock.c linux-2.6.19.1-vs2.2.0-rc6/net/core/sock.c --- linux-2.6.19.1/net/core/sock.c 2006-11-30 21:19:44 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/core/sock.c 2006-11-08 21:52:09 
+0100 @@ -124,6 +124,9 @@ #include #include +#include +#include +#include #ifdef CONFIG_INET #include @@ -855,6 +858,8 @@ struct sock *sk_alloc(int family, gfp_t sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); } + sock_vx_init(sk); + sock_nx_init(sk); if (security_sk_alloc(sk, family, priority)) goto out_free; @@ -893,6 +898,11 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + vx_sock_dec(sk); + clr_vx_info(&sk->sk_vx_info); + sk->sk_xid = -1; + clr_nx_info(&sk->sk_nx_info); + sk->sk_nid = -1; if (sk->sk_prot_creator->slab != NULL) kmem_cache_free(sk->sk_prot_creator->slab, sk); else @@ -910,6 +920,8 @@ struct sock *sk_clone(const struct sock sock_copy(newsk, sk); /* SANITY */ + sock_vx_init(newsk); + sock_nx_init(newsk); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); @@ -955,6 +967,12 @@ struct sock *sk_clone(const struct sock newsk->sk_priority = 0; atomic_set(&newsk->sk_refcnt, 2); + set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info); + newsk->sk_xid = sk->sk_xid; + vx_sock_inc(newsk); + set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info); + newsk->sk_nid = sk->sk_nid; + /* * Increment the counter in the same struct proto as the master * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that @@ -1524,6 +1542,11 @@ void sock_init_data(struct socket *sock, sk->sk_stamp.tv_sec = -1L; sk->sk_stamp.tv_usec = -1L; + set_vx_info(&sk->sk_vx_info, current->vx_info); + sk->sk_xid = vx_current_xid(); + vx_sock_inc(sk); + set_nx_info(&sk->sk_nx_info, current->nx_info); + sk->sk_nid = nx_current_nid(); atomic_set(&sk->sk_refcnt, 1); } diff -NurpP --minimal linux-2.6.19.1/net/ipv4/af_inet.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/af_inet.c --- linux-2.6.19.1/net/ipv4/af_inet.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/af_inet.c 2006-11-08 04:57:50 +0100 @@ -114,6 +114,7 @@ #ifdef CONFIG_IP_MROUTE #include #endif +#include DEFINE_SNMP_STAT(struct linux_mib, 
net_statistics) __read_mostly; @@ -282,9 +283,11 @@ lookup_protocol: } err = -EPERM; + if ((protocol == IPPROTO_ICMP) && vx_ccaps(VXC_RAW_ICMP)) + goto override; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; - +override: sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; @@ -401,6 +404,10 @@ int inet_bind(struct socket *sock, struc unsigned short snum; int chk_addr_ret; int err; + __u32 s_addr; /* Address used for validation */ + __u32 s_addr1; /* Address used for socket */ + __u32 s_addr2; /* Broadcast address for the socket */ + struct nx_info *nxi = sk->sk_nx_info; /* If the socket has its own bind function then use it. (RAW) */ if (sk->sk_prot->bind) { @@ -411,7 +418,40 @@ int inet_bind(struct socket *sock, struc if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + s_addr = addr->sin_addr.s_addr; + s_addr1 = s_addr; + s_addr2 = 0xffffffffl; + + vxdprintk(VXD_CBIT(net, 3), + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d", + sk, sk->sk_nx_info, sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0), + VXD_QUAD(s_addr)); + if (nxi) { + __u32 v4_bcast = nxi->v4_bcast; + __u32 ipv4root = nxi->ipv4[0]; + int nbipv4 = nxi->nbipv4; + + if (s_addr == 0) { + /* bind to any for 1-n */ + s_addr = ipv4root; + s_addr1 = (nbipv4 > 1) ? 0 : s_addr; + s_addr2 = v4_bcast; + } else if (s_addr == IPI_LOOPBACK) { + /* rewrite localhost to ipv4root */ + s_addr = ipv4root; + s_addr1 = ipv4root; + } else if (s_addr != v4_bcast) { + /* normal address bind */ + if (!addr_in_nx_info(nxi, s_addr)) + return -EADDRNOTAVAIL; + } + } + chk_addr_ret = inet_addr_type(s_addr); + + vxdprintk(VXD_CBIT(net, 3), + "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d", + sk, VXD_QUAD(s_addr), VXD_QUAD(s_addr1), VXD_QUAD(s_addr2)); /* Not specified by any standard per-se, however it breaks too * many applications when removed. 
It is unfortunate since @@ -423,7 +463,7 @@ int inet_bind(struct socket *sock, struc err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !inet->freebind && - addr->sin_addr.s_addr != INADDR_ANY && + s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -448,7 +488,8 @@ int inet_bind(struct socket *sock, struc if (sk->sk_state != TCP_CLOSE || inet->num) goto out_release_sock; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->rcv_saddr = inet->saddr = s_addr1; + inet->rcv_saddr2 = s_addr2; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->saddr = 0; /* Use device */ diff -NurpP --minimal linux-2.6.19.1/net/ipv4/devinet.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/devinet.c --- linux-2.6.19.1/net/ipv4/devinet.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/devinet.c 2006-11-30 18:53:18 +0100 @@ -58,6 +58,7 @@ #include #endif #include +#include #include #include @@ -675,6 +676,9 @@ int devinet_ioctl(unsigned int cmd, void *colon = ':'; if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { + struct nx_info *nxi = current->nx_info; + int hide_netif = vx_flags(VXF_HIDE_NETIF, 0); + if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ @@ -683,6 +687,8 @@ int devinet_ioctl(unsigned int cmd, void This is checked above. 
*/ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { + if (hide_netif && !ifa_in_nx_info(ifa, nxi)) + continue; if (!strcmp(ifr.ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == ifa->ifa_address) { @@ -695,9 +701,12 @@ int devinet_ioctl(unsigned int cmd, void comparing just the label */ if (!ifa) { for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; - ifap = &ifa->ifa_next) + ifap = &ifa->ifa_next) { + if (hide_netif && !ifa_in_nx_info(ifa, nxi)) + continue; if (!strcmp(ifr.ifr_name, ifa->ifa_label)) break; + } } } @@ -848,6 +857,9 @@ static int inet_gifconf(struct net_devic goto out; for (; ifa; ifa = ifa->ifa_next) { + if (vx_flags(VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, current->nx_info)) + continue; if (!buf) { done += sizeof(ifr); continue; @@ -1164,6 +1176,7 @@ static int inet_dump_ifaddr(struct sk_bu struct net_device *dev; struct in_device *in_dev; struct in_ifaddr *ifa; + struct sock *sk = skb->sk; int s_ip_idx, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; @@ -1181,6 +1194,9 @@ static int inet_dump_ifaddr(struct sk_bu for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { + if (sk && vx_info_flags(sk->sk_vx_info, VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, sk->sk_nx_info)) + continue; if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, diff -NurpP --minimal linux-2.6.19.1/net/ipv4/fib_hash.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/fib_hash.c --- linux-2.6.19.1/net/ipv4/fib_hash.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/fib_hash.c 2006-11-30 18:53:18 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -981,6 +982,8 @@ static unsigned fib_flag_trans(int type, return flags; } +extern int dev_in_nx_info(struct net_device *, struct nx_info *); + /* * This outputs /proc/net/route. 
* @@ -1011,7 +1014,8 @@ static int fib_seq_show(struct seq_file prefix = f->fn_key; mask = FZ_MASK(iter->zone); flags = fib_flag_trans(fa->fa_type, mask, fi); - if (fi) + if (fi && (!vx_flags(VXF_HIDE_NETIF, 0) || + dev_in_nx_info(fi->fib_dev, current->nx_info))) snprintf(bf, sizeof(bf), "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", fi->fib_dev ? fi->fib_dev->name : "*", prefix, diff -NurpP --minimal linux-2.6.19.1/net/ipv4/inet_connection_sock.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/inet_connection_sock.c --- linux-2.6.19.1/net/ipv4/inet_connection_sock.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/inet_connection_sock.c 2006-11-08 04:57:42 +0100 @@ -39,7 +39,6 @@ int sysctl_local_port_range[2] = { 1024, int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { - const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -52,9 +51,8 @@ int inet_csk_bind_conflict(const struct sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) + if (nx_addr_conflict(sk->sk_nx_info, + inet_rcv_saddr(sk), sk2)) break; } } diff -NurpP --minimal linux-2.6.19.1/net/ipv4/inet_diag.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/inet_diag.c --- linux-2.6.19.1/net/ipv4/inet_diag.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/inet_diag.c 2006-12-04 05:06:57 +0100 @@ -18,6 +18,7 @@ #include #include #include +// #include #include #include @@ -693,6 +694,8 @@ static int inet_diag_dump(struct sk_buff sk_for_each(sk, node, &hashinfo->listening_hash[i]) { struct inet_sock *inet = inet_sk(sk); + if (!nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT)) + continue; if (num < s_num) { num++; continue; @@ -753,6 +756,8 @@ skip_listen_ht: sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = 
inet_sk(sk); + if (!nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT)) + continue; if (num < s_num) goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) @@ -777,6 +782,8 @@ next_normal: inet_twsk_for_each(tw, node, &hashinfo->ehash[i + hashinfo->ehash_size].chain) { + if (!vx_check(tw->tw_xid, VS_WATCH_P|VS_IDENT)) + continue; if (num < s_num) goto next_dying; if (r->id.idiag_sport != tw->tw_sport && diff -NurpP --minimal linux-2.6.19.1/net/ipv4/inet_hashtables.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/inet_hashtables.c --- linux-2.6.19.1/net/ipv4/inet_hashtables.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/inet_hashtables.c 2006-11-08 04:57:42 +0100 @@ -140,11 +140,10 @@ static struct sock *inet_lookup_listener const __be32 rcv_saddr = inet->rcv_saddr; int score = sk->sk_family == PF_INET ? 1 : 0; - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; + if (inet_addr_match(sk->sk_nx_info, daddr, rcv_saddr)) score += 2; - } + else + continue; if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) continue; @@ -175,7 +174,7 @@ struct sock *__inet_lookup_listener(stru const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && !sk->sk_bound_dev_if) goto sherry_cache; diff -NurpP --minimal linux-2.6.19.1/net/ipv4/raw.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/raw.c --- linux-2.6.19.1/net/ipv4/raw.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/raw.c 2006-12-04 05:07:18 +0100 @@ -78,6 +78,7 @@ #include #include #include +// #include struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE]; DEFINE_RWLOCK(raw_v4_lock); @@ -101,6 +102,27 @@ static void raw_v4_unhash(struct sock *s write_unlock_bh(&raw_v4_lock); } + +/* + * Check if a given address matches for a socket + * + * nxi: the socket's nx_info if any + * addr: to be 
verified address + * saddr/baddr: socket addresses + */ +static inline int raw_addr_match ( + struct nx_info *nxi, + uint32_t addr, + uint32_t saddr, + uint32_t baddr) +{ + if (addr && (saddr == addr || baddr == addr)) + return 1; + if (!saddr) + return addr_in_nx_info(nxi, addr); + return 0; +} + struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) @@ -112,7 +134,8 @@ struct sock *__raw_v4_lookup(struct sock if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + raw_addr_match(sk->sk_nx_info, laddr, + inet->rcv_saddr, inet->rcv_saddr2) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -312,6 +335,11 @@ static int raw_send_hdrinc(struct sock * iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } + err = -EPERM; + if (!vx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) + && (!addr_in_nx_info(sk->sk_nx_info, iph->saddr))) + goto error_free; + err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) @@ -323,6 +351,7 @@ out: error_fault: err = -EFAULT; +error_free: kfree_skb(skb); error: IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); @@ -489,6 +518,12 @@ static int raw_sendmsg(struct kiocb *ioc } security_sk_classify_flow(sk, &fl); + if (sk->sk_nx_info) { + err = ip_find_src(sk->sk_nx_info, &rt, &fl); + + if (err) + goto done; + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) @@ -793,7 +828,8 @@ static struct sock *raw_get_first(struct struct hlist_node *node; sk_for_each(sk, node, &raw_v4_htable[state->bucket]) - if (sk->sk_family == PF_INET) + if (sk->sk_family == PF_INET && + nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT)) goto found; } sk = NULL; @@ -809,7 +845,8 @@ static struct sock *raw_get_next(struct sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != PF_INET); + } while (sk && (sk->sk_family != PF_INET || + !nx_check(sk->sk_nid, 
VS_WATCH_P|VS_IDENT))); if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { sk = sk_head(&raw_v4_htable[state->bucket]); diff -NurpP --minimal linux-2.6.19.1/net/ipv4/tcp.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/tcp.c --- linux-2.6.19.1/net/ipv4/tcp.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/tcp.c 2006-11-30 20:55:45 +0100 @@ -258,6 +258,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/net/ipv4/tcp_ipv4.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/tcp_ipv4.c --- linux-2.6.19.1/net/ipv4/tcp_ipv4.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/tcp_ipv4.c 2006-12-04 04:51:13 +0100 @@ -77,6 +77,7 @@ #include #include #include +// #include int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; @@ -1389,6 +1390,12 @@ static void *listening_get_next(struct s req = req->dl_next; while (1) { while (req) { + vxdprintk(VXD_CBIT(net, 6), + "sk,req: %p [#%d] (from %d)", req->sk, + (req->sk)?req->sk->sk_nid:0, nx_current_nid()); + if (req->sk && + !nx_check(req->sk->sk_nid, VS_WATCH_P|VS_IDENT)) + continue; if (req->rsk_ops->family == st->family) { cur = req; goto out; @@ -1413,6 +1420,10 @@ get_req: } get_sk: sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", + sk, sk->sk_nid, nx_current_nid()); + if (!nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT)) + continue; if (sk->sk_family == st->family) { cur = sk; goto out; @@ -1464,18 +1475,26 @@ static void *established_get_first(struc read_lock(&tcp_hashinfo.ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { - if (sk->sk_family != st->family) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egf: %p [#%d] (from %d)", + sk, sk->sk_nid, nx_current_nid()); + if (!nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT)) + continue; + if (sk->sk_family != st->family) continue; - } rc = sk; goto out; } st->state = TCP_SEQ_STATE_TIME_WAIT; inet_twsk_for_each(tw, node, 
&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { - if (tw->tw_family != st->family) { + vxdprintk(VXD_CBIT(net, 6), + "tw: %p [#%d] (from %d)", + tw, tw->tw_nid, nx_current_nid()); + if (!nx_check(tw->tw_nid, VS_WATCH_P|VS_IDENT)) + continue; + if (tw->tw_family != st->family) continue; - } rc = tw; goto out; } @@ -1499,7 +1518,8 @@ static void *established_get_next(struct tw = cur; tw = tw_next(tw); get_tw: - while (tw && tw->tw_family != st->family) { + while (tw && (tw->tw_family != st->family || + !nx_check(tw->tw_nid, VS_WATCH_P|VS_IDENT))) { tw = tw_next(tw); } if (tw) { @@ -1523,6 +1543,11 @@ get_tw: sk = sk_next(sk); sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egn: %p [#%d] (from %d)", + sk, sk->sk_nid, nx_current_nid()); + if (!nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT)) + continue; if (sk->sk_family == st->family) goto found; } diff -NurpP --minimal linux-2.6.19.1/net/ipv4/tcp_minisocks.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/tcp_minisocks.c --- linux-2.6.19.1/net/ipv4/tcp_minisocks.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/tcp_minisocks.c 2006-11-08 04:57:42 +0100 @@ -28,6 +28,10 @@ #include #include +#include +#include +#include + #ifdef CONFIG_SYSCTL #define SYNC_INIT 0 /* let the user enable it */ #else @@ -294,6 +298,11 @@ void tcp_time_wait(struct sock *sk, int tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + tw->tw_xid = sk->sk_xid; + tw->tw_vx_info = NULL; + tw->tw_nid = sk->sk_nid; + tw->tw_nx_info = NULL; + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); diff -NurpP --minimal linux-2.6.19.1/net/ipv4/udp.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/udp.c --- linux-2.6.19.1/net/ipv4/udp.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv4/udp.c 2006-12-04 05:07:46 +0100 @@ -108,6 +108,7 @@ #include #include #include +// #include /* * Snmp MIB for 
the UDP layer @@ -195,6 +196,8 @@ gotit: (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + /* FIXME: nx_addr_conflict(sk->sk_nx_info, + inet_rcv_saddr(sk), sk2) && */ (*saddr_cmp)(sk, sk2) ) goto fail; } @@ -260,6 +263,11 @@ static struct sock *udp_v4_lookup_longwa if (inet->rcv_saddr != daddr) continue; score+=2; + } else if (sk->sk_nx_info) { + if (addr_in_nx_info(sk->sk_nx_info, daddr)) + score+=2; + else + continue; } if (inet->daddr) { if (inet->daddr != saddr) @@ -316,7 +324,8 @@ static inline struct sock *udp_v4_mcast_ if (inet->num != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr && + inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr) || ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; @@ -626,7 +635,20 @@ int udp_sendmsg(struct kiocb *iocb, stru .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + struct nx_info *nxi = sk->sk_nx_info; + security_sk_classify_flow(sk, &fl); + if (nxi) { + err = ip_find_src(nxi, &rt, &fl); + if (err) + goto out; + if (daddr == IPI_LOOPBACK && !vx_check(0, VS_ADMIN)) + daddr = fl.fl4_dst = nxi->ipv4[0]; +#ifdef CONFIG_VSERVER_REMAP_SADDR + if (saddr == IPI_LOOPBACK && !vx_check(0, VS_ADMIN)) + saddr = fl.fl4_src = nxi->ipv4[0]; +#endif + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; @@ -1451,8 +1473,10 @@ static struct sock *udp_get_first(struct for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; + sk_for_each(sk, node, &udp_hash[state->bucket]) { - if (sk->sk_family == state->family) + if (sk->sk_family == state->family && + nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT)) goto found; } } @@ -1469,7 +1493,8 @@ static struct sock *udp_get_next(struct sk 
= sk_next(sk); try_again: ; - } while (sk && sk->sk_family != state->family); + } while (sk && (sk->sk_family != state->family || + !nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT))); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { sk = sk_head(&udp_hash[state->bucket]); diff -NurpP --minimal linux-2.6.19.1/net/ipv6/addrconf.c linux-2.6.19.1-vs2.2.0-rc6/net/ipv6/addrconf.c --- linux-2.6.19.1/net/ipv6/addrconf.c 2006-11-30 21:19:45 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/ipv6/addrconf.c 2006-12-08 00:31:24 +0100 @@ -2730,7 +2730,10 @@ static void if6_seq_stop(struct seq_file static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; - seq_printf(seq, + + /* no ipv6 inside a vserver for now */ + if (vx_check(0, VS_ADMIN|VS_WATCH)) + seq_printf(seq, NIP6_SEQFMT " %02x %02x %02x %02x %8s\n", NIP6(ifp->addr), ifp->idev->dev->ifindex, @@ -3203,6 +3206,10 @@ static int inet6_dump_addr(struct sk_buf struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; read_lock(&dev_base_lock); @@ -3480,6 +3487,10 @@ static int inet6_dump_ifinfo(struct sk_b struct net_device *dev; struct inet6_dev *idev; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + read_lock(&dev_base_lock); for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) diff -NurpP --minimal linux-2.6.19.1/net/netlink/af_netlink.c linux-2.6.19.1-vs2.2.0-rc6/net/netlink/af_netlink.c --- linux-2.6.19.1/net/netlink/af_netlink.c 2006-11-30 21:19:46 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/netlink/af_netlink.c 2006-11-08 21:52:09 +0100 @@ -56,6 +56,9 @@ #include #include #include +#include +#include +#include #include #include diff -NurpP --minimal linux-2.6.19.1/net/socket.c linux-2.6.19.1-vs2.2.0-rc6/net/socket.c --- linux-2.6.19.1/net/socket.c 2006-11-30 21:19:46 
+0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/socket.c 2006-12-04 05:08:37 +0100 @@ -93,6 +93,8 @@ #include #include +#include +#include static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, @@ -540,7 +542,7 @@ static inline int __sock_sendmsg(struct struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; + int err, len; si->sock = sock; si->scm = NULL; @@ -551,7 +553,22 @@ static inline int __sock_sendmsg(struct if (err) return err; - return sock->ops->sendmsg(iocb, sock, msg, size); + len = sock->ops->sendmsg(iocb, sock, msg, size); + if (sock->sk) { + if (len == size) + vx_sock_send(sock->sk, size); + else + vx_sock_fail(sock->sk, size); + } + vxdprintk(VXD_CBIT(net, 7), + "__sock_sendmsg: %p[%p,%p,%p;%d/%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (sock->sk)?sock->sk->sk_nid:0, + (unsigned int)size, len); + return len; } int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -589,7 +606,7 @@ int kernel_sendmsg(struct socket *sock, static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - int err; + int err, len; struct sock_iocb *si = kiocb_to_siocb(iocb); si->sock = sock; @@ -602,7 +619,18 @@ static inline int __sock_recvmsg(struct if (err) return err; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + len = sock->ops->recvmsg(iocb, sock, msg, size, flags); + if ((len >= 0) && sock->sk) + vx_sock_recv(sock->sk, len); + vxdprintk(VXD_CBIT(net, 7), + "__sock_recvmsg: %p[%p,%p,%p;%d/%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (sock->sk)?sock->sk->sk_nid:0, + (unsigned int)size, len); + return len; } int sock_recvmsg(struct socket *sock, struct msghdr *msg, @@ 
-1061,6 +1089,10 @@ static int __sock_create(int family, int if (type < 0 || type >= SOCK_MAX) return -EINVAL; + /* disable IPv6 inside vservers for now */ + if (family == PF_INET6 && !vx_check(0, VS_ADMIN)) + return -EAFNOSUPPORT; + /* Compatibility. This uglymoron is moved from INET layer to here to avoid @@ -1178,6 +1210,7 @@ asmlinkage long sys_socket(int family, i if (retval < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock->flags); retval = sock_map_fd(sock); if (retval < 0) goto out_release; @@ -1209,10 +1242,12 @@ asmlinkage long sys_socketpair(int famil err = sock_create(family, type, protocol, &sock1); if (err < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock1->flags); err = sock_create(family, type, protocol, &sock2); if (err < 0) goto out_release_1; + set_bit(SOCK_USER_SOCKET, &sock2->flags); err = sock1->ops->socketpair(sock1, sock2); if (err < 0) diff -NurpP --minimal linux-2.6.19.1/net/sunrpc/auth.c linux-2.6.19.1-vs2.2.0-rc6/net/sunrpc/auth.c --- linux-2.6.19.1/net/sunrpc/auth.c 2006-11-30 21:19:46 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/sunrpc/auth.c 2006-11-08 04:57:47 +0100 @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -263,6 +264,7 @@ rpcauth_lookupcred(struct rpc_auth *auth struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, + .tag = dx_current_tag(), .group_info = current->group_info, }; struct rpc_cred *ret; @@ -282,6 +284,7 @@ rpcauth_bindcred(struct rpc_task *task) struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, + .tag = dx_current_tag(), .group_info = current->group_info, }; struct rpc_cred *ret; diff -NurpP --minimal linux-2.6.19.1/net/sunrpc/auth_unix.c linux-2.6.19.1-vs2.2.0-rc6/net/sunrpc/auth_unix.c --- linux-2.6.19.1/net/sunrpc/auth_unix.c 2006-11-30 21:19:46 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/sunrpc/auth_unix.c 2006-11-08 04:57:47 +0100 @@ -11,12 +11,14 @@ #include #include #include +#include #define NFS_NGROUPS 16 
struct unx_cred { struct rpc_cred uc_base; gid_t uc_gid; + tag_t uc_tag; gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -78,6 +80,7 @@ unx_create_cred(struct rpc_auth *auth, s if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { cred->uc_uid = 0; cred->uc_gid = 0; + cred->uc_tag = dx_current_tag(); cred->uc_gids[0] = NOGROUP; } else { int groups = acred->group_info->ngroups; @@ -86,6 +89,7 @@ unx_create_cred(struct rpc_auth *auth, s cred->uc_uid = acred->uid; cred->uc_gid = acred->gid; + cred->uc_tag = acred->tag; for (i = 0; i < groups; i++) cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) @@ -117,7 +121,8 @@ unx_match(struct auth_cred *acred, struc int groups; if (cred->uc_uid != acred->uid - || cred->uc_gid != acred->gid) + || cred->uc_gid != acred->gid + || cred->uc_tag != acred->tag) return 0; groups = acred->group_info->ngroups; @@ -143,7 +148,7 @@ unx_marshal(struct rpc_task *task, __be3 struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; __be32 *base, *hold; - int i; + int i, tag; *p++ = htonl(RPC_AUTH_UNIX); base = p++; @@ -153,9 +158,12 @@ unx_marshal(struct rpc_task *task, __be3 * Copy the UTS nodename captured when the client was created. 
*/ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); + tag = task->tk_client->cl_tag; - *p++ = htonl((u32) cred->uc_uid); - *p++ = htonl((u32) cred->uc_gid); + *p++ = htonl((u32) TAGINO_UID(tag, + cred->uc_uid, cred->uc_tag)); + *p++ = htonl((u32) TAGINO_GID(tag, + cred->uc_gid, cred->uc_tag)); hold = p++; for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) *p++ = htonl((u32) cred->uc_gids[i]); diff -NurpP --minimal linux-2.6.19.1/net/sunrpc/clnt.c linux-2.6.19.1-vs2.2.0-rc6/net/sunrpc/clnt.c --- linux-2.6.19.1/net/sunrpc/clnt.c 2006-11-30 21:19:46 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/sunrpc/clnt.c 2006-11-08 04:57:47 +0100 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -238,7 +239,9 @@ struct rpc_clnt *rpc_create(struct rpc_c clnt->cl_autobind = 1; if (args->flags & RPC_CLNT_CREATE_ONESHOT) clnt->cl_oneshot = 1; - + /* FIXME: handle RPC_CLNT_CREATE_TAGGED + if (args->flags & RPC_CLNT_CREATE_TAGGED) + clnt->cl_tag = 1; */ return clnt; } EXPORT_SYMBOL_GPL(rpc_create); diff -NurpP --minimal linux-2.6.19.1/net/unix/af_unix.c linux-2.6.19.1-vs2.2.0-rc6/net/unix/af_unix.c --- linux-2.6.19.1/net/unix/af_unix.c 2006-11-30 21:19:46 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/net/unix/af_unix.c 2006-12-04 05:08:47 +0100 @@ -116,6 +116,8 @@ #include #include #include +#include +#include int sysctl_unix_max_dgram_qlen __read_mostly = 10; @@ -252,6 +254,8 @@ static struct sock *__unix_find_socket_b sk_for_each(s, node, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); + if (!nx_check(s->sk_nid, VS_WATCH_P|VS_IDENT)) + continue; if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) goto found; @@ -807,7 +811,7 @@ static int unix_bind(struct socket *sock */ mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current->fs->umask); - err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0); + err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0, NULL); if (err) goto out_mknod_dput; 
mutex_unlock(&nd.dentry->d_inode->i_mutex); diff -NurpP --minimal linux-2.6.19.1/net/x25/af_x25.c linux-2.6.19.1-vs2.2.0-rc6/net/x25/af_x25.c --- linux-2.6.19.1/net/x25/af_x25.c 2006-09-20 16:58:54 +0200 +++ linux-2.6.19.1-vs2.2.0-rc6/net/x25/af_x25.c 2006-11-08 04:57:42 +0100 @@ -501,7 +501,10 @@ static int x25_create(struct socket *soc x25 = x25_sk(sk); - sock_init_data(sock, sk); + sk->sk_socket = sock; + sk->sk_type = sock->type; + sk->sk_sleep = &sock->wait; + sock->sk = sk; x25_init_timers(sk); Files linux-2.6.19.1/scripts/kconfig/mconf and linux-2.6.19.1-vs2.2.0-rc6/scripts/kconfig/mconf differ diff -NurpP --minimal linux-2.6.19.1/security/commoncap.c linux-2.6.19.1-vs2.2.0-rc6/security/commoncap.c --- linux-2.6.19.1/security/commoncap.c 2006-11-30 21:19:47 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/security/commoncap.c 2006-11-30 19:54:52 +0100 @@ -23,10 +23,11 @@ #include #include #include +#include int cap_netlink_send(struct sock *sk, struct sk_buff *skb) { - NETLINK_CB(skb).eff_cap = current->cap_effective; + cap_t(NETLINK_CB(skb).eff_cap) = vx_mbcap(cap_effective); return 0; } @@ -44,7 +45,7 @@ EXPORT_SYMBOL(cap_netlink_recv); int cap_capable (struct task_struct *tsk, int cap) { /* Derived from include/linux/sched.h:capable. */ - if (cap_raised(tsk->cap_effective, cap)) + if (vx_cap_raised(tsk->vx_info, tsk->cap_effective, cap)) return 0; return -EPERM; } @@ -142,7 +143,8 @@ void cap_bprm_apply_creds (struct linux_ /* Derived from fs/exec.c:compute_creds. 
*/ kernel_cap_t new_permitted, working; - new_permitted = cap_intersect (bprm->cap_permitted, cap_bset); + new_permitted = cap_intersect (bprm->cap_permitted, + vx_current_cap_bset()); working = cap_intersect (bprm->cap_inheritable, current->cap_inheritable); new_permitted = cap_combine (new_permitted, working); @@ -311,7 +313,8 @@ void cap_task_reparent_to_init (struct t int cap_syslog (int type) { - if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) + if ((type != 3 && type != 10) && + !vx_capable(CAP_SYS_ADMIN, VXC_SYSLOG)) return -EPERM; return 0; } diff -NurpP --minimal linux-2.6.19.1/security/dummy.c linux-2.6.19.1-vs2.2.0-rc6/security/dummy.c --- linux-2.6.19.1/security/dummy.c 2006-11-30 21:19:47 +0100 +++ linux-2.6.19.1-vs2.2.0-rc6/security/dummy.c 2006-12-06 05:50:27 +0100 @@ -28,6 +28,7 @@ #include #include #include +#include static int dummy_ptrace (struct task_struct *parent, struct task_struct *child) { @@ -678,7 +679,7 @@ static int dummy_sem_semop (struct sem_a static int dummy_netlink_send (struct sock *sk, struct sk_buff *skb) { - NETLINK_CB(skb).eff_cap = current->cap_effective; + cap_t(NETLINK_CB(skb).eff_cap) = vx_mbcap(cap_effective); return 0; }