diff -NurpP --minimal linux-2.6.17.13/Makefile linux-2.6.17.13-vs2.0.2.1/Makefile --- linux-2.6.17.13/Makefile 2006-09-13 18:43:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/Makefile 2006-09-13 18:44:30 +0200 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 17 -EXTRAVERSION = +EXTRAVERSION = -vs2.0.2.1-gentoo NAME=Crazed Snow-Weasel # *DOCUMENTATION* diff -NurpP --minimal linux-2.6.17.13/arch/alpha/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/alpha/Kconfig --- linux-2.6.17.13/arch/alpha/Kconfig 2006-06-18 04:51:38 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/alpha/Kconfig 2006-08-17 00:28:21 +0200 @@ -632,6 +632,8 @@ source "arch/alpha/oprofile/Kconfig" source "arch/alpha/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/alpha/kernel/entry.S linux-2.6.17.13-vs2.0.2.1/arch/alpha/kernel/entry.S --- linux-2.6.17.13/arch/alpha/kernel/entry.S 2006-04-09 13:49:39 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/alpha/kernel/entry.S 2006-08-17 00:28:21 +0200 @@ -874,24 +874,15 @@ sys_getxgid: .globl sys_getxpid .ent sys_getxpid sys_getxpid: + lda $sp, -16($sp) + stq $26, 0($sp) .prologue 0 - ldq $2, TI_TASK($8) - /* See linux/kernel/timer.c sys_getppid for discussion - about this loop. 
*/ - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - ldl $0, TASK_TGID($2) -1: ldl $1, TASK_TGID($4) -#ifdef CONFIG_SMP - mov $4, $5 - mb - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - cmpeq $4, $5, $5 - beq $5, 1b -#endif - stq $1, 80($sp) + lda $16, 96($sp) + jsr $26, do_getxpid + ldq $26, 0($sp) + + lda $sp, 16($sp) ret .end sys_getxpid diff -NurpP --minimal linux-2.6.17.13/arch/alpha/kernel/osf_sys.c linux-2.6.17.13-vs2.0.2.1/arch/alpha/kernel/osf_sys.c --- linux-2.6.17.13/arch/alpha/kernel/osf_sys.c 2006-06-18 04:51:38 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/alpha/kernel/osf_sys.c 2006-08-17 00:28:21 +0200 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -399,18 +400,20 @@ asmlinkage int osf_utsname(char __user *name) { int error; + struct new_utsname *ptr; down_read(&uts_sem); + ptr = vx_new_utsname(); error = -EFAULT; - if (copy_to_user(name + 0, system_utsname.sysname, 32)) + if (copy_to_user(name + 0, ptr->sysname, 32)) goto out; - if (copy_to_user(name + 32, system_utsname.nodename, 32)) + if (copy_to_user(name + 32, ptr->nodename, 32)) goto out; - if (copy_to_user(name + 64, system_utsname.release, 32)) + if (copy_to_user(name + 64, ptr->release, 32)) goto out; - if (copy_to_user(name + 96, system_utsname.version, 32)) + if (copy_to_user(name + 96, ptr->version, 32)) goto out; - if (copy_to_user(name + 128, system_utsname.machine, 32)) + if (copy_to_user(name + 128, ptr->machine, 32)) goto out; error = 0; @@ -439,6 +442,7 @@ osf_getdomainname(char __user *name, int { unsigned len; int i; + char *domainname; if (!access_ok(VERIFY_WRITE, name, namelen)) return -EFAULT; @@ -448,9 +452,10 @@ osf_getdomainname(char __user *name, int len = 32; down_read(&uts_sem); + domainname = vx_new_uts(domainname); for (i = 0; i < len; ++i) { - __put_user(system_utsname.domainname[i], name + i); - if (system_utsname.domainname[i] == '\0') + __put_user(domainname[i], name + i); + if (domainname[i] == '\0') break; 
} up_read(&uts_sem); @@ -607,30 +612,30 @@ osf_sigstack(struct sigstack __user *uss asmlinkage long osf_sysinfo(int command, char __user *buf, long count) { - static char * sysinfo_table[] = { - system_utsname.sysname, - system_utsname.nodename, - system_utsname.release, - system_utsname.version, - system_utsname.machine, - "alpha", /* instruction set architecture */ - "dummy", /* hardware serial number */ - "dummy", /* hardware manufacturer */ - "dummy", /* secure RPC domain */ - }; unsigned long offset; char *res; long len, err = -EINVAL; offset = command-1; - if (offset >= sizeof(sysinfo_table)/sizeof(char *)) { + if (offset >= 9) { /* Digital UNIX has a few unpublished interfaces here */ printk("sysinfo(%d)", command); goto out; } down_read(&uts_sem); - res = sysinfo_table[offset]; + switch (offset) + { + case 0: res = vx_new_uts(sysname); break; + case 1: res = vx_new_uts(nodename); break; + case 2: res = vx_new_uts(release); break; + case 3: res = vx_new_uts(version); break; + case 4: res = vx_new_uts(machine); break; + case 5: res = "alpha"; break; + default: + res = "dummy"; + break; + } len = strlen(res)+1; if (len > count) len = count; diff -NurpP --minimal linux-2.6.17.13/arch/alpha/kernel/ptrace.c linux-2.6.17.13-vs2.0.2.1/arch/alpha/kernel/ptrace.c --- linux-2.6.17.13/arch/alpha/kernel/ptrace.c 2006-04-09 13:49:39 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/alpha/kernel/ptrace.c 2006-08-17 00:28:21 +0200 @@ -283,6 +283,11 @@ do_sys_ptrace(long request, long pid, lo goto out_notsk; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + ret = -EPERM; + goto out; + } + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -NurpP --minimal linux-2.6.17.13/arch/alpha/kernel/systbls.S linux-2.6.17.13-vs2.0.2.1/arch/alpha/kernel/systbls.S --- linux-2.6.17.13/arch/alpha/kernel/systbls.S 2005-08-29 22:24:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/alpha/kernel/systbls.S 2006-08-17 00:28:21 +0200 @@ -447,7 +447,7 @@ sys_call_table: .quad 
sys_stat64 /* 425 */ .quad sys_lstat64 .quad sys_fstat64 - .quad sys_ni_syscall /* sys_vserver */ + .quad sys_vserver /* sys_vserver */ .quad sys_ni_syscall /* sys_mbind */ .quad sys_ni_syscall /* sys_get_mempolicy */ .quad sys_ni_syscall /* sys_set_mempolicy */ diff -NurpP --minimal linux-2.6.17.13/arch/alpha/mm/init.c linux-2.6.17.13-vs2.0.2.1/arch/alpha/mm/init.c --- linux-2.6.17.13/arch/alpha/mm/init.c 2006-06-18 04:51:38 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/alpha/mm/init.c 2006-08-17 00:28:21 +0200 @@ -21,6 +21,7 @@ #include #include /* max_low_pfn */ #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/arch/arm/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/arm/Kconfig --- linux-2.6.17.13/arch/arm/Kconfig 2006-06-18 04:51:38 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/arm/Kconfig 2006-08-17 00:28:21 +0200 @@ -873,6 +873,8 @@ source "arch/arm/oprofile/Kconfig" source "arch/arm/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/arm/kernel/calls.S linux-2.6.17.13-vs2.0.2.1/arch/arm/kernel/calls.S --- linux-2.6.17.13/arch/arm/kernel/calls.S 2006-02-18 14:39:40 +0100 +++ linux-2.6.17.13-vs2.0.2.1/arch/arm/kernel/calls.S 2006-08-17 00:28:21 +0200 @@ -322,7 +322,7 @@ /* 310 */ CALL(sys_request_key) CALL(sys_keyctl) CALL(ABI(sys_semtimedop, sys_oabi_semtimedop)) -/* vserver */ CALL(sys_ni_syscall) + CALL(sys_vserver) CALL(sys_ioprio_set) /* 315 */ CALL(sys_ioprio_get) CALL(sys_inotify_init) diff -NurpP --minimal linux-2.6.17.13/arch/arm26/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/arm26/Kconfig --- linux-2.6.17.13/arch/arm26/Kconfig 2006-06-18 04:51:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/arm26/Kconfig 2006-08-17 00:28:21 +0200 @@ -234,6 +234,8 @@ source "drivers/usb/Kconfig" source "arch/arm26/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/arm26/kernel/calls.S 
linux-2.6.17.13-vs2.0.2.1/arch/arm26/kernel/calls.S --- linux-2.6.17.13/arch/arm26/kernel/calls.S 2005-03-02 12:38:19 +0100 +++ linux-2.6.17.13-vs2.0.2.1/arch/arm26/kernel/calls.S 2006-08-17 00:28:21 +0200 @@ -257,6 +257,11 @@ __syscall_start: .long sys_lremovexattr .long sys_fremovexattr .long sys_tkill + + .rept 313 - (. - __syscall_start) / 4 + .long sys_ni_syscall + .endr + .long sys_vserver /* 313 */ __syscall_end: .rept NR_syscalls - (__syscall_end - __syscall_start) / 4 diff -NurpP --minimal linux-2.6.17.13/arch/arm26/kernel/traps.c linux-2.6.17.13-vs2.0.2.1/arch/arm26/kernel/traps.c --- linux-2.6.17.13/arch/arm26/kernel/traps.c 2006-06-18 04:51:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/arm26/kernel/traps.c 2006-08-17 00:28:21 +0200 @@ -186,8 +186,9 @@ NORET_TYPE void die(const char *str, str printk("Internal error: %s: %x\n", str, err); printk("CPU: %d\n", smp_processor_id()); show_regs(regs); - printk("Process %s (pid: %d, stack limit = 0x%p)\n", - current->comm, current->pid, end_of_stack(tsk)); + printk("Process %s (pid: %d[#%u], stack limit = 0x%p)\n", + current->comm, current->pid, + current->xid, end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { __dump_stack(tsk, (unsigned long)(regs + 1)); diff -NurpP --minimal linux-2.6.17.13/arch/cris/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/cris/Kconfig --- linux-2.6.17.13/arch/cris/Kconfig 2006-06-18 04:51:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/cris/Kconfig 2006-08-17 00:28:21 +0200 @@ -181,6 +181,8 @@ source "drivers/usb/Kconfig" source "arch/cris/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/frv/mm/mmu-context.c linux-2.6.17.13-vs2.0.2.1/arch/frv/mm/mmu-context.c --- linux-2.6.17.13/arch/frv/mm/mmu-context.c 2006-06-18 04:51:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/frv/mm/mmu-context.c 2006-08-17 00:28:21 +0200 @@ -11,6 +11,7 @@ #include #include +#include #include #define NR_CXN 4096 diff -NurpP 
--minimal linux-2.6.17.13/arch/h8300/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/h8300/Kconfig --- linux-2.6.17.13/arch/h8300/Kconfig 2006-06-18 04:51:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/h8300/Kconfig 2006-08-17 00:28:21 +0200 @@ -199,6 +199,8 @@ source "fs/Kconfig" source "arch/h8300/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/i386/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/i386/Kconfig --- linux-2.6.17.13/arch/i386/Kconfig 2006-09-13 18:43:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/i386/Kconfig 2006-08-17 00:28:21 +0200 @@ -1088,6 +1088,8 @@ endmenu source "arch/i386/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/i386/kernel/sys_i386.c linux-2.6.17.13-vs2.0.2.1/arch/i386/kernel/sys_i386.c --- linux-2.6.17.13/arch/i386/kernel/sys_i386.c 2006-06-18 04:51:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/i386/kernel/sys_i386.c 2006-08-17 00:28:21 +0200 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -210,7 +211,7 @@ asmlinkage int sys_uname(struct old_utsn if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } @@ -218,6 +219,7 @@ asmlinkage int sys_uname(struct old_utsn asmlinkage int sys_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -226,15 +228,16 @@ asmlinkage int sys_olduname(struct oldol down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error |= 
__copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error |= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error |= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error |= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error |= __put_user(0,name->machine+__OLD_UTS_LEN); up_read(&uts_sem); diff -NurpP --minimal linux-2.6.17.13/arch/i386/kernel/syscall_table.S linux-2.6.17.13-vs2.0.2.1/arch/i386/kernel/syscall_table.S --- linux-2.6.17.13/arch/i386/kernel/syscall_table.S 2006-06-18 04:51:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/i386/kernel/syscall_table.S 2006-08-17 00:28:21 +0200 @@ -272,7 +272,7 @@ ENTRY(sys_call_table) .long sys_tgkill /* 270 */ .long sys_utimes .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ + .long sys_vserver .long sys_mbind .long sys_get_mempolicy .long sys_set_mempolicy diff -NurpP --minimal linux-2.6.17.13/arch/i386/kernel/traps.c linux-2.6.17.13-vs2.0.2.1/arch/i386/kernel/traps.c --- linux-2.6.17.13/arch/i386/kernel/traps.c 2006-06-18 04:51:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/i386/kernel/traps.c 2006-08-17 00:28:21 +0200 @@ -53,6 +53,7 @@ #include #include +#include #include "mach_traps.h" @@ -268,8 +269,9 @@ void show_registers(struct pt_regs *regs regs->esi, regs->edi, regs->ebp, esp); printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)", - current->comm, current->pid, current_thread_info(), current); + printk(KERN_EMERG "Process %s (pid: %d[#%u], threadinfo=%p 
task=%p)", + current->comm, current->pid, current->xid, + current_thread_info(), current); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. @@ -351,6 +353,8 @@ void die(const char * str, struct pt_reg oops_enter(); + vxh_throw_oops(); + if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); spin_lock_irqsave(&die.lock, flags); @@ -387,9 +391,9 @@ void die(const char * str, struct pt_reg if (nl) printk("\n"); if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) != - NOTIFY_STOP) { + current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { show_registers(regs); + vxh_dump_history(); /* Executive summary in case the oops scrolled away */ esp = (unsigned long) (&regs->esp); savesegment(ss, ss); diff -NurpP --minimal linux-2.6.17.13/arch/ia64/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/ia64/Kconfig --- linux-2.6.17.13/arch/ia64/Kconfig 2006-09-13 18:43:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/ia64/Kconfig 2006-08-17 00:28:21 +0200 @@ -506,6 +506,8 @@ endmenu source "arch/ia64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/ia64/ia32/binfmt_elf32.c linux-2.6.17.13-vs2.0.2.1/arch/ia64/ia32/binfmt_elf32.c --- linux-2.6.17.13/arch/ia64/ia32/binfmt_elf32.c 2006-06-18 04:51:55 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/ia64/ia32/binfmt_elf32.c 2006-08-17 00:28:21 +0200 @@ -239,7 +239,8 @@ ia32_setup_arg_pages (struct linux_binpr kmem_cache_free(vm_area_cachep, mpnt); return ret; } - current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(current->mm, current->mm->total_vm - vma_pages(mpnt)); + current->mm->stack_vm = current->mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -NurpP --minimal linux-2.6.17.13/arch/ia64/ia32/ia32_entry.S linux-2.6.17.13-vs2.0.2.1/arch/ia64/ia32/ia32_entry.S --- linux-2.6.17.13/arch/ia64/ia32/ia32_entry.S 2006-06-18 04:51:55 +0200 +++
linux-2.6.17.13-vs2.0.2.1/arch/ia64/ia32/ia32_entry.S 2006-08-17 00:28:21 +0200 @@ -483,7 +483,7 @@ ia32_syscall_table: data8 sys_tgkill /* 270 */ data8 compat_sys_utimes data8 sys32_fadvise64_64 - data8 sys_ni_syscall + data8 sys32_vserver data8 sys_ni_syscall data8 sys_ni_syscall /* 275 */ data8 sys_ni_syscall diff -NurpP --minimal linux-2.6.17.13/arch/ia64/kernel/entry.S linux-2.6.17.13-vs2.0.2.1/arch/ia64/kernel/entry.S --- linux-2.6.17.13/arch/ia64/kernel/entry.S 2006-06-18 04:51:55 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/ia64/kernel/entry.S 2006-08-17 00:28:21 +0200 @@ -1577,7 +1577,7 @@ sys_call_table: data8 sys_mq_notify data8 sys_mq_getsetattr data8 sys_ni_syscall // reserved for kexec_load - data8 sys_ni_syscall // reserved for vserver + data8 sys_vserver data8 sys_waitid // 1270 data8 sys_add_key data8 sys_request_key diff -NurpP --minimal linux-2.6.17.13/arch/ia64/kernel/perfmon.c linux-2.6.17.13-vs2.0.2.1/arch/ia64/kernel/perfmon.c --- linux-2.6.17.13/arch/ia64/kernel/perfmon.c 2006-06-18 04:51:56 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/ia64/kernel/perfmon.c 2006-08-17 00:28:21 +0200 @@ -41,6 +41,8 @@ #include #include #include +#include +#include #include #include @@ -2355,7 +2357,7 @@ pfm_smpl_buffer_alloc(struct task_struct */ insert_vm_struct(mm, vma); - mm->total_vm += size >> PAGE_SHIFT; + vx_vmpages_add(mm, size >> PAGE_SHIFT); vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, vma_pages(vma)); up_write(&task->mm->mmap_sem); diff -NurpP --minimal linux-2.6.17.13/arch/ia64/kernel/ptrace.c linux-2.6.17.13-vs2.0.2.1/arch/ia64/kernel/ptrace.c --- linux-2.6.17.13/arch/ia64/kernel/ptrace.c 2006-06-18 04:51:56 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/ia64/kernel/ptrace.c 2006-08-17 00:28:21 +0200 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -1443,6 +1444,9 @@ sys_ptrace (long request, pid_t pid, uns read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto 
out_tsk; + ret = -EPERM; if (pid == 1) /* no messing around with init! */ goto out_tsk; diff -NurpP --minimal linux-2.6.17.13/arch/ia64/mm/fault.c linux-2.6.17.13-vs2.0.2.1/arch/ia64/mm/fault.c --- linux-2.6.17.13/arch/ia64/mm/fault.c 2006-06-18 04:51:56 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/ia64/mm/fault.c 2006-08-17 00:28:21 +0200 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/arch/ia64/sn/kernel/xpc_main.c linux-2.6.17.13-vs2.0.2.1/arch/ia64/sn/kernel/xpc_main.c --- linux-2.6.17.13/arch/ia64/sn/kernel/xpc_main.c 2006-09-13 18:43:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/ia64/sn/kernel/xpc_main.c 2006-09-12 17:57:43 +0200 @@ -108,6 +108,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 0644, NULL, &proc_dointvec_minmax, + NULL, &sysctl_intvec, NULL, &xpc_hb_min_interval, @@ -121,6 +122,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 0644, NULL, &proc_dointvec_minmax, + NULL, &sysctl_intvec, NULL, &xpc_hb_check_min_interval, @@ -145,6 +147,7 @@ static ctl_table xpc_sys_xpc_dir[] = { 0644, NULL, &proc_dointvec_minmax, + NULL, &sysctl_intvec, NULL, &xpc_disengage_request_min_timelimit, diff -NurpP --minimal linux-2.6.17.13/arch/m32r/kernel/sys_m32r.c linux-2.6.17.13-vs2.0.2.1/arch/m32r/kernel/sys_m32r.c --- linux-2.6.17.13/arch/m32r/kernel/sys_m32r.c 2006-04-09 13:49:43 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/m32r/kernel/sys_m32r.c 2006-08-17 00:28:21 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -206,7 +207,7 @@ asmlinkage int sys_uname(struct old_utsn if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } diff -NurpP --minimal linux-2.6.17.13/arch/m68k/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/m68k/Kconfig --- linux-2.6.17.13/arch/m68k/Kconfig 2006-06-18 04:51:57 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/m68k/Kconfig 
2006-08-17 00:28:21 +0200 @@ -654,6 +654,8 @@ source "fs/Kconfig" source "arch/m68k/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/m68k/kernel/ptrace.c linux-2.6.17.13-vs2.0.2.1/arch/m68k/kernel/ptrace.c --- linux-2.6.17.13/arch/m68k/kernel/ptrace.c 2006-01-03 17:29:10 +0100 +++ linux-2.6.17.13-vs2.0.2.1/arch/m68k/kernel/ptrace.c 2006-08-17 00:28:21 +0200 @@ -280,6 +280,8 @@ long arch_ptrace(struct task_struct *chi ret = ptrace_request(child, request, addr, data); break; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; return ret; out_eio: diff -NurpP --minimal linux-2.6.17.13/arch/m68knommu/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/m68knommu/Kconfig --- linux-2.6.17.13/arch/m68knommu/Kconfig 2006-06-18 04:51:58 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/m68knommu/Kconfig 2006-08-17 00:28:21 +0200 @@ -654,6 +654,8 @@ source "fs/Kconfig" source "arch/m68knommu/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/mips/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/mips/Kconfig --- linux-2.6.17.13/arch/mips/Kconfig 2006-06-18 04:51:58 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/Kconfig 2006-08-17 00:28:21 +0200 @@ -1852,6 +1852,8 @@ source "arch/mips/oprofile/Kconfig" source "arch/mips/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/mips/kernel/linux32.c linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/linux32.c --- linux-2.6.17.13/arch/mips/kernel/linux32.c 2006-06-18 04:52:06 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/linux32.c 2006-08-17 00:28:21 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -1040,7 +1041,7 @@ asmlinkage long sys32_newuname(struct ne int ret = 0; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof 
*name)) + if (copy_to_user(name, vx_new_utsname(), sizeof *name)) ret = -EFAULT; up_read(&uts_sem); diff -NurpP --minimal linux-2.6.17.13/arch/mips/kernel/mips-mt.c linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/mips-mt.c --- linux-2.6.17.13/arch/mips/kernel/mips-mt.c 2006-06-18 04:52:06 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/mips-mt.c 2006-08-17 00:28:21 +0200 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/arch/mips/kernel/ptrace.c linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/ptrace.c --- linux-2.6.17.13/arch/mips/kernel/ptrace.c 2006-06-18 04:52:06 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/ptrace.c 2006-08-17 00:28:21 +0200 @@ -490,6 +490,8 @@ asmlinkage void do_syscall_trace(struct goto out; if (!test_thread_flag(TIF_SYSCALL_TRACE)) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; /* The 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ diff -NurpP --minimal linux-2.6.17.13/arch/mips/kernel/scall32-o32.S linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/scall32-o32.S --- linux-2.6.17.13/arch/mips/kernel/scall32-o32.S 2006-06-18 04:52:06 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/scall32-o32.S 2006-08-17 00:28:21 +0200 @@ -618,7 +618,7 @@ einval: li v0, -EINVAL sys sys_mq_timedreceive 5 sys sys_mq_notify 2 /* 4275 */ sys sys_mq_getsetattr 3 - sys sys_ni_syscall 0 /* sys_vserver */ + sys sys_vserver 3 sys sys_waitid 5 sys sys_ni_syscall 0 /* available, was setaltroot */ sys sys_add_key 5 /* 4280 */ diff -NurpP --minimal linux-2.6.17.13/arch/mips/kernel/scall64-64.S linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/scall64-64.S --- linux-2.6.17.13/arch/mips/kernel/scall64-64.S 2006-06-18 04:52:06 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/scall64-64.S 2006-08-17 00:28:21 +0200 @@ -433,7 +433,7 @@ sys_call_table: PTR sys_mq_timedreceive PTR sys_mq_notify PTR sys_mq_getsetattr /* 5235 */ - PTR 
sys_ni_syscall /* sys_vserver */ + PTR sys_vserver PTR sys_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key diff -NurpP --minimal linux-2.6.17.13/arch/mips/kernel/scall64-n32.S linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/scall64-n32.S --- linux-2.6.17.13/arch/mips/kernel/scall64-n32.S 2006-06-18 04:52:06 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/scall64-n32.S 2006-08-17 00:28:21 +0200 @@ -359,7 +359,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_mq_timedreceive PTR compat_sys_mq_notify PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* 6240, sys_vserver */ + PTR sys32_vserver /* 6240 */ PTR sysn32_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key diff -NurpP --minimal linux-2.6.17.13/arch/mips/kernel/scall64-o32.S linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/scall64-o32.S --- linux-2.6.17.13/arch/mips/kernel/scall64-o32.S 2006-06-18 04:52:06 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/scall64-o32.S 2006-08-17 00:28:21 +0200 @@ -481,7 +481,7 @@ sys_call_table: PTR compat_sys_mq_timedreceive PTR compat_sys_mq_notify /* 4275 */ PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* sys_vserver */ + PTR sys32_vserver PTR sys32_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key /* 4280 */ diff -NurpP --minimal linux-2.6.17.13/arch/mips/kernel/syscall.c linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/syscall.c --- linux-2.6.17.13/arch/mips/kernel/syscall.c 2006-06-18 04:52:06 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/syscall.c 2006-08-17 00:28:21 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -232,7 +233,7 @@ out: */ asmlinkage int sys_uname(struct old_utsname __user * name) { - if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name))) return 0; return -EFAULT; } @@ -243,21 +244,23 @@ asmlinkage int sys_uname(struct old_utsn asmlinkage int sys_olduname(struct 
oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) return -EFAULT; - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error = __put_user(0,name->machine+__OLD_UTS_LEN); error = error ? -EFAULT : 0; diff -NurpP --minimal linux-2.6.17.13/arch/mips/kernel/sysirix.c linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/sysirix.c --- linux-2.6.17.13/arch/mips/kernel/sysirix.c 2006-06-18 04:52:06 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/mips/kernel/sysirix.c 2006-08-17 00:28:21 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -884,7 +885,7 @@ asmlinkage int irix_getdomainname(char _ down_read(&uts_sem); if (len > __NEW_UTS_LEN) len = __NEW_UTS_LEN; - err = copy_to_user(name, system_utsname.domainname, len) ? -EFAULT : 0; + err = copy_to_user(name, vx_new_uts(domainname), len) ? 
-EFAULT : 0; up_read(&uts_sem); return err; @@ -1127,11 +1128,11 @@ struct iuname { asmlinkage int irix_uname(struct iuname __user *buf) { down_read(&uts_sem); - if (copy_from_user(system_utsname.sysname, buf->sysname, 65) - || copy_from_user(system_utsname.nodename, buf->nodename, 65) - || copy_from_user(system_utsname.release, buf->release, 65) - || copy_from_user(system_utsname.version, buf->version, 65) - || copy_from_user(system_utsname.machine, buf->machine, 65)) { + if (copy_from_user(vx_new_uts(sysname), buf->sysname, 65) + || copy_from_user(vx_new_uts(nodename), buf->nodename, 65) + || copy_from_user(vx_new_uts(release), buf->release, 65) + || copy_from_user(vx_new_uts(version), buf->version, 65) + || copy_from_user(vx_new_uts(machine), buf->machine, 65)) { return -EFAULT; } up_read(&uts_sem); diff -NurpP --minimal linux-2.6.17.13/arch/parisc/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/parisc/Kconfig --- linux-2.6.17.13/arch/parisc/Kconfig 2006-06-18 04:52:14 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/parisc/Kconfig 2006-08-17 00:28:21 +0200 @@ -253,6 +253,8 @@ source "arch/parisc/oprofile/Kconfig" source "arch/parisc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/parisc/hpux/sys_hpux.c linux-2.6.17.13-vs2.0.2.1/arch/parisc/hpux/sys_hpux.c --- linux-2.6.17.13/arch/parisc/hpux/sys_hpux.c 2006-02-15 13:54:11 +0100 +++ linux-2.6.17.13-vs2.0.2.1/arch/parisc/hpux/sys_hpux.c 2006-08-17 00:28:21 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -266,15 +267,15 @@ static int hpux_uname(struct hpux_utsnam down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,HPUX_UTSLEN-1); + error = __copy_to_user(&name->sysname,vx_new_uts(sysname),HPUX_UTSLEN-1); error |= __put_user(0,name->sysname+HPUX_UTSLEN-1); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename,HPUX_UTSLEN-1); + error |= 
__copy_to_user(&name->nodename,vx_new_uts(nodename),HPUX_UTSLEN-1); error |= __put_user(0,name->nodename+HPUX_UTSLEN-1); - error |= __copy_to_user(&name->release,&system_utsname.release,HPUX_UTSLEN-1); + error |= __copy_to_user(&name->release,vx_new_uts(release),HPUX_UTSLEN-1); error |= __put_user(0,name->release+HPUX_UTSLEN-1); - error |= __copy_to_user(&name->version,&system_utsname.version,HPUX_UTSLEN-1); + error |= __copy_to_user(&name->version,vx_new_uts(version),HPUX_UTSLEN-1); error |= __put_user(0,name->version+HPUX_UTSLEN-1); - error |= __copy_to_user(&name->machine,&system_utsname.machine,HPUX_UTSLEN-1); + error |= __copy_to_user(&name->machine,vx_new_uts(machine),HPUX_UTSLEN-1); error |= __put_user(0,name->machine+HPUX_UTSLEN-1); up_read(&uts_sem); @@ -373,8 +374,8 @@ int hpux_utssys(char *ubuf, int n, int t /* TODO: print a warning about using this? */ down_write(&uts_sem); error = -EFAULT; - if (!copy_from_user(system_utsname.sysname, ubuf, len)) { - system_utsname.sysname[len] = 0; + if (!copy_from_user(vx_new_uts(sysname), ubuf, len)) { + vx_new_uts(sysname)[len] = 0; error = 0; } up_write(&uts_sem); @@ -400,8 +401,8 @@ int hpux_utssys(char *ubuf, int n, int t /* TODO: print a warning about this? 
*/ down_write(&uts_sem); error = -EFAULT; - if (!copy_from_user(system_utsname.release, ubuf, len)) { - system_utsname.release[len] = 0; + if (!copy_from_user(vx_new_uts(release), ubuf, len)) { + vx_new_uts(release)[len] = 0; error = 0; } up_write(&uts_sem); @@ -422,13 +423,13 @@ int hpux_getdomainname(char *name, int l down_read(&uts_sem); - nlen = strlen(system_utsname.domainname) + 1; + nlen = strlen(vx_new_uts(domainname)) + 1; if (nlen < len) len = nlen; if(len > __NEW_UTS_LEN) goto done; - if(copy_to_user(name, system_utsname.domainname, len)) + if(copy_to_user(name, vx_new_uts(domainname), len)) goto done; err = 0; done: diff -NurpP --minimal linux-2.6.17.13/arch/parisc/kernel/sys_parisc32.c linux-2.6.17.13-vs2.0.2.1/arch/parisc/kernel/sys_parisc32.c --- linux-2.6.17.13/arch/parisc/kernel/sys_parisc32.c 2006-06-18 04:52:15 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/parisc/kernel/sys_parisc32.c 2006-08-17 00:28:21 +0200 @@ -599,6 +599,7 @@ asmlinkage int sys32_sysinfo(struct sysi do { seq = read_seqbegin(&xtime_lock); + /* FIXME: requires vx virtualization */ val.uptime = jiffies / HZ; val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); diff -NurpP --minimal linux-2.6.17.13/arch/parisc/kernel/syscall_table.S linux-2.6.17.13-vs2.0.2.1/arch/parisc/kernel/syscall_table.S --- linux-2.6.17.13/arch/parisc/kernel/syscall_table.S 2006-06-18 04:52:15 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/parisc/kernel/syscall_table.S 2006-08-17 00:28:21 +0200 @@ -368,7 +368,7 @@ ENTRY_COMP(mbind) /* 260 */ ENTRY_COMP(get_mempolicy) ENTRY_COMP(set_mempolicy) - ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ + ENTRY_DIFF(vserver) ENTRY_SAME(add_key) ENTRY_SAME(request_key) /* 265 */ ENTRY_SAME(keyctl) diff -NurpP --minimal linux-2.6.17.13/arch/powerpc/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/powerpc/Kconfig --- linux-2.6.17.13/arch/powerpc/Kconfig 2006-09-13 18:43:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/powerpc/Kconfig 2006-08-17 00:28:21 +0200 @@ -1018,6 +1018,8 @@ endmenu 
source "arch/powerpc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" config KEYS_COMPAT diff -NurpP --minimal linux-2.6.17.13/arch/powerpc/kernel/process.c linux-2.6.17.13-vs2.0.2.1/arch/powerpc/kernel/process.c --- linux-2.6.17.13/arch/powerpc/kernel/process.c 2006-06-18 04:52:16 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/powerpc/kernel/process.c 2006-08-17 00:28:21 +0200 @@ -432,8 +432,9 @@ void show_regs(struct pt_regs * regs) trap = TRAP(regs); if (trap == 0x300 || trap == 0x600) printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); - printk("TASK = %p[%d] '%s' THREAD: %p", - current, current->pid, current->comm, task_thread_info(current)); + printk("TASK = %p[%d,#%u] '%s' THREAD: %p", + current, current->pid, current->xid, + current->comm, task_thread_info(current)); #ifdef CONFIG_SMP printk(" CPU: %d", smp_processor_id()); diff -NurpP --minimal linux-2.6.17.13/arch/powerpc/kernel/syscalls.c linux-2.6.17.13-vs2.0.2.1/arch/powerpc/kernel/syscalls.c --- linux-2.6.17.13/arch/powerpc/kernel/syscalls.c 2006-06-18 04:52:17 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/powerpc/kernel/syscalls.c 2006-08-17 00:28:21 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -260,7 +261,7 @@ long ppc_newuname(struct new_utsname __u int err = 0; down_read(&uts_sem); - if (copy_to_user(name, &system_utsname, sizeof(*name))) + if (copy_to_user(name, vx_new_utsname(), sizeof(*name))) err = -EFAULT; up_read(&uts_sem); if (!err) @@ -273,7 +274,7 @@ int sys_uname(struct old_utsname __user int err = 0; down_read(&uts_sem); - if (copy_to_user(name, &system_utsname, sizeof(*name))) + if (copy_to_user(name, vx_new_utsname(), sizeof(*name))) err = -EFAULT; up_read(&uts_sem); if (!err) @@ -284,25 +285,22 @@ int sys_uname(struct old_utsname __user int sys_olduname(struct oldold_utsname __user *name) { int error; + struct new_utsname *ptr; if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) return -EFAULT; 
down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &system_utsname.sysname, - __OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname, ptr->sysname, __OLD_UTS_LEN); error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &system_utsname.nodename, - __OLD_UTS_LEN); + error |= __copy_to_user(&name->nodename, ptr->nodename, __OLD_UTS_LEN); error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &system_utsname.release, - __OLD_UTS_LEN); + error |= __copy_to_user(&name->release, ptr->release, __OLD_UTS_LEN); error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &system_utsname.version, - __OLD_UTS_LEN); + error |= __copy_to_user(&name->version, ptr->version, __OLD_UTS_LEN); error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &system_utsname.machine, - __OLD_UTS_LEN); + error |= __copy_to_user(&name->machine, ptr->machine, __OLD_UTS_LEN); error |= override_machine(name->machine); up_read(&uts_sem); diff -NurpP --minimal linux-2.6.17.13/arch/powerpc/kernel/systbl.S linux-2.6.17.13-vs2.0.2.1/arch/powerpc/kernel/systbl.S --- linux-2.6.17.13/arch/powerpc/kernel/systbl.S 2006-06-18 04:52:17 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/powerpc/kernel/systbl.S 2006-08-17 00:28:21 +0200 @@ -296,7 +296,7 @@ COMPAT_SYS(fstatfs64) SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64) PPC_SYS(rtas) OLDSYS(debug_setcontext) -SYSCALL(ni_syscall) +SYSX(sys_vserver, sys32_vserver, sys_vserver) SYSCALL(ni_syscall) COMPAT_SYS(mbind) COMPAT_SYS(get_mempolicy) diff -NurpP --minimal linux-2.6.17.13/arch/powerpc/kernel/vdso.c linux-2.6.17.13-vs2.0.2.1/arch/powerpc/kernel/vdso.c --- linux-2.6.17.13/arch/powerpc/kernel/vdso.c 2006-06-18 04:52:17 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/powerpc/kernel/vdso.c 2006-08-17 00:28:21 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include 
#include #include @@ -293,7 +294,7 @@ int arch_setup_additional_pages(struct l kmem_cache_free(vm_area_cachep, vma); return -ENOMEM; } - mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + vx_vmpages_add(mm, (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); up_write(&mm->mmap_sem); return 0; diff -NurpP --minimal linux-2.6.17.13/arch/ppc/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/ppc/Kconfig --- linux-2.6.17.13/arch/ppc/Kconfig 2006-06-18 04:52:22 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/ppc/Kconfig 2006-08-17 00:28:21 +0200 @@ -1414,6 +1414,8 @@ source "arch/powerpc/oprofile/Kconfig" source "arch/ppc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/s390/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/s390/Kconfig --- linux-2.6.17.13/arch/s390/Kconfig 2006-06-18 04:52:32 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/s390/Kconfig 2006-08-17 00:28:21 +0200 @@ -478,6 +478,8 @@ source "arch/s390/oprofile/Kconfig" source "arch/s390/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/s390/kernel/process.c linux-2.6.17.13-vs2.0.2.1/arch/s390/kernel/process.c --- linux-2.6.17.13/arch/s390/kernel/process.c 2006-06-18 04:52:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/s390/kernel/process.c 2006-08-17 00:28:21 +0200 @@ -165,9 +165,9 @@ void show_regs(struct pt_regs *regs) struct task_struct *tsk = current; printk("CPU: %d %s\n", task_thread_info(tsk)->cpu, print_tainted()); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, (void *) tsk, - (void *) tsk->thread.ksp); + printk("Process %s (pid: %d[#%u], task: %p, ksp: %p)\n", + current->comm, current->pid, current->xid, + (void *) tsk, (void *) tsk->thread.ksp); show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ diff -NurpP --minimal linux-2.6.17.13/arch/s390/kernel/ptrace.c 
linux-2.6.17.13-vs2.0.2.1/arch/s390/kernel/ptrace.c --- linux-2.6.17.13/arch/s390/kernel/ptrace.c 2006-06-18 04:52:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/s390/kernel/ptrace.c 2006-08-17 00:28:21 +0200 @@ -723,7 +723,13 @@ sys_ptrace(long request, long pid, long goto out; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + ret = -EPERM; + goto out_tsk; + } + ret = do_ptrace(child, request, addr, data); +out_tsk: put_task_struct(child); out: unlock_kernel(); diff -NurpP --minimal linux-2.6.17.13/arch/s390/kernel/syscalls.S linux-2.6.17.13-vs2.0.2.1/arch/s390/kernel/syscalls.S --- linux-2.6.17.13/arch/s390/kernel/syscalls.S 2006-06-18 04:52:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/s390/kernel/syscalls.S 2006-08-17 00:28:21 +0200 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) -NI_SYSCALL /* reserved for vserver */ +SYSCALL(sys_vserver,sys_vserver,sys32_vserver) SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) diff -NurpP --minimal linux-2.6.17.13/arch/sh/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/sh/Kconfig --- linux-2.6.17.13/arch/sh/Kconfig 2006-06-18 04:52:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sh/Kconfig 2006-08-17 00:28:21 +0200 @@ -646,6 +646,8 @@ source "arch/sh/oprofile/Kconfig" source "arch/sh/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/sh/kernel/kgdb_stub.c linux-2.6.17.13-vs2.0.2.1/arch/sh/kernel/kgdb_stub.c --- linux-2.6.17.13/arch/sh/kernel/kgdb_stub.c 2004-08-14 12:54:51 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sh/kernel/kgdb_stub.c 2006-08-17 00:28:21 
+0200 @@ -412,7 +412,7 @@ static struct task_struct *get_thread(in if (pid == PID_MAX) pid = 0; /* First check via PID */ - thread = find_task_by_pid(pid); + thread = find_task_by_real_pid(pid); if (thread) return thread; diff -NurpP --minimal linux-2.6.17.13/arch/sh/kernel/setup.c linux-2.6.17.13-vs2.0.2.1/arch/sh/kernel/setup.c --- linux-2.6.17.13/arch/sh/kernel/setup.c 2006-06-18 04:52:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sh/kernel/setup.c 2006-08-17 00:28:21 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -481,7 +482,7 @@ static int show_cpuinfo(struct seq_file seq_printf(m, "machine\t\t: %s\n", get_system_type()); seq_printf(m, "processor\t: %d\n", cpu); - seq_printf(m, "cpu family\t: %s\n", system_utsname.machine); + seq_printf(m, "cpu family\t: %s\n", vx_new_uts(machine)); seq_printf(m, "cpu type\t: %s\n", get_cpu_subtype()); show_cpuflags(m); diff -NurpP --minimal linux-2.6.17.13/arch/sh/kernel/sys_sh.c linux-2.6.17.13-vs2.0.2.1/arch/sh/kernel/sys_sh.c --- linux-2.6.17.13/arch/sh/kernel/sys_sh.c 2005-08-29 22:24:55 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sh/kernel/sys_sh.c 2006-08-17 00:28:21 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -267,7 +268,7 @@ asmlinkage int sys_uname(struct old_utsn if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } diff -NurpP --minimal linux-2.6.17.13/arch/sh64/kernel/sys_sh64.c linux-2.6.17.13-vs2.0.2.1/arch/sh64/kernel/sys_sh64.c --- linux-2.6.17.13/arch/sh64/kernel/sys_sh64.c 2005-06-22 02:37:59 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sh64/kernel/sys_sh64.c 2006-08-17 00:28:21 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -279,7 +280,7 @@ asmlinkage int sys_uname(struct old_utsn if (!name) return -EFAULT; down_read(&uts_sem); - 
err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } diff -NurpP --minimal linux-2.6.17.13/arch/sparc/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/sparc/Kconfig --- linux-2.6.17.13/arch/sparc/Kconfig 2006-06-18 04:52:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc/Kconfig 2006-08-17 00:28:21 +0200 @@ -291,6 +291,8 @@ source "fs/Kconfig" source "arch/sparc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/sparc/kernel/ptrace.c linux-2.6.17.13-vs2.0.2.1/arch/sparc/kernel/ptrace.c --- linux-2.6.17.13/arch/sparc/kernel/ptrace.c 2006-04-09 13:49:44 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc/kernel/ptrace.c 2006-08-17 00:28:21 +0200 @@ -299,6 +299,10 @@ asmlinkage void do_ptrace(struct pt_regs pt_error_return(regs, -ret); goto out; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { diff -NurpP --minimal linux-2.6.17.13/arch/sparc/kernel/sys_sparc.c linux-2.6.17.13-vs2.0.2.1/arch/sparc/kernel/sys_sparc.c --- linux-2.6.17.13/arch/sparc/kernel/sys_sparc.c 2006-09-13 18:43:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc/kernel/sys_sparc.c 2006-08-25 05:44:21 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -473,13 +474,13 @@ asmlinkage int sys_getdomainname(char __ down_read(&uts_sem); - nlen = strlen(system_utsname.domainname) + 1; + nlen = strlen(vx_new_uts(domainname)) + 1; if (nlen < len) len = nlen; if (len > __NEW_UTS_LEN) goto done; - if (copy_to_user(name, system_utsname.domainname, len)) + if (copy_to_user(name, vx_new_uts(domainname), len)) goto done; err = 0; done: diff -NurpP --minimal linux-2.6.17.13/arch/sparc/kernel/sys_sunos.c 
linux-2.6.17.13-vs2.0.2.1/arch/sparc/kernel/sys_sunos.c --- linux-2.6.17.13/arch/sparc/kernel/sys_sunos.c 2006-02-15 13:54:13 +0100 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc/kernel/sys_sunos.c 2006-08-17 00:28:21 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -482,14 +483,16 @@ struct sunos_utsname { asmlinkage int sunos_uname(struct sunos_utsname __user *name) { int ret; + struct new_utsname *ptr; down_read(&uts_sem); - ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0], sizeof(name->sname) - 1); + ptr = vx_new_utsname(); + ret = copy_to_user(&name->sname[0], ptr->sysname, sizeof(name->sname) - 1); if (!ret) { - ret |= __copy_to_user(&name->nname[0], &system_utsname.nodename[0], sizeof(name->nname) - 1); + ret |= __copy_to_user(&name->nname[0], ptr->nodename, sizeof(name->nname) - 1); ret |= __put_user('\0', &name->nname[8]); - ret |= __copy_to_user(&name->rel[0], &system_utsname.release[0], sizeof(name->rel) - 1); - ret |= __copy_to_user(&name->ver[0], &system_utsname.version[0], sizeof(name->ver) - 1); - ret |= __copy_to_user(&name->mach[0], &system_utsname.machine[0], sizeof(name->mach) - 1); + ret |= __copy_to_user(&name->rel[0], ptr->release, sizeof(name->rel) - 1); + ret |= __copy_to_user(&name->ver[0], ptr->version, sizeof(name->ver) - 1); + ret |= __copy_to_user(&name->mach[0], ptr->machine, sizeof(name->mach) - 1); } up_read(&uts_sem); return ret ? 
-EFAULT : 0; diff -NurpP --minimal linux-2.6.17.13/arch/sparc/kernel/systbls.S linux-2.6.17.13-vs2.0.2.1/arch/sparc/kernel/systbls.S --- linux-2.6.17.13/arch/sparc/kernel/systbls.S 2006-06-18 04:52:34 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc/kernel/systbls.S 2006-08-17 00:28:21 +0200 @@ -72,7 +72,7 @@ sys_call_table: /*250*/ .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat diff -NurpP --minimal linux-2.6.17.13/arch/sparc64/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/sparc64/Kconfig --- linux-2.6.17.13/arch/sparc64/Kconfig 2006-06-18 04:52:34 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc64/Kconfig 2006-08-17 00:28:21 +0200 @@ -423,6 +423,8 @@ endmenu source "arch/sparc64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/sparc64/kernel/binfmt_aout32.c linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/binfmt_aout32.c --- linux-2.6.17.13/arch/sparc64/kernel/binfmt_aout32.c 2006-06-18 04:52:34 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/binfmt_aout32.c 2006-08-17 00:28:21 +0200 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/arch/sparc64/kernel/ptrace.c linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/ptrace.c --- 
linux-2.6.17.13/arch/sparc64/kernel/ptrace.c 2006-06-18 04:52:35 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/ptrace.c 2006-08-17 00:28:21 +0200 @@ -212,6 +212,10 @@ asmlinkage void do_ptrace(struct pt_regs pt_error_return(regs, -ret); goto out; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { diff -NurpP --minimal linux-2.6.17.13/arch/sparc64/kernel/sys_sparc.c linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/sys_sparc.c --- linux-2.6.17.13/arch/sparc64/kernel/sys_sparc.c 2006-09-13 18:43:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/sys_sparc.c 2006-08-25 05:44:21 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -711,13 +712,13 @@ asmlinkage long sys_getdomainname(char _ down_read(&uts_sem); - nlen = strlen(system_utsname.domainname) + 1; + nlen = strlen(vx_new_uts(domainname)) + 1; if (nlen < len) len = nlen; if (len > __NEW_UTS_LEN) goto done; - if (copy_to_user(name, system_utsname.domainname, len)) + if (copy_to_user(name, vx_new_uts(domainname), len)) goto done; err = 0; done: diff -NurpP --minimal linux-2.6.17.13/arch/sparc64/kernel/sys_sunos32.c linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/sys_sunos32.c --- linux-2.6.17.13/arch/sparc64/kernel/sys_sunos32.c 2006-02-15 13:54:13 +0100 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/sys_sunos32.c 2006-08-17 00:28:21 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -437,18 +438,20 @@ struct sunos_utsname { asmlinkage int sunos_uname(struct sunos_utsname __user *name) { int ret; + struct new_utsname *ptr; down_read(&uts_sem); - ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0], + ptr = vx_new_utsname(); + ret = copy_to_user(&name->sname[0], ptr->sysname, sizeof(name->sname) - 1); - ret |= 
copy_to_user(&name->nname[0], &system_utsname.nodename[0], + ret |= copy_to_user(&name->nname[0], ptr->nodename, sizeof(name->nname) - 1); ret |= put_user('\0', &name->nname[8]); - ret |= copy_to_user(&name->rel[0], &system_utsname.release[0], + ret |= copy_to_user(&name->rel[0], ptr->release, sizeof(name->rel) - 1); - ret |= copy_to_user(&name->ver[0], &system_utsname.version[0], + ret |= copy_to_user(&name->ver[0], ptr->version, sizeof(name->ver) - 1); - ret |= copy_to_user(&name->mach[0], &system_utsname.machine[0], + ret |= copy_to_user(&name->mach[0], ptr->machine, sizeof(name->mach) - 1); up_read(&uts_sem); return (ret ? -EFAULT : 0); diff -NurpP --minimal linux-2.6.17.13/arch/sparc64/kernel/systbls.S linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/systbls.S --- linux-2.6.17.13/arch/sparc64/kernel/systbls.S 2006-06-18 04:52:35 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc64/kernel/systbls.S 2006-08-17 00:28:21 +0200 @@ -73,7 +73,7 @@ sys_call_table32: /*250*/ .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy + .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid /*280*/ .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat @@ -143,7 +143,7 @@ sys_call_table: /*250*/ .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl .word sys_sync_file_range, 
sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat diff -NurpP --minimal linux-2.6.17.13/arch/sparc64/solaris/fs.c linux-2.6.17.13-vs2.0.2.1/arch/sparc64/solaris/fs.c --- linux-2.6.17.13/arch/sparc64/solaris/fs.c 2006-04-09 13:49:44 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc64/solaris/fs.c 2006-08-17 00:28:21 +0200 @@ -363,7 +363,7 @@ static int report_statvfs(struct vfsmoun int j = strlen (p); if (j > 15) j = 15; - if (IS_RDONLY(inode)) i = 1; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1; if (mnt->mnt_flags & MNT_NOSUID) i |= 2; if (!sysv_valid_dev(inode->i_sb->s_dev)) return -EOVERFLOW; @@ -399,7 +399,7 @@ static int report_statvfs64(struct vfsmo int j = strlen (p); if (j > 15) j = 15; - if (IS_RDONLY(inode)) i = 1; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1; if (mnt->mnt_flags & MNT_NOSUID) i |= 2; if (!sysv_valid_dev(inode->i_sb->s_dev)) return -EOVERFLOW; diff -NurpP --minimal linux-2.6.17.13/arch/sparc64/solaris/misc.c linux-2.6.17.13-vs2.0.2.1/arch/sparc64/solaris/misc.c --- linux-2.6.17.13/arch/sparc64/solaris/misc.c 2006-06-18 04:52:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/sparc64/solaris/misc.c 2006-08-17 00:28:21 +0200 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -239,7 +240,7 @@ asmlinkage int solaris_utssys(u32 buf, u /* Let's cheat */ err = set_utsfield(v->sysname, "SunOS", 1, 0); down_read(&uts_sem); - err |= set_utsfield(v->nodename, 
system_utsname.nodename, + err |= set_utsfield(v->nodename, vx_new_uts(nodename), 1, 1); up_read(&uts_sem); err |= set_utsfield(v->release, "2.6", 0, 0); @@ -263,7 +264,7 @@ asmlinkage int solaris_utsname(u32 buf) /* Why should we not lie a bit? */ down_read(&uts_sem); err = set_utsfield(v->sysname, "SunOS", 0, 0); - err |= set_utsfield(v->nodename, system_utsname.nodename, 1, 1); + err |= set_utsfield(v->nodename, vx_new_uts(nodename), 1, 1); err |= set_utsfield(v->release, "5.6", 0, 0); err |= set_utsfield(v->version, "Generic", 0, 0); err |= set_utsfield(v->machine, machine(), 0, 0); @@ -295,7 +296,7 @@ asmlinkage int solaris_sysinfo(int cmd, case SI_HOSTNAME: r = buffer + 256; down_read(&uts_sem); - for (p = system_utsname.nodename, q = buffer; + for (p = vx_new_uts(nodename), q = buffer; q < r && *p && *p != '.'; *q++ = *p++); up_read(&uts_sem); *q = 0; diff -NurpP --minimal linux-2.6.17.13/arch/um/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/um/Kconfig --- linux-2.6.17.13/arch/um/Kconfig 2006-06-18 04:52:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/um/Kconfig 2006-08-17 00:28:21 +0200 @@ -279,6 +279,8 @@ source "drivers/connector/Kconfig" source "fs/Kconfig" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/um/drivers/mconsole_kern.c linux-2.6.17.13-vs2.0.2.1/arch/um/drivers/mconsole_kern.c --- linux-2.6.17.13/arch/um/drivers/mconsole_kern.c 2006-06-18 04:52:37 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/um/drivers/mconsole_kern.c 2006-08-17 00:28:21 +0200 @@ -23,6 +23,7 @@ #include "linux/list.h" #include "linux/mm.h" #include "linux/console.h" +#include "linux/vs_cvirt.h" #include "asm/irq.h" #include "asm/uaccess.h" #include "user_util.h" diff -NurpP --minimal linux-2.6.17.13/arch/um/kernel/syscall_kern.c linux-2.6.17.13-vs2.0.2.1/arch/um/kernel/syscall_kern.c --- linux-2.6.17.13/arch/um/kernel/syscall_kern.c 2006-06-18 04:52:38 +0200 +++ 
linux-2.6.17.13-vs2.0.2.1/arch/um/kernel/syscall_kern.c 2006-08-17 00:28:21 +0200 @@ -15,6 +15,8 @@ #include "linux/unistd.h" #include "linux/slab.h" #include "linux/utime.h" +#include + #include "asm/mman.h" #include "asm/uaccess.h" #include "kern_util.h" @@ -110,7 +112,7 @@ long sys_uname(struct old_utsname __user if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } @@ -118,6 +120,7 @@ long sys_uname(struct old_utsname __user long sys_olduname(struct oldold_utsname __user * name) { long error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -126,19 +129,20 @@ long sys_olduname(struct oldold_utsname down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname, + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname, __OLD_UTS_LEN); error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename, + error |= __copy_to_user(&name->nodename,ptr->nodename, __OLD_UTS_LEN); error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release, + error |= __copy_to_user(&name->release,ptr->release, __OLD_UTS_LEN); error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version, + error |= __copy_to_user(&name->version,ptr->version, __OLD_UTS_LEN); error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine, + error |= __copy_to_user(&name->machine,ptr->machine, __OLD_UTS_LEN); error |= __put_user(0,name->machine+__OLD_UTS_LEN); diff -NurpP --minimal linux-2.6.17.13/arch/um/sys-x86_64/syscalls.c linux-2.6.17.13-vs2.0.2.1/arch/um/sys-x86_64/syscalls.c --- linux-2.6.17.13/arch/um/sys-x86_64/syscalls.c 2006-06-18 04:52:42 +0200 +++ 
linux-2.6.17.13-vs2.0.2.1/arch/um/sys-x86_64/syscalls.c 2006-08-17 00:28:21 +0200 @@ -9,6 +9,7 @@ #include "linux/shm.h" #include "linux/utsname.h" #include "linux/personality.h" +#include "linux/vs_cvirt.h" #include "asm/uaccess.h" #define __FRAME_OFFSETS #include "asm/ptrace.h" @@ -21,7 +22,7 @@ asmlinkage long sys_uname64(struct new_u { int err; down_read(&uts_sem); - err = copy_to_user(name, &system_utsname, sizeof (*name)); + err = copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); diff -NurpP --minimal linux-2.6.17.13/arch/v850/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/v850/Kconfig --- linux-2.6.17.13/arch/v850/Kconfig 2006-06-18 04:52:42 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/v850/Kconfig 2006-08-17 00:28:21 +0200 @@ -326,6 +326,8 @@ source "drivers/usb/Kconfig" source "arch/v850/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/v850/kernel/ptrace.c linux-2.6.17.13-vs2.0.2.1/arch/v850/kernel/ptrace.c --- linux-2.6.17.13/arch/v850/kernel/ptrace.c 2006-04-09 13:49:44 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/v850/kernel/ptrace.c 2006-08-17 00:28:21 +0200 @@ -117,6 +117,9 @@ long arch_ptrace(struct task_struct *chi { int rval; + rval = -EPERM; /* result reported if the context check below rejects the request; rval was previously unassigned on that path */ + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out; switch (request) { unsigned long val, copied; diff -NurpP --minimal linux-2.6.17.13/arch/x86_64/Kconfig linux-2.6.17.13-vs2.0.2.1/arch/x86_64/Kconfig --- linux-2.6.17.13/arch/x86_64/Kconfig 2006-09-13 18:43:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/x86_64/Kconfig 2006-08-17 00:28:21 +0200 @@ -617,6 +617,8 @@ endmenu source "arch/x86_64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.17.13/arch/x86_64/ia32/ia32_aout.c linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/ia32_aout.c --- 
linux-2.6.17.13/arch/x86_64/ia32/ia32_aout.c 2006-01-03 17:29:20 +0100 +++ linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/ia32_aout.c 2006-08-17 00:28:21 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/ia32_binfmt.c --- linux-2.6.17.13/arch/x86_64/ia32/ia32_binfmt.c 2006-06-18 04:52:43 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/ia32_binfmt.c 2006-08-17 00:28:21 +0200 @@ -371,7 +371,8 @@ int ia32_setup_arg_pages(struct linux_bi kmem_cache_free(vm_area_cachep, mpnt); return ret; } - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -NurpP --minimal linux-2.6.17.13/arch/x86_64/ia32/ia32entry.S linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/ia32entry.S --- linux-2.6.17.13/arch/x86_64/ia32/ia32entry.S 2006-06-18 04:52:43 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/ia32entry.S 2006-08-17 00:28:21 +0200 @@ -652,7 +652,7 @@ ia32_sys_call_table: .quad sys_tgkill /* 270 */ .quad compat_sys_utimes .quad sys32_fadvise64_64 - .quad quiet_ni_syscall /* sys_vserver */ + .quad sys32_vserver .quad sys_mbind .quad compat_sys_get_mempolicy /* 275 */ .quad sys_set_mempolicy diff -NurpP --minimal linux-2.6.17.13/arch/x86_64/ia32/sys_ia32.c linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/sys_ia32.c --- linux-2.6.17.13/arch/x86_64/ia32/sys_ia32.c 2006-06-18 04:52:43 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/sys_ia32.c 2006-08-17 00:28:21 +0200 @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -793,6 +794,7 @@ asmlinkage long sys32_mmap2(unsigned lon asmlinkage long sys32_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -801,13 +803,14 @@ asmlinkage long sys32_olduname(struct ol down_read(&uts_sem); - 
error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); __put_user(0,name->sysname+__OLD_UTS_LEN); - __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); __put_user(0,name->nodename+__OLD_UTS_LEN); - __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); __put_user(0,name->release+__OLD_UTS_LEN); - __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); __put_user(0,name->version+__OLD_UTS_LEN); { char *arch = "x86_64"; @@ -830,7 +833,7 @@ long sys32_uname(struct old_utsname __us if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); diff -NurpP --minimal linux-2.6.17.13/arch/x86_64/ia32/syscall32.c linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/syscall32.c --- linux-2.6.17.13/arch/x86_64/ia32/syscall32.c 2005-10-28 20:49:18 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/x86_64/ia32/syscall32.c 2006-08-17 00:28:21 +0200 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -70,7 +71,7 @@ int syscall32_setup_pages(struct linux_b kmem_cache_free(vm_area_cachep, vma); return ret; } - mm->total_vm += npages; + vx_vmpages_add(mm, npages); up_write(&mm->mmap_sem); return 0; } diff -NurpP --minimal linux-2.6.17.13/arch/x86_64/kernel/sys_x86_64.c linux-2.6.17.13-vs2.0.2.1/arch/x86_64/kernel/sys_x86_64.c --- linux-2.6.17.13/arch/x86_64/kernel/sys_x86_64.c 2006-01-03 17:29:20 +0100 +++ linux-2.6.17.13-vs2.0.2.1/arch/x86_64/kernel/sys_x86_64.c 2006-08-17 00:28:21 +0200 @@ -16,6 +16,7 
@@ #include #include #include +#include #include #include @@ -148,7 +149,7 @@ asmlinkage long sys_uname(struct new_uts { int err; down_read(&uts_sem); - err = copy_to_user(name, &system_utsname, sizeof (*name)); + err = copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); diff -NurpP --minimal linux-2.6.17.13/arch/x86_64/kernel/traps.c linux-2.6.17.13-vs2.0.2.1/arch/x86_64/kernel/traps.c --- linux-2.6.17.13/arch/x86_64/kernel/traps.c 2006-06-18 04:52:44 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/x86_64/kernel/traps.c 2006-08-17 00:28:21 +0200 @@ -324,8 +324,9 @@ void show_registers(struct pt_regs *regs printk("CPU %d ", cpu); __show_regs(regs); - printk("Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); + printk("Process %s (pid: %d[#%u], threadinfo %p, task %p)\n", + cur->comm, cur->pid, cur->xid, + task_thread_info(cur), cur); /* * When in-kernel, we also print out the stack and code at the diff -NurpP --minimal linux-2.6.17.13/arch/xtensa/kernel/syscalls.c linux-2.6.17.13-vs2.0.2.1/arch/xtensa/kernel/syscalls.c --- linux-2.6.17.13/arch/xtensa/kernel/syscalls.c 2005-08-29 22:24:57 +0200 +++ linux-2.6.17.13-vs2.0.2.1/arch/xtensa/kernel/syscalls.c 2006-08-17 00:28:21 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -129,7 +130,7 @@ out: int sys_uname(struct old_utsname * name) { - if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name))) return 0; return -EFAULT; } diff -NurpP --minimal linux-2.6.17.13/drivers/block/Kconfig linux-2.6.17.13-vs2.0.2.1/drivers/block/Kconfig --- linux-2.6.17.13/drivers/block/Kconfig 2006-06-18 04:52:46 +0200 +++ linux-2.6.17.13-vs2.0.2.1/drivers/block/Kconfig 2006-08-17 00:28:21 +0200 @@ -315,6 +315,13 @@ config BLK_DEV_CRYPTOLOOP instead, which can 
be configured to be on-disk compatible with the cryptoloop device. +config BLK_DEV_VROOT + tristate "Virtual Root device support" + depends on QUOTACTL + ---help--- + Saying Y here will allow you to use quota/fs ioctls on a shared + partition within a virtual server without compromising security. + config BLK_DEV_NBD tristate "Network block device support" depends on NET diff -NurpP --minimal linux-2.6.17.13/drivers/block/Makefile linux-2.6.17.13-vs2.0.2.1/drivers/block/Makefile --- linux-2.6.17.13/drivers/block/Makefile 2006-06-18 04:52:46 +0200 +++ linux-2.6.17.13-vs2.0.2.1/drivers/block/Makefile 2006-08-17 00:28:21 +0200 @@ -29,4 +29,5 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryp obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o +obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o diff -NurpP --minimal linux-2.6.17.13/drivers/block/vroot.c linux-2.6.17.13-vs2.0.2.1/drivers/block/vroot.c --- linux-2.6.17.13/drivers/block/vroot.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/drivers/block/vroot.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,288 @@ +/* + * linux/drivers/block/vroot.c + * + * written by Herbert Pötzl, 9/11/2002 + * ported to 2.6.10 by Herbert Pötzl, 30/12/2004 + * + * based on the loop.c code by Theodore Ts'o. + * + * Copyright (C) 2002-2005 by Herbert Pötzl. + * Redistribution of this file is permitted under the + * GNU General Public License. 
+ * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +static int max_vroot = 8; + +static struct vroot_device *vroot_dev; +static struct gendisk **disks; + + +static int vroot_set_dev( + struct vroot_device *vr, + struct file *vr_file, + struct block_device *bdev, + unsigned int arg) +{ + struct block_device *real_bdev; + struct file *file; + struct inode *inode; + int error; + + error = -EBUSY; + if (vr->vr_state != Vr_unbound) + goto out; + + error = -EBADF; + file = fget(arg); + if (!file) + goto out; + + error = -EINVAL; + inode = file->f_dentry->d_inode; + + + if (S_ISBLK(inode->i_mode)) { + real_bdev = inode->i_bdev; + vr->vr_device = real_bdev; + __iget(real_bdev->bd_inode); + } else + goto out_fput; + + vxdprintk(VXD_CBIT(misc, 0), + "vroot[%d]_set_dev: dev=" VXF_DEV, + vr->vr_number, VXD_DEV(real_bdev)); + + vr->vr_state = Vr_bound; + error = 0; + + out_fput: + fput(file); + out: + return error; +} + +static int vroot_clr_dev( + struct vroot_device *vr, + struct file *vr_file, + struct block_device *bdev) +{ + struct block_device *real_bdev; + + if (vr->vr_state != Vr_bound) + return -ENXIO; + if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */ + return -EBUSY; + + real_bdev = vr->vr_device; + + vxdprintk(VXD_CBIT(misc, 0), + "vroot[%d]_clr_dev: dev=" VXF_DEV, + vr->vr_number, VXD_DEV(real_bdev)); + + bdput(real_bdev); + vr->vr_state = Vr_unbound; + vr->vr_device = NULL; + return 0; +} + + +static int vr_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg) +{ + struct vroot_device *vr = inode->i_bdev->bd_disk->private_data; + int err; + + down(&vr->vr_ctl_mutex); + switch (cmd) { + case VROOT_SET_DEV: + err = vroot_set_dev(vr, file, inode->i_bdev, arg); + break; + case VROOT_CLR_DEV: + err = vroot_clr_dev(vr, file, inode->i_bdev); + break; + default: + err = -EINVAL; + break; + } + up(&vr->vr_ctl_mutex); + return err; +} + +static int vr_open(struct inode *inode, struct 
file *file) +{ + struct vroot_device *vr = inode->i_bdev->bd_disk->private_data; + + down(&vr->vr_ctl_mutex); + vr->vr_refcnt++; + up(&vr->vr_ctl_mutex); + return 0; +} + +static int vr_release(struct inode *inode, struct file *file) +{ + struct vroot_device *vr = inode->i_bdev->bd_disk->private_data; + + down(&vr->vr_ctl_mutex); + --vr->vr_refcnt; + up(&vr->vr_ctl_mutex); + return 0; +} + +static struct block_device_operations vr_fops = { + .owner = THIS_MODULE, + .open = vr_open, + .release = vr_release, + .ioctl = vr_ioctl, +}; + +/* Return the real bdev bound to vroot device @bdev, or ERR_PTR(-ENXIO). */ +struct block_device *__vroot_get_real_bdev(struct block_device *bdev) +{ + struct vroot_device *vr; + struct block_device *real_bdev; + int minor = iminor(bdev->bd_inode); + + if (minor >= max_vroot) /* minor beyond the vroot_dev[] table */ + return ERR_PTR(-ENXIO); + vr = &vroot_dev[minor]; + if (vr->vr_state != Vr_bound) /* vr_device is NULL while unbound */ + return ERR_PTR(-ENXIO); + real_bdev = vr->vr_device; + + vxdprintk(VXD_CBIT(misc, 0), + "vroot[%d]_get_real_bdev: dev=" VXF_DEV, + vr->vr_number, VXD_DEV(real_bdev)); + __iget(real_bdev->bd_inode); + return real_bdev; +} + +/* + * And now the modules code and kernel interface. 
+ */ + +module_param(max_vroot, int, 0); + +MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR); + +MODULE_AUTHOR ("Herbert Pötzl"); +MODULE_DESCRIPTION ("Virtual Root Device Mapper"); + + +int __init vroot_init(void) +{ + int err, i; + + if (max_vroot < 1 || max_vroot > 256) { + max_vroot = MAX_VROOT_DEFAULT; + printk(KERN_WARNING "vroot: invalid max_vroot " + "(must be between 1 and 256), " + "using default (%d)\n", max_vroot); + } + + if (register_blkdev(VROOT_MAJOR, "vroot")) + return -EIO; + + err = -ENOMEM; + vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL); + if (!vroot_dev) + goto out_mem1; + memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device)); + + disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL); + if (!disks) + goto out_mem2; + + for (i = 0; i < max_vroot; i++) { + disks[i] = alloc_disk(1); + if (!disks[i]) + goto out_mem3; + } + + devfs_mk_dir("vroot"); + + for (i = 0; i < max_vroot; i++) { + struct vroot_device *vr = &vroot_dev[i]; + struct gendisk *disk = disks[i]; + + memset(vr, 0, sizeof(*vr)); + init_MUTEX(&vr->vr_ctl_mutex); + vr->vr_number = i; + disk->major = VROOT_MAJOR; + disk->first_minor = i; + disk->fops = &vr_fops; + sprintf(disk->disk_name, "vroot%d", i); + sprintf(disk->devfs_name, "vroot/%d", i); + disk->private_data = vr; + } + + err = register_vroot_grb(&__vroot_get_real_bdev); + if (err) + goto out_reg; + + for (i = 0; i < max_vroot; i++) + add_disk(disks[i]); + printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot); + return 0; + +out_reg: + devfs_remove("vroot"); +out_mem3: + while (i--) + put_disk(disks[i]); + kfree(disks); +out_mem2: + kfree(vroot_dev); +out_mem1: + unregister_blkdev(VROOT_MAJOR, "vroot"); + printk(KERN_ERR "vroot: ran out of memory\n"); + return err; +} + +void vroot_exit(void) +{ + int i; + + if (unregister_vroot_grb(&__vroot_get_real_bdev)) + printk(KERN_WARNING 
"vroot: cannot unregister grb\n"); + + for (i = 0; i < max_vroot; i++) { + del_gendisk(disks[i]); + put_disk(disks[i]); + } + devfs_remove("vroot"); + if (unregister_blkdev(VROOT_MAJOR, "vroot")) + printk(KERN_WARNING "vroot: cannot unregister blkdev\n"); + + kfree(disks); + kfree(vroot_dev); +} + +module_init(vroot_init); +module_exit(vroot_exit); + +#ifndef MODULE + +static int __init max_vroot_setup(char *str) +{ + max_vroot = simple_strtol(str, NULL, 0); + return 1; +} + +__setup("max_vroot=", max_vroot_setup); + +#endif + diff -NurpP --minimal linux-2.6.17.13/drivers/char/random.c linux-2.6.17.13-vs2.0.2.1/drivers/char/random.c --- linux-2.6.17.13/drivers/char/random.c 2006-06-18 04:52:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/drivers/char/random.c 2006-08-17 00:28:21 +0200 @@ -1174,7 +1174,7 @@ static char sysctl_bootid[16]; static int proc_do_uuid(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table fake_table; + ctl_table fake_table = {0}; unsigned char buf[64], tmp_uuid[16], *uuid; uuid = table->data; diff -NurpP --minimal linux-2.6.17.13/drivers/char/tty_io.c linux-2.6.17.13-vs2.0.2.1/drivers/char/tty_io.c --- linux-2.6.17.13/drivers/char/tty_io.c 2006-09-13 18:43:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/drivers/char/tty_io.c 2006-08-17 00:28:21 +0200 @@ -103,6 +103,7 @@ #include #include #include +#include #include @@ -2388,13 +2389,16 @@ static int tiocsctty(struct tty_struct * static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { + pid_t pgrp; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. 
*/ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; - return put_user(real_tty->pgrp, p); + + pgrp = vx_map_pid(real_tty->pgrp); + return put_user(pgrp, p); } static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) @@ -2412,6 +2416,8 @@ static int tiocspgrp(struct tty_struct * return -ENOTTY; if (get_user(pgrp, p)) return -EFAULT; + + pgrp = vx_rmap_pid(pgrp); if (pgrp < 0) return -EINVAL; if (session_of_pgrp(pgrp) != current->signal->session) diff -NurpP --minimal linux-2.6.17.13/drivers/infiniband/core/uverbs_mem.c linux-2.6.17.13-vs2.0.2.1/drivers/infiniband/core/uverbs_mem.c --- linux-2.6.17.13/drivers/infiniband/core/uverbs_mem.c 2006-06-18 04:53:04 +0200 +++ linux-2.6.17.13-vs2.0.2.1/drivers/infiniband/core/uverbs_mem.c 2006-08-17 00:28:21 +0200 @@ -36,6 +36,7 @@ #include #include +#include #include "uverbs.h" @@ -161,7 +162,7 @@ out: if (ret < 0) __ib_umem_release(dev, mem, 0); else - current->mm->locked_vm = locked; + vx_vmlocked_sub(current->mm, current->mm->locked_vm - locked); up_write(&current->mm->mmap_sem); free_page((unsigned long) page_list); @@ -174,8 +175,8 @@ void ib_umem_release(struct ib_device *d __ib_umem_release(dev, umem, 1); down_write(&current->mm->mmap_sem); - current->mm->locked_vm -= - PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + vx_vmlocked_sub(current->mm, + PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT); up_write(&current->mm->mmap_sem); } @@ -184,7 +185,7 @@ static void ib_umem_account(void *work_p struct ib_umem_account_work *work = work_ptr; down_write(&work->mm->mmap_sem); - work->mm->locked_vm -= work->diff; + vx_vmlocked_sub(work->mm, work->diff); up_write(&work->mm->mmap_sem); mmput(work->mm); kfree(work); diff -NurpP --minimal linux-2.6.17.13/drivers/infiniband/hw/ipath/ipath_user_pages.c linux-2.6.17.13-vs2.0.2.1/drivers/infiniband/hw/ipath/ipath_user_pages.c --- linux-2.6.17.13/drivers/infiniband/hw/ipath/ipath_user_pages.c 2006-06-18 04:53:04 +0200 +++ 
linux-2.6.17.13-vs2.0.2.1/drivers/infiniband/hw/ipath/ipath_user_pages.c 2006-08-17 00:28:21 +0200 @@ -32,6 +32,7 @@ #include #include +#include #include "ipath_kernel.h" @@ -71,7 +72,8 @@ static int __get_user_pages(unsigned lon lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - if (num_pages > lock_limit) { + if (num_pages > lock_limit || + !vx_vmlocked_avail(current->mm, num_pages)) { ret = -ENOMEM; goto bail; } @@ -88,7 +90,7 @@ static int __get_user_pages(unsigned lon goto bail_release; } - current->mm->locked_vm += num_pages; + vx_vmlocked_add(current->mm, num_pages); ret = 0; goto bail; @@ -157,7 +159,7 @@ void ipath_release_user_pages(struct pag __ipath_release_user_pages(p, num_pages, 1); - current->mm->locked_vm -= num_pages; + vx_vmlocked_sub(current->mm, num_pages); up_write(&current->mm->mmap_sem); } @@ -173,7 +175,7 @@ static void user_pages_account(void *ptr struct ipath_user_pages_work *work = ptr; down_write(&work->mm->mmap_sem); - work->mm->locked_vm -= work->num_pages; + vx_vmlocked_sub(work->mm, work->num_pages); up_write(&work->mm->mmap_sem); mmput(work->mm); kfree(work); diff -NurpP --minimal linux-2.6.17.13/fs/attr.c linux-2.6.17.13-vs2.0.2.1/fs/attr.c --- linux-2.6.17.13/fs/attr.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/attr.c 2006-08-17 00:28:21 +0200 @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include /* Taken over from the old code... 
*/ @@ -56,6 +59,28 @@ int inode_change_ok(struct inode *inode, if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) goto error; } + + /* Check for evil vserver activity */ + if (vx_check(0, VX_ADMIN)) + goto fine; + + if (IS_BARRIER(inode)) { + vxwprintk(1, "xid=%d messing with the barrier.", + vx_current_xid()); + goto error; + } + switch (inode->i_sb->s_magic) { + case PROC_SUPER_MAGIC: + vxwprintk(1, "xid=%d messing with the procfs.", + vx_current_xid()); + goto error; + case DEVPTS_SUPER_MAGIC: + if (vx_check(inode->i_xid, VX_IDENT)) + goto fine; + vxwprintk(1, "xid=%d messing with the devpts.", + vx_current_xid()); + goto error; + } fine: retval = 0; error: @@ -79,6 +104,8 @@ int inode_setattr(struct inode * inode, inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((ia_valid & ATTR_XID) && IS_TAGXID(inode)) + inode->i_xid = attr->ia_xid; if (ia_valid & ATTR_ATIME) inode->i_atime = timespec_trunc(attr->ia_atime, inode->i_sb->s_time_gran); @@ -153,7 +180,8 @@ int notify_change(struct dentry * dentry error = security_inode_setattr(dentry, attr); if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_XID && attr->ia_xid != inode->i_xid)) error = DQUOT_TRANSFER(inode, attr) ? 
-EDQUOT : 0; if (!error) error = inode_setattr(inode, attr); diff -NurpP --minimal linux-2.6.17.13/fs/binfmt_aout.c linux-2.6.17.13-vs2.0.2.1/fs/binfmt_aout.c --- linux-2.6.17.13/fs/binfmt_aout.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/binfmt_aout.c 2006-08-17 00:28:21 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/fs/binfmt_elf.c linux-2.6.17.13-vs2.0.2.1/fs/binfmt_elf.c --- linux-2.6.17.13/fs/binfmt_elf.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/binfmt_elf.c 2006-09-12 17:57:43 +0200 @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/fs/binfmt_elf_fdpic.c linux-2.6.17.13-vs2.0.2.1/fs/binfmt_elf_fdpic.c --- linux-2.6.17.13/fs/binfmt_elf_fdpic.c 2006-06-18 04:54:29 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/binfmt_elf_fdpic.c 2006-08-17 00:28:21 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/fs/binfmt_flat.c linux-2.6.17.13-vs2.0.2.1/fs/binfmt_flat.c --- linux-2.6.17.13/fs/binfmt_flat.c 2006-06-18 04:54:29 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/binfmt_flat.c 2006-08-17 00:28:21 +0200 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/fs/binfmt_som.c linux-2.6.17.13-vs2.0.2.1/fs/binfmt_som.c --- linux-2.6.17.13/fs/binfmt_som.c 2006-01-03 17:29:55 +0100 +++ linux-2.6.17.13-vs2.0.2.1/fs/binfmt_som.c 2006-08-17 00:28:21 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/fs/devpts/inode.c linux-2.6.17.13-vs2.0.2.1/fs/devpts/inode.c --- linux-2.6.17.13/fs/devpts/inode.c 2006-06-18 04:54:31 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/devpts/inode.c 2006-08-17 00:28:21 +0200 @@ -20,7 +20,19 @@ #include #include -#define DEVPTS_SUPER_MAGIC 0x1cd1 + +static int devpts_permission(struct inode *inode, int mask, struct 
nameidata *nd) +{ + int ret = -EACCES; + + if (vx_check(inode->i_xid, VX_IDENT)) + ret = generic_permission(inode, mask, NULL); + return ret; +} + +static struct inode_operations devpts_file_inode_operations = { + .permission = devpts_permission, +}; static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -91,6 +103,24 @@ static int devpts_remount(struct super_b return 0; } +static int devpts_filter(struct dentry *de) +{ + return vx_check(de->d_inode->i_xid, VX_IDENT); +} + +static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + return dcache_readdir_filter(filp, dirent, filldir, devpts_filter); +} + +static struct file_operations devpts_dir_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .readdir = devpts_readdir, +}; + static struct super_operations devpts_sops = { .statfs = simple_statfs, .remount_fs = devpts_remount, @@ -117,8 +147,9 @@ devpts_fill_super(struct super_block *s, inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_fop = &devpts_dir_operations; inode->i_nlink = 2; + inode->i_xid = vx_current_xid(); devpts_root = s->s_root = d_alloc_root(inode); if (s->s_root) @@ -177,6 +208,8 @@ int devpts_pty_new(struct tty_struct *tt inode->i_gid = config.setgid ? 
config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|config.mode, device); + inode->i_xid = vx_current_xid(); + inode->i_op = &devpts_file_inode_operations; inode->u.generic_ip = tty; dentry = get_node(number); diff -NurpP --minimal linux-2.6.17.13/fs/exec.c linux-2.6.17.13-vs2.0.2.1/fs/exec.c --- linux-2.6.17.13/fs/exec.c 2006-06-18 04:54:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/exec.c 2006-08-17 00:28:21 +0200 @@ -49,6 +49,8 @@ #include #include #include +#include +#include #include #include @@ -436,7 +438,8 @@ int setup_arg_pages(struct linux_binprm kmem_cache_free(vm_area_cachep, mpnt); return ret; } - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { @@ -1337,7 +1340,7 @@ static void format_corename(char *corena case 'h': down_read(&uts_sem); rc = snprintf(out_ptr, out_end - out_ptr, - "%s", system_utsname.nodename); + "%s", vx_new_uts(nodename)); up_read(&uts_sem); if (rc > out_end - out_ptr) goto out; diff -NurpP --minimal linux-2.6.17.13/fs/ext2/balloc.c linux-2.6.17.13-vs2.0.2.1/fs/ext2/balloc.c --- linux-2.6.17.13/fs/ext2/balloc.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/balloc.c 2006-08-17 00:28:21 +0200 @@ -17,6 +17,7 @@ #include #include #include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -109,6 +110,8 @@ static int reserve_blocks(struct super_b free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(es->s_r_blocks_count); + DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks); + if (free_blocks < count) count = free_blocks; @@ -259,6 +262,7 @@ do_more: } error_return: brelse(bitmap_bh); + DLIMIT_FREE_BLOCK(inode, freed); release_blocks(sb, freed); DQUOT_FREE_BLOCK(inode, freed); } @@ -362,6 +366,10 @@ int ext2_new_block(struct inode 
*inode, *err = -ENOSPC; goto out_dquot; } + if (DLIMIT_ALLOC_BLOCK(inode, es_alloc)) { + *err = -ENOSPC; + goto out_dlimit; + } ext2_debug ("goal=%lu.\n", goal); @@ -509,6 +517,8 @@ got_block: *err = 0; out_release: group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + DLIMIT_FREE_BLOCK(inode, es_alloc); +out_dlimit: release_blocks(sb, es_alloc); out_dquot: DQUOT_FREE_BLOCK(inode, dq_alloc); diff -NurpP --minimal linux-2.6.17.13/fs/ext2/ext2.h linux-2.6.17.13-vs2.0.2.1/fs/ext2/ext2.h --- linux-2.6.17.13/fs/ext2/ext2.h 2006-06-18 04:54:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/ext2.h 2006-08-17 00:28:21 +0200 @@ -165,6 +165,7 @@ extern const struct file_operations ext2 extern struct address_space_operations ext2_aops; extern struct address_space_operations ext2_aops_xip; extern struct address_space_operations ext2_nobh_aops; +extern int ext2_sync_flags(struct inode *inode); /* namei.c */ extern struct inode_operations ext2_dir_inode_operations; diff -NurpP --minimal linux-2.6.17.13/fs/ext2/file.c linux-2.6.17.13-vs2.0.2.1/fs/ext2/file.c --- linux-2.6.17.13/fs/ext2/file.c 2006-06-18 04:54:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/file.c 2006-08-17 00:28:21 +0200 @@ -81,4 +81,5 @@ struct inode_operations ext2_file_inode_ #endif .setattr = ext2_setattr, .permission = ext2_permission, + .sync_flags = ext2_sync_flags, }; diff -NurpP --minimal linux-2.6.17.13/fs/ext2/ialloc.c linux-2.6.17.13-vs2.0.2.1/fs/ext2/ialloc.c --- linux-2.6.17.13/fs/ext2/ialloc.c 2006-02-18 14:40:21 +0100 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/ialloc.c 2006-08-17 00:28:21 +0200 @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -126,6 +128,7 @@ void ext2_free_inode (struct inode * ino ext2_xattr_delete_inode(inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); } es = EXT2_SB(sb)->s_es; @@ -465,6 +468,11 @@ struct inode *ext2_new_inode(struct inod if (!inode) return 
ERR_PTR(-ENOMEM); + inode->i_xid = vx_current_fsxid(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto fail_dlim; + } ei = EXT2_I(inode); sbi = EXT2_SB(sb); es = sbi->s_es; @@ -579,7 +587,8 @@ got: inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; + ei->i_flags = EXT2_I(dir)->i_flags & + ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); /* dirsync is only applied to directories */ @@ -627,12 +636,15 @@ fail_free_drop: fail_drop: DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); return ERR_PTR(err); fail: + DLIMIT_FREE_INODE(inode); +fail_dlim: make_bad_inode(inode); iput(inode); return ERR_PTR(err); diff -NurpP --minimal linux-2.6.17.13/fs/ext2/inode.c linux-2.6.17.13-vs2.0.2.1/fs/ext2/inode.c --- linux-2.6.17.13/fs/ext2/inode.c 2006-06-18 04:54:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/inode.c 2006-08-17 00:28:21 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include "ext2.h" #include "acl.h" #include "xip.h" @@ -1042,25 +1043,70 @@ void ext2_set_inode_flags(struct inode * { unsigned int flags = EXT2_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & EXT2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT2_BARRIER_FL) + inode->i_flags |= S_BARRIER; + if (flags & EXT2_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT2_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT2_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; if (flags & EXT2_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT2_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; 
} +int ext2_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + + oldflags = EXT2_I(inode)->i_flags; + newflags = oldflags & ~(EXT2_APPEND_FL | + EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL | + EXT2_BARRIER_FL | EXT2_NOATIME_FL | + EXT2_SYNC_FL | EXT2_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= EXT2_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= EXT2_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= EXT2_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= EXT2_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= EXT2_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= EXT2_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= EXT2_DIRSYNC_FL; + + if (oldflags ^ newflags) { + EXT2_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } + + return 0; +} + void ext2_read_inode (struct inode * inode) { struct ext2_inode_info *ei = EXT2_I(inode); ino_t ino = inode->i_ino; struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + uid_t uid; + gid_t gid; int n; #ifdef CONFIG_EXT2_FS_POSIX_ACL @@ -1071,12 +1117,17 @@ void ext2_read_inode (struct inode * ino goto bad_inode; inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if (!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_xid)); + 
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -1174,8 +1225,8 @@ static int ext2_update_inode(struct inod struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - uid_t uid = inode->i_uid; - gid_t gid = inode->i_gid; + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); int n; @@ -1210,6 +1261,9 @@ static int ext2_update_inode(struct inod raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_INOXID_INTERN + raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(inode->i_size); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -1296,7 +1350,8 @@ int ext2_setattr(struct dentry *dentry, if (error) return error; if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || + (iattr->ia_valid & ATTR_XID && iattr->ia_xid != inode->i_xid)) { error = DQUOT_TRANSFER(inode, iattr) ? 
-EDQUOT : 0; if (error) return error; diff -NurpP --minimal linux-2.6.17.13/fs/ext2/ioctl.c linux-2.6.17.13-vs2.0.2.1/fs/ext2/ioctl.c --- linux-2.6.17.13/fs/ext2/ioctl.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/ioctl.c 2006-08-17 00:28:21 +0200 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,8 @@ int ext2_ioctl (struct inode * inode, st case EXT2_IOC_SETFLAGS: { unsigned int oldflags; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -50,7 +52,9 @@ int ext2_ioctl (struct inode * inode, st * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { + if ((oldflags & EXT2_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT2_APPEND_FL | + EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } @@ -69,7 +73,8 @@ int ext2_ioctl (struct inode * inode, st case EXT2_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(inode->i_generation, (int __user *) arg)) return -EFAULT; diff -NurpP --minimal linux-2.6.17.13/fs/ext2/namei.c linux-2.6.17.13-vs2.0.2.1/fs/ext2/namei.c --- linux-2.6.17.13/fs/ext2/namei.c 2006-06-18 04:54:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/namei.c 2006-08-17 00:28:21 +0200 @@ -31,6 +31,7 @@ */ #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -66,6 +67,7 @@ static struct dentry *ext2_lookup(struct inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); + vx_propagate_xid(nd, inode); } return d_splice_alias(inode, dentry); } @@ -391,6 +393,7 @@ struct inode_operations ext2_dir_inode_o #endif .setattr = ext2_setattr, .permission = ext2_permission, + .sync_flags = 
ext2_sync_flags, }; struct inode_operations ext2_special_inode_operations = { @@ -402,4 +405,5 @@ struct inode_operations ext2_special_ino #endif .setattr = ext2_setattr, .permission = ext2_permission, + .sync_flags = ext2_sync_flags, }; diff -NurpP --minimal linux-2.6.17.13/fs/ext2/super.c linux-2.6.17.13-vs2.0.2.1/fs/ext2/super.c --- linux-2.6.17.13/fs/ext2/super.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/super.c 2006-09-12 17:57:43 +0200 @@ -328,7 +328,7 @@ enum { Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota + Opt_usrquota, Opt_grpquota, Opt_tagxid }; static match_table_t tokens = { @@ -356,6 +356,7 @@ static match_table_t tokens = { {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_xip, "xip"}, + {Opt_tagxid, "tagxid"}, {Opt_grpquota, "grpquota"}, {Opt_ignore, "noquota"}, {Opt_quota, "quota"}, @@ -419,6 +420,11 @@ static int parse_options (char * options case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + set_opt (sbi->s_mount_opt, TAGXID); + break; +#endif case Opt_nocheck: clear_opt (sbi->s_mount_opt, CHECK); break; @@ -720,6 +726,8 @@ static int ext2_fill_super(struct super_ if (!parse_options ((char *) data, sbi)) goto failed_mount; + if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGXID) + sb->s_flags |= MS_TAGXID; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -1029,6 +1037,13 @@ static int ext2_remount (struct super_bl goto restore_opts; } + if ((sbi->s_mount_opt & EXT2_MOUNT_TAGXID) && + !(sb->s_flags & MS_TAGXID)) { + printk("EXT2-fs: %s: tagxid not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); diff -NurpP --minimal linux-2.6.17.13/fs/ext2/symlink.c linux-2.6.17.13-vs2.0.2.1/fs/ext2/symlink.c --- linux-2.6.17.13/fs/ext2/symlink.c 2005-08-29 22:25:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/symlink.c 2006-08-17 00:28:21 +0200 @@ -38,6 +38,7 @@ struct inode_operations ext2_symlink_ino .listxattr = ext2_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext2_sync_flags, }; struct inode_operations ext2_fast_symlink_inode_operations = { @@ -49,4 +50,5 @@ struct inode_operations ext2_fast_symlin .listxattr = ext2_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext2_sync_flags, }; diff -NurpP --minimal linux-2.6.17.13/fs/ext2/xattr.c linux-2.6.17.13-vs2.0.2.1/fs/ext2/xattr.c --- linux-2.6.17.13/fs/ext2/xattr.c 2006-02-18 14:40:21 +0100 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext2/xattr.c 2006-08-17 00:28:21 +0200 @@ -60,6 +60,7 @@ #include #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -645,8 +646,15 @@ ext2_xattr_set2(struct inode *inode, str the inode. 
*/ ea_bdebug(new_bh, "reusing block"); + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, 1)) { + /* new_bh is still locked here */ + unlock_buffer(new_bh); + goto cleanup; + } error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) { + DLIMIT_FREE_BLOCK(inode, 1); unlock_buffer(new_bh); goto cleanup; } @@ -740,6 +748,7 @@ ext2_xattr_set2(struct inode *inode, str le32_to_cpu(HDR(old_bh)->h_refcount) - 1); if (ce) mb_cache_entry_release(ce); + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); mark_buffer_dirty(old_bh); ea_bdebug(old_bh, "refcount now=%d", @@ -804,6 +813,7 @@ ext2_xattr_delete_inode(struct inode *in mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); } EXT2_I(inode)->i_file_acl = 0; diff -NurpP --minimal linux-2.6.17.13/fs/ext3/balloc.c linux-2.6.17.13-vs2.0.2.1/fs/ext3/balloc.c --- linux-2.6.17.13/fs/ext3/balloc.c 2006-06-18 04:54:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext3/balloc.c 2006-08-17 00:28:21 +0200 @@ -20,6 +20,7 @@ #include #include #include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -504,8 +505,10 @@ void ext3_free_blocks(handle_t *handle, return; } ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); - if (dquot_freed_blocks) + if (dquot_freed_blocks) { + DLIMIT_FREE_BLOCK(inode, dquot_freed_blocks); DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + } return; } @@ -1162,18 +1165,32 @@ out: return ret; } -static int ext3_has_free_blocks(struct ext3_sb_info *sbi) +static int ext3_has_free_blocks(struct super_block *sb) { - int free_blocks, root_blocks; + struct ext3_sb_info *sbi = EXT3_SB(sb); + int free_blocks, root_blocks, cond; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): free=%u, root=%u", + sb, 
free_blocks, root_blocks); + + DLIMIT_ADJUST_BLOCK(sb, 
vx_current_xid(), &free_blocks, &root_blocks); + + cond = (free_blocks < root_blocks + 1 && + !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))); + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d", + sb, free_blocks, root_blocks, + !capable(CAP_SYS_RESOURCE)?'1':'0', + sbi->s_resuid, current->fsuid, cond?0:1); + + return (cond ? 0 : 1); } /* @@ -1184,7 +1201,7 @@ static int ext3_has_free_blocks(struct e */ int ext3_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3) + if (!ext3_has_free_blocks(sb) || (*retries)++ > 3) return 0; jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); @@ -1240,6 +1257,8 @@ int ext3_new_blocks(handle_t *handle, st *errp = -EDQUOT; return 0; } + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto out_dlimit; sbi = EXT3_SB(sb); es = EXT3_SB(sb)->s_es; @@ -1256,7 +1275,7 @@ int ext3_new_blocks(handle_t *handle, st if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) my_rsv = &block_i->rsv_window_node; - if (!ext3_has_free_blocks(sbi)) { + if (!ext3_has_free_blocks(sb)) { *errp = -ENOSPC; goto out; } @@ -1448,6 +1467,9 @@ allocated: io_error: *errp = -EIO; out: + if (!performed_allocation) + DLIMIT_FREE_BLOCK(inode, 1); +out_dlimit: if (fatal) { *errp = fatal; ext3_std_error(sb, fatal); diff -NurpP --minimal linux-2.6.17.13/fs/ext3/file.c linux-2.6.17.13-vs2.0.2.1/fs/ext3/file.c --- linux-2.6.17.13/fs/ext3/file.c 2006-06-18 04:54:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext3/file.c 2006-08-17 00:28:21 +0200 @@ -133,5 +133,6 @@ struct inode_operations ext3_file_inode_ .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .sync_flags = ext3_sync_flags, }; diff -NurpP --minimal linux-2.6.17.13/fs/ext3/ialloc.c 
linux-2.6.17.13-vs2.0.2.1/fs/ext3/ialloc.c --- linux-2.6.17.13/fs/ext3/ialloc.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext3/ialloc.c 2006-08-17 00:28:21 +0200 @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include @@ -127,6 +129,7 @@ void ext3_free_inode (handle_t *handle, ext3_xattr_delete_inode(handle, inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); is_directory = S_ISDIR(inode->i_mode); @@ -443,6 +446,12 @@ struct inode *ext3_new_inode(handle_t *h inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); + + inode->i_xid = vx_current_fsxid(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto out_dlimit; + } ei = EXT3_I(inode); sbi = EXT3_SB(sb); @@ -565,7 +574,8 @@ got: ei->i_dir_start_lookup = 0; ei->i_disksize = 0; - ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; + ei->i_flags = EXT3_I(dir)->i_flags & + ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); /* dirsync only applies to directories */ @@ -620,6 +630,8 @@ got: fail: ext3_std_error(sb, err); out: + DLIMIT_FREE_INODE(inode); +out_dlimit: iput(inode); ret = ERR_PTR(err); really_out: @@ -631,6 +643,7 @@ fail_free_drop: fail_drop: DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); diff -NurpP --minimal linux-2.6.17.13/fs/ext3/inode.c linux-2.6.17.13-vs2.0.2.1/fs/ext3/inode.c --- linux-2.6.17.13/fs/ext3/inode.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext3/inode.c 2006-08-17 00:28:21 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -2563,19 +2564,77 @@ void ext3_set_inode_flags(struct inode * { unsigned int flags = EXT3_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if 
(flags & EXT3_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & EXT3_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT3_BARRIER_FL) + inode->i_flags |= S_BARRIER; + if (flags & EXT3_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT3_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT3_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; if (flags & EXT3_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT3_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; } +int ext3_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + int err = 0; + + oldflags = EXT3_I(inode)->i_flags; + newflags = oldflags & ~(EXT3_APPEND_FL | + EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL | + EXT3_BARRIER_FL | EXT3_NOATIME_FL | + EXT3_SYNC_FL | EXT3_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= EXT3_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= EXT3_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= EXT3_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= EXT3_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= EXT3_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= EXT3_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= EXT3_DIRSYNC_FL; + + if (oldflags ^ newflags) { + handle_t *handle; + struct ext3_iloc iloc; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + EXT3_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + flags_err: + ext3_journal_stop(handle); + } + return err; +} + void ext3_read_inode(struct inode * inode) { struct ext3_iloc iloc; @@ -2583,6 +2642,8 @@ void ext3_read_inode(struct inode * inod struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh; int block; + uid_t uid; + gid_t gid; #ifdef CONFIG_EXT3_FS_POSIX_ACL 
ei->i_acl = EXT3_ACL_NOT_CACHED; @@ -2595,12 +2656,17 @@ void ext3_read_inode(struct inode * inod bh = iloc.bh; raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if(!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_xid)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -2727,6 +2793,8 @@ static int ext3_do_update_inode(handle_t struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); int err = 0, rc, block; /* For fields not not tracking in the in-memory inode, @@ -2736,29 +2804,32 @@ static int ext3_do_update_inode(handle_t raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); /* * Fix up interoperability with old kernels. 
Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ if(!ei->i_dtime) { raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); + cpu_to_le16(high_16_bits(uid)); raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); + cpu_to_le16(high_16_bits(gid)); } else { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); + cpu_to_le16(fs_high2lowuid(uid)); raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); + cpu_to_le16(fs_high2lowgid(gid)); raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_INOXID_INTERN + raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -2911,7 +2982,8 @@ int ext3_setattr(struct dentry *dentry, return error; if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_XID && attr->ia_xid != inode->i_xid)) { handle_t *handle; /* (user+group)*(old+new) structure, inode write (sb, @@ -2933,6 +3005,8 @@ int ext3_setattr(struct dentry *dentry, inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_XID) && IS_TAGXID(inode)) + inode->i_xid = attr->ia_xid; error = ext3_mark_inode_dirty(handle, inode); ext3_journal_stop(handle); } diff -NurpP --minimal linux-2.6.17.13/fs/ext3/ioctl.c linux-2.6.17.13-vs2.0.2.1/fs/ext3/ioctl.c --- linux-2.6.17.13/fs/ext3/ioctl.c 2006-06-18 04:54:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext3/ioctl.c 2006-08-17 00:28:21 +0200 @@ -8,11 +8,13 @@ */ #include +#include #include #include #include #include #include +#include #include @@ -36,7 +38,8 @@ int ext3_ioctl (struct inode * inode, st unsigned int 
oldflags; unsigned int jflag; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -60,7 +63,9 @@ int ext3_ioctl (struct inode * inode, st * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { + if ((oldflags & EXT3_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT3_APPEND_FL | + EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) { mutex_unlock(&inode->i_mutex); return -EPERM; @@ -122,7 +127,8 @@ flags_err: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(generation, (int __user *) arg)) return -EFAULT; @@ -176,7 +182,8 @@ flags_err: if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -211,7 +218,8 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(n_blocks_count, (__u32 __user *)arg)) @@ -232,7 +240,8 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, @@ -247,6 +256,38 @@ flags_err: return err; } +#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE) + case EXT3_IOC_SETXID: { + handle_t *handle; + struct ext3_iloc iloc; + int xid; + int err; + + /* fixme: if stealth, return -ENOTTY */ + if (!capable(CAP_CONTEXT)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if 
(!(inode->i_sb->s_flags & MS_TAGXID)) + return -ENOSYS; + if (get_user(xid, (int __user *) arg)) + return -EFAULT; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (!err) { + inode->i_xid = (xid & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + } + /* stop the handle on every path, including a failed reserve */ + ext3_journal_stop(handle); + return err; + } +#endif default: return -ENOTTY; diff -NurpP --minimal linux-2.6.17.13/fs/ext3/namei.c linux-2.6.17.13-vs2.0.2.1/fs/ext3/namei.c --- linux-2.6.17.13/fs/ext3/namei.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext3/namei.c 2006-08-17 00:28:21 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include "namei.h" #include "xattr.h" @@ -1009,6 +1010,7 @@ static struct dentry *ext3_lookup(struct if (!inode) return ERR_PTR(-EACCES); + vx_propagate_xid(nd, inode); } return d_splice_alias(inode, dentry); } @@ -2384,6 +2386,7 @@ struct inode_operations ext3_dir_inode_o .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .sync_flags = ext3_sync_flags, }; struct inode_operations ext3_special_inode_operations = { @@ -2395,4 +2398,5 @@ struct inode_operations ext3_special_ino .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .sync_flags = ext3_sync_flags, }; diff -NurpP --minimal linux-2.6.17.13/fs/ext3/super.c linux-2.6.17.13-vs2.0.2.1/fs/ext3/super.c --- linux-2.6.17.13/fs/ext3/super.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext3/super.c 2006-08-25 05:44:21 +0200 @@ -675,7 +675,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota + Opt_grpquota, Opt_tagxid }; static match_table_t tokens = { @@ -724,6 +724,7 @@ static match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, 
"usrquota"}, {Opt_barrier, "barrier=%u"}, + {Opt_tagxid, "tagxid"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -816,6 +817,11 @@ static int parse_options (char *options, case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + set_opt (sbi->s_mount_opt, TAGXID); + break; +#endif case Opt_nocheck: clear_opt (sbi->s_mount_opt, CHECK); break; @@ -1470,6 +1476,9 @@ static int ext3_fill_super (struct super NULL, 0)) goto failed_mount; + if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGXID) + sb->s_flags |= MS_TAGXID; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -2273,6 +2282,12 @@ static int ext3_remount (struct super_bl if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) ext3_abort(sb, __FUNCTION__, "Abort forced by user"); + if ((sbi->s_mount_opt & EXT3_MOUNT_TAGXID) && + !(sb->s_flags & MS_TAGXID)) { + printk("EXT3-fs: %s: tagxid not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); diff -NurpP --minimal linux-2.6.17.13/fs/ext3/symlink.c linux-2.6.17.13-vs2.0.2.1/fs/ext3/symlink.c --- linux-2.6.17.13/fs/ext3/symlink.c 2005-08-29 22:25:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext3/symlink.c 2006-08-17 00:28:21 +0200 @@ -40,6 +40,7 @@ struct inode_operations ext3_symlink_ino .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext3_sync_flags, }; struct inode_operations ext3_fast_symlink_inode_operations = { @@ -51,4 +52,5 @@ struct inode_operations ext3_fast_symlin .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext3_sync_flags, }; diff -NurpP --minimal linux-2.6.17.13/fs/ext3/xattr.c linux-2.6.17.13-vs2.0.2.1/fs/ext3/xattr.c --- linux-2.6.17.13/fs/ext3/xattr.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ext3/xattr.c 2006-08-17 00:28:21 +0200 @@ -58,6 +58,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -495,6 +496,7 @@ ext3_xattr_release_block(handle_t *handl ext3_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); unlock_buffer(bh); ea_bdebug(bh, "refcount now=%d; releasing", @@ -763,11 +765,14 @@ inserted: if (new_bh == bs->bh) ea_bdebug(new_bh, "keeping"); else { + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto cleanup; /* The old block is released after updating the inode. 
*/ error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) - goto cleanup; + goto cleanup_dlimit; error = ext3_journal_get_write_access(handle, new_bh); if (error) @@ -843,6 +848,8 @@ cleanup: cleanup_dquot: DQUOT_FREE_BLOCK(inode, 1); +cleanup_dlimit: + DLIMIT_FREE_BLOCK(inode, 1); goto cleanup; bad_block: diff -NurpP --minimal linux-2.6.17.13/fs/fcntl.c linux-2.6.17.13-vs2.0.2.1/fs/fcntl.c --- linux-2.6.17.13/fs/fcntl.c 2006-06-18 04:54:34 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/fcntl.c 2006-08-17 00:28:21 +0200 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -85,6 +86,8 @@ repeat: error = -EMFILE; if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; + if (!vx_files_avail(1)) + goto out; error = expand_files(files, newfd); if (error < 0) @@ -125,6 +128,7 @@ static int dupfd(struct file *file, unsi FD_SET(fd, fdt->open_fds); FD_CLR(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); + vx_openfd_inc(fd); fd_install(fd, file); } else { spin_unlock(&files->file_lock); @@ -177,6 +181,9 @@ asmlinkage long sys_dup2(unsigned int ol if (tofree) filp_close(tofree, files); + else + vx_openfd_inc(newfd); /* fd was unused */ + err = newfd; out: return err; @@ -479,7 +486,7 @@ void send_sigio(struct fown_struct *fown read_lock(&tasklist_lock); if (pid > 0) { - p = find_task_by_pid(pid); + p = find_task_by_real_pid(pid); if (p) { send_sigio_to_task(p, fown, fd, band); } @@ -514,7 +521,7 @@ int send_sigurg(struct fown_struct *fown read_lock(&tasklist_lock); if (pid > 0) { - p = find_task_by_pid(pid); + p = find_task_by_real_pid(pid); if (p) { send_sigurg_to_task(p, fown); } diff -NurpP --minimal linux-2.6.17.13/fs/file_table.c linux-2.6.17.13-vs2.0.2.1/fs/file_table.c --- linux-2.6.17.13/fs/file_table.c 2006-06-18 04:54:34 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/file_table.c 2006-08-17 00:28:21 +0200 @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include @@ -121,6 +123,8 @@ struct file *get_empty_filp(void) f->f_gid = 
tsk->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ + f->f_xid = vx_current_xid(); + vx_files_inc(f); return f; over: @@ -175,6 +179,8 @@ void fastcall __fput(struct file *file) fops_put(file->f_op); if (file->f_mode & FMODE_WRITE) put_write_access(inode); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file->f_dentry = NULL; file->f_vfsmnt = NULL; @@ -240,6 +246,8 @@ void put_filp(struct file *file) { if (atomic_dec_and_test(&file->f_count)) { security_file_free(file); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file_free(file); } diff -NurpP --minimal linux-2.6.17.13/fs/hfsplus/ioctl.c linux-2.6.17.13-vs2.0.2.1/fs/hfsplus/ioctl.c --- linux-2.6.17.13/fs/hfsplus/ioctl.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/hfsplus/ioctl.c 2006-08-17 00:28:21 +0200 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "hfsplus_fs.h" @@ -35,7 +36,8 @@ int hfsplus_ioctl(struct inode *inode, s flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */ return put_user(flags, (int __user *)arg); case HFSPLUS_IOC_EXT2_SETFLAGS: { - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) diff -NurpP --minimal linux-2.6.17.13/fs/inode.c linux-2.6.17.13-vs2.0.2.1/fs/inode.c --- linux-2.6.17.13/fs/inode.c 2006-06-18 04:54:35 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/inode.c 2006-08-17 00:28:21 +0200 @@ -116,6 +116,9 @@ static struct inode *alloc_inode(struct struct address_space * const mapping = &inode->i_data; inode->i_sb = sb; + + /* essential because of inode slab reuse */ + inode->i_xid = 0; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; atomic_set(&inode->i_count, 1); @@ -235,6 +238,8 @@ void __iget(struct inode * inode) inodes_stat.nr_unused--; } +EXPORT_SYMBOL_GPL(__iget); + /** * clear_inode - clear an inode * @inode: inode to clear diff -NurpP --minimal linux-2.6.17.13/fs/ioctl.c 
linux-2.6.17.13-vs2.0.2.1/fs/ioctl.c --- linux-2.6.17.13/fs/ioctl.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ioctl.c 2006-08-17 00:28:21 +0200 @@ -13,10 +13,19 @@ #include #include #include +#include +#include +#include #include #include + +#ifdef CONFIG_VSERVER_LEGACY +extern int vx_proc_ioctl(struct inode *, struct file *, + unsigned int, unsigned long); +#endif + static long do_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -147,6 +156,48 @@ int vfs_ioctl(struct file *filp, unsigne else error = -ENOTTY; break; +#ifdef CONFIG_VSERVER_LEGACY +#ifndef CONFIG_INOXID_NONE + case FIOC_GETXID: { + struct inode *inode = filp->f_dentry->d_inode; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (capable(CAP_CONTEXT)) + error = put_user(inode->i_xid, (int __user *) arg); + break; + } + case FIOC_SETXID: { + struct inode *inode = filp->f_dentry->d_inode; + int xid; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -ENOSYS; + if (!(inode->i_sb->s_flags & MS_TAGXID)) + break; + error = -EFAULT; + if (get_user(xid, (int __user *) arg)) + break; + error = 0; + inode->i_xid = (xid & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + break; + } +#endif + case FIOC_GETXFLG: + case FIOC_SETXFLG: + error = -ENOTTY; + if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC) + error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + break; +#endif default: if (S_ISREG(filp->f_dentry->d_inode->i_mode)) error = file_ioctl(filp, cmd, arg); diff -NurpP --minimal linux-2.6.17.13/fs/ioprio.c linux-2.6.17.13-vs2.0.2.1/fs/ioprio.c --- linux-2.6.17.13/fs/ioprio.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/ioprio.c 2006-08-17 00:28:21 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include static int set_task_ioprio(struct task_struct *task, int ioprio) { @@ -95,7 +96,7 @@ 
asmlinkage long sys_ioprio_set(int which if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) break; @@ -149,7 +150,7 @@ asmlinkage long sys_ioprio_get(int which if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) break; diff -NurpP --minimal linux-2.6.17.13/fs/jfs/acl.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/acl.c --- linux-2.6.17.13/fs/jfs/acl.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/acl.c 2006-08-17 00:28:21 +0200 @@ -232,7 +232,8 @@ int jfs_setattr(struct dentry *dentry, s return rc; if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || + (iattr->ia_valid & ATTR_XID && iattr->ia_xid != inode->i_xid)) { if (DQUOT_TRANSFER(inode, iattr)) return -EDQUOT; } diff -NurpP --minimal linux-2.6.17.13/fs/jfs/file.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/file.c --- linux-2.6.17.13/fs/jfs/file.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/file.c 2006-08-17 00:28:21 +0200 @@ -98,6 +98,7 @@ struct inode_operations jfs_file_inode_o .setattr = jfs_setattr, .permission = jfs_permission, #endif + .sync_flags = jfs_sync_flags, }; const struct file_operations jfs_file_operations = { diff -NurpP --minimal linux-2.6.17.13/fs/jfs/inode.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/inode.c --- linux-2.6.17.13/fs/jfs/inode.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/inode.c 2006-08-17 00:28:21 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -144,6 +145,7 @@ void jfs_delete_inode(struct inode *inod DQUOT_INIT(inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); } clear_inode(inode); diff -NurpP --minimal linux-2.6.17.13/fs/jfs/ioctl.c 
linux-2.6.17.13-vs2.0.2.1/fs/jfs/ioctl.c --- linux-2.6.17.13/fs/jfs/ioctl.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/ioctl.c 2006-08-17 00:28:21 +0200 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -65,7 +66,8 @@ int jfs_ioctl(struct inode * inode, stru case JFS_IOC_SETFLAGS: { unsigned int oldflags; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -85,8 +87,8 @@ int jfs_ioctl(struct inode * inode, stru * the relevant capability. */ if ((oldflags & JFS_IMMUTABLE_FL) || - ((flags ^ oldflags) & - (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { + ((flags ^ oldflags) & (JFS_APPEND_FL | + JFS_IMMUTABLE_FL | JFS_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } diff -NurpP --minimal linux-2.6.17.13/fs/jfs/jfs_dinode.h linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_dinode.h --- linux-2.6.17.13/fs/jfs/jfs_dinode.h 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_dinode.h 2006-08-17 00:28:21 +0200 @@ -162,9 +162,12 @@ struct dinode { #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */ #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */ -#define JFS_FL_USER_VISIBLE 0x03F80000 +#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define JFS_IUNLINK_FL 0x08000000 /* Immutable unlink */ + +#define JFS_FL_USER_VISIBLE 0x0FF80000 #define JFS_FL_USER_MODIFIABLE 0x03F80000 -#define JFS_FL_INHERIT 0x03C80000 +#define JFS_FL_INHERIT 0x0BC80000 /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */ #define JFS_IOC_GETFLAGS _IOR('f', 1, long) diff -NurpP --minimal linux-2.6.17.13/fs/jfs/jfs_dtree.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_dtree.c --- linux-2.6.17.13/fs/jfs/jfs_dtree.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_dtree.c 2006-08-17 00:28:21 +0200 @@ -102,6 +102,7 @@ #include #include +#include #include "jfs_incore.h" #include 
"jfs_superblock.h" #include "jfs_filsys.h" @@ -383,10 +384,10 @@ static u32 add_index(tid_t tid, struct i */ if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage)) goto clean_up; - if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { - DQUOT_FREE_BLOCK(ip, sbi->nbperpage); - goto clean_up; - } + if (DLIMIT_ALLOC_BLOCK(ip, sbi->nbperpage)) + goto clean_up_quota; + if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) + goto clean_up_dlim; /* * Save the table, we're going to overwrite it with the @@ -479,6 +480,10 @@ static u32 add_index(tid_t tid, struct i return index; + clean_up_dlim: + DLIMIT_FREE_BLOCK(ip, sbi->nbperpage); + clean_up_quota: + DQUOT_FREE_BLOCK(ip, sbi->nbperpage); clean_up: jfs_ip->next_index--; @@ -952,6 +957,7 @@ static int dtSplitUp(tid_t tid, struct tlock *tlck; struct lv *lv; int quota_allocation = 0; + int dlimit_allocation = 0; /* get split page */ smp = split->mp; @@ -1036,6 +1042,12 @@ static int dtSplitUp(tid_t tid, } quota_allocation += n; + if (DLIMIT_ALLOC_BLOCK(ip, n)) { + rc = -ENOSPC; + goto extendOut; + } + dlimit_allocation += n; + if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, (s64) n, &nxaddr))) goto extendOut; @@ -1309,6 +1321,9 @@ static int dtSplitUp(tid_t tid, freeKeyName: kfree(key.name); + /* Rollback dlimit allocation */ + if (rc && dlimit_allocation) + DLIMIT_FREE_BLOCK(ip, dlimit_allocation); /* Rollback quota allocation */ if (rc && quota_allocation) DQUOT_FREE_BLOCK(ip, quota_allocation); @@ -1376,6 +1391,12 @@ static int dtSplitPage(tid_t tid, struct release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); @@ -1926,6 +1947,12 @@ static int dtSplitRoot(tid_t tid, release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. 
*/ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } BT_MARK_DIRTY(rmp, ip); /* @@ -2292,6 +2319,8 @@ static int dtDeleteUp(tid_t tid, struct xlen = lengthPXD(&fp->header.self); + /* Free dlimit allocation. */ + DLIMIT_FREE_BLOCK(ip, xlen); /* Free quota allocation. */ DQUOT_FREE_BLOCK(ip, xlen); @@ -2368,6 +2397,8 @@ static int dtDeleteUp(tid_t tid, struct xlen = lengthPXD(&p->header.self); + /* Free dlimit allocation */ + DLIMIT_FREE_BLOCK(ip, xlen); /* Free quota allocation */ DQUOT_FREE_BLOCK(ip, xlen); diff -NurpP --minimal linux-2.6.17.13/fs/jfs/jfs_extent.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_extent.c --- linux-2.6.17.13/fs/jfs/jfs_extent.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_extent.c 2006-08-17 00:28:21 +0200 @@ -18,6 +18,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_superblock.h" @@ -146,6 +147,13 @@ extAlloc(struct inode *ip, s64 xlen, s64 mutex_unlock(&JFS_IP(ip)->commit_mutex); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) { + DQUOT_FREE_BLOCK(ip, nxlen); + dbFree(ip, nxaddr, (s64) nxlen); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + return -ENOSPC; + } /* determine the value of the extent flag */ xflag = (abnr == TRUE) ? XAD_NOTRECORDED : 0; @@ -164,6 +172,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 */ if (rc) { dbFree(ip, nxaddr, nxlen); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); @@ -261,6 +270,13 @@ int extRealloc(struct inode *ip, s64 nxl mutex_unlock(&JFS_IP(ip)->commit_mutex); return -EDQUOT; } + /* Allocate blocks to dlimit. 
*/ + if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) { + DQUOT_FREE_BLOCK(ip, nxlen); + dbFree(ip, nxaddr, (s64) nxlen); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + return -ENOSPC; + } delta = nxlen - xlen; @@ -297,6 +313,7 @@ int extRealloc(struct inode *ip, s64 nxl /* extend the extent */ if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { dbFree(ip, xaddr + xlen, delta); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); goto exit; } @@ -308,6 +325,7 @@ int extRealloc(struct inode *ip, s64 nxl */ if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { dbFree(ip, nxaddr, nxlen); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); goto exit; } diff -NurpP --minimal linux-2.6.17.13/fs/jfs/jfs_filsys.h linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_filsys.h --- linux-2.6.17.13/fs/jfs/jfs_filsys.h 2005-10-28 20:49:44 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_filsys.h 2006-08-17 00:28:21 +0200 @@ -84,6 +84,7 @@ #define JFS_DIR_INDEX 0x00200000 /* Persistant index for */ /* directory entries */ +#define JFS_TAGXID 0x00800000 /* xid tagging */ /* * buffer cache configuration diff -NurpP --minimal linux-2.6.17.13/fs/jfs/jfs_imap.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_imap.c --- linux-2.6.17.13/fs/jfs/jfs_imap.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_imap.c 2006-08-17 00:28:21 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" @@ -3075,6 +3076,8 @@ static int copy_from_dinode(struct dinod { struct jfs_inode_info *jfs_ip = JFS_IP(ip); struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + uid_t uid; + gid_t gid; jfs_ip->fileset = le32_to_cpu(dip->di_fileset); jfs_ip->mode2 = le32_to_cpu(dip->di_mode); @@ -3094,14 +3097,18 @@ static int copy_from_dinode(struct dinod } ip->i_nlink = le32_to_cpu(dip->di_nlink); - jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); + uid = le32_to_cpu(dip->di_uid); + gid = le32_to_cpu(dip->di_gid); + ip->i_xid = INOXID_XID(XID_TAG(ip), uid, gid, 0); + + 
jfs_ip->saved_uid = INOXID_UID(XID_TAG(ip), uid, gid); if (sbi->uid == -1) ip->i_uid = jfs_ip->saved_uid; else { ip->i_uid = sbi->uid; } - jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); + jfs_ip->saved_gid = INOXID_GID(XID_TAG(ip), uid, gid); if (sbi->gid == -1) ip->i_gid = jfs_ip->saved_gid; else { @@ -3167,14 +3174,12 @@ static void copy_to_dinode(struct dinode dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - if (sbi->uid == -1) - dip->di_uid = cpu_to_le32(ip->i_uid); - else - dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); - if (sbi->gid == -1) - dip->di_gid = cpu_to_le32(ip->i_gid); - else - dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); + + dip->di_uid = cpu_to_le32(XIDINO_UID(XID_TAG(ip), + (sbi->uid == -1) ? ip->i_uid : jfs_ip->saved_uid, ip->i_xid)); + dip->di_gid = cpu_to_le32(XIDINO_GID(XID_TAG(ip), + (sbi->gid == -1) ? ip->i_gid : jfs_ip->saved_gid, ip->i_xid)); + /* * mode2 is only needed for storing the higher order bits. 
* Trust i_mode for the lower order ones diff -NurpP --minimal linux-2.6.17.13/fs/jfs/jfs_inode.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_inode.c --- linux-2.6.17.13/fs/jfs/jfs_inode.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_inode.c 2006-08-17 00:28:21 +0200 @@ -18,6 +18,8 @@ #include #include +#include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -30,19 +32,59 @@ void jfs_set_inode_flags(struct inode *i { unsigned int flags = JFS_IP(inode)->mode2; - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | - S_NOATIME | S_DIRSYNC | S_SYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); if (flags & JFS_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & JFS_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & JFS_BARRIER_FL) + inode->i_flags |= S_BARRIER; + + if (flags & JFS_SYNC_FL) + inode->i_flags |= S_SYNC; if (flags & JFS_APPEND_FL) inode->i_flags |= S_APPEND; if (flags & JFS_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & JFS_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; - if (flags & JFS_SYNC_FL) - inode->i_flags |= S_SYNC; +} + +int jfs_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + + oldflags = JFS_IP(inode)->mode2; + newflags = oldflags & ~(JFS_APPEND_FL | + JFS_IMMUTABLE_FL | JFS_IUNLINK_FL | + JFS_BARRIER_FL | JFS_NOATIME_FL | + JFS_SYNC_FL | JFS_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= JFS_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= JFS_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= JFS_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= JFS_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= JFS_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= JFS_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= JFS_DIRSYNC_FL; + + if (oldflags ^ newflags) { + JFS_IP(inode)->mode2 = newflags; + inode->i_ctime = CURRENT_TIME; + 
mark_inode_dirty(inode); + } + return 0; } /* @@ -89,10 +131,17 @@ struct inode *ialloc(struct inode *paren jfs_inode->saved_uid = inode->i_uid; jfs_inode->saved_gid = inode->i_gid; + inode->i_xid = vx_current_fsxid(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + iput(inode); + return NULL; + } + /* * Allocate inode to quota. */ if (DQUOT_ALLOC_INODE(inode)) { + DLIMIT_FREE_INODE(inode); DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; diff -NurpP --minimal linux-2.6.17.13/fs/jfs/jfs_inode.h linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_inode.h --- linux-2.6.17.13/fs/jfs/jfs_inode.h 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_inode.h 2006-08-17 00:28:21 +0200 @@ -31,6 +31,7 @@ extern void jfs_truncate(struct inode *) extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); +extern int jfs_sync_flags(struct inode *); extern void jfs_set_inode_flags(struct inode *); extern struct address_space_operations jfs_aops; diff -NurpP --minimal linux-2.6.17.13/fs/jfs/jfs_xtree.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_xtree.c --- linux-2.6.17.13/fs/jfs/jfs_xtree.c 2006-01-03 17:29:57 +0100 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/jfs_xtree.c 2006-08-17 00:28:21 +0200 @@ -21,6 +21,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" @@ -841,7 +842,12 @@ int xtInsert(tid_t tid, /* transaction hint = 0; if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen))) goto out; + if ((rc = DLIMIT_ALLOC_BLOCK(ip, xlen))) { + DQUOT_FREE_BLOCK(ip, xlen); + goto out; + } if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { + DLIMIT_FREE_BLOCK(ip, xlen); DQUOT_FREE_BLOCK(ip, xlen); goto out; } @@ -871,6 +877,7 @@ int xtInsert(tid_t tid, /* transaction /* undo data extent allocation */ if (*xaddrp == 0) { dbFree(ip, xaddr, (s64) xlen); + DLIMIT_FREE_BLOCK(ip, xlen); DQUOT_FREE_BLOCK(ip, xlen); } return rc; @@ -1231,6 +1238,7 @@ 
xtSplitPage(tid_t tid, struct inode *ip, struct tlock *tlck; struct xtlock *sxtlck = NULL, *rxtlck = NULL; int quota_allocation = 0; + int dlimit_allocation = 0; smp = split->mp; sp = XT_PAGE(ip, smp); @@ -1250,6 +1258,13 @@ xtSplitPage(tid_t tid, struct inode *ip, quota_allocation += lengthPXD(pxd); + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + rc = -ENOSPC; + goto clean_up; + } + dlimit_allocation += lengthPXD(pxd); + /* * allocate the new right page for the split */ @@ -1451,6 +1466,9 @@ xtSplitPage(tid_t tid, struct inode *ip, clean_up: + /* Rollback dlimit allocation. */ + if (dlimit_allocation) + DLIMIT_FREE_BLOCK(ip, dlimit_allocation); /* Rollback quota allocation. */ if (quota_allocation) DQUOT_FREE_BLOCK(ip, quota_allocation); @@ -1515,6 +1533,12 @@ xtSplitRoot(tid_t tid, release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); @@ -3941,6 +3965,8 @@ s64 xtTruncate(tid_t tid, struct inode * else ip->i_size = newsize; + /* update dlimit allocation to reflect freed blocks */ + DLIMIT_FREE_BLOCK(ip, nfreed); /* update quota allocation to reflect freed blocks */ DQUOT_FREE_BLOCK(ip, nfreed); diff -NurpP --minimal linux-2.6.17.13/fs/jfs/namei.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/namei.c --- linux-2.6.17.13/fs/jfs/namei.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/namei.c 2006-08-17 00:28:21 +0200 @@ -20,6 +20,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_inode.h" @@ -1465,6 +1466,7 @@ static struct dentry *jfs_lookup(struct return ERR_PTR(-EACCES); } + vx_propagate_xid(nd, ip); dentry = d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) @@ -1517,6 +1519,7 @@ struct inode_operations jfs_dir_inode_op .setattr = 
jfs_setattr, .permission = jfs_permission, #endif + .sync_flags = jfs_sync_flags, }; const struct file_operations jfs_dir_operations = { diff -NurpP --minimal linux-2.6.17.13/fs/jfs/super.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/super.c --- linux-2.6.17.13/fs/jfs/super.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/super.c 2006-08-17 00:28:21 +0200 @@ -194,7 +194,8 @@ static void jfs_put_super(struct super_b enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask + Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, + Opt_tagxid }; static match_table_t tokens = { @@ -204,6 +205,7 @@ static match_table_t tokens = { {Opt_resize, "resize=%u"}, {Opt_resize_nosize, "resize"}, {Opt_errors, "errors=%s"}, + {Opt_tagxid, "tagxid"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, {Opt_usrquota, "usrquota"}, @@ -338,6 +340,11 @@ static int parse_options(char *options, } break; } +#ifndef CONFIG_TAGGING_NONE + case Opt_tagxid: + *flag |= JFS_TAGXID; + break; +#endif default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -368,6 +375,13 @@ static int jfs_remount(struct super_bloc if (!parse_options(data, sb, &newLVSize, &flag)) { return -EINVAL; } + + if ((flag & JFS_TAGXID) && !(sb->s_flags & MS_TAGXID)) { + printk(KERN_ERR "JFS: %s: tagxid not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } + if (newLVSize) { if (sb->s_flags & MS_RDONLY) { printk(KERN_ERR @@ -439,6 +453,9 @@ static int jfs_fill_super(struct super_b #ifdef CONFIG_JFS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif + /* map mount option tagxid */ + if (sbi->flag & JFS_TAGXID) + sb->s_flags |= MS_TAGXID; if (newLVSize) { printk(KERN_ERR "resize option for remount only\n"); diff -NurpP --minimal linux-2.6.17.13/fs/jfs/xattr.c linux-2.6.17.13-vs2.0.2.1/fs/jfs/xattr.c --- linux-2.6.17.13/fs/jfs/xattr.c 2006-06-18 04:54:36 +0200 
+++ linux-2.6.17.13-vs2.0.2.1/fs/jfs/xattr.c 2006-08-17 00:28:21 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_dmap.h" @@ -263,9 +264,16 @@ static int ea_write(struct inode *ip, st if (DQUOT_ALLOC_BLOCK(ip, nblocks)) { return -EDQUOT; } + /* Allocate new blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, nblocks)) { + DQUOT_FREE_BLOCK(ip, nblocks); + return -ENOSPC; + } rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); if (rc) { + /*Rollback dlimit allocation. */ + DLIMIT_FREE_BLOCK(ip, nblocks); /*Rollback quota allocation. */ DQUOT_FREE_BLOCK(ip, nblocks); return rc; @@ -332,6 +340,8 @@ static int ea_write(struct inode *ip, st failed: /* Rollback quota allocation. */ + DLIMIT_FREE_BLOCK(ip, nblocks); + /* Rollback quota allocation. */ DQUOT_FREE_BLOCK(ip, nblocks); dbFree(ip, blkno, nblocks); @@ -468,6 +478,7 @@ static int ea_get(struct inode *inode, s s64 blkno; int rc; int quota_allocation = 0; + int dlimit_allocation = 0; /* When fsck.jfs clears a bad ea, it doesn't clear the size */ if (ji->ea.flag == 0) @@ -543,6 +554,12 @@ static int ea_get(struct inode *inode, s quota_allocation = blocks_needed; + /* Allocate new blocks to dlimit. */ + rc = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, blocks_needed)) + goto clean_up; + dlimit_allocation = blocks_needed; + rc = dbAlloc(inode, INOHINT(inode), (s64) blocks_needed, &blkno); if (rc) @@ -599,6 +616,9 @@ static int ea_get(struct inode *inode, s return ea_size; clean_up: + /* Rollback dlimit allocation */ + if (dlimit_allocation) + DLIMIT_FREE_BLOCK(inode, dlimit_allocation); /* Rollback quota allocation */ if (quota_allocation) DQUOT_FREE_BLOCK(inode, quota_allocation); @@ -675,8 +695,10 @@ static int ea_put(tid_t tid, struct inod } /* If old blocks exist, they must be removed from quota allocation. 
*/ - if (old_blocks) + if (old_blocks) { + DLIMIT_FREE_BLOCK(inode, old_blocks); DQUOT_FREE_BLOCK(inode, old_blocks); + } inode->i_ctime = CURRENT_TIME; diff -NurpP --minimal linux-2.6.17.13/fs/libfs.c linux-2.6.17.13-vs2.0.2.1/fs/libfs.c --- linux-2.6.17.13/fs/libfs.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/libfs.c 2006-08-17 00:28:21 +0200 @@ -124,7 +124,8 @@ static inline unsigned char dt_type(stru * both impossible due to the lock on directory. */ -int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) +static inline int do_dcache_readdir_filter(struct file * filp, + void * dirent, filldir_t filldir, int (*filter)(struct dentry *dentry)) { struct dentry *dentry = filp->f_dentry; struct dentry *cursor = filp->private_data; @@ -158,6 +159,8 @@ int dcache_readdir(struct file * filp, v next = list_entry(p, struct dentry, d_u.d_child); if (d_unhashed(next) || !next->d_inode) continue; + if (filter && !filter(next)) + continue; spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) @@ -174,6 +177,18 @@ int dcache_readdir(struct file * filp, v return 0; } +int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + return do_dcache_readdir_filter(filp, dirent, filldir, NULL); +} + +int dcache_readdir_filter(struct file * filp, void * dirent, filldir_t filldir, + int (*filter)(struct dentry *)) +{ + return do_dcache_readdir_filter(filp, dirent, filldir, filter); +} + + ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) { return -EISDIR; @@ -623,6 +638,7 @@ EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); EXPORT_SYMBOL(dcache_readdir); +EXPORT_SYMBOL(dcache_readdir_filter); EXPORT_SYMBOL(generic_read_dir); EXPORT_SYMBOL(get_sb_pseudo); EXPORT_SYMBOL(simple_commit_write); diff -NurpP --minimal linux-2.6.17.13/fs/lockd/clntproc.c 
linux-2.6.17.13-vs2.0.2.1/fs/lockd/clntproc.c --- linux-2.6.17.13/fs/lockd/clntproc.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/lockd/clntproc.c 2006-08-17 00:28:21 +0200 @@ -18,6 +18,7 @@ #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_CLIENT #define NLMCLNT_GRACE_WAIT (5*HZ) @@ -130,11 +131,11 @@ static void nlmclnt_setlockargs(struct n nlmclnt_next_cookie(&argp->cookie); argp->state = nsm_local_state; memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh)); - lock->caller = system_utsname.nodename; + lock->caller = vx_new_uts(nodename); lock->oh.data = req->a_owner; lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", (unsigned int)fl->fl_u.nfs_fl.owner->pid, - system_utsname.nodename); + vx_new_uts(nodename)); lock->svid = fl->fl_u.nfs_fl.owner->pid; lock->fl.fl_start = fl->fl_start; lock->fl.fl_end = fl->fl_end; diff -NurpP --minimal linux-2.6.17.13/fs/locks.c linux-2.6.17.13-vs2.0.2.1/fs/locks.c --- linux-2.6.17.13/fs/locks.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/locks.c 2006-09-12 17:57:43 +0200 @@ -125,6 +125,7 @@ #include #include #include +#include #include #include @@ -147,6 +148,8 @@ static kmem_cache_t *filelock_cache __re /* Allocate an empty lock structure. 
*/ static struct file_lock *locks_alloc_lock(void) { + if (!vx_locks_avail(1)) + return NULL; return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); } @@ -172,6 +175,7 @@ static void locks_free_lock(struct file_ BUG_ON(!list_empty(&fl->fl_block)); BUG_ON(!list_empty(&fl->fl_link)); + vx_locks_dec(fl); locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } @@ -191,6 +195,7 @@ void locks_init_lock(struct file_lock *f fl->fl_start = fl->fl_end = 0; fl->fl_ops = NULL; fl->fl_lmops = NULL; + fl->fl_xid = -1; } EXPORT_SYMBOL(locks_init_lock); @@ -248,6 +253,7 @@ void locks_copy_lock(struct file_lock *n new->fl_file = fl->fl_file; new->fl_ops = fl->fl_ops; new->fl_lmops = fl->fl_lmops; + new->fl_xid = fl->fl_xid; locks_copy_private(new, fl); } @@ -286,6 +292,9 @@ static int flock_make_lock(struct file * fl->fl_flags = FL_FLOCK; fl->fl_type = type; fl->fl_end = OFFSET_MAX; + + fl->fl_xid = filp->f_xid; + vx_locks_inc(fl); *lock = fl; return 0; @@ -451,6 +460,7 @@ static int lease_init(struct file *filp, fl->fl_owner = current->files; fl->fl_pid = current->tgid; + fl->fl_xid = vx_current_xid(); fl->fl_file = filp; fl->fl_flags = FL_LEASE; @@ -470,6 +480,8 @@ static int lease_alloc(struct file *filp if (fl == NULL) goto out; + fl->fl_xid = vx_current_xid(); + vx_locks_inc(fl); error = lease_init(filp, type, fl); if (error) { locks_free_lock(fl); @@ -780,6 +792,7 @@ static int flock_lock_file(struct file * goto out; } locks_copy_lock(new_fl, request); + vx_locks_inc(new_fl); locks_insert_lock(&inode->i_flock, new_fl); new_fl = NULL; error = 0; @@ -791,7 +804,8 @@ out: return error; } -static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock) +static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, + struct file_lock *conflock, xid_t xid) { struct file_lock *fl; struct file_lock *new_fl, *new_fl2; @@ -805,7 +819,15 @@ static int __posix_lock_file_conf(struct * so we get them in advance to 
avoid races. */ new_fl = locks_alloc_lock(); + if (new_fl) { + new_fl->fl_xid = xid; + vx_locks_inc(new_fl); + } new_fl2 = locks_alloc_lock(); + if (new_fl2) { + new_fl2->fl_xid = xid; + vx_locks_inc(new_fl2); + } lock_kernel(); if (request->fl_type != F_UNLCK) { @@ -986,7 +1008,8 @@ static int __posix_lock_file_conf(struct */ int posix_lock_file(struct file *filp, struct file_lock *fl) { - return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, NULL); + return __posix_lock_file_conf(filp->f_dentry->d_inode, + fl, NULL, filp->f_xid); } EXPORT_SYMBOL(posix_lock_file); @@ -1001,7 +1024,8 @@ EXPORT_SYMBOL(posix_lock_file); int posix_lock_file_conf(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { - return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, conflock); + return __posix_lock_file_conf(filp->f_dentry->d_inode, + fl, conflock, filp->f_xid); } EXPORT_SYMBOL(posix_lock_file_conf); @@ -1091,7 +1115,7 @@ int locks_mandatory_area(int read_write, fl.fl_end = offset + count - 1; for (;;) { - error = __posix_lock_file_conf(inode, &fl, NULL); + error = __posix_lock_file_conf(inode, &fl, NULL, filp->f_xid); if (error != -EAGAIN) break; if (!(fl.fl_flags & FL_SLEEP)) @@ -1653,6 +1677,9 @@ int fcntl_setlk(unsigned int fd, struct if (file_lock == NULL) return -ENOLCK; + file_lock->fl_xid = filp->f_xid; + vx_locks_inc(file_lock); + /* * This might block, so we do it before checking the inode. @@ -1796,6 +1823,9 @@ int fcntl_setlk64(unsigned int fd, struc if (file_lock == NULL) return -ENOLCK; + file_lock->fl_xid = filp->f_xid; + vx_locks_inc(file_lock); + /* * This might block, so we do it before checking the inode. 
*/ @@ -2106,6 +2132,10 @@ int get_locks_status(char *buffer, char list_for_each(tmp, &file_lock_list) { struct list_head *btmp; struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + + if (!vx_check(fl->fl_xid, VX_IDENT|VX_WATCH)) + continue; + lock_get_status(q, fl, ++i, ""); move_lock_status(&q, &pos, offset); diff -NurpP --minimal linux-2.6.17.13/fs/namei.c linux-2.6.17.13-vs2.0.2.1/fs/namei.c --- linux-2.6.17.13/fs/namei.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/namei.c 2006-09-12 15:26:43 +0200 @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include #include @@ -225,6 +228,31 @@ int generic_permission(struct inode *ino return -EACCES; } +static inline int vx_barrier(struct inode *inode) +{ + if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) { + vxwprintk(1, "xid=%d did hit the barrier.", + vx_current_xid()); + return 1; + } + return 0; +} + +static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + if (vx_barrier(inode)) + return -EACCES; + if (inode->i_xid == 0) + return 0; + if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + return 0; + + vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_xid, inode->i_ino, + vxd_cond_path(nd)); + return -EACCES; +} + int permission(struct inode *inode, int mask, struct nameidata *nd) { int retval, submask; @@ -235,7 +263,7 @@ int permission(struct inode *inode, int /* * Nobody gets write access to a read-only fs. */ - if (IS_RDONLY(inode) && + if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; @@ -249,6 +277,8 @@ int permission(struct inode *inode, int /* Ordinary permission routines do not understand MAY_APPEND. 
*/ submask = mask & ~MAY_APPEND; + if ((retval = xid_permission(inode, mask, nd))) + return retval; if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); else @@ -403,6 +433,8 @@ static int exec_permission_lite(struct i { umode_t mode = inode->i_mode; + if (vx_barrier(inode)) + return -EACCES; if (inode->i_op && inode->i_op->permission) return -EAGAIN; @@ -705,7 +737,8 @@ static __always_inline void follow_dotdo if (nd->dentry == current->fs->root && nd->mnt == current->fs->rootmnt) { read_unlock(&current->fs->lock); - break; + /* for sane '/' avoid follow_mount() */ + return; } read_unlock(&current->fs->lock); spin_lock(&dcache_lock); @@ -742,16 +775,34 @@ static int do_lookup(struct nameidata *n { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); + struct inode *inode; if (!dentry) goto need_lookup; if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; + inode = dentry->d_inode; + if (!inode) + goto done; + if (!vx_check(inode->i_xid, VX_WATCH|VX_ADMIN|VX_HOSTID|VX_IDENT)) + goto hidden; + if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) { + struct proc_dir_entry *de = PDE(inode); + + if (de && !vx_hide_check(0, de->vx_flags)) + goto hidden; + } done: path->mnt = mnt; path->dentry = dentry; __follow_mount(path); return 0; +hidden: + vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_xid, inode->i_ino, + vxd_path(dentry, mnt)); + dput(dentry); + return -ENOENT; need_lookup: dentry = real_lookup(nd->dentry, name, nd); @@ -1349,7 +1400,8 @@ static inline int check_sticky(struct in * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). 
*/ -static int may_delete(struct inode *dir,struct dentry *victim,int isdir) +static int may_delete(struct inode *dir, struct dentry *victim, + int isdir, struct nameidata *nd) { int error; @@ -1359,13 +1411,13 @@ static int may_delete(struct inode *dir, BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(victim->d_name.name, victim->d_inode, dir->i_ino); - error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); + error = permission(dir,MAY_WRITE | MAY_EXEC, nd); if (error) return error; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IXORUNLINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -1512,7 +1564,8 @@ int may_open(struct nameidata *nd, int a return -EACCES; flag &= ~O_TRUNC; - } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) + } else if ((IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt)) + && (flag & FMODE_WRITE)) return -EROFS; /* * An append-only file must be opened in append mode for writing. 
@@ -1790,9 +1843,10 @@ fail: } EXPORT_SYMBOL_GPL(lookup_create); -int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +int vfs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t dev, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1842,11 +1896,12 @@ asmlinkage long sys_mknodat(int dfd, con error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode, - new_decode_dev(dev)); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + new_decode_dev(dev), &nd); break; case S_IFIFO: case S_IFSOCK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + 0, &nd); break; case S_IFDIR: error = -EPERM; @@ -1869,9 +1924,10 @@ asmlinkage long sys_mknod(const char __u return sys_mknodat(AT_FDCWD, filename, mode, dev); } -int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +int vfs_mkdir(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1910,7 +1966,8 @@ asmlinkage long sys_mkdirat(int dfd, con if (!IS_ERR(dentry)) { if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; - error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); + error = vfs_mkdir(nd.dentry->d_inode, dentry, + mode, &nd); dput(dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -1955,9 +2012,10 @@ void dentry_unhash(struct dentry *dentry spin_unlock(&dcache_lock); } -int vfs_rmdir(struct inode *dir, struct dentry *dentry) +int vfs_rmdir(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 1); + int error = may_delete(dir, dentry, 1, nd); if (error) return error; @@ -2018,7 +2076,7 @@ static long do_rmdir(int dfd, const char 
dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); + error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd); dput(dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2034,9 +2092,10 @@ asmlinkage long sys_rmdir(const char __u return do_rmdir(AT_FDCWD, pathname); } -int vfs_unlink(struct inode *dir, struct dentry *dentry) +int vfs_unlink(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 0); + int error = may_delete(dir, dentry, 0, nd); if (error) return error; @@ -2098,7 +2157,7 @@ static long do_unlinkat(int dfd, const c inode = dentry->d_inode; if (inode) atomic_inc(&inode->i_count); - error = vfs_unlink(nd.dentry->d_inode, dentry); + error = vfs_unlink(nd.dentry->d_inode, dentry, &nd); exit2: dput(dentry); } @@ -2133,9 +2192,10 @@ asmlinkage long sys_unlink(const char __ return do_unlinkat(AT_FDCWD, pathname); } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) +int vfs_symlink(struct inode *dir, struct dentry *dentry, + const char *oldname, int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -2176,7 +2236,8 @@ asmlinkage long sys_symlinkat(const char dentry = lookup_create(&nd, 0); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); + error = vfs_symlink(nd.dentry->d_inode, dentry, + from, S_IALLUGO, &nd); dput(dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2193,7 +2254,8 @@ asmlinkage long sys_symlink(const char _ return sys_symlinkat(oldname, AT_FDCWD, newname); } -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) +int vfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry, struct nameidata *nd) { struct inode *inode = old_dentry->d_inode; int error; @@ 
-2201,7 +2263,7 @@ int vfs_link(struct dentry *old_dentry, if (!inode) return -ENOENT; - error = may_create(dir, new_dentry, NULL); + error = may_create(dir, new_dentry, nd); if (error) return error; @@ -2211,7 +2273,7 @@ int vfs_link(struct dentry *old_dentry, /* * A link to an append-only or immutable file cannot be created. */ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return -EPERM; if (!dir->i_op || !dir->i_op->link) return -EPERM; @@ -2268,7 +2330,8 @@ asmlinkage long sys_linkat(int olddfd, c new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); + error = vfs_link(old_nd.dentry, nd.dentry->d_inode, + new_dentry, &nd); dput(new_dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2400,14 +2463,14 @@ int vfs_rename(struct inode *old_dir, st if (old_dentry->d_inode == new_dentry->d_inode) return 0; - error = may_delete(old_dir, old_dentry, is_dir); + error = may_delete(old_dir, old_dentry, is_dir, NULL); if (error) return error; if (!new_dentry->d_inode) error = may_create(new_dir, new_dentry, NULL); else - error = may_delete(new_dir, new_dentry, is_dir); + error = may_delete(new_dir, new_dentry, is_dir, NULL); if (error) return error; @@ -2485,6 +2548,9 @@ static int do_rename(int olddfd, const c error = -EINVAL; if (old_dentry == trap) goto exit4; + error = -EROFS; + if (MNT_IS_RDONLY(newnd.mnt)) + goto exit4; new_dentry = lookup_hash(&newnd); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) diff -NurpP --minimal linux-2.6.17.13/fs/namespace.c linux-2.6.17.13-vs2.0.2.1/fs/namespace.c --- linux-2.6.17.13/fs/namespace.c 2006-06-18 04:54:37 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/namespace.c 2006-09-03 18:30:49 +0200 @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include #include "pnode.h" @@ -241,6 +243,7 @@ static struct vfsmount *clone_mnt(struct mnt->mnt_root = 
dget(root); mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; + mnt->mnt_xid = old->mnt_xid; if (flag & CL_SLAVE) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -349,43 +352,85 @@ static inline void mangle(struct seq_fil seq_escape(m, s, " \t\n\\"); } +static int mnt_is_reachable(struct vfsmount *mnt) +{ + struct vfsmount *root_mnt; + struct dentry *root, *point; + int ret; + + if (mnt == mnt->mnt_namespace->root) + return 1; + + spin_lock(&dcache_lock); + root_mnt = current->fs->rootmnt; + root = current->fs->root; + point = root; + + while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) { + point = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + } + + ret = (mnt == root_mnt) && is_subdir(point, root); + + spin_unlock(&dcache_lock); + + return ret; +} + static int show_vfsmnt(struct seq_file *m, void *v) { struct vfsmount *mnt = v; int err = 0; static struct proc_fs_info { - int flag; - char *str; + int s_flag; + int mnt_flag; + char *set_str; + char *unset_str; } fs_info[] = { - { MS_SYNCHRONOUS, ",sync" }, - { MS_DIRSYNC, ",dirsync" }, - { MS_MANDLOCK, ",mand" }, - { 0, NULL } - }; - static struct proc_fs_info mnt_info[] = { - { MNT_NOSUID, ",nosuid" }, - { MNT_NODEV, ",nodev" }, - { MNT_NOEXEC, ",noexec" }, - { MNT_NOATIME, ",noatime" }, - { MNT_NODIRATIME, ",nodiratime" }, - { 0, NULL } + { MS_RDONLY, MNT_RDONLY, "ro", "rw" }, + { MS_SYNCHRONOUS, 0, ",sync", NULL }, + { MS_DIRSYNC, 0, ",dirsync", NULL }, + { MS_MANDLOCK, 0, ",mand", NULL }, + { MS_TAGXID, 0, ",tagxid", NULL }, + { MS_NOATIME, MNT_NOATIME, ",noatime", NULL }, + { MS_NODIRATIME, MNT_NODIRATIME, ",nodiratime", NULL }, + { 0, MNT_NOSUID, ",nosuid", NULL }, + { 0, MNT_NODEV, ",nodev", NULL }, + { 0, MNT_NOEXEC, ",noexec", NULL }, + { 0, 0, NULL, NULL } }; - struct proc_fs_info *fs_infop; + struct proc_fs_info *p; + unsigned long s_flags = mnt->mnt_sb->s_flags; + int mnt_flags = mnt->mnt_flags; - mangle(m, mnt->mnt_devname ? 
mnt->mnt_devname : "none"); - seq_putc(m, ' '); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); - seq_putc(m, ' '); - mangle(m, mnt->mnt_sb->s_type->name); - seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); - for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_sb->s_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!mnt_is_reachable(mnt)) + return 0; + + if (!vx_check(0, VX_ADMIN|VX_WATCH) && + mnt == current->fs->rootmnt) { + seq_puts(m, "/dev/root / "); + } else { + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + seq_putc(m, ' '); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); } - for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); + mangle(m, mnt->mnt_sb->s_type->name); + seq_putc(m, ' '); + for (p = fs_info; (p->s_flag | p->mnt_flag) ; p++) { + if ((s_flags & p->s_flag) || (mnt_flags & p->mnt_flag)) { + if (p->set_str) + seq_puts(m, p->set_str); + } else { + if (p->unset_str) + seq_puts(m, p->unset_str); + } } + if (mnt->mnt_flags & MNT_XID) + seq_printf(m, ",xid=%d", mnt->mnt_xid); if (mnt->mnt_sb->s_op->show_options) err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); @@ -404,17 +449,27 @@ static int show_vfsstat(struct seq_file struct vfsmount *mnt = v; int err = 0; - /* device */ - if (mnt->mnt_devname) { - seq_puts(m, "device "); - mangle(m, mnt->mnt_devname); - } else - seq_puts(m, "no device"); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!mnt_is_reachable(mnt) && !vx_check(0, VX_WATCH)) + return 0; - /* mount point */ - seq_puts(m, " mounted on "); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); - seq_putc(m, ' '); + if (!vx_check(0, VX_ADMIN|VX_WATCH) && + mnt == current->fs->rootmnt) { + seq_puts(m, "device /dev/root mounted on / "); + } else { + /* device */ + if (mnt->mnt_devname) { + seq_puts(m, "device "); + mangle(m, mnt->mnt_devname); 
+ } else + seq_puts(m, "no device"); + + /* mount point */ + seq_puts(m, " mounted on "); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + } /* file system type */ seq_puts(m, "with fstype "); @@ -646,7 +701,7 @@ asmlinkage long sys_umount(char __user * goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) goto dput_and_out; retval = do_umount(nd.mnt, flags); @@ -670,7 +725,7 @@ asmlinkage long sys_oldumount(char __use static int mount_is_safe(struct nameidata *nd) { - if (capable(CAP_SYS_ADMIN)) + if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) return 0; return -EPERM; #ifdef notyet @@ -899,11 +954,13 @@ static int do_change_type(struct nameida /* * do loopback mount. */ -static int do_loopback(struct nameidata *nd, char *old_name, int recurse) +static int do_loopback(struct nameidata *nd, char *old_name, xid_t xid, + unsigned long flags, int mnt_flags) { struct nameidata old_nd; struct vfsmount *mnt = NULL; int err = mount_is_safe(nd); + int recurse = flags & MS_REC; if (err) return err; if (!old_name || !*old_name) @@ -929,6 +986,12 @@ static int do_loopback(struct nameidata if (!mnt) goto out; + mnt->mnt_flags = mnt_flags; + if (flags & MS_XID) { + mnt->mnt_xid = xid; + mnt->mnt_flags |= MNT_XID; + } + err = graft_tree(mnt, nd); if (err) { LIST_HEAD(umount_list); @@ -937,6 +1000,7 @@ static int do_loopback(struct nameidata spin_unlock(&vfsmount_lock); release_mounts(&umount_list); - } + } else /* on failure the tree was just released; leave mnt alone */ + mnt->mnt_flags = mnt_flags; out: up_write(&namespace_sem); @@ -950,12 +1014,12 @@ out: * on it - tough luck. 
*/ static int do_remount(struct nameidata *nd, int flags, int mnt_flags, - void *data) + void *data, xid_t xid) { int err; struct super_block *sb = nd->mnt->mnt_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT)) return -EPERM; if (!check_mnt(nd->mnt)) @@ -989,7 +1053,7 @@ static int do_move_mount(struct nameidat struct nameidata old_nd, parent_nd; struct vfsmount *p; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -1069,7 +1133,7 @@ static int do_new_mount(struct nameidata return -EINVAL; /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) return -EPERM; mnt = do_kern_mount(type, flags, name, data); @@ -1307,6 +1371,7 @@ long do_mount(char *dev_name, char *dir_ struct nameidata nd; int retval = 0; int mnt_flags = 0; + xid_t xid = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -1322,7 +1387,17 @@ long do_mount(char *dev_name, char *dir_ if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; + retval = vx_parse_xid(data_page, &xid, 1); + if (retval) { + mnt_flags |= MNT_XID; + /* bind and re-mounts get xid flag */ + if (flags & (MS_BIND|MS_REMOUNT)) + flags |= MS_XID; + } + /* Separate the per-mountpoint flags */ + if (flags & MS_RDONLY) + mnt_flags |= MNT_RDONLY; if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) @@ -1334,6 +1409,8 @@ long do_mount(char *dev_name, char *dir_ if (flags & MS_NODIRATIME) mnt_flags |= MNT_NODIRATIME; + if (!capable(CAP_SYS_ADMIN)) + mnt_flags |= MNT_NODEV; flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_NOATIME | MS_NODIRATIME); @@ -1348,9 +1425,9 @@ long do_mount(char *dev_name, char *dir_ if (flags & MS_REMOUNT) retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); + data_page, xid); else if (flags & MS_BIND) - retval = do_loopback(&nd, dev_name, flags & 
MS_REC); + retval = do_loopback(&nd, dev_name, xid, flags, mnt_flags); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&nd, flags); else if (flags & MS_MOVE) @@ -1448,7 +1525,7 @@ int copy_namespace(int flags, struct tas if (!(flags & CLONE_NEWNS)) return 0; - if (!capable(CAP_SYS_ADMIN)) { + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) { err = -EPERM; goto out; } diff -NurpP --minimal linux-2.6.17.13/fs/nfs/dir.c linux-2.6.17.13-vs2.0.2.1/fs/nfs/dir.c --- linux-2.6.17.13/fs/nfs/dir.c 2006-06-18 04:54:38 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfs/dir.c 2006-08-17 00:28:21 +0200 @@ -28,9 +28,11 @@ #include #include #include +#include #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -904,6 +906,7 @@ static struct dentry *nfs_lookup(struct res = (struct dentry *)inode; if (IS_ERR(res)) goto out_unlock; + vx_propagate_xid(nd, inode); no_entry: res = d_add_unique(dentry, inode); if (res != NULL) @@ -937,7 +940,8 @@ static int is_atomic_open(struct inode * if (nd->flags & LOOKUP_DIRECTORY) return 0; /* Are we trying to write to a read only partition? 
*/ - if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + if ((IS_RDONLY(dir) || MNT_IS_RDONLY(nd->mnt)) && + (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) return 0; return 1; } diff -NurpP --minimal linux-2.6.17.13/fs/nfs/inode.c linux-2.6.17.13-vs2.0.2.1/fs/nfs/inode.c --- linux-2.6.17.13/fs/nfs/inode.c 2006-06-18 04:54:41 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfs/inode.c 2006-08-17 00:28:21 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -343,12 +344,16 @@ nfs_sb_init(struct super_block *sb, rpc_ } server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + if (server->flags & NFS_MOUNT_TAGXID) + sb->s_flags |= MS_TAGXID; + sb->s_maxbytes = fsinfo.maxfilesize; if (sb->s_maxbytes > MAX_LFS_FILESIZE) sb->s_maxbytes = MAX_LFS_FILESIZE; server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; + server->client->cl_tagxid = (server->flags & NFS_MOUNT_TAGXID) ? 
1 : 0; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); @@ -423,6 +428,7 @@ nfs_create_client(struct nfs_server *ser clnt->cl_intr = 1; clnt->cl_softrtry = 1; + clnt->cl_tagxid = 1; return clnt; @@ -602,6 +608,7 @@ static void nfs_show_mount_options(struc { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, + { NFS_MOUNT_TAGXID, ",tagxid", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; @@ -903,8 +910,10 @@ nfs_fhget(struct super_block *sb, struct nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); + /* maybe fattr->xid someday */ if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* * report the blocks in 512byte units @@ -995,6 +1004,8 @@ void nfs_setattr_update_inode(struct ino inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_XID) && IS_TAGXID(inode)) + inode->i_xid = attr->ia_xid; spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); @@ -1397,6 +1408,9 @@ static int nfs_check_inode_attributes(st struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; int data_unstable; + uid_t uid; + gid_t gid; + xid_t xid; if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -1434,10 +1448,15 @@ static int nfs_check_inode_attributes(st if (cur_size != new_isize && nfsi->npages == 0) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + gid = INOXID_GID(XID_TAG(inode), 
fattr->uid, fattr->gid); + xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); + /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) - || inode->i_uid != fattr->uid - || inode->i_gid != fattr->gid) + || inode->i_uid != uid + || inode->i_gid != gid + || inode->i_xid != xid) nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ @@ -1522,6 +1541,9 @@ static int nfs_update_inode(struct inode loff_t cur_isize, new_isize; unsigned int invalid = 0; int data_stable; + uid_t uid; + gid_t gid; + xid_t xid; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino, @@ -1591,15 +1613,21 @@ static int nfs_update_inode(struct inode } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || - inode->i_uid != fattr->uid || - inode->i_gid != fattr->gid) + inode->i_uid != uid || + inode->i_gid != gid || + inode->i_xid != xid) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = uid; + inode->i_gid = gid; + inode->i_xid = xid; if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* diff -NurpP --minimal linux-2.6.17.13/fs/nfs/nfs3xdr.c linux-2.6.17.13-vs2.0.2.1/fs/nfs/nfs3xdr.c --- linux-2.6.17.13/fs/nfs/nfs3xdr.c 2006-06-18 04:54:41 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfs/nfs3xdr.c 2006-08-17 00:28:21 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_XDR @@ -178,7 +179,7 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt } static inline u32 * 
-xdr_encode_sattr(u32 *p, struct iattr *attr) +xdr_encode_sattr(u32 *p, struct iattr *attr, int tagxid) { if (attr->ia_valid & ATTR_MODE) { *p++ = xdr_one; @@ -186,15 +187,17 @@ xdr_encode_sattr(u32 *p, struct iattr *a } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_UID) { + if (attr->ia_valid & ATTR_UID || + (tagxid && (attr->ia_valid & ATTR_XID))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_uid); + *p++ = htonl(XIDINO_UID(tagxid, attr->ia_uid, attr->ia_xid)); } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_GID) { + if (attr->ia_valid & ATTR_GID || + (tagxid && (attr->ia_valid & ATTR_XID))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_gid); + *p++ = htonl(XIDINO_GID(tagxid, attr->ia_gid, attr->ia_xid)); } else { *p++ = xdr_zero; } @@ -279,7 +282,8 @@ static int nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); *p++ = htonl(args->guard); if (args->guard) p = xdr_encode_time3(p, &args->guardtime); @@ -370,7 +374,8 @@ nfs3_xdr_createargs(struct rpc_rqst *req *p++ = args->verifier[0]; *p++ = args->verifier[1]; } else - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -384,7 +389,8 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req, { p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -397,7 +403,8 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *re { p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + 
req->rq_task->tk_client->cl_tagxid); p = xdr_encode_array(p, args->topath, args->tolen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -412,7 +419,8 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); *p++ = htonl(args->type); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); if (args->type == NF3CHR || args->type == NF3BLK) { *p++ = htonl(MAJOR(args->rdev)); *p++ = htonl(MINOR(args->rdev)); diff -NurpP --minimal linux-2.6.17.13/fs/nfs/nfsroot.c linux-2.6.17.13-vs2.0.2.1/fs/nfs/nfsroot.c --- linux-2.6.17.13/fs/nfs/nfsroot.c 2006-02-18 14:40:23 +0100 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfs/nfsroot.c 2006-08-17 00:28:21 +0200 @@ -87,6 +87,7 @@ #include #include #include +#include /* Define this to allow debugging output */ #undef NFSROOT_DEBUG @@ -124,7 +125,7 @@ enum { Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, - Opt_acl, Opt_noacl, + Opt_acl, Opt_noacl, Opt_tagxid, /* Error token */ Opt_err }; @@ -161,6 +162,7 @@ static match_table_t __initdata tokens = {Opt_tcp, "tcp"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_tagxid, "tagxid"}, {Opt_err, NULL} }; @@ -275,6 +277,11 @@ static int __init root_nfs_parse(char *n case Opt_noacl: nfs_data.flags |= NFS_MOUNT_NOACL; break; +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + nfs_data.flags |= NFS_MOUNT_TAGXID; + break; +#endif default: printk(KERN_WARNING "Root-NFS: unknown " "option: %s\n", p); @@ -312,7 +319,7 @@ static int __init root_nfs_name(char *na /* Override them by options set on kernel command-line */ root_nfs_parse(name, buf); - cp = system_utsname.nodename; + cp = vx_new_uts(nodename); if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); return -1; diff -NurpP --minimal 
linux-2.6.17.13/fs/nfsd/auth.c linux-2.6.17.13-vs2.0.2.1/fs/nfsd/auth.c --- linux-2.6.17.13/fs/nfsd/auth.c 2006-06-18 04:54:42 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfsd/auth.c 2006-08-17 00:28:21 +0200 @@ -9,6 +9,7 @@ #include #include #include +#include #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) @@ -41,19 +42,22 @@ int nfsd_setuser(struct svc_rqst *rqstp, get_group_info(cred.cr_group_info); if (cred.cr_uid != (uid_t) -1) - current->fsuid = cred.cr_uid; + current->fsuid = INOXID_UID(XID_TAG_NFSD, cred.cr_uid, cred.cr_gid); else current->fsuid = exp->ex_anon_uid; if (cred.cr_gid != (gid_t) -1) - current->fsgid = cred.cr_gid; + current->fsgid = INOXID_GID(XID_TAG_NFSD, cred.cr_uid, cred.cr_gid); else current->fsgid = exp->ex_anon_gid; + /* this desperately needs a tag :) */ + current->xid = INOXID_XID(XID_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0); + if (!cred.cr_group_info) return -ENOMEM; ret = set_current_groups(cred.cr_group_info); put_group_info(cred.cr_group_info); - if ((cred.cr_uid)) { + if (INOXID_UID(XID_TAG_NFSD, cred.cr_uid, cred.cr_gid)) { cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; } else { cap_t(current->cap_effective) |= (CAP_NFSD_MASK & diff -NurpP --minimal linux-2.6.17.13/fs/nfsd/nfs3xdr.c linux-2.6.17.13-vs2.0.2.1/fs/nfsd/nfs3xdr.c --- linux-2.6.17.13/fs/nfsd/nfs3xdr.c 2006-04-09 13:49:54 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfsd/nfs3xdr.c 2006-08-17 00:28:21 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -111,6 +112,8 @@ static inline u32 * decode_sattr3(u32 *p, struct iattr *iap) { u32 tmp; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -120,12 +123,15 @@ decode_sattr3(u32 *p, struct iattr *iap) } if (*p++) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = ntohl(*p++); + uid = ntohl(*p++); } if (*p++) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = ntohl(*p++); + gid = ntohl(*p++); } + iap->ia_uid = INOXID_UID(XID_TAG_NFSD, uid, gid); + iap->ia_gid = 
INOXID_GID(XID_TAG_NFSD, uid, gid); + iap->ia_xid = INOXID_XID(XID_TAG_NFSD, uid, gid, 0); if (*p++) { u64 newsize; @@ -163,8 +169,10 @@ encode_fattr3(struct svc_rqst *rqstp, u3 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + XIDINO_UID(XID_TAG(dentry->d_inode), stat->uid, stat->xid))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + XIDINO_GID(XID_TAG(dentry->d_inode), stat->gid, stat->xid))); if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { diff -NurpP --minimal linux-2.6.17.13/fs/nfsd/nfs4recover.c linux-2.6.17.13-vs2.0.2.1/fs/nfsd/nfs4recover.c --- linux-2.6.17.13/fs/nfsd/nfs4recover.c 2006-02-18 14:40:23 +0100 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfsd/nfs4recover.c 2006-08-17 00:28:21 +0200 @@ -155,7 +155,7 @@ nfsd4_create_clid_dir(struct nfs4_client dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); goto out_put; } - status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); + status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU, NULL); out_put: dput(dentry); out_unlock: @@ -259,7 +259,7 @@ nfsd4_remove_clid_file(struct dentry *di return -EINVAL; } mutex_lock(&dir->d_inode->i_mutex); - status = vfs_unlink(dir->d_inode, dentry); + status = vfs_unlink(dir->d_inode, dentry, NULL); mutex_unlock(&dir->d_inode->i_mutex); return status; } @@ -274,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, * a kernel from the future.... 
*/ nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); mutex_lock(&dir->d_inode->i_mutex); - status = vfs_rmdir(dir->d_inode, dentry); + status = vfs_rmdir(dir->d_inode, dentry, NULL); mutex_unlock(&dir->d_inode->i_mutex); return status; } diff -NurpP --minimal linux-2.6.17.13/fs/nfsd/nfs4xdr.c linux-2.6.17.13-vs2.0.2.1/fs/nfsd/nfs4xdr.c --- linux-2.6.17.13/fs/nfsd/nfs4xdr.c 2006-06-18 04:54:42 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfsd/nfs4xdr.c 2006-08-17 00:28:21 +0200 @@ -57,6 +57,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -1560,14 +1561,18 @@ out_acl: WRITE32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); + status = nfsd4_encode_user(rqstp, + XIDINO_UID(XID_TAG(dentry->d_inode), + stat.uid, stat.xid), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); + status = nfsd4_encode_group(rqstp, + XIDINO_GID(XID_TAG(dentry->d_inode), + stat.gid, stat.xid), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) diff -NurpP --minimal linux-2.6.17.13/fs/nfsd/nfsxdr.c linux-2.6.17.13-vs2.0.2.1/fs/nfsd/nfsxdr.c --- linux-2.6.17.13/fs/nfsd/nfsxdr.c 2006-04-09 13:49:54 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfsd/nfsxdr.c 2006-08-17 00:28:21 +0200 @@ -15,6 +15,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -102,6 +103,8 @@ static inline u32 * decode_sattr(u32 *p, struct iattr *iap) { u32 tmp, tmp1; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -115,12 +118,15 @@ decode_sattr(u32 *p, struct iattr *iap) } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = tmp; + uid = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = tmp; + gid = tmp; } + iap->ia_uid = INOXID_UID(XID_TAG_NFSD, uid, gid); + iap->ia_gid = 
INOXID_GID(XID_TAG_NFSD, uid, gid); + iap->ia_xid = INOXID_XID(XID_TAG_NFSD, uid, gid, 0); if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; iap->ia_size = tmp; @@ -164,8 +170,10 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + XIDINO_UID(XID_TAG(dentry->d_inode), stat->uid, stat->xid))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + XIDINO_GID(XID_TAG(dentry->d_inode), stat->gid, stat->xid))); if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); diff -NurpP --minimal linux-2.6.17.13/fs/nfsd/vfs.c linux-2.6.17.13-vs2.0.2.1/fs/nfsd/vfs.c --- linux-2.6.17.13/fs/nfsd/vfs.c 2006-06-18 04:54:42 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/nfsd/vfs.c 2006-08-17 00:28:21 +0200 @@ -1156,13 +1156,13 @@ nfsd_create(struct svc_rqst *rqstp, stru err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: - err = vfs_mkdir(dirp, dchild, iap->ia_mode); + err = vfs_mkdir(dirp, dchild, iap->ia_mode, NULL); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); + err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev, NULL); break; default: printk("nfsd: bad file type %o in nfsd_create\n", type); @@ -1442,11 +1442,13 @@ nfsd_symlink(struct svc_rqst *rqstp, str else { strncpy(path_alloced, path, plen); path_alloced[plen] = 0; - err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); + err = vfs_symlink(dentry->d_inode, dnew, + path_alloced, mode, NULL); kfree(path_alloced); } } else - err = vfs_symlink(dentry->d_inode, dnew, path, mode); + err = vfs_symlink(dentry->d_inode, dnew, + path, mode, NULL); if (!err) if (EX_ISSYNC(fhp->fh_export)) @@ -1504,7 +1506,7 @@ nfsd_link(struct svc_rqst *rqstp, struct dold = tfhp->fh_dentry; dest = 
dold->d_inode; - err = vfs_link(dold, dirp, dnew); + err = vfs_link(dold, dirp, dnew, NULL); if (!err) { if (EX_ISSYNC(ffhp->fh_export)) { err = nfserrno(nfsd_sync_dir(ddir)); @@ -1666,9 +1668,9 @@ nfsd_unlink(struct svc_rqst *rqstp, stru err = -EPERM; } else #endif - err = vfs_unlink(dirp, rdentry); + err = vfs_unlink(dirp, rdentry, NULL); } else { /* It's RMDIR */ - err = vfs_rmdir(dirp, rdentry); + err = vfs_rmdir(dirp, rdentry, NULL); } dput(rdentry); @@ -1777,7 +1779,8 @@ nfsd_permission(struct svc_export *exp, */ if (!(acc & MAY_LOCAL_ACCESS)) if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { - if (EX_RDONLY(exp) || IS_RDONLY(inode)) + if (EX_RDONLY(exp) || IS_RDONLY(inode) + || MNT_IS_RDONLY(exp->ex_mnt)) return nfserr_rofs; if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) return nfserr_perm; diff -NurpP --minimal linux-2.6.17.13/fs/open.c linux-2.6.17.13-vs2.0.2.1/fs/open.c --- linux-2.6.17.13/fs/open.c 2006-06-18 04:54:44 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/open.c 2006-08-17 00:28:21 +0200 @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include @@ -46,6 +49,8 @@ int vfs_statfs(struct super_block *sb, s if (retval == 0 && buf->f_frsize == 0) buf->f_frsize = buf->f_bsize; } + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + vx_vsi_statfs(sb, buf); } return retval; } @@ -249,7 +254,7 @@ static long do_sys_truncate(const char _ goto dput_and_out; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; error = -EPERM; @@ -379,7 +384,7 @@ asmlinkage long sys_utime(char __user * inode = nd.dentry->d_inode; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; /* Don't worry, the checks are done in inode_change_ok() */ @@ -436,7 +441,7 @@ long do_utimes(int dfd, char __user *fil inode = nd.dentry->d_inode; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; /* Don't worry, the 
checks are done in inode_change_ok() */ @@ -523,7 +528,8 @@ asmlinkage long sys_faccessat(int dfd, c if (!res) { res = vfs_permission(&nd, mode); /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) + if(!res && (mode & S_IWOTH) + && (IS_RDONLY(nd.dentry->d_inode) || MNT_IS_RDONLY(nd.mnt)) && !special_file(nd.dentry->d_inode->i_mode)) res = -EROFS; path_release(&nd); @@ -636,7 +642,7 @@ asmlinkage long sys_fchmod(unsigned int audit_inode(NULL, inode, 0); err = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(file->f_vfsmnt)) goto out_putf; err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -669,7 +675,7 @@ asmlinkage long sys_fchmodat(int dfd, co inode = nd.dentry->d_inode; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; error = -EPERM; @@ -695,7 +701,8 @@ asmlinkage long sys_chmod(const char __u return sys_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +static int chown_common(struct dentry *dentry, struct vfsmount *mnt, + uid_t user, gid_t group) { struct inode * inode; int error; @@ -707,7 +714,7 @@ static int chown_common(struct dentry * goto out; } error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) goto out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -715,11 +722,11 @@ static int chown_common(struct dentry * newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { newattrs.ia_valid |= ATTR_UID; - newattrs.ia_uid = user; + newattrs.ia_uid = vx_map_uid(user); } if (group != (gid_t) -1) { newattrs.ia_valid |= ATTR_GID; - newattrs.ia_gid = group; + newattrs.ia_gid = vx_map_gid(group); } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; @@ -737,7 +744,7 @@ asmlinkage long sys_chown(const char __u error = user_path_walk(filename, &nd); if (!error) { - error = 
chown_common(nd.dentry, user, group); + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); } return error; @@ -756,7 +763,7 @@ asmlinkage long sys_fchownat(int dfd, co follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; error = __user_walk_fd(dfd, filename, follow, &nd); if (!error) { - error = chown_common(nd.dentry, user, group); + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); } out: @@ -770,7 +777,7 @@ asmlinkage long sys_lchown(const char __ error = user_path_walk_link(filename, &nd); if (!error) { - error = chown_common(nd.dentry, user, group); + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); } return error; @@ -787,7 +794,7 @@ asmlinkage long sys_fchown(unsigned int struct dentry * dentry; dentry = file->f_dentry; audit_inode(NULL, dentry->d_inode, 0); - error = chown_common(dentry, user, group); + error = chown_common(dentry, file->f_vfsmnt, user, group); fput(file); } return error; @@ -1015,6 +1022,7 @@ repeat: FD_SET(fd, fdt->open_fds); FD_CLR(fd, fdt->close_on_exec); files->next_fd = fd + 1; + vx_openfd_inc(fd); #if 1 /* Sanity check */ if (fdt->fd[fd] != NULL) { @@ -1037,6 +1045,7 @@ static void __put_unused_fd(struct files __FD_CLR(fd, fdt->open_fds); if (fd < files->next_fd) files->next_fd = fd; + vx_openfd_dec(fd); } void fastcall put_unused_fd(unsigned int fd) diff -NurpP --minimal linux-2.6.17.13/fs/proc/array.c linux-2.6.17.13-vs2.0.2.1/fs/proc/array.c --- linux-2.6.17.13/fs/proc/array.c 2006-06-18 04:54:45 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/proc/array.c 2006-08-17 00:28:21 +0200 @@ -75,6 +75,9 @@ #include #include #include +#include +#include +#include #include #include @@ -135,7 +138,9 @@ static const char *task_state_array[] = "T (stopped)", /* 4 */ "T (tracing stop)", /* 8 */ "Z (zombie)", /* 16 */ - "X (dead)" /* 32 */ + "X (dead)", /* 32 */ + "N (noninteractive)", /* 64 */ + "H (on hold)" /* 128 */ }; static inline const char * get_task_state(struct 
task_struct *tsk) @@ -144,7 +149,8 @@ static inline const char * get_task_stat TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE | TASK_STOPPED | - TASK_TRACED)) | + TASK_TRACED | + TASK_ONHOLD)) | (tsk->exit_state & (EXIT_ZOMBIE | EXIT_DEAD)); const char **p = &task_state_array[0]; @@ -161,8 +167,13 @@ static inline char * task_state(struct t struct group_info *group_info; int g; struct fdtable *fdt = NULL; + pid_t pid, ptgid, tppid, tgid; read_lock(&tasklist_lock); + tgid = vx_map_tgid(p->tgid); + pid = vx_map_pid(p->pid); + ptgid = vx_map_pid(p->group_leader->real_parent->tgid); + tppid = vx_map_pid(p->parent->pid); buffer += sprintf(buffer, "State:\t%s\n" "SleepAVG:\t%lu%%\n" @@ -174,9 +185,8 @@ static inline char * task_state(struct t "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), (p->sleep_avg/1024)*100/(1020000000/1024), - p->tgid, - p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, - pid_alive(p) && p->ptrace ? p->parent->pid : 0, + tgid, pid, (pid > 1) ? ptgid : 0, + pid_alive(p) && p->ptrace ? 
tppid : 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); read_unlock(&tasklist_lock); @@ -296,6 +306,12 @@ static inline char *task_cap(struct task int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; +#ifdef CONFIG_VSERVER_LEGACY + struct vx_info *vxi; +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + struct nx_info *nxi; +#endif struct mm_struct *mm = get_task_mm(task); buffer = task_name(task, buffer); @@ -308,6 +324,46 @@ int proc_pid_status(struct task_struct * buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); buffer = cpuset_task_status_allowed(task, buffer); + + if (task_vx_flags(task, VXF_INFO_HIDE, 0)) + goto skip; +#ifdef CONFIG_VSERVER_LEGACY + buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task)); + vxi = task_get_vx_info(task); + if (vxi) { + buffer += sprintf (buffer,"ctxflags: %08llx\n" + ,(unsigned long long)vxi->vx_flags); + buffer += sprintf (buffer,"initpid: %d\n" + ,vxi->vx_initpid); + } else { + buffer += sprintf (buffer,"ctxflags: none\n"); + buffer += sprintf (buffer,"initpid: none\n"); + } + put_vx_info(vxi); +#else + buffer += sprintf (buffer,"VxID: %d\n", vx_task_xid(task)); +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + nxi = task_get_nx_info(task); + if (nxi) { + int i; + + buffer += sprintf (buffer,"ipv4root:"); + for (i=0; i < nxi->nbipv4; i++){ + buffer += sprintf (buffer," %08x/%08x" + ,nxi->ipv4[i] + ,nxi->mask[i]); + } + *buffer++ = '\n'; + buffer += sprintf (buffer,"ipv4root_bcast: %08x\n" + ,nxi->v4_bcast); + } else { + buffer += sprintf (buffer,"ipv4root: 0\n"); + buffer += sprintf (buffer,"ipv4root_bcast: 0\n"); + } + put_nx_info(nxi); +#endif +skip: #if defined(CONFIG_S390) buffer = task_show_regs(task, buffer); #endif @@ -322,7 +378,7 @@ static int do_task_stat(struct task_stru sigset_t sigign, sigcatch; char state; int res; - pid_t ppid, pgid = -1, sid = -1; + pid_t pid, ppid, pgid = -1, sid = -1; int num_threads = 0; struct mm_struct *mm; unsigned long long 
start_time; @@ -386,7 +442,11 @@ static int do_task_stat(struct task_stru stime = cputime_add(stime, task->signal->stime); } } - ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; + pid = vx_info_map_pid(task->vx_info, pid_alive(task) ? task->pid : 0); + ppid = (!(pid > 1)) ? 0 : vx_info_map_tgid(task->vx_info, + task->group_leader->real_parent->tgid); + pgid = vx_info_map_pid(task->vx_info, pgid); + read_unlock(&tasklist_lock); if (!whole || num_threads<2) @@ -410,10 +470,21 @@ static int do_task_stat(struct task_stru /* convert nsec -> ticks */ start_time = nsec_to_clock_t(start_time); + /* fixup start time for virt uptime */ + if (vx_flags(VXF_VIRT_UPTIME, 0)) { + unsigned long long bias = + current->vx_info->cvirt.bias_clock; + + if (start_time > bias) + start_time -= bias; + else + start_time = 0; + } + res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", - task->pid, + pid, tcomm, state, ppid, diff -NurpP --minimal linux-2.6.17.13/fs/proc/base.c linux-2.6.17.13-vs2.0.2.1/fs/proc/base.c --- linux-2.6.17.13/fs/proc/base.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/proc/base.c 2006-08-17 00:28:21 +0200 @@ -72,6 +72,8 @@ #include #include #include +#include +#include #include "internal.h" /* @@ -122,6 +124,8 @@ enum pid_directory_inos { PROC_TGID_ATTR_EXEC, PROC_TGID_ATTR_FSCREATE, #endif + PROC_TGID_VX_INFO, + PROC_TGID_IP_INFO, #ifdef CONFIG_AUDITSYSCALL PROC_TGID_LOGINUID, #endif @@ -163,6 +167,8 @@ enum pid_directory_inos { PROC_TID_ATTR_EXEC, PROC_TID_ATTR_FSCREATE, #endif + PROC_TID_VX_INFO, + PROC_TID_IP_INFO, #ifdef CONFIG_AUDITSYSCALL PROC_TID_LOGINUID, #endif @@ -219,6 +225,8 @@ static struct pid_entry tgid_base_stuff[ #ifdef CONFIG_CPUSETS E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), #endif + E(PROC_TGID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TGID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), 
E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), #ifdef CONFIG_AUDITSYSCALL @@ -261,6 +269,8 @@ static struct pid_entry tid_base_stuff[] #ifdef CONFIG_CPUSETS E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), #endif + E(PROC_TID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), #ifdef CONFIG_AUDITSYSCALL @@ -547,6 +557,10 @@ static int proc_check_chroot(struct dent struct vfsmount *our_vfsmnt, *mnt; int res = 0; + /* context admin override */ + if (capable(CAP_CONTEXT)) + goto override; + read_lock(&current->fs->lock); our_vfsmnt = mntget(current->fs->rootmnt); base = dget(current->fs->root); @@ -570,6 +584,7 @@ static int proc_check_chroot(struct dent exit: dput(base); mntput(our_vfsmnt); +override: dput(root); mntput(vfsmnt); return res; @@ -1286,7 +1301,7 @@ static int proc_pident_readdir(struct fi struct inode *inode = dentry->d_inode; struct pid_entry *p; ino_t ino; - int ret; + int ret, hide; ret = -ENOENT; if (!pid_alive(proc_task(inode))) @@ -1317,11 +1332,20 @@ static int proc_pident_readdir(struct fi goto out; } p = ents + i; + hide = vx_flags(VXF_INFO_HIDE, 0); while (p->name) { + if (hide) { + switch (p->type) { + case PROC_TGID_VX_INFO: + case PROC_TGID_IP_INFO: + goto skip; + } + } if (filldir(dirent, p->name, p->len, filp->f_pos, fake_ino(pid, p->type), p->mode >> 12) < 0) goto out; filp->f_pos++; + skip: p++; } } @@ -1396,6 +1420,7 @@ static struct inode *proc_pid_make_inode inode->i_uid = task->euid; inode->i_gid = task->egid; } + inode->i_xid = vx_task_xid(task); security_task_to_inode(task, inode); out: @@ -1421,6 +1446,11 @@ static int pid_revalidate(struct dentry { struct inode *inode = dentry->d_inode; struct task_struct *task = proc_task(inode); + + if (!vx_check(vx_task_xid(task), VX_IDENT)) + goto out_drop; + /* discard wrong fakeinit */ + if
(pid_alive(task)) { if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { inode->i_uid = task->euid; @@ -1433,6 +1463,7 @@ static int pid_revalidate(struct dentry security_task_to_inode(task, inode); return 1; } +out_drop: d_drop(dentry); return 0; } @@ -1675,6 +1706,9 @@ static struct file_operations proc_tgid_ static struct inode_operations proc_tgid_attr_inode_operations; #endif +extern int proc_pid_vx_info(struct task_struct *, char *); +extern int proc_pid_nx_info(struct task_struct *, char *); + static int get_tid_list(int index, unsigned int *tids, struct inode *dir); /* SMP-safe */ @@ -1866,15 +1900,33 @@ static struct dentry *proc_pident_lookup inode->i_fop = &proc_loginuid_operations; break; #endif + case PROC_TID_VX_INFO: + case PROC_TGID_VX_INFO: + if (task_vx_flags(task, VXF_INFO_HIDE, 0)) + goto out_noent; + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_vx_info; + break; + case PROC_TID_IP_INFO: + case PROC_TGID_IP_INFO: + if (task_vx_flags(task, VXF_INFO_HIDE, 0)) + goto out_noent; + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_nx_info; + break; default: printk("procfs: impossible type (%d)",p->type); - iput(inode); - return ERR_PTR(-EINVAL); + error = -EINVAL; + goto out_put; } dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); return NULL; +out_noent: + error=-ENOENT; +out_put: + iput(inode); out: return ERR_PTR(error); } @@ -1962,14 +2014,14 @@ static int proc_self_readlink(struct den int buflen) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -2033,6 +2085,20 @@ void proc_pid_flush(struct dentry *proc_ } } 
+#define VXF_FAKE_INIT (VXF_INFO_INIT|VXF_STATE_INIT) + +static inline int proc_pid_visible(struct task_struct *task, int pid) +{ + if ((pid == 1) && + !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT)) + goto visible; + if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto visible; + return 0; +visible: + return 1; +} + /* SMP-safe */ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { @@ -2069,13 +2135,14 @@ struct dentry *proc_pid_lookup(struct in if (!task) goto out; - inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + /* check for context visibility */ + if (!proc_pid_visible(task, tgid)) + goto out_drop_task; + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + if (!inode) + goto out_drop_task; - if (!inode) { - put_task_struct(task); - goto out; - } inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; @@ -2104,6 +2171,8 @@ struct dentry *proc_pid_lookup(struct in goto out; } return NULL; +out_drop_task: + put_task_struct(task); out: return ERR_PTR(-ENOENT); } @@ -2119,6 +2188,8 @@ static struct dentry *proc_task_lookup(s tid = name_to_int(dentry); if (tid == ~0U) goto out; + if (vx_current_initpid(tid)) + goto out; read_lock(&tasklist_lock); task = find_task_by_pid(tid); @@ -2130,11 +2201,14 @@ static struct dentry *proc_task_lookup(s if (leader->tgid != task->tgid) goto out_drop_task; - inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); - + /* check for context visibility */ + if (!proc_pid_visible(task, tid)) + goto out_drop_task; + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); if (!inode) goto out_drop_task; + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; @@ -2174,7 +2248,7 @@ static int get_tgid_list(int index, unsi read_lock(&tasklist_lock); p = NULL; if (version) { - p = find_task_by_pid(version); + p = 
find_task_by_real_pid(version); if (p && !thread_group_leader(p)) p = NULL; } @@ -2186,11 +2260,15 @@ static int get_tgid_list(int index, unsi for ( ; p != &init_task; p = next_task(p)) { int tgid = p->pid; + if (!pid_alive(p)) continue; + /* check for context visibility */ + if (!proc_pid_visible(p, tgid)) + continue; if (--index >= 0) continue; - tgids[nr_tgids] = tgid; + tgids[nr_tgids] = vx_map_tgid(tgid); nr_tgids++; if (nr_tgids >= PROC_MAXPIDS) break; @@ -2220,10 +2298,13 @@ static int get_tid_list(int index, unsig if (pid_alive(task)) do { int tid = task->pid; + /* check for context visibility */ + if (!proc_pid_visible(task, tid)) + continue; if (--index >= 0) continue; if (tids != NULL) - tids[nr_tids] = tid; + tids[nr_tids] = vx_map_pid(tid); nr_tids++; if (nr_tids >= PROC_MAXPIDS) break; @@ -2299,11 +2380,14 @@ static int proc_task_readdir(struct file unsigned int nr_tids, i; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; + struct task_struct *task = proc_task(inode); int retval = -ENOENT; ino_t ino; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ - if (!pid_alive(proc_task(inode))) + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto out; + if (!pid_alive(task)) goto out; retval = 0; diff -NurpP --minimal linux-2.6.17.13/fs/proc/generic.c linux-2.6.17.13-vs2.0.2.1/fs/proc/generic.c --- linux-2.6.17.13/fs/proc/generic.c 2006-06-18 04:54:45 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/proc/generic.c 2006-08-17 00:28:21 +0200 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "internal.h" @@ -395,12 +396,16 @@ struct dentry *proc_lookup(struct inode for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) continue; + if (!vx_hide_check(0, de->vx_flags)) + continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino = de->low_ino; spin_unlock(&proc_subdir_lock); error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); + /* 
generic proc entries belong to the host */ + inode->i_xid = 0; spin_lock(&proc_subdir_lock); break; } @@ -476,12 +481,15 @@ int proc_readdir(struct file * filp, } do { + if (!vx_hide_check(0, de->vx_flags)) + goto skip; /* filldir passes info to user space */ spin_unlock(&proc_subdir_lock); if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) goto out; spin_lock(&proc_subdir_lock); + skip: filp->f_pos++; de = de->next; } while (de); @@ -604,6 +612,7 @@ static struct proc_dir_entry *proc_creat ent->namelen = len; ent->mode = mode; ent->nlink = nlink; + ent->vx_flags = IATTR_PROC_DEFAULT; out: return ent; } @@ -624,7 +633,8 @@ struct proc_dir_entry *proc_symlink(cons kfree(ent->data); kfree(ent); ent = NULL; - } + } else + ent->vx_flags = IATTR_PROC_SYMLINK; } else { kfree(ent); ent = NULL; diff -NurpP --minimal linux-2.6.17.13/fs/proc/inode.c linux-2.6.17.13-vs2.0.2.1/fs/proc/inode.c --- linux-2.6.17.13/fs/proc/inode.c 2006-06-18 04:54:46 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/proc/inode.c 2006-08-17 00:28:21 +0200 @@ -171,6 +171,8 @@ struct inode *proc_get_inode(struct supe inode->i_uid = de->uid; inode->i_gid = de->gid; } + if (de->vx_flags) + PROC_I(inode)->vx_flags = de->vx_flags; if (de->size) inode->i_size = de->size; if (de->nlink) diff -NurpP --minimal linux-2.6.17.13/fs/proc/proc_misc.c linux-2.6.17.13-vs2.0.2.1/fs/proc/proc_misc.c --- linux-2.6.17.13/fs/proc/proc_misc.c 2006-06-18 04:54:47 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/proc/proc_misc.c 2006-08-17 00:28:21 +0200 @@ -53,6 +53,8 @@ #include #include "internal.h" +#include + #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) /* @@ -82,17 +84,32 @@ static int proc_calc_metrics(char *page, static int loadavg_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { + unsigned int running, threads; int a, b, c; int len; - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] 
+ (FIXED_1/200); - len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + if (vx_flags(VXF_VIRT_LOAD, 0)) { + struct vx_info *vxi = current->vx_info; + + a = vxi->cvirt.load[0] + (FIXED_1/200); + b = vxi->cvirt.load[1] + (FIXED_1/200); + c = vxi->cvirt.load[2] + (FIXED_1/200); + + running = atomic_read(&vxi->cvirt.nr_running); + threads = atomic_read(&vxi->cvirt.nr_threads); + } else { + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + + running = nr_running(); + threads = nr_threads; + } + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n", LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, last_pid); + running, threads, last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } @@ -106,6 +123,9 @@ static int uptime_read_proc(char *page, do_posix_clock_monotonic_gettime(&uptime); cputime_to_timespec(idletime, &idle); + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&uptime, &idle); + len = sprintf(page,"%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, (uptime.tv_nsec / (NSEC_PER_SEC / 100)), @@ -143,7 +163,7 @@ static int meminfo_read_proc(char *page, * sysctl_overcommit_ratio / 100) + total_swap_pages; cached = get_page_cache_size() - total_swapcache_pages - i.bufferram; - if (cached < 0) + if (cached < 0 || vx_flags(VXF_VIRT_MEM, 0)) cached = 0; get_vmalloc_info(&vmi); @@ -238,8 +258,9 @@ static int version_read_proc(char *page, { int len; - strcpy(page, linux_banner); - len = strlen(page); + len = sprintf(page, vx_linux_banner, + vx_new_uts(release), + vx_new_uts(version)); return proc_calc_metrics(page, start, off, count, eof, len); } diff -NurpP --minimal linux-2.6.17.13/fs/proc/root.c linux-2.6.17.13-vs2.0.2.1/fs/proc/root.c --- linux-2.6.17.13/fs/proc/root.c 2006-04-09 13:49:54 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/proc/root.c 2006-08-17 00:28:21 +0200 @@ -25,6 +25,9 @@ struct proc_dir_entry *proc_net, *proc_n 
#ifdef CONFIG_SYSCTL struct proc_dir_entry *proc_sys_root; #endif +struct proc_dir_entry *proc_virtual; + +extern void proc_vx_init(void); static struct super_block *proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -78,6 +81,7 @@ void __init proc_root_init(void) proc_device_tree_init(); #endif proc_bus = proc_mkdir("bus", NULL); + proc_vx_init(); } static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat diff -NurpP --minimal linux-2.6.17.13/fs/quota.c linux-2.6.17.13-vs2.0.2.1/fs/quota.c --- linux-2.6.17.13/fs/quota.c 2006-06-18 04:54:47 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/quota.c 2006-08-17 00:28:21 +0200 @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include /* Check validity of generic quotactl commands */ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) @@ -81,11 +84,11 @@ static int generic_quotactl_valid(struct if (cmd == Q_GETQUOTA) { if (((type == USRQUOTA && current->euid != id) || (type == GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; } else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO) - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; return 0; @@ -132,10 +135,10 @@ static int xqm_quotactl_valid(struct sup if (cmd == Q_XGETQUOTA) { if (((type == XQM_USRQUOTA && current->euid != id) || (type == XQM_GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) { - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; } @@ -337,6 +340,43 @@ static int do_quotactl(struct super_bloc return 0; } +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE) + +#include +#include + +static vroot_grb_func *vroot_get_real_bdev = 
NULL; + +static spinlock_t vroot_grb_lock = SPIN_LOCK_UNLOCKED; + +int register_vroot_grb(vroot_grb_func *func) { + int ret = -EBUSY; + + spin_lock(&vroot_grb_lock); + if (!vroot_get_real_bdev) { + vroot_get_real_bdev = func; + ret = 0; + } + spin_unlock(&vroot_grb_lock); + return ret; +} +EXPORT_SYMBOL(register_vroot_grb); + +int unregister_vroot_grb(vroot_grb_func *func) { + int ret = -EINVAL; + + spin_lock(&vroot_grb_lock); + if (vroot_get_real_bdev) { + vroot_get_real_bdev = NULL; + ret = 0; + } + spin_unlock(&vroot_grb_lock); + return ret; +} +EXPORT_SYMBOL(unregister_vroot_grb); + +#endif + /* * This is the system call interface. This communicates with * the user-level programs. Currently this only supports diskquota @@ -362,6 +402,23 @@ asmlinkage long sys_quotactl(unsigned in putname(tmp); if (IS_ERR(bdev)) return PTR_ERR(bdev); +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE) + if (bdev && bdev->bd_inode && + imajor(bdev->bd_inode) == VROOT_MAJOR) { + struct block_device *bdnew = (void *)-EINVAL; + + if (vroot_get_real_bdev) + bdnew = vroot_get_real_bdev(bdev); + else + vxdprintk(VXD_CBIT(misc, 0), + "vroot_get_real_bdev not set"); + + bdput(bdev); + if (IS_ERR(bdnew)) + return PTR_ERR(bdnew); + bdev = bdnew; + } +#endif sb = get_super(bdev); bdput(bdev); if (!sb) diff -NurpP --minimal linux-2.6.17.13/fs/reiserfs/bitmap.c linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/bitmap.c --- linux-2.6.17.13/fs/reiserfs/bitmap.c 2005-08-29 22:25:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/bitmap.c 2006-08-17 00:28:21 +0200 @@ -13,6 +13,7 @@ #include #include #include +#include #define PREALLOCATION_SIZE 9 @@ -411,8 +412,10 @@ static void _reiserfs_free_block(struct set_sb_free_blocks(rs, sb_free_blocks(rs) + 1); journal_mark_dirty(th, s, sbh); - if (for_unformatted) + if (for_unformatted) { + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK_NODIRTY(inode, 1); + } } void reiserfs_free_block(struct reiserfs_transaction_handle *th, @@ -1021,6 +1024,7 
@@ static inline int blocknrs_and_prealloc_ int passno = 0; int nr_allocated = 0; int bigalloc = 0; + int blocks; determine_prealloc_size(hint); if (!hint->formatted_node) { @@ -1030,19 +1034,30 @@ static inline int blocknrs_and_prealloc_ "reiserquota: allocating %d blocks id=%u", amount_needed, hint->inode->i_uid); #endif - quota_ret = - DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed); - if (quota_ret) /* Quota exceeded? */ + quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, + amount_needed); + if (quota_ret) return QUOTA_EXCEEDED; + if (DLIMIT_ALLOC_BLOCK(hint->inode, amount_needed)) { + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, + amount_needed); + return NO_DISK_SPACE; + } + if (hint->preallocate && hint->prealloc_size) { #ifdef REISERQUOTA_DEBUG reiserfs_debug(s, REISERFS_DEBUG_CODE, "reiserquota: allocating (prealloc) %d blocks id=%u", hint->prealloc_size, hint->inode->i_uid); #endif - quota_ret = - DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, - hint->prealloc_size); + quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, + hint->prealloc_size); + if (!quota_ret && + DLIMIT_ALLOC_BLOCK(hint->inode, hint->prealloc_size)) { + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, + hint->prealloc_size); + quota_ret = 1; + } if (quota_ret) hint->preallocate = hint->prealloc_size = 0; } @@ -1093,7 +1108,10 @@ static inline int blocknrs_and_prealloc_ nr_allocated, hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */ + /* Free not allocated blocks */ + blocks = amount_needed + hint->prealloc_size - nr_allocated; + DLIMIT_FREE_BLOCK(hint->inode, blocks); + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks); } while (nr_allocated--) reiserfs_free_block(hint->th, hint->inode, @@ -1125,10 +1143,10 @@ static inline int blocknrs_and_prealloc_ REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + - hint->prealloc_size - 
nr_allocated - - REISERFS_I(hint->inode)-> - i_prealloc_count); + blocks = amount_needed + hint->prealloc_size - nr_allocated - + REISERFS_I(hint->inode)->i_prealloc_count; + DLIMIT_FREE_BLOCK(hint->inode, blocks); + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks); } return CARRY_ON; diff -NurpP --minimal linux-2.6.17.13/fs/reiserfs/file.c linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/file.c --- linux-2.6.17.13/fs/reiserfs/file.c 2006-06-18 04:54:47 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/file.c 2006-08-17 00:28:21 +0200 @@ -1588,4 +1588,5 @@ struct inode_operations reiserfs_file_in .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; diff -NurpP --minimal linux-2.6.17.13/fs/reiserfs/inode.c linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/inode.c --- linux-2.6.17.13/fs/reiserfs/inode.c 2006-06-18 04:54:47 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/inode.c 2006-08-17 00:28:21 +0200 @@ -17,6 +17,8 @@ #include #include #include +#include +#include extern int reiserfs_default_io_size; /* default io size devuned in super.c */ @@ -57,6 +59,7 @@ void reiserfs_delete_inode(struct inode * stat data deletion */ if (!err) DQUOT_FREE_INODE(inode); + DLIMIT_FREE_INODE(inode); if (journal_end(&th, inode->i_sb, jbegin_count)) { mutex_unlock(&inode->i_mutex); @@ -1125,6 +1128,8 @@ static void init_inode(struct inode *ino struct buffer_head *bh; struct item_head *ih; __u32 rdev; + uid_t uid; + gid_t gid; //int version = ITEM_VERSION_1; bh = PATH_PLAST_BUFFER(path); @@ -1148,12 +1153,13 @@ static void init_inode(struct inode *ino (struct stat_data_v1 *)B_I_PITEM(bh, ih); unsigned long blocks; + uid = sd_v1_uid(sd); + gid = sd_v1_gid(sd); + set_inode_item_key_version(inode, KEY_FORMAT_3_5); set_inode_sd_version(inode, STAT_DATA_V1); inode->i_mode = sd_v1_mode(sd); inode->i_nlink = sd_v1_nlink(sd); - inode->i_uid = sd_v1_uid(sd); - inode->i_gid = sd_v1_gid(sd); inode->i_size = sd_v1_size(sd); 
inode->i_atime.tv_sec = sd_v1_atime(sd); inode->i_mtime.tv_sec = sd_v1_mtime(sd); @@ -1195,11 +1201,12 @@ static void init_inode(struct inode *ino // (directories and symlinks) struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); + uid = sd_v2_uid(sd); + gid = sd_v2_gid(sd); + inode->i_mode = sd_v2_mode(sd); inode->i_nlink = sd_v2_nlink(sd); - inode->i_uid = sd_v2_uid(sd); inode->i_size = sd_v2_size(sd); - inode->i_gid = sd_v2_gid(sd); inode->i_mtime.tv_sec = sd_v2_mtime(sd); inode->i_atime.tv_sec = sd_v2_atime(sd); inode->i_ctime.tv_sec = sd_v2_ctime(sd); @@ -1229,6 +1236,10 @@ static void init_inode(struct inode *ino sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); } + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, 0); + pathrelse(path); if (S_ISREG(inode->i_mode)) { inode->i_op = &reiserfs_file_inode_operations; @@ -1251,13 +1262,15 @@ static void init_inode(struct inode *ino static void inode2sd(void *sd, struct inode *inode, loff_t size) { struct stat_data *sd_v2 = (struct stat_data *)sd; + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); __u16 flags; + set_sd_v2_uid(sd_v2, uid); + set_sd_v2_gid(sd_v2, gid); set_sd_v2_mode(sd_v2, inode->i_mode); set_sd_v2_nlink(sd_v2, inode->i_nlink); - set_sd_v2_uid(sd_v2, inode->i_uid); set_sd_v2_size(sd_v2, size); - set_sd_v2_gid(sd_v2, inode->i_gid); set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); @@ -1788,6 +1801,10 @@ int reiserfs_new_inode(struct reiserfs_t BUG_ON(!th->t_trans_id); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto out_bad_dlimit; + } if (DQUOT_ALLOC_INODE(inode)) { err = -EDQUOT; goto out_end_trans; @@ -1973,6 +1990,9 @@ int reiserfs_new_inode(struct reiserfs_t DQUOT_FREE_INODE(inode); 
out_end_trans: + DLIMIT_FREE_INODE(inode); + + out_bad_dlimit: journal_end(th, th->t_super, th->t_blocks_allocated); /* Drop can be outside and it needs more credits so it's better to have it outside */ DQUOT_DROP(inode); @@ -2700,6 +2720,14 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (sd_attrs & REISERFS_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (sd_attrs & REISERFS_BARRIER_FL) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (sd_attrs & REISERFS_APPEND_FL) inode->i_flags |= S_APPEND; else @@ -2722,6 +2750,14 @@ void i_attrs_to_sd_attrs(struct inode *i *sd_attrs |= REISERFS_IMMUTABLE_FL; else *sd_attrs &= ~REISERFS_IMMUTABLE_FL; + if (inode->i_flags & S_IUNLINK) + *sd_attrs |= REISERFS_IUNLINK_FL; + else + *sd_attrs &= ~REISERFS_IUNLINK_FL; + if (inode->i_flags & S_BARRIER) + *sd_attrs |= REISERFS_BARRIER_FL; + else + *sd_attrs &= ~REISERFS_BARRIER_FL; if (inode->i_flags & S_SYNC) *sd_attrs |= REISERFS_SYNC_FL; else @@ -2901,6 +2937,22 @@ static ssize_t reiserfs_direct_IO(int rw reiserfs_get_blocks_direct_io, NULL); } +int reiserfs_sync_flags(struct inode *inode) +{ + u16 oldflags, newflags; + + oldflags = REISERFS_I(inode)->i_attrs; + newflags = oldflags; + i_attrs_to_sd_attrs(inode, &newflags); + + if (oldflags ^ newflags) { + REISERFS_I(inode)->i_attrs = newflags; + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + } + return 0; +} + int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -2945,9 +2997,11 @@ int reiserfs_setattr(struct dentry *dent } error = inode_change_ok(inode, attr); + if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_XID && attr->ia_xid != inode->i_xid)) { error = 
reiserfs_chown_xattrs(inode, attr); if (!error) { @@ -2977,6 +3031,9 @@ int reiserfs_setattr(struct dentry *dent inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_XID) && + IS_TAGXID(inode)) + inode->i_xid = attr->ia_xid; mark_inode_dirty(inode); error = journal_end(&th, inode->i_sb, jbegin_count); diff -NurpP --minimal linux-2.6.17.13/fs/reiserfs/ioctl.c linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/ioctl.c --- linux-2.6.17.13/fs/reiserfs/ioctl.c 2006-04-09 13:49:55 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/ioctl.c 2006-08-17 00:28:21 +0200 @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -23,7 +24,7 @@ static int reiserfs_unpack(struct inode int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { - unsigned int flags; + unsigned int flags, oldflags; switch (cmd) { case REISERFS_IOC_UNPACK: @@ -42,12 +43,14 @@ int reiserfs_ioctl(struct inode *inode, flags = REISERFS_I(inode)->i_attrs; i_attrs_to_sd_attrs(inode, (__u16 *) & flags); + flags &= REISERFS_FL_USER_VISIBLE; return put_user(flags, (int __user *)arg); case REISERFS_IOC_SETFLAGS:{ if (!reiserfs_attrs(inode->i_sb)) return -ENOTTY; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) @@ -57,10 +60,12 @@ int reiserfs_ioctl(struct inode *inode, if (get_user(flags, (int __user *)arg)) return -EFAULT; - if (((flags ^ REISERFS_I(inode)-> - i_attrs) & (REISERFS_IMMUTABLE_FL | - REISERFS_APPEND_FL)) - && !capable(CAP_LINUX_IMMUTABLE)) + oldflags = REISERFS_I(inode) -> i_attrs; + if (((oldflags & REISERFS_IMMUTABLE_FL) || + ((flags ^ oldflags) & + (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | + REISERFS_APPEND_FL))) && + !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; if ((flags & REISERFS_NOTAIL_FL) && @@ -71,6 +76,9 @@ int reiserfs_ioctl(struct inode *inode, if (result) return result; } + + 
flags = flags & REISERFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE; sd_attrs_to_i_attrs(flags, inode); REISERFS_I(inode)->i_attrs = flags; inode->i_ctime = CURRENT_TIME_SEC; @@ -82,7 +90,8 @@ int reiserfs_ioctl(struct inode *inode, case REISERFS_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(inode->i_generation, (int __user *)arg)) return -EFAULT; diff -NurpP --minimal linux-2.6.17.13/fs/reiserfs/namei.c linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/namei.c --- linux-2.6.17.13/fs/reiserfs/namei.c 2006-04-09 13:49:55 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/namei.c 2006-08-17 00:28:21 +0200 @@ -19,6 +19,7 @@ #include #include #include +#include #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--; @@ -365,6 +366,7 @@ static struct dentry *reiserfs_lookup(st reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); } + vx_propagate_xid(nd, inode); /* Propogate the priv_object flag so we know we're in the priv tree */ if (is_reiserfs_priv_object(dir)) @@ -600,6 +602,7 @@ static int new_inode_init(struct inode * } else { inode->i_gid = current->fsgid; } + inode->i_xid = vx_current_fsxid(inode->i_sb); DQUOT_INIT(inode); return 0; } @@ -1546,6 +1549,7 @@ struct inode_operations reiserfs_dir_ino .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; /* @@ -1562,6 +1566,7 @@ struct inode_operations reiserfs_symlink .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; @@ -1575,5 +1580,6 @@ struct inode_operations reiserfs_special .listxattr = reiserfs_listxattr, 
.removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; diff -NurpP --minimal linux-2.6.17.13/fs/reiserfs/stree.c linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/stree.c --- linux-2.6.17.13/fs/reiserfs/stree.c 2006-06-18 04:54:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/stree.c 2006-08-17 00:28:21 +0200 @@ -57,6 +57,7 @@ #include #include #include +#include /* Does the buffer contain a disk block which is in the tree. */ inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh) @@ -1298,6 +1299,7 @@ int reiserfs_delete_item(struct reiserfs "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); #endif + DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes); DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); /* Return deleted body length */ @@ -1384,6 +1386,7 @@ void reiserfs_delete_solid_item(struct r quota_cut_bytes, inode->i_uid, key2type(key)); #endif + DLIMIT_FREE_SPACE(inode, quota_cut_bytes); DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); } @@ -1741,6 +1744,7 @@ int reiserfs_cut_from_item(struct reiser "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?'); #endif + DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes); DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); return n_ret_value; } @@ -1982,6 +1986,11 @@ int reiserfs_paste_into_item(struct reis pathrelse(p_s_search_path); return -EDQUOT; } + if (DLIMIT_ALLOC_SPACE(inode, n_pasted_size)) { + DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); + pathrelse(p_s_search_path); + return -ENOSPC; + } init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); #ifdef DISPLACE_NEW_PACKING_LOCALITIES @@ -2034,6 +2043,7 @@ int reiserfs_paste_into_item(struct reis n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); #endif + DLIMIT_FREE_SPACE(inode, n_pasted_size); DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); return retval; } @@ -2071,6 
+2081,11 @@ int reiserfs_insert_item(struct reiserfs pathrelse(p_s_path); return -EDQUOT; } + if (DLIMIT_ALLOC_SPACE(inode, quota_bytes)) { + DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); + pathrelse(p_s_path); + return -ENOSPC; + } } init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); @@ -2118,7 +2133,9 @@ int reiserfs_insert_item(struct reiserfs "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); #endif - if (inode) + if (inode) { + DLIMIT_FREE_SPACE(inode, quota_bytes); DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); + } return retval; } diff -NurpP --minimal linux-2.6.17.13/fs/reiserfs/super.c linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/super.c --- linux-2.6.17.13/fs/reiserfs/super.c 2006-06-18 04:54:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/super.c 2006-08-17 00:28:21 +0200 @@ -883,6 +883,9 @@ static int reiserfs_parse_options(struct {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, #endif +#ifndef CONFIG_INOXID_NONE + {"tagxid",.setmask = 1 << REISERFS_TAGXID}, +#endif #ifdef CONFIG_REISERFS_FS_POSIX_ACL {"acl",.setmask = 1 << REISERFS_POSIXACL}, {"noacl",.clrmask = 1 << REISERFS_POSIXACL}, @@ -1155,6 +1158,12 @@ static int reiserfs_remount(struct super return -EINVAL; } + if ((mount_options & (1 << REISERFS_TAGXID)) && + !(s->s_flags & MS_TAGXID)) { + reiserfs_warning(s, "reiserfs: tagxid not permitted on remount."); + return -EINVAL; + } + handle_attrs(s); /* Add options that are safe here */ @@ -1730,6 +1739,10 @@ static int reiserfs_fill_super(struct su goto error; } + /* map mount option tagxid */ + if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGXID)) + s->s_flags |= MS_TAGXID; + rs = SB_DISK_SUPER_BLOCK(s); /* Let's do basic sanity check to verify that underlying device is not smaller than the filesystem. 
If the check fails then abort and scream, diff -NurpP --minimal linux-2.6.17.13/fs/reiserfs/xattr.c linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/xattr.c --- linux-2.6.17.13/fs/reiserfs/xattr.c 2006-02-18 14:40:26 +0100 +++ linux-2.6.17.13-vs2.0.2.1/fs/reiserfs/xattr.c 2006-08-17 00:28:21 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -824,7 +825,7 @@ int reiserfs_delete_xattrs(struct inode if (dir->d_inode->i_nlink <= 2) { root = get_xa_root(inode->i_sb); reiserfs_write_lock_xattrs(inode->i_sb); - err = vfs_rmdir(root->d_inode, dir); + err = vfs_rmdir(root->d_inode, dir, NULL); reiserfs_write_unlock_xattrs(inode->i_sb); dput(root); } else { diff -NurpP --minimal linux-2.6.17.13/fs/stat.c linux-2.6.17.13-vs2.0.2.1/fs/stat.c --- linux-2.6.17.13/fs/stat.c 2006-06-18 04:54:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/stat.c 2006-08-17 00:28:21 +0200 @@ -27,6 +27,7 @@ void generic_fillattr(struct inode *inod stat->nlink = inode->i_nlink; stat->uid = inode->i_uid; stat->gid = inode->i_gid; + stat->xid = inode->i_xid; stat->rdev = inode->i_rdev; stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; diff -NurpP --minimal linux-2.6.17.13/fs/super.c linux-2.6.17.13-vs2.0.2.1/fs/super.c --- linux-2.6.17.13/fs/super.c 2006-06-18 04:54:48 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/super.c 2006-08-17 00:28:21 +0200 @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include @@ -803,7 +805,7 @@ struct vfsmount * do_kern_mount(const char *fstype, int flags, const char *name, void *data) { struct file_system_type *type = get_fs_type(fstype); - struct super_block *sb = ERR_PTR(-ENOMEM); + struct super_block *sb; struct vfsmount *mnt; int error; char *secdata = NULL; @@ -811,6 +813,12 @@ do_kern_mount(const char *fstype, int fl if (!type) return ERR_PTR(-ENODEV); + sb = ERR_PTR(-EPERM); + if ((type->fs_flags & FS_BINARY_MOUNTDATA) && + !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT)) + goto out; + + sb = ERR_PTR(-ENOMEM); mnt = 
alloc_vfsmnt(name); if (!mnt) goto out; @@ -832,6 +840,13 @@ do_kern_mount(const char *fstype, int fl sb = type->get_sb(type, flags, name, data); if (IS_ERR(sb)) goto out_free_secdata; + + error = -EPERM; + if (!capable(CAP_SYS_ADMIN) && !sb->s_bdev && + (sb->s_magic != PROC_SUPER_MAGIC) && + (sb->s_magic != DEVPTS_SUPER_MAGIC)) + goto out_sb; + error = security_sb_kern_mount(sb, secdata); if (error) goto out_sb; diff -NurpP --minimal linux-2.6.17.13/fs/sysfs/mount.c linux-2.6.17.13-vs2.0.2.1/fs/sysfs/mount.c --- linux-2.6.17.13/fs/sysfs/mount.c 2005-08-29 22:25:33 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/sysfs/mount.c 2006-08-17 00:28:21 +0200 @@ -11,8 +11,6 @@ #include "sysfs.h" -/* Random magic number */ -#define SYSFS_MAGIC 0x62656572 struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; @@ -38,7 +36,7 @@ static int sysfs_fill_super(struct super sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = SYSFS_MAGIC; + sb->s_magic = SYSFS_SUPER_MAGIC; sb->s_op = &sysfs_ops; sb->s_time_gran = 1; sysfs_sb = sb; diff -NurpP --minimal linux-2.6.17.13/fs/xattr.c linux-2.6.17.13-vs2.0.2.1/fs/xattr.c --- linux-2.6.17.13/fs/xattr.c 2006-06-18 04:54:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xattr.c 2006-08-17 00:28:21 +0200 @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -168,7 +169,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); */ static long setxattr(struct dentry *d, char __user *name, void __user *value, - size_t size, int flags) + size_t size, int flags, struct vfsmount *mnt) { int error; void *kvalue = NULL; @@ -195,6 +196,9 @@ setxattr(struct dentry *d, char __user * } } - error = vfs_setxattr(d, kname, kvalue, size, flags); + if (MNT_IS_RDONLY(mnt)) + error = -EROFS; /* no early return: kvalue must still be freed below */ + else + error = vfs_setxattr(d, kname, kvalue, size, flags); kfree(kvalue); return error; @@ -210,7 +214,7 @@ sys_setxattr(char __user *path, char __u error = user_path_walk(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); + error = setxattr(nd.dentry, name, 
value, size, flags, nd.mnt); path_release(&nd); return error; } @@ -225,7 +229,7 @@ sys_lsetxattr(char __user *path, char __ error = user_path_walk_link(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); + error = setxattr(nd.dentry, name, value, size, flags, nd.mnt); path_release(&nd); return error; } @@ -243,7 +247,7 @@ sys_fsetxattr(int fd, char __user *name, return error; dentry = f->f_dentry; audit_inode(NULL, dentry->d_inode, 0); - error = setxattr(dentry, name, value, size, flags); + error = setxattr(dentry, name, value, size, flags, f->f_vfsmnt); fput(f); return error; } @@ -416,7 +420,7 @@ sys_flistxattr(int fd, char __user *list * Extended attribute REMOVE operations */ static long -removexattr(struct dentry *d, char __user *name) +removexattr(struct dentry *d, char __user *name, struct vfsmount *mnt) { int error; char kname[XATTR_NAME_MAX + 1]; @@ -427,6 +431,9 @@ removexattr(struct dentry *d, char __use if (error < 0) return error; + if (MNT_IS_RDONLY(mnt)) + return -EROFS; + return vfs_removexattr(d, kname); } @@ -439,7 +446,7 @@ sys_removexattr(char __user *path, char error = user_path_walk(path, &nd); if (error) return error; - error = removexattr(nd.dentry, name); + error = removexattr(nd.dentry, name, nd.mnt); path_release(&nd); return error; } @@ -453,7 +460,7 @@ sys_lremovexattr(char __user *path, char error = user_path_walk_link(path, &nd); if (error) return error; - error = removexattr(nd.dentry, name); + error = removexattr(nd.dentry, name, nd.mnt); path_release(&nd); return error; } @@ -470,7 +477,7 @@ sys_fremovexattr(int fd, char __user *na return error; dentry = f->f_dentry; audit_inode(NULL, dentry->d_inode, 0); - error = removexattr(dentry, name); + error = removexattr(dentry, name, f->f_vfsmnt); fput(f); return error; } diff -NurpP --minimal linux-2.6.17.13/fs/xfs/linux-2.6/xfs_ioctl.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_ioctl.c --- linux-2.6.17.13/fs/xfs/linux-2.6/xfs_ioctl.c 
2006-06-18 04:54:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_ioctl.c 2006-08-17 00:28:21 +0200 @@ -1100,6 +1100,8 @@ xfs_ioc_fsgeometry( #define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ #define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ #define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ +#define LINUX_XFLAG_BARRIER 0x04000000 /* chroot() barrier */ +#define LINUX_XFLAG_IUNLINK 0x08000000 /* immutable unlink */ STATIC unsigned int xfs_merge_ioc_xflags( @@ -1140,6 +1142,10 @@ xfs_di2lxflags( if (di_flags & XFS_DIFLAG_IMMUTABLE) flags |= LINUX_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_IUNLINK) + flags |= LINUX_XFLAG_IUNLINK; + if (di_flags & XFS_DIFLAG_BARRIER) + flags |= LINUX_XFLAG_BARRIER; if (di_flags & XFS_DIFLAG_APPEND) flags |= LINUX_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) diff -NurpP --minimal linux-2.6.17.13/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_iops.c --- linux-2.6.17.13/fs/xfs/linux-2.6/xfs_iops.c 2006-06-18 04:54:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_iops.c 2006-08-17 00:28:21 +0200 @@ -55,6 +55,7 @@ #include #include #include +#include /* * Get a XFS inode from a given vnode. 
@@ -409,6 +410,7 @@ xfs_vn_lookup( d_add(dentry, NULL); return NULL; } + vx_propagate_xid(nd, vn_to_inode(cvp)); return d_splice_alias(vn_to_inode(cvp), dentry); } @@ -658,6 +660,10 @@ xfs_vn_setattr( int flags = 0; int error; + error = inode_change_ok(inode, attr); + if (error) + return error; + if (ia_valid & ATTR_UID) { vattr.va_mask |= XFS_AT_UID; vattr.va_uid = attr->ia_uid; @@ -666,6 +672,10 @@ xfs_vn_setattr( vattr.va_mask |= XFS_AT_GID; vattr.va_gid = attr->ia_gid; } + if ((ia_valid & ATTR_XID) && IS_TAGXID(inode)) { + vattr.va_mask |= XFS_AT_XID; + vattr.va_xid = attr->ia_xid; + } if (ia_valid & ATTR_SIZE) { vattr.va_mask |= XFS_AT_SIZE; vattr.va_size = attr->ia_size; @@ -711,6 +721,41 @@ xfs_vn_truncate( } STATIC int +xfs_vn_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + vattr_t vattr; + int flags = 0; + int error; + vnode_t *vp = vn_from_inode(inode); + + memset(&vattr, 0, sizeof(vattr_t)); + + vattr.va_mask = XFS_AT_XFLAGS; + VOP_GETATTR(vp, &vattr, 0, NULL, error); + if (error) + return error; + oldflags = vattr.va_xflags; + newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE | + XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER); + + if (IS_IMMUTABLE(inode)) + newflags |= XFS_XFLAG_IMMUTABLE; + if (IS_IUNLINK(inode)) + newflags |= XFS_XFLAG_IUNLINK; + if (IS_BARRIER(inode)) + newflags |= XFS_XFLAG_BARRIER; + + if (oldflags ^ newflags) { + vattr.va_xflags = newflags; + vattr.va_mask |= XFS_AT_XFLAGS; + VOP_SETATTR(vp, &vattr, flags, NULL, error); + } + vn_revalidate(vp); + return error; +} + +STATIC int xfs_vn_setxattr( struct dentry *dentry, const char *name, @@ -823,6 +868,7 @@ struct inode_operations xfs_inode_operat .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, .removexattr = xfs_vn_removexattr, + .sync_flags = xfs_vn_sync_flags, }; struct inode_operations xfs_dir_inode_operations = { @@ -842,6 +888,7 @@ struct inode_operations xfs_dir_inode_op .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, .removexattr = 
xfs_vn_removexattr, + .sync_flags = xfs_vn_sync_flags, }; struct inode_operations xfs_symlink_inode_operations = { @@ -855,4 +902,5 @@ struct inode_operations xfs_symlink_inod .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, .removexattr = xfs_vn_removexattr, + .sync_flags = xfs_vn_sync_flags, }; diff -NurpP --minimal linux-2.6.17.13/fs/xfs/linux-2.6/xfs_linux.h linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_linux.h --- linux-2.6.17.13/fs/xfs/linux-2.6/xfs_linux.h 2006-06-18 04:54:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_linux.h 2006-08-17 00:28:21 +0200 @@ -142,6 +142,7 @@ BUFFER_FNS(PrivateStart, unwritten); #define current_pid() (current->pid) #define current_fsuid(cred) (current->fsuid) #define current_fsgid(cred) (current->fsgid) +#define current_fsxid(cred,vp) (vx_current_fsxid(vn_to_inode(vp)->i_sb)) #define NBPP PAGE_SIZE #define DPPSHFT (PAGE_SHIFT - 9) diff -NurpP --minimal linux-2.6.17.13/fs/xfs/linux-2.6/xfs_super.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_super.c --- linux-2.6.17.13/fs/xfs/linux-2.6/xfs_super.c 2006-06-18 04:54:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_super.c 2006-08-17 00:28:21 +0200 @@ -160,6 +160,7 @@ xfs_revalidate_inode( inode->i_nlink = ip->i_d.di_nlink; inode->i_uid = ip->i_d.di_uid; inode->i_gid = ip->i_d.di_gid; + inode->i_xid = ip->i_d.di_xid; switch (inode->i_mode & S_IFMT) { case S_IFBLK: @@ -188,6 +189,14 @@ xfs_revalidate_inode( inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) inode->i_flags |= S_APPEND; else @@ -724,6 +733,12 @@ xfs_fs_remount( int error; VFS_PARSEARGS(vfsp, options, args, 1, error); + if ((args->flags2 & XFSMNT2_TAGXID) && + !(sb->s_flags & MS_TAGXID)) { + 
printk("XFS: %s: tagxid not permitted on remount.\n", + sb->s_id); + error = EINVAL; + } if (!error) VFS_MNTUPDATE(vfsp, flags, args, error); kmem_free(args, sizeof(*args)); diff -NurpP --minimal linux-2.6.17.13/fs/xfs/linux-2.6/xfs_sysctl.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_sysctl.c --- linux-2.6.17.13/fs/xfs/linux-2.6/xfs_sysctl.c 2006-06-18 04:54:49 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_sysctl.c 2006-08-17 00:28:21 +0200 @@ -57,74 +57,74 @@ xfs_stats_clear_proc_handler( STATIC ctl_table xfs_table[] = { {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max}, {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max}, {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max}, {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.panic_mask.min, &xfs_params.panic_mask.max}, {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.error_level.min, &xfs_params.error_level.max}, {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max}, {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val, sizeof(int), 0644, 
NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max}, {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max}, {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max}, {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max}, {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max}, {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max}, {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.rotorstep.min, &xfs_params.rotorstep.max}, /* please keep this the last entry */ #ifdef CONFIG_PROC_FS {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.stats_clear.min, &xfs_params.stats_clear.max}, #endif /* CONFIG_PROC_FS */ diff -NurpP --minimal linux-2.6.17.13/fs/xfs/linux-2.6/xfs_vnode.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_vnode.c --- 
linux-2.6.17.13/fs/xfs/linux-2.6/xfs_vnode.c 2006-06-18 04:54:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_vnode.c 2006-08-17 00:28:21 +0200 @@ -103,6 +103,7 @@ vn_revalidate_core( inode->i_nlink = vap->va_nlink; inode->i_uid = vap->va_uid; inode->i_gid = vap->va_gid; + inode->i_xid = vap->va_xid; inode->i_blocks = vap->va_nblocks; inode->i_mtime = vap->va_mtime; inode->i_ctime = vap->va_ctime; @@ -111,6 +112,14 @@ vn_revalidate_core( inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else diff -NurpP --minimal linux-2.6.17.13/fs/xfs/linux-2.6/xfs_vnode.h linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_vnode.h --- linux-2.6.17.13/fs/xfs/linux-2.6/xfs_vnode.h 2006-06-18 04:54:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/linux-2.6/xfs_vnode.h 2006-08-17 00:28:21 +0200 @@ -404,6 +404,7 @@ typedef struct vattr { xfs_nlink_t va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ + xid_t va_xid; /* owner context id */ xfs_ino_t va_nodeid; /* file id */ xfs_off_t va_size; /* file size in bytes */ u_long va_blocksize; /* blocksize preferred for i/o */ @@ -452,13 +453,15 @@ typedef struct vattr { #define XFS_AT_PROJID 0x04000000 #define XFS_AT_SIZE_NOPERM 0x08000000 #define XFS_AT_GENCOUNT 0x10000000 +#define XFS_AT_XID 0x20000000 #define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\ XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\ - XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT) + 
XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT|\ + XFS_AT_XID) #define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ diff -NurpP --minimal linux-2.6.17.13/fs/xfs/quota/xfs_qm_syscalls.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/quota/xfs_qm_syscalls.c --- linux-2.6.17.13/fs/xfs/quota/xfs_qm_syscalls.c 2006-06-18 04:54:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/quota/xfs_qm_syscalls.c 2006-08-17 00:28:21 +0200 @@ -215,7 +215,7 @@ xfs_qm_scall_quotaoff( xfs_qoff_logitem_t *qoffstart; int nculprits; - if (!force && !capable(CAP_SYS_ADMIN)) + if (!force && !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); /* * No file system can have quotas enabled on disk but not in core. @@ -384,7 +384,7 @@ xfs_qm_scall_trunc_qfiles( int error; xfs_inode_t *qip; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); error = 0; if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) { @@ -429,7 +429,7 @@ xfs_qm_scall_quotaon( uint accflags; __int64_t sbflags; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); @@ -600,7 +600,7 @@ xfs_qm_scall_setqlim( int error; xfs_qcnt_t hard, soft; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); if ((newlim->d_fieldmask & diff -NurpP --minimal linux-2.6.17.13/fs/xfs/xfs_clnt.h linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_clnt.h --- linux-2.6.17.13/fs/xfs/xfs_clnt.h 2006-06-18 04:54:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_clnt.h 2006-08-17 00:28:21 +0200 @@ -99,5 +99,7 @@ struct xfs_mount_args { */ #define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred * I/O size in stat(2) */ +#define XFSMNT2_TAGXID 0x80000000 /* context xid tagging */ + #endif /* __XFS_CLNT_H__ */ diff -NurpP --minimal linux-2.6.17.13/fs/xfs/xfs_dinode.h 
linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_dinode.h --- linux-2.6.17.13/fs/xfs/xfs_dinode.h 2006-04-09 13:49:55 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_dinode.h 2006-08-17 00:28:21 +0200 @@ -53,7 +53,8 @@ typedef struct xfs_dinode_core __uint32_t di_gid; /* owner's group id */ __uint32_t di_nlink; /* number of links to file */ __uint16_t di_projid; /* owner's project id */ - __uint8_t di_pad[8]; /* unused, zeroed space */ + __uint16_t di_xid; /* vserver context id */ + __uint8_t di_pad[6]; /* unused, zeroed space */ __uint16_t di_flushiter; /* incremented on flush */ xfs_timestamp_t di_atime; /* time last accessed */ xfs_timestamp_t di_mtime; /* time last modified */ @@ -257,6 +258,9 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */ #define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ +#define XFS_DIFLAG_BARRIER_BIT 13 /* chroot() barrier */ +#define XFS_DIFLAG_IUNLINK_BIT 14 /* immutable unlink */ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) @@ -270,12 +274,14 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT) #define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) +#define XFS_DIFLAG_BARRIER (1 << XFS_DIFLAG_BARRIER_BIT) +#define XFS_DIFLAG_IUNLINK (1 << XFS_DIFLAG_IUNLINK_BIT) #define XFS_DIFLAG_ANY \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ - XFS_DIFLAG_EXTSZINHERIT) + XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_BARRIER | XFS_DIFLAG_IUNLINK) #endif /* 
__XFS_DINODE_H__ */ diff -NurpP --minimal linux-2.6.17.13/fs/xfs/xfs_fs.h linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_fs.h --- linux-2.6.17.13/fs/xfs/xfs_fs.h 2006-04-09 13:49:55 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_fs.h 2006-08-17 00:28:21 +0200 @@ -67,6 +67,8 @@ struct fsxattr { #define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ #define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ #define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ +#define XFS_XFLAG_BARRIER 0x00004000 /* chroot() barrier */ +#define XFS_XFLAG_IUNLINK 0x00008000 /* immutable unlink */ #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* @@ -295,7 +297,8 @@ typedef struct xfs_bstat { __s32 bs_extents; /* number of extents */ __u32 bs_gen; /* generation count */ __u16 bs_projid; /* project id */ - unsigned char bs_pad[14]; /* pad space, unused */ + __u16 bs_xid; /* context id */ + unsigned char bs_pad[12]; /* pad space, unused */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ diff -NurpP --minimal linux-2.6.17.13/fs/xfs/xfs_inode.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_inode.c --- linux-2.6.17.13/fs/xfs/xfs_inode.c 2006-06-18 04:54:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_inode.c 2006-08-17 00:28:21 +0200 @@ -52,6 +52,7 @@ #include "xfs_mac.h" #include "xfs_acl.h" +#include kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; @@ -732,20 +733,35 @@ xfs_xlate_dinode_core( xfs_dinode_core_t *buf_core = (xfs_dinode_core_t *)buf; xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; xfs_arch_t arch = ARCH_CONVERT; + uint32_t uid = 0, gid = 0; + uint16_t xid = 0; ASSERT(dir); + if (dir < 0) { + xid = mem_core->di_xid; + /* FIXME: supposed to use superblock flag */ + uid = XIDINO_UID(1, mem_core->di_uid, xid); + gid = XIDINO_GID(1, mem_core->di_gid, xid); + xid = XIDINO_XID(1, xid); + } + INT_XLATE(buf_core->di_magic, 
mem_core->di_magic, dir, arch); INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch); INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch); INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch); INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); - INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch); - INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch); + INT_XLATE(buf_core->di_uid, uid, dir, arch); + INT_XLATE(buf_core->di_gid, gid, dir, arch); + INT_XLATE(buf_core->di_xid, xid, dir, arch); INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); if (dir > 0) { + /* FIXME: supposed to use superblock flag */ + mem_core->di_uid = INOXID_UID(1, uid, gid); + mem_core->di_gid = INOXID_GID(1, uid, gid); + mem_core->di_xid = INOXID_XID(1, uid, gid, xid); memcpy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad)); } else { @@ -794,6 +810,10 @@ _xfs_dic2xflags( flags |= XFS_XFLAG_PREALLOC; if (di_flags & XFS_DIFLAG_IMMUTABLE) flags |= XFS_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_IUNLINK) + flags |= XFS_XFLAG_IUNLINK; + if (di_flags & XFS_DIFLAG_BARRIER) + flags |= XFS_XFLAG_BARRIER; if (di_flags & XFS_DIFLAG_APPEND) flags |= XFS_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) @@ -1121,6 +1141,7 @@ xfs_ialloc( ASSERT(ip->i_d.di_nlink == nlink); ip->i_d.di_uid = current_fsuid(cr); ip->i_d.di_gid = current_fsgid(cr); + ip->i_d.di_xid = current_fsxid(cr, vp); ip->i_d.di_projid = prid; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); diff -NurpP --minimal linux-2.6.17.13/fs/xfs/xfs_itable.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_itable.c --- linux-2.6.17.13/fs/xfs/xfs_itable.c 2006-06-18 04:54:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_itable.c 2006-08-17 00:28:21 +0200 @@ -85,6 +85,7 @@ xfs_bulkstat_one_iget( buf->bs_mode = dic->di_mode; buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; + buf->bs_xid = dic->di_xid; buf->bs_size = 
dic->di_size; vn_atime_to_bstime(vp, &buf->bs_atime); buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; @@ -159,6 +160,7 @@ xfs_bulkstat_one_dinode( buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT); buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT); buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT); + buf->bs_xid = INT_GET(dic->di_xid, ARCH_CONVERT); buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT); buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT); buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT); diff -NurpP --minimal linux-2.6.17.13/fs/xfs/xfs_mount.h linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_mount.h --- linux-2.6.17.13/fs/xfs/xfs_mount.h 2006-06-18 04:54:53 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_mount.h 2006-08-17 00:28:21 +0200 @@ -445,6 +445,7 @@ typedef struct xfs_mount { #define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock counters */ +#define XFS_MOUNT_TAGXID (1ULL << 31) /* context xid tagging */ /* * Default minimum read and write sizes. diff -NurpP --minimal linux-2.6.17.13/fs/xfs/xfs_vfsops.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_vfsops.c --- linux-2.6.17.13/fs/xfs/xfs_vfsops.c 2006-06-18 04:54:54 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_vfsops.c 2006-08-17 00:28:21 +0200 @@ -308,6 +308,8 @@ xfs_start_flags( if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + if (ap->flags2 & XFSMNT2_TAGXID) + mp->m_flags |= XFS_MOUNT_TAGXID; /* * no recovery flag requires a read-only mount @@ -402,6 +404,8 @@ xfs_finish_flags( return XFS_ERROR(EINVAL); } + if (ap->flags2 & XFSMNT2_TAGXID) + vfs->vfs_super->s_flags |= MS_TAGXID; return 0; } @@ -1655,6 +1659,7 @@ xfs_vget( * in stat(). 
*/ #define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ #define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ +#define MNTOPT_TAGXID "tagxid" /* context xid tagging for inodes */ STATIC unsigned long suffix_strtoul(const char *cp, char **endp, unsigned int base) @@ -1829,6 +1834,10 @@ xfs_parseargs( args->flags |= XFSMNT_ATTR2; } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { args->flags &= ~XFSMNT_ATTR2; +#ifndef CONFIG_INOXID_NONE + } else if (!strcmp(this_char, MNTOPT_TAGXID)) { + args->flags2 |= XFSMNT2_TAGXID; +#endif } else if (!strcmp(this_char, "osyncisdsync")) { /* no-op, this is now the default */ printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); diff -NurpP --minimal linux-2.6.17.13/fs/xfs/xfs_vnodeops.c linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_vnodeops.c --- linux-2.6.17.13/fs/xfs/xfs_vnodeops.c 2006-06-18 04:54:54 +0200 +++ linux-2.6.17.13-vs2.0.2.1/fs/xfs/xfs_vnodeops.c 2006-08-17 00:28:21 +0200 @@ -154,6 +154,7 @@ xfs_getattr( vap->va_mode = ip->i_d.di_mode; vap->va_uid = ip->i_d.di_uid; vap->va_gid = ip->i_d.di_gid; + vap->va_xid = ip->i_d.di_xid; vap->va_projid = ip->i_d.di_projid; /* @@ -254,6 +255,7 @@ xfs_setattr( uint commit_flags=0; uid_t uid=0, iuid=0; gid_t gid=0, igid=0; + xid_t xid=0, ixid=0; int timeflags = 0; vnode_t *vp; xfs_prid_t projid=0, iprojid=0; @@ -310,6 +312,7 @@ xfs_setattr( (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { uint qflags = 0; + /* FIXME: handle xid? */ if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { uid = vap->va_uid; qflags |= XFS_QMOPT_UQUOTA; @@ -390,6 +393,8 @@ xfs_setattr( if (mask & (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| XFS_AT_GID|XFS_AT_PROJID)) { + /* FIXME: handle xid? */ + /* * CAP_FOWNER overrides the following restrictions: * @@ -438,7 +443,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. 
*/ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_XID|XFS_AT_PROJID)) { /* * These IDs could have changed since we last looked at them. * But, we're assured that if the ownership did change @@ -446,10 +451,12 @@ xfs_setattr( * would have changed also. */ iuid = ip->i_d.di_uid; - iprojid = ip->i_d.di_projid; igid = ip->i_d.di_gid; - gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + ixid = ip->i_d.di_xid; + iprojid = ip->i_d.di_projid; uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; + gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + xid = (mask & XFS_AT_XID) ? vap->va_xid : ixid; projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid : iprojid; @@ -477,6 +484,7 @@ xfs_setattr( if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { + /* FIXME: handle xid? */ ASSERT(tp); code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? @@ -694,7 +702,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_XID|XFS_AT_PROJID)) { /* * CAP_FSETID overrides the following restrictions: * @@ -710,6 +718,12 @@ xfs_setattr( * Change the ownerships and register quota modifications * in the transaction. */ + if (ixid != xid) { + if (XFS_IS_GQUOTA_ON(mp)) { + /* FIXME: handle xid quota? 
*/ + } + ip->i_d.di_xid = xid; + } if (iuid != uid) { if (XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & XFS_AT_UID); @@ -790,6 +804,10 @@ xfs_setattr( di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) di_flags |= XFS_DIFLAG_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + di_flags |= XFS_DIFLAG_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + di_flags |= XFS_DIFLAG_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) di_flags |= XFS_DIFLAG_APPEND; if (vap->va_xflags & XFS_XFLAG_SYNC) diff -NurpP --minimal linux-2.6.17.13/include/asm-arm/tlb.h linux-2.6.17.13-vs2.0.2.1/include/asm-arm/tlb.h --- linux-2.6.17.13/include/asm-arm/tlb.h 2006-06-18 04:54:58 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-arm/tlb.h 2006-08-17 00:28:21 +0200 @@ -28,6 +28,7 @@ #else /* !CONFIG_MMU */ #include +#include /* * TLB handling. This allows us to remove pages from the page diff -NurpP --minimal linux-2.6.17.13/include/asm-arm26/tlb.h linux-2.6.17.13-vs2.0.2.1/include/asm-arm26/tlb.h --- linux-2.6.17.13/include/asm-arm26/tlb.h 2006-01-03 17:30:02 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-arm26/tlb.h 2006-08-17 00:28:21 +0200 @@ -3,6 +3,7 @@ #include #include +#include /* * TLB handling. This allows us to remove pages from the page diff -NurpP --minimal linux-2.6.17.13/include/asm-arm26/unistd.h linux-2.6.17.13-vs2.0.2.1/include/asm-arm26/unistd.h --- linux-2.6.17.13/include/asm-arm26/unistd.h 2006-01-03 17:30:02 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-arm26/unistd.h 2006-08-17 00:28:21 +0200 @@ -304,6 +304,8 @@ #define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) #define __NR_waitid (__NR_SYSCALL_BASE+280) +#define __NR_vserver (__NR_SYSCALL_BASE+313) + /* * The following SWIs are ARM private. 
FIXME - make appropriate for arm26 */ diff -NurpP --minimal linux-2.6.17.13/include/asm-generic/tlb.h linux-2.6.17.13-vs2.0.2.1/include/asm-generic/tlb.h --- linux-2.6.17.13/include/asm-generic/tlb.h 2006-01-03 17:30:02 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-generic/tlb.h 2006-08-17 00:28:21 +0200 @@ -15,6 +15,7 @@ #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/include/asm-i386/elf.h linux-2.6.17.13-vs2.0.2.1/include/asm-i386/elf.h --- linux-2.6.17.13/include/asm-i386/elf.h 2006-01-03 17:30:04 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-i386/elf.h 2006-08-17 00:28:21 +0200 @@ -108,7 +108,7 @@ typedef struct user_fxsr_struct elf_fpxr For the moment, we have only optimizations for the Intel generations, but that could change... */ -#define ELF_PLATFORM (system_utsname.machine) +#define ELF_PLATFORM (vx_new_uts(machine)) #ifdef __KERNEL__ #define SET_PERSONALITY(ex, ibcs2) do { } while (0) diff -NurpP --minimal linux-2.6.17.13/include/asm-ia64/tlb.h linux-2.6.17.13-vs2.0.2.1/include/asm-ia64/tlb.h --- linux-2.6.17.13/include/asm-ia64/tlb.h 2006-01-03 17:30:05 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-ia64/tlb.h 2006-08-17 00:28:21 +0200 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/include/asm-powerpc/unistd.h linux-2.6.17.13-vs2.0.2.1/include/asm-powerpc/unistd.h --- linux-2.6.17.13/include/asm-powerpc/unistd.h 2006-06-18 04:55:08 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-powerpc/unistd.h 2006-08-17 00:28:21 +0200 @@ -275,7 +275,7 @@ #endif #define __NR_rtas 255 #define __NR_sys_debug_setcontext 256 -/* Number 257 is reserved for vserver */ +#define __NR_vserver 257 /* 258 currently unused */ #define __NR_mbind 259 #define __NR_get_mempolicy 260 diff -NurpP --minimal linux-2.6.17.13/include/asm-s390/unistd.h linux-2.6.17.13-vs2.0.2.1/include/asm-s390/unistd.h --- linux-2.6.17.13/include/asm-s390/unistd.h 2006-06-18 04:55:09 +0200 +++ 
linux-2.6.17.13-vs2.0.2.1/include/asm-s390/unistd.h 2006-08-17 00:28:21 +0200 @@ -255,7 +255,7 @@ #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres (__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) -/* Number 263 is reserved for vserver */ +#define __NR_vserver 263 #define __NR_fadvise64_64 264 #define __NR_statfs64 265 #define __NR_fstatfs64 266 diff -NurpP --minimal linux-2.6.17.13/include/asm-sparc/unistd.h linux-2.6.17.13-vs2.0.2.1/include/asm-sparc/unistd.h --- linux-2.6.17.13/include/asm-sparc/unistd.h 2006-06-18 04:55:10 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-sparc/unistd.h 2006-08-17 00:28:21 +0200 @@ -283,7 +283,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff -NurpP --minimal linux-2.6.17.13/include/asm-sparc64/tlb.h linux-2.6.17.13-vs2.0.2.1/include/asm-sparc64/tlb.h --- linux-2.6.17.13/include/asm-sparc64/tlb.h 2006-01-03 17:30:08 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-sparc64/tlb.h 2006-08-17 00:28:21 +0200 @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff -NurpP --minimal linux-2.6.17.13/include/asm-sparc64/unistd.h linux-2.6.17.13-vs2.0.2.1/include/asm-sparc64/unistd.h --- linux-2.6.17.13/include/asm-sparc64/unistd.h 2006-06-18 04:55:11 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-sparc64/unistd.h 2006-08-17 00:28:21 +0200 @@ -285,7 +285,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff -NurpP --minimal linux-2.6.17.13/include/asm-x86_64/unistd.h linux-2.6.17.13-vs2.0.2.1/include/asm-x86_64/unistd.h --- 
linux-2.6.17.13/include/asm-x86_64/unistd.h 2006-06-18 04:55:15 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/asm-x86_64/unistd.h 2006-08-17 00:28:21 +0200 @@ -532,7 +532,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_utimes 235 __SYSCALL(__NR_utimes, sys_utimes) #define __NR_vserver 236 -__SYSCALL(__NR_vserver, sys_ni_syscall) +__SYSCALL(__NR_vserver, sys_vserver) #define __NR_mbind 237 __SYSCALL(__NR_mbind, sys_mbind) #define __NR_set_mempolicy 238 diff -NurpP --minimal linux-2.6.17.13/include/linux/capability.h linux-2.6.17.13-vs2.0.2.1/include/linux/capability.h --- linux-2.6.17.13/include/linux/capability.h 2006-06-18 04:55:15 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/capability.h 2006-08-17 00:28:21 +0200 @@ -235,6 +235,7 @@ typedef __u32 kernel_cap_t; arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ /* Allow setting zone reclaim policy */ +/* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 @@ -288,6 +289,11 @@ typedef __u32 kernel_cap_t; #define CAP_AUDIT_CONTROL 30 +/* Allow context manipulations */ +/* Allow changing context info on files */ + +#define CAP_CONTEXT 31 + #ifdef __KERNEL__ /* * Bounding set diff -NurpP --minimal linux-2.6.17.13/include/linux/devpts_fs.h linux-2.6.17.13-vs2.0.2.1/include/linux/devpts_fs.h --- linux-2.6.17.13/include/linux/devpts_fs.h 2004-08-14 12:55:59 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/devpts_fs.h 2006-08-17 00:28:21 +0200 @@ -30,5 +30,7 @@ static inline void devpts_pty_kill(int n #endif +#define DEVPTS_SUPER_MAGIC 0x00001cd1 + #endif /* _LINUX_DEVPTS_FS_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/ext2_fs.h linux-2.6.17.13-vs2.0.2.1/include/linux/ext2_fs.h --- linux-2.6.17.13/include/linux/ext2_fs.h 2005-10-28 20:49:54 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/ext2_fs.h 2006-08-17 00:28:21 +0200 @@ -192,10 +192,17 @@ struct ext2_group_desc #define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define 
EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT2_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT2_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define EXT2_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#endif /* * ioctl commands @@ -240,7 +247,7 @@ struct ext2_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_xid; /* LRU Context */ __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -272,6 +279,7 @@ struct ext2_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_xid osd2.linux2.l_i_xid #define i_reserved2 osd2.linux2.l_i_reserved2 #endif @@ -313,8 +321,9 @@ struct ext2_inode { #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ -#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ -#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ +#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ +#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ +#define EXT2_MOUNT_TAGXID (1<<24) /* Enable Context Tags */ #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt diff -NurpP --minimal linux-2.6.17.13/include/linux/ext3_fs.h linux-2.6.17.13-vs2.0.2.1/include/linux/ext3_fs.h --- linux-2.6.17.13/include/linux/ext3_fs.h 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/ext3_fs.h 
2006-08-17 00:28:21 +0200 @@ -186,10 +186,20 @@ struct ext3_group_desc #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT3_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#endif +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_IOC_SETXID FIOC_SETXIDJ +#endif /* * Inode dynamic state flags @@ -288,7 +298,7 @@ struct ext3_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_xid; /* LRU Context */ __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -322,6 +332,7 @@ struct ext3_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_xid osd2.linux2.l_i_xid #define i_reserved2 osd2.linux2.l_i_reserved2 #elif defined(__GNU__) @@ -376,6 +387,7 @@ struct ext3_inode { #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT3_MOUNT_TAGXID (1<<24) /* Enable Context Tags */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -790,6 +802,7 @@ struct buffer_head * ext3_bread (handle_ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long maxblocks, 
struct buffer_head *bh_result, int create, int extend_disksize); +extern int ext3_sync_flags(struct inode *inode); extern void ext3_read_inode (struct inode *); extern int ext3_write_inode (struct inode *, int); diff -NurpP --minimal linux-2.6.17.13/include/linux/fs.h linux-2.6.17.13-vs2.0.2.1/include/linux/fs.h --- linux-2.6.17.13/include/linux/fs.h 2006-06-18 04:55:17 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/fs.h 2006-08-17 00:28:21 +0200 @@ -116,6 +116,8 @@ extern int dir_notify_enable; #define MS_PRIVATE (1<<18) /* change to private */ #define MS_SLAVE (1<<19) /* change to slave */ #define MS_SHARED (1<<20) /* change to shared */ +#define MS_TAGXID (1<<24) /* tag inodes with context information */ +#define MS_XID (1<<25) /* use specific xid for this mount */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -142,6 +144,8 @@ extern int dir_notify_enable; #define S_NOCMTIME 128 /* Do not update file c/mtime */ #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ #define S_PRIVATE 512 /* Inode is fs-internal */ +#define S_BARRIER 1024 /* Barrier for chroot() */ +#define S_IUNLINK 2048 /* Immutable unlink */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -158,18 +162,22 @@ extern int dir_notify_enable; */ #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) -#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) +#define IS_RDONLY(inode) __IS_FLG(inode, MS_RDONLY) #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) +#define IS_TAGXID(inode) __IS_FLG(inode, MS_TAGXID) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_IUNLINK(inode) ((inode)->i_flags & 
S_IUNLINK) +#define IS_IXORUNLINK(inode) ((IS_IUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) +#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER)) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) @@ -277,6 +285,7 @@ typedef void (dio_iodone_t)(struct kiocb #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 #define ATTR_FILE 8192 +#define ATTR_XID 16384 /* * This is the Inode Attributes structure, used for notify_change(). It @@ -292,6 +301,7 @@ struct iattr { umode_t ia_mode; uid_t ia_uid; gid_t ia_gid; + xid_t ia_xid; loff_t ia_size; struct timespec ia_atime; struct timespec ia_mtime; @@ -305,6 +315,9 @@ struct iattr { struct file *ia_file; }; +#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */ +#define ATTR_FLAG_IUNLINK 1024 /* Immutable unlink */ + /* * Includes for diskquotas. 
*/ @@ -486,6 +499,7 @@ struct inode { unsigned int i_nlink; uid_t i_uid; gid_t i_gid; + xid_t i_xid; dev_t i_rdev; loff_t i_size; struct timespec i_atime; @@ -648,6 +662,7 @@ struct file { struct fown_struct f_owner; unsigned int f_uid, f_gid; struct file_ra_state f_ra; + xid_t f_xid; unsigned long f_version; void *f_security; @@ -726,6 +741,7 @@ struct file_lock { unsigned char fl_type; loff_t fl_start; loff_t fl_end; + xid_t fl_xid; struct fasync_struct * fl_fasync; /* for lease break notifications */ unsigned long fl_break_time; /* for nonblocking lease breaks */ @@ -920,12 +936,12 @@ static inline void unlock_super(struct s */ extern int vfs_permission(struct nameidata *, int); extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); -extern int vfs_mkdir(struct inode *, struct dentry *, int); -extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); -extern int vfs_symlink(struct inode *, struct dentry *, const char *, int); -extern int vfs_link(struct dentry *, struct inode *, struct dentry *); -extern int vfs_rmdir(struct inode *, struct dentry *); -extern int vfs_unlink(struct inode *, struct dentry *); +extern int vfs_mkdir(struct inode *, struct dentry *, int, struct nameidata *); +extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t, struct nameidata *); +extern int vfs_symlink(struct inode *, struct dentry *, const char *, int, struct nameidata *); +extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct nameidata *); +extern int vfs_rmdir(struct inode *, struct dentry *, struct nameidata *); +extern int vfs_unlink(struct inode *, struct dentry *, struct nameidata *); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); /* @@ -1067,6 +1083,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + int (*sync_flags) 
(struct inode *); }; struct seq_file; @@ -1742,6 +1759,7 @@ extern int dcache_dir_open(struct inode extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, void *, filldir_t); +extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *)); extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int simple_statfs(struct super_block *, struct kstatfs *); extern int simple_link(struct dentry *, struct inode *, struct dentry *); diff -NurpP --minimal linux-2.6.17.13/include/linux/init_task.h linux-2.6.17.13-vs2.0.2.1/include/linux/init_task.h --- linux-2.6.17.13/include/linux/init_task.h 2006-06-18 04:55:18 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/init_task.h 2006-08-17 00:28:21 +0200 @@ -123,6 +123,10 @@ extern struct group_info init_groups; .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ + .xid = 0, \ + .vx_info = NULL, \ + .nid = 0, \ + .nx_info = NULL, \ } diff -NurpP --minimal linux-2.6.17.13/include/linux/ipc.h linux-2.6.17.13-vs2.0.2.1/include/linux/ipc.h --- linux-2.6.17.13/include/linux/ipc.h 2004-08-14 12:54:46 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/ipc.h 2006-08-17 00:28:21 +0200 @@ -66,6 +66,7 @@ struct kern_ipc_perm mode_t mode; unsigned long seq; void *security; + xid_t xid; }; #endif /* __KERNEL__ */ diff -NurpP --minimal linux-2.6.17.13/include/linux/kernel.h linux-2.6.17.13-vs2.0.2.1/include/linux/kernel.h --- linux-2.6.17.13/include/linux/kernel.h 2006-06-18 04:55:18 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/kernel.h 2006-08-17 00:28:21 +0200 @@ -17,6 +17,7 @@ #include extern const char linux_banner[]; +extern const char vx_linux_banner[]; #define INT_MAX ((int)(~0U>>1)) #define INT_MIN (-INT_MAX - 1) diff -NurpP --minimal linux-2.6.17.13/include/linux/major.h linux-2.6.17.13-vs2.0.2.1/include/linux/major.h 
--- linux-2.6.17.13/include/linux/major.h 2006-06-18 04:55:19 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/major.h 2006-08-17 00:28:21 +0200 @@ -15,6 +15,7 @@ #define HD_MAJOR IDE0_MAJOR #define PTY_SLAVE_MAJOR 3 #define TTY_MAJOR 4 +#define VROOT_MAJOR 4 #define TTYAUX_MAJOR 5 #define LP_MAJOR 6 #define VCS_MAJOR 7 diff -NurpP --minimal linux-2.6.17.13/include/linux/mount.h linux-2.6.17.13-vs2.0.2.1/include/linux/mount.h --- linux-2.6.17.13/include/linux/mount.h 2006-04-09 13:49:57 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/mount.h 2006-08-17 00:28:21 +0200 @@ -22,10 +22,14 @@ #define MNT_NOEXEC 0x04 #define MNT_NOATIME 0x08 #define MNT_NODIRATIME 0x10 +#define MNT_RDONLY 0x20 + +#define MNT_IS_RDONLY(m) ((m) && ((m)->mnt_flags & MNT_RDONLY)) #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ #define MNT_PNODE_MASK 0x3000 /* propogation flag mask */ +#define MNT_XID 0x8000 struct vfsmount { struct list_head mnt_hash; @@ -47,6 +51,7 @@ struct vfsmount { struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ struct namespace *mnt_namespace; /* containing namespace */ int mnt_pinned; + xid_t mnt_xid; /* xid tagging used for vfsmount */ }; static inline struct vfsmount *mntget(struct vfsmount *mnt) diff -NurpP --minimal linux-2.6.17.13/include/linux/net.h linux-2.6.17.13-vs2.0.2.1/include/linux/net.h --- linux-2.6.17.13/include/linux/net.h 2006-06-18 04:55:19 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/net.h 2006-08-17 00:28:21 +0200 @@ -62,6 +62,7 @@ typedef enum { #define SOCK_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 +#define SOCK_USER_SOCKET 4 #ifndef ARCH_HAS_SOCKET_TYPES /** diff -NurpP --minimal linux-2.6.17.13/include/linux/nfs_mount.h linux-2.6.17.13-vs2.0.2.1/include/linux/nfs_mount.h --- linux-2.6.17.13/include/linux/nfs_mount.h 2005-08-29 22:25:42 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/nfs_mount.h 
2006-08-17 00:28:21 +0200 @@ -61,6 +61,7 @@ struct nfs_mount_data { #define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ +#define NFS_MOUNT_TAGXID 0x8000 /* tagxid */ #define NFS_MOUNT_FLAGMASK 0xFFFF #endif diff -NurpP --minimal linux-2.6.17.13/include/linux/percpu.h linux-2.6.17.13-vs2.0.2.1/include/linux/percpu.h --- linux-2.6.17.13/include/linux/percpu.h 2006-04-09 13:49:57 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/percpu.h 2006-08-17 00:28:21 +0200 @@ -8,7 +8,7 @@ /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM -#define PERCPU_ENOUGH_ROOM 32768 +#define PERCPU_ENOUGH_ROOM 65536 #endif /* Must be an lvalue. */ diff -NurpP --minimal linux-2.6.17.13/include/linux/proc_fs.h linux-2.6.17.13-vs2.0.2.1/include/linux/proc_fs.h --- linux-2.6.17.13/include/linux/proc_fs.h 2006-06-18 04:55:21 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/proc_fs.h 2006-08-17 00:28:21 +0200 @@ -56,6 +56,7 @@ struct proc_dir_entry { nlink_t nlink; uid_t uid; gid_t gid; + int vx_flags; loff_t size; struct inode_operations * proc_iops; const struct file_operations * proc_fops; @@ -251,9 +252,11 @@ extern void kclist_add(struct kcore_list struct proc_inode { struct task_struct *task; int type; + int vx_flags; union { int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); int (*proc_read)(struct task_struct *task, char *page); + int (*proc_vid_read)(int vid, char *page); } op; struct proc_dir_entry *pde; struct inode vfs_inode; diff -NurpP --minimal linux-2.6.17.13/include/linux/reiserfs_fs.h linux-2.6.17.13-vs2.0.2.1/include/linux/reiserfs_fs.h --- linux-2.6.17.13/include/linux/reiserfs_fs.h 2006-06-18 04:55:21 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/reiserfs_fs.h 2006-08-17 00:28:21 +0200 @@ -829,6 +829,18 @@ struct stat_data_v1 { #define REISERFS_COMPR_FL EXT2_COMPR_FL #define REISERFS_NOTAIL_FL 
EXT2_NOTAIL_FL +/* unfortunately reiserfs sdattr is only 16 bit */ +#define REISERFS_BARRIER_FL (EXT2_BARRIER_FL >> 16) +#define REISERFS_IUNLINK_FL (EXT2_IUNLINK_FL >> 16) + +#ifdef CONFIG_VSERVER_LEGACY +#define REISERFS_FL_USER_VISIBLE (REISERFS_IUNLINK_FL|0x80FF) +#define REISERFS_FL_USER_MODIFIABLE (REISERFS_IUNLINK_FL|0x80FF) +#else +#define REISERFS_FL_USER_VISIBLE 0x80FF +#define REISERFS_FL_USER_MODIFIABLE 0x80FF +#endif + /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ REISERFS_SYNC_FL | \ @@ -1909,6 +1921,7 @@ static inline void reiserfs_update_sd(st void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); +int reiserfs_sync_flags(struct inode *inode); /* namei.c */ void set_de_name_and_namelen(struct reiserfs_dir_entry *de); diff -NurpP --minimal linux-2.6.17.13/include/linux/reiserfs_fs_sb.h linux-2.6.17.13-vs2.0.2.1/include/linux/reiserfs_fs_sb.h --- linux-2.6.17.13/include/linux/reiserfs_fs_sb.h 2006-02-18 14:40:35 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/reiserfs_fs_sb.h 2006-08-17 00:28:21 +0200 @@ -456,6 +456,7 @@ enum reiserfs_mount_options { REISERFS_POSIXACL, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, + REISERFS_TAGXID, /* Actions on error */ REISERFS_ERROR_PANIC, diff -NurpP --minimal linux-2.6.17.13/include/linux/sched.h linux-2.6.17.13-vs2.0.2.1/include/linux/sched.h --- linux-2.6.17.13/include/linux/sched.h 2006-06-18 04:55:21 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/sched.h 2006-08-17 00:28:21 +0200 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -131,6 +132,7 @@ extern unsigned long nr_iowait(void); #define EXIT_DEAD 32 /* in tsk->state again */ #define TASK_NONINTERACTIVE 64 +#define TASK_ONHOLD 128 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); 
} while (0) @@ -259,27 +261,30 @@ extern void arch_unmap_area_topdown(stru * The mm counters are not protected by its page_table_lock, * so must be incremented atomically. */ -#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value) -#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member)) -#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member) -#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) -#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) typedef atomic_long_t mm_counter_t; +#define __set_mm_counter(mm, member, value) \ + atomic_long_set(&(mm)->_##member, value) +#define get_mm_counter(mm, member) \ + ((unsigned long)atomic_long_read(&(mm)->_##member)) #else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ /* * The mm counters are protected by its page_table_lock, * so can be incremented directly. */ -#define set_mm_counter(mm, member, value) (mm)->_##member = (value) -#define get_mm_counter(mm, member) ((mm)->_##member) -#define add_mm_counter(mm, member, value) (mm)->_##member += (value) -#define inc_mm_counter(mm, member) (mm)->_##member++ -#define dec_mm_counter(mm, member) (mm)->_##member-- typedef unsigned long mm_counter_t; +#define __set_mm_counter(mm, member, value) (mm)->_##member = (value) +#define get_mm_counter(mm, member) ((mm)->_##member) #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +#define set_mm_counter(mm, member, value) /* passes the delta (current - value) to the vx_ hook; value is parenthesized so expression arguments expand correctly */ \ + vx_ ## member ## pages_sub((mm), (get_mm_counter(mm, member) - (value))) +#define add_mm_counter(mm, member, value) \ + vx_ ## member ## pages_add((mm), (value)) +#define inc_mm_counter(mm, member) vx_ ## member ## pages_inc((mm)) +#define dec_mm_counter(mm, member) vx_ ## member ## pages_dec((mm)) + #define get_mm_rss(mm) \ (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) #define update_hiwater_rss(mm) do { \ @@ -338,6 +343,7 @@ struct mm_struct { /* Architecture-specific MM context */
mm_context_t context; + struct vx_info *mm_vx_info; /* Token based thrashing protection. */ unsigned long swap_token_time; @@ -510,9 +516,10 @@ struct user_struct { /* Hash table maintenance information */ struct list_head uidhash_list; uid_t uid; + xid_t xid; }; -extern struct user_struct *find_user(uid_t); +extern struct user_struct *find_user(xid_t, uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -824,6 +831,14 @@ struct task_struct { void *security; struct audit_context *audit_context; + +/* vserver context data */ + struct vx_info *vx_info; + struct nx_info *nx_info; + + xid_t xid; + nid_t nid; + seccomp_t seccomp; /* Thread group tracking */ @@ -1040,13 +1055,19 @@ extern struct task_struct init_task; extern struct mm_struct init_mm; -#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) + +#define find_task_by_real_pid(nr) \ + find_task_by_pid_type(PIDTYPE_PID, nr) +#define find_task_by_pid(nr) \ + find_task_by_pid_type(PIDTYPE_PID, \ + vx_rmap_pid(nr)) + extern struct task_struct *find_task_by_pid_type(int type, int pid); extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. 
*/ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(xid_t, uid_t); static inline struct user_struct *get_uid(struct user_struct *u) { atomic_inc(&u->__count); diff -NurpP --minimal linux-2.6.17.13/include/linux/shmem_fs.h linux-2.6.17.13-vs2.0.2.1/include/linux/shmem_fs.h --- linux-2.6.17.13/include/linux/shmem_fs.h 2006-04-09 13:49:57 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/shmem_fs.h 2006-08-17 00:28:21 +0200 @@ -8,6 +8,9 @@ #define SHMEM_NR_DIRECT 16 +#define TMPFS_SUPER_MAGIC 0x01021994 + + struct shmem_inode_info { spinlock_t lock; unsigned long flags; diff -NurpP --minimal linux-2.6.17.13/include/linux/stat.h linux-2.6.17.13-vs2.0.2.1/include/linux/stat.h --- linux-2.6.17.13/include/linux/stat.h 2006-06-18 04:55:25 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/stat.h 2006-08-17 00:28:21 +0200 @@ -63,6 +63,7 @@ struct kstat { unsigned int nlink; uid_t uid; gid_t gid; + xid_t xid; dev_t rdev; loff_t size; struct timespec atime; diff -NurpP --minimal linux-2.6.17.13/include/linux/sunrpc/auth.h linux-2.6.17.13-vs2.0.2.1/include/linux/sunrpc/auth.h --- linux-2.6.17.13/include/linux/sunrpc/auth.h 2006-02-18 14:40:35 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/sunrpc/auth.h 2006-08-17 00:28:21 +0200 @@ -28,6 +28,7 @@ struct auth_cred { uid_t uid; gid_t gid; + xid_t xid; struct group_info *group_info; }; diff -NurpP --minimal linux-2.6.17.13/include/linux/sunrpc/clnt.h linux-2.6.17.13-vs2.0.2.1/include/linux/sunrpc/clnt.h --- linux-2.6.17.13/include/linux/sunrpc/clnt.h 2006-06-18 04:55:25 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/sunrpc/clnt.h 2006-08-17 00:28:21 +0200 @@ -52,7 +52,8 @@ struct rpc_clnt { cl_intr : 1,/* interruptible */ cl_autobind : 1,/* use getport() */ cl_oneshot : 1,/* dispose after use */ - cl_dead : 1;/* abandoned */ + cl_dead : 1,/* abandoned */ + cl_tagxid : 1;/* do xid tagging */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ struct rpc_portmap * cl_pmap; /* port mapping */ 
diff -NurpP --minimal linux-2.6.17.13/include/linux/sysctl.h linux-2.6.17.13-vs2.0.2.1/include/linux/sysctl.h --- linux-2.6.17.13/include/linux/sysctl.h 2006-06-18 04:55:25 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/sysctl.h 2006-08-17 00:28:21 +0200 @@ -93,6 +93,7 @@ enum KERN_CAP_BSET=14, /* int: capability bounding set */ KERN_PANIC=15, /* int: panic timeout */ KERN_REALROOTDEV=16, /* real root device to mount after initrd */ + KERN_VSHELPER=17, /* string: path to vshelper policy agent */ KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ @@ -909,6 +910,9 @@ typedef int ctl_handler (ctl_table *tabl typedef int proc_handler (ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos); +typedef int virt_handler (struct ctl_table *ctl, int write, xid_t xid, + void **datap, size_t *lenp); + extern int proc_dostring(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); extern int proc_dointvec(ctl_table *, int, struct file *, @@ -990,6 +994,7 @@ struct ctl_table mode_t mode; ctl_table *child; proc_handler *proc_handler; /* Callback for text formatting */ + virt_handler *virt_handler; /* Context virtualization */ ctl_handler *strategy; /* Callback function for all r/w */ struct proc_dir_entry *de; /* /proc control block */ void *extra1; diff -NurpP --minimal linux-2.6.17.13/include/linux/sysfs.h linux-2.6.17.13-vs2.0.2.1/include/linux/sysfs.h --- linux-2.6.17.13/include/linux/sysfs.h 2006-06-18 04:55:25 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/sysfs.h 2006-08-17 00:28:21 +0200 @@ -12,6 +12,8 @@ #include +#define SYSFS_SUPER_MAGIC 0x62656572 + struct kobject; struct module; diff -NurpP --minimal linux-2.6.17.13/include/linux/types.h linux-2.6.17.13-vs2.0.2.1/include/linux/types.h --- linux-2.6.17.13/include/linux/types.h 2006-06-18 04:55:26 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/types.h 2006-08-17 00:28:21 +0200 @@ -38,6 +38,8 @@ 
typedef __kernel_uid32_t uid_t; typedef __kernel_gid32_t gid_t; typedef __kernel_uid16_t uid16_t; typedef __kernel_gid16_t gid16_t; +typedef unsigned int xid_t; +typedef unsigned int nid_t; #ifdef CONFIG_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vroot.h linux-2.6.17.13-vs2.0.2.1/include/linux/vroot.h --- linux-2.6.17.13/include/linux/vroot.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vroot.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,51 @@ + +/* + * include/linux/vroot.h + * + * written by Herbert Pötzl, 9/11/2002 + * ported to 2.6 by Herbert Pötzl, 30/12/2004 + * + * Copyright (C) 2002-2005 by Herbert Pötzl. + * Redistribution of this file is permitted under the + * GNU General Public License. + */ + +#ifndef _LINUX_VROOT_H +#define _LINUX_VROOT_H + + +#ifdef __KERNEL__ + +/* Possible states of device */ +enum { + Vr_unbound, + Vr_bound, +}; + +struct vroot_device { + int vr_number; + int vr_refcnt; + + struct semaphore vr_ctl_mutex; + struct block_device *vr_device; + int vr_state; +}; + + +typedef struct block_device *(vroot_grb_func)(struct block_device *); + +extern int register_vroot_grb(vroot_grb_func *); +extern int unregister_vroot_grb(vroot_grb_func *); + +#endif /* __KERNEL__ */ + +#define MAX_VROOT_DEFAULT 8 + +/* + * IOCTL commands --- we will commandeer 0x56 ('V') + */ + +#define VROOT_SET_DEV 0x5600 +#define VROOT_CLR_DEV 0x5601 + +#endif /* _LINUX_VROOT_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vs_base.h linux-2.6.17.13-vs2.0.2.1/include/linux/vs_base.h --- linux-2.6.17.13/include/linux/vs_base.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vs_base.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,106 @@ +#ifndef _VX_VS_BASE_H +#define _VX_VS_BASE_H + +#include "vserver/context.h" + + +#define vx_task_xid(t) ((t)->xid) + +#define vx_current_xid() vx_task_xid(current) + +#define vx_check(c,m) __vx_check(vx_current_xid(),c,m) + 
+#define vx_weak_check(c,m) ((m) ? vx_check(c,m) : 1) + + +/* + * check current context for ADMIN/WATCH and + * optionally against supplied argument + */ +static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode) +{ + if (mode & VX_ARG_MASK) { + if ((mode & VX_IDENT) && + (id == cid)) + return 1; + } + if (mode & VX_ATR_MASK) { + if ((mode & VX_DYNAMIC) && + (id >= MIN_D_CONTEXT) && + (id <= MAX_S_CONTEXT)) + return 1; + if ((mode & VX_STATIC) && + (id > 1) && (id < MIN_D_CONTEXT)) + return 1; + } + return (((mode & VX_ADMIN) && (cid == 0)) || + ((mode & VX_WATCH) && (cid == 1)) || + ((mode & VX_HOSTID) && (id == 0))); +} + + +#define __vx_state(v) ((v) ? ((v)->vx_state) : 0) + +#define vx_info_state(v,m) (__vx_state(v) & (m)) + + +/* generic flag merging */ + +#define vx_check_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define vx_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m))) + +#define vx_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m))) + +#define vx_check_bit(v,n) ((v) & (1LL << (n))) + + +/* context flags */ + +#define __vx_flags(v) ((v) ? (v)->vx_flags : 0) + +#define vx_current_flags() __vx_flags(current->vx_info) + +#define vx_info_flags(v,m,f) \ + vx_check_flags(__vx_flags(v),(m),(f)) + +#define task_vx_flags(t,m,f) \ + ((t) && vx_info_flags((t)->vx_info, (m), (f))) + +#define vx_flags(m,f) vx_info_flags(current->vx_info,(m),(f)) + + +/* context caps */ + +#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0) + +#define vx_current_ccaps() __vx_ccaps(current->vx_info) + +#define vx_info_ccaps(v,c) (__vx_ccaps(v) & (c)) + +#define vx_ccaps(c) vx_info_ccaps(current->vx_info,(c)) + + +#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 ) + +#define vx_info_mcaps(v,c) (__vx_mcaps(v) & (c)) + +#define vx_mcaps(c) vx_info_mcaps(current->vx_info,(c)) + + +#define vx_current_bcaps() \ + (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? 
\ + current->vx_info->vx_bcaps : cap_bset) + + +#define vx_current_initpid(n) \ + (current->vx_info && \ + (current->vx_info->vx_initpid == (n))) + +#define vx_capable(b,c) (capable(b) || \ + ((current->euid == 0) && vx_ccaps(c))) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.17.13/include/linux/vs_context.h linux-2.6.17.13-vs2.0.2.1/include/linux/vs_context.h --- linux-2.6.17.13/include/linux/vs_context.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vs_context.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,198 @@ +#ifndef _VX_VS_CONTEXT_H +#define _VX_VS_CONTEXT_H + +#include +#include "vserver/debug.h" + + +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__,__HERE__) + +static inline struct vx_info *__get_vx_info(struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (!vxi) + return NULL; + + vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_get_vx_info(vxi, _here); + + atomic_inc(&vxi->vx_usecnt); + return vxi; +} + + +extern void free_vx_info(struct vx_info *); + +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__,__HERE__) + +static inline void __put_vx_info(struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (!vxi) + return; + + vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_put_vx_info(vxi, _here); + + if (atomic_dec_and_test(&vxi->vx_usecnt)) + free_vx_info(vxi); +} + + +#define init_vx_info(p,i) __init_vx_info(p,i,__FILE__,__LINE__,__HERE__) + +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (vxi) { + vxlprintk(VXD_CBIT(xid, 3), + "init_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_init_vx_info(vxi, vxp, _here); + + 
atomic_inc(&vxi->vx_usecnt); + } + *vxp = vxi; +} + + +#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__,__HERE__) + +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxo; + + if (!vxi) + return; + + vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_set_vx_info(vxi, vxp, _here); + + atomic_inc(&vxi->vx_usecnt); + vxo = xchg(vxp, vxi); + BUG_ON(vxo); +} + + +#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__,__HERE__) + +static inline void __clr_vx_info(struct vx_info **vxp, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxo; + + vxo = xchg(vxp, NULL); + if (!vxo) + return; + + vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])", + vxo, vxo?vxo->vx_id:0, + vxo?atomic_read(&vxo->vx_usecnt):0, + _file, _line); + __vxh_clr_vx_info(vxo, vxp, _here); + + if (atomic_dec_and_test(&vxo->vx_usecnt)) + free_vx_info(vxo); +} + + +#define claim_vx_info(v,p) \ + __claim_vx_info(v,p,__FILE__,__LINE__,__HERE__) + +static inline void __claim_vx_info(struct vx_info *vxi, + struct task_struct *task, + const char *_file, int _line, void *_here) +{ + vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_tasks):0, + task, _file, _line); + __vxh_claim_vx_info(vxi, task, _here); + + atomic_inc(&vxi->vx_tasks); +} + + +extern void unhash_vx_info(struct vx_info *); + +#define release_vx_info(v,p) \ + __release_vx_info(v,p,__FILE__,__LINE__,__HERE__) + +static inline void __release_vx_info(struct vx_info *vxi, + struct task_struct *task, + const char *_file, int _line, void *_here) +{ + vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_tasks):0, + task, _file, _line); + 
__vxh_release_vx_info(vxi, task, _here); + + might_sleep(); + + if (atomic_dec_and_test(&vxi->vx_tasks)) + unhash_vx_info(vxi); +} + + +#define task_get_vx_info(p) \ + __task_get_vx_info(p,__FILE__,__LINE__,__HERE__) + +static inline struct vx_info *__task_get_vx_info(struct task_struct *p, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxi; + + task_lock(p); + vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)", + p, _file, _line); + vxi = __get_vx_info(p->vx_info, _file, _line, _here); + task_unlock(p); + return vxi; +} + + +static inline void __wakeup_vx_info(struct vx_info *vxi) +{ + if (waitqueue_active(&vxi->vx_wait)) + wake_up_interruptible(&vxi->vx_wait); +} + +extern void exit_vx_info(struct task_struct *, int); + +static inline +struct task_struct *vx_child_reaper(struct task_struct *p) +{ + struct vx_info *vxi = p->vx_info; + struct task_struct *reaper = child_reaper; + + if (!vxi) + goto out; + + BUG_ON(!p->vx_info->vx_reaper); + + /* child reaper for the guest reaper */ + if (vxi->vx_reaper == p) + goto out; + + reaper = vxi->vx_reaper; +out: + return reaper; +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.17.13/include/linux/vs_cvirt.h linux-2.6.17.13-vs2.0.2.1/include/linux/vs_cvirt.h --- linux-2.6.17.13/include/linux/vs_cvirt.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vs_cvirt.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,108 @@ +#ifndef _VX_VS_CVIRT_H +#define _VX_VS_CVIRT_H + +#include "vserver/cvirt.h" +#include "vserver/debug.h" + + +/* utsname virtualization: a task that has a vx_info gets that context's cvirt.utsname, any other task gets system_utsname */ + +static inline struct new_utsname *vx_new_utsname(void) +{ + if (current->vx_info) + return &current->vx_info->cvirt.utsname; + return &system_utsname; +} + +#define vx_new_uts(x) ((vx_new_utsname())->x) + + +/* pid faking stuff */ + + +#define vx_info_map_pid(v,p) \ + __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p) +#define vx_map_pid(p) 
vx_info_map_pid(current->vx_info, p) +#define vx_map_tgid(p) vx_map_pid(p) + +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_map_tgid: %p/%llx: %d -> %d", + vxi, (long long)vxi->vx_flags, pid, + (pid && pid == vxi->vx_initpid)?1:pid, + func, file, line); + if (pid == 0) + return 0; + if (pid == vxi->vx_initpid) + return 1; + } + return pid; +} + +#define vx_info_rmap_pid(v,p) \ + __vx_info_rmap_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p) +#define vx_rmap_tgid(p) vx_rmap_pid(p) + +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_rmap_tgid: %p/%llx: %d -> %d", + vxi, (long long)vxi->vx_flags, pid, + (pid == 1)?vxi->vx_initpid:pid, + func, file, line); + if ((pid == 1) && vxi->vx_initpid) + return vxi->vx_initpid; + if (pid == vxi->vx_initpid) + return ~0U; + } + return pid; +} + + +static inline void vx_activate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_inc(&vxi->cvirt.nr_running); + } +} + +static inline void vx_deactivate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_dec(&vxi->cvirt.nr_running); + } +} + +static inline void vx_uninterruptible_inc(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) + atomic_inc(&vxi->cvirt.nr_uninterruptible); +} + +static inline void vx_uninterruptible_dec(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) + atomic_dec(&vxi->cvirt.nr_uninterruptible); +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.17.13/include/linux/vs_dlimit.h 
linux-2.6.17.13-vs2.0.2.1/include/linux/vs_dlimit.h --- linux-2.6.17.13/include/linux/vs_dlimit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vs_dlimit.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,213 @@ +#ifndef _VX_VS_DLIMIT_H +#define _VX_VS_DLIMIT_H + +#include "vserver/dlimit.h" +#include "vserver/debug.h" + + +#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__) + +static inline struct dl_info *__get_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return NULL; + vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + atomic_inc(&dli->dl_usecnt); + return dli; +} + + +#define free_dl_info(i) \ + call_rcu(&i->dl_rcu, rcu_free_dl_info); + +#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__) + +static inline void __put_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return; + vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&dli->dl_usecnt)) + free_dl_info(dli); +} + + +#define __dlimit_char(d) ((d)?'*':' ') + +static inline int __dl_alloc_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *file, int line) +{ + struct dl_info *dli = NULL; + int ret = 0; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_space_used + nr > dli->dl_space_total); + if (!ret) + dli->dl_space_used += nr; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "ALLOC (%p,#%d)%c %lld bytes (%d)", + sb, xid, __dlimit_char(dli), (long long)nr, + ret, file, line); + return ret; +} + +static inline void __dl_free_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *_file, int _line) +{ + struct dl_info *dli = NULL; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, 
xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_space_used > nr) + dli->dl_space_used -= nr; + else + dli->dl_space_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "FREE (%p,#%d)%c %lld bytes", + sb, xid, __dlimit_char(dli), (long long)nr, + _file, _line); +} + +static inline int __dl_alloc_inode(struct super_block *sb, + xid_t xid, const char *_file, int _line) +{ + struct dl_info *dli; + int ret = 0; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_inodes_used >= dli->dl_inodes_total); + if (!ret) + dli->dl_inodes_used++; +#if 0 + else + vxwprintk("DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d", + sb, xid, + dli->dl_inodes_used, dli->dl_inodes_total, + file, line); +#endif + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "ALLOC (%p,#%d)%c inode (%d)", + sb, xid, __dlimit_char(dli), ret, _file, _line); + return ret; +} + +static inline void __dl_free_inode(struct super_block *sb, + xid_t xid, const char *_file, int _line) +{ + struct dl_info *dli; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_used > 1) + dli->dl_inodes_used--; + else + dli->dl_inodes_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "FREE (%p,#%d)%c inode", + sb, xid, __dlimit_char(dli), _file, _line); +} + +static inline void __dl_adjust_block(struct super_block *sb, xid_t xid, + unsigned int *free_blocks, unsigned int *root_blocks, + const char *_file, int _line) +{ + struct dl_info *dli; + uint64_t broot, bfree; + + dli = locate_dl_info(sb, xid); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + broot = (dli->dl_space_total - + (dli->dl_space_total >> 10) * dli->dl_nrlmult) + >> sb->s_blocksize_bits; + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + spin_unlock(&dli->dl_lock); + + 
vxlprintk(VXD_CBIT(dlim, 2), + "ADJUST: %lld,%lld on %d,%d [mult=%d]", + (long long)bfree, (long long)broot, + free_blocks ? *free_blocks : 0, root_blocks ? *root_blocks : 0, dli->dl_nrlmult, + _file, _line); + if (free_blocks) { /* NULL is accepted: clamp only the counters the caller passed */ + if (*free_blocks > bfree) + *free_blocks = bfree; + } + if (root_blocks) { + if (*root_blocks > broot) + *root_blocks = broot; + } + put_dl_info(dli); +} + +#define DLIMIT_ALLOC_SPACE(in, bytes) \ + __dl_alloc_space((in)->i_sb, (in)->i_xid, (dlsize_t)(bytes), \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_SPACE(in, bytes) \ + __dl_free_space((in)->i_sb, (in)->i_xid, (dlsize_t)(bytes), \ + __FILE__, __LINE__ ) + +#define DLIMIT_ALLOC_BLOCK(in, nr) \ + __dl_alloc_space((in)->i_sb, (in)->i_xid, \ + ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_BLOCK(in, nr) \ + __dl_free_space((in)->i_sb, (in)->i_xid, \ + ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \ + __FILE__, __LINE__ ) + + +#define DLIMIT_ALLOC_INODE(in) \ + __dl_alloc_inode((in)->i_sb, (in)->i_xid, __FILE__, __LINE__ ) + +#define DLIMIT_FREE_INODE(in) \ + __dl_free_inode((in)->i_sb, (in)->i_xid, __FILE__, __LINE__ ) + + +#define DLIMIT_ADJUST_BLOCK(sb, xid, fb, rb) \ + __dl_adjust_block(sb, xid, fb, rb, __FILE__, __LINE__ ) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.17.13/include/linux/vs_limit.h linux-2.6.17.13-vs2.0.2.1/include/linux/vs_limit.h --- linux-2.6.17.13/include/linux/vs_limit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vs_limit.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,107 @@ +#ifndef _VX_VS_LIMIT_H +#define _VX_VS_LIMIT_H + +#include "vserver/limit.h" +#include "vserver/debug.h" +#include "vserver/limit_int.h" + + +#define vx_acc_cres(v,d,p,r) \ + __vx_acc_cres(v, r, d, p, __FILE__, __LINE__) + +#define vx_acc_cres_cond(x,d,p,r) \ + __vx_acc_cres(((x) == vx_current_xid()) ? 
current->vx_info : 0, \ + r, d, p, __FILE__, __LINE__) + + +#define vx_add_cres(v,a,p,r) \ + __vx_add_cres(v, r, a, p, __FILE__, __LINE__) +#define vx_sub_cres(v,a,p,r) vx_add_cres(v,-(a),p,r) + +#define vx_add_cres_cond(x,a,p,r) \ + __vx_add_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \ + r, a, p, __FILE__, __LINE__) +#define vx_sub_cres_cond(x,a,p,r) vx_add_cres_cond(x,-(a),p,r) + + +/* process and file limits */ + +#define vx_nproc_inc(p) \ + vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC) + +#define vx_nproc_dec(p) \ + vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC) + +#define vx_files_inc(f) \ + vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE) + +#define vx_files_dec(f) \ + vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE) + +#define vx_locks_inc(l) \ + vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS) + +#define vx_locks_dec(l) \ + vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS) + +#define vx_openfd_inc(f) \ + vx_acc_cres(current->vx_info, 1, (void *)(long)(f), VLIMIT_OPENFD) + +#define vx_openfd_dec(f) \ + vx_acc_cres(current->vx_info,-1, (void *)(long)(f), VLIMIT_OPENFD) + + +#define vx_cres_avail(v,n,r) \ + __vx_cres_avail(v, r, n, __FILE__, __LINE__) + + +#define vx_nproc_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_NPROC) + +#define vx_files_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_NOFILE) + +#define vx_locks_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_LOCKS) + +#define vx_openfd_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_OPENFD) + + +/* socket limits */ + +#define vx_sock_inc(s) \ + vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK) + +#define vx_sock_dec(s) \ + vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK) + +#define vx_sock_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_NSOCK) + + +/* ipc resource limits */ + +#define vx_ipcmsg_add(v,u,a) \ + vx_add_cres(v, a, u, RLIMIT_MSGQUEUE) + +#define vx_ipcmsg_sub(v,u,a) \ + vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE) + +#define vx_ipcmsg_avail(v,a) \ 
+ vx_cres_avail(v, a, RLIMIT_MSGQUEUE) + + +#define vx_ipcshm_add(v,k,a) \ + vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) + +#define vx_ipcshm_sub(v,k,a) \ + vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) + +#define vx_ipcshm_avail(v,a) \ + vx_cres_avail(v, a, VLIMIT_SHMEM) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.17.13/include/linux/vs_memory.h linux-2.6.17.13-vs2.0.2.1/include/linux/vs_memory.h --- linux-2.6.17.13/include/linux/vs_memory.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vs_memory.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,107 @@ +#ifndef _VX_VS_MEMORY_H +#define _VX_VS_MEMORY_H + +#include "vserver/limit.h" +#include "vserver/debug.h" +#include "vserver/limit_int.h" + + +#define __vx_add_long(a,v) (*(v) += (a)) +#define __vx_inc_long(v) (++*(v)) +#define __vx_dec_long(v) (--*(v)) + +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS +#define __vx_add_value(a,v) atomic_long_add(a,v) +#define __vx_inc_value(v) atomic_long_inc(v) +#define __vx_dec_value(v) atomic_long_dec(v) +#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +#define __vx_add_value(a,v) __vx_add_long(a,v) +#define __vx_inc_value(v) __vx_inc_long(v) +#define __vx_dec_value(v) __vx_dec_long(v) +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ + + +#define vx_acc_page(m,d,v,r) do { \ + if ((d) > 0) \ + __vx_inc_long(&(m->v)); \ + else \ + __vx_dec_long(&(m->v)); \ + __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__); \ +} while (0) + +#define vx_acc_page_atomic(m,d,v,r) do { \ + if ((d) > 0) \ + __vx_inc_value(&(m->v)); \ + else \ + __vx_dec_value(&(m->v)); \ + __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__); \ +} while (0) + + +#define vx_acc_pages(m,p,v,r) do { \ + unsigned long __p = (p); \ + __vx_add_long(__p, &(m->v)); \ + __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__); \ +} while (0) + +#define vx_acc_pages_atomic(m,p,v,r) do { \ + unsigned long __p = (p); \ + __vx_add_value(__p, &(m->v)); \ + 
__vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__); \ +} while (0) + + + +#define vx_acc_vmpage(m,d) \ + vx_acc_page(m, d, total_vm, RLIMIT_AS) +#define vx_acc_vmlpage(m,d) \ + vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_file_rsspage(m,d) \ + vx_acc_page_atomic(m, d, _file_rss, RLIMIT_RSS) +#define vx_acc_anon_rsspage(m,d) \ + vx_acc_page_atomic(m, d, _anon_rss, VLIMIT_ANON) + +#define vx_acc_vmpages(m,p) \ + vx_acc_pages(m, p, total_vm, RLIMIT_AS) +#define vx_acc_vmlpages(m,p) \ + vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_file_rsspages(m,p) \ + vx_acc_pages_atomic(m, p, _file_rss, RLIMIT_RSS) +#define vx_acc_anon_rsspages(m,p) \ + vx_acc_pages_atomic(m, p, _anon_rss, VLIMIT_ANON) + +#define vx_pages_add(s,r,p) __vx_add_cres(s, r, p, 0, __FILE__, __LINE__) +#define vx_pages_sub(s,r,p) vx_pages_add(s, r, -(p)) + +#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1) +#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1) +#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p) +#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p)) + +#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1) +#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1) +#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p) +#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p)) + +#define vx_file_rsspages_inc(m) vx_acc_file_rsspage(m, 1) +#define vx_file_rsspages_dec(m) vx_acc_file_rsspage(m,-1) +#define vx_file_rsspages_add(m,p) vx_acc_file_rsspages(m, p) +#define vx_file_rsspages_sub(m,p) vx_acc_file_rsspages(m,-(p)) + +#define vx_anon_rsspages_inc(m) vx_acc_anon_rsspage(m, 1) +#define vx_anon_rsspages_dec(m) vx_acc_anon_rsspage(m,-1) +#define vx_anon_rsspages_add(m,p) vx_acc_anon_rsspages(m, p) +#define vx_anon_rsspages_sub(m,p) vx_acc_anon_rsspages(m,-(p)) + + +#define vx_pages_avail(m,p,r) \ + __vx_cres_avail((m)->mm_vx_info, r, p, __FILE__, __LINE__) + +#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS) +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK) 
+#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS) +#define vx_anonpages_avail(m,p) vx_pages_avail(m, p, VLIMIT_ANON) + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.17.13/include/linux/vs_network.h linux-2.6.17.13-vs2.0.2.1/include/linux/vs_network.h --- linux-2.6.17.13/include/linux/vs_network.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vs_network.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,215 @@ +#ifndef _NX_VS_NETWORK_H +#define _NX_VS_NETWORK_H + +#include "vserver/network.h" +#include "vserver/debug.h" + + +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__) + +static inline struct nx_info *__get_nx_info(struct nx_info *nxi, + const char *_file, int _line) +{ + if (!nxi) + return NULL; + + vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + return nxi; +} + + +extern void free_nx_info(struct nx_info *); + +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) + +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) +{ + if (!nxi) + return; + + vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + if (atomic_dec_and_test(&nxi->nx_usecnt)) + free_nx_info(nxi); +} + + +#define init_nx_info(p,i) __init_nx_info(p,i,__FILE__,__LINE__) + +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + if (nxi) { + vxlprintk(VXD_CBIT(nid, 3), + "init_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + } + *nxp = nxi; +} + + +#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__) + +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + struct nx_info *nxo; + + if 
(!nxi) + return; + + vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + nxo = xchg(nxp, nxi); + BUG_ON(nxo); +} + +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__) + +static inline void __clr_nx_info(struct nx_info **nxp, + const char *_file, int _line) +{ + struct nx_info *nxo; + + nxo = xchg(nxp, NULL); + if (!nxo) + return; + + vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])", + nxo, nxo?nxo->nx_id:0, + nxo?atomic_read(&nxo->nx_usecnt):0, + _file, _line); + + if (atomic_dec_and_test(&nxo->nx_usecnt)) + free_nx_info(nxo); +} + + +#define claim_nx_info(v,p) __claim_nx_info(v,p,__FILE__,__LINE__) + +static inline void __claim_nx_info(struct nx_info *nxi, + struct task_struct *task, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_tasks):0, + task, _file, _line); + + atomic_inc(&nxi->nx_tasks); +} + + +extern void unhash_nx_info(struct nx_info *); + +#define release_nx_info(v,p) __release_nx_info(v,p,__FILE__,__LINE__) + +static inline void __release_nx_info(struct nx_info *nxi, + struct task_struct *task, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_tasks):0, + task, _file, _line); + + might_sleep(); + + if (atomic_dec_and_test(&nxi->nx_tasks)) + unhash_nx_info(nxi); +} + + +#define task_get_nx_info(i) __task_get_nx_info(i,__FILE__,__LINE__) + +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct nx_info *nxi; + + task_lock(p); + vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)", + p, _file, _line); + nxi = __get_nx_info(p->nx_info, _file, _line); + task_unlock(p); + return nxi; +} + + 
+#define nx_task_nid(t) ((t)->nid) + +#define nx_current_nid() nx_task_nid(current) + +#define nx_check(c,m) __nx_check(nx_current_nid(),c,m) + +#define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1) + + +#define __nx_state(v) ((v) ? ((v)->nx_state) : 0) + +#define nx_info_state(v,m) (__nx_state(v) & (m)) + + +#define __nx_flags(v) ((v) ? (v)->nx_flags : 0) + +#define nx_current_flags() __nx_flags(current->nx_info) + +#define nx_info_flags(v,m,f) \ + vx_check_flags(__nx_flags(v),(m),(f)) + +#define task_nx_flags(t,m,f) \ + ((t) && nx_info_flags((t)->nx_info, (m), (f))) + +#define nx_flags(m,f) nx_info_flags(current->nx_info,(m),(f)) + + +/* context caps */ + +#define __nx_ncaps(v) ((v) ? (v)->nx_ncaps : 0) + +#define nx_current_ncaps() __nx_ncaps(current->nx_info) + +#define nx_info_ncaps(v,c) (__nx_ncaps(v) & (c)) + +#define nx_ncaps(c) nx_info_ncaps(current->nx_info,(c)) + + +/* return 1 if nxi is NULL or addr equals one of the first nbipv4 entries of nxi->ipv4[], else 0 */ +static inline int addr_in_nx_info(struct nx_info *nxi, uint32_t addr) +{ + int n,i; + + if (!nxi) + return 1; + + n = nxi->nbipv4; + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == addr) + return 1; + } + return 0; +} + +static inline void exit_nx_info(struct task_struct *p) +{ + if (p->nx_info) + release_nx_info(p->nx_info, p); +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.17.13/include/linux/vs_sched.h linux-2.6.17.13-vs2.0.2.1/include/linux/vs_sched.h --- linux-2.6.17.13/include/linux/vs_sched.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vs_sched.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,92 @@ +#ifndef _VX_VS_SCHED_H +#define _VX_VS_SCHED_H + +#include "vserver/sched.h" + + +#define VAVAVOOM_RATIO 50 + +#define MAX_PRIO_BIAS 20 +#define MIN_PRIO_BIAS -20 + + +static inline int vx_tokens_avail(struct vx_info *vxi) +{ + return atomic_read(&vxi->sched.tokens); +} + +static inline void vx_consume_token(struct vx_info *vxi) +{ + atomic_dec(&vxi->sched.tokens); +} + +static inline int vx_need_resched(struct task_struct *p) +{ +#ifdef CONFIG_VSERVER_HARDCPU + 
struct vx_info *vxi = p->vx_info; +#endif + int slice = --p->time_slice; + +#ifdef CONFIG_VSERVER_HARDCPU + if (vxi) { + int tokens; + + if ((tokens = vx_tokens_avail(vxi)) > 0) + vx_consume_token(vxi); + /* for tokens > 0, one token was consumed */ + if (tokens < 2) + return 1; + } +#endif + return (slice == 0); +} + + +static inline void vx_onhold_inc(struct vx_info *vxi) +{ + int onhold = atomic_read(&vxi->cvirt.nr_onhold); + + atomic_inc(&vxi->cvirt.nr_onhold); + if (!onhold) + vxi->cvirt.onhold_last = jiffies; +} + +static inline void __vx_onhold_update(struct vx_info *vxi) +{ + int cpu = smp_processor_id(); + uint32_t now = jiffies; + uint32_t delta = now - vxi->cvirt.onhold_last; + + vxi->cvirt.onhold_last = now; + vxi->sched.cpu[cpu].hold_ticks += delta; +} + +static inline void vx_onhold_dec(struct vx_info *vxi) +{ + if (atomic_dec_and_test(&vxi->cvirt.nr_onhold)) + __vx_onhold_update(vxi); +} + +static inline void vx_account_user(struct vx_info *vxi, + cputime_t cputime, int nice) +{ + int cpu = smp_processor_id(); + + if (!vxi) + return; + vxi->sched.cpu[cpu].user_ticks += cputime; +} + +static inline void vx_account_system(struct vx_info *vxi, + cputime_t cputime, int idle) +{ + int cpu = smp_processor_id(); + + if (!vxi) + return; + vxi->sched.cpu[cpu].sys_ticks += cputime; +} + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.17.13/include/linux/vs_socket.h linux-2.6.17.13-vs2.0.2.1/include/linux/vs_socket.h --- linux-2.6.17.13/include/linux/vs_socket.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vs_socket.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,57 @@ +#ifndef _VX_VS_SOCKET_H +#define _VX_VS_SOCKET_H + +#include "vserver/debug.h" + + +/* socket accounting */ + +#include + +static inline int vx_sock_type(int family) +{ + int type = 4; + + if (family > 0 && family < 3) + type = family; + else if (family == PF_INET6) + type = 3; + return type; +} + +#define vx_acc_sock(v,f,p,s) \ + __vx_acc_sock((v), 
(f), (p), (s), __FILE__, __LINE__) + +static inline void __vx_acc_sock(struct vx_info *vxi, + int family, int pos, int size, char *file, int line) +{ + if (vxi) { + int type = vx_sock_type(family); + + atomic_inc(&vxi->cacct.sock[type][pos].count); + atomic_add(size, &vxi->cacct.sock[type][pos].total); + } +} + +#define vx_sock_recv(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s)) +#define vx_sock_send(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s)) +#define vx_sock_fail(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s)) + + +#define sock_vx_init(s) do { \ + (s)->sk_xid = 0; \ + (s)->sk_vx_info = NULL; \ + } while (0) + +#define sock_nx_init(s) do { \ + (s)->sk_nid = 0; \ + (s)->sk_nx_info = NULL; \ + } while (0) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/context.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/context.h --- linux-2.6.17.13/include/linux/vserver/context.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/context.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,172 @@ +#ifndef _VX_CONTEXT_H +#define _VX_CONTEXT_H + +#include + + +#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ + +#define VX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ + +/* context flags */ + +#define VXF_INFO_LOCK 0x00000001 +#define VXF_INFO_SCHED 0x00000002 +#define VXF_INFO_NPROC 0x00000004 +#define VXF_INFO_PRIVATE 0x00000008 + +#define VXF_INFO_INIT 0x00000010 +#define VXF_INFO_HIDE 0x00000020 +#define VXF_INFO_ULIMIT 0x00000040 +#define VXF_INFO_NSPACE 0x00000080 + +#define VXF_SCHED_HARD 0x00000100 +#define VXF_SCHED_PRIO 0x00000200 +#define VXF_SCHED_PAUSE 0x00000400 + +#define VXF_VIRT_MEM 0x00010000 +#define VXF_VIRT_UPTIME 0x00020000 +#define VXF_VIRT_CPU 0x00040000 +#define VXF_VIRT_LOAD 0x00080000 + +#define VXF_HIDE_MOUNT 0x01000000 +#define VXF_HIDE_NETIF 
0x02000000 + +#define VXF_STATE_SETUP (1ULL<<32) +#define VXF_STATE_INIT (1ULL<<33) + +#define VXF_SC_HELPER (1ULL<<36) +#define VXF_REBOOT_KILL (1ULL<<37) +#define VXF_PERSISTENT (1ULL<<38) + +#define VXF_FORK_RSS (1ULL<<48) +#define VXF_PROLIFIC (1ULL<<49) + +#define VXF_IGNEG_NICE (1ULL<<52) + +#define VXF_ONE_TIME (0x0003ULL<<32) + +#define VXF_INIT_SET (VXF_STATE_SETUP|VXF_STATE_INIT) + + +/* context migration */ + +#define VXM_SET_INIT 0x00000001 +#define VXM_SET_REAPER 0x00000002 + +/* context caps */ + +#define VXC_CAP_MASK 0x00000000 + +#define VXC_SET_UTSNAME 0x00000001 +#define VXC_SET_RLIMIT 0x00000002 + +#define VXC_RAW_ICMP 0x00000100 +#define VXC_SYSLOG 0x00001000 + +#define VXC_SECURE_MOUNT 0x00010000 +#define VXC_SECURE_REMOUNT 0x00020000 +#define VXC_BINARY_MOUNT 0x00040000 + +#define VXC_QUOTA_CTL 0x00100000 + + +/* context state changes */ + +enum { + VSC_STARTUP = 1, + VSC_SHUTDOWN, + + VSC_NETUP, + VSC_NETDOWN, +}; + + +#ifdef __KERNEL__ + +#include +#include +#include + +#include "limit_def.h" +#include "sched_def.h" +#include "cvirt_def.h" + +struct vx_info { + struct hlist_node vx_hlist; /* linked list of contexts */ + xid_t vx_id; /* context id */ + atomic_t vx_usecnt; /* usage count */ + atomic_t vx_tasks; /* tasks count */ + struct vx_info *vx_parent; /* parent context */ + int vx_state; /* context state */ + + struct namespace *vx_namespace; /* private namespace */ + struct fs_struct *vx_fs; /* private namespace fs */ + uint64_t vx_flags; /* context flags */ + uint64_t vx_bcaps; /* bounding caps (system) */ + uint64_t vx_ccaps; /* context caps (vserver) */ + + struct task_struct *vx_reaper; /* guest reaper process */ + pid_t vx_initpid; /* PID of guest init */ + + struct _vx_limit limit; /* vserver limits */ + struct _vx_sched sched; /* vserver scheduler */ + struct _vx_cvirt cvirt; /* virtual/bias stuff */ + struct _vx_cacct cacct; /* context accounting */ + + wait_queue_head_t vx_wait; /* context exit waitqueue */ + int reboot_cmd; /* 
last sys_reboot() cmd */ + int exit_code; /* last process exit code */ + + char vx_name[65]; /* vserver name */ +}; + + +/* status flags */ + +#define VXS_HASHED 0x0001 +#define VXS_PAUSED 0x0010 +#define VXS_ONHOLD 0x0020 +#define VXS_SHUTDOWN 0x0100 +#define VXS_HELPER 0x1000 +#define VXS_RELEASED 0x8000 + +/* check conditions */ + +#define VX_ADMIN 0x0001 +#define VX_WATCH 0x0002 +#define VX_HIDE 0x0004 +#define VX_HOSTID 0x0008 + +#define VX_IDENT 0x0010 +#define VX_EQUIV 0x0020 +#define VX_PARENT 0x0040 +#define VX_CHILD 0x0080 + +#define VX_ARG_MASK 0x00F0 + +#define VX_DYNAMIC 0x0100 +#define VX_STATIC 0x0200 + +#define VX_ATR_MASK 0x0F00 + + +extern void claim_vx_info(struct vx_info *, struct task_struct *); +extern void release_vx_info(struct vx_info *, struct task_struct *); + +extern struct vx_info *lookup_vx_info(int); +extern struct vx_info *lookup_or_create_vx_info(int); + +extern int get_xid_list(int, unsigned int *, int); +extern int xid_is_hashed(xid_t); + +extern int vx_migrate_task(struct task_struct *, struct vx_info *); + +extern long vs_state_change(struct vx_info *, unsigned int); + + +#endif /* __KERNEL__ */ +#else /* _VX_CONTEXT_H */ +#warning duplicate inclusion +#endif /* _VX_CONTEXT_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/context_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/context_cmd.h --- linux-2.6.17.13/include/linux/vserver/context_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/context_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,84 @@ +#ifndef _VX_CONTEXT_CMD_H +#define _VX_CONTEXT_CMD_H + + +/* vinfo commands */ + +#define VCMD_task_xid VC_CMD(VINFO, 1, 0) + +#ifdef __KERNEL__ +extern int vc_task_xid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_vx_info VC_CMD(VINFO, 5, 0) + +struct vcmd_vx_info_v0 { + uint32_t xid; + uint32_t initpid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_vx_info(uint32_t, void __user *); + +#endif 
/* __KERNEL__ */ + + +/* context commands */ + +#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0) +#define VCMD_ctx_create VC_CMD(VPROC, 1, 1) + +struct vcmd_ctx_create { + uint64_t flagword; +}; + +#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0) +#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1) + +struct vcmd_ctx_migrate { + uint64_t flagword; +}; + +#ifdef __KERNEL__ +extern int vc_ctx_create(uint32_t, void __user *); +extern int vc_ctx_migrate(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + + +/* flag commands */ + +#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0) +#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0) + +struct vcmd_ctx_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_cflags(uint32_t, void __user *); +extern int vc_set_cflags(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + + +/* context caps commands */ + +#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 0) +#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 0) + +struct vcmd_ctx_caps_v0 { + uint64_t bcaps; + uint64_t ccaps; + uint64_t cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ccaps(uint32_t, void __user *); +extern int vc_set_ccaps(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_CONTEXT_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/cvirt.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/cvirt.h --- linux-2.6.17.13/include/linux/vserver/cvirt.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/cvirt.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,26 @@ +#ifndef _VX_CVIRT_H +#define _VX_CVIRT_H + + +#ifdef __KERNEL__ + +struct timespec; + +void vx_vsi_uptime(struct timespec *, struct timespec *); + + +struct vx_info; + +void vx_update_load(struct vx_info *); + + +int vx_uts_virt_handler(struct ctl_table *ctl, int write, xid_t xid, + void **datap, size_t *lenp); + + +int vx_do_syslog(int, char __user *, int); + +#endif /* __KERNEL__ */ +#else /* _VX_CVIRT_H */ +#warning duplicate inclusion +#endif /* 
_VX_CVIRT_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/cvirt_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/cvirt_cmd.h --- linux-2.6.17.13/include/linux/vserver/cvirt_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/cvirt_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,35 @@ +#ifndef _VX_CVIRT_CMD_H +#define _VX_CVIRT_CMD_H + + +/* virtual host info name commands */ + +#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0) +#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0) + +struct vcmd_vhi_name_v0 { + uint32_t field; + char name[65]; +}; + + +enum vhi_name_field { + VHIN_CONTEXT=0, + VHIN_SYSNAME, + VHIN_NODENAME, + VHIN_RELEASE, + VHIN_VERSION, + VHIN_MACHINE, + VHIN_DOMAINNAME, +}; + + +#ifdef __KERNEL__ + +#include + +extern int vc_set_vhi_name(uint32_t, void __user *); +extern int vc_get_vhi_name(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_CVIRT_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/cvirt_def.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/cvirt_def.h --- linux-2.6.17.13/include/linux/vserver/cvirt_def.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/cvirt_def.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,77 @@ +#ifndef _VX_CVIRT_DEF_H +#define _VX_CVIRT_DEF_H + +#include +#include +#include +#include +#include +#include + + +struct _vx_usage_stat { + uint64_t user; + uint64_t nice; + uint64_t system; + uint64_t softirq; + uint64_t irq; + uint64_t idle; + uint64_t iowait; +}; + +struct _vx_syslog { + wait_queue_head_t log_wait; + spinlock_t logbuf_lock; /* lock for the log buffer */ + + unsigned long log_start; /* next char to be read by syslog() */ + unsigned long con_start; /* next char to be sent to consoles */ + unsigned long log_end; /* most-recently-written-char + 1 */ + unsigned long logged_chars; /* #chars since last read+clear operation */ + + char log_buf[1024]; +}; + + +/* context sub struct */ + +struct _vx_cvirt { + int 
max_threads; /* maximum allowed threads */ + atomic_t nr_threads; /* number of current threads */ + atomic_t nr_running; /* number of running threads */ + atomic_t nr_uninterruptible; /* number of uninterruptible threads */ + + atomic_t nr_onhold; /* processes on hold */ + uint32_t onhold_last; /* jiffies when put on hold */ + + struct timespec bias_idle; + struct timespec bias_uptime; /* context creation point */ + uint64_t bias_clock; /* offset in clock_t */ + + struct new_utsname utsname; + + spinlock_t load_lock; /* lock for the load averages */ + atomic_t load_updates; /* nr of load updates done so far */ + uint32_t load_last; /* last time load was calculated */ + uint32_t load[3]; /* load averages 1,5,15 */ + + atomic_t total_forks; /* number of forks so far */ + + struct _vx_usage_stat cpustat[NR_CPUS]; + + struct _vx_syslog syslog; +}; + +struct _vx_sock_acc { + atomic_t count; + atomic_t total; +}; + +/* context sub struct */ + +struct _vx_cacct { + unsigned long total_forks; + + struct _vx_sock_acc sock[5][3]; +}; + +#endif /* _VX_CVIRT_DEF_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/debug.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/debug.h --- linux-2.6.17.13/include/linux/vserver/debug.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/debug.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,298 @@ +#ifndef _VX_DEBUG_H +#define _VX_DEBUG_H + + +#define VXD_CBIT(n,m) (vx_debug_ ## n & (1 << (m))) +#define VXD_CMIN(n,m) (vx_debug_ ## n > (m)) +#define VXD_MASK(n,m) (vx_debug_ ## n & (m)) + +#define VXD_QPOS(v,p) (((uint32_t)(v) >> ((p)*8)) & 0xFF) +#define VXD_QUAD(v) VXD_QPOS(v,0), VXD_QPOS(v,1), \ + VXD_QPOS(v,2), VXD_QPOS(v,3) +#define VXF_QUAD "%u.%u.%u.%u" + +#define VXD_DEV(d) (d), (d)->bd_inode->i_ino, \ + imajor((d)->bd_inode), iminor((d)->bd_inode) +#define VXF_DEV "%p[%lu,%d:%d]" + + +#define __FUNC__ __func__ + + +#ifdef CONFIG_VSERVER_DEBUG + +extern unsigned int vx_debug_switch; +extern unsigned int
vx_debug_xid; +extern unsigned int vx_debug_nid; +extern unsigned int vx_debug_net; +extern unsigned int vx_debug_limit; +extern unsigned int vx_debug_cres; +extern unsigned int vx_debug_dlim; +extern unsigned int vx_debug_cvirt; +extern unsigned int vx_debug_misc; + + +#define VX_LOGLEVEL "vxD: " +#define VX_WARNLEVEL KERN_WARNING "vxW: " + +#define vxdprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f "\n" , ##x); \ + } while (0) + +#define vxlprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f " @%s:%d\n", x); \ + } while (0) + +#define vxfprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \ + } while (0) + + +#define vxwprintk(c,f,x...) \ + do { \ + if (c) \ + printk(VX_WARNLEVEL f "\n" , ##x); \ + } while (0) + +#define vxd_path(d,m) \ + ({ static char _buffer[PATH_MAX]; \ + d_path((d), (m), _buffer, sizeof(_buffer)); }) + +#define vxd_cond_path(n) \ + ((n) ? vxd_path((n)->dentry, (n)->mnt) : "" ) + +#else /* CONFIG_VSERVER_DEBUG */ + +#define vx_debug_switch 0 +#define vx_debug_xid 0 +#define vx_debug_nid 0 +#define vx_debug_net 0 +#define vx_debug_limit 0 +#define vx_debug_cres 0 +#define vx_debug_dlim 0 +#define vx_debug_cvirt 0 + +#define vxdprintk(x...) do { } while (0) +#define vxlprintk(x...) do { } while (0) +#define vxfprintk(x...) do { } while (0) +#define vxwprintk(x...) 
do { } while (0) + +#define vxd_path "" +#define vxd_cond_path vxd_path + +#endif /* CONFIG_VSERVER_DEBUG */ + + +/* history stuff */ + +#ifdef CONFIG_VSERVER_HISTORY + + +extern unsigned volatile int vxh_active; + +struct _vxhe_vxi { + struct vx_info *ptr; + unsigned xid; + unsigned usecnt; + unsigned tasks; +}; + +struct _vxhe_set_clr { + void *data; +}; + +struct _vxhe_loc_lookup { + unsigned arg; +}; + +enum { + VXH_UNUSED=0, + VXH_THROW_OOPS=1, + + VXH_GET_VX_INFO, + VXH_PUT_VX_INFO, + VXH_INIT_VX_INFO, + VXH_SET_VX_INFO, + VXH_CLR_VX_INFO, + VXH_CLAIM_VX_INFO, + VXH_RELEASE_VX_INFO, + VXH_ALLOC_VX_INFO, + VXH_DEALLOC_VX_INFO, + VXH_HASH_VX_INFO, + VXH_UNHASH_VX_INFO, + VXH_LOC_VX_INFO, + VXH_LOOKUP_VX_INFO, + VXH_CREATE_VX_INFO, +}; + +struct _vx_hist_entry { + void *loc; + unsigned short seq; + unsigned short type; + struct _vxhe_vxi vxi; + union { + struct _vxhe_set_clr sc; + struct _vxhe_loc_lookup ll; + }; +}; + +struct _vx_hist_entry *vxh_advance(void *loc); + + +static inline +void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi) +{ + entry->vxi.ptr = vxi; + if (vxi) { + entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt); + entry->vxi.tasks = atomic_read(&vxi->vx_tasks); + entry->vxi.xid = vxi->vx_id; + } +} + + +#define __HERE__ current_text_addr() + +#define __VXH_BODY(__type, __data, __here) \ + struct _vx_hist_entry *entry; \ + \ + preempt_disable(); \ + entry = vxh_advance(__here); \ + __data; \ + entry->type = __type; \ + preempt_enable(); + + + /* pass vxi only */ + +#define __VXH_SMPL \ + __vxh_copy_vxi(entry, vxi) + +static inline +void __vxh_smpl(struct vx_info *vxi, int __type, void *__here) +{ + __VXH_BODY(__type, __VXH_SMPL, __here) +} + + /* pass vxi and data (void *) */ + +#define __VXH_DATA \ + __vxh_copy_vxi(entry, vxi); \ + entry->sc.data = data + +static inline +void __vxh_data(struct vx_info *vxi, void *data, + int __type, void *__here) +{ + __VXH_BODY(__type, __VXH_DATA, __here) +} + + /* pass vxi and arg (long) */ + 
+#define __VXH_LONG \ + __vxh_copy_vxi(entry, vxi); \ + entry->ll.arg = arg + +static inline +void __vxh_long(struct vx_info *vxi, long arg, + int __type, void *__here) +{ + __VXH_BODY(__type, __VXH_LONG, __here) +} + + +static inline +void __vxh_throw_oops(void *__here) +{ + __VXH_BODY(VXH_THROW_OOPS, {}, __here); + /* prevent further acquisition */ + vxh_active = 0; +} + + +#define vxh_throw_oops() __vxh_throw_oops(__HERE__); + +#define __vxh_get_vx_info(v,h) __vxh_smpl(v, VXH_GET_VX_INFO, h); +#define __vxh_put_vx_info(v,h) __vxh_smpl(v, VXH_PUT_VX_INFO, h); + +#define __vxh_init_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_INIT_VX_INFO, h); +#define __vxh_set_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_SET_VX_INFO, h); +#define __vxh_clr_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_CLR_VX_INFO, h); + +#define __vxh_claim_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_CLAIM_VX_INFO, h); +#define __vxh_release_vx_info(v,d,h) \ + __vxh_data(v,d, VXH_RELEASE_VX_INFO, h); + +#define vxh_alloc_vx_info(v) \ + __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__); +#define vxh_dealloc_vx_info(v) \ + __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__); + +#define vxh_hash_vx_info(v) \ + __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__); +#define vxh_unhash_vx_info(v) \ + __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__); + +#define vxh_loc_vx_info(v,l) \ + __vxh_long(v,l, VXH_LOC_VX_INFO, __HERE__); +#define vxh_lookup_vx_info(v,l) \ + __vxh_long(v,l, VXH_LOOKUP_VX_INFO, __HERE__); +#define vxh_create_vx_info(v,l) \ + __vxh_long(v,l, VXH_CREATE_VX_INFO, __HERE__); + +extern void vxh_dump_history(void); + + +#else /* CONFIG_VSERVER_HISTORY */ + +#define __HERE__ 0 + +#define vxh_throw_oops() do { } while (0) + +#define __vxh_get_vx_info(v,h) do { } while (0) +#define __vxh_put_vx_info(v,h) do { } while (0) + +#define __vxh_init_vx_info(v,d,h) do { } while (0) +#define __vxh_set_vx_info(v,d,h) do { } while (0) +#define __vxh_clr_vx_info(v,d,h) do { } while (0) + +#define __vxh_claim_vx_info(v,d,h) do { } while (0) +#define 
__vxh_release_vx_info(v,d,h) do { } while (0) + +#define vxh_alloc_vx_info(v) do { } while (0) +#define vxh_dealloc_vx_info(v) do { } while (0) + +#define vxh_hash_vx_info(v) do { } while (0) +#define vxh_unhash_vx_info(v) do { } while (0) + +#define vxh_loc_vx_info(a,v) do { } while (0) +#define vxh_lookup_vx_info(a,v) do { } while (0) +#define vxh_create_vx_info(a,v) do { } while (0) + +#define vxh_dump_history() do { } while (0) + + +#endif /* CONFIG_VSERVER_HISTORY */ + + +#ifdef CONFIG_VSERVER_DEBUG +#define vxd_assert_lock(l) assert_spin_locked(l) +#define vxd_assert(c,f,x...) vxlprintk(!(c), \ + "assertion [" f "] failed.", ##x, __FILE__, __LINE__) +#else +#define vxd_assert_lock(l) do { } while (0) +#define vxd_assert(c,f,x...) do { } while (0) +#endif + + +#endif /* _VX_DEBUG_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/debug_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/debug_cmd.h --- linux-2.6.17.13/include/linux/vserver/debug_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/debug_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,14 @@ +#ifndef _VX_DEBUG_CMD_H +#define _VX_DEBUG_CMD_H + + +/* debug commands */ + +#define VCMD_dump_history VC_CMD(DEBUG, 1, 0) + +#ifdef __KERNEL__ + +extern int vc_dump_history(uint32_t); + +#endif /* __KERNEL__ */ +#endif /* _VX_DEBUG_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/dlimit.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/dlimit.h --- linux-2.6.17.13/include/linux/vserver/dlimit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/dlimit.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,53 @@ +#ifndef _VX_DLIMIT_H +#define _VX_DLIMIT_H + +#include "switch.h" + +#define CDLIM_UNSET (0ULL) +#define CDLIM_INFINITY (~0ULL) +#define CDLIM_KEEP (~1ULL) + + +#ifdef __KERNEL__ + +#include + +struct super_block; + +struct dl_info { + struct hlist_node dl_hlist; /* linked list of contexts */ + struct rcu_head dl_rcu; /* the 
rcu head */ + xid_t dl_xid; /* context id */ + atomic_t dl_usecnt; /* usage count */ + atomic_t dl_refcnt; /* reference count */ + + struct super_block *dl_sb; /* associated superblock */ + + spinlock_t dl_lock; /* protect the values */ + + uint64_t dl_space_used; /* used space in bytes */ + uint64_t dl_space_total; /* maximum space in bytes */ + uint32_t dl_inodes_used; /* used inodes */ + uint32_t dl_inodes_total; /* maximum inodes */ + + unsigned int dl_nrlmult; /* non root limit mult */ +}; + +struct rcu_head; + +extern void rcu_free_dl_info(struct rcu_head *); +extern void unhash_dl_info(struct dl_info *); + +extern struct dl_info *locate_dl_info(struct super_block *, xid_t); + + +struct kstatfs; + +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *); + +typedef uint64_t dlsize_t; + +#endif /* __KERNEL__ */ +#else /* _VX_DLIMIT_H */ +#warning duplicate inclusion +#endif /* _VX_DLIMIT_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/dlimit_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/dlimit_cmd.h --- linux-2.6.17.13/include/linux/vserver/dlimit_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/dlimit_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,69 @@ +#ifndef _VX_DLIMIT_CMD_H +#define _VX_DLIMIT_CMD_H + + +/* dlimit vserver commands */ + +#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0) +#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0) + +#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0) +#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0) + +struct vcmd_ctx_dlimit_base_v0 { + const char __user *name; + uint32_t flags; +}; + +struct vcmd_ctx_dlimit_v0 { + const char __user *name; + uint32_t space_used; /* used space in kbytes */ + uint32_t space_total; /* maximum space in kbytes */ + uint32_t inodes_used; /* used inodes */ + uint32_t inodes_total; /* maximum inodes */ + uint32_t reserved; /* reserved for root in % */ + uint32_t flags; +}; + + +#ifdef __KERNEL__ + +#ifdef CONFIG_COMPAT + +struct 
vcmd_ctx_dlimit_base_v0_x32 { + compat_uptr_t name_ptr; + uint32_t flags; +}; + +struct vcmd_ctx_dlimit_v0_x32 { + compat_uptr_t name_ptr; + uint32_t space_used; /* used space in kbytes */ + uint32_t space_total; /* maximum space in kbytes */ + uint32_t inodes_used; /* used inodes */ + uint32_t inodes_total; /* maximum inodes */ + uint32_t reserved; /* reserved for root in % */ + uint32_t flags; +}; + +#endif /* CONFIG_COMPAT */ + +#include + +extern int vc_add_dlimit(uint32_t, void __user *); +extern int vc_rem_dlimit(uint32_t, void __user *); + +extern int vc_set_dlimit(uint32_t, void __user *); +extern int vc_get_dlimit(uint32_t, void __user *); + +#ifdef CONFIG_COMPAT + +extern int vc_add_dlimit_x32(uint32_t, void __user *); +extern int vc_rem_dlimit_x32(uint32_t, void __user *); + +extern int vc_set_dlimit_x32(uint32_t, void __user *); +extern int vc_get_dlimit_x32(uint32_t, void __user *); + +#endif /* CONFIG_COMPAT */ + +#endif /* __KERNEL__ */ +#endif /* _VX_DLIMIT_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/inode.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/inode.h --- linux-2.6.17.13/include/linux/vserver/inode.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/inode.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,38 @@ +#ifndef _VX_INODE_H +#define _VX_INODE_H + + +#define IATTR_XID 0x01000000 + +#define IATTR_ADMIN 0x00000001 +#define IATTR_WATCH 0x00000002 +#define IATTR_HIDE 0x00000004 +#define IATTR_FLAGS 0x00000007 + +#define IATTR_BARRIER 0x00010000 +#define IATTR_IUNLINK 0x00020000 +#define IATTR_IMMUTABLE 0x00040000 + +#ifdef __KERNEL__ + + +#ifdef CONFIG_VSERVER_PROC_SECURE +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#else +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#endif + +#define vx_hide_check(c,m) (((m) & IATTR_HIDE) ? 
vx_check(c,m) : 1) + +#endif /* __KERNEL__ */ + +/* inode ioctls */ + +#define FIOC_GETXFLG _IOR('x', 5, long) +#define FIOC_SETXFLG _IOW('x', 6, long) + +#else /* _VX_INODE_H */ +#warning duplicate inclusion +#endif /* _VX_INODE_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/inode_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/inode_cmd.h --- linux-2.6.17.13/include/linux/vserver/inode_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/inode_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,59 @@ +#ifndef _VX_INODE_CMD_H +#define _VX_INODE_CMD_H + + +/* inode vserver commands */ + +#define VCMD_get_iattr_v0 VC_CMD(INODE, 1, 0) +#define VCMD_set_iattr_v0 VC_CMD(INODE, 2, 0) + +#define VCMD_get_iattr VC_CMD(INODE, 1, 1) +#define VCMD_set_iattr VC_CMD(INODE, 2, 1) + +struct vcmd_ctx_iattr_v0 { + /* device handle in id */ + uint64_t ino; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + +struct vcmd_ctx_iattr_v1 { + const char __user *name; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + + +#ifdef __KERNEL__ + + +#ifdef CONFIG_COMPAT + +struct vcmd_ctx_iattr_v1_x32 { + compat_uptr_t name_ptr; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + +#endif /* CONFIG_COMPAT */ + +#include + +extern int vc_get_iattr_v0(uint32_t, void __user *); +extern int vc_set_iattr_v0(uint32_t, void __user *); + +extern int vc_get_iattr(uint32_t, void __user *); +extern int vc_set_iattr(uint32_t, void __user *); + +#ifdef CONFIG_COMPAT + +extern int vc_get_iattr_x32(uint32_t, void __user *); +extern int vc_set_iattr_x32(uint32_t, void __user *); + +#endif /* CONFIG_COMPAT */ + +#endif /* __KERNEL__ */ +#endif /* _VX_INODE_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/legacy.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/legacy.h --- linux-2.6.17.13/include/linux/vserver/legacy.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/legacy.h 2006-08-17 00:28:21 +0200 
@@ -0,0 +1,49 @@ +#ifndef _VX_LEGACY_H +#define _VX_LEGACY_H + +#include "switch.h" + + +/* compatibility vserver commands */ + +#define VCMD_new_s_context VC_CMD(COMPAT, 1, 1) +#define VCMD_set_ipv4root VC_CMD(COMPAT, 2, 3) + +#define VCMD_create_context VC_CMD(VSETUP, 1, 0) + +/* compatibility vserver arguments */ + +struct vcmd_new_s_context_v1 { + uint32_t remove_cap; + uint32_t flags; +}; + +struct vcmd_set_ipv4root_v3 { + /* number of pairs in id */ + uint32_t broadcast; + struct { + uint32_t ip; + uint32_t mask; + } nx_mask_pair[NB_IPV4ROOT]; +}; + + +#define VX_INFO_LOCK 1 /* Can't request a new vx_id */ +#define VX_INFO_NPROC 4 /* Limit number of processes in a context */ +#define VX_INFO_PRIVATE 8 /* No one can join this security context */ +#define VX_INFO_INIT 16 /* This process wants to become the */ + /* logical process 1 of the security */ + /* context */ +#define VX_INFO_HIDEINFO 32 /* Hide some information in /proc */ +#define VX_INFO_ULIMIT 64 /* Use ulimit of the current process */ + /* to become the global limits */ + /* of the context */ +#define VX_INFO_NAMESPACE 128 /* save private namespace */ + + +#ifdef __KERNEL__ +extern int vc_new_s_context(uint32_t, void __user *); +extern int vc_set_ipv4root(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_LEGACY_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/limit.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/limit.h --- linux-2.6.17.13/include/linux/vserver/limit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/limit.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,20 @@ +#ifndef _VX_LIMIT_H +#define _VX_LIMIT_H + + +#define VLIMIT_NSOCK 16 +#define VLIMIT_OPENFD 17 +#define VLIMIT_ANON 18 +#define VLIMIT_SHMEM 19 + +#ifdef __KERNEL__ + +struct sysinfo; + +void vx_vsi_meminfo(struct sysinfo *); +void vx_vsi_swapinfo(struct sysinfo *); + +#define NUM_LIMITS 24 + +#endif /* __KERNEL__ */ +#endif /* _VX_LIMIT_H */ diff -NurpP --minimal
linux-2.6.17.13/include/linux/vserver/limit_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/limit_cmd.h --- linux-2.6.17.13/include/linux/vserver/limit_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/limit_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,55 @@ +#ifndef _VX_LIMIT_CMD_H +#define _VX_LIMIT_CMD_H + + +/* rlimit vserver commands */ + +#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0) +#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0) +#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0) + +struct vcmd_ctx_rlimit_v0 { + uint32_t id; + uint64_t minimum; + uint64_t softlimit; + uint64_t maximum; +}; + +struct vcmd_ctx_rlimit_mask_v0 { + uint32_t minimum; + uint32_t softlimit; + uint32_t maximum; +}; + +#define CRLIM_UNSET (0ULL) +#define CRLIM_INFINITY (~0ULL) +#define CRLIM_KEEP (~1ULL) + +#ifdef __KERNEL__ + +#ifdef CONFIG_IA32_EMULATION + +struct vcmd_ctx_rlimit_v0_x32 { + uint32_t id; + uint64_t minimum; + uint64_t softlimit; + uint64_t maximum; +} __attribute__ ((aligned (4))); + +#endif /* CONFIG_IA32_EMULATION */ + +#include + +extern int vc_get_rlimit(uint32_t, void __user *); +extern int vc_set_rlimit(uint32_t, void __user *); +extern int vc_get_rlimit_mask(uint32_t, void __user *); + +#ifdef CONFIG_IA32_EMULATION + +extern int vc_get_rlimit_x32(uint32_t, void __user *); +extern int vc_set_rlimit_x32(uint32_t, void __user *); + +#endif /* CONFIG_IA32_EMULATION */ + +#endif /* __KERNEL__ */ +#endif /* _VX_LIMIT_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/limit_def.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/limit_def.h --- linux-2.6.17.13/include/linux/vserver/limit_def.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/limit_def.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,22 @@ +#ifndef _VX_LIMIT_DEF_H +#define _VX_LIMIT_DEF_H + +#include +#include + +#include "limit.h" + + +/* context sub struct */ + +struct _vx_limit { + atomic_t ticks; + + unsigned long 
rlim[NUM_LIMITS]; /* Context limit */ + unsigned long rmax[NUM_LIMITS]; /* Context maximum */ + atomic_t rcur[NUM_LIMITS]; /* Current value */ + atomic_t lhit[NUM_LIMITS]; /* Limit hits */ +}; + + +#endif /* _VX_LIMIT_DEF_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/limit_int.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/limit_int.h --- linux-2.6.17.13/include/linux/vserver/limit_int.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/limit_int.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,76 @@ +#ifndef _VX_LIMIT_INT_H +#define _VX_LIMIT_INT_H + + +#ifdef __KERNEL__ + +#define VXD_RCRES(r) VXD_CBIT(cres, (r)) +#define VXD_RLIMIT(r) VXD_CBIT(limit, (r)) + +extern const char *vlimit_name[NUM_LIMITS]; + +static inline void __vx_acc_cres(struct vx_info *vxi, + int res, int dir, void *_data, char *_file, int _line) +{ + if (VXD_RCRES(res)) + vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5d%s (%p)", + (vxi ? vxi->vx_id : -1), vlimit_name[res], res, + (vxi ? atomic_read(&vxi->limit.rcur[res]) : 0), + (dir > 0) ? "++" : "--", _data, _file, _line); + if (!vxi) + return; + + if (dir > 0) + atomic_inc(&vxi->limit.rcur[res]); + else + atomic_dec(&vxi->limit.rcur[res]); +} + +static inline void __vx_add_cres(struct vx_info *vxi, + int res, int amount, void *_data, char *_file, int _line) +{ + if (VXD_RCRES(res)) + vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5d += %5d (%p)", + (vxi ? vxi->vx_id : -1), vlimit_name[res], res, + (vxi ? atomic_read(&vxi->limit.rcur[res]) : 0), + amount, _data, _file, _line); + if (amount == 0) + return; + if (!vxi) + return; + atomic_add(amount, &vxi->limit.rcur[res]); +} + +static inline int __vx_cres_avail(struct vx_info *vxi, + int res, int num, char *_file, int _line) +{ + unsigned long value; + + if (VXD_RLIMIT(res)) + vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d", + (vxi ? vxi->vx_id : -1), vlimit_name[res], res, + (vxi ? vxi->limit.rlim[res] : 1), + (vxi ? 
atomic_read(&vxi->limit.rcur[res]) : 0), + num, _file, _line); + if (num == 0) + return 1; + if (!vxi) + return 1; + + value = atomic_read(&vxi->limit.rcur[res]); + + if (value > vxi->limit.rmax[res]) + vxi->limit.rmax[res] = value; + + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + + if (value + num <= vxi->limit.rlim[res]) + return 1; + + atomic_inc(&vxi->limit.lhit[res]); + return 0; +} + +#endif /* __KERNEL__ */ +#endif /* _VX_LIMIT_INT_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/namespace.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/namespace.h --- linux-2.6.17.13/include/linux/vserver/namespace.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/namespace.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,15 @@ +#ifndef _VX_NAMESPACE_H +#define _VX_NAMESPACE_H + + +#include + +struct vx_info; +struct namespace; +struct fs_struct; + +extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *); + +#else /* _VX_NAMESPACE_H */ +#warning duplicate inclusion +#endif /* _VX_NAMESPACE_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/namespace_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/namespace_cmd.h --- linux-2.6.17.13/include/linux/vserver/namespace_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/namespace_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,17 @@ +#ifndef _VX_NAMESPACE_CMD_H +#define _VX_NAMESPACE_CMD_H + + +#define VCMD_enter_namespace VC_CMD(PROCALT, 1, 0) + +#define VCMD_set_namespace_v0 VC_CMD(PROCALT, 3, 0) +#define VCMD_set_namespace VC_CMD(PROCALT, 3, 1) + + +#ifdef __KERNEL__ + +extern int vc_enter_namespace(uint32_t, void __user *); +extern int vc_set_namespace(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_NAMESPACE_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/network.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/network.h ---
linux-2.6.17.13/include/linux/vserver/network.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/network.h 2006-08-25 04:33:07 +0200 @@ -0,0 +1,119 @@ +#ifndef _VX_NETWORK_H +#define _VX_NETWORK_H + +#include + + +#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ + +#define NX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ + +#define NB_IPV4ROOT 16 + + +/* network flags */ + +#define NXF_STATE_SETUP (1ULL<<32) + +#define NXF_SC_HELPER (1ULL<<36) +#define NXF_PERSISTENT (1ULL<<38) + +#define NXF_ONE_TIME (0x0001ULL<<32) + +#define NXF_INIT_SET (0) + + +/* address types */ + +#define NXA_TYPE_IPV4 1 +#define NXA_TYPE_IPV6 2 + +#define NXA_MOD_BCAST (1<<8) + +#define NXA_TYPE_ANY ((uint16_t)-1) + + +#ifdef __KERNEL__ + +#include +#include +#include +#include + + +struct nx_info { + struct hlist_node nx_hlist; /* linked list of nxinfos */ + nid_t nx_id; /* vnet id */ + atomic_t nx_usecnt; /* usage count */ + atomic_t nx_tasks; /* tasks count */ + int nx_state; /* context state */ + + uint64_t nx_flags; /* network flag word */ + uint64_t nx_ncaps; /* network capabilities */ + + int nbipv4; + __u32 ipv4[NB_IPV4ROOT]; /* Process can only bind to these IPs */ + /* The first one is used to connect */ + /* and for bind any service */ + /* The other must be used explicitly */ + __u32 mask[NB_IPV4ROOT]; /* Netmask for each ipv4 */ + /* Used to select the proper source */ + /* address for sockets */ + __u32 v4_bcast; /* Broadcast address to receive UDP */ + + char nx_name[65]; /* network context name */ +}; + + +/* status flags */ + +#define NXS_HASHED 0x0001 +#define NXS_SHUTDOWN 0x0100 +#define NXS_RELEASED 0x8000 + +extern struct nx_info *lookup_nx_info(int); + +extern int get_nid_list(int, unsigned int *, int); +extern int nid_is_hashed(nid_t); + +extern int nx_migrate_task(struct task_struct *, struct nx_info *); + +extern long vs_net_change(struct nx_info *, unsigned int); + +struct in_ifaddr; +struct net_device; + +#ifdef CONFIG_INET
+int ifa_in_nx_info(struct in_ifaddr *, struct nx_info *); +int dev_in_nx_info(struct net_device *, struct nx_info *); + +#else /* CONFIG_INET */ +static inline +int ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n) +{ + return 1; +} + +static inline +int dev_in_nx_info(struct net_device *d, struct nx_info *n) +{ + return 1; +} +#endif /* CONFIG_INET */ + +struct sock; + +#ifdef CONFIG_INET +int nx_addr_conflict(struct nx_info *, uint32_t, struct sock *); +#else /* CONFIG_INET */ +static inline +int nx_addr_conflict(struct nx_info *n, uint32_t a, struct sock *s) +{ + return 1; +} +#endif /* CONFIG_INET */ + +#endif /* __KERNEL__ */ +#else /* _VX_NETWORK_H */ +#warning duplicate inclusion +#endif /* _VX_NETWORK_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/network_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/network_cmd.h --- linux-2.6.17.13/include/linux/vserver/network_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/network_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,89 @@ +#ifndef _VX_NETWORK_CMD_H +#define _VX_NETWORK_CMD_H + + +/* vinfo commands */ + +#define VCMD_task_nid VC_CMD(VINFO, 2, 0) + +#ifdef __KERNEL__ +extern int vc_task_nid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_nx_info VC_CMD(VINFO, 6, 0) + +struct vcmd_nx_info_v0 { + uint32_t nid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_nx_info(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0) +#define VCMD_net_create VC_CMD(VNET, 1, 1) + +struct vcmd_net_create { + uint64_t flagword; +}; + +#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0) + +#define VCMD_net_add VC_CMD(NETALT, 1, 0) +#define VCMD_net_remove VC_CMD(NETALT, 2, 0) + +struct vcmd_net_addr_v0 { + uint16_t type; + uint16_t count; + uint32_t ip[4]; + uint32_t mask[4]; + /* more to come */ +}; + + +#ifdef __KERNEL__ +extern int vc_net_create(uint32_t, void __user *); +extern int 
vc_net_migrate(uint32_t, void __user *); + +extern int vc_net_add(uint32_t, void __user *); +extern int vc_net_remove(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + + +/* flag commands */ + +#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0) +#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0) + +struct vcmd_net_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_nflags(uint32_t, void __user *); +extern int vc_set_nflags(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + + +/* network caps commands */ + +#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0) +#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0) + +struct vcmd_net_caps_v0 { + uint64_t ncaps; + uint64_t cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ncaps(uint32_t, void __user *); +extern int vc_set_ncaps(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_NETWORK_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/sched.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/sched.h --- linux-2.6.17.13/include/linux/vserver/sched.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/sched.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,26 @@ +#ifndef _VX_SCHED_H +#define _VX_SCHED_H + + +#ifdef __KERNEL__ + +struct timespec; + +void vx_vsi_uptime(struct timespec *, struct timespec *); + + +struct vx_info; + +void vx_update_load(struct vx_info *); + + +struct task_struct; + +int vx_effective_vavavoom(struct vx_info *, int); + +int vx_tokens_recalc(struct vx_info *); + +#endif /* __KERNEL__ */ +#else /* _VX_SCHED_H */ +#warning duplicate inclusion +#endif /* _VX_SCHED_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/sched_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/sched_cmd.h --- linux-2.6.17.13/include/linux/vserver/sched_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/sched_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,48 @@ +#ifndef _VX_SCHED_CMD_H +#define _VX_SCHED_CMD_H + +
+/* sched vserver commands */ + +#define VCMD_set_sched_v2 VC_CMD(SCHED, 1, 2) +#define VCMD_set_sched VC_CMD(SCHED, 1, 3) + +struct vcmd_set_sched_v2 { + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + uint64_t cpu_mask; +}; + +struct vcmd_set_sched_v3 { + uint32_t set_mask; + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + int32_t priority_bias; +}; + + +#define VXSM_FILL_RATE 0x0001 +#define VXSM_INTERVAL 0x0002 +#define VXSM_TOKENS 0x0010 +#define VXSM_TOKENS_MIN 0x0020 +#define VXSM_TOKENS_MAX 0x0040 +#define VXSM_PRIO_BIAS 0x0100 + +#define SCHED_KEEP (-2) + +#ifdef __KERNEL__ + +#include + +extern int vc_set_sched_v1(uint32_t, void __user *); +extern int vc_set_sched_v2(uint32_t, void __user *); +extern int vc_set_sched(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_SCHED_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/sched_def.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/sched_def.h --- linux-2.6.17.13/include/linux/vserver/sched_def.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/sched_def.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,38 @@ +#ifndef _VX_SCHED_DEF_H +#define _VX_SCHED_DEF_H + +#include +#include +#include +#include +#include + + +struct _vx_ticks { + uint64_t user_ticks; /* token tick events */ + uint64_t sys_ticks; /* token tick events */ + uint64_t hold_ticks; /* token ticks paused */ + uint64_t unused[5]; /* cacheline ? */ +}; + +/* context sub struct */ + +struct _vx_sched { + atomic_t tokens; /* number of CPU tokens */ + spinlock_t tokens_lock; /* lock for token bucket */ + + int fill_rate; /* Fill rate: add X tokens... 
*/ + int interval; /* Divisor: per Y jiffies */ + int tokens_min; /* Limit: minimum for unhold */ + int tokens_max; /* Limit: no more than N tokens */ + uint32_t jiffies; /* last time accounted */ + + int priority_bias; /* bias offset for priority */ + int vavavoom; /* last calculated vavavoom */ + + cpumask_t cpus_allowed; /* cpu mask for context */ + + struct _vx_ticks cpu[NR_CPUS]; +}; + +#endif /* _VX_SCHED_DEF_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/signal.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/signal.h --- linux-2.6.17.13/include/linux/vserver/signal.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/signal.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,14 @@ +#ifndef _VX_SIGNAL_H +#define _VX_SIGNAL_H + + +#ifdef __KERNEL__ + +struct vx_info; + +int vx_info_kill(struct vx_info *, int, int); + +#endif /* __KERNEL__ */ +#else /* _VX_SIGNAL_H */ +#warning duplicate inclusion +#endif /* _VX_SIGNAL_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/signal_cmd.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/signal_cmd.h --- linux-2.6.17.13/include/linux/vserver/signal_cmd.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/signal_cmd.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,26 @@ +#ifndef _VX_SIGNAL_CMD_H +#define _VX_SIGNAL_CMD_H + + +/* signalling vserver commands */ + +#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0) +#define VCMD_wait_exit VC_CMD(EVENT, 99, 0) + +struct vcmd_ctx_kill_v0 { + int32_t pid; + int32_t sig; +}; + +struct vcmd_wait_exit_v0 { + int32_t reboot_cmd; + int32_t exit_code; +}; + +#ifdef __KERNEL__ + +extern int vc_ctx_kill(uint32_t, void __user *); +extern int vc_wait_exit(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_SIGNAL_CMD_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/switch.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/switch.h --- linux-2.6.17.13/include/linux/vserver/switch.h 1970-01-01 
01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/switch.h 2006-08-25 05:12:39 +0200 @@ -0,0 +1,98 @@ +#ifndef _VX_SWITCH_H +#define _VX_SWITCH_H + +#include + + +#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) +#define VC_COMMAND(c) (((c) >> 16) & 0xFF) +#define VC_VERSION(c) ((c) & 0xFFF) + +#define VC_CMD(c,i,v) ((((VC_CAT_ ## c) & 0x3F) << 24) \ + | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) + +/* + + Syscall Matrix V2.8 + + |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL| + |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | | + |INFO |SETUP | |MOVE | | | | | | + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICES| | + HOST | 00| 01| 02| 03| 04| 05| | 06| 07| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | | + PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + MEMORY | | | | | | | |SWAP | | + | 16| 17| 18| 19| 20| 21| | 22| 23| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | | + | 24| 25| 26| 27| 28| 29| | 30| 31| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + DISK | | | | |DLIMIT | | |INODE | | + VFS | 32| 33| 34| 35| 36| 37| | 38| 39| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + OTHER | | | | | | | |VINFO | | + | 40| 41| 42| 43| 44| 45| | 46| 47| + =======+=======+=======+=======+=======+=======+=======+ +=======+=======+ + SPECIAL|EVENT | | | |FLAGS | | | | | + | 48| 49| 50| 51| 52| 53| | 54| 55| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT | + | 56| 57| 58| 59| 60|TEST 61| | 62| 63| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + +*/ + +#define 
VC_CAT_VERSION 0 + +#define VC_CAT_VSETUP 1 +#define VC_CAT_VHOST 2 + +#define VC_CAT_VPROC 9 +#define VC_CAT_PROCALT 10 +#define VC_CAT_PROCMIG 11 +#define VC_CAT_PROCTRL 12 + +#define VC_CAT_SCHED 14 + +#define VC_CAT_VNET 25 +#define VC_CAT_NETALT 26 +#define VC_CAT_NETMIG 27 +#define VC_CAT_NETCTRL 28 + +#define VC_CAT_DLIMIT 36 +#define VC_CAT_INODE 38 + +#define VC_CAT_VINFO 46 +#define VC_CAT_EVENT 48 + +#define VC_CAT_FLAGS 52 +#define VC_CAT_DEBUG 56 +#define VC_CAT_RLIMIT 60 + +#define VC_CAT_SYSTEST 61 +#define VC_CAT_COMPAT 63 + +/* interface version */ + +#define VCI_VERSION 0x00020002 +#define VCI_LEGACY_VERSION 0x000100FF + +/* query version */ + +#define VCMD_get_version VC_CMD(VERSION, 0, 0) + + +#ifdef __KERNEL__ + +#include + + +#else /* __KERNEL__ */ +#define __user +#endif /* __KERNEL__ */ + +#endif /* _VX_SWITCH_H */ diff -NurpP --minimal linux-2.6.17.13/include/linux/vserver/xid.h linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/xid.h --- linux-2.6.17.13/include/linux/vserver/xid.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/include/linux/vserver/xid.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,146 @@ +#ifndef _VX_XID_H +#define _VX_XID_H + +#include + + +#define XID_TAG(in) (IS_TAGXID(in)) + + +#ifdef CONFIG_XID_TAG_NFSD +#define XID_TAG_NFSD 1 +#else +#define XID_TAG_NFSD 0 +#endif + + +#ifdef CONFIG_INOXID_NONE + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) (0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_GID16 + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0x0000FFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? (((gid) >> 16) & 0xFFFF) : 0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) \ + ((tag) ? 
(((gid) & 0xFFFF) | ((xid) << 16)) : (gid)) + +#endif + + +#ifdef CONFIG_INOXID_UGID24 + +#define MAX_UID 0x00FFFFFF +#define MAX_GID 0x00FFFFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0) + +#define XIDINO_UID(tag, uid, xid) \ + ((tag) ? (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16)) : (uid)) +#define XIDINO_GID(tag, gid, xid) \ + ((tag) ? (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24)) : (gid)) + +#endif + + +#ifdef CONFIG_INOXID_UID16 + +#define MAX_UID 0x0000FFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? (((uid) >> 16) & 0xFFFF) : 0) + +#define XIDINO_UID(tag, uid, xid) \ + ((tag) ? (((uid) & 0xFFFF) | ((xid) << 16)) : (uid)) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_INTERN + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) \ + ((tag) ? (xid) : 0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_RUNTIME + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(tag, uid, gid, xid) (0) + +#define XIDINO_UID(tag, uid, xid) (uid) +#define XIDINO_GID(tag, gid, xid) (gid) + +#endif + + +#ifndef CONFIG_INOXID_NONE +#define vx_current_fsxid(sb) \ + ((sb)->s_flags & MS_TAGXID ? current->xid : 0) +#else +#define vx_current_fsxid(sb) (0) +#endif + +#ifndef CONFIG_INOXID_INTERN +#define XIDINO_XID(tag, xid) (0) +#else +#define XIDINO_XID(tag, xid) ((tag) ? (xid) : 0) +#endif + +#define INOXID_UID(tag, uid, gid) \ + ((tag) ? ((uid) & MAX_UID) : (uid)) +#define INOXID_GID(tag, uid, gid) \ + ((tag) ? 
((gid) & MAX_GID) : (gid)) + + +static inline uid_t vx_map_uid(uid_t uid) +{ + if ((uid > MAX_UID) && (uid != -1)) + uid = -2; + return (uid & MAX_UID); +} + +static inline gid_t vx_map_gid(gid_t gid) +{ + if ((gid > MAX_GID) && (gid != -1)) + gid = -2; + return (gid & MAX_GID); +} + + +#ifdef CONFIG_VSERVER_LEGACY +#define FIOC_GETXID _IOR('x', 1, long) +#define FIOC_SETXID _IOW('x', 2, long) +#define FIOC_SETXIDJ _IOW('x', 3, long) +#endif + +int vx_parse_xid(char *string, xid_t *xid, int remove); +void vx_propagate_xid(struct nameidata *nd, struct inode *inode); + +#endif /* _VX_XID_H */ diff -NurpP --minimal linux-2.6.17.13/include/net/af_unix.h linux-2.6.17.13-vs2.0.2.1/include/net/af_unix.h --- linux-2.6.17.13/include/net/af_unix.h 2006-06-18 04:55:27 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/net/af_unix.h 2006-08-17 00:28:21 +0200 @@ -18,9 +18,9 @@ extern spinlock_t unix_table_lock; extern atomic_t unix_tot_inflight; -static inline struct sock *first_unix_socket(int *i) +static inline struct sock *next_unix_socket_table(int *i) { - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { + for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { if (!hlist_empty(&unix_socket_table[*i])) return __sk_head(&unix_socket_table[*i]); } @@ -29,16 +29,19 @@ static inline struct sock *first_unix_so static inline struct sock *next_unix_socket(int *i, struct sock *s) { - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. 
*/ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; + do { + if (s) + s = sk_next(s); + if (!s) + s = next_unix_socket_table(i); + } while (s && !vx_check(s->sk_xid, VX_IDENT|VX_WATCH)); + return s; +} + +static inline struct sock *first_unix_socket(int *i) +{ + *i = -1; /* next_unix_socket_table() pre-increments: start before bucket 0 */ + return next_unix_socket(i, NULL); } #define forall_unix_sockets(i, s) \ diff -NurpP --minimal linux-2.6.17.13/include/net/inet_hashtables.h linux-2.6.17.13-vs2.0.2.1/include/net/inet_hashtables.h --- linux-2.6.17.13/include/net/inet_hashtables.h 2006-04-09 13:49:58 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/net/inet_hashtables.h 2006-08-17 00:28:21 +0200 @@ -272,6 +272,25 @@ static inline int inet_iif(const struct return ((struct rtable *)skb->dst)->rt_iif; } +/* + * Check if a given address matches for an inet socket + * + * nxi: the socket's nx_info if any + * addr: to be verified address + * saddr: socket addresses + */ +static inline int inet_addr_match ( + struct nx_info *nxi, + uint32_t addr, + uint32_t saddr) +{ + if (addr && (saddr == addr)) + return 1; + if (!saddr) + return addr_in_nx_info(nxi, addr); + return 0; +} + extern struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, const unsigned short hnum, @@ -292,7 +311,7 @@ static inline struct sock * const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && !sk->sk_bound_dev_if) goto sherry_cache; diff -NurpP --minimal linux-2.6.17.13/include/net/inet_sock.h linux-2.6.17.13-vs2.0.2.1/include/net/inet_sock.h --- linux-2.6.17.13/include/net/inet_sock.h 2006-04-09 13:49:58 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/net/inet_sock.h 2006-08-17 00:28:21 +0200 @@ -115,6 +115,7 @@ struct 
inet_sock { /* Socket demultiplex comparisons on incoming packets. */ __u32 daddr; __u32 rcv_saddr; + __u32 rcv_saddr2; /* Second bound ipv4 addr, for ipv4root */ __u16 dport; __u16 num; __u32 saddr; diff -NurpP --minimal linux-2.6.17.13/include/net/inet_timewait_sock.h linux-2.6.17.13-vs2.0.2.1/include/net/inet_timewait_sock.h --- linux-2.6.17.13/include/net/inet_timewait_sock.h 2006-06-18 04:55:27 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/net/inet_timewait_sock.h 2006-08-17 00:28:21 +0200 @@ -116,6 +116,10 @@ struct inet_timewait_sock { #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot +#define tw_xid __tw_common.skc_xid +#define tw_vx_info __tw_common.skc_vx_info +#define tw_nid __tw_common.skc_nid +#define tw_nx_info __tw_common.skc_nx_info volatile unsigned char tw_substate; /* 3 bits hole, try to pack */ unsigned char tw_rcv_wscale; diff -NurpP --minimal linux-2.6.17.13/include/net/route.h linux-2.6.17.13-vs2.0.2.1/include/net/route.h --- linux-2.6.17.13/include/net/route.h 2006-06-18 04:55:28 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/net/route.h 2006-08-17 00:28:21 +0200 @@ -28,11 +28,14 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. 
@@ -144,6 +147,59 @@ static inline char rt_tos2priority(u8 to return ip_tos2prio[IPTOS_TOS(tos)>>1]; } +#define IPI_LOOPBACK htonl(INADDR_LOOPBACK) + +static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl) +{ + int err; + int i, n = nxi->nbipv4; + u32 ipv4root = nxi->ipv4[0]; + + if (ipv4root == 0) + return 0; + + if (fl->fl4_src == 0) { + if (n > 1) { + u32 foundsrc; + + err = __ip_route_output_key(rp, fl); + if (err) { + fl->fl4_src = ipv4root; + err = __ip_route_output_key(rp, fl); + } + if (err) + return err; + + foundsrc = (*rp)->rt_src; + ip_rt_put(*rp); + + for (i=0; i<n; i++) { + u32 mask = nxi->mask[i]; + u32 ipv4 = nxi->ipv4[i]; + u32 net4 = ipv4 & mask; + + if (foundsrc == ipv4) { + fl->fl4_src = ipv4; + break; + } + if (!fl->fl4_src && (foundsrc & mask) == net4) + fl->fl4_src = ipv4; + } + } + if (fl->fl4_src == 0) + fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK) + ? IPI_LOOPBACK : ipv4root; + } else { + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == fl->fl4_src) + break; + } + if (i == n) + return -EPERM; + } + return 0; +} + static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif, u8 protocol, u16 sport, u16 dport, struct sock *sk) @@ -158,7 +214,27 @@ static inline int ip_route_connect(struc .dport = dport } } }; int err; - if (!dst || !src) { + struct nx_info *nx_info = current->nx_info; + + if (sk) + nx_info = sk->sk_nx_info; + vxdprintk(VXD_CBIT(net, 4), + "ip_route_connect(%p) %p,%p;%lx", + sk, nx_info, sk ? sk->sk_socket : NULL, + ((sk && sk->sk_socket) ? sk->sk_socket->flags : 0)); /* sk may be NULL */ + + if (nx_info) { + err = ip_find_src(nx_info, rp, &fl); + if (err) + return err; + if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + fl.fl4_dst = nx_info->ipv4[0]; +#ifdef CONFIG_VSERVER_REMAP_SADDR + if (fl.fl4_src == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + fl.fl4_src = nx_info->ipv4[0]; +#endif + } + if (!fl.fl4_dst || !fl.fl4_src) { err = __ip_route_output_key(rp, &fl); if (err) return err; diff -NurpP --minimal linux-2.6.17.13/include/net/sock.h 
linux-2.6.17.13-vs2.0.2.1/include/net/sock.h --- linux-2.6.17.13/include/net/sock.h 2006-06-18 04:55:28 +0200 +++ linux-2.6.17.13-vs2.0.2.1/include/net/sock.h 2006-08-17 00:28:21 +0200 @@ -115,6 +115,10 @@ struct sock_common { atomic_t skc_refcnt; unsigned int skc_hash; struct proto *skc_prot; + xid_t skc_xid; + struct vx_info *skc_vx_info; + nid_t skc_nid; + struct nx_info *skc_nx_info; }; /** @@ -189,6 +193,10 @@ struct sock { #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot +#define sk_xid __sk_common.skc_xid +#define sk_vx_info __sk_common.skc_vx_info +#define sk_nid __sk_common.skc_nid +#define sk_nx_info __sk_common.skc_nx_info unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; diff -NurpP --minimal linux-2.6.17.13/init/version.c linux-2.6.17.13-vs2.0.2.1/init/version.c --- linux-2.6.17.13/init/version.c 2005-03-02 12:39:08 +0100 +++ linux-2.6.17.13-vs2.0.2.1/init/version.c 2006-08-17 00:28:21 +0200 @@ -31,3 +31,8 @@ EXPORT_SYMBOL(system_utsname); const char linux_banner[] = "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; + +const char vx_linux_banner[] = + "Linux version %s (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") %s\n"; + diff -NurpP --minimal linux-2.6.17.13/ipc/mqueue.c linux-2.6.17.13-vs2.0.2.1/ipc/mqueue.c --- linux-2.6.17.13/ipc/mqueue.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/ipc/mqueue.c 2006-08-17 00:28:21 +0200 @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include "util.h" @@ -149,17 +151,20 @@ static struct inode *mqueue_get_inode(st spin_lock(&mq_lock); if (u->mq_bytes + mq_bytes < u->mq_bytes || u->mq_bytes + mq_bytes > - p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) { + p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur || + !vx_ipcmsg_avail(p->vx_info, mq_bytes)) { spin_unlock(&mq_lock); goto out_inode; } u->mq_bytes += mq_bytes; + 
vx_ipcmsg_add(p->vx_info, u, mq_bytes); spin_unlock(&mq_lock); info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); if (!info->messages) { spin_lock(&mq_lock); u->mq_bytes -= mq_bytes; + vx_ipcmsg_sub(p->vx_info, u, mq_bytes); spin_unlock(&mq_lock); goto out_inode; } @@ -257,10 +262,14 @@ static void mqueue_delete_inode(struct i (info->attr.mq_maxmsg * info->attr.mq_msgsize)); user = info->user; if (user) { + struct vx_info *vxi = lookup_vx_info(user->xid); + spin_lock(&mq_lock); user->mq_bytes -= mq_bytes; + vx_ipcmsg_sub(vxi, user, mq_bytes); queues_count--; spin_unlock(&mq_lock); + put_vx_info(vxi); free_uid(user); } } @@ -739,7 +748,7 @@ asmlinkage long sys_mq_unlink(const char if (inode) atomic_inc(&inode->i_count); - err = vfs_unlink(dentry->d_parent->d_inode, dentry); + err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL); out_err: dput(dentry); diff -NurpP --minimal linux-2.6.17.13/ipc/msg.c linux-2.6.17.13-vs2.0.2.1/ipc/msg.c --- linux-2.6.17.13/ipc/msg.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/ipc/msg.c 2006-08-17 00:28:21 +0200 @@ -105,6 +105,7 @@ static int newque (key_t key, int msgflg msq->q_perm.mode = (msgflg & S_IRWXUGO); msq->q_perm.key = key; + msq->q_perm.xid = vx_current_xid(); msq->q_perm.security = NULL; retval = security_msg_queue_alloc(msq); @@ -826,6 +827,9 @@ static int sysvipc_msg_proc_show(struct { struct msg_queue *msq = it; + if (!vx_check(msq->q_perm.xid, VX_IDENT)) + return 0; + return seq_printf(s, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", msq->q_perm.key, diff -NurpP --minimal linux-2.6.17.13/ipc/sem.c linux-2.6.17.13-vs2.0.2.1/ipc/sem.c --- linux-2.6.17.13/ipc/sem.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/ipc/sem.c 2006-08-17 00:28:21 +0200 @@ -184,6 +184,7 @@ static int newary (key_t key, int nsems, sma->sem_perm.mode = (semflg & S_IRWXUGO); sma->sem_perm.key = key; + sma->sem_perm.xid = vx_current_xid(); sma->sem_perm.security = NULL; retval = 
security_sem_alloc(sma); @@ -1345,6 +1346,9 @@ static int sysvipc_sem_proc_show(struct { struct sem_array *sma = it; + if (!vx_check(sma->sem_perm.xid, VX_IDENT)) + return 0; + return seq_printf(s, "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", sma->sem_perm.key, diff -NurpP --minimal linux-2.6.17.13/ipc/shm.c linux-2.6.17.13-vs2.0.2.1/ipc/shm.c --- linux-2.6.17.13/ipc/shm.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/ipc/shm.c 2006-08-17 00:28:21 +0200 @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include @@ -117,7 +119,12 @@ static void shm_open (struct vm_area_str */ static void shm_destroy (struct shmid_kernel *shp) { - shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid); + int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + + vx_ipcshm_sub(vxi, shp, numpages); + shm_tot -= numpages; + shm_rmid (shp->id); shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) @@ -127,6 +134,7 @@ static void shm_destroy (struct shmid_ke shp->mlock_user); fput (shp->shm_file); security_shm_free(shp); + put_vx_info(vxi); ipc_rcu_putref(shp); } @@ -203,12 +211,15 @@ static int newseg (key_t key, int shmflg if (shm_tot + numpages >= shm_ctlall) return -ENOSPC; + if (!vx_ipcshm_avail(current->vx_info, numpages)) + return -ENOSPC; shp = ipc_rcu_alloc(sizeof(*shp)); if (!shp) return -ENOMEM; shp->shm_perm.key = key; + shp->shm_perm.xid = vx_current_xid(); shp->shm_perm.mode = (shmflg & S_IRWXUGO); shp->mlock_user = NULL; @@ -259,6 +270,7 @@ static int newseg (key_t key, int shmflg file->f_op = &shm_file_operations; shm_tot += numpages; + vx_ipcshm_add(current->vx_info, key, numpages); shm_unlock(shp); return shp->id; @@ -914,6 +926,9 @@ static int sysvipc_shm_proc_show(struct #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" + if 
(!vx_check(shp->shm_perm.xid, VX_IDENT)) + return 0; + if (sizeof(size_t) <= sizeof(int)) format = SMALL_STRING; else diff -NurpP --minimal linux-2.6.17.13/ipc/util.c linux-2.6.17.13-vs2.0.2.1/ipc/util.c --- linux-2.6.17.13/ipc/util.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/ipc/util.c 2006-08-17 00:28:21 +0200 @@ -158,7 +158,9 @@ int ipc_findkey(struct ipc_ids* ids, key */ for (id = 0; id <= max_id; id++) { p = ids->entries->p[id]; - if(p==NULL) + if (p==NULL) + continue; + if (!vx_check(p->xid, VX_IDENT)) continue; if (key == p->key) return id; @@ -471,6 +473,9 @@ int ipcperms (struct kern_ipc_perm *ipcp if (unlikely((err = audit_ipc_obj(ipcp)))) return err; + + if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? */ + return -1; requested_mode = (flag >> 6) | (flag >> 3) | flag; granted_mode = ipcp->mode; if (current->euid == ipcp->cuid || current->euid == ipcp->uid) diff -NurpP --minimal linux-2.6.17.13/kernel/Makefile linux-2.6.17.13-vs2.0.2.1/kernel/Makefile --- linux-2.6.17.13/kernel/Makefile 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/Makefile 2006-08-17 00:28:21 +0200 @@ -10,6 +10,8 @@ obj-y = sched.o fork.o exec_domain.o kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o +obj-y += vserver/ + obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o obj-$(CONFIG_FUTEX) += futex.o ifeq ($(CONFIG_COMPAT),y) diff -NurpP --minimal linux-2.6.17.13/kernel/capability.c linux-2.6.17.13-vs2.0.2.1/kernel/capability.c --- linux-2.6.17.13/kernel/capability.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/capability.c 2006-08-17 00:28:21 +0200 @@ -12,6 +12,7 @@ #include #include #include +#include #include unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ @@ -246,6 +247,9 @@ EXPORT_SYMBOL(__capable); int capable(int cap) { + /* here for now so we don't require task locking */ + if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap)) + return 0; return 
__capable(current, cap); } EXPORT_SYMBOL(capable); diff -NurpP --minimal linux-2.6.17.13/kernel/cpuset.c linux-2.6.17.13-vs2.0.2.1/kernel/cpuset.c --- linux-2.6.17.13/kernel/cpuset.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/cpuset.c 2006-08-17 00:28:21 +0200 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/kernel/exit.c linux-2.6.17.13-vs2.0.2.1/kernel/exit.c --- linux-2.6.17.13/kernel/exit.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/exit.c 2006-09-08 05:06:19 +0200 @@ -36,6 +36,10 @@ #include #include #include /* for audit_free() */ +#include +#include +#include +#include #include #include @@ -449,9 +453,11 @@ static void close_files(struct files_str struct file * file = xchg(&fdt->fd[i], NULL); if (file) filp_close(file, files); + vx_openfd_dec(i); } i++; set >>= 1; + cond_resched(); } } } @@ -591,6 +597,11 @@ static void exit_mm(struct task_struct * static inline void choose_new_parent(task_t *p, task_t *reaper) { + /* check for reaper context */ + vxwprintk((p->xid != reaper->xid) && (reaper != child_reaper), + "rogue reaper: %p[%d,#%u] <> %p[%d,#%u]", + p, p->pid, p->xid, reaper, reaper->pid, reaper->xid); + /* * Make sure we're not reparenting to ourselves and that * the parent is not a zombie. 
@@ -672,7 +683,7 @@ static void forget_original_parent(struc do { reaper = next_thread(reaper); if (reaper == father) { - reaper = child_reaper; + reaper = vx_child_reaper(father); break; } } while (reaper->exit_state); @@ -696,7 +707,7 @@ static void forget_original_parent(struc if (father == p->real_parent) { /* reparent with a reaper, real father it's us */ - choose_new_parent(p, reaper); + choose_new_parent(p, vx_child_reaper(p)); reparent_thread(p, father, 0); } else { /* reparent ptraced task to its real parent */ @@ -911,6 +922,8 @@ fastcall NORET_TYPE void do_exit(long co __exit_files(tsk); __exit_fs(tsk); exit_namespace(tsk); + exit_vx_info(tsk, code); + exit_nx_info(tsk); exit_thread(); cpuset_exit(tsk); exit_keys(tsk); diff -NurpP --minimal linux-2.6.17.13/kernel/fork.c linux-2.6.17.13-vs2.0.2.1/kernel/fork.c --- linux-2.6.17.13/kernel/fork.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/fork.c 2006-08-17 00:28:21 +0200 @@ -44,6 +44,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -104,6 +108,8 @@ static kmem_cache_t *mm_cachep; void free_task(struct task_struct *tsk) { free_thread_info(tsk->thread_info); + clr_vx_info(&tsk->vx_info); + clr_nx_info(&tsk->nx_info); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -201,6 +207,8 @@ static inline int dup_mmap(struct mm_str mm->free_area_cache = oldmm->mmap_base; mm->cached_hole_size = ~0UL; mm->map_count = 0; + __set_mm_counter(mm, file_rss, 0); + __set_mm_counter(mm, anon_rss, 0); cpus_clear(mm->cpu_vm_mask); mm->mm_rb = RB_ROOT; rb_link = &mm->mm_rb.rb_node; @@ -212,7 +220,7 @@ static inline int dup_mmap(struct mm_str if (mpnt->vm_flags & VM_DONTCOPY) { long pages = vma_pages(mpnt); - mm->total_vm -= pages; + vx_vmpages_sub(mm, pages); vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, -pages); continue; @@ -319,8 +327,6 @@ static struct mm_struct * mm_init(struct INIT_LIST_HEAD(&mm->mmlist); mm->core_waiters = 0; mm->nr_ptes = 0; - 
set_mm_counter(mm, file_rss, 0); - set_mm_counter(mm, anon_rss, 0); spin_lock_init(&mm->page_table_lock); rwlock_init(&mm->ioctx_list_lock); mm->ioctx_list = NULL; @@ -329,6 +335,7 @@ static struct mm_struct * mm_init(struct if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + set_vx_info(&mm->mm_vx_info, current->vx_info); return mm; } free_mm(mm); @@ -360,6 +367,7 @@ void fastcall __mmdrop(struct mm_struct BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + clr_vx_info(&mm->mm_vx_info); free_mm(mm); } @@ -463,6 +471,7 @@ static struct mm_struct *dup_mm(struct t goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); + mm->mm_vx_info = NULL; if (!mm_init(mm)) goto fail_nomem; @@ -490,6 +499,7 @@ fail_nocontext: * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() */ + clr_vx_info(&mm->mm_vx_info); mm_free_pgd(mm); free_mm(mm); return NULL; @@ -683,6 +693,8 @@ static struct files_struct *dup_fd(struc struct file *f = *old_fds++; if (f) { get_file(f); + /* FIXME: sum it first for check and performance */ + vx_openfd_inc(open_files - i); } else { /* * The fd may be claimed in the fd bitmap but not yet @@ -927,6 +939,8 @@ static task_t *copy_process(unsigned lon { int retval; struct task_struct *p = NULL; + struct vx_info *vxi; + struct nx_info *nxi; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -955,12 +969,30 @@ static task_t *copy_process(unsigned lon if (!p) goto fork_out; + init_vx_info(&p->vx_info, current->vx_info); + init_nx_info(&p->nx_info, current->nx_info); + + /* check vserver memory */ + if (p->mm && !(clone_flags & CLONE_VM)) { + if (vx_vmpages_avail(p->mm, p->mm->total_vm)) + vx_pages_add(p->vx_info, RLIMIT_AS, p->mm->total_vm); + else + goto bad_fork_free; + } + if (p->mm && vx_flags(VXF_FORK_RSS, 0)) { + if (!vx_rsspages_avail(p->mm, get_mm_counter(p->mm, file_rss))) + goto bad_fork_cleanup_vm; + } + retval = -EAGAIN; + if 
(!vx_nproc_avail(1)) + goto bad_fork_cleanup_vm; + if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) - goto bad_fork_free; + goto bad_fork_cleanup_vm; } atomic_inc(&p->user->__count); @@ -1212,6 +1244,18 @@ static task_t *copy_process(unsigned lon total_forks++; spin_unlock(&current->sighand->siglock); + + /* p is copy of current */ + vxi = p->vx_info; + if (vxi) { + claim_vx_info(vxi, p); + atomic_inc(&vxi->cvirt.nr_threads); + atomic_inc(&vxi->cvirt.total_forks); + vx_nproc_inc(p); + } + nxi = p->nx_info; + if (nxi) + claim_nx_info(nxi, p); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); return p; @@ -1252,6 +1296,9 @@ bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); +bad_fork_cleanup_vm: + if (p->mm && !(clone_flags & CLONE_VM)) + vx_pages_sub(p->vx_info, RLIMIT_AS, p->mm->total_vm); bad_fork_free: free_task(p); fork_out: diff -NurpP --minimal linux-2.6.17.13/kernel/futex.c linux-2.6.17.13-vs2.0.2.1/kernel/futex.c --- linux-2.6.17.13/kernel/futex.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/futex.c 2006-09-12 17:57:43 +0200 @@ -44,6 +44,7 @@ #include #include #include +#include #include #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 
4 : 8) diff -NurpP --minimal linux-2.6.17.13/kernel/futex_compat.c linux-2.6.17.13-vs2.0.2.1/kernel/futex_compat.c --- linux-2.6.17.13/kernel/futex_compat.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/futex_compat.c 2006-08-17 00:28:21 +0200 @@ -9,6 +9,7 @@ #include #include #include +#include #include diff -NurpP --minimal linux-2.6.17.13/kernel/kthread.c linux-2.6.17.13-vs2.0.2.1/kernel/kthread.c --- linux-2.6.17.13/kernel/kthread.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/kthread.c 2006-08-17 00:28:21 +0200 @@ -116,7 +116,7 @@ static void keventd_create_kthread(void } else { wait_for_completion(&create->started); read_lock(&tasklist_lock); - create->result = find_task_by_pid(pid); + create->result = find_task_by_real_pid(pid); read_unlock(&tasklist_lock); } complete(&create->done); diff -NurpP --minimal linux-2.6.17.13/kernel/posix-cpu-timers.c linux-2.6.17.13-vs2.0.2.1/kernel/posix-cpu-timers.c --- linux-2.6.17.13/kernel/posix-cpu-timers.c 2006-06-18 04:55:30 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/posix-cpu-timers.c 2006-08-17 00:28:21 +0200 @@ -6,6 +6,7 @@ #include #include #include +#include static int check_clock(const clockid_t which_clock) { diff -NurpP --minimal linux-2.6.17.13/kernel/posix-timers.c linux-2.6.17.13-vs2.0.2.1/kernel/posix-timers.c --- linux-2.6.17.13/kernel/posix-timers.c 2006-06-18 04:55:31 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/posix-timers.c 2006-08-17 00:28:21 +0200 @@ -372,7 +372,7 @@ static struct task_struct * good_sigeven struct task_struct *rtn = current->group_leader; if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || + (!(rtn = find_task_by_real_pid(event->sigev_notify_thread_id)) || rtn->tgid != current->tgid || (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) return NULL; diff -NurpP --minimal linux-2.6.17.13/kernel/printk.c linux-2.6.17.13-vs2.0.2.1/kernel/printk.c --- linux-2.6.17.13/kernel/printk.c 
2006-06-18 04:55:31 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/printk.c 2006-08-17 00:28:21 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -183,18 +184,13 @@ int do_syslog(int type, char __user *buf unsigned long i, j, limit, count; int do_clear = 0; char c; - int error = 0; + int error; error = security_syslog(type); if (error) return error; - switch (type) { - case 0: /* Close log */ - break; - case 1: /* Open log */ - break; - case 2: /* Read from log */ + if ((type >= 2) && (type <= 4)) { error = -EINVAL; if (!buf || len < 0) goto out; @@ -205,6 +201,16 @@ int do_syslog(int type, char __user *buf error = -EFAULT; goto out; } + } + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return vx_do_syslog(type, buf, len); + + switch (type) { + case 0: /* Close log */ + break; + case 1: /* Open log */ + break; + case 2: /* Read from log */ error = wait_event_interruptible(log_wait, (log_start - log_end)); if (error) @@ -229,16 +235,6 @@ int do_syslog(int type, char __user *buf do_clear = 1; /* FALL THRU */ case 3: /* Read last kernel messages */ - error = -EINVAL; - if (!buf || len < 0) - goto out; - error = 0; - if (!len) - goto out; - if (!access_ok(VERIFY_WRITE, buf, len)) { - error = -EFAULT; - goto out; - } count = len; if (count > log_buf_len) count = log_buf_len; diff -NurpP --minimal linux-2.6.17.13/kernel/ptrace.c linux-2.6.17.13-vs2.0.2.1/kernel/ptrace.c --- linux-2.6.17.13/kernel/ptrace.c 2006-06-18 04:55:31 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/ptrace.c 2006-08-17 00:28:21 +0200 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -506,6 +507,10 @@ asmlinkage long sys_ptrace(long request, goto out; } + ret = -EPERM; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_put_task_struct; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_put_task_struct; diff -NurpP --minimal linux-2.6.17.13/kernel/sched.c linux-2.6.17.13-vs2.0.2.1/kernel/sched.c --- linux-2.6.17.13/kernel/sched.c 
2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/sched.c 2006-08-17 00:28:21 +0200 @@ -53,6 +53,9 @@ #include #include +#include +#include +#include /* * Convert user-nice values [ -20 ... 0 ... 19 ] @@ -241,6 +244,10 @@ struct runqueue { struct list_head migration_queue; int cpu; #endif +#ifdef CONFIG_VSERVER_HARDCPU + struct list_head hold_queue; + int idle_tokens; +#endif #ifdef CONFIG_SCHEDSTATS /* latency stats */ @@ -601,6 +608,7 @@ static inline void sched_info_switch(tas */ static void dequeue_task(struct task_struct *p, prio_array_t *array) { + BUG_ON(p->state & TASK_ONHOLD); array->nr_active--; list_del(&p->run_list); if (list_empty(array->queue + p->prio)) @@ -609,6 +617,7 @@ static void dequeue_task(struct task_str static void enqueue_task(struct task_struct *p, prio_array_t *array) { + BUG_ON(p->state & TASK_ONHOLD); sched_info_queued(p); list_add_tail(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); @@ -622,11 +631,13 @@ static void enqueue_task(struct task_str */ static void requeue_task(struct task_struct *p, prio_array_t *array) { + BUG_ON(p->state & TASK_ONHOLD); list_move_tail(&p->run_list, array->queue + p->prio); } static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) { + BUG_ON(p->state & TASK_ONHOLD); list_add(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); array->nr_active++; @@ -650,6 +661,7 @@ static inline void enqueue_task_head(str static int effective_prio(task_t *p) { int bonus, prio; + struct vx_info *vxi; if (rt_task(p)) return p->prio; @@ -657,6 +669,11 @@ static int effective_prio(task_t *p) bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; prio = p->static_prio - bonus; + + if ((vxi = p->vx_info) && + vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) + prio += vx_effective_vavavoom(vxi, MAX_USER_PRIO); + if (prio < MAX_RT_PRIO) prio = MAX_RT_PRIO; if (prio > MAX_PRIO-1) @@ -797,19 +814,77 @@ static void activate_task(task_t *p, run } p->timestamp = now; + 
vx_activate_task(p); __activate_task(p, rq); } /* * deactivate_task - remove a task from the runqueue. */ -static void deactivate_task(struct task_struct *p, runqueue_t *rq) +static void __deactivate_task(struct task_struct *p, runqueue_t *rq) { rq->nr_running--; dequeue_task(p, p->array); p->array = NULL; } +static inline +void deactivate_task(struct task_struct *p, runqueue_t *rq) +{ + vx_deactivate_task(p); + __deactivate_task(p, rq); +} + + +#ifdef CONFIG_VSERVER_HARDCPU +/* + * vx_hold_task - put a task on the hold queue + */ +static inline +void vx_hold_task(struct vx_info *vxi, + struct task_struct *p, runqueue_t *rq) +{ + __deactivate_task(p, rq); + p->state |= TASK_ONHOLD; + /* a new one on hold */ + vx_onhold_inc(vxi); + list_add_tail(&p->run_list, &rq->hold_queue); +} + +/* + * vx_unhold_task - put a task back to the runqueue + */ +static inline +void vx_unhold_task(struct vx_info *vxi, + struct task_struct *p, runqueue_t *rq) +{ + list_del(&p->run_list); + /* one less waiting */ + vx_onhold_dec(vxi); + p->state &= ~TASK_ONHOLD; + enqueue_task(p, rq->expired); + rq->nr_running++; + + if (p->static_prio < rq->best_expired_prio) + rq->best_expired_prio = p->static_prio; +} +#else +static inline +void vx_hold_task(struct vx_info *vxi, + struct task_struct *p, runqueue_t *rq) +{ + return; +} + +static inline +void vx_unhold_task(struct vx_info *vxi, + struct task_struct *p, runqueue_t *rq) +{ + return; +} +#endif /* CONFIG_VSERVER_HARDCPU */ + + /* * resched_task - mark a task 'to be rescheduled now'. 
* @@ -1173,6 +1248,12 @@ static int try_to_wake_up(task_t *p, uns rq = task_rq_lock(p, &flags); old_state = p->state; + + /* we need to unhold suspended tasks */ + if (old_state & TASK_ONHOLD) { + vx_unhold_task(p->vx_info, p, rq); + old_state = p->state; + } if (!(old_state & state)) goto out; @@ -1276,6 +1357,7 @@ out_activate: #endif /* CONFIG_SMP */ if (old_state == TASK_UNINTERRUPTIBLE) { rq->nr_uninterruptible--; + vx_uninterruptible_dec(p); /* * Tasks on involuntary sleep don't earn * sleep_avg beyond just interactive state. @@ -1416,6 +1498,7 @@ void fastcall wake_up_new_task(task_t *p p->prio = effective_prio(p); + vx_activate_task(p); if (likely(cpu == this_cpu)) { if (!(clone_flags & CLONE_VM)) { /* @@ -1427,6 +1510,7 @@ void fastcall wake_up_new_task(task_t *p __activate_task(p, rq); else { p->prio = current->prio; + BUG_ON(p->state & TASK_ONHOLD); list_add_tail(&p->run_list, &current->run_list); p->array = current->array; p->array->nr_active++; @@ -2514,13 +2598,16 @@ unsigned long long current_sched_time(co void account_user_time(struct task_struct *p, cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + struct vx_info *vxi = p->vx_info; /* p is _always_ current */ cputime64_t tmp; + int nice = (TASK_NICE(p) > 0); p->utime = cputime_add(p->utime, cputime); + vx_account_user(vxi, cputime, nice); /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); - if (TASK_NICE(p) > 0) + if (nice) cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); @@ -2536,10 +2623,12 @@ void account_system_time(struct task_str cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + struct vx_info *vxi = p->vx_info; /* p is _always_ current */ runqueue_t *rq = this_rq(); cputime64_t tmp; p->stime = cputime_add(p->stime, cputime); + vx_account_system(vxi, cputime, (p == rq->idle)); /* Add system time to cpustat.
*/ tmp = cputime_to_cputime64(cputime); @@ -2599,6 +2688,10 @@ void scheduler_tick(void) if (p == rq->idle) { if (wake_priority_sleeper(rq)) goto out; +#ifdef CONFIG_VSERVER_HARDCPU_IDLE + if (!--rq->idle_tokens && !list_empty(&rq->hold_queue)) + set_need_resched(); +#endif rebalance_tick(cpu, rq, SCHED_IDLE); return; } @@ -2631,7 +2724,7 @@ void scheduler_tick(void) } goto out_unlock; } - if (!--p->time_slice) { + if (vx_need_resched(p)) { dequeue_task(p, rq->active); set_tsk_need_resched(p); p->prio = effective_prio(p); @@ -2902,6 +2995,10 @@ asmlinkage void __sched schedule(void) unsigned long long now; unsigned long run_time; int cpu, idx, new_prio; + struct vx_info *vxi; +#ifdef CONFIG_VSERVER_HARDCPU + int maxidle = -HZ; +#endif /* * Test if we are atomic. Since do_exit() needs to call into @@ -2959,12 +3056,41 @@ need_resched_nonpreemptible: unlikely(signal_pending(prev)))) prev->state = TASK_RUNNING; else { - if (prev->state == TASK_UNINTERRUPTIBLE) + if (prev->state == TASK_UNINTERRUPTIBLE) { rq->nr_uninterruptible++; + vx_uninterruptible_inc(prev); + } deactivate_task(prev, rq); } } +#ifdef CONFIG_VSERVER_HARDCPU + if (!list_empty(&rq->hold_queue)) { + struct list_head *l, *n; + int ret; + + vxi = NULL; + list_for_each_safe(l, n, &rq->hold_queue) { + next = list_entry(l, task_t, run_list); + if (vxi == next->vx_info) + continue; + + vxi = next->vx_info; + ret = vx_tokens_recalc(vxi); + + if (ret > 0) { + vx_unhold_task(vxi, next, rq); + break; + } + if ((ret < 0) && (maxidle < ret)) + maxidle = ret; + } + } + rq->idle_tokens = -maxidle; + +pick_next: +#endif + cpu = smp_processor_id(); if (unlikely(!rq->nr_running)) { go_idle: @@ -3012,6 +3138,22 @@ go_idle: queue = array->queue + idx; next = list_entry(queue->next, task_t, run_list); + vxi = next->vx_info; +#ifdef CONFIG_VSERVER_HARDCPU + if (vx_info_flags(vxi, VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) { + int ret = vx_tokens_recalc(vxi); + + if (unlikely(ret <= 0)) { + if (ret && (rq->idle_tokens > -ret)) + 
rq->idle_tokens = -ret; + vx_hold_task(vxi, next, rq); + goto pick_next; + } + } else /* well, looks ugly but not as ugly as the ifdef-ed version */ +#endif + if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) + vx_tokens_recalc(vxi); + if (!rt_task(next) && interactive_sleep(next->sleep_type)) { unsigned long long delta = now - next->timestamp; if (unlikely((long long)(now - next->timestamp) < 0)) @@ -3566,7 +3708,7 @@ asmlinkage long sys_nice(int increment) nice = 19; if (increment < 0 && !can_nice(current, nice)) - return -EPERM; + return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM; retval = security_task_setnice(current, nice); if (retval) @@ -3726,6 +3868,7 @@ recheck: oldprio = p->prio; __setscheduler(p, policy, param->sched_priority); if (array) { + vx_activate_task(p); __activate_task(p, rq); /* * Reschedule if we are currently running on this runqueue and @@ -6130,6 +6273,9 @@ void __init sched_init(void) rq->cpu = i; #endif atomic_set(&rq->nr_iowait, 0); +#ifdef CONFIG_VSERVER_HARDCPU + INIT_LIST_HEAD(&rq->hold_queue); +#endif for (j = 0; j < 2; j++) { array = rq->arrays + j; @@ -6199,6 +6345,7 @@ void normalize_rt_tasks(void) deactivate_task(p, task_rq(p)); __setscheduler(p, SCHED_NORMAL, 0); if (array) { + vx_activate_task(p); __activate_task(p, task_rq(p)); resched_task(rq->curr); } diff -NurpP --minimal linux-2.6.17.13/kernel/signal.c linux-2.6.17.13-vs2.0.2.1/kernel/signal.c --- linux-2.6.17.13/kernel/signal.c 2006-06-18 04:55:34 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/signal.c 2006-08-17 00:28:21 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -572,18 +573,27 @@ static int rm_from_queue(unsigned long m static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { + int user; int error = -EINVAL; + if (!valid_signal(sig)) return error; + + user = ((info == SEND_SIG_NOINFO) || + (!is_si_special(info) && SI_FROMUSER(info))); + error = -EPERM; - if ((info == SEND_SIG_NOINFO || 
(!is_si_special(info) && SI_FROMUSER(info))) - && ((sig != SIGCONT) || + if (user && ((sig != SIGCONT) || (current->signal->session != t->signal->session)) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) && !capable(CAP_KILL)) return error; + error = -ESRCH; + if (user && !vx_check(vx_task_xid(t), VX_ADMIN|VX_IDENT)) + return error; + error = security_task_kill(t, info, sig); if (!error) audit_signal_info(sig, t); /* Let audit system see the signal */ @@ -1803,6 +1813,11 @@ relock: if (current == child_reaper) continue; + /* virtual init is protected against user signals */ + if ((info->si_code == SI_USER) && + vx_current_initpid(current->pid)) + continue; + if (sig_kernel_stop(signr)) { /* * The default action is to stop all threads in diff -NurpP --minimal linux-2.6.17.13/kernel/sys.c linux-2.6.17.13-vs2.0.2.1/kernel/sys.c --- linux-2.6.17.13/kernel/sys.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/sys.c 2006-08-17 00:28:21 +0200 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include @@ -439,7 +441,10 @@ static int set_one_prio(struct task_stru goto out; } if (niceval < task_nice(p) && !can_nice(p, niceval)) { - error = -EACCES; + if (vx_flags(VXF_IGNEG_NICE, 0)) + error = 0; + else + error = -EACCES; goto out; } no_nice = security_task_setnice(p, niceval); @@ -491,7 +496,8 @@ asmlinkage long sys_setpriority(int whic if (!who) who = current->uid; else - if ((who != current->uid) && !(user = find_user(who))) + if ((who != current->uid) && + !(user = find_user(vx_current_xid(), who))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) @@ -549,7 +555,8 @@ asmlinkage long sys_getpriority(int whic if (!who) who = current->uid; else - if ((who != current->uid) && !(user = find_user(who))) + if ((who != current->uid) && + !(user = find_user(vx_current_xid(), who))) goto 
out_unlock; /* No processes for this user */ do_each_thread(g, p) @@ -666,6 +673,9 @@ void kernel_power_off(void) machine_power_off(); } EXPORT_SYMBOL_GPL(kernel_power_off); + +long vs_reboot(unsigned int, void __user *); + /* * Reboot system call: for obvious reasons only root may call it, * and even root needs to set up some magic numbers in the registers @@ -696,6 +706,9 @@ asmlinkage long sys_reboot(int magic1, i if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) cmd = LINUX_REBOOT_CMD_HALT; + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return vs_reboot(cmd, arg); + lock_kernel(); switch (cmd) { case LINUX_REBOOT_CMD_RESTART: @@ -883,7 +896,7 @@ static int set_user(uid_t new_ruid, int { struct user_struct *new_user; - new_user = alloc_uid(new_ruid); + new_user = alloc_uid(vx_current_xid(), new_ruid); if (!new_user) return -EAGAIN; @@ -1247,15 +1260,18 @@ asmlinkage long sys_setpgid(pid_t pid, p { struct task_struct *p; struct task_struct *group_leader = current->group_leader; + pid_t rpgid; int err = -EINVAL; if (!pid) - pid = group_leader->pid; + pid = vx_map_pid(group_leader->pid); if (!pgid) pgid = pid; if (pgid < 0) return -EINVAL; + rpgid = vx_rmap_pid(pgid); + /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. 
-DaveM */ @@ -1290,22 +1306,22 @@ asmlinkage long sys_setpgid(pid_t pid, p if (pgid != pid) { struct task_struct *p; - do_each_task_pid(pgid, PIDTYPE_PGID, p) { + do_each_task_pid(rpgid, PIDTYPE_PGID, p) { if (p->signal->session == group_leader->signal->session) goto ok_pgid; - } while_each_task_pid(pgid, PIDTYPE_PGID, p); + } while_each_task_pid(rpgid, PIDTYPE_PGID, p); goto out; } ok_pgid: - err = security_task_setpgid(p, pgid); + err = security_task_setpgid(p, rpgid); if (err) goto out; - if (process_group(p) != pgid) { + if (process_group(p) != rpgid) { detach_pid(p, PIDTYPE_PGID); - p->signal->pgrp = pgid; - attach_pid(p, PIDTYPE_PGID, pgid); + p->signal->pgrp = rpgid; + attach_pid(p, PIDTYPE_PGID, rpgid); } err = 0; @@ -1318,7 +1334,7 @@ out: asmlinkage long sys_getpgid(pid_t pid) { if (!pid) { - return process_group(current); + return vx_rmap_pid(process_group(current)); } else { int retval; struct task_struct *p; @@ -1330,7 +1346,7 @@ asmlinkage long sys_getpgid(pid_t pid) if (p) { retval = security_task_getpgid(p); if (!retval) - retval = process_group(p); + retval = vx_rmap_pid(process_group(p)); } read_unlock(&tasklist_lock); return retval; @@ -1671,7 +1687,7 @@ asmlinkage long sys_newuname(struct new_ int errno = 0; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof *name)) + if (copy_to_user(name, vx_new_utsname(), sizeof *name)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1682,15 +1698,17 @@ asmlinkage long sys_sethostname(char __u int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.nodename, tmp, len); - system_utsname.nodename[len] = 0; + char *ptr = vx_new_uts(nodename); + + memcpy(ptr, tmp, len); + ptr[len] = 0; errno = 0; } up_write(&uts_sem); @@ -1702,15 +1720,17 @@ asmlinkage long 
sys_sethostname(char __u asmlinkage long sys_gethostname(char __user *name, int len) { int i, errno; + char *ptr; if (len < 0) return -EINVAL; down_read(&uts_sem); - i = 1 + strlen(system_utsname.nodename); + ptr = vx_new_uts(nodename); + i = 1 + strlen(ptr); if (i > len) i = len; errno = 0; - if (copy_to_user(name, system_utsname.nodename, i)) + if (copy_to_user(name, ptr, i)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1727,7 +1747,7 @@ asmlinkage long sys_setdomainname(char _ int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -1735,8 +1755,10 @@ asmlinkage long sys_setdomainname(char _ down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.domainname, tmp, len); - system_utsname.domainname[len] = 0; + char *ptr = vx_new_uts(domainname); + + memcpy(ptr, tmp, len); + ptr[len] = 0; errno = 0; } up_write(&uts_sem); @@ -1794,7 +1816,7 @@ asmlinkage long sys_setrlimit(unsigned i return -EINVAL; old_rlim = current->signal->rlim + resource; if ((new_rlim.rlim_max > old_rlim->rlim_max) && - !capable(CAP_SYS_RESOURCE)) + !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT)) return -EPERM; if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) return -EPERM; diff -NurpP --minimal linux-2.6.17.13/kernel/sysctl.c linux-2.6.17.13-vs2.0.2.1/kernel/sysctl.c --- linux-2.6.17.13/kernel/sysctl.c 2006-06-18 04:55:34 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/sysctl.c 2006-08-17 00:28:21 +0200 @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -89,6 +90,7 @@ static int ngroups_max = NGROUPS_MAX; #ifdef CONFIG_KMOD extern char modprobe_path[]; #endif +extern char vshelper_path[]; #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif @@ -237,6 +239,7 @@ static ctl_table kern_table[] = { .maxlen = sizeof(system_utsname.sysname), .mode = 0444, .proc_handler = 
&proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -246,6 +249,7 @@ static ctl_table kern_table[] = { .maxlen = sizeof(system_utsname.release), .mode = 0444, .proc_handler = &proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -255,6 +259,7 @@ static ctl_table kern_table[] = { .maxlen = sizeof(system_utsname.version), .mode = 0444, .proc_handler = &proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -264,6 +269,7 @@ static ctl_table kern_table[] = { .maxlen = sizeof(system_utsname.nodename), .mode = 0644, .proc_handler = &proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -273,6 +279,7 @@ static ctl_table kern_table[] = { .maxlen = sizeof(system_utsname.domainname), .mode = 0644, .proc_handler = &proc_doutsstring, + .virt_handler = &vx_uts_virt_handler, .strategy = &sysctl_string, }, { @@ -409,6 +416,15 @@ static ctl_table kern_table[] = { .strategy = &sysctl_string, }, #endif + { + .ctl_name = KERN_VSHELPER, + .procname = "vshelper", + .data = &vshelper_path, + .maxlen = 256, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, #ifdef CONFIG_CHR_DEV_SG { .ctl_name = KERN_SG_BIG_BUFF, @@ -1563,16 +1579,20 @@ static ssize_t proc_writesys(struct file int proc_dostring(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - size_t len; + size_t len, maxlen; char __user *p; char c; + void *data; + + data = table->data; + maxlen = table->maxlen; + + if (!data || !maxlen || !*lenp || (*ppos && !write)) + return (*lenp = 0); - if (!table->data || !table->maxlen || !*lenp || - (*ppos && !write)) { - *lenp = 0; - return 0; - } - + if (table->virt_handler) + table->virt_handler(table, write, filp->f_xid, &data, &maxlen); + if (write) { len = 0; p = buffer; @@ -1583,20 +1603,20 @@ int proc_dostring(ctl_table *table, int break; 
len++; } - if (len >= table->maxlen) - len = table->maxlen-1; - if(copy_from_user(table->data, buffer, len)) + if (len >= maxlen) + len = maxlen-1; + if(copy_from_user(data, buffer, len)) return -EFAULT; - ((char *) table->data)[len] = 0; + ((char *) data)[len] = 0; *ppos += *lenp; } else { - len = strlen(table->data); - if (len > table->maxlen) - len = table->maxlen; + len = strlen(data); + if (len > maxlen) + len = maxlen; if (len > *lenp) len = *lenp; if (len) - if(copy_to_user(buffer, table->data, len)) + if(copy_to_user(buffer, data, len)) return -EFAULT; if (len < *lenp) { if(put_user('\n', ((char __user *) buffer) + len)) diff -NurpP --minimal linux-2.6.17.13/kernel/timer.c linux-2.6.17.13-vs2.0.2.1/kernel/timer.c --- linux-2.6.17.13/kernel/timer.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/timer.c 2006-08-25 18:29:10 +0200 @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include @@ -953,12 +955,6 @@ asmlinkage unsigned long sys_alarm(unsig #endif -#ifndef __alpha__ - -/* - * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this - * should be moved into arch/i386 instead? - */ /** * sys_getpid - return the thread group id of the current process @@ -971,7 +967,7 @@ asmlinkage unsigned long sys_alarm(unsig */ asmlinkage long sys_getpid(void) { - return current->tgid; + return vx_map_tgid(current->tgid); } /* @@ -987,10 +983,23 @@ asmlinkage long sys_getppid(void) rcu_read_lock(); pid = rcu_dereference(current->real_parent)->tgid; rcu_read_unlock(); + return vx_map_pid(pid); +} - return pid; +#ifdef __alpha__ + +/* + * The Alpha uses getxpid, getxuid, and getxgid instead. 
+ */ + +asmlinkage long do_getxpid(long *ppid) +{ + *ppid = sys_getppid(); + return sys_getpid(); } +#else /* _alpha_ */ + asmlinkage long sys_getuid(void) { /* Only we change this so SMP safe */ @@ -1151,6 +1160,8 @@ asmlinkage long sys_sysinfo(struct sysin tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; tp.tv_sec++; } + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&tp, NULL); val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); diff -NurpP --minimal linux-2.6.17.13/kernel/user.c linux-2.6.17.13-vs2.0.2.1/kernel/user.c --- linux-2.6.17.13/kernel/user.c 2006-06-18 04:55:35 +0200 +++ linux-2.6.17.13-vs2.0.2.1/kernel/user.c 2006-08-17 00:28:21 +0200 @@ -23,8 +23,8 @@ #define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) #define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) -#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) -#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) +#define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) +#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid))) static kmem_cache_t *uid_cachep; static struct list_head uidhash_table[UIDHASH_SZ]; @@ -66,7 +66,7 @@ static inline void uid_hash_remove(struc list_del(&up->uidhash_list); } -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) +static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent) { struct list_head *up; @@ -75,7 +75,7 @@ static inline struct user_struct *uid_ha user = list_entry(up, struct user_struct, uidhash_list); - if(user->uid == uid) { + if(user->uid == uid && user->xid == xid) { atomic_inc(&user->__count); return user; } @@ -90,13 +90,13 @@ static inline struct user_struct *uid_ha * * If the user_struct could not be found, return NULL. 
*/ -struct user_struct *find_user(uid_t uid) +struct user_struct *find_user(xid_t xid, uid_t uid) { struct user_struct *ret; unsigned long flags; spin_lock_irqsave(&uidhash_lock, flags); - ret = uid_hash_find(uid, uidhashentry(uid)); + ret = uid_hash_find(xid, uid, uidhashentry(xid, uid)); spin_unlock_irqrestore(&uidhash_lock, flags); return ret; } @@ -120,13 +120,13 @@ void free_uid(struct user_struct *up) } } -struct user_struct * alloc_uid(uid_t uid) +struct user_struct * alloc_uid(xid_t xid, uid_t uid) { - struct list_head *hashent = uidhashentry(uid); + struct list_head *hashent = uidhashentry(xid, uid); struct user_struct *up; spin_lock_irq(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); spin_unlock_irq(&uidhash_lock); if (!up) { @@ -136,6 +136,7 @@ struct user_struct * alloc_uid(uid_t uid if (!new) return NULL; new->uid = uid; + new->xid = xid; atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); atomic_set(&new->files, 0); @@ -158,7 +159,7 @@ struct user_struct * alloc_uid(uid_t uid * on adding the same user already.. 
*/ spin_lock_irq(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); if (up) { key_put(new->uid_keyring); key_put(new->session_keyring); @@ -204,7 +205,7 @@ static int __init uid_cache_init(void) /* Insert the root user immediately (init already runs as root) */ spin_lock_irq(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(0)); + uid_hash_insert(&root_user, uidhashentry(0,0)); spin_unlock_irq(&uidhash_lock); return 0; diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/Kconfig linux-2.6.17.13-vs2.0.2.1/kernel/vserver/Kconfig --- linux-2.6.17.13/kernel/vserver/Kconfig 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/Kconfig 2006-08-17 00:28:21 +0200 @@ -0,0 +1,189 @@ +# +# Linux VServer configuration +# + +menu "Linux VServer" + +config VSERVER_LEGACY + bool "Enable Legacy Kernel API" + default y + help + This enables the legacy API used in vs1.xx, maintaining + compatibility with older vserver tools, and guest images + that are configured using the legacy method. This is + probably a good idea for now, for migration purposes. + + Note that some tools have not yet been altered to use + this API, so disabling this option may reduce some + functionality. + +config VSERVER_LEGACY_VERSION + bool "Show a Legacy Version ID" + depends on VSERVER_LEGACY + default n + help + This shows a special legacy version to very old tools + which do not handle the current version correctly. + + This will probably disable some features of newer tools + so better avoid it, unless you really, really need it + for backwards compatibility. + +config VSERVER_NGNET + bool "Disable Legacy Networking Kernel API" + depends on EXPERIMENTAL + default n + help + This disables the legacy networking API which is required + by the chbind tool. Do not disable it unless you exactly + know what you are doing. 
+ +config VSERVER_REMAP_SADDR + bool "Remap Source IP Address" + depends on EXPERIMENTAL && !VSERVER_LEGACY + default n + help + This allows remapping the source IP address of 'local' + connections from 127.0.0.1 to the first assigned + guest IP. + +config VSERVER_PROC_SECURE + bool "Enable Proc Security" + depends on PROC_FS + default y + help + This configures ProcFS security to initially hide + non-process entries for all contexts except the main and + spectator context (i.e. for all guests), which is a secure + default. + + (note: on 1.2x the entries were visible by default) + +config VSERVER_HARDCPU + bool "Enable Hard CPU Limits" + depends on EXPERIMENTAL + default n + help + Activate the Hard CPU Limits + + This will compile in code that allows the Token Bucket + Scheduler to put processes on hold when a context's + tokens are depleted (provided that its per-context + sched_hard flag is set). + + Processes belonging to that context will not be able + to consume CPU resources again until a per-context + configured minimum of tokens has been reached. + +config VSERVER_HARDCPU_IDLE + bool "Limit the IDLE task" + depends on VSERVER_HARDCPU + default n + help + Limit the idle slices, so that the next context + will be scheduled as soon as possible. + + This might improve interactivity and latency, but + will also marginally increase scheduling overhead. + +choice + prompt "Persistent Inode Context Tagging" + default INOXID_UGID24 + help + This adds persistent context information to filesystems + mounted with the tagxid option. Tagging is a requirement + for per-context disk limits and per-context quota. + + +config INOXID_NONE + bool "Disabled" + help + do not store per-context information in inodes. + +config INOXID_UID16 + bool "UID16/GID32" + help + reduces UID to 16 bit, but leaves GID at 32 bit. + +config INOXID_GID16 + bool "UID32/GID16" + help + reduces GID to 16 bit, but leaves UID at 32 bit.
+ +config INOXID_UGID24 + bool "UID24/GID24" + help + uses the upper 8bit from UID and GID for XID tagging + which leaves 24bit for UID/GID each, which should be + more than sufficient for normal use. + +config INOXID_INTERN + bool "UID32/GID32" + help + this uses otherwise reserved inode fields in the on + disk representation, which limits the use to a few + filesystems (currently ext2 and ext3) + +config INOXID_RUNTIME + bool "Runtime" + depends on EXPERIMENTAL + help + inodes are tagged when first accessed, this doesn't + require any persistent information, but might give + funny results for mixed access. + +endchoice + +config XID_TAG_NFSD + bool "Tag NFSD User Auth and Files" + default n + help + Enable this if you do want the in-kernel NFS + Server to use the xid tagging specified above. + (will require patched clients too) + +config VSERVER_DEBUG + bool "VServer Debugging Code" + default n + help + Set this to yes if you want to be able to activate + debugging output at runtime. It adds a probably small + overhead to all vserver related functions and + increases the kernel size by about 20k. + +config VSERVER_HISTORY + bool "VServer History Tracing" + depends on VSERVER_DEBUG + default n + help + Set this to yes if you want to record the history of + linux-vserver activities, so they can be replayed in + the event of a kernel panic or oops. + +config VSERVER_HISTORY_SIZE + int "Per-CPU History Size (32-65536)" + depends on VSERVER_HISTORY + range 32 65536 + default 64 + help + This allows you to specify the number of entries in + the per-CPU history buffer.
+ +endmenu + + +config VSERVER + bool + default y + +config VSERVER_SECURITY + bool + depends on SECURITY + default y + select SECURITY_CAPABILITIES + +config VSERVER_LEGACYNET + bool + depends on !VSERVER_NGNET + default y + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/Makefile linux-2.6.17.13-vs2.0.2.1/kernel/vserver/Makefile --- linux-2.6.17.13/kernel/vserver/Makefile 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/Makefile 2006-08-17 00:28:21 +0200 @@ -0,0 +1,16 @@ +# +# Makefile for the Linux vserver routines. +# + + +obj-y += vserver.o + +vserver-y := switch.o context.o namespace.o sched.o network.o inode.o \ + limit.o cvirt.o signal.o helper.o init.o dlimit.o + +vserver-$(CONFIG_PROC_FS) += proc.o +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o +vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o +vserver-$(CONFIG_VSERVER_LEGACYNET) += legacynet.o +vserver-$(CONFIG_VSERVER_HISTORY) += history.o + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/context.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/context.c --- linux-2.6.17.13/kernel/vserver/context.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/context.c 2006-09-03 18:31:06 +0200 @@ -0,0 +1,918 @@ +/* + * linux/kernel/vserver/context.c + * + * Virtual Server: Context Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 context helper + * V0.02 vx_ctx_kill syscall command + * V0.03 replaced context_info calls + * V0.04 redesign of struct (de)alloc + * V0.05 rlimit basic implementation + * V0.06 task_xid and info commands + * V0.07 context flags and caps + * V0.08 switch to RCU based hash + * V0.09 revert to non RCU for now + * V0.10 and back to working RCU hash + * V0.11 and back to locking again + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "cvirt_init.h" +#include "limit_init.h" +#include "sched_init.h" + + +/* 
__alloc_vx_info() + + * allocate an initialized vx_info struct + * doesn't make it visible (hash) */ + +static struct vx_info *__alloc_vx_info(xid_t xid) +{ + struct vx_info *new = NULL; + + vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); + + /* would this benefit from a slab cache? */ + new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct vx_info)); + new->vx_id = xid; + INIT_HLIST_NODE(&new->vx_hlist); + atomic_set(&new->vx_usecnt, 0); + atomic_set(&new->vx_tasks, 0); + new->vx_parent = NULL; + new->vx_state = 0; + init_waitqueue_head(&new->vx_wait); + + /* prepare reaper */ + get_task_struct(child_reaper); + new->vx_reaper = child_reaper; + + /* rest of init goes here */ + vx_info_init_limit(&new->limit); + vx_info_init_sched(&new->sched); + vx_info_init_cvirt(&new->cvirt); + vx_info_init_cacct(&new->cacct); + + new->vx_flags = VXF_INIT_SET; + new->vx_bcaps = CAP_INIT_EFF_SET; + new->vx_ccaps = 0; + + new->reboot_cmd = 0; + new->exit_code = 0; + + vxdprintk(VXD_CBIT(xid, 0), + "alloc_vx_info(%d) = %p", xid, new); + vxh_alloc_vx_info(new); + return new; +} + +/* __dealloc_vx_info() + + * final disposal of vx_info */ + +static void __dealloc_vx_info(struct vx_info *vxi) +{ + vxdprintk(VXD_CBIT(xid, 0), + "dealloc_vx_info(%p)", vxi); + vxh_dealloc_vx_info(vxi); + + vxi->vx_hlist.next = LIST_POISON1; + vxi->vx_id = -1; + + vx_info_exit_limit(&vxi->limit); + vx_info_exit_sched(&vxi->sched); + vx_info_exit_cvirt(&vxi->cvirt); + vx_info_exit_cacct(&vxi->cacct); + + vxi->vx_state |= VXS_RELEASED; + kfree(vxi); +} + +static void __shutdown_vx_info(struct vx_info *vxi) +{ + struct namespace *namespace; + struct fs_struct *fs; + + might_sleep(); + + vxi->vx_state |= VXS_SHUTDOWN; + vs_state_change(vxi, VSC_SHUTDOWN); + + namespace = xchg(&vxi->vx_namespace, NULL); + if (namespace) + put_namespace(namespace); + + fs = xchg(&vxi->vx_fs, NULL); + if (fs) + put_fs_struct(fs); +} + +/* exported stuff */ + +void 
free_vx_info(struct vx_info *vxi) +{ + /* context shutdown is mandatory */ + BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN)); + + BUG_ON(atomic_read(&vxi->vx_usecnt)); + BUG_ON(atomic_read(&vxi->vx_tasks)); + + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + + BUG_ON(vxi->vx_namespace); + BUG_ON(vxi->vx_fs); + + __dealloc_vx_info(vxi); +} + + +/* hash table for vx_info hash */ + +#define VX_HASH_SIZE 13 + +struct hlist_head vx_info_hash[VX_HASH_SIZE]; + +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(xid_t xid) +{ + return (xid % VX_HASH_SIZE); +} + + + +/* __hash_vx_info() + + * add the vxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_vx_info(struct vx_info *vxi) +{ + struct hlist_head *head; + + vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxh_hash_vx_info(vxi); + + /* context must not be hashed */ + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + + vxi->vx_state |= VXS_HASHED; + head = &vx_info_hash[__hashval(vxi->vx_id)]; + hlist_add_head(&vxi->vx_hlist, head); +} + +/* __unhash_vx_info() + + * remove the vxi from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_vx_info(struct vx_info *vxi) +{ + vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxh_unhash_vx_info(vxi); + + /* context must be hashed */ + BUG_ON(!vx_info_state(vxi, VXS_HASHED)); + + vxi->vx_state &= ~VXS_HASHED; + hlist_del(&vxi->vx_hlist); +} + + +/* __lookup_vx_info() + + * requires the hash_lock to be held + * doesn't increment the vx_refcnt */ + +static inline struct vx_info *__lookup_vx_info(xid_t xid) +{ + struct hlist_head *head = &vx_info_hash[__hashval(xid)]; + struct hlist_node *pos; + struct vx_info *vxi; + + vxd_assert_lock(&vx_info_hash_lock); + hlist_for_each(pos, head) { + vxi = hlist_entry(pos, struct vx_info, 
vx_hlist); + + if (vxi->vx_id == xid) + goto found; + } + vxi = NULL; +found: + vxdprintk(VXD_CBIT(xid, 0), + "__lookup_vx_info(#%u): %p[#%u]", + xid, vxi, vxi?vxi->vx_id:0); + vxh_lookup_vx_info(vxi, xid); + return vxi; +} + + +/* __vx_dynamic_id() + + * find unused dynamic xid + * requires the hash_lock to be held */ + +static inline xid_t __vx_dynamic_id(void) +{ + static xid_t seq = MAX_S_CONTEXT; + xid_t barrier = seq; + + vxd_assert_lock(&vx_info_hash_lock); + do { + if (++seq > MAX_S_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__lookup_vx_info(seq)) { + vxdprintk(VXD_CBIT(xid, 4), + "__vx_dynamic_id: [#%d]", seq); + return seq; + } + } while (barrier != seq); + return 0; +} + +#ifdef CONFIG_VSERVER_LEGACY + +/* __loc_vx_info() + + * locate or create the requested context + * get() it and if new hash it */ + +static struct vx_info * __loc_vx_info(int id, int *err) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id); + + if (!(new = __alloc_vx_info(id))) { + *err = -ENOMEM; + return NULL; + } + + /* required to make dynamic xids unique */ + spin_lock(&vx_info_hash_lock); + + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + goto out_unlock; + } + new->vx_id = id; + } + /* existing context requested */ + else if ((vxi = __lookup_vx_info(id))) { + /* context in setup is not available */ + if (vxi->vx_flags & VXF_STATE_SETUP) { + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (not available)", id, vxi); + vxi = NULL; + *err = -EBUSY; + } else { + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (found)", id, vxi); + get_vx_info(vxi); + *err = 0; + } + goto out_unlock; + } + + /* new context requested */ + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (new)", id, new); + __hash_vx_info(get_vx_info(new)); + vxi = new, new = NULL; + *err = 1; + +out_unlock: + spin_unlock(&vx_info_hash_lock); + 
vxh_loc_vx_info(vxi, id); + if (new) + __dealloc_vx_info(new); + return vxi; +} + +#endif + +/* __create_vx_info() + + * create the requested context + * get() and hash it */ + +static struct vx_info * __create_vx_info(int id) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id); + + if (!(new = __alloc_vx_info(id))) + return ERR_PTR(-ENOMEM); + + /* required to make dynamic xids unique */ + spin_lock(&vx_info_hash_lock); + + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + vxi = ERR_PTR(-EAGAIN); + goto out_unlock; + } + new->vx_id = id; + } + /* static context requested */ + else if ((vxi = __lookup_vx_info(id))) { + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (already there)", id, vxi); + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0)) + vxi = ERR_PTR(-EBUSY); + else + vxi = ERR_PTR(-EEXIST); + goto out_unlock; + } + /* dynamic xid creation blocker */ + else if (id >= MIN_D_CONTEXT) { + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) (dynamic rejected)", id); + vxi = ERR_PTR(-EINVAL); + goto out_unlock; + } + + /* new context */ + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (new)", id, new); + __hash_vx_info(get_vx_info(new)); + vxi = new, new = NULL; + +out_unlock: + spin_unlock(&vx_info_hash_lock); + vxh_create_vx_info(IS_ERR(vxi)?NULL:vxi, id); + if (new) + __dealloc_vx_info(new); + return vxi; +} + + +/* exported stuff */ + + +void unhash_vx_info(struct vx_info *vxi) +{ + __shutdown_vx_info(vxi); + spin_lock(&vx_info_hash_lock); + __unhash_vx_info(vxi); + spin_unlock(&vx_info_hash_lock); + __wakeup_vx_info(vxi); +} + + +/* lookup_vx_info() + + * search for a vx_info and get() it + * negative id means current */ + +struct vx_info *lookup_vx_info(int id) +{ + struct vx_info *vxi = NULL; + + if (id < 0) { + vxi = get_vx_info(current->vx_info); + } else if (id > 1) { + 
spin_lock(&vx_info_hash_lock); + vxi = get_vx_info(__lookup_vx_info(id)); + spin_unlock(&vx_info_hash_lock); + } + return vxi; +} + +/* xid_is_hashed() + + * verify that xid is still hashed */ + +int xid_is_hashed(xid_t xid) +{ + int hashed; + + spin_lock(&vx_info_hash_lock); + hashed = (__lookup_vx_info(xid) != NULL); + spin_unlock(&vx_info_hash_lock); + return hashed; +} + +#ifdef CONFIG_VSERVER_LEGACY + +struct vx_info *lookup_or_create_vx_info(int id) +{ + int err; + + return __loc_vx_info(id, &err); +} + +#endif + +#ifdef CONFIG_PROC_FS + +int get_xid_list(int index, unsigned int *xids, int size) +{ + int hindex, nr_xids = 0; + + for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { + struct hlist_head *head = &vx_info_hash[hindex]; + struct hlist_node *pos; + + spin_lock(&vx_info_hash_lock); + hlist_for_each(pos, head) { + struct vx_info *vxi; + + if (--index > 0) + continue; + + vxi = hlist_entry(pos, struct vx_info, vx_hlist); + xids[nr_xids] = vxi->vx_id; + if (++nr_xids >= size) { + spin_unlock(&vx_info_hash_lock); + goto out; + } + } + /* keep the lock time short */ + spin_unlock(&vx_info_hash_lock); + } +out: + return nr_xids; +} +#endif + + +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) +{ + struct user_struct *new_user, *old_user; + + if (!p || !vxi) + BUG(); + new_user = alloc_uid(vxi->vx_id, p->uid); + if (!new_user) + return -ENOMEM; + + old_user = p->user; + if (new_user != old_user) { + atomic_inc(&new_user->processes); + atomic_dec(&old_user->processes); + p->user = new_user; + } + free_uid(old_user); + return 0; +} + +void vx_mask_bcaps(struct vx_info *vxi, struct task_struct *p) +{ + p->cap_effective &= vxi->vx_bcaps; + p->cap_inheritable &= vxi->vx_bcaps; + p->cap_permitted &= vxi->vx_bcaps; +} + + +#include + +static int vx_openfd_task(struct task_struct *tsk) +{ + struct files_struct *files = tsk->files; + struct fdtable *fdt; + const unsigned long *bptr; + int count, total; + + /* no rcu_read_lock() because of spin_lock() 
*/ + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + bptr = fdt->open_fds->fds_bits; + count = fdt->max_fds / (sizeof(unsigned long) * 8); + for (total = 0; count > 0; count--) { + if (*bptr) + total += hweight_long(*bptr); + bptr++; + } + spin_unlock(&files->file_lock); + return total; +} + +/* + * migrate task to new context + * gets vxi, puts old_vxi on change + */ + +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) +{ + struct vx_info *old_vxi; + int ret = 0; + + if (!p || !vxi) + BUG(); + + old_vxi = task_get_vx_info(p); + if (old_vxi == vxi) + goto out; + + vxdprintk(VXD_CBIT(xid, 5), + "vx_migrate_task(%p,%p[#%d.%d])", p, vxi, + vxi->vx_id, atomic_read(&vxi->vx_usecnt)); + + if (!(ret = vx_migrate_user(p, vxi))) { + int openfd; + + task_lock(p); + openfd = vx_openfd_task(p); + + if (old_vxi) { + atomic_dec(&old_vxi->cvirt.nr_threads); + atomic_dec(&old_vxi->cvirt.nr_running); + atomic_dec(&old_vxi->limit.rcur[RLIMIT_NPROC]); + /* FIXME: what about the struct files here? */ + atomic_sub(openfd, &old_vxi->limit.rcur[VLIMIT_OPENFD]); + } + atomic_inc(&vxi->cvirt.nr_threads); + atomic_inc(&vxi->cvirt.nr_running); + atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]); + /* FIXME: what about the struct files here? 
*/ + atomic_add(openfd, &vxi->limit.rcur[VLIMIT_OPENFD]); + + if (old_vxi) { + release_vx_info(old_vxi, p); + clr_vx_info(&p->vx_info); + } + claim_vx_info(vxi, p); + set_vx_info(&p->vx_info, vxi); + p->xid = vxi->vx_id; + + vxdprintk(VXD_CBIT(xid, 5), + "moved task %p into vxi:%p[#%d]", + p, vxi, vxi->vx_id); + + vx_mask_bcaps(vxi, p); + task_unlock(p); + } +out: + put_vx_info(old_vxi); + return ret; +} + +int vx_set_reaper(struct vx_info *vxi, struct task_struct *p) +{ + struct task_struct *old_reaper; + + if (!vxi) + return -EINVAL; + + vxdprintk(VXD_CBIT(xid, 6), + "vx_set_reaper(%p[#%d],%p[#%d,%d])", + vxi, vxi->vx_id, p, p->xid, p->pid); + + old_reaper = vxi->vx_reaper; + if (old_reaper == p) + return 0; + + /* set new child reaper */ + get_task_struct(p); + vxi->vx_reaper = p; + put_task_struct(old_reaper); + return 0; +} + +int vx_set_init(struct vx_info *vxi, struct task_struct *p) +{ + if (!vxi) + return -EINVAL; + + vxdprintk(VXD_CBIT(xid, 6), + "vx_set_init(%p[#%d],%p[#%d,%d,%d])", + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); + + vxi->vx_flags &= ~VXF_STATE_INIT; + vxi->vx_initpid = p->tgid; + return 0; +} + +void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code) +{ + vxdprintk(VXD_CBIT(xid, 6), + "vx_exit_init(%p[#%d],%p[#%d,%d,%d])", + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); + + vxi->exit_code = code; + vxi->vx_initpid = 0; +} + +void vx_set_persistent(struct vx_info *vxi) +{ + vxdprintk(VXD_CBIT(xid, 6), + "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id); + + get_vx_info(vxi); + claim_vx_info(vxi, current); +} + +void vx_clear_persistent(struct vx_info *vxi) +{ + vxdprintk(VXD_CBIT(xid, 6), + "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id); + + release_vx_info(vxi, current); + put_vx_info(vxi); +} + +void vx_update_persistent(struct vx_info *vxi) +{ + if (vx_info_flags(vxi, VXF_PERSISTENT, 0)) + vx_set_persistent(vxi); + else + vx_clear_persistent(vxi); +} + + +/* task must be current or locked */ + +void exit_vx_info(struct 
task_struct *p, int code) +{ + struct vx_info *vxi = p->vx_info; + + if (vxi) { + atomic_dec(&vxi->cvirt.nr_threads); + vx_nproc_dec(p); + + vxi->exit_code = code; + if (vxi->vx_initpid == p->tgid) + vx_exit_init(vxi, p, code); + if (vxi->vx_reaper == p) + vx_set_reaper(vxi, child_reaper); + release_vx_info(vxi, p); + } +} + + +/* vserver syscall commands below here */ + +/* taks xid and vx_info functions */ + +#include + + +int vc_task_xid(uint32_t id, void __user *data) +{ + xid_t xid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + xid = (tsk) ? tsk->xid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + xid = vx_current_xid(); + return xid; +} + + +int vc_vx_info(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vx_info_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.xid = vxi->vx_id; + vc_data.initpid = vxi->vx_initpid; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +/* context functions */ + +int vc_ctx_create(uint32_t xid, void __user *data) +{ + struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET }; + struct vx_info *new_vxi; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if ((xid > MAX_S_CONTEXT) && (xid != VX_DYNAMIC_ID)) + return -EINVAL; + if (xid < 2) + return -EINVAL; + + new_vxi = __create_vx_info(xid); + if (IS_ERR(new_vxi)) + return PTR_ERR(new_vxi); + + /* initial flags */ + new_vxi->vx_flags = vc_data.flagword; + + /* get a reference for persistent contexts */ + if ((vc_data.flagword & VXF_PERSISTENT)) + vx_set_persistent(new_vxi); + + ret = -ENOEXEC; + if 
(vs_state_change(new_vxi, VSC_STARTUP)) + goto out_unhash; + ret = vx_migrate_task(current, new_vxi); + if (!ret) { + /* return context id on success */ + ret = new_vxi->vx_id; + goto out; + } +out_unhash: + /* prepare for context disposal */ + new_vxi->vx_state |= VXS_SHUTDOWN; + if ((vc_data.flagword & VXF_PERSISTENT)) + vx_clear_persistent(new_vxi); + __unhash_vx_info(new_vxi); +out: + put_vx_info(new_vxi); + return ret; +} + + +int vc_ctx_migrate(uint32_t id, void __user *data) +{ + struct vcmd_ctx_migrate vc_data = { .flagword = 0 }; + struct vx_info *vxi; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* dirty hack until Spectator becomes a cap */ + if (id == 1) { + current->xid = 1; + return 0; + } + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + vx_migrate_task(current, vxi); + if (vc_data.flagword & VXM_SET_INIT) + vx_set_init(vxi, current); + if (vc_data.flagword & VXM_SET_REAPER) + vx_set_reaper(vxi, current); + put_vx_info(vxi); + return 0; +} + + +int vc_get_cflags(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_flags_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.flagword = vxi->vx_flags; + + /* special STATE flag handling */ + vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME); + + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_cflags(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_flags_v0 vc_data; + uint64_t mask, trigger; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + /* special STATE flag handling */ + mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME); + trigger = (mask & 
vxi->vx_flags) ^ (mask & vc_data.flagword); + + if (vxi == current->vx_info) { + if (trigger & VXF_STATE_SETUP) + vx_mask_bcaps(vxi, current); + if (trigger & VXF_STATE_INIT) { + vx_set_init(vxi, current); + vx_set_reaper(vxi, current); + } + } + + vxi->vx_flags = vx_mask_flags(vxi->vx_flags, + vc_data.flagword, mask); + if (trigger & VXF_PERSISTENT) + vx_update_persistent(vxi); + + put_vx_info(vxi); + return 0; +} + +int vc_get_ccaps(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.bcaps = vxi->vx_bcaps; + vc_data.ccaps = vxi->vx_ccaps; + vc_data.cmask = ~0UL; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_ccaps(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + vxi->vx_bcaps &= vc_data.bcaps; + vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps, + vc_data.ccaps, vc_data.cmask); + put_vx_info(vxi); + return 0; +} + +#include + +EXPORT_SYMBOL_GPL(free_vx_info); + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/cvirt.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/cvirt.c --- linux-2.6.17.13/kernel/vserver/cvirt.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/cvirt.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,260 @@ +/* + * linux/kernel/vserver/cvirt.c + * + * Virtual Server: Context Virtualization + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 broken out from limit.c + * V0.02 added utsname stuff + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle) +{ + struct 
vx_info *vxi = current->vx_info; + + set_normalized_timespec(uptime, + uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec, + uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec); + if (!idle) + return; + set_normalized_timespec(idle, + idle->tv_sec - vxi->cvirt.bias_idle.tv_sec, + idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec); + return; +} + +uint64_t vx_idle_jiffies(void) +{ + return init_task.utime + init_task.stime; +} + + + +static inline uint32_t __update_loadavg(uint32_t load, + int wsize, int delta, int n) +{ + unsigned long long calc, prev; + + /* just set it to n */ + if (unlikely(delta >= wsize)) + return (n << FSHIFT); + + calc = delta * n; + calc <<= FSHIFT; + prev = (wsize - delta); + prev *= load; + calc += prev; + do_div(calc, wsize); + return calc; +} + + +void vx_update_load(struct vx_info *vxi) +{ + uint32_t now, last, delta; + unsigned int nr_running, nr_uninterruptible; + unsigned int total; + unsigned long flags; + + spin_lock_irqsave(&vxi->cvirt.load_lock, flags); + + now = jiffies; + last = vxi->cvirt.load_last; + delta = now - last; + + if (delta < 5*HZ) + goto out; + + nr_running = atomic_read(&vxi->cvirt.nr_running); + nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible); + total = nr_running + nr_uninterruptible; + + vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0], + 60*HZ, delta, total); + vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1], + 5*60*HZ, delta, total); + vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2], + 15*60*HZ, delta, total); + + vxi->cvirt.load_last = now; +out: + atomic_inc(&vxi->cvirt.load_updates); + spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags); +} + + +int vx_uts_virt_handler(struct ctl_table *ctl, int write, xid_t xid, + void **datap, size_t *lenp) +{ + switch (ctl->ctl_name) { + case KERN_OSTYPE: + *datap = vx_new_uts(sysname); + break; + case KERN_OSRELEASE: + *datap = vx_new_uts(release); + break; + case KERN_VERSION: + *datap = vx_new_uts(version); + break; + case 
KERN_NODENAME: + *datap = vx_new_uts(nodename); + break; + case KERN_DOMAINNAME: + *datap = vx_new_uts(domainname); + break; + } + + return 0; +} + + + +/* + * Commands to do_syslog: + * + * 0 -- Close the log. Currently a NOP. + * 1 -- Open the log. Currently a NOP. + * 2 -- Read from the log. + * 3 -- Read all messages remaining in the ring buffer. + * 4 -- Read and clear all messages remaining in the ring buffer + * 5 -- Clear ring buffer. + * 6 -- Disable printk's to console + * 7 -- Enable printk's to console + * 8 -- Set level of messages printed to console + * 9 -- Return number of unread characters in the log buffer + * 10 -- Return size of the log buffer + */ +int vx_do_syslog(int type, char __user *buf, int len) +{ + int error = 0; + int do_clear = 0; + struct vx_info *vxi = current->vx_info; + struct _vx_syslog *log; + + if (!vxi) + return -EINVAL; + log = &vxi->cvirt.syslog; + + switch (type) { + case 0: /* Close log */ + case 1: /* Open log */ + break; + case 2: /* Read from log */ + error = wait_event_interruptible(log->log_wait, + (log->log_start - log->log_end)); + if (error) + break; + spin_lock_irq(&log->logbuf_lock); + spin_unlock_irq(&log->logbuf_lock); + break; + case 4: /* Read/clear last kernel messages */ + do_clear = 1; + /* fall through */ + case 3: /* Read last kernel messages */ + return 0; + + case 5: /* Clear ring buffer */ + return 0; + + case 6: /* Disable logging to console */ + case 7: /* Enable logging to console */ + case 8: /* Set level of messages printed to console */ + break; + + case 9: /* Number of chars in the log buffer */ + return 0; + case 10: /* Size of the log buffer */ + return 0; + default: + error = -EINVAL; + break; + } + return error; +} + + +/* virtual host info names */ + +static char * vx_vhi_name(struct vx_info *vxi, int id) +{ + switch (id) { + case VHIN_CONTEXT: + return vxi->vx_name; + case VHIN_SYSNAME: + return vxi->cvirt.utsname.sysname; + case VHIN_NODENAME: + return vxi->cvirt.utsname.nodename; + case 
VHIN_RELEASE: + return vxi->cvirt.utsname.release; + case VHIN_VERSION: + return vxi->cvirt.utsname.version; + case VHIN_MACHINE: + return vxi->cvirt.utsname.machine; + case VHIN_DOMAINNAME: + return vxi->cvirt.utsname.domainname; + default: + return NULL; + } + return NULL; +} + +int vc_set_vhi_name(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vhi_name_v0 vc_data; + char *name; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (name) + memcpy(name, vc_data.name, 65); + put_vx_info(vxi); + return (name ? 0 : -EFAULT); +} + +int vc_get_vhi_name(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vhi_name_v0 vc_data; + char *name; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (!name) + goto out_put; + + memcpy(vc_data.name, name, 65); + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; +out_put: + put_vx_info(vxi); + return (name ? 
0 : -EFAULT); +} diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/cvirt_init.h linux-2.6.17.13-vs2.0.2.1/kernel/vserver/cvirt_init.h --- linux-2.6.17.13/kernel/vserver/cvirt_init.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/cvirt_init.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,81 @@ + + +#include + + +extern uint64_t vx_idle_jiffies(void); + +static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt) +{ + uint64_t idle_jiffies = vx_idle_jiffies(); + uint64_t nsuptime; + + do_posix_clock_monotonic_gettime(&cvirt->bias_uptime); + nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec + * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec; + cvirt->bias_clock = nsec_to_clock_t(nsuptime); + + jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle); + atomic_set(&cvirt->nr_threads, 0); + atomic_set(&cvirt->nr_running, 0); + atomic_set(&cvirt->nr_uninterruptible, 0); + atomic_set(&cvirt->nr_onhold, 0); + + down_read(&uts_sem); + cvirt->utsname = system_utsname; + up_read(&uts_sem); + + spin_lock_init(&cvirt->load_lock); + cvirt->load_last = jiffies; + atomic_set(&cvirt->load_updates, 0); + cvirt->load[0] = 0; + cvirt->load[1] = 0; + cvirt->load[2] = 0; + atomic_set(&cvirt->total_forks, 0); + + spin_lock_init(&cvirt->syslog.logbuf_lock); + init_waitqueue_head(&cvirt->syslog.log_wait); + cvirt->syslog.log_start = 0; + cvirt->syslog.log_end = 0; + cvirt->syslog.con_start = 0; + cvirt->syslog.logged_chars = 0; +} + +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt) +{ +#ifdef CONFIG_VSERVER_DEBUG + int value; + + vxwprintk((value = atomic_read(&cvirt->nr_threads)), + "!!! cvirt: %p[nr_threads] = %d on exit.", + cvirt, value); + vxwprintk((value = atomic_read(&cvirt->nr_running)), + "!!! cvirt: %p[nr_running] = %d on exit.", + cvirt, value); + vxwprintk((value = atomic_read(&cvirt->nr_uninterruptible)), + "!!! cvirt: %p[nr_uninterruptible] = %d on exit.", + cvirt, value); + vxwprintk((value = atomic_read(&cvirt->nr_onhold)), + "!!! 
cvirt: %p[nr_onhold] = %d on exit.", + cvirt, value); +#endif + return; +} + +static inline void vx_info_init_cacct(struct _vx_cacct *cacct) +{ + int i,j; + + for (i=0; i<5; i++) { + for (j=0; j<3; j++) { + atomic_set(&cacct->sock[i][j].count, 0); + atomic_set(&cacct->sock[i][j].total, 0); + } + } +} + +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct) +{ + return; +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/cvirt_proc.h linux-2.6.17.13-vs2.0.2.1/kernel/vserver/cvirt_proc.h --- linux-2.6.17.13/kernel/vserver/cvirt_proc.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/cvirt_proc.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,92 @@ +#ifndef _VX_CVIRT_PROC_H +#define _VX_CVIRT_PROC_H + +#include + + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer) +{ + int length = 0; + int a, b, c; + + length += sprintf(buffer + length, + "BiasUptime:\t%lu.%02lu\n", + (unsigned long)cvirt->bias_uptime.tv_sec, + (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100))); + length += sprintf(buffer + length, + "SysName:\t%.*s\n" + "NodeName:\t%.*s\n" + "Release:\t%.*s\n" + "Version:\t%.*s\n" + "Machine:\t%.*s\n" + "DomainName:\t%.*s\n" + ,__NEW_UTS_LEN, cvirt->utsname.sysname + ,__NEW_UTS_LEN, cvirt->utsname.nodename + ,__NEW_UTS_LEN, cvirt->utsname.release + ,__NEW_UTS_LEN, cvirt->utsname.version + ,__NEW_UTS_LEN, cvirt->utsname.machine + ,__NEW_UTS_LEN, cvirt->utsname.domainname + ); + + a = cvirt->load[0] + (FIXED_1/200); + b = cvirt->load[1] + (FIXED_1/200); + c = cvirt->load[2] + (FIXED_1/200); + length += sprintf(buffer + length, + "nr_threads:\t%d\n" + "nr_running:\t%d\n" + "nr_unintr:\t%d\n" + "nr_onhold:\t%d\n" + "load_updates:\t%d\n" + "loadavg:\t%d.%02d %d.%02d %d.%02d\n" + "total_forks:\t%d\n" + ,atomic_read(&cvirt->nr_threads) + ,atomic_read(&cvirt->nr_running) + ,atomic_read(&cvirt->nr_uninterruptible) + 
,atomic_read(&cvirt->nr_onhold) + ,atomic_read(&cvirt->load_updates) + ,LOAD_INT(a), LOAD_FRAC(a) + ,LOAD_INT(b), LOAD_FRAC(b) + ,LOAD_INT(c), LOAD_FRAC(c) + ,atomic_read(&cvirt->total_forks) + ); + return length; +} + + +static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].count); +} + + +static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].total); +} + +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer) +{ + int i,j, length = 0; + static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" }; + + for (i=0; i<5; i++) { + length += sprintf(buffer + length, + "%s:", type[i]); + for (j=0; j<3; j++) { + length += sprintf(buffer + length, + "\t%12lu/%-12lu" + ,vx_sock_count(cacct, i, j) + ,vx_sock_total(cacct, i, j) + ); + } + buffer[length++] = '\n'; + } + length += sprintf(buffer + length, + "forks:\t%lu\n", cacct->total_forks); + return length; +} + +#endif /* _VX_CVIRT_PROC_H */ diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/dlimit.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/dlimit.c --- linux-2.6.17.13/kernel/vserver/dlimit.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/dlimit.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,548 @@ +/* + * linux/kernel/vserver/dlimit.c + * + * Virtual Server: Context Disk Limits + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 initial version + * V0.02 compat32 splitup + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* __alloc_dl_info() + + * allocate an initialized dl_info struct + * doesn't make it visible (hash) */ + +static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid) +{ + struct dl_info *new = NULL; + + vxdprintk(VXD_CBIT(dlim, 5), + "alloc_dl_info(%p,%d)*", sb, xid); + + /* would this benefit from a slab cache? 
*/ + new = kmalloc(sizeof(struct dl_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct dl_info)); + new->dl_xid = xid; + new->dl_sb = sb; + INIT_RCU_HEAD(&new->dl_rcu); + INIT_HLIST_NODE(&new->dl_hlist); + spin_lock_init(&new->dl_lock); + atomic_set(&new->dl_refcnt, 0); + atomic_set(&new->dl_usecnt, 0); + + /* rest of init goes here */ + + vxdprintk(VXD_CBIT(dlim, 4), + "alloc_dl_info(%p,%d) = %p", sb, xid, new); + return new; +} + +/* __dealloc_dl_info() + + * final disposal of dl_info */ + +static void __dealloc_dl_info(struct dl_info *dli) +{ + vxdprintk(VXD_CBIT(dlim, 4), + "dealloc_dl_info(%p)", dli); + + dli->dl_hlist.next = LIST_POISON1; + dli->dl_xid = -1; + dli->dl_sb = 0; + + BUG_ON(atomic_read(&dli->dl_usecnt)); + BUG_ON(atomic_read(&dli->dl_refcnt)); + + kfree(dli); +} + + +/* hash table for dl_info hash */ + +#define DL_HASH_SIZE 13 + +struct hlist_head dl_info_hash[DL_HASH_SIZE]; + +static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(struct super_block *sb, xid_t xid) +{ + return ((xid ^ (unsigned long)sb) % DL_HASH_SIZE); +} + + + +/* __hash_dl_info() + + * add the dli to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_dl_info(struct dl_info *dli) +{ + struct hlist_head *head; + + vxdprintk(VXD_CBIT(dlim, 6), + "__hash_dl_info: %p[#%d]", dli, dli->dl_xid); + get_dl_info(dli); + head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_xid)]; + hlist_add_head_rcu(&dli->dl_hlist, head); +} + +/* __unhash_dl_info() + + * remove the dli from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_dl_info(struct dl_info *dli) +{ + vxdprintk(VXD_CBIT(dlim, 6), + "__unhash_dl_info: %p[#%d]", dli, dli->dl_xid); + hlist_del_rcu(&dli->dl_hlist); + put_dl_info(dli); +} + + +/* __lookup_dl_info() + + * requires the rcu_read_lock() + * doesn't increment the dl_refcnt */ + +static inline struct dl_info 
*__lookup_dl_info(struct super_block *sb, xid_t xid) +{ + struct hlist_head *head = &dl_info_hash[__hashval(sb, xid)]; + struct hlist_node *pos; + struct dl_info *dli; + + hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) { + + if (dli->dl_xid == xid && dli->dl_sb == sb) { + return dli; + } + } + return NULL; +} + + +struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid) +{ + struct dl_info *dli; + + rcu_read_lock(); + dli = get_dl_info(__lookup_dl_info(sb, xid)); + vxdprintk(VXD_CBIT(dlim, 7), + "locate_dl_info(%p,#%d) = %p", sb, xid, dli); + rcu_read_unlock(); + return dli; +} + +void rcu_free_dl_info(struct rcu_head *head) +{ + struct dl_info *dli = container_of(head, struct dl_info, dl_rcu); + int usecnt, refcnt; + + BUG_ON(!dli || !head); + + usecnt = atomic_read(&dli->dl_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&dli->dl_refcnt); + BUG_ON(refcnt < 0); + + vxdprintk(VXD_CBIT(dlim, 3), + "rcu_free_dl_info(%p)", dli); + if (!usecnt) + __dealloc_dl_info(dli); + else + printk("!!! 
rcu didn't free\n"); +} + + + + +static int do_addrem_dlimit(uint32_t id, const char __user *name, + uint32_t flags, int add) +{ + struct nameidata nd; + int ret; + + ret = user_path_walk_link(name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + if (add) { + dli = __alloc_dl_info(sb, id); + spin_lock(&dl_info_hash_lock); + + ret = -EEXIST; + if (__lookup_dl_info(sb, id)) + goto out_unlock; + __hash_dl_info(dli); + dli = NULL; + } else { + spin_lock(&dl_info_hash_lock); + dli = __lookup_dl_info(sb, id); + + ret = -ESRCH; + if (!dli) + goto out_unlock; + __unhash_dl_info(dli); + } + ret = 0; + out_unlock: + spin_unlock(&dl_info_hash_lock); + if (add && dli) + __dealloc_dl_info(dli); + out_release: + path_release(&nd); + } + return ret; +} + +int vc_add_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1); +} + +int vc_rem_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0); +} + +#ifdef CONFIG_COMPAT + +int vc_add_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0_x32 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, + compat_ptr(vc_data.name_ptr), vc_data.flags, 1); +} + +int vc_rem_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_base_v0_x32 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user 
(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_addrem_dlimit(id, + compat_ptr(vc_data.name_ptr), vc_data.flags, 0); +} + +#endif /* CONFIG_COMPAT */ + + +static inline +int do_set_dlimit(uint32_t id, const char __user *name, + uint32_t space_used, uint32_t space_total, + uint32_t inodes_used, uint32_t inodes_total, + uint32_t reserved, uint32_t flags) +{ + struct nameidata nd; + int ret; + + ret = user_path_walk_link(name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + if ((reserved != (uint32_t)CDLIM_KEEP && + reserved > 100) || + (inodes_used != (uint32_t)CDLIM_KEEP && + inodes_used > inodes_total) || + (space_used != (uint32_t)CDLIM_KEEP && + space_used > space_total)) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + + if (inodes_used != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_used = inodes_used; + if (inodes_total != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_total = inodes_total; + if (space_used != (uint32_t)CDLIM_KEEP) { + dli->dl_space_used = space_used; + dli->dl_space_used <<= 10; + } + if (space_total == (uint32_t)CDLIM_INFINITY) + dli->dl_space_total = (uint64_t)CDLIM_INFINITY; + else if (space_total != (uint32_t)CDLIM_KEEP) { + dli->dl_space_total = space_total; + dli->dl_space_total <<= 10; + } + if (reserved != (uint32_t)CDLIM_KEEP) + dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100; + + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = 0; + + out_release: + path_release(&nd); + } + return ret; +} + +int vc_set_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_dlimit(id, vc_data.name, + vc_data.space_used, 
vc_data.space_total, + vc_data.inodes_used, vc_data.inodes_total, + vc_data.reserved, vc_data.flags); +} + +#ifdef CONFIG_COMPAT + +int vc_set_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0_x32 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_dlimit(id, compat_ptr(vc_data.name_ptr), + vc_data.space_used, vc_data.space_total, + vc_data.inodes_used, vc_data.inodes_total, + vc_data.reserved, vc_data.flags); +} + +#endif /* CONFIG_COMPAT */ + + +static inline +int do_get_dlimit(uint32_t id, const char __user *name, + uint32_t *space_used, uint32_t *space_total, + uint32_t *inodes_used, uint32_t *inodes_total, + uint32_t *reserved, uint32_t *flags) +{ + struct nameidata nd; + int ret; + + ret = user_path_walk_link(name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + *inodes_used = dli->dl_inodes_used; + *inodes_total = dli->dl_inodes_total; + *space_used = dli->dl_space_used >> 10; + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + *space_total = (uint32_t)CDLIM_INFINITY; + else + *space_total = dli->dl_space_total >> 10; + + *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10); + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = -EFAULT; + + ret = 0; + out_release: + path_release(&nd); + } + return ret; +} + + +int vc_get_dlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_dlimit(id, vc_data.name, + &vc_data.space_used, &vc_data.space_total, + &vc_data.inodes_used, &vc_data.inodes_total, + 
&vc_data.reserved, &vc_data.flags); + if (ret) + return ret; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +#ifdef CONFIG_COMPAT + +int vc_get_dlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_dlimit_v0_x32 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr), + &vc_data.space_used, &vc_data.space_total, + &vc_data.inodes_used, &vc_data.inodes_total, + &vc_data.reserved, &vc_data.flags); + if (ret) + return ret; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +#endif /* CONFIG_COMPAT */ + + +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct dl_info *dli; + __u64 blimit, bfree, bavail; + __u32 ifree; + + dli = locate_dl_info(sb, vx_current_xid()); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_total == (uint32_t)CDLIM_INFINITY) + goto no_ilim; + + /* reduce max inodes available to limit */ + if (buf->f_files > dli->dl_inodes_total) + buf->f_files = dli->dl_inodes_total; + + /* inode hack for reiserfs */ + if ((buf->f_files == 0) && (dli->dl_inodes_total > 0)) { + buf->f_files = dli->dl_inodes_total; + buf->f_ffree = dli->dl_inodes_total; + } + + ifree = dli->dl_inodes_total - dli->dl_inodes_used; + /* reduce free inodes to min */ + if (ifree < buf->f_ffree) + buf->f_ffree = ifree; + +no_ilim: + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + goto no_blim; + + blimit = dli->dl_space_total >> sb->s_blocksize_bits; + + if (dli->dl_space_total < dli->dl_space_used) + bfree = 0; + else + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + + bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult); + if (bavail < dli->dl_space_used) + bavail = 0; + else + bavail = (bavail - dli->dl_space_used) + >> sb->s_blocksize_bits; + + /* reduce max space 
available to limit */ + if (buf->f_blocks > blimit) + buf->f_blocks = blimit; + + /* reduce free space to min */ + if (bfree < buf->f_bfree) + buf->f_bfree = bfree; + + /* reduce avail space to min */ + if (bavail < buf->f_bavail) + buf->f_bavail = bavail; + +no_blim: + spin_unlock(&dli->dl_lock); + put_dl_info(dli); + + return; +} + +#include + +EXPORT_SYMBOL_GPL(locate_dl_info); +EXPORT_SYMBOL_GPL(rcu_free_dl_info); + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/helper.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/helper.c --- linux-2.6.17.13/kernel/vserver/helper.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/helper.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,210 @@ +/* + * linux/kernel/vserver/helper.c + * + * Virtual Context Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic helper + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +char vshelper_path[255] = "/sbin/vshelper"; + + +static int do_vshelper(char *name, char *argv[], char *envp[], int sync) +{ + int ret; + + if ((ret = call_usermodehelper(name, argv, envp, sync))) { + printk( KERN_WARNING + "%s: (%s %s) returned %s with %d\n", + name, argv[1], argv[2], + sync?"sync":"async", ret); + } + vxdprintk(VXD_CBIT(switch, 4), + "%s: (%s %s) returned %s with %d", + name, argv[1], argv[2], sync?"sync":"async", ret); + return ret; +} + +/* + * vshelper path is set via /proc/sys + * invoked by vserver sys_reboot(), with + * the following arguments + * + * argv [0] = vshelper_path; + * argv [1] = action: "restart", "halt", "poweroff", ... 
+ * argv [2] = context identifier + * + * envp [*] = type-specific parameters + */ + +long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg) +{ + char id_buf[8], cmd_buf[16]; + char uid_buf[16], pid_buf[16]; + int ret; + + char *argv[] = {vshelper_path, NULL, id_buf, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + uid_buf, pid_buf, cmd_buf, 0}; + + if (vx_info_state(vxi, VXS_HELPER)) + return -EAGAIN; + vxi->vx_state |= VXS_HELPER; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id); + + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid); + snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid); + + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + argv[1] = "restart"; + break; + + case LINUX_REBOOT_CMD_HALT: + argv[1] = "halt"; + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + argv[1] = "poweroff"; + break; + + case LINUX_REBOOT_CMD_SW_SUSPEND: + argv[1] = "swsusp"; + break; + + default: + vxi->vx_state &= ~VXS_HELPER; + return 0; + } + +#ifndef CONFIG_VSERVER_LEGACY + ret = do_vshelper(vshelper_path, argv, envp, 1); +#else + ret = do_vshelper(vshelper_path, argv, envp, 0); +#endif + vxi->vx_state &= ~VXS_HELPER; + __wakeup_vx_info(vxi); + return (ret) ? 
-EPERM : 0; +} + + +long vs_reboot(unsigned int cmd, void __user *arg) +{ + struct vx_info *vxi = current->vx_info; + long ret = 0; + + vxdprintk(VXD_CBIT(misc, 5), + "vs_reboot(%p[#%d],%d)", + vxi, vxi?vxi->vx_id:0, cmd); + + ret = vs_reboot_helper(vxi, cmd, arg); + if (ret) + return ret; + + vxi->reboot_cmd = cmd; + if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) { + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + case LINUX_REBOOT_CMD_HALT: + case LINUX_REBOOT_CMD_POWER_OFF: + vx_info_kill(vxi, 0, SIGKILL); + vx_info_kill(vxi, 1, SIGKILL); + default: + break; + } + } + return 0; +} + + +/* + * argv [0] = vshelper_path; + * argv [1] = action: "startup", "shutdown" + * argv [2] = context identifier + * + * envp [*] = type-specific parameters + */ + +long vs_state_change(struct vx_info *vxi, unsigned int cmd) +{ + char id_buf[8], cmd_buf[16]; + char *argv[] = {vshelper_path, NULL, id_buf, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; + + if (!vx_info_flags(vxi, VXF_SC_HELPER, 0)) + return 0; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id); + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + + switch (cmd) { + case VSC_STARTUP: + argv[1] = "startup"; + break; + case VSC_SHUTDOWN: + argv[1] = "shutdown"; + break; + default: + return 0; + } + + do_vshelper(vshelper_path, argv, envp, 1); + return 0; +} + + +/* + * argv [0] = vshelper_path; + * argv [1] = action: "netup", "netdown" + * argv [2] = context identifier + * + * envp [*] = type-specific parameters + */ + +long vs_net_change(struct nx_info *nxi, unsigned int cmd) +{ + char id_buf[8], cmd_buf[16]; + char *argv[] = {vshelper_path, NULL, id_buf, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; + + if (!nx_info_flags(nxi, NXF_SC_HELPER, 0)) + return 0; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", nxi->nx_id); + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + + switch (cmd) { + case 
VSC_NETUP: + argv[1] = "netup"; + break; + case VSC_NETDOWN: + argv[1] = "netdown"; + break; + default: + return 0; + } + + do_vshelper(vshelper_path, argv, envp, 1); + return 0; +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/history.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/history.c --- linux-2.6.17.13/kernel/vserver/history.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/history.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,183 @@ +/* + * kernel/vserver/history.c + * + * Virtual Context History Backtrace + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic structure + * V0.02 hash/unhash and trace + * V0.03 preemption fixes + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + + +#ifdef CONFIG_VSERVER_HISTORY +#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE +#else +#define VXH_SIZE 64 +#endif + +struct _vx_history { + unsigned int counter; + + struct _vx_hist_entry entry[VXH_SIZE+1]; +}; + + +DEFINE_PER_CPU(struct _vx_history, vx_history_buffer); + +unsigned volatile int vxh_active = 1; + +static atomic_t sequence = ATOMIC_INIT(0); + + +/* vxh_advance() + + * requires disabled preemption */ + +struct _vx_hist_entry *vxh_advance(void *loc) +{ + unsigned int cpu = smp_processor_id(); + struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu); + struct _vx_hist_entry *entry; + unsigned int index; + + index = vxh_active ? 
(hist->counter++ % VXH_SIZE) : VXH_SIZE; + entry = &hist->entry[index]; + + entry->seq = atomic_inc_return(&sequence); + entry->loc = loc; + return entry; +} + + +#define VXH_LOC_FMTS "(#%04x,*%d):%p" + +#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc + + +#define VXH_VXI_FMTS "%p[#%d,%d.%d]" + +#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \ + (e)->vxi.ptr?(e)->vxi.xid:0, \ + (e)->vxi.ptr?(e)->vxi.usecnt:0, \ + (e)->vxi.ptr?(e)->vxi.tasks:0 + +void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu) +{ + switch (e->type) { + case VXH_THROW_OOPS: + printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e)); + break; + + case VXH_GET_VX_INFO: + case VXH_PUT_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_GET_VX_INFO)?"get":"put", + VXH_VXI_ARGS(e)); + break; + + case VXH_INIT_VX_INFO: + case VXH_SET_VX_INFO: + case VXH_CLR_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", + VXH_LOC_ARGS(e), + (e->type==VXH_INIT_VX_INFO)?"init": + ((e->type==VXH_SET_VX_INFO)?"set":"clr"), + VXH_VXI_ARGS(e), e->sc.data); + break; + + case VXH_CLAIM_VX_INFO: + case VXH_RELEASE_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", + VXH_LOC_ARGS(e), + (e->type==VXH_CLAIM_VX_INFO)?"claim":"release", + VXH_VXI_ARGS(e), e->sc.data); + break; + + case VXH_ALLOC_VX_INFO: + case VXH_DEALLOC_VX_INFO: + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_ALLOC_VX_INFO)?"alloc":"dealloc", + VXH_VXI_ARGS(e)); + break; + + case VXH_HASH_VX_INFO: + case VXH_UNHASH_VX_INFO: + printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_HASH_VX_INFO)?"hash":"unhash", + VXH_VXI_ARGS(e)); + break; + + case VXH_LOC_VX_INFO: + case VXH_LOOKUP_VX_INFO: + case VXH_CREATE_VX_INFO: + printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n", + VXH_LOC_ARGS(e), + (e->type==VXH_CREATE_VX_INFO)?"create": + ((e->type==VXH_LOC_VX_INFO)?"loc":"lookup"), + e->ll.arg, 
VXH_VXI_ARGS(e)); + break; + } +} + +static void __vxh_dump_history(void) +{ + unsigned int i,j; + + printk("History:\tSEQ: %8x\tNR_CPUS: %d\n", + atomic_read(&sequence), NR_CPUS); + + for (i=0; i < VXH_SIZE; i++) { + for (j=0; j < NR_CPUS; j++) { + struct _vx_history *hist = + &per_cpu(vx_history_buffer, j); + unsigned int index = (hist->counter-i) % VXH_SIZE; + struct _vx_hist_entry *entry = &hist->entry[index]; + + vxh_dump_entry(entry, j); + } + } +} + +void vxh_dump_history(void) +{ + vxh_active = 0; +#ifdef CONFIG_SMP + local_irq_enable(); + smp_send_stop(); + local_irq_disable(); +#endif + __vxh_dump_history(); +} + + +/* vserver syscall commands below here */ + + +int vc_dump_history(uint32_t id) +{ + vxh_active = 0; + __vxh_dump_history(); + vxh_active = 1; + + return 0; +} + +EXPORT_SYMBOL_GPL(vxh_advance); + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/init.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/init.c --- linux-2.6.17.13/kernel/vserver/init.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/init.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,43 @@ +/* + * linux/kernel/init.c + * + * Virtual Server Init + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include + +int vserver_register_sysctl(void); +void vserver_unregister_sysctl(void); + + +static int __init init_vserver(void) +{ + int ret = 0; + +#ifdef CONFIG_VSERVER_DEBUG + vserver_register_sysctl(); +#endif + return ret; +} + + +static void __exit exit_vserver(void) +{ + +#ifdef CONFIG_VSERVER_DEBUG + vserver_unregister_sysctl(); +#endif + return; +} + + +module_init(init_vserver); +module_exit(exit_vserver); + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/inode.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/inode.c --- linux-2.6.17.13/kernel/vserver/inode.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/inode.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,368 @@ +/* + * 
linux/kernel/vserver/inode.c + * + * Virtual Server: File System Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 separated from vcontext V0.05 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint32_t *mask) +{ + struct proc_dir_entry *entry; + + if (!in || !in->i_sb) + return -ESRCH; + + *flags = IATTR_XID + | (IS_BARRIER(in) ? IATTR_BARRIER : 0) + | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0) + | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0); + *mask = IATTR_IUNLINK | IATTR_IMMUTABLE; + + if (S_ISDIR(in->i_mode)) + *mask |= IATTR_BARRIER; + + if (IS_TAGXID(in)) { + *xid = in->i_xid; + *mask |= IATTR_XID; + } + + switch (in->i_sb->s_magic) { + case PROC_SUPER_MAGIC: + entry = PROC_I(in)->pde; + + /* check for specific inodes? */ + if (entry) + *mask |= IATTR_FLAGS; + if (entry) + *flags |= (entry->vx_flags & IATTR_FLAGS); + else + *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS); + break; + + case DEVPTS_SUPER_MAGIC: + *xid = in->i_xid; + *mask |= IATTR_XID; + break; + + default: + break; + } + return 0; +} + +int vc_get_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data = { .xid = -1 }; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_get_iattr(nd.dentry->d_inode, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT + +int vc_get_iattr_x32(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1_x32 vc_data = { .xid = -1 }; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if 
(copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd); + if (!ret) { + ret = __vc_get_iattr(nd.dentry->d_inode, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#endif /* CONFIG_COMPAT */ + + +static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uint32_t *mask) +{ + struct inode *in = de->d_inode; + int error = 0, is_proc = 0, has_xid = 0; + struct iattr attr = { 0 }; + + if (!in || !in->i_sb) + return -ESRCH; + + is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC); + if ((*mask & IATTR_FLAGS) && !is_proc) + return -EINVAL; + + has_xid = IS_TAGXID(in) || + (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC); + if ((*mask & IATTR_XID) && !has_xid) + return -EINVAL; + + mutex_lock(&in->i_mutex); + if (*mask & IATTR_XID) { + attr.ia_xid = *xid; + attr.ia_valid |= ATTR_XID; + } + + if (*mask & IATTR_FLAGS) { + struct proc_dir_entry *entry = PROC_I(in)->pde; + unsigned int iflags = PROC_I(in)->vx_flags; + + iflags = (iflags & ~(*mask & IATTR_FLAGS)) + | (*flags & IATTR_FLAGS); + PROC_I(in)->vx_flags = iflags; + if (entry) + entry->vx_flags = iflags; + } + + if (*mask & (IATTR_BARRIER | IATTR_IUNLINK | IATTR_IMMUTABLE)) { + if (*mask & IATTR_IMMUTABLE) { + if (*flags & IATTR_IMMUTABLE) + in->i_flags |= S_IMMUTABLE; + else + in->i_flags &= ~S_IMMUTABLE; + } + if (*mask & IATTR_IUNLINK) { + if (*flags & IATTR_IUNLINK) + in->i_flags |= S_IUNLINK; + else + in->i_flags &= ~S_IUNLINK; + } + if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) { + if (*flags & IATTR_BARRIER) + in->i_flags |= S_BARRIER; + else + in->i_flags &= ~S_BARRIER; + } + if (in->i_op && in->i_op->sync_flags) { + error = in->i_op->sync_flags(in); + if (error) + goto out; + } + } + + if (attr.ia_valid) { + if (in->i_op && in->i_op->setattr) + error = in->i_op->setattr(de, 
&attr); + else { + error = inode_change_ok(in, &attr); + if (!error) + error = inode_setattr(in, &attr); + } + } + +out: + mutex_unlock(&in->i_mutex); + return error; +} + +int vc_set_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data; + int ret; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_set_iattr(nd.dentry, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT + +int vc_set_iattr_x32(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1_x32 vc_data; + int ret; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd); + if (!ret) { + ret = __vc_set_iattr(nd.dentry, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +#endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_VSERVER_LEGACY + +#define PROC_DYNAMIC_FIRST 0xF0000000UL + +int vx_proc_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + struct proc_dir_entry *entry; + int error = 0; + int flags; + + if (inode->i_ino < PROC_DYNAMIC_FIRST) + return -ENOTTY; + + entry = PROC_I(inode)->pde; + if (!entry) + return -ENOTTY; + + switch(cmd) { + case FIOC_GETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + flags = entry->vx_flags; + if (capable(CAP_CONTEXT)) + error = put_user(flags, (int __user *) arg); + break; + } + case FIOC_SETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; 
+ if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -EFAULT; + if (get_user(flags, (int __user *) arg)) + break; + error = 0; + entry->vx_flags = flags; + break; + } + default: + return -ENOTTY; + } + return error; +} +#endif + + +int vx_parse_xid(char *string, xid_t *xid, int remove) +{ + static match_table_t tokens = { + {1, "xid=%u"}, + {0, NULL} + }; + substring_t args[MAX_OPT_ARGS]; + int token, option = 0; + + if (!string) + return 0; + + token = match_token(string, tokens, args); + if (token && xid && !match_int(args, &option)) + *xid = option; + + vxdprintk(VXD_CBIT(xid, 7), + "vx_parse_xid(»%s«): %d:#%d", + string, token, option); + + if (token && remove) { + char *p = strstr(string, "xid="); + char *q = p; + + if (p) { + while (*q != '\0' && *q != ',') + q++; + while (*q) + *p++ = *q++; + while (*p) + *p++ = '\0'; + } + } + return token; +} + +void vx_propagate_xid(struct nameidata *nd, struct inode *inode) +{ + xid_t new_xid = 0; + struct vfsmount *mnt; + int propagate; + + if (!nd) + return; + mnt = nd->mnt; + if (!mnt) + return; + + propagate = (mnt->mnt_flags & MNT_XID); + if (propagate) + new_xid = mnt->mnt_xid; + + vxdprintk(VXD_CBIT(xid, 7), + "vx_propagate_xid(%p[#%lu.%d]): %d,%d", + inode, inode->i_ino, inode->i_xid, + new_xid, (propagate)?1:0); + + if (propagate) + inode->i_xid = new_xid; +} + +#include + +EXPORT_SYMBOL_GPL(vx_propagate_xid); + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/legacy.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/legacy.c --- linux-2.6.17.13/kernel/vserver/legacy.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/legacy.c 2006-09-03 18:31:06 +0200 @@ -0,0 +1,113 @@ +/* + * linux/kernel/vserver/legacy.c + * + * Virtual Server: Legacy Funtions + * + * Copyright (C) 2001-2003 Jacques Gelinas + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from vcontext.c V0.05 + * + */ + +#include +#include +#include +#include +#include +#include 
+ +#include +#include + + +extern int vx_set_init(struct vx_info *, struct task_struct *); + +static int vx_set_initpid(struct vx_info *vxi, int pid) +{ + struct task_struct *init; + + init = find_task_by_real_pid(pid); + if (!init) + return -ESRCH; + return vx_set_init(vxi, init); +} + +int vc_new_s_context(uint32_t ctx, void __user *data) +{ + int ret = -ENOMEM; + struct vcmd_new_s_context_v1 vc_data; + struct vx_info *new_vxi; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* legacy hack, will be removed soon */ + if (ctx == -2) { + /* assign flags and initpid */ + if (!current->vx_info) + return -EINVAL; + ret = 0; + if (vc_data.flags & VX_INFO_INIT) + ret = vx_set_initpid(current->vx_info, current->tgid); + if (ret == 0) { + /* We keep the same vx_id, but lower the capabilities */ + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); + ret = vx_current_xid(); + current->vx_info->vx_flags |= vc_data.flags; + } + return ret; + } + + if (!vx_check(0, VX_ADMIN) || !capable(CAP_SYS_ADMIN) + /* might make sense in the future, or not ... 
*/ + || vx_flags(VX_INFO_LOCK, 0)) + return -EPERM; + + /* ugly hack for Spectator */ + if (ctx == 1) { + current->xid = 1; + return 0; + } + + if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) || + (ctx == 0)) + return -EINVAL; + + if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT)) + new_vxi = lookup_or_create_vx_info(ctx); + else + new_vxi = lookup_vx_info(ctx); + + if (!new_vxi) + return -EINVAL; + + ret = -EPERM; + if (!vx_info_flags(new_vxi, VXF_STATE_SETUP, 0) && + vx_info_flags(new_vxi, VX_INFO_PRIVATE, 0)) + goto out_put; + + new_vxi->vx_flags &= ~VXF_STATE_SETUP; + + ret = vx_migrate_task(current, new_vxi); + if (ret == 0) { + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); + new_vxi->vx_flags |= vc_data.flags; + if (vc_data.flags & VX_INFO_INIT) + vx_set_initpid(new_vxi, current->tgid); + if (vc_data.flags & VX_INFO_NAMESPACE) + vx_set_namespace(new_vxi, + current->namespace, current->fs); + if (vc_data.flags & VX_INFO_NPROC) + new_vxi->limit.rlim[RLIMIT_NPROC] = + current->signal->rlim[RLIMIT_NPROC].rlim_max; + + /* tweak some defaults for legacy */ + new_vxi->vx_flags |= (VXF_HIDE_NETIF|VXF_INFO_INIT); + ret = new_vxi->vx_id; + } +out_put: + put_vx_info(new_vxi); + return ret; +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/legacynet.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/legacynet.c --- linux-2.6.17.13/kernel/vserver/legacynet.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/legacynet.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,84 @@ + +/* + * linux/kernel/vserver/legacynet.c + * + * Virtual Server: Legacy Network Funtions + * + * Copyright (C) 2001-2003 Jacques Gelinas + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from legacy.c + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +extern struct nx_info *create_nx_info(void); + +/* set ipv4 root (syscall) */ + +int vc_set_ipv4root(uint32_t nbip, void __user *data) +{ + int i, err = -EPERM; + 
struct vcmd_set_ipv4root_v3 vc_data; + struct nx_info *new_nxi, *nxi = current->nx_info; + + if (nbip < 0 || nbip > NB_IPV4ROOT) + return -EINVAL; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN)) + /* We are allowed to change everything */ + err = 0; + else if (nxi) { + int found = 0; + + /* We are allowed to select a subset of the currently + installed IP numbers. No new one are allowed + We can't change the broadcast address though */ + for (i=0; i<nbip; i++) { + int j; + __u32 nxip = vc_data.nx_mask_pair[i].ip; + for (j=0; j<nxi->nbipv4; j++) { + if (nxip == nxi->ipv4[j]) { + found++; + break; + } + } + } + if ((found == nbip) && + (vc_data.broadcast == nxi->v4_bcast)) + err = 0; + } + if (err) + return err; + + new_nxi = create_nx_info(); + if (IS_ERR(new_nxi)) + return -EINVAL; + + new_nxi->nbipv4 = nbip; + for (i=0; i<nbip; i++) { + new_nxi->ipv4[i] = vc_data.nx_mask_pair[i].ip; + new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask; + } + new_nxi->v4_bcast = vc_data.broadcast; + if (nxi) + printk("!!! switching nx_info %p->%p\n", nxi, new_nxi); + + nx_migrate_task(current, new_nxi); + put_nx_info(new_nxi); + return 0; +} + + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/limit.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/limit.c --- linux-2.6.17.13/kernel/vserver/limit.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/limit.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,238 @@ +/* + * linux/kernel/vserver/limit.c + * + * Virtual Server: Context Limits + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +const char *vlimit_name[NUM_LIMITS] = { + [RLIMIT_CPU] = "CPU", + [RLIMIT_RSS] = "RSS", + [RLIMIT_NPROC] = "NPROC", + [RLIMIT_NOFILE] = "NOFILE", + [RLIMIT_MEMLOCK] = "VML", + [RLIMIT_AS] = "VM", + [RLIMIT_LOCKS] = "LOCKS", + [RLIMIT_SIGPENDING] = "SIGP", + [RLIMIT_MSGQUEUE] = "MSGQ", + + [VLIMIT_NSOCK] = "NSOCK", + [VLIMIT_OPENFD] = 
"OPENFD", + [VLIMIT_ANON] = "ANON", + [VLIMIT_SHMEM] = "SHMEM", +}; + +EXPORT_SYMBOL_GPL(vlimit_name); + + +static int is_valid_rlimit(int id) +{ + int valid = 0; + + switch (id) { + case RLIMIT_RSS: + case RLIMIT_NPROC: + case RLIMIT_NOFILE: + case RLIMIT_MEMLOCK: + case RLIMIT_AS: + + case VLIMIT_NSOCK: + case VLIMIT_OPENFD: + case VLIMIT_ANON: + case VLIMIT_SHMEM: + valid = 1; + break; + } + return valid; +} + +static inline uint64_t vc_get_rlim(struct vx_info *vxi, int id) +{ + unsigned long limit; + + limit = vxi->limit.rlim[id]; + if (limit == RLIM_INFINITY) + return CRLIM_INFINITY; + return limit; +} + +static int do_get_rlimit(xid_t xid, uint32_t id, + uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum) +{ + struct vx_info *vxi; + + if (!is_valid_rlimit(id)) + return -EINVAL; + + vxi = lookup_vx_info(xid); + if (!vxi) + return -ESRCH; + + if (minimum) + *minimum = CRLIM_UNSET; + if (softlimit) + *softlimit = CRLIM_UNSET; + if (maximum) + *maximum = vc_get_rlim(vxi, id); + put_vx_info(vxi); + return 0; +} + +int vc_get_rlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_rlimit_v0 vc_data; + int ret; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_rlimit(id, vc_data.id, + &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum); + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +static int do_set_rlimit(xid_t xid, uint32_t id, + uint64_t minimum, uint64_t softlimit, uint64_t maximum) +{ + struct vx_info *vxi; + + if (!is_valid_rlimit(id)) + return -EINVAL; + + vxi = lookup_vx_info(xid); + if (!vxi) + return -ESRCH; + + if (maximum != CRLIM_KEEP) + vxi->limit.rlim[id] = maximum; + + put_vx_info(vxi); + return 0; +} + +int vc_set_rlimit(uint32_t id, void __user *data) +{ + struct vcmd_ctx_rlimit_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_from_user (&vc_data, data, 
sizeof(vc_data))) + return -EFAULT; + + return do_set_rlimit(id, vc_data.id, + vc_data.minimum, vc_data.softlimit, vc_data.maximum); +} + +#ifdef CONFIG_IA32_EMULATION + +int vc_set_rlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_rlimit_v0_x32 vc_data; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_rlimit(id, vc_data.id, + vc_data.minimum, vc_data.softlimit, vc_data.maximum); +} + +int vc_get_rlimit_x32(uint32_t id, void __user *data) +{ + struct vcmd_ctx_rlimit_v0_x32 vc_data; + int ret; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = do_get_rlimit(id, vc_data.id, + &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum); + if (ret) + return ret; + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +#endif /* CONFIG_IA32_EMULATION */ + + +int vc_get_rlimit_mask(uint32_t id, void __user *data) +{ + static struct vcmd_ctx_rlimit_mask_v0 mask = { + /* minimum */ + 0 + , /* softlimit */ + 0 + , /* maximum */ + (1 << RLIMIT_RSS) | + (1 << RLIMIT_NPROC) | + (1 << RLIMIT_NOFILE) | + (1 << RLIMIT_MEMLOCK) | + (1 << RLIMIT_LOCKS) | + (1 << RLIMIT_AS) | + (1 << VLIMIT_ANON) | + 0 + }; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_to_user(data, &mask, sizeof(mask))) + return -EFAULT; + return 0; +} + + +void vx_vsi_meminfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long v; + + v = vxi->limit.rlim[RLIMIT_RSS]; + if (v != RLIM_INFINITY) + val->totalram = min(val->totalram, v); + v = atomic_read(&vxi->limit.rcur[RLIMIT_RSS]); + val->freeram = (v < val->totalram) ? 
val->totalram - v : 0; + val->bufferram = 0; + val->totalhigh = 0; + val->freehigh = 0; + return; +} + +void vx_vsi_swapinfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long v, w; + + v = vxi->limit.rlim[RLIMIT_RSS]; + w = vxi->limit.rlim[RLIMIT_AS]; + if (w != RLIM_INFINITY) + val->totalswap = min(val->totalswap, w - + ((v != RLIM_INFINITY) ? v : 0)); + w = atomic_read(&vxi->limit.rcur[RLIMIT_AS]); + val->freeswap = (w < val->totalswap) ? val->totalswap - w : 0; + return; +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/limit_init.h linux-2.6.17.13-vs2.0.2.1/kernel/vserver/limit_init.h --- linux-2.6.17.13/kernel/vserver/limit_init.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/limit_init.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,32 @@ + + +#include + + +static inline void vx_info_init_limit(struct _vx_limit *limit) +{ + int lim; + + for (lim=0; lim<NUM_LIMITS; lim++) { + limit->rlim[lim] = RLIM_INFINITY; + limit->rmax[lim] = 0; + atomic_set(&limit->rcur[lim], 0); + atomic_set(&limit->lhit[lim], 0); + } +} + +static inline void vx_info_exit_limit(struct _vx_limit *limit) +{ +#ifdef CONFIG_VSERVER_DEBUG + unsigned long value; + unsigned int lim; + + for (lim=0; lim<NUM_LIMITS; lim++) { + value = atomic_read(&limit->rcur[lim]); + vxwprintk(value, + "!!! 
limit: %p[%s,%d] = %ld on exit.", + limit, vlimit_name[lim], lim, value); + } +#endif +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/limit_proc.h linux-2.6.17.13-vs2.0.2.1/kernel/vserver/limit_proc.h --- linux-2.6.17.13/kernel/vserver/limit_proc.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/limit_proc.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,58 @@ +#ifndef _VX_LIMIT_PROC_H +#define _VX_LIMIT_PROC_H + + +static inline void vx_limit_fixup(struct _vx_limit *limit) +{ + unsigned long value; + unsigned int lim; + + for (lim=0; lim<NUM_LIMITS; lim++) { + value = atomic_read(&limit->rcur[lim]); + if (value > limit->rmax[lim]) + limit->rmax[lim] = value; + if (limit->rmax[lim] > limit->rlim[lim]) + limit->rmax[lim] = limit->rlim[lim]; + } +} + +#define VX_LIMIT_FMT ":\t%10d\t%10ld\t%10ld\t%6d\n" + +#define VX_LIMIT_ARG(r) \ + ,atomic_read(&limit->rcur[r]) \ + ,limit->rmax[r] \ + ,limit->rlim[r] \ + ,atomic_read(&limit->lhit[r]) + +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer) +{ + vx_limit_fixup(limit); + return sprintf(buffer, + "PROC" VX_LIMIT_FMT + "VM" VX_LIMIT_FMT + "VML" VX_LIMIT_FMT + "RSS" VX_LIMIT_FMT + "ANON" VX_LIMIT_FMT + "FILES" VX_LIMIT_FMT + "OFD" VX_LIMIT_FMT + "LOCKS" VX_LIMIT_FMT + "SOCK" VX_LIMIT_FMT + "MSGQ" VX_LIMIT_FMT + "SHM" VX_LIMIT_FMT + VX_LIMIT_ARG(RLIMIT_NPROC) + VX_LIMIT_ARG(RLIMIT_AS) + VX_LIMIT_ARG(RLIMIT_MEMLOCK) + VX_LIMIT_ARG(RLIMIT_RSS) + VX_LIMIT_ARG(VLIMIT_ANON) + VX_LIMIT_ARG(RLIMIT_NOFILE) + VX_LIMIT_ARG(VLIMIT_OPENFD) + VX_LIMIT_ARG(RLIMIT_LOCKS) + VX_LIMIT_ARG(VLIMIT_NSOCK) + VX_LIMIT_ARG(RLIMIT_MSGQUEUE) + VX_LIMIT_ARG(VLIMIT_SHMEM) + ); +} + +#endif /* _VX_LIMIT_PROC_H */ + + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/namespace.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/namespace.c --- linux-2.6.17.13/kernel/vserver/namespace.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/namespace.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,113 @@ +/* + * linux/kernel/vserver/namespace.c + * + * 
Virtual Server: Context Namespace Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from context.c 0.07 + * V0.02 added task locking for namespace + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* namespace functions */ + +#include + +int vx_set_namespace(struct vx_info *vxi, struct namespace *ns, struct fs_struct *fs) +{ + struct fs_struct *fs_copy; + + if (vxi->vx_namespace) + return -EPERM; + if (!ns || !fs) + return -EINVAL; + + fs_copy = copy_fs_struct(fs); + if (!fs_copy) + return -ENOMEM; + + get_namespace(ns); + vxi->vx_namespace = ns; + vxi->vx_fs = fs_copy; + return 0; +} + +int vc_enter_namespace(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct fs_struct *old_fs, *fs; + struct namespace *old_ns; + int ret = 0; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + ret = -EINVAL; + if (!vxi->vx_namespace) + goto out_put; + + ret = -ENOMEM; + fs = copy_fs_struct(vxi->vx_fs); + if (!fs) + goto out_put; + + ret = 0; + task_lock(current); + old_ns = current->namespace; + old_fs = current->fs; + get_namespace(vxi->vx_namespace); + current->namespace = vxi->vx_namespace; + current->fs = fs; + task_unlock(current); + + put_namespace(old_ns); + put_fs_struct(old_fs); +out_put: + put_vx_info(vxi); + return ret; +} + +int vc_set_namespace(uint32_t id, void __user *data) +{ + struct fs_struct *fs; + struct namespace *ns; + struct vx_info *vxi; + int ret; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + task_lock(current); + fs = current->fs; + atomic_inc(&fs->count); + ns = current->namespace; + get_namespace(current->namespace); + task_unlock(current); + + ret = vx_set_namespace(vxi, ns, fs); + + put_namespace(ns); + put_fs_struct(fs); + put_vx_info(vxi); + return ret; +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/network.c 
linux-2.6.17.13-vs2.0.2.1/kernel/vserver/network.c --- linux-2.6.17.13/kernel/vserver/network.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/network.c 2006-08-25 20:47:50 +0200 @@ -0,0 +1,807 @@ +/* + * linux/kernel/vserver/network.c + * + * Virtual Server: Network Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * V0.02 cleaned up implementation + * V0.03 added equiv nx commands + * V0.04 switch to RCU based hash + * V0.05 and back to locking again + * + */ + +#include +#include +#include +#include + +#include + + +/* __alloc_nx_info() + + * allocate an initialized nx_info struct + * doesn't make it visible (hash) */ + +static struct nx_info *__alloc_nx_info(nid_t nid) +{ + struct nx_info *new = NULL; + + vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid); + + /* would this benefit from a slab cache? */ + new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct nx_info)); + new->nx_id = nid; + INIT_HLIST_NODE(&new->nx_hlist); + atomic_set(&new->nx_usecnt, 0); + atomic_set(&new->nx_tasks, 0); + new->nx_state = 0; + + new->nx_flags = NXF_INIT_SET; + + /* rest of init goes here */ + + vxdprintk(VXD_CBIT(nid, 0), + "alloc_nx_info(%d) = %p", nid, new); + return new; +} + +/* __dealloc_nx_info() + + * final disposal of nx_info */ + +static void __dealloc_nx_info(struct nx_info *nxi) +{ + vxdprintk(VXD_CBIT(nid, 0), + "dealloc_nx_info(%p)", nxi); + + nxi->nx_hlist.next = LIST_POISON1; + nxi->nx_id = -1; + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_tasks)); + + nxi->nx_state |= NXS_RELEASED; + kfree(nxi); +} + +static void __shutdown_nx_info(struct nx_info *nxi) +{ + nxi->nx_state |= NXS_SHUTDOWN; + vs_net_change(nxi, VSC_NETDOWN); +} + +/* exported stuff */ + +void free_nx_info(struct nx_info *nxi) +{ + /* context shutdown is mandatory */ + BUG_ON(nxi->nx_state != NXS_SHUTDOWN); + + /* context must not be hashed 
*/ + BUG_ON(nxi->nx_state & NXS_HASHED); + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_tasks)); + + __dealloc_nx_info(nxi); +} + + +/* hash table for nx_info hash */ + +#define NX_HASH_SIZE 13 + +struct hlist_head nx_info_hash[NX_HASH_SIZE]; + +static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(nid_t nid) +{ + return (nid % NX_HASH_SIZE); +} + + + +/* __hash_nx_info() + + * add the nxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_nx_info(struct nx_info *nxi) +{ + struct hlist_head *head; + + vxd_assert_lock(&nx_info_hash_lock); + vxdprintk(VXD_CBIT(nid, 4), + "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id); + + /* context must not be hashed */ + BUG_ON(nx_info_state(nxi, NXS_HASHED)); + + nxi->nx_state |= NXS_HASHED; + head = &nx_info_hash[__hashval(nxi->nx_id)]; + hlist_add_head(&nxi->nx_hlist, head); +} + +/* __unhash_nx_info() + + * remove the nxi from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_nx_info(struct nx_info *nxi) +{ + vxd_assert_lock(&nx_info_hash_lock); + vxdprintk(VXD_CBIT(nid, 4), + "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id); + + /* context must be hashed */ + BUG_ON(!nx_info_state(nxi, NXS_HASHED)); + + nxi->nx_state &= ~NXS_HASHED; + hlist_del(&nxi->nx_hlist); +} + + +/* __lookup_nx_info() + + * requires the hash_lock to be held + * doesn't increment the nx_refcnt */ + +static inline struct nx_info *__lookup_nx_info(nid_t nid) +{ + struct hlist_head *head = &nx_info_hash[__hashval(nid)]; + struct hlist_node *pos; + struct nx_info *nxi; + + vxd_assert_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { + nxi = hlist_entry(pos, struct nx_info, nx_hlist); + + if (nxi->nx_id == nid) + goto found; + } + nxi = NULL; +found: + vxdprintk(VXD_CBIT(nid, 0), + "__lookup_nx_info(#%u): %p[#%u]", + nid, nxi, nxi?nxi->nx_id:0); + return nxi; +} + + +/* __nx_dynamic_id() + + * find 
unused dynamic nid + * requires the hash_lock to be held */ + +static inline nid_t __nx_dynamic_id(void) +{ + static nid_t seq = MAX_N_CONTEXT; + nid_t barrier = seq; + + vxd_assert_lock(&nx_info_hash_lock); + do { + if (++seq > MAX_N_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__lookup_nx_info(seq)) { + vxdprintk(VXD_CBIT(nid, 4), + "__nx_dynamic_id: [#%d]", seq); + return seq; + } + } while (barrier != seq); + return 0; +} + +/* __create_nx_info() + + * create the requested context + * get() and hash it */ + +static struct nx_info * __create_nx_info(int id) +{ + struct nx_info *new, *nxi = NULL; + + vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id); + + if (!(new = __alloc_nx_info(id))) + return ERR_PTR(-ENOMEM); + + /* required to make dynamic xids unique */ + spin_lock(&nx_info_hash_lock); + + /* dynamic context requested */ + if (id == NX_DYNAMIC_ID) { + id = __nx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + nxi = ERR_PTR(-EAGAIN); + goto out_unlock; + } + new->nx_id = id; + } + /* static context requested */ + else if ((nxi = __lookup_nx_info(id))) { + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) = %p (already there)", id, nxi); + if (nx_info_flags(nxi, NXF_STATE_SETUP, 0)) + nxi = ERR_PTR(-EBUSY); + else + nxi = ERR_PTR(-EEXIST); + goto out_unlock; + } + /* dynamic nid creation blocker */ + else if (id >= MIN_D_CONTEXT) { + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) (dynamic rejected)", id); + nxi = ERR_PTR(-EINVAL); + goto out_unlock; + } + + /* new context */ + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) = %p (new)", id, new); + __hash_nx_info(get_nx_info(new)); + nxi = new, new = NULL; + +out_unlock: + spin_unlock(&nx_info_hash_lock); + if (new) + __dealloc_nx_info(new); + return nxi; +} + + + +/* exported stuff */ + + +void unhash_nx_info(struct nx_info *nxi) +{ + __shutdown_nx_info(nxi); + spin_lock(&nx_info_hash_lock); + __unhash_nx_info(nxi); + spin_unlock(&nx_info_hash_lock); +} + +#ifdef 
CONFIG_VSERVER_LEGACYNET + +struct nx_info *create_nx_info(void) +{ + return __create_nx_info(NX_DYNAMIC_ID); +} + +#endif + +/* lookup_nx_info() + + * search for a nx_info and get() it + * negative id means current */ + +struct nx_info *lookup_nx_info(int id) +{ + struct nx_info *nxi = NULL; + + if (id < 0) { + nxi = get_nx_info(current->nx_info); + } else if (id > 1) { + spin_lock(&nx_info_hash_lock); + nxi = get_nx_info(__lookup_nx_info(id)); + spin_unlock(&nx_info_hash_lock); + } + return nxi; +} + +/* nid_is_hashed() + + * verify that nid is still hashed */ + +int nid_is_hashed(nid_t nid) +{ + int hashed; + + spin_lock(&nx_info_hash_lock); + hashed = (__lookup_nx_info(nid) != NULL); + spin_unlock(&nx_info_hash_lock); + return hashed; +} + + +#ifdef CONFIG_PROC_FS + +int get_nid_list(int index, unsigned int *nids, int size) +{ + int hindex, nr_nids = 0; + + for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) { + struct hlist_head *head = &nx_info_hash[hindex]; + struct hlist_node *pos; + + spin_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { + struct nx_info *nxi; + + if (--index > 0) + continue; + + nxi = hlist_entry(pos, struct nx_info, nx_hlist); + nids[nr_nids] = nxi->nx_id; + if (++nr_nids >= size) { + spin_unlock(&nx_info_hash_lock); + goto out; + } + } + /* keep the lock time short */ + spin_unlock(&nx_info_hash_lock); + } +out: + return nr_nids; +} +#endif + + +/* + * migrate task to new network + * gets nxi, puts old_nxi on change + */ + +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) +{ + struct nx_info *old_nxi; + int ret = 0; + + if (!p || !nxi) + BUG(); + + vxdprintk(VXD_CBIT(nid, 5), + "nx_migrate_task(%p,%p[#%d.%d.%d])", + p, nxi, nxi->nx_id, + atomic_read(&nxi->nx_usecnt), + atomic_read(&nxi->nx_tasks)); + + /* maybe disallow this completely? 
*/ + old_nxi = task_get_nx_info(p); + if (old_nxi == nxi) + goto out; + + task_lock(p); + if (old_nxi) + clr_nx_info(&p->nx_info); + claim_nx_info(nxi, p); + set_nx_info(&p->nx_info, nxi); + p->nid = nxi->nx_id; + task_unlock(p); + + vxdprintk(VXD_CBIT(nid, 5), + "moved task %p into nxi:%p[#%d]", + p, nxi, nxi->nx_id); + + if (old_nxi) + release_nx_info(old_nxi, p); +out: + put_nx_info(old_nxi); + return ret; +} + + +#ifdef CONFIG_INET + +#include +#include + +int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) +{ + if (!nxi) + return 1; + if (!ifa) + return 0; + return addr_in_nx_info(nxi, ifa->ifa_local); +} + +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) +{ + struct in_device *in_dev; + struct in_ifaddr **ifap; + struct in_ifaddr *ifa; + int ret = 0; + + if (!nxi) + return 1; + + in_dev = in_dev_get(dev); + if (!in_dev) + goto out; + + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) { + if (addr_in_nx_info(nxi, ifa->ifa_local)) { + ret = 1; + break; + } + } + in_dev_put(in_dev); +out: + return ret; +} + +/* + * check if address is covered by socket + * + * sk: the socket to check against + * addr: the address in question (must be != 0) + */ +static inline int __addr_in_socket(struct sock *sk, uint32_t addr) +{ + struct nx_info *nxi = sk->sk_nx_info; + uint32_t saddr = inet_rcv_saddr(sk); + + vxdprintk(VXD_CBIT(net, 5), + "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx", + sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0)); + + if (saddr) { + /* direct address match */ + return (saddr == addr); + } else if (nxi) { + /* match against nx_info */ + return addr_in_nx_info(nxi, addr); + } else { + /* unrestricted any socket */ + return 1; + } +} + + +int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk) +{ + vxdprintk(VXD_CBIT(net, 2), + "nx_addr_conflict(%p,%p) %d.%d,%d.%d", + nxi, sk, VXD_QUAD(addr)); + + if (addr) { + /* check 
real address */ + return __addr_in_socket(sk, addr); + } else if (nxi) { + /* check against nx_info */ + int i, n = nxi->nbipv4; + + for (i=0; i<n; i++) + if (__addr_in_socket(sk, nxi->ipv4[i])) + return 1; + return 0; + } else { + /* check against any */ + return 1; + } +} + +#endif /* CONFIG_INET */ + +void nx_set_persistent(struct nx_info *nxi) +{ + get_nx_info(nxi); + claim_nx_info(nxi, current); +} + +void nx_clear_persistent(struct nx_info *nxi) +{ + vxdprintk(VXD_CBIT(nid, 6), + "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id); + + release_nx_info(nxi, current); + put_nx_info(nxi); +} + +void nx_update_persistent(struct nx_info *nxi) +{ + if (nx_info_flags(nxi, NXF_PERSISTENT, 0)) + nx_set_persistent(nxi); + else + nx_clear_persistent(nxi); +} + +/* vserver syscall commands below here */ + +/* task nid and nx_info functions */ + +#include + + +int vc_task_nid(uint32_t id, void __user *data) +{ + nid_t nid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + nid = (tsk) ? 
tsk->nid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + nid = nx_current_nid(); + return nid; +} + + +int vc_nx_info(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_nx_info_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + nxi = lookup_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.nid = nxi->nx_id; + put_nx_info(nxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +/* network functions */ + +int vc_net_create(uint32_t nid, void __user *data) +{ + struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET }; + struct nx_info *new_nxi; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if ((nid > MAX_S_CONTEXT) && (nid != VX_DYNAMIC_ID)) + return -EINVAL; + if (nid < 2) + return -EINVAL; + + new_nxi = __create_nx_info(nid); + if (IS_ERR(new_nxi)) + return PTR_ERR(new_nxi); + + /* initial flags */ + new_nxi->nx_flags = vc_data.flagword; + + /* get a reference for persistent contexts */ + if ((vc_data.flagword & NXF_PERSISTENT)) + nx_set_persistent(new_nxi); + + ret = -ENOEXEC; + if (vs_net_change(new_nxi, VSC_NETUP)) + goto out_unhash; + ret = nx_migrate_task(current, new_nxi); + if (!ret) { + /* return context id on success */ + ret = new_nxi->nx_id; + goto out; + } +out_unhash: + /* prepare for context disposal */ + new_nxi->nx_state |= NXS_SHUTDOWN; + if ((vc_data.flagword & NXF_PERSISTENT)) + nx_clear_persistent(new_nxi); + __unhash_nx_info(new_nxi); +out: + put_nx_info(new_nxi); + return ret; +} + + +int vc_net_migrate(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = lookup_nx_info(id); + if (!nxi) + return -ESRCH; + nx_migrate_task(current, nxi); + put_nx_info(nxi); + return 0; +} + +int vc_net_add(uint32_t nid, void __user 
*data) +{ + struct vcmd_net_addr_v0 vc_data; /* no defaults: must be filled from user space */ + struct nx_info *nxi; + int index, pos, ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!data || copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + switch (vc_data.type) { + case NXA_TYPE_IPV4: + if ((vc_data.count < 1) || (vc_data.count > 4)) + return -EINVAL; + break; + + default: + break; + } + + nxi = lookup_nx_info(nid); + if (!nxi) + return -ESRCH; + + switch (vc_data.type) { + case NXA_TYPE_IPV4: + index = 0; + while ((index < vc_data.count) && + ((pos = nxi->nbipv4) < NB_IPV4ROOT)) { + nxi->ipv4[pos] = vc_data.ip[index]; + nxi->mask[pos] = vc_data.mask[index]; + index++; + nxi->nbipv4++; + } + ret = index; + break; + + case NXA_TYPE_IPV4|NXA_MOD_BCAST: + nxi->v4_bcast = vc_data.ip[0]; + ret = 1; + break; + + default: + ret = -EINVAL; + break; + } + + put_nx_info(nxi); + return ret; +} + +int vc_net_remove(uint32_t nid, void __user *data) +{ + struct vcmd_net_addr_v0 vc_data; /* no defaults: must be filled from user space */ + struct nx_info *nxi; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!data || copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = lookup_nx_info(nid); + if (!nxi) + return -ESRCH; + + switch (vc_data.type) { + case NXA_TYPE_ANY: + nxi->nbipv4 = 0; + break; + + default: + ret = -EINVAL; + break; + } + + put_nx_info(nxi); + return ret; +} + +int vc_get_nflags(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_flags_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = lookup_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.flagword = nxi->nx_flags; + + /* special STATE flag handling */ + vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME); + + put_nx_info(nxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_nflags(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_flags_v0 vc_data; + uint64_t mask, trigger; + + if 
(!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = lookup_nx_info(id); + if (!nxi) + return -ESRCH; + + /* special STATE flag handling */ + mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME); + trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword); + + nxi->nx_flags = vx_mask_flags(nxi->nx_flags, + vc_data.flagword, mask); + if (trigger & NXF_PERSISTENT) + nx_update_persistent(nxi); + + put_nx_info(nxi); + return 0; +} + +int vc_get_ncaps(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = lookup_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.ncaps = nxi->nx_ncaps; + vc_data.cmask = ~0UL; + put_nx_info(nxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_ncaps(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = lookup_nx_info(id); + if (!nxi) + return -ESRCH; + + nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps, + vc_data.ncaps, vc_data.cmask); + put_nx_info(nxi); + return 0; +} + + +#include + +EXPORT_SYMBOL_GPL(free_nx_info); +EXPORT_SYMBOL_GPL(unhash_nx_info); + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/proc.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/proc.c --- linux-2.6.17.13/kernel/vserver/proc.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/proc.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,860 @@ +/* + * linux/kernel/vserver/proc.c + * + * Virtual Context Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 basic structure + * V0.02 adaptation vs1.3.0 + * V0.03 proc permissions + * V0.04 locking/generic + * V0.05 next generation procfs + * V0.06 inode validation + * V0.07 generic rewrite vid + 
* + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "cvirt_proc.h" +#include "limit_proc.h" +#include "sched_proc.h" +#include "vci_config.h" + +static struct proc_dir_entry *proc_virtual; + +static struct proc_dir_entry *proc_vnet; + + +enum vid_directory_inos { + PROC_XID_INO = 32, + PROC_XID_INFO, + PROC_XID_STATUS, + PROC_XID_LIMIT, + PROC_XID_SCHED, + PROC_XID_CVIRT, + PROC_XID_CACCT, + + PROC_NID_INO = 64, + PROC_NID_INFO, + PROC_NID_STATUS, +}; + +#define PROC_VID_MASK 0x60 + + +/* first the actual feeds */ + + +static int proc_virtual_info(int vid, char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + "VCISyscall:\t%d\n" + "VCIKernel:\t%08x\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF + ,__NR_vserver + ,vci_kernel_config() + ); +} + + +int proc_xid_info (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + "Init:\t%d\n" + ,vxi->vx_id + ,vxi + ,vxi->vx_initpid + ); + put_vx_info(vxi); + return length; +} + +int proc_xid_status (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = sprintf(buffer, + "UseCnt:\t%d\n" + "Tasks:\t%d\n" + "Flags:\t%016llx\n" + "BCaps:\t%016llx\n" + "CCaps:\t%016llx\n" + "Ticks:\t%d\n" + ,atomic_read(&vxi->vx_usecnt) + ,atomic_read(&vxi->vx_tasks) + ,(unsigned long long)vxi->vx_flags + ,(unsigned long long)vxi->vx_bcaps + ,(unsigned long long)vxi->vx_ccaps + ,atomic_read(&vxi->limit.ticks) + ); + put_vx_info(vxi); + return length; +} + +int proc_xid_limit (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_limit(&vxi->limit, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_sched (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + 
vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_sched(&vxi->sched, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_cvirt (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + vx_update_load(vxi); + length = vx_info_proc_cvirt(&vxi->cvirt, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_cacct (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = lookup_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_cacct(&vxi->cacct, buffer); + put_vx_info(vxi); + return length; +} + + +static int proc_vnet_info(int vid, char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + "VCISyscall:\t%d\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF + ,__NR_vserver + ); +} + + +int proc_nid_info (int vid, char *buffer) +{ + struct nx_info *nxi; + int length, i; + + nxi = lookup_nx_info(vid); + if (!nxi) + return 0; + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + ,nxi->nx_id + ,nxi + ); + for (i=0; i<nxi->nbipv4; i++) { + length += sprintf(buffer + length, + "%d:\t" NIPQUAD_FMT "/" NIPQUAD_FMT "\n", i, + NIPQUAD(nxi->ipv4[i]), NIPQUAD(nxi->mask[i])); + } + put_nx_info(nxi); + return length; +} + +int proc_nid_status (int vid, char *buffer) +{ + struct nx_info *nxi; + int length; + + nxi = lookup_nx_info(vid); + if (!nxi) + return 0; + length = sprintf(buffer, + "UseCnt:\t%d\n" + "Tasks:\t%d\n" + ,atomic_read(&nxi->nx_usecnt) + ,atomic_read(&nxi->nx_tasks) + ); + put_nx_info(nxi); + return length; +} + +/* here the inode helpers */ + + +#define fake_ino(id,nr) (((nr) & 0xFFFF) | \ + (((id) & 0xFFFF) << 16)) + +#define inode_vid(i) (((i)->i_ino >> 16) & 0xFFFF) +#define inode_type(i) ((i)->i_ino & 0xFFFF) + +#define MAX_MULBY10 ((~0U-9)/10) + + +static struct inode *proc_vid_make_inode(struct super_block * sb, + int vid, int ino) +{ + struct inode *inode = new_inode(sb); + + if (!inode) + goto out; + + 
inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(vid, ino); + + inode->i_uid = 0; + inode->i_gid = 0; +out: + return inode; +} + +static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd) +{ + struct inode * inode = dentry->d_inode; + int vid, hashed=0; + + vid = inode_vid(inode); + switch (inode_type(inode) & PROC_VID_MASK) { + case PROC_XID_INO: + hashed = xid_is_hashed(vid); + break; + case PROC_NID_INO: + hashed = nid_is_hashed(vid); + break; + } + if (hashed) + return 1; + d_drop(dentry); + return 0; +} + + +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) + +static ssize_t proc_vid_info_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + unsigned long page; + ssize_t length; + int vid; + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + vid = inode_vid(inode); + length = PROC_I(inode)->op.proc_vid_read(vid, (char*)page); + + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, + (char *)page, length); + free_page(page); + return length; +} + + + + + +/* here comes the lower level (vid) */ + +static struct file_operations proc_vid_info_file_operations = { + .read = proc_vid_info_read, +}; + +static struct dentry_operations proc_vid_dentry_operations = { + .d_revalidate = proc_vid_revalidate, +}; + + +struct vid_entry { + int type; + int len; + char *name; + mode_t mode; +}; + +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} + +static struct vid_entry vx_base_stuff[] = { + E(PROC_XID_INFO, "info", S_IFREG|S_IRUGO), + E(PROC_XID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_XID_LIMIT, "limit", S_IFREG|S_IRUGO), + E(PROC_XID_SCHED, "sched", S_IFREG|S_IRUGO), + E(PROC_XID_CVIRT, "cvirt", S_IFREG|S_IRUGO), + E(PROC_XID_CACCT, "cacct", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + +static struct vid_entry vn_base_stuff[] = { + 
E(PROC_NID_INFO, "info", S_IFREG|S_IRUGO), + E(PROC_NID_STATUS, "status", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + + + +static struct dentry *proc_vid_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode; + struct vid_entry *p; + int error; + + error = -ENOENT; + inode = NULL; + + switch (inode_type(dir)) { + case PROC_XID_INO: + p = vx_base_stuff; + break; + case PROC_NID_INO: + p = vn_base_stuff; + break; + default: + goto out; + } + + for (; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (!p->name) + goto out; + + error = -EINVAL; + inode = proc_vid_make_inode(dir->i_sb, inode_vid(dir), p->type); + if (!inode) + goto out; + + switch(p->type) { + case PROC_XID_INFO: + PROC_I(inode)->op.proc_vid_read = proc_xid_info; + break; + case PROC_XID_STATUS: + PROC_I(inode)->op.proc_vid_read = proc_xid_status; + break; + case PROC_XID_LIMIT: + PROC_I(inode)->op.proc_vid_read = proc_xid_limit; + break; + case PROC_XID_SCHED: + PROC_I(inode)->op.proc_vid_read = proc_xid_sched; + break; + case PROC_XID_CVIRT: + PROC_I(inode)->op.proc_vid_read = proc_xid_cvirt; + break; + case PROC_XID_CACCT: + PROC_I(inode)->op.proc_vid_read = proc_xid_cacct; + break; + + case PROC_NID_INFO: + PROC_I(inode)->op.proc_vid_read = proc_nid_info; + break; + case PROC_NID_STATUS: + PROC_I(inode)->op.proc_vid_read = proc_nid_status; + break; + + default: + printk("procfs: impossible type (%d)",p->type); + iput(inode); + return ERR_PTR(-EINVAL); + } + inode->i_mode = p->mode; + inode->i_fop = &proc_vid_info_file_operations; + inode->i_nlink = 1; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + error = 0; +out: + return ERR_PTR(error); +} + + +static int proc_vid_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + int i, size; + struct inode *inode = filp->f_dentry->d_inode; + struct vid_entry *p; + + 
/*
 * atovid - parse a context id from a directory entry name.
 *
 * @str: name to parse; exactly @len bytes are read, no NUL is required
 * @len: number of bytes in @str
 *
 * Accepts plain decimal digits only.  Returns the parsed id, or -1 when
 * the name contains a non-digit, starts with '0' (which also rejects
 * the id 0 itself), or does not fit into a positive int.  An empty name
 * (@len <= 0) yields 0.
 */
static __inline__ int atovid(const char *str, int len)
{
	const int vid_max = (int)(~0U >> 1);	/* largest positive int */
	int vid = 0;

	while (len-- > 0) {
		int c = *str++ - '0';

		/* characters below '0' give a negative c, reject them too */
		if (c < 0 || c > 9)
			return -1;
		/* check before multiplying: signed overflow is undefined */
		if (vid > (vid_max - c) / 10)
			return -1;
		vid = vid * 10 + c;
		/* still zero after a digit means a leading '0' */
		if (!vid)
			return -1;
	}
	return vid;
}
= proc_vid_make_inode(dir->i_sb, 0, PROC_XID_INFO); + if (!inode) + goto out; + inode->i_fop = &proc_vid_info_file_operations; + PROC_I(inode)->op.proc_vid_read = proc_virtual_info; + inode->i_mode = S_IFREG|S_IRUGO; + d_add(dentry, inode); + return NULL; + } + + ret = -ENOENT; + xid = atovid(name, len); + if (xid < 0) + goto out; + vxi = lookup_vx_info(xid); + if (!vxi) + goto out; + + inode = NULL; + if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + inode = proc_vid_make_inode(dir->i_sb, + vxi->vx_id, PROC_XID_INO); + if (!inode) + goto out_release; + + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; + inode->i_op = &proc_vid_inode_operations; + inode->i_fop = &proc_vid_file_operations; + inode->i_nlink = 2; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + ret = 0; + +out_release: + put_vx_info(vxi); +out: + return ERR_PTR(ret); +} + + +struct dentry *proc_vnet_lookup(struct inode *dir, + struct dentry * dentry, struct nameidata *nd) +{ + int nid, len, ret; + struct nx_info *nxi; + const char *name; + struct inode *inode; + + name = dentry->d_name.name; + len = dentry->d_name.len; + ret = -ENOMEM; + if (len == 7 && !memcmp(name, "current", 7)) { + inode = new_inode(dir->i_sb); + if (!inode) + goto out; + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(1, PROC_NID_INO); + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = inode->i_gid = 0; + d_add(dentry, inode); + return NULL; + } + if (len == 4 && !memcmp(name, "info", 4)) { + inode = proc_vid_make_inode(dir->i_sb, 0, PROC_NID_INFO); + if (!inode) + goto out; + inode->i_fop = &proc_vid_info_file_operations; + PROC_I(inode)->op.proc_vid_read = proc_vnet_info; + inode->i_mode = S_IFREG|S_IRUGO; + d_add(dentry, inode); + return NULL; + } + + ret = -ENOENT; + nid = atovid(name, len); + if (nid < 0) + goto out; + nxi = lookup_nx_info(nid); + if (!nxi) + goto out; + + inode = NULL; + if (1) + inode = 
proc_vid_make_inode(dir->i_sb, + nxi->nx_id, PROC_NID_INO); + if (!inode) + goto out_release; + + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; + inode->i_op = &proc_vid_inode_operations; + inode->i_fop = &proc_vid_file_operations; + inode->i_nlink = 2; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + ret = 0; + +out_release: + put_nx_info(nxi); +out: + return ERR_PTR(ret); +} + + + + +#define PROC_NUMBUF 10 +#define PROC_MAXVIDS 32 + +int proc_virtual_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int xid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos-3; + unsigned int nr_xids, i; + ino_t ino; + + switch ((long)filp->f_pos) { + case 0: + ino = fake_ino(0, PROC_XID_INO); + if (filldir(dirent, ".", 1, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + ino = filp->f_dentry->d_parent->d_inode->i_ino; + if (filldir(dirent, "..", 2, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 2: + ino = fake_ino(0, PROC_XID_INFO); + if (filldir(dirent, "info", 4, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 3: + if (vx_current_xid() > 1) { + ino = fake_ino(1, PROC_XID_INO); + if (filldir(dirent, "current", 7, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + } + filp->f_pos++; + } + + nr_xids = get_xid_list(nr, xid_array, PROC_MAXVIDS); + for (i = 0; i < nr_xids; i++) { + int xid = xid_array[i]; + ino_t ino = fake_ino(xid, PROC_XID_INO); + unsigned int j = PROC_NUMBUF; + + do buf[--j] = '0' + (xid % 10); while (xid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, + filp->f_pos, ino, DT_DIR) < 0) + break; + filp->f_pos++; + } + return 0; +} + + +static struct file_operations proc_virtual_dir_operations = { + .read = generic_read_dir, + .readdir = proc_virtual_readdir, +}; + +static struct inode_operations proc_virtual_dir_inode_operations = { + 
.lookup = proc_virtual_lookup, +}; + + +int proc_vnet_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int nid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos-2; + unsigned int nr_nids, i; + ino_t ino; + + switch ((long)filp->f_pos) { + case 0: + ino = fake_ino(0, PROC_NID_INO); + if (filldir(dirent, ".", 1, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + ino = filp->f_dentry->d_parent->d_inode->i_ino; + if (filldir(dirent, "..", 2, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 2: + ino = fake_ino(0, PROC_NID_INFO); + if (filldir(dirent, "info", 4, + filp->f_pos, ino, DT_REG) < 0) + return 0; + filp->f_pos++; + /* fall through */ + } + + nr_nids = get_nid_list(nr, nid_array, PROC_MAXVIDS); + for (i = 0; i < nr_nids; i++) { + int nid = nid_array[i]; + ino_t ino = fake_ino(nid, PROC_NID_INO); + unsigned long j = PROC_NUMBUF; + + do buf[--j] = '0' + (nid % 10); while (nid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, + filp->f_pos, ino, DT_DIR) < 0) + break; + filp->f_pos++; + } + return 0; +} + + +static struct file_operations proc_vnet_dir_operations = { + .read = generic_read_dir, + .readdir = proc_vnet_readdir, +}; + +static struct inode_operations proc_vnet_dir_inode_operations = { + .lookup = proc_vnet_lookup, +}; + + + +void proc_vx_init(void) +{ + struct proc_dir_entry *ent; + + ent = proc_mkdir("virtual", 0); + if (ent) { + ent->proc_fops = &proc_virtual_dir_operations; + ent->proc_iops = &proc_virtual_dir_inode_operations; + } + proc_virtual = ent; + + ent = proc_mkdir("virtnet", 0); + if (ent) { + ent->proc_fops = &proc_vnet_dir_operations; + ent->proc_iops = &proc_vnet_dir_inode_operations; + } + proc_vnet = ent; +} + + + + +/* per pid info */ + + +int proc_pid_vx_info(struct task_struct *p, char *buffer) +{ + struct vx_info *vxi; + char * orig = buffer; + + buffer += sprintf (buffer,"XID:\t%d\n", 
vx_task_xid(p)); + if (vx_flags(VXF_INFO_HIDE, 0)) + goto out; + + vxi = task_get_vx_info(p); + if (!vxi) + goto out; + + buffer += sprintf (buffer,"BCaps:\t%016llx\n" + ,(unsigned long long)vxi->vx_bcaps); + buffer += sprintf (buffer,"CCaps:\t%016llx\n" + ,(unsigned long long)vxi->vx_ccaps); + buffer += sprintf (buffer,"CFlags:\t%016llx\n" + ,(unsigned long long)vxi->vx_flags); + buffer += sprintf (buffer,"CIPid:\t%d\n" + ,vxi->vx_initpid); + + put_vx_info(vxi); +out: + return buffer - orig; +} + + +int proc_pid_nx_info(struct task_struct *p, char *buffer) +{ + struct nx_info *nxi; + char * orig = buffer; + int i; + + buffer += sprintf (buffer,"NID:\t%d\n", nx_task_nid(p)); + if (vx_flags(VXF_INFO_HIDE, 0)) + goto out; + nxi = task_get_nx_info(p); + if (!nxi) + goto out; + + for (i=0; inbipv4; i++){ + buffer += sprintf (buffer, + "V4Root[%d]:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i + ,NIPQUAD(nxi->ipv4[i]) + ,NIPQUAD(nxi->mask[i])); + } + buffer += sprintf (buffer, + "V4Root[bcast]:\t%d.%d.%d.%d\n" + ,NIPQUAD(nxi->v4_bcast)); + + put_nx_info(nxi); +out: + return buffer - orig; +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/sched.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/sched.c --- linux-2.6.17.13/kernel/vserver/sched.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/sched.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,217 @@ +/* + * linux/kernel/vserver/sched.c + * + * Virtual Server: Scheduler Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 adapted Sam Vilains version to 2.6.3 + * V0.02 removed legacy interface + * + */ + +#include +#include +#include +#include + +#include +#include + + +/* + * recalculate the context's scheduling tokens + * + * ret > 0 : number of tokens available + * ret = 0 : context is paused + * ret < 0 : number of jiffies until new tokens arrive + * + */ +int vx_tokens_recalc(struct vx_info *vxi) +{ + long delta, tokens = 0; + + if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0)) + /* we are paused */ + return 
0; + + delta = jiffies - vxi->sched.jiffies; + + if (delta >= vxi->sched.interval) { + /* lockdown scheduler info */ + spin_lock(&vxi->sched.tokens_lock); + + /* calc integral token part */ + delta = jiffies - vxi->sched.jiffies; + tokens = delta / vxi->sched.interval; + delta = tokens * vxi->sched.interval; + tokens *= vxi->sched.fill_rate; + + atomic_add(tokens, &vxi->sched.tokens); + vxi->sched.jiffies += delta; + tokens = atomic_read(&vxi->sched.tokens); + + if (tokens > vxi->sched.tokens_max) { + tokens = vxi->sched.tokens_max; + atomic_set(&vxi->sched.tokens, tokens); + } + spin_unlock(&vxi->sched.tokens_lock); + } else { + /* no new tokens */ + tokens = vx_tokens_avail(vxi); + if (tokens <= 0) + vxi->vx_state |= VXS_ONHOLD; + if (tokens < vxi->sched.tokens_min) { + /* enough tokens will be available in */ + if (vxi->sched.tokens_min == 0) + return delta - vxi->sched.interval; + return delta - vxi->sched.interval * + vxi->sched.tokens_min / vxi->sched.fill_rate; + } + } + + /* we have some tokens left */ + if (vx_info_state(vxi, VXS_ONHOLD) && + (tokens >= vxi->sched.tokens_min)) + vxi->vx_state &= ~VXS_ONHOLD; + if (vx_info_state(vxi, VXS_ONHOLD)) + tokens -= vxi->sched.tokens_min; + + return tokens; +} + +/* + * effective_prio - return the priority that is based on the static + * priority but is modified by bonuses/penalties. + * + * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] + * into a -4 ... 0 ... +4 bonus/penalty range. + * + * Additionally, we scale another amount based on the number of + * CPU tokens currently held by the context, if the process is + * part of a context (and the appropriate SCHED flag is set). + * This ranges from -5 ... 0 ... +15, quadratically. + * + * So, the total bonus is -9 .. 0 .. +19 + * We use ~50% of the full 0...39 priority range so that: + * + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. 
+ * unless that context is far exceeding its CPU allocation. + * + * Both properties are important to certain workloads. + */ +int vx_effective_vavavoom(struct vx_info *vxi, int max_prio) +{ + int vavavoom, max; + + /* lots of tokens = lots of vavavoom + * no tokens = no vavavoom */ + if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) { + max = vxi->sched.tokens_max; + vavavoom = max - vavavoom; + max = max * max; + vavavoom = max_prio * VAVAVOOM_RATIO / 100 + * (vavavoom*vavavoom - (max >> 2)) / max; + } else + vavavoom = 0; + + vxi->sched.vavavoom = vavavoom; + return vavavoom + vxi->sched.priority_bias; +} + + +int vc_set_sched_v2(uint32_t xid, void __user *data) +{ + struct vcmd_set_sched_v2 vc_data; + struct vx_info *vxi; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(xid); + if (!vxi) + return -EINVAL; + + spin_lock(&vxi->sched.tokens_lock); + + if (vc_data.interval != SCHED_KEEP) + vxi->sched.interval = vc_data.interval; + if (vc_data.fill_rate != SCHED_KEEP) + vxi->sched.fill_rate = vc_data.fill_rate; + if (vc_data.tokens_min != SCHED_KEEP) + vxi->sched.tokens_min = vc_data.tokens_min; + if (vc_data.tokens_max != SCHED_KEEP) + vxi->sched.tokens_max = vc_data.tokens_max; + if (vc_data.tokens != SCHED_KEEP) + atomic_set(&vxi->sched.tokens, vc_data.tokens); + + /* Sanity check the resultant values */ + if (vxi->sched.fill_rate <= 0) + vxi->sched.fill_rate = 1; + if (vxi->sched.interval <= 0) + vxi->sched.interval = HZ; + if (vxi->sched.tokens_max == 0) + vxi->sched.tokens_max = 1; + if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max) + atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max); + if (vxi->sched.tokens_min > vxi->sched.tokens_max) + vxi->sched.tokens_min = vxi->sched.tokens_max; + + spin_unlock(&vxi->sched.tokens_lock); + put_vx_info(vxi); + return 0; +} + + +int vc_set_sched(uint32_t xid, void __user *data) +{ + struct vcmd_set_sched_v3 vc_data; + struct vx_info *vxi; + unsigned int 
set_mask; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(xid); + if (!vxi) + return -EINVAL; + + set_mask = vc_data.set_mask; + + spin_lock(&vxi->sched.tokens_lock); + + if (set_mask & VXSM_FILL_RATE) + vxi->sched.fill_rate = vc_data.fill_rate; + if (set_mask & VXSM_INTERVAL) + vxi->sched.interval = vc_data.interval; + if (set_mask & VXSM_TOKENS) + atomic_set(&vxi->sched.tokens, vc_data.tokens); + if (set_mask & VXSM_TOKENS_MIN) + vxi->sched.tokens_min = vc_data.tokens_min; + if (set_mask & VXSM_TOKENS_MAX) + vxi->sched.tokens_max = vc_data.tokens_max; + if (set_mask & VXSM_PRIO_BIAS) + vxi->sched.priority_bias = vc_data.priority_bias; + + /* Sanity check the resultant values */ + if (vxi->sched.fill_rate <= 0) + vxi->sched.fill_rate = 1; + if (vxi->sched.interval <= 0) + vxi->sched.interval = HZ; + if (vxi->sched.tokens_max == 0) + vxi->sched.tokens_max = 1; + if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max) + atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max); + if (vxi->sched.tokens_min > vxi->sched.tokens_max) + vxi->sched.tokens_min = vxi->sched.tokens_max; + if (vxi->sched.priority_bias > MAX_PRIO_BIAS) + vxi->sched.priority_bias = MAX_PRIO_BIAS; + if (vxi->sched.priority_bias < MIN_PRIO_BIAS) + vxi->sched.priority_bias = MIN_PRIO_BIAS; + + spin_unlock(&vxi->sched.tokens_lock); + put_vx_info(vxi); + return 0; +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/sched_init.h linux-2.6.17.13-vs2.0.2.1/kernel/vserver/sched_init.h --- linux-2.6.17.13/kernel/vserver/sched_init.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/sched_init.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,30 @@ + +static inline void vx_info_init_sched(struct _vx_sched *sched) +{ + int i; + + /* scheduling; hard code starting values as constants */ + sched->fill_rate = 1; + sched->interval = 4; + sched->tokens_min = HZ >> 4; + sched->tokens_max = HZ >> 1; + sched->jiffies = jiffies; + 
sched->tokens_lock = SPIN_LOCK_UNLOCKED; + + atomic_set(&sched->tokens, HZ >> 2); + sched->cpus_allowed = CPU_MASK_ALL; + sched->priority_bias = 0; + sched->vavavoom = 0; + + for_each_cpu(i) { + sched->cpu[i].user_ticks = 0; + sched->cpu[i].sys_ticks = 0; + sched->cpu[i].hold_ticks = 0; + } +} + +static inline void vx_info_exit_sched(struct _vx_sched *sched) +{ + return; +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/sched_proc.h linux-2.6.17.13-vs2.0.2.1/kernel/vserver/sched_proc.h --- linux-2.6.17.13/kernel/vserver/sched_proc.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/sched_proc.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,40 @@ +#ifndef _VX_SCHED_PROC_H +#define _VX_SCHED_PROC_H + + +static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer) +{ + int length = 0; + int i; + + length += sprintf(buffer, + "Token:\t\t%8d\n" + "FillRate:\t%8d\n" + "Interval:\t%8d\n" + "TokensMin:\t%8d\n" + "TokensMax:\t%8d\n" + "PrioBias:\t%8d\n" + "VaVaVoom:\t%8d\n" + ,atomic_read(&sched->tokens) + ,sched->fill_rate + ,sched->interval + ,sched->tokens_min + ,sched->tokens_max + ,sched->priority_bias + ,sched->vavavoom + ); + + for_each_online_cpu(i) { + length += sprintf(buffer + length, + "cpu %d: %lld %lld %lld\n" + ,i + ,(long long)sched->cpu[i].user_ticks + ,(long long)sched->cpu[i].sys_ticks + ,(long long)sched->cpu[i].hold_ticks + ); + } + + return length; +} + +#endif /* _VX_SCHED_PROC_H */ diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/signal.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/signal.c --- linux-2.6.17.13/kernel/vserver/signal.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/signal.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,139 @@ +/* + * linux/kernel/vserver/signal.c + * + * Virtual Server: Signal Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * + */ + +#include + +#include +#include + +#include +#include + + +int vx_info_kill(struct 
vx_info *vxi, int pid, int sig) +{ + int retval, count=0; + struct task_struct *p; + unsigned long priv = 0; + + retval = -ESRCH; + vxdprintk(VXD_CBIT(misc, 4), + "vx_info_kill(%p[#%d],%d,%d)*", + vxi, vxi->vx_id, pid, sig); + read_lock(&tasklist_lock); + switch (pid) { + case 0: + priv = 1; + case -1: + for_each_process(p) { + int err = 0; + + if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 || + (pid && vxi->vx_initpid == p->pid)) + continue; + + err = group_send_sig_info(sig, (void*)priv, p); + ++count; + if (err != -EPERM) + retval = err; + } + break; + + case 1: + if (vxi->vx_initpid) { + pid = vxi->vx_initpid; + priv = 1; + } + /* fallthrough */ + default: + p = find_task_by_real_pid(pid); + if (p) { + if (vx_task_xid(p) == vxi->vx_id) + retval = group_send_sig_info(sig, + (void*)priv, p); + } + break; + } + read_unlock(&tasklist_lock); + vxdprintk(VXD_CBIT(misc, 4), + "vx_info_kill(%p[#%d],%d,%d) = %d", + vxi, vxi->vx_id, pid, sig, retval); + return retval; +} + +int vc_ctx_kill(uint32_t id, void __user *data) +{ + int retval; + struct vcmd_ctx_kill_v0 vc_data; + struct vx_info *vxi; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + retval = vx_info_kill(vxi, vc_data.pid, vc_data.sig); + put_vx_info(vxi); + return retval; +} + + +static int __wait_exit(struct vx_info *vxi) +{ + DECLARE_WAITQUEUE(wait, current); + int ret = 0; + + add_wait_queue(&vxi->vx_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + +wait: + if (vx_info_state(vxi, + VXS_SHUTDOWN|VXS_HASHED|VXS_HELPER) == VXS_SHUTDOWN) + goto out; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + goto out; + } + schedule(); + goto wait; + +out: + set_current_state(TASK_RUNNING); + remove_wait_queue(&vxi->vx_wait, &wait); + return ret; +} + + + +int vc_wait_exit(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_wait_exit_v0 vc_data; + int 
ret; + + vxi = lookup_vx_info(id); + if (!vxi) + return -ESRCH; + + ret = __wait_exit(vxi); + vc_data.reboot_cmd = vxi->reboot_cmd; + vc_data.exit_code = vxi->exit_code; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/switch.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/switch.c --- linux-2.6.17.13/kernel/vserver/switch.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/switch.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,268 @@ +/* + * linux/kernel/vserver/switch.c + * + * Virtual Server: Syscall Switch + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 syscall switch + * V0.02 added signal to context + * V0.03 added rlimit functions + * V0.04 added iattr, task/xid functions + * V0.05 added debug/history stuff + * V0.06 added compat32 layer + * + */ + +#include +#include +#include +#include + +#include +#include +#include + + +static inline +int vc_get_version(uint32_t id) +{ +#ifdef CONFIG_VSERVER_LEGACY_VERSION + if (id == 63) + return VCI_LEGACY_VERSION; +#endif + return VCI_VERSION; +} + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +#ifdef CONFIG_COMPAT +#define __COMPAT(name, id, data, compat) \ + (compat) ? 
name ## _x32 (id, data) : name (id, data) +#else +#define __COMPAT(name, id, data, compat) \ + name (id, data) +#endif + + +static inline +long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat) +{ + vxdprintk(VXD_CBIT(switch, 0), + "vc: VCMD_%02d_%d[%d], %d,%p,%d", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), id, data, compat); + +#ifdef CONFIG_VSERVER_LEGACY + if (!capable(CAP_CONTEXT) && + /* dirty hack for capremove */ + !(cmd==VCMD_new_s_context && id==-2)) + return -EPERM; +#else + if (!capable(CAP_CONTEXT)) + return -EPERM; +#endif + + switch (cmd) { + case VCMD_get_version: + return vc_get_version(id); + + case VCMD_dump_history: +#ifdef CONFIG_VSERVER_HISTORY + return vc_dump_history(id); +#else + return -ENOSYS; +#endif + +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_new_s_context: + return vc_new_s_context(id, data); +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + case VCMD_set_ipv4root: + return vc_set_ipv4root(id, data); +#endif + + case VCMD_task_xid: + return vc_task_xid(id, data); + case VCMD_vx_info: + return vc_vx_info(id, data); + + case VCMD_task_nid: + return vc_task_nid(id, data); + case VCMD_nx_info: + return vc_nx_info(id, data); + + case VCMD_set_namespace_v0: + return vc_set_namespace(-1, data); + case VCMD_set_namespace: + return vc_set_namespace(id, data); + } + + /* those are allowed while in setup too */ + if (!vx_check(0, VX_ADMIN|VX_WATCH) && + !vx_flags(VXF_STATE_SETUP,0)) + return -EPERM; + +#ifdef CONFIG_VSERVER_LEGACY + switch (cmd) { + case VCMD_set_cflags: + case VCMD_set_ccaps: + if (vx_check(0, VX_WATCH)) + return 0; + } +#endif + + switch (cmd) { +#ifdef CONFIG_IA32_EMULATION + case VCMD_get_rlimit: + return __COMPAT(vc_get_rlimit, id, data, compat); + case VCMD_set_rlimit: + return __COMPAT(vc_set_rlimit, id, data, compat); +#else + case VCMD_get_rlimit: + return vc_get_rlimit(id, data); + case VCMD_set_rlimit: + return vc_set_rlimit(id, data); +#endif + case VCMD_get_rlimit_mask: + return 
vc_get_rlimit_mask(id, data); + + case VCMD_get_vhi_name: + return vc_get_vhi_name(id, data); + case VCMD_set_vhi_name: + return vc_set_vhi_name(id, data); + + case VCMD_set_cflags: + return vc_set_cflags(id, data); + case VCMD_get_cflags: + return vc_get_cflags(id, data); + + case VCMD_set_ccaps: + return vc_set_ccaps(id, data); + case VCMD_get_ccaps: + return vc_get_ccaps(id, data); + + case VCMD_set_nflags: + return vc_set_nflags(id, data); + case VCMD_get_nflags: + return vc_get_nflags(id, data); + + case VCMD_set_ncaps: + return vc_set_ncaps(id, data); + case VCMD_get_ncaps: + return vc_get_ncaps(id, data); + + case VCMD_set_sched_v2: + return vc_set_sched_v2(id, data); + /* this is version 3 */ + case VCMD_set_sched: + return vc_set_sched(id, data); + + case VCMD_add_dlimit: + return __COMPAT(vc_add_dlimit, id, data, compat); + case VCMD_rem_dlimit: + return __COMPAT(vc_rem_dlimit, id, data, compat); + case VCMD_set_dlimit: + return __COMPAT(vc_set_dlimit, id, data, compat); + case VCMD_get_dlimit: + return __COMPAT(vc_get_dlimit, id, data, compat); + } + + /* below here only with VX_ADMIN */ + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + switch (cmd) { + case VCMD_ctx_kill: + return vc_ctx_kill(id, data); + + case VCMD_wait_exit: + return vc_wait_exit(id, data); + + case VCMD_create_context: +#ifdef CONFIG_VSERVER_LEGACY + return vc_ctx_create(id, NULL); +#else + return -ENOSYS; +#endif + + case VCMD_get_iattr: + return __COMPAT(vc_get_iattr, id, data, compat); + case VCMD_set_iattr: + return __COMPAT(vc_set_iattr, id, data, compat); + + case VCMD_enter_namespace: + return vc_enter_namespace(id, data); + + case VCMD_ctx_create_v0: +#ifdef CONFIG_VSERVER_LEGACY + if (id == 1) { + current->xid = 1; + return 1; + } +#endif + return vc_ctx_create(id, NULL); + case VCMD_ctx_create: + return vc_ctx_create(id, data); + case VCMD_ctx_migrate_v0: + return vc_ctx_migrate(id, NULL); + case VCMD_ctx_migrate: + return vc_ctx_migrate(id, data); + + case 
VCMD_net_create_v0: + return vc_net_create(id, NULL); + case VCMD_net_create: + return vc_net_create(id, data); + case VCMD_net_migrate: + return vc_net_migrate(id, data); + case VCMD_net_add: + return vc_net_add(id, data); + case VCMD_net_remove: + return vc_net_remove(id, data); + + } + return -ENOSYS; +} + +extern asmlinkage long +sys_vserver(uint32_t cmd, uint32_t id, void __user *data) +{ + long ret = do_vserver(cmd, id, data, 0); + + vxdprintk(VXD_CBIT(switch, 1), + "vc: VCMD_%02d_%d[%d] = %08lx(%ld)", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), ret, ret); + return ret; +} + +#ifdef CONFIG_COMPAT + +extern asmlinkage long +sys32_vserver(uint32_t cmd, uint32_t id, void __user *data) +{ + long ret = do_vserver(cmd, id, data, 1); + + vxdprintk(VXD_CBIT(switch, 1), + "vc: VCMD_%02d_%d[%d] = %08lx(%ld)", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), ret, ret); + return ret; +} + +#endif /* CONFIG_COMPAT */ diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/sysctl.c linux-2.6.17.13-vs2.0.2.1/kernel/vserver/sysctl.c --- linux-2.6.17.13/kernel/vserver/sysctl.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/sysctl.c 2006-08-17 00:28:21 +0200 @@ -0,0 +1,227 @@ +/* + * kernel/vserver/sysctl.c + * + * Virtual Context Support + * + * Copyright (C) 2004-2005 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#define CTL_VSERVER 4242 /* unused? 
*/ + +enum { + CTL_DEBUG_ERROR = 0, + CTL_DEBUG_SWITCH = 1, + CTL_DEBUG_XID, + CTL_DEBUG_NID, + CTL_DEBUG_NET, + CTL_DEBUG_LIMIT, + CTL_DEBUG_CRES, + CTL_DEBUG_DLIM, + CTL_DEBUG_CVIRT, + CTL_DEBUG_MISC, +}; + + +unsigned int vx_debug_switch = 0; +unsigned int vx_debug_xid = 0; +unsigned int vx_debug_nid = 0; +unsigned int vx_debug_net = 0; +unsigned int vx_debug_limit = 0; +unsigned int vx_debug_cres = 0; +unsigned int vx_debug_dlim = 0; +unsigned int vx_debug_cvirt = 0; +unsigned int vx_debug_misc = 0; + + +static struct ctl_table_header *vserver_table_header; +static ctl_table vserver_table[]; + + +void vserver_register_sysctl(void) +{ + if (!vserver_table_header) { + vserver_table_header = register_sysctl_table(vserver_table, 1); + } + +} + +void vserver_unregister_sysctl(void) +{ + if (vserver_table_header) { + unregister_sysctl_table(vserver_table_header); + vserver_table_header = NULL; + } +} + + +static int proc_dodebug(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char tmpbuf[20], *p, c; + unsigned int value; + size_t left, len; + + if ((*ppos && !write) || !*lenp) { + *lenp = 0; + return 0; + } + + left = *lenp; + + if (write) { + if (!access_ok(VERIFY_READ, buffer, left)) + return -EFAULT; + p = (char *) buffer; + while (left && __get_user(c, p) >= 0 && isspace(c)) + left--, p++; + if (!left) + goto done; + + if (left > sizeof(tmpbuf) - 1) + return -EINVAL; + if (copy_from_user(tmpbuf, p, left)) + return -EFAULT; + tmpbuf[left] = '\0'; + + for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--) + value = 10 * value + (*p - '0'); + if (*p && !isspace(*p)) + return -EINVAL; + while (left && isspace(*p)) + left--, p++; + *(unsigned int *) table->data = value; + } else { + if (!access_ok(VERIFY_WRITE, buffer, left)) + return -EFAULT; + len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data); + if (len > left) + len = left; + if (__copy_to_user(buffer, tmpbuf, len)) + return -EFAULT; + if 
((left -= len) > 0) { + if (put_user('\n', (char *)buffer + len)) + return -EFAULT; + left--; + } + } + +done: + *lenp -= left; + *ppos += *lenp; + return 0; +} + + + +static ctl_table debug_table[] = { + { + .ctl_name = CTL_DEBUG_SWITCH, + .procname = "debug_switch", + .data = &vx_debug_switch, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_XID, + .procname = "debug_xid", + .data = &vx_debug_xid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_NID, + .procname = "debug_nid", + .data = &vx_debug_nid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_NET, + .procname = "debug_net", + .data = &vx_debug_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_LIMIT, + .procname = "debug_limit", + .data = &vx_debug_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_CRES, + .procname = "debug_cres", + .data = &vx_debug_cres, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_DLIM, + .procname = "debug_dlim", + .data = &vx_debug_dlim, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_CVIRT, + .procname = "debug_cvirt", + .data = &vx_debug_cvirt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_MISC, + .procname = "debug_misc", + .data = &vx_debug_misc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { .ctl_name = 0 } +}; + +static ctl_table vserver_table[] = { + { + .ctl_name = CTL_VSERVER, + .procname = "vserver", + .mode = 0555, + .child = debug_table + }, + { .ctl_name = 0 } +}; + + +EXPORT_SYMBOL_GPL(vx_debug_switch); +EXPORT_SYMBOL_GPL(vx_debug_xid); +EXPORT_SYMBOL_GPL(vx_debug_nid); 
+EXPORT_SYMBOL_GPL(vx_debug_net); +EXPORT_SYMBOL_GPL(vx_debug_limit); +EXPORT_SYMBOL_GPL(vx_debug_cres); +EXPORT_SYMBOL_GPL(vx_debug_dlim); +EXPORT_SYMBOL_GPL(vx_debug_cvirt); +EXPORT_SYMBOL_GPL(vx_debug_misc); + diff -NurpP --minimal linux-2.6.17.13/kernel/vserver/vci_config.h linux-2.6.17.13-vs2.0.2.1/kernel/vserver/vci_config.h --- linux-2.6.17.13/kernel/vserver/vci_config.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/kernel/vserver/vci_config.h 2006-08-17 00:28:21 +0200 @@ -0,0 +1,70 @@ + +enum { + VCI_KCBIT_LEGACY = 1, + VCI_KCBIT_LEGACYNET, + VCI_KCBIT_NGNET, + + VCI_KCBIT_PROC_SECURE, + VCI_KCBIT_HARDCPU, + VCI_KCBIT_HARDCPU_IDLE, + + VCI_KCBIT_LEGACY_VERSION = 15, + + VCI_KCBIT_DEBUG = 16, + VCI_KCBIT_HISTORY = 20, + VCI_KCBIT_TAGXID = 24, +}; + + +static inline uint32_t vci_kernel_config(void) +{ + return + /* various legacy options */ +#ifdef CONFIG_VSERVER_LEGACY + (1 << VCI_KCBIT_LEGACY) | +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + (1 << VCI_KCBIT_LEGACYNET) | +#endif +#ifdef CONFIG_VSERVER_LEGACY_VERSION + (1 << VCI_KCBIT_LEGACY_VERSION) | +#endif + + /* configured features */ +#ifdef CONFIG_VSERVER_PROC_SECURE + (1 << VCI_KCBIT_PROC_SECURE) | +#endif +#ifdef CONFIG_VSERVER_HARDCPU + (1 << VCI_KCBIT_HARDCPU) | +#endif +#ifdef CONFIG_VSERVER_HARDCPU_IDLE + (1 << VCI_KCBIT_HARDCPU_IDLE) | +#endif + + /* debug options */ +#ifdef CONFIG_VSERVER_DEBUG + (1 << VCI_KCBIT_DEBUG) | +#endif +#ifdef CONFIG_VSERVER_HISTORY + (1 << VCI_KCBIT_HISTORY) | +#endif + + /* inode xid tagging */ +#if defined(CONFIG_INOXID_NONE) + (0 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_UID16) + (1 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_GID16) + (2 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_UGID24) + (3 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_INTERN) + (4 << VCI_KCBIT_TAGXID) | +#elif defined(CONFIG_INOXID_RUNTIME) + (5 << VCI_KCBIT_TAGXID) | +#else + (7 << VCI_KCBIT_TAGXID) | +#endif + 0; +} + diff -NurpP --minimal 
linux-2.6.17.13/lxdialog.scrltmp linux-2.6.17.13-vs2.0.2.1/lxdialog.scrltmp --- linux-2.6.17.13/lxdialog.scrltmp 1970-01-01 01:00:00 +0100 +++ linux-2.6.17.13-vs2.0.2.1/lxdialog.scrltmp 2006-08-29 17:50:52 +0200 @@ -0,0 +1 @@ +11 diff -NurpP --minimal linux-2.6.17.13/mm/filemap_xip.c linux-2.6.17.13-vs2.0.2.1/mm/filemap_xip.c --- linux-2.6.17.13/mm/filemap_xip.c 2006-04-09 13:49:58 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/filemap_xip.c 2006-08-17 00:28:21 +0200 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "filemap.h" diff -NurpP --minimal linux-2.6.17.13/mm/fremap.c linux-2.6.17.13-vs2.0.2.1/mm/fremap.c --- linux-2.6.17.13/mm/fremap.c 2006-01-03 17:30:13 +0100 +++ linux-2.6.17.13-vs2.0.2.1/mm/fremap.c 2006-08-17 00:28:21 +0200 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -74,6 +75,8 @@ int install_page(struct mm_struct *mm, s err = -ENOMEM; if (page_mapcount(page) > INT_MAX/2) goto unlock; + if (!vx_rsspages_avail(mm, 1)) + goto unlock; if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) inc_mm_counter(mm, file_rss); diff -NurpP --minimal linux-2.6.17.13/mm/hugetlb.c linux-2.6.17.13-vs2.0.2.1/mm/hugetlb.c --- linux-2.6.17.13/mm/hugetlb.c 2006-06-18 04:55:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/hugetlb.c 2006-08-17 00:28:21 +0200 @@ -19,6 +19,7 @@ #include #include +#include #include "internal.h" const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; diff -NurpP --minimal linux-2.6.17.13/mm/memory.c linux-2.6.17.13-vs2.0.2.1/mm/memory.c --- linux-2.6.17.13/mm/memory.c 2006-06-18 04:55:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/memory.c 2006-08-17 00:28:21 +0200 @@ -1901,6 +1901,10 @@ again: grab_swap_token(); } + if (!vx_rsspages_avail(mm, 1)) { + ret = VM_FAULT_OOM; + goto out; + } mark_page_accessed(page); lock_page(page); if (!PageSwapCache(page)) { @@ -1978,6 +1982,8 @@ static int do_anonymous_page(struct mm_s /* Allocate our own private page. 
*/ pte_unmap(page_table); + if (!vx_rsspages_avail(mm, 1)) + goto oom; if (unlikely(anon_vma_prepare(vma))) goto oom; page = alloc_zeroed_user_highpage(vma, address); @@ -2056,6 +2062,9 @@ static int do_no_page(struct mm_struct * smp_rmb(); /* serializes i_size against truncate_count */ } retry: + /* FIXME: is that check useful here? */ + if (!vx_rsspages_avail(mm, 1)) + return VM_FAULT_OOM; new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); /* * No smp_rmb is needed here as long as there's a full diff -NurpP --minimal linux-2.6.17.13/mm/mempolicy.c linux-2.6.17.13-vs2.0.2.1/mm/mempolicy.c --- linux-2.6.17.13/mm/mempolicy.c 2006-06-18 04:55:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/mempolicy.c 2006-08-17 00:28:21 +0200 @@ -87,6 +87,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/mm/mlock.c linux-2.6.17.13-vs2.0.2.1/mm/mlock.c --- linux-2.6.17.13/mm/mlock.c 2006-04-09 13:49:58 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/mlock.c 2006-08-17 00:28:21 +0200 @@ -10,6 +10,7 @@ #include #include #include +#include static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, @@ -65,7 +66,7 @@ success: ret = make_pages_present(start, end); } - vma->vm_mm->locked_vm -= pages; + vx_vmlocked_sub(vma->vm_mm, pages); out: if (ret == -ENOMEM) ret = -EAGAIN; @@ -123,7 +124,7 @@ static int do_mlock(unsigned long start, asmlinkage long sys_mlock(unsigned long start, size_t len) { - unsigned long locked; + unsigned long locked, grow; unsigned long lock_limit; int error = -ENOMEM; @@ -134,8 +135,10 @@ asmlinkage long sys_mlock(unsigned long len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; - locked = len >> PAGE_SHIFT; - locked += current->mm->locked_vm; + grow = len >> PAGE_SHIFT; + if (!vx_vmlocked_avail(current->mm, grow)) + goto out; + locked = current->mm->locked_vm + grow; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; @@ -143,6 +146,7 @@ 
asmlinkage long sys_mlock(unsigned long /* check against resource limits */ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = do_mlock(start, len, 1); +out: up_write(&current->mm->mmap_sem); return error; } @@ -202,6 +206,8 @@ asmlinkage long sys_mlockall(int flags) lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; + if (!vx_vmlocked_avail(current->mm, current->mm->total_vm)) + goto out; if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); diff -NurpP --minimal linux-2.6.17.13/mm/mmap.c linux-2.6.17.13-vs2.0.2.1/mm/mmap.c --- linux-2.6.17.13/mm/mmap.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/mmap.c 2006-08-25 05:44:21 +0200 @@ -1126,10 +1126,10 @@ munmap_back: kmem_cache_free(vm_area_cachep, vma); } out: - mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } if (flags & MAP_POPULATE) { @@ -1489,9 +1489,9 @@ static int acct_stack_growth(struct vm_a return -ENOMEM; /* Ok, everything looks good - let it rip */ - mm->total_vm += grow; + vx_vmpages_add(mm, grow); if (vma->vm_flags & VM_LOCKED) - mm->locked_vm += grow; + vx_vmlocked_add(mm, grow); vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow); return 0; } @@ -1644,9 +1644,9 @@ static void remove_vma_list(struct mm_st do { long nrpages = vma_pages(vma); - mm->total_vm -= nrpages; + vx_vmpages_sub(mm, nrpages); if (vma->vm_flags & VM_LOCKED) - mm->locked_vm -= nrpages; + vx_vmlocked_sub(mm, nrpages); vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); vma = remove_vma(vma); } while (vma); @@ -1882,6 +1882,8 @@ unsigned long do_brk(unsigned long addr, lock_limit >>= PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; + if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT)) + return
-ENOMEM; } /* @@ -1908,7 +1910,8 @@ unsigned long do_brk(unsigned long addr, if (mm->map_count > sysctl_max_map_count) return -ENOMEM; - if (security_vm_enough_memory(len >> PAGE_SHIFT)) + if (security_vm_enough_memory(len >> PAGE_SHIFT) || + !vx_vmpages_avail(mm, len >> PAGE_SHIFT)) return -ENOMEM; /* Can we just expand an old private anonymous mapping? */ @@ -1933,9 +1936,9 @@ unsigned long do_brk(unsigned long addr, vma->vm_page_prot = protection_map[flags & 0x0f]; vma_link(mm, vma, prev, rb_link, rb_parent); out: - mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); if (flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } return addr; @@ -1961,6 +1964,11 @@ void exit_mmap(struct mm_struct *mm) free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); tlb_finish_mmu(tlb, 0, end); + set_mm_counter(mm, file_rss, 0); + set_mm_counter(mm, anon_rss, 0); + vx_vmpages_sub(mm, mm->total_vm); + vx_vmlocked_sub(mm, mm->locked_vm); + /* * Walk the list again, actually closing and freeing it, * with preemption enabled, without holding any MM locks. 
@@ -2000,7 +2008,8 @@ int insert_vm_struct(struct mm_struct * if (__vma && __vma->vm_start < vma->vm_end) return -ENOMEM; if ((vma->vm_flags & VM_ACCOUNT) && - security_vm_enough_memory(vma_pages(vma))) + (security_vm_enough_memory(vma_pages(vma)) || + !vx_vmpages_avail(mm, vma_pages(vma)))) return -ENOMEM; vma_link(mm, vma, prev, rb_link, rb_parent); return 0; @@ -2073,5 +2082,7 @@ int may_expand_vm(struct mm_struct *mm, if (cur + npages > lim) return 0; + if (!vx_vmpages_avail(mm, npages)) + return 0; return 1; } diff -NurpP --minimal linux-2.6.17.13/mm/mremap.c linux-2.6.17.13-vs2.0.2.1/mm/mremap.c --- linux-2.6.17.13/mm/mremap.c 2006-04-09 13:49:58 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/mremap.c 2006-08-17 00:28:21 +0200 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -211,7 +212,7 @@ static unsigned long move_vma(struct vm_ * If this were a serious issue, we'd add a flag to do_munmap(). */ hiwater_vm = mm->hiwater_vm; - mm->total_vm += new_len >> PAGE_SHIFT; + vx_vmpages_add(mm, new_len >> PAGE_SHIFT); vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT); if (do_munmap(mm, old_addr, old_len) < 0) { @@ -229,7 +230,7 @@ static unsigned long move_vma(struct vm_ } if (vm_flags & VM_LOCKED) { - mm->locked_vm += new_len >> PAGE_SHIFT; + vx_vmlocked_add(mm, new_len >> PAGE_SHIFT); if (new_len > old_len) make_pages_present(new_addr + old_len, new_addr + new_len); @@ -336,6 +337,9 @@ unsigned long do_mremap(unsigned long ad ret = -EAGAIN; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) goto out; + if (!vx_vmlocked_avail(current->mm, + (new_len - old_len) >> PAGE_SHIFT)) + goto out; } if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) { ret = -ENOMEM; @@ -364,10 +368,10 @@ unsigned long do_mremap(unsigned long ad vma_adjust(vma, vma->vm_start, addr + new_len, vma->vm_pgoff, NULL); - mm->total_vm += pages; + vx_vmpages_add(mm, pages); vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages); if (vma->vm_flags & 
VM_LOCKED) { - mm->locked_vm += pages; + vx_vmlocked_add(mm, pages); make_pages_present(addr + old_len, addr + new_len); } diff -NurpP --minimal linux-2.6.17.13/mm/nommu.c linux-2.6.17.13-vs2.0.2.1/mm/nommu.c --- linux-2.6.17.13/mm/nommu.c 2006-06-18 04:55:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/nommu.c 2006-08-17 00:28:21 +0200 @@ -820,7 +820,7 @@ unsigned long do_mmap_pgoff(struct file realalloc += kobjsize(vma); askedalloc += sizeof(*vma); - current->mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(current->mm, len >> PAGE_SHIFT); add_nommu_vma(vma); @@ -937,7 +937,7 @@ int do_munmap(struct mm_struct *mm, unsi kfree(vml); update_hiwater_vm(mm); - mm->total_vm -= len >> PAGE_SHIFT; + vx_vmpages_sub(mm, len >> PAGE_SHIFT); #ifdef DEBUG show_process_blocks(); @@ -956,7 +956,7 @@ void exit_mmap(struct mm_struct * mm) printk("Exit_mmap:\n"); #endif - mm->total_vm = 0; + vx_vmpages_sub(mm, mm->total_vm); while ((tmp = mm->context.vmlist)) { mm->context.vmlist = tmp->next; diff -NurpP --minimal linux-2.6.17.13/mm/oom_kill.c linux-2.6.17.13-vs2.0.2.1/mm/oom_kill.c --- linux-2.6.17.13/mm/oom_kill.c 2006-06-18 04:55:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/oom_kill.c 2006-08-17 00:28:21 +0200 @@ -66,6 +66,8 @@ unsigned long badness(struct task_struct */ task_unlock(p); + /* FIXME: add vserver badness ;) */ + /* * Processes which fork a lot of child processes are likely * a good choice. 
We add half the vmsize of the children if they diff -NurpP --minimal linux-2.6.17.13/mm/page_alloc.c linux-2.6.17.13-vs2.0.2.1/mm/page_alloc.c --- linux-2.6.17.13/mm/page_alloc.c 2006-06-18 04:55:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/page_alloc.c 2006-08-17 00:28:21 +0200 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1375,6 +1376,8 @@ void si_meminfo(struct sysinfo *val) val->freehigh = 0; #endif val->mem_unit = PAGE_SIZE; + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_meminfo(val); } EXPORT_SYMBOL(si_meminfo); @@ -1389,6 +1392,8 @@ void si_meminfo_node(struct sysinfo *val val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; val->mem_unit = PAGE_SIZE; + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_meminfo(val); } #endif diff -NurpP --minimal linux-2.6.17.13/mm/rmap.c linux-2.6.17.13-vs2.0.2.1/mm/rmap.c --- linux-2.6.17.13/mm/rmap.c 2006-06-18 04:55:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/rmap.c 2006-08-17 00:28:21 +0200 @@ -53,6 +53,7 @@ #include #include #include +#include #include diff -NurpP --minimal linux-2.6.17.13/mm/shmem.c linux-2.6.17.13-vs2.0.2.1/mm/shmem.c --- linux-2.6.17.13/mm/shmem.c 2006-06-18 04:55:36 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/shmem.c 2006-08-17 00:28:21 +0200 @@ -53,7 +53,6 @@ #include /* This magic number is used in glibc for posix shared memory */ -#define TMPFS_MAGIC 0x01021994 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) @@ -1658,7 +1657,7 @@ static int shmem_statfs(struct super_blo { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - buf->f_type = TMPFS_MAGIC; + buf->f_type = TMPFS_SUPER_MAGIC; buf->f_bsize = PAGE_CACHE_SIZE; buf->f_namelen = NAME_MAX; spin_lock(&sbinfo->stat_lock); @@ -2101,7 +2100,7 @@ static int shmem_fill_super(struct super sb->s_maxbytes = SHMEM_MAX_BYTES; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - 
sb->s_magic = TMPFS_MAGIC; + sb->s_magic = TMPFS_SUPER_MAGIC; sb->s_op = &shmem_ops; sb->s_time_gran = 1; diff -NurpP --minimal linux-2.6.17.13/mm/swapfile.c linux-2.6.17.13-vs2.0.2.1/mm/swapfile.c --- linux-2.6.17.13/mm/swapfile.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/swapfile.c 2006-08-25 05:44:21 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include DEFINE_SPINLOCK(swap_lock); unsigned int nr_swapfiles; @@ -1696,6 +1697,8 @@ void si_swapinfo(struct sysinfo *val) val->freeswap = nr_swap_pages + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; spin_unlock(&swap_lock); + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_swapinfo(val); } /* diff -NurpP --minimal linux-2.6.17.13/mm/vmscan.c linux-2.6.17.13-vs2.0.2.1/mm/vmscan.c --- linux-2.6.17.13/mm/vmscan.c 2006-06-18 04:55:37 +0200 +++ linux-2.6.17.13-vs2.0.2.1/mm/vmscan.c 2006-08-17 00:28:21 +0200 @@ -1357,7 +1357,7 @@ static int __init kswapd_init(void) pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL); BUG_ON(pid < 0); read_lock(&tasklist_lock); - pgdat->kswapd = find_task_by_pid(pid); + pgdat->kswapd = find_task_by_real_pid(pid); read_unlock(&tasklist_lock); } total_memory = nr_free_pagecache_pages(); diff -NurpP --minimal linux-2.6.17.13/net/core/dev.c linux-2.6.17.13-vs2.0.2.1/net/core/dev.c --- linux-2.6.17.13/net/core/dev.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/core/dev.c 2006-08-17 00:28:21 +0200 @@ -115,6 +115,7 @@ #include #include #include +#include /* * The list of packet types we will receive (as opposed to discard) @@ -1945,6 +1946,9 @@ static int dev_ifconf(char __user *arg) total = 0; for (dev = dev_base; dev; dev = dev->next) { + if (vx_flags(VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, current->nx_info)) + continue; for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2005,6 +2009,10 @@ void dev_seq_stop(struct seq_file *seq, static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { + struct nx_info 
*nxi = current->nx_info; + + if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi)) + return; if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); diff -NurpP --minimal linux-2.6.17.13/net/core/rtnetlink.c linux-2.6.17.13-vs2.0.2.1/net/core/rtnetlink.c --- linux-2.6.17.13/net/core/rtnetlink.c 2006-09-13 18:43:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/core/rtnetlink.c 2006-08-25 05:44:21 +0200 @@ -323,6 +323,9 @@ static int rtnetlink_dump_ifinfo(struct for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; + if (vx_info_flags(skb->sk->sk_vx_info, VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, skb->sk->sk_nx_info)) + continue; if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0, @@ -613,6 +616,9 @@ void rtmsg_ifinfo(int type, struct net_d sizeof(struct rtnl_link_ifmap) + sizeof(struct rtnl_link_stats) + 128); + if (vx_flags(VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, current->nx_info)) + return; skb = alloc_skb(size, GFP_KERNEL); if (!skb) return; diff -NurpP --minimal linux-2.6.17.13/net/core/sock.c linux-2.6.17.13-vs2.0.2.1/net/core/sock.c --- linux-2.6.17.13/net/core/sock.c 2006-06-18 04:55:39 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/core/sock.c 2006-08-17 00:28:21 +0200 @@ -125,6 +125,9 @@ #include #include +#include +#include +#include #ifdef CONFIG_INET #include @@ -768,6 +771,8 @@ struct sock *sk_alloc(int family, gfp_t sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); } + sock_vx_init(sk); + sock_nx_init(sk); if (security_sk_alloc(sk, family, priority)) goto out_free; @@ -806,6 +811,11 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + vx_sock_dec(sk); + clr_vx_info(&sk->sk_vx_info); + sk->sk_xid = -1; + clr_nx_info(&sk->sk_nx_info); + sk->sk_nid = -1; if (sk->sk_prot_creator->slab != NULL) kmem_cache_free(sk->sk_prot_creator->slab, sk); else @@ -823,6 +833,8 @@ struct sock *sk_clone(const 
struct sock memcpy(newsk, sk, sk->sk_prot->obj_size); /* SANITY */ + sock_vx_init(newsk); + sock_nx_init(newsk); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); @@ -863,6 +875,12 @@ struct sock *sk_clone(const struct sock newsk->sk_priority = 0; atomic_set(&newsk->sk_refcnt, 2); + set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info); + newsk->sk_xid = sk->sk_xid; + vx_sock_inc(newsk); + set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info); + newsk->sk_nid = sk->sk_nid; + /* * Increment the counter in the same struct proto as the master * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that @@ -1427,6 +1445,11 @@ void sock_init_data(struct socket *sock, sk->sk_stamp.tv_sec = -1L; sk->sk_stamp.tv_usec = -1L; + set_vx_info(&sk->sk_vx_info, current->vx_info); + sk->sk_xid = vx_current_xid(); + vx_sock_inc(sk); + set_nx_info(&sk->sk_nx_info, current->nx_info); + sk->sk_nid = nx_current_nid(); atomic_set(&sk->sk_refcnt, 1); } diff -NurpP --minimal linux-2.6.17.13/net/ipv4/af_inet.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/af_inet.c --- linux-2.6.17.13/net/ipv4/af_inet.c 2006-06-18 04:55:43 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/af_inet.c 2006-08-17 00:28:21 +0200 @@ -114,6 +114,7 @@ #ifdef CONFIG_IP_MROUTE #include #endif +#include DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; @@ -282,9 +283,11 @@ lookup_protocol: } err = -EPERM; + if ((protocol == IPPROTO_ICMP) && vx_ccaps(VXC_RAW_ICMP)) + goto override; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; - +override: sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; @@ -401,6 +404,10 @@ int inet_bind(struct socket *sock, struc unsigned short snum; int chk_addr_ret; int err; + __u32 s_addr; /* Address used for validation */ + __u32 s_addr1; /* Address used for socket */ + __u32 s_addr2; /* Broadcast address for the socket */ + struct nx_info *nxi = sk->sk_nx_info; /* If the socket has its own bind function then 
use it. (RAW) */ if (sk->sk_prot->bind) { @@ -411,7 +418,40 @@ int inet_bind(struct socket *sock, struc if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + s_addr = addr->sin_addr.s_addr; + s_addr1 = s_addr; + s_addr2 = 0xffffffffl; + + vxdprintk(VXD_CBIT(net, 3), + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d", + sk, sk->sk_nx_info, sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0), + VXD_QUAD(s_addr)); + if (nxi) { + __u32 v4_bcast = nxi->v4_bcast; + __u32 ipv4root = nxi->ipv4[0]; + int nbipv4 = nxi->nbipv4; + + if (s_addr == 0) { + /* bind to any for 1-n */ + s_addr = ipv4root; + s_addr1 = (nbipv4 > 1) ? 0 : s_addr; + s_addr2 = v4_bcast; + } else if (s_addr == IPI_LOOPBACK) { + /* rewrite localhost to ipv4root */ + s_addr = ipv4root; + s_addr1 = ipv4root; + } else if (s_addr != v4_bcast) { + /* normal address bind */ + if (!addr_in_nx_info(nxi, s_addr)) + return -EADDRNOTAVAIL; + } + } + chk_addr_ret = inet_addr_type(s_addr); + + vxdprintk(VXD_CBIT(net, 3), + "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d", + sk, VXD_QUAD(s_addr), VXD_QUAD(s_addr1), VXD_QUAD(s_addr2)); /* Not specified by any standard per-se, however it breaks too * many applications when removed. 
It is unfortunate since @@ -423,7 +463,7 @@ int inet_bind(struct socket *sock, struc err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !inet->freebind && - addr->sin_addr.s_addr != INADDR_ANY && + s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -448,7 +488,8 @@ int inet_bind(struct socket *sock, struc if (sk->sk_state != TCP_CLOSE || inet->num) goto out_release_sock; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->rcv_saddr = inet->saddr = s_addr1; + inet->rcv_saddr2 = s_addr2; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->saddr = 0; /* Use device */ diff -NurpP --minimal linux-2.6.17.13/net/ipv4/devinet.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/devinet.c --- linux-2.6.17.13/net/ipv4/devinet.c 2006-06-18 04:55:43 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/devinet.c 2006-08-17 00:28:21 +0200 @@ -609,6 +609,9 @@ int devinet_ioctl(unsigned int cmd, void *colon = ':'; if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { + struct nx_info *nxi = current->nx_info; + int hide_netif = vx_flags(VXF_HIDE_NETIF, 0); + if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ @@ -617,6 +620,8 @@ int devinet_ioctl(unsigned int cmd, void This is checked above. 
*/ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { + if (hide_netif && !ifa_in_nx_info(ifa, nxi)) + continue; if (!strcmp(ifr.ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == ifa->ifa_address) { @@ -629,9 +634,12 @@ int devinet_ioctl(unsigned int cmd, void comparing just the label */ if (!ifa) { for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; - ifap = &ifa->ifa_next) + ifap = &ifa->ifa_next) { + if (hide_netif && !ifa_in_nx_info(ifa, nxi)) + continue; if (!strcmp(ifr.ifr_name, ifa->ifa_label)) break; + } } } @@ -782,6 +790,9 @@ static int inet_gifconf(struct net_devic goto out; for (; ifa; ifa = ifa->ifa_next) { + if (vx_flags(VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, current->nx_info)) + continue; if (!buf) { done += sizeof(ifr); continue; @@ -1093,6 +1104,7 @@ static int inet_dump_ifaddr(struct sk_bu struct net_device *dev; struct in_device *in_dev; struct in_ifaddr *ifa; + struct sock *sk = skb->sk; int s_ip_idx, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; @@ -1110,6 +1122,9 @@ static int inet_dump_ifaddr(struct sk_bu for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { + if (sk && vx_info_flags(sk->sk_vx_info, VXF_HIDE_NETIF, 0) && + !ifa_in_nx_info(ifa, sk->sk_nx_info)) + continue; if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, diff -NurpP --minimal linux-2.6.17.13/net/ipv4/fib_hash.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/fib_hash.c --- linux-2.6.17.13/net/ipv4/fib_hash.c 2006-04-09 13:49:59 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/fib_hash.c 2006-08-17 00:28:21 +0200 @@ -989,6 +989,8 @@ static unsigned fib_flag_trans(int type, return flags; } +extern int dev_in_nx_info(struct net_device *, struct nx_info *); + /* * This outputs /proc/net/route. 
* @@ -1019,7 +1021,8 @@ static int fib_seq_show(struct seq_file prefix = f->fn_key; mask = FZ_MASK(iter->zone); flags = fib_flag_trans(fa->fa_type, mask, fi); - if (fi) + if (fi && (!vx_flags(VXF_HIDE_NETIF, 0) || + dev_in_nx_info(fi->fib_dev, current->nx_info))) snprintf(bf, sizeof(bf), "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", fi->fib_dev ? fi->fib_dev->name : "*", prefix, diff -NurpP --minimal linux-2.6.17.13/net/ipv4/inet_connection_sock.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/inet_connection_sock.c --- linux-2.6.17.13/net/ipv4/inet_connection_sock.c 2006-06-18 04:55:43 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/inet_connection_sock.c 2006-08-17 00:28:21 +0200 @@ -40,7 +40,6 @@ int sysctl_local_port_range[2] = { 1024, int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { - const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -53,9 +52,8 @@ int inet_csk_bind_conflict(const struct sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) + if (nx_addr_conflict(sk->sk_nx_info, + inet_rcv_saddr(sk), sk2)) break; } } diff -NurpP --minimal linux-2.6.17.13/net/ipv4/inet_diag.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/inet_diag.c --- linux-2.6.17.13/net/ipv4/inet_diag.c 2006-04-09 13:49:59 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/inet_diag.c 2006-08-17 00:28:21 +0200 @@ -694,6 +694,8 @@ static int inet_diag_dump(struct sk_buff sk_for_each(sk, node, &hashinfo->listening_hash[i]) { struct inet_sock *inet = inet_sk(sk); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (num < s_num) { num++; continue; @@ -754,6 +756,8 @@ skip_listen_ht: sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (num < 
s_num) goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) @@ -778,6 +782,8 @@ next_normal: inet_twsk_for_each(tw, node, &hashinfo->ehash[i + hashinfo->ehash_size].chain) { + if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) + continue; if (num < s_num) goto next_dying; if (r->id.idiag_sport != tw->tw_sport && diff -NurpP --minimal linux-2.6.17.13/net/ipv4/inet_hashtables.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/inet_hashtables.c --- linux-2.6.17.13/net/ipv4/inet_hashtables.c 2006-06-18 04:55:43 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/inet_hashtables.c 2006-08-17 00:28:21 +0200 @@ -139,11 +139,10 @@ struct sock *__inet_lookup_listener(cons const __u32 rcv_saddr = inet->rcv_saddr; int score = sk->sk_family == PF_INET ? 1 : 0; - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; + if (inet_addr_match(sk->sk_nx_info, daddr, rcv_saddr)) score += 2; - } + else + continue; if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) continue; diff -NurpP --minimal linux-2.6.17.13/net/ipv4/raw.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/raw.c --- linux-2.6.17.13/net/ipv4/raw.c 2006-06-18 04:55:45 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/raw.c 2006-08-17 00:28:21 +0200 @@ -102,6 +102,27 @@ static void raw_v4_unhash(struct sock *s write_unlock_bh(&raw_v4_lock); } + +/* + * Check if a given address matches for a socket + * + * nxi: the socket's nx_info if any + * addr: to be verified address + * saddr/baddr: socket addresses + */ +static inline int raw_addr_match ( + struct nx_info *nxi, + uint32_t addr, + uint32_t saddr, + uint32_t baddr) +{ + if (addr && (saddr == addr || baddr == addr)) + return 1; + if (!saddr) + return addr_in_nx_info(nxi, addr); + return 0; +} + struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif) @@ -113,7 +134,8 @@ struct sock *__raw_v4_lookup(struct sock if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + 
raw_addr_match(sk->sk_nx_info, laddr, + inet->rcv_saddr, inet->rcv_saddr2) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -313,6 +335,11 @@ static int raw_send_hdrinc(struct sock * iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } + err = -EPERM; + if (!vx_check(0, VX_ADMIN) && !capable(CAP_NET_RAW) + && (!addr_in_nx_info(sk->sk_nx_info, iph->saddr))) + goto error_free; + err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) @@ -324,6 +351,7 @@ out: error_fault: err = -EFAULT; +error_free: kfree_skb(skb); error: IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); @@ -484,6 +512,12 @@ static int raw_sendmsg(struct kiocb *ioc if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); + if (sk->sk_nx_info) { + err = ip_find_src(sk->sk_nx_info, &rt, &fl); + + if (err) + goto done; + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) @@ -787,7 +821,8 @@ static struct sock *raw_get_first(struct struct hlist_node *node; sk_for_each(sk, node, &raw_v4_htable[state->bucket]) - if (sk->sk_family == PF_INET) + if (sk->sk_family == PF_INET && + vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) goto found; } sk = NULL; @@ -803,7 +838,8 @@ static struct sock *raw_get_next(struct sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != PF_INET); + } while (sk && (sk->sk_family != PF_INET || + !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))); if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { sk = sk_head(&raw_v4_htable[state->bucket]); diff -NurpP --minimal linux-2.6.17.13/net/ipv4/tcp.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/tcp.c --- linux-2.6.17.13/net/ipv4/tcp.c 2006-06-18 04:55:45 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/tcp.c 2006-08-17 00:28:21 +0200 @@ -258,6 +258,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/net/ipv4/tcp_ipv4.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/tcp_ipv4.c --- linux-2.6.17.13/net/ipv4/tcp_ipv4.c 2006-06-18 
04:55:45 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/tcp_ipv4.c 2006-08-17 00:28:21 +0200 @@ -77,6 +77,7 @@ #include #include #include +#include int sysctl_tcp_tw_reuse; int sysctl_tcp_low_latency; @@ -1355,6 +1356,15 @@ static void *listening_get_next(struct s req = req->dl_next; while (1) { while (req) { + vxdprintk(VXD_CBIT(net, 6), + "sk,req: %p [#%d] (from %d)", req->sk, + (req->sk)?req->sk->sk_xid:0, vx_current_xid()); + if (req->sk && + !vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH)) { + /* step to the next request before skipping; a bare continue would re-test this req forever */ + req = req->dl_next; + continue; + } if (req->rsk_ops->family == st->family) { cur = req; goto out; @@ -1379,6 +1389,10 @@ get_req: } get_sk: sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", + sk, sk->sk_xid, vx_current_xid()); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (sk->sk_family == st->family) { cur = sk; goto out; @@ -1430,18 +1444,26 @@ static void *established_get_first(struc read_lock(&tcp_hashinfo.ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { - if (sk->sk_family != st->family) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egf: %p [#%d] (from %d)", + sk, sk->sk_xid, vx_current_xid()); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; + if (sk->sk_family != st->family) continue; - } rc = sk; goto out; } st->state = TCP_SEQ_STATE_TIME_WAIT; inet_twsk_for_each(tw, node, &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { - if (tw->tw_family != st->family) { + vxdprintk(VXD_CBIT(net, 6), + "tw: %p [#%d] (from %d)", + tw, tw->tw_xid, vx_current_xid()); + if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) + continue; + if (tw->tw_family != st->family) continue; - } rc = tw; goto out; } @@ -1465,7 +1487,8 @@ static void *established_get_next(struct tw = cur; tw = tw_next(tw); get_tw: - while (tw && tw->tw_family != st->family) { + while (tw && (tw->tw_family != st->family || + !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))) { tw = tw_next(tw); } if (tw) { @@ -1489,6 +1512,11 @@ get_tw: sk =
sk_next(sk); sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egn: %p [#%d] (from %d)", + sk, sk->sk_xid, vx_current_xid()); + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (sk->sk_family == st->family) goto found; } diff -NurpP --minimal linux-2.6.17.13/net/ipv4/tcp_minisocks.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/tcp_minisocks.c --- linux-2.6.17.13/net/ipv4/tcp_minisocks.c 2006-04-09 13:49:59 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/tcp_minisocks.c 2006-08-17 00:28:21 +0200 @@ -29,6 +29,10 @@ #include #include +#include +#include +#include + #ifdef CONFIG_SYSCTL #define SYNC_INIT 0 /* let the user enable it */ #else @@ -295,6 +299,11 @@ void tcp_time_wait(struct sock *sk, int tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + tw->tw_xid = sk->sk_xid; + tw->tw_vx_info = NULL; + tw->tw_nid = sk->sk_nid; + tw->tw_nx_info = NULL; + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); diff -NurpP --minimal linux-2.6.17.13/net/ipv4/udp.c linux-2.6.17.13-vs2.0.2.1/net/ipv4/udp.c --- linux-2.6.17.13/net/ipv4/udp.c 2006-06-18 04:55:45 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv4/udp.c 2006-08-17 00:28:21 +0200 @@ -176,14 +176,12 @@ gotit: struct inet_sock *inet2 = inet_sk(sk2); if (inet2->num == snum && - sk2 != sk && - !ipv6_only_sock(sk2) && + sk2 != sk && !ipv6_only_sock(sk2) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && + nx_addr_conflict(sk->sk_nx_info, + inet_rcv_saddr(sk), sk2) && (!sk2->sk_reuse || !sk->sk_reuse)) goto fail; } @@ -238,6 +236,11 @@ static struct sock *udp_v4_lookup_longwa if (inet->rcv_saddr != daddr) continue; score+=2; + } else if (sk->sk_nx_info) { + if (addr_in_nx_info(sk->sk_nx_info, daddr)) + score+=2; + else + continue; } if (inet->daddr) { if 
(inet->daddr != saddr) @@ -294,7 +297,8 @@ static inline struct sock *udp_v4_mcast_ if (inet->num != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr && + inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr) || ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; @@ -604,6 +608,19 @@ int udp_sendmsg(struct kiocb *iocb, stru .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + struct nx_info *nxi = sk->sk_nx_info; + + if (nxi) { + err = ip_find_src(nxi, &rt, &fl); + if (err) + goto out; + if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + daddr = fl.fl4_dst = nxi->ipv4[0]; +#ifdef CONFIG_VSERVER_REMAP_SADDR + if (saddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + saddr = fl.fl4_src = nxi->ipv4[0]; +#endif + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; @@ -1403,8 +1420,10 @@ static struct sock *udp_get_first(struct for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; + sk_for_each(sk, node, &udp_hash[state->bucket]) { - if (sk->sk_family == state->family) + if (sk->sk_family == state->family && + vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) goto found; } } @@ -1421,7 +1440,8 @@ static struct sock *udp_get_next(struct sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != state->family); + } while (sk && (sk->sk_family != state->family || + !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { sk = sk_head(&udp_hash[state->bucket]); diff -NurpP --minimal linux-2.6.17.13/net/ipv6/addrconf.c linux-2.6.17.13-vs2.0.2.1/net/ipv6/addrconf.c --- linux-2.6.17.13/net/ipv6/addrconf.c 2006-09-13 18:43:51 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/ipv6/addrconf.c 2006-08-17 00:28:21 +0200 @@ -2654,7 +2654,10 @@ static void if6_seq_stop(struct seq_file 
static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; - seq_printf(seq, + + /* no ipv6 inside a vserver for now */ + if (vx_check(0, VX_ADMIN|VX_WATCH)) + seq_printf(seq, NIP6_SEQFMT " %02x %02x %02x %02x %8s\n", NIP6(ifp->addr), ifp->idev->dev->ifindex, @@ -3014,6 +3017,10 @@ static int inet6_dump_addr(struct sk_buf struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; read_lock(&dev_base_lock); @@ -3239,6 +3246,10 @@ static int inet6_dump_ifinfo(struct sk_b struct net_device *dev; struct inet6_dev *idev; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + read_lock(&dev_base_lock); for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) diff -NurpP --minimal linux-2.6.17.13/net/netlink/af_netlink.c linux-2.6.17.13-vs2.0.2.1/net/netlink/af_netlink.c --- linux-2.6.17.13/net/netlink/af_netlink.c 2006-06-18 04:55:50 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/netlink/af_netlink.c 2006-08-17 00:28:21 +0200 @@ -57,6 +57,9 @@ #include #include #include +#include +#include +#include #include #include diff -NurpP --minimal linux-2.6.17.13/net/socket.c linux-2.6.17.13-vs2.0.2.1/net/socket.c --- linux-2.6.17.13/net/socket.c 2006-06-18 04:55:52 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/socket.c 2006-08-17 00:28:21 +0200 @@ -94,6 +94,7 @@ #include #include +#include static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, @@ -582,7 +583,7 @@ static inline int __sock_sendmsg(struct struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; + int err, len; si->sock = sock; si->scm = NULL; @@ -593,7 +594,21 @@ static inline int __sock_sendmsg(struct if (err) return err; - return sock->ops->sendmsg(iocb, 
sock, msg, size); + len = sock->ops->sendmsg(iocb, sock, msg, size); + if (sock->sk) { + if (len == size) + vx_sock_send(sock->sk, size); + else + vx_sock_fail(sock->sk, size); + } + vxdprintk(VXD_CBIT(net, 7), + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (unsigned int)size, len); + return len; } int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -631,7 +646,7 @@ int kernel_sendmsg(struct socket *sock, static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - int err; + int err, len; struct sock_iocb *si = kiocb_to_siocb(iocb); si->sock = sock; @@ -644,7 +659,17 @@ static inline int __sock_recvmsg(struct if (err) return err; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + len = sock->ops->recvmsg(iocb, sock, msg, size, flags); + if ((len >= 0) && sock->sk) + vx_sock_recv(sock->sk, len); + vxdprintk(VXD_CBIT(net, 7), + "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (unsigned int)size, len); + return len; } int sock_recvmsg(struct socket *sock, struct msghdr *msg, @@ -1134,6 +1159,10 @@ static int __sock_create(int family, int if (type < 0 || type >= SOCK_MAX) return -EINVAL; + /* disable IPv6 inside vservers for now */ + if (family == PF_INET6 && !vx_check(0, VX_ADMIN)) + return -EAFNOSUPPORT; + /* Compatibility. 
This uglymoron is moved from INET layer to here to avoid @@ -1244,6 +1273,7 @@ asmlinkage long sys_socket(int family, i if (retval < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock->flags); retval = sock_map_fd(sock); if (retval < 0) goto out_release; @@ -1274,10 +1304,12 @@ asmlinkage long sys_socketpair(int famil err = sock_create(family, type, protocol, &sock1); if (err < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock1->flags); err = sock_create(family, type, protocol, &sock2); if (err < 0) goto out_release_1; + set_bit(SOCK_USER_SOCKET, &sock2->flags); err = sock1->ops->socketpair(sock1, sock2); if (err < 0) diff -NurpP --minimal linux-2.6.17.13/net/sunrpc/auth.c linux-2.6.17.13-vs2.0.2.1/net/sunrpc/auth.c --- linux-2.6.17.13/net/sunrpc/auth.c 2006-06-18 04:55:52 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/sunrpc/auth.c 2006-08-17 00:28:21 +0200 @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -263,6 +264,7 @@ rpcauth_lookupcred(struct rpc_auth *auth struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, + .xid = vx_current_xid(), .group_info = current->group_info, }; struct rpc_cred *ret; @@ -282,6 +284,7 @@ rpcauth_bindcred(struct rpc_task *task) struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, + .xid = vx_current_xid(), .group_info = current->group_info, }; struct rpc_cred *ret; diff -NurpP --minimal linux-2.6.17.13/net/sunrpc/auth_unix.c linux-2.6.17.13-vs2.0.2.1/net/sunrpc/auth_unix.c --- linux-2.6.17.13/net/sunrpc/auth_unix.c 2006-02-18 14:40:43 +0100 +++ linux-2.6.17.13-vs2.0.2.1/net/sunrpc/auth_unix.c 2006-08-17 00:28:21 +0200 @@ -11,12 +11,14 @@ #include #include #include +#include #define NFS_NGROUPS 16 struct unx_cred { struct rpc_cred uc_base; gid_t uc_gid; + xid_t uc_xid; gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -78,6 +80,7 @@ unx_create_cred(struct rpc_auth *auth, s if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { cred->uc_uid = 0; 
cred->uc_gid = 0; + cred->uc_xid = vx_current_xid(); cred->uc_gids[0] = NOGROUP; } else { int groups = acred->group_info->ngroups; @@ -86,6 +89,7 @@ unx_create_cred(struct rpc_auth *auth, s cred->uc_uid = acred->uid; cred->uc_gid = acred->gid; + cred->uc_xid = acred->xid; for (i = 0; i < groups; i++) cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) @@ -117,7 +121,8 @@ unx_match(struct auth_cred *acred, struc int groups; if (cred->uc_uid != acred->uid - || cred->uc_gid != acred->gid) + || cred->uc_gid != acred->gid + || cred->uc_xid != acred->xid) return 0; groups = acred->group_info->ngroups; @@ -143,7 +148,7 @@ unx_marshal(struct rpc_task *task, u32 * struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; u32 *base, *hold; - int i; + int i, tagxid; *p++ = htonl(RPC_AUTH_UNIX); base = p++; @@ -153,9 +158,12 @@ unx_marshal(struct rpc_task *task, u32 * * Copy the UTS nodename captured when the client was created. */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); + tagxid = task->tk_client->cl_tagxid; - *p++ = htonl((u32) cred->uc_uid); - *p++ = htonl((u32) cred->uc_gid); + *p++ = htonl((u32) XIDINO_UID(tagxid, + cred->uc_uid, cred->uc_xid)); + *p++ = htonl((u32) XIDINO_GID(tagxid, + cred->uc_gid, cred->uc_xid)); hold = p++; for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) *p++ = htonl((u32) cred->uc_gids[i]); diff -NurpP --minimal linux-2.6.17.13/net/unix/af_unix.c linux-2.6.17.13-vs2.0.2.1/net/unix/af_unix.c --- linux-2.6.17.13/net/unix/af_unix.c 2006-06-18 04:55:56 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/unix/af_unix.c 2006-08-17 00:28:21 +0200 @@ -117,6 +117,9 @@ #include #include #include +#include +#include +#include int sysctl_unix_max_dgram_qlen = 10; @@ -235,6 +238,8 @@ static struct sock *__unix_find_socket_b sk_for_each(s, node, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); + if (!vx_check(s->sk_xid, VX_IDENT|VX_WATCH)) + 
continue; if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) goto found; @@ -781,7 +786,7 @@ static int unix_bind(struct socket *sock */ mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current->fs->umask); - err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0); + err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0, NULL); if (err) goto out_mknod_dput; mutex_unlock(&nd.dentry->d_inode->i_mutex); diff -NurpP --minimal linux-2.6.17.13/net/x25/af_x25.c linux-2.6.17.13-vs2.0.2.1/net/x25/af_x25.c --- linux-2.6.17.13/net/x25/af_x25.c 2006-06-18 04:55:56 +0200 +++ linux-2.6.17.13-vs2.0.2.1/net/x25/af_x25.c 2006-08-17 00:28:21 +0200 @@ -502,7 +502,10 @@ static int x25_create(struct socket *soc x25 = x25_sk(sk); - sock_init_data(sock, sk); + sk->sk_socket = sock; + sk->sk_type = sock->type; + sk->sk_sleep = &sock->wait; + sock->sk = sk; x25_init_timers(sk); diff -NurpP --minimal linux-2.6.17.13/security/commoncap.c linux-2.6.17.13-vs2.0.2.1/security/commoncap.c --- linux-2.6.17.13/security/commoncap.c 2006-06-18 04:55:57 +0200 +++ linux-2.6.17.13-vs2.0.2.1/security/commoncap.c 2006-08-17 00:28:21 +0200 @@ -143,7 +143,7 @@ void cap_bprm_apply_creds (struct linux_ /* Derived from fs/exec.c:compute_creds. */ kernel_cap_t new_permitted, working; - new_permitted = cap_intersect (bprm->cap_permitted, cap_bset); + new_permitted = cap_intersect (bprm->cap_permitted, vx_current_bcaps()); working = cap_intersect (bprm->cap_inheritable, current->cap_inheritable); new_permitted = cap_combine (new_permitted, working); @@ -312,7 +312,8 @@ void cap_task_reparent_to_init (struct t int cap_syslog (int type) { - if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) + if ((type != 3 && type != 10) && + !vx_capable(CAP_SYS_ADMIN, VXC_SYSLOG)) return -EPERM; return 0; }