author     Christian Heim <phreak@gentoo.org>  2006-01-31 13:58:45 +0000
committer  Christian Heim <phreak@gentoo.org>  2006-01-31 13:58:45 +0000
commit     1802889f8e8799c21814c1f67a56e30606bf4e12 (patch)
tree       f82300abe2caabb3787edd12905ded731375ac25 /openvz-sources
parent     Adding latest stable patchset release for openvz (diff)
Adding latest development patchset release for openvz
svn path=/; revision=222
Diffstat (limited to 'openvz-sources')
-rw-r--r--  openvz-sources/025.014/0001_linux-2.6.0-nonintconfig.patch      99
-rw-r--r--  openvz-sources/025.014/0100_patch-025stab014-core.patch      48347
2 files changed, 48446 insertions, 0 deletions
diff --git a/openvz-sources/025.014/0001_linux-2.6.0-nonintconfig.patch b/openvz-sources/025.014/0001_linux-2.6.0-nonintconfig.patch
new file mode 100644
index 0000000..a7fe97d
--- /dev/null
+++ b/openvz-sources/025.014/0001_linux-2.6.0-nonintconfig.patch
@@ -0,0 +1,99 @@
+--- ./scripts/kconfig/Makefile.nonint 2006-01-03 06:21:10.000000000 +0300
++++ ./scripts/kconfig/Makefile 2006-01-16 16:59:19.000000000 +0300
+@@ -42,6 +42,10 @@ update-po-config: $(obj)/kxgettext
+ $(Q)rm -f arch/um/Kconfig_arch
+ $(Q)rm -f scripts/kconfig/linux_*.pot scripts/kconfig/config.pot
+
++nonint_oldconfig: scripts/kconfig/conf
++ ./scripts/kconfig/conf -b arch/$(ARCH)/Kconfig
++
++
+ .PHONY: randconfig allyesconfig allnoconfig allmodconfig defconfig
+
+ randconfig: $(obj)/conf
+--- ./scripts/kconfig/conf.c.nonint 2006-01-03 06:21:10.000000000 +0300
++++ ./scripts/kconfig/conf.c 2006-01-16 16:10:30.000000000 +0300
+@@ -20,6 +20,7 @@ enum {
+ ask_all,
+ ask_new,
+ ask_silent,
++ dont_ask,
+ set_default,
+ set_yes,
+ set_mod,
+@@ -36,6 +37,8 @@ static struct menu *rootEntry;
+
+ static char nohelp_text[] = N_("Sorry, no help available for this option yet.\n");
+
++static int return_value = 0;
++
+ static void strip(char *str)
+ {
+ char *p = str;
+@@ -102,6 +105,12 @@ static void conf_askvalue(struct symbol
+ fflush(stdout);
+ fgets(line, 128, stdin);
+ return;
++ case dont_ask:
++ if (!sym_has_value(sym)) {
++ fprintf(stderr,"CONFIG_%s\n",sym->name);
++ return_value++;
++ }
++ return;
+ case set_default:
+ printf("%s\n", def);
+ return;
+@@ -346,6 +355,10 @@ static int conf_choice(struct menu *menu
+ printf("?");
+ printf("]: ");
+ switch (input_mode) {
++ case dont_ask:
++ cnt = def;
++ printf("%d\n", cnt);
++ break;
+ case ask_new:
+ case ask_silent:
+ if (!is_new) {
+@@ -482,7 +495,10 @@ static void check_conf(struct menu *menu
+ if (!conf_cnt++)
+ printf(_("*\n* Restart config...\n*\n"));
+ rootEntry = menu_get_parent_menu(menu);
+- conf(rootEntry);
++ if (input_mode == dont_ask)
++ fprintf(stderr,"CONFIG_%s\n",sym->name);
++ else
++ conf(rootEntry);
+ }
+ }
+
+@@ -501,6 +517,9 @@ int main(int ac, char **av)
+ case 'o':
+ input_mode = ask_new;
+ break;
++ case 'b':
++ input_mode = dont_ask;
++ break;
+ case 's':
+ input_mode = ask_silent;
+ valid_stdin = isatty(0) && isatty(1) && isatty(2);
+@@ -565,6 +584,7 @@ int main(int ac, char **av)
+ }
+ case ask_all:
+ case ask_new:
++ case dont_ask:
+ conf_read(NULL);
+ break;
+ case set_no:
+@@ -603,10 +623,10 @@ int main(int ac, char **av)
+ do {
+ conf_cnt = 0;
+ check_conf(&rootmenu);
+- } while (conf_cnt);
++ } while ((conf_cnt) && (input_mode != dont_ask));
+ if (conf_write(NULL)) {
+ fprintf(stderr, _("\n*** Error during writing of the kernel configuration.\n\n"));
+ return 1;
+ }
+- return 0;
++ return return_value;
+ }
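The patch above adds a non-interactive "nonint_oldconfig" kconfig target: "conf -b" walks the Kconfig tree, prints each symbol that still has no value as CONFIG_<name> on stderr instead of prompting, and returns the number of such symbols as its exit status. A minimal usage sketch against a kernel tree carrying this patch, with the ARCH value given only as an illustration:

    make ARCH=i386 nonint_oldconfig || echo "unanswered config options found"

A zero exit status means the existing .config already answers every option.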
diff --git a/openvz-sources/025.014/0100_patch-025stab014-core.patch b/openvz-sources/025.014/0100_patch-025stab014-core.patch
new file mode 100644
index 0000000..78bf588
--- /dev/null
+++ b/openvz-sources/025.014/0100_patch-025stab014-core.patch
@@ -0,0 +1,48347 @@
+diff -uprN linux-2.6.15.orig/Makefile linux-2.6.15-ve025stab014/Makefile
+--- linux-2.6.15.orig/Makefile 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/Makefile 2006-01-27 14:48:09.000000000 +0300
+@@ -1,7 +1,7 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 15
+-EXTRAVERSION =
++EXTRAVERSION = -025stab014
+ NAME=Sliding Snow Leopard
+
+ # *DOCUMENTATION*
+diff -uprN linux-2.6.15.orig/arch/i386/Kconfig linux-2.6.15-ve025stab014/arch/i386/Kconfig
+--- linux-2.6.15.orig/arch/i386/Kconfig 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/Kconfig 2006-01-27 14:48:07.000000000 +0300
+@@ -1014,12 +1014,18 @@ endmenu
+
+ source "arch/i386/Kconfig.debug"
+
++menu "OpenVZ"
++source "kernel/Kconfig.openvz"
++endmenu
++
+ source "security/Kconfig"
+
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
+
++source "kernel/ub/Kconfig"
++
+ #
+ # Use the generic interrupt handling code in kernel/irq/:
+ #
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/apic.c linux-2.6.15-ve025stab014/arch/i386/kernel/apic.c
+--- linux-2.6.15.orig/arch/i386/kernel/apic.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/apic.c 2006-01-27 14:48:07.000000000 +0300
+@@ -1185,6 +1185,7 @@ inline void smp_local_timer_interrupt(st
+ fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
+ {
+ int cpu = smp_processor_id();
++ struct ve_struct *ve;
+
+ /*
+ * the NMI deadlock-detector uses this.
+@@ -1201,9 +1202,11 @@ fastcall void smp_apic_timer_interrupt(s
+ * Besides, if we don't timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
+ */
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+ smp_local_timer_interrupt(regs);
+ irq_exit();
++ (void)set_exec_env(ve);
+ }
+
+ /*
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/cpu/mtrr/if.c linux-2.6.15-ve025stab014/arch/i386/kernel/cpu/mtrr/if.c
+--- linux-2.6.15.orig/arch/i386/kernel/cpu/mtrr/if.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/cpu/mtrr/if.c 2006-01-27 14:48:07.000000000 +0300
+@@ -391,7 +391,7 @@ static int __init mtrr_if_init(void)
+ return -ENODEV;
+
+ proc_root_mtrr =
+- create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
++ create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL);
+ if (proc_root_mtrr) {
+ proc_root_mtrr->owner = THIS_MODULE;
+ proc_root_mtrr->proc_fops = &mtrr_fops;
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/irq.c linux-2.6.15-ve025stab014/arch/i386/kernel/irq.c
+--- linux-2.6.15.orig/arch/i386/kernel/irq.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/irq.c 2006-01-27 14:48:07.000000000 +0300
+@@ -59,7 +59,9 @@ fastcall unsigned int do_IRQ(struct pt_r
+ union irq_ctx *curctx, *irqctx;
+ u32 *isp;
+ #endif
++ struct ve_struct *ve;
+
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 1KB free? */
+@@ -108,6 +110,7 @@ fastcall unsigned int do_IRQ(struct pt_r
+ __do_IRQ(irq, regs);
+
+ irq_exit();
++ (void)set_exec_env(ve);
+
+ return 1;
+ }
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/ldt.c linux-2.6.15-ve025stab014/arch/i386/kernel/ldt.c
+--- linux-2.6.15.orig/arch/i386/kernel/ldt.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/ldt.c 2006-01-27 14:48:05.000000000 +0300
+@@ -20,6 +20,8 @@
+ #include <asm/desc.h>
+ #include <asm/mmu_context.h>
+
++#include <ub/ub_mem.h>
++
+ #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+ static void flush_ldt(void *null)
+ {
+@@ -39,9 +41,9 @@ static int alloc_ldt(mm_context_t *pc, i
+ oldsize = pc->size;
+ mincount = (mincount+511)&(~511);
+ if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
++ newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
+ else
+- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
++ newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+ if (!newldt)
+ return -ENOMEM;
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/process.c linux-2.6.15-ve025stab014/arch/i386/kernel/process.c
+--- linux-2.6.15.orig/arch/i386/kernel/process.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/process.c 2006-01-27 14:48:07.000000000 +0300
+@@ -338,6 +338,13 @@ int kernel_thread(int (*fn)(void *), voi
+ {
+ struct pt_regs regs;
+
++ /* Don't allow kernel_thread() inside VE */
++ if (!ve_is_super(get_exec_env())) {
++ printk("kernel_thread call inside VE\n");
++ dump_stack();
++ return -EPERM;
++ }
++
+ memset(&regs, 0, sizeof(regs));
+
+ regs.ebx = (unsigned long) fn;
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/signal.c linux-2.6.15-ve025stab014/arch/i386/kernel/signal.c
+--- linux-2.6.15.orig/arch/i386/kernel/signal.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/signal.c 2006-01-27 14:48:05.000000000 +0300
+@@ -615,7 +615,7 @@ int fastcall do_signal(struct pt_regs *r
+ if (!user_mode(regs))
+ return 1;
+
+- if (try_to_freeze())
++ if (try_to_freeze() && !signal_pending(current))
+ goto no_signal;
+
+ if (!oldset)
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/smpboot.c linux-2.6.15-ve025stab014/arch/i386/kernel/smpboot.c
+--- linux-2.6.15.orig/arch/i386/kernel/smpboot.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/smpboot.c 2006-01-27 14:48:07.000000000 +0300
+@@ -321,6 +321,10 @@ static void __init synchronize_tsc_bp (v
+ }
+ if (!buggy)
+ printk("passed.\n");
++#ifdef CONFIG_VE
++ /* TSC reset. kill whatever might rely on old values */
++ VE_TASK_INFO(current)->wakeup_stamp = 0;
++#endif
+ }
+
+ static void __init synchronize_tsc_ap (void)
+@@ -346,6 +350,10 @@ static void __init synchronize_tsc_ap (v
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ }
++#ifdef CONFIG_VE
++ /* TSC reset. kill whatever might rely on old values */
++ VE_TASK_INFO(current)->wakeup_stamp = 0;
++#endif
+ }
+ #undef NR_LOOPS
+
+@@ -913,6 +921,13 @@ static int __devinit do_boot_cpu(int api
+ if (IS_ERR(idle))
+ panic("failed fork for CPU %d", cpu);
+ idle->thread.eip = (unsigned long) start_secondary;
++
++#ifdef CONFIG_VE
++ /* Cosmetic: sleep_time won't be changed afterwards for the idle
++ * thread; keep it 0 rather than -cycles. */
++ VE_TASK_INFO(idle)->sleep_time = 0;
++#endif
++
+ /* start_eip had better be page-aligned! */
+ start_eip = setup_trampoline();
+
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/sys_i386.c linux-2.6.15-ve025stab014/arch/i386/kernel/sys_i386.c
+--- linux-2.6.15.orig/arch/i386/kernel/sys_i386.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/sys_i386.c 2006-01-27 14:48:07.000000000 +0300
+@@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsn
+ if (!name)
+ return -EFAULT;
+ down_read(&uts_sem);
+- err=copy_to_user(name, &system_utsname, sizeof (*name));
++ err=copy_to_user(name, &ve_utsname, sizeof (*name));
+ up_read(&uts_sem);
+ return err?-EFAULT:0;
+ }
+@@ -233,15 +233,15 @@ asmlinkage int sys_olduname(struct oldol
+
+ down_read(&uts_sem);
+
+- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
++ error = __copy_to_user(name->sysname,ve_utsname.sysname,__OLD_UTS_LEN);
+ error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
+- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
++ error |= __copy_to_user(name->nodename,ve_utsname.nodename,__OLD_UTS_LEN);
+ error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
+- error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
++ error |= __copy_to_user(name->release,ve_utsname.release,__OLD_UTS_LEN);
+ error |= __put_user(0,name->release+__OLD_UTS_LEN);
+- error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
++ error |= __copy_to_user(name->version,ve_utsname.version,__OLD_UTS_LEN);
+ error |= __put_user(0,name->version+__OLD_UTS_LEN);
+- error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
++ error |= __copy_to_user(name->machine,ve_utsname.machine,__OLD_UTS_LEN);
+ error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+
+ up_read(&uts_sem);
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/syscall_table.S linux-2.6.15-ve025stab014/arch/i386/kernel/syscall_table.S
+--- linux-2.6.15.orig/arch/i386/kernel/syscall_table.S 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/syscall_table.S 2006-01-27 14:48:05.000000000 +0300
+@@ -294,3 +294,11 @@ ENTRY(sys_call_table)
+ .long sys_inotify_init
+ .long sys_inotify_add_watch
+ .long sys_inotify_rm_watch
++ .rept 510-(.-sys_call_table)/4
++ .long sys_ni_syscall
++ .endr
++
++ .long sys_getluid /* 510 */
++ .long sys_setluid
++ .long sys_setublimit
++ .long sys_ubstat
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/timers/timer_tsc.c linux-2.6.15-ve025stab014/arch/i386/kernel/timers/timer_tsc.c
+--- linux-2.6.15.orig/arch/i386/kernel/timers/timer_tsc.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/timers/timer_tsc.c 2006-01-27 14:48:07.000000000 +0300
+@@ -85,7 +85,7 @@ static int count2; /* counter for mark_o
+ * Equal to 2^32 * (1 / (clocks per usec) ).
+ * Initialized in time_init.
+ */
+-static unsigned long fast_gettimeoffset_quotient;
++unsigned long fast_gettimeoffset_quotient;
+
+ static unsigned long get_offset_tsc(void)
+ {
+diff -uprN linux-2.6.15.orig/arch/i386/kernel/traps.c linux-2.6.15-ve025stab014/arch/i386/kernel/traps.c
+--- linux-2.6.15.orig/arch/i386/kernel/traps.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/kernel/traps.c 2006-01-27 14:48:07.000000000 +0300
+@@ -227,8 +227,11 @@ void show_registers(struct pt_regs *regs
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk("ds: %04x es: %04x ss: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff, ss);
+- printk("Process %s (pid: %d, threadinfo=%p task=%p)",
+- current->comm, current->pid, current_thread_info(), current);
++ printk("Process %s (pid: %d, veid=%d, threadinfo=%p task=%p)",
++ current->comm, current->pid,
++ VEID(VE_TASK_INFO(current)->owner_env),
++ current_thread_info(), current);
++
+ /*
+ * When in-kernel, we also print out the stack and code at the
+ * time of the fault..
+@@ -291,6 +294,13 @@ bug:
+ printk("Kernel BUG\n");
+ }
+
++static void inline check_kernel_csum_bug(void)
++{
++ if (kernel_text_csum_broken)
++ printk("Kernel code checksum mismatch detected %d times\n",
++ kernel_text_csum_broken);
++}
++
+ /* This is gone through when something in the kernel
+ * has done something bad and is about to be terminated.
+ */
+@@ -337,6 +347,7 @@ void die(const char * str, struct pt_reg
+ show_registers(regs);
+ } else
+ printk(KERN_ERR "Recursive die() failure, output suppressed\n");
++ check_kernel_csum_bug();
+
+ bust_spinlocks(0);
+ die.lock_owner = -1;
+diff -uprN linux-2.6.15.orig/arch/i386/mm/fault.c linux-2.6.15-ve025stab014/arch/i386/mm/fault.c
+--- linux-2.6.15.orig/arch/i386/mm/fault.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/mm/fault.c 2006-01-27 14:48:06.000000000 +0300
+@@ -347,7 +347,6 @@ good_area:
+ goto bad_area;
+ }
+
+- survive:
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+@@ -485,14 +484,14 @@ no_context:
+ */
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (tsk->pid == 1) {
+- yield();
+- down_read(&mm->mmap_sem);
+- goto survive;
+- }
+- printk("VM: killing process %s\n", tsk->comm);
+- if (error_code & 4)
+- do_exit(SIGKILL);
++ if (error_code & 4) {
++ /*
++ * 0-order allocation always success if something really
++ * fatal not happen: beancounter overdraft or OOM.
++ */
++ force_sig(SIGKILL, tsk);
++ return;
++ }
+ goto no_context;
+
+ do_sigbus:
+diff -uprN linux-2.6.15.orig/arch/i386/mm/init.c linux-2.6.15-ve025stab014/arch/i386/mm/init.c
+--- linux-2.6.15.orig/arch/i386/mm/init.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/mm/init.c 2006-01-27 14:48:05.000000000 +0300
+@@ -677,7 +677,7 @@ void __init pgtable_cache_init(void)
+ pmd_cache = kmem_cache_create("pmd",
+ PTRS_PER_PMD*sizeof(pmd_t),
+ PTRS_PER_PMD*sizeof(pmd_t),
+- 0,
++ SLAB_UBC,
+ pmd_ctor,
+ NULL);
+ if (!pmd_cache)
+@@ -686,7 +686,7 @@ void __init pgtable_cache_init(void)
+ pgd_cache = kmem_cache_create("pgd",
+ PTRS_PER_PGD*sizeof(pgd_t),
+ PTRS_PER_PGD*sizeof(pgd_t),
+- 0,
++ SLAB_UBC,
+ pgd_ctor,
+ PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+ if (!pgd_cache)
+diff -uprN linux-2.6.15.orig/arch/i386/mm/pgtable.c linux-2.6.15-ve025stab014/arch/i386/mm/pgtable.c
+--- linux-2.6.15.orig/arch/i386/mm/pgtable.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/i386/mm/pgtable.c 2006-01-27 14:48:07.000000000 +0300
+@@ -5,8 +5,10 @@
+ #include <linux/config.h>
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
++#include <linux/module.h>
+ #include <linux/errno.h>
+ #include <linux/mm.h>
++#include <linux/vmalloc.h>
+ #include <linux/swap.h>
+ #include <linux/smp.h>
+ #include <linux/highmem.h>
+@@ -64,7 +66,9 @@ void show_mem(void)
+ printk(KERN_INFO "%lu pages mapped\n", ps.nr_mapped);
+ printk(KERN_INFO "%lu pages slab\n", ps.nr_slab);
+ printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages);
++ vprintstat();
+ }
++EXPORT_SYMBOL(show_mem);
+
+ /*
+ * Associate a virtual page frame with a given physical page frame
+@@ -159,9 +163,11 @@ struct page *pte_alloc_one(struct mm_str
+ struct page *pte;
+
+ #ifdef CONFIG_HIGHPTE
+- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
++ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_HIGHMEM|
++ __GFP_REPEAT|__GFP_ZERO, 0);
+ #else
+- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
++ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|
++ __GFP_REPEAT|__GFP_ZERO, 0);
+ #endif
+ return pte;
+ }
+diff -uprN linux-2.6.15.orig/arch/ia64/Kconfig linux-2.6.15-ve025stab014/arch/ia64/Kconfig
+--- linux-2.6.15.orig/arch/ia64/Kconfig 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/Kconfig 2006-01-27 14:48:07.000000000 +0300
+@@ -461,6 +461,10 @@ endmenu
+
+ source "arch/ia64/Kconfig.debug"
+
++source "kernel/Kconfig.openvz"
++
+ source "security/Kconfig"
+
+ source "crypto/Kconfig"
++
++source "kernel/ub/Kconfig"
+diff -uprN linux-2.6.15.orig/arch/ia64/ia32/binfmt_elf32.c linux-2.6.15-ve025stab014/arch/ia64/ia32/binfmt_elf32.c
+--- linux-2.6.15.orig/arch/ia64/ia32/binfmt_elf32.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/ia32/binfmt_elf32.c 2006-01-27 14:48:05.000000000 +0300
+@@ -136,6 +136,12 @@ ia64_elf32_init (struct pt_regs *regs)
+ up_write(&current->mm->mmap_sem);
+ }
+
++ if (ub_memory_charge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
++ IA32_LDT_ENTRY_SIZE),
++ VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE,
++ NULL, UB_SOFT))
++ goto skip;
++
+ /*
+ * Install LDT as anonymous memory. This gives us all-zero segment descriptors
+ * until a task modifies them via modify_ldt().
+@@ -157,7 +163,12 @@ ia64_elf32_init (struct pt_regs *regs)
+ }
+ }
+ up_write(&current->mm->mmap_sem);
+- }
++ } else
++ ub_memory_uncharge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
++ IA32_LDT_ENTRY_SIZE),
++ VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, NULL);
++
++skip:
+
+ ia64_psr(regs)->ac = 0; /* turn off alignment checking */
+ regs->loadrs = 0;
+@@ -212,9 +223,15 @@ ia32_setup_arg_pages (struct linux_binpr
+ bprm->loader += stack_base;
+ bprm->exec += stack_base;
+
++ ret = -ENOMEM;
++ if (ub_memory_charge(mm, IA32_STACK_TOP -
++ (PAGE_MASK & (unsigned long)bprm->p),
++ VM_STACK_FLAGS, NULL, UB_SOFT))
++ goto err_charge;
++
+ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!mpnt)
+- return -ENOMEM;
++ goto err_alloc;
+
+ memset(mpnt, 0, sizeof(*mpnt));
+
+@@ -231,11 +248,8 @@ ia32_setup_arg_pages (struct linux_binpr
+ mpnt->vm_flags = VM_STACK_FLAGS;
+ mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
+ PAGE_COPY_EXEC: PAGE_COPY;
+- if ((ret = insert_vm_struct(current->mm, mpnt))) {
+- up_write(&current->mm->mmap_sem);
+- kmem_cache_free(vm_area_cachep, mpnt);
+- return ret;
+- }
++ if ((ret = insert_vm_struct(current->mm, mpnt)))
++ goto err_insert;
+ current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
+ }
+
+@@ -254,6 +268,16 @@ ia32_setup_arg_pages (struct linux_binpr
+ current->thread.ppl = ia32_init_pp_list();
+
+ return 0;
++
++err_insert:
++ up_write(&current->mm->mmap_sem);
++ kmem_cache_free(vm_area_cachep, mpnt);
++err_alloc:
++ ub_memory_uncharge(mm, IA32_STACK_TOP -
++ (PAGE_MASK & (unsigned long)bprm->p),
++ VM_STACK_FLAGS, NULL);
++err_charge:
++ return ret;
+ }
+
+ static void
+diff -uprN linux-2.6.15.orig/arch/ia64/ia32/sys_ia32.c linux-2.6.15-ve025stab014/arch/ia64/ia32/sys_ia32.c
+--- linux-2.6.15.orig/arch/ia64/ia32/sys_ia32.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/ia32/sys_ia32.c 2006-01-27 14:48:07.000000000 +0300
+@@ -1767,7 +1767,7 @@ sys32_ptrace (int request, pid_t pid, un
+
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/asm-offsets.c linux-2.6.15-ve025stab014/arch/ia64/kernel/asm-offsets.c
+--- linux-2.6.15.orig/arch/ia64/kernel/asm-offsets.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/asm-offsets.c 2006-01-27 14:48:07.000000000 +0300
+@@ -44,11 +44,21 @@ void foo(void)
+ DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
+ DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+ DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
++#ifdef CONFIG_VE
++ DEFINE(IA64_TASK_PID_OFFSET, offsetof
++ (struct task_struct, pids[PIDTYPE_PID].vnr));
++#else
+ DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
++#endif
+ DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
+ DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
+ DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
++#ifdef CONFIG_VE
++ DEFINE(IA64_TASK_TGID_OFFSET, offsetof
++ (struct task_struct, pids[PIDTYPE_TGID].vnr));
++#else
+ DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
++#endif
+ DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
+ DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
+
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/entry.S linux-2.6.15-ve025stab014/arch/ia64/kernel/entry.S
+--- linux-2.6.15.orig/arch/ia64/kernel/entry.S 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/entry.S 2006-01-27 14:48:05.000000000 +0300
+@@ -1600,5 +1600,12 @@ sys_call_table:
+ data8 sys_inotify_init
+ data8 sys_inotify_add_watch
+ data8 sys_inotify_rm_watch
++.rept 1505-1280
++ data8 sys_ni_syscall // 1280 - 1499
++.endr
++ data8 sys_getluid // 1505
++ data8 sys_setluid
++ data8 sys_setublimit
++ data8 sys_ubstat
+
+ .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/fsys.S linux-2.6.15-ve025stab014/arch/ia64/kernel/fsys.S
+--- linux-2.6.15.orig/arch/ia64/kernel/fsys.S 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/fsys.S 2006-01-27 14:48:07.000000000 +0300
+@@ -72,6 +72,7 @@ ENTRY(fsys_getpid)
+ FSYS_RETURN
+ END(fsys_getpid)
+
++#ifndef CONFIG_VE
+ ENTRY(fsys_getppid)
+ .prologue
+ .altrp b6
+@@ -118,6 +119,7 @@ ENTRY(fsys_getppid)
+ #endif
+ FSYS_RETURN
+ END(fsys_getppid)
++#endif
+
+ ENTRY(fsys_set_tid_address)
+ .prologue
+@@ -665,7 +667,11 @@ fsyscall_table:
+ data8 0 // chown
+ data8 0 // lseek // 1040
+ data8 fsys_getpid // getpid
++#ifdef CONFIG_VE
++ data8 0
++#else
+ data8 fsys_getppid // getppid
++#endif
+ data8 0 // mount
+ data8 0 // umount
+ data8 0 // setuid // 1045
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/irq.c linux-2.6.15-ve025stab014/arch/ia64/kernel/irq.c
+--- linux-2.6.15.orig/arch/ia64/kernel/irq.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/irq.c 2006-01-27 14:48:07.000000000 +0300
+@@ -163,7 +163,9 @@ void fixup_irqs(void)
+ {
+ unsigned int irq;
+ extern void ia64_process_pending_intr(void);
++ struct ve_struct *ve;
+
++ ve = set_exec_env(get_ve0());
+ ia64_set_itv(1<<16);
+ /*
+ * Phase 1: Locate irq's bound to this cpu and
+@@ -197,5 +199,6 @@ void fixup_irqs(void)
+ */
+ max_xtp();
+ local_irq_disable();
++ (void)set_exec_env(ve);
+ }
+ #endif
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/irq_ia64.c linux-2.6.15-ve025stab014/arch/ia64/kernel/irq_ia64.c
+--- linux-2.6.15.orig/arch/ia64/kernel/irq_ia64.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/irq_ia64.c 2006-01-27 14:48:07.000000000 +0300
+@@ -103,6 +103,7 @@ void
+ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
+ {
+ unsigned long saved_tpr;
++ struct ve_struct *ve;
+
+ #if IRQ_DEBUG
+ {
+@@ -139,6 +140,7 @@ ia64_handle_irq (ia64_vector vector, str
+ * 16 (without this, it would be ~240, which could easily lead
+ * to kernel stack overflows).
+ */
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+ ia64_srlz_d();
+@@ -164,6 +166,7 @@ ia64_handle_irq (ia64_vector vector, str
+ * come through until ia64_eoi() has been done.
+ */
+ irq_exit();
++ (void)set_exec_env(get_ve0());
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
+@@ -176,9 +179,11 @@ void ia64_process_pending_intr(void)
+ ia64_vector vector;
+ unsigned long saved_tpr;
+ extern unsigned int vectors_in_migration[NR_IRQS];
++ struct ve_struct *ve;
+
+ vector = ia64_get_ivr();
+
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+ ia64_srlz_d();
+@@ -210,6 +215,7 @@ void ia64_process_pending_intr(void)
+ vector = ia64_get_ivr();
+ }
+ irq_exit();
++ (void)set_exec_env(ve);
+ }
+ #endif
+
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/mca.c linux-2.6.15-ve025stab014/arch/ia64/kernel/mca.c
+--- linux-2.6.15.orig/arch/ia64/kernel/mca.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/mca.c 2006-01-27 14:48:07.000000000 +0300
+@@ -1241,10 +1241,10 @@ default_monarch_init_process(struct noti
+ }
+ printk("\n\n");
+ if (read_trylock(&tasklist_lock)) {
+- do_each_thread (g, t) {
++ do_each_thread_all (g, t) {
+ printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+ show_stack(t, NULL);
+- } while_each_thread (g, t);
++ } while_each_thread_all (g, t);
+ read_unlock(&tasklist_lock);
+ }
+ return NOTIFY_DONE;
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/perfmon.c linux-2.6.15-ve025stab014/arch/ia64/kernel/perfmon.c
+--- linux-2.6.15.orig/arch/ia64/kernel/perfmon.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/perfmon.c 2006-01-27 14:48:07.000000000 +0300
+@@ -2620,7 +2620,7 @@ pfm_get_task(pfm_context_t *ctx, pid_t p
+
+ read_lock(&tasklist_lock);
+
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+
+ /* make sure task cannot go away while we operate on it */
+ if (p) get_task_struct(p);
+@@ -4184,12 +4184,12 @@ pfm_check_task_exist(pfm_context_t *ctx)
+
+ read_lock(&tasklist_lock);
+
+- do_each_thread (g, t) {
++ do_each_thread_ve (g, t) {
+ if (t->thread.pfm_context == ctx) {
+ ret = 0;
+ break;
+ }
+- } while_each_thread (g, t);
++ } while_each_thread_ve (g, t);
+
+ read_unlock(&tasklist_lock);
+
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/process.c linux-2.6.15-ve025stab014/arch/ia64/kernel/process.c
+--- linux-2.6.15.orig/arch/ia64/kernel/process.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/process.c 2006-01-27 14:48:07.000000000 +0300
+@@ -681,6 +681,13 @@ kernel_thread (int (*fn)(void *), void *
+ struct pt_regs pt;
+ } regs;
+
++ /* Don't allow kernel_thread() inside VE */
++ if (!ve_is_super(get_exec_env())) {
++ printk("kernel_thread call inside VE\n");
++ dump_stack();
++ return -EPERM;
++ }
++
+ memset(&regs, 0, sizeof(regs));
+ regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */
+ regs.pt.r1 = helper_fptr[1]; /* set GP */
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/ptrace.c linux-2.6.15-ve025stab014/arch/ia64/kernel/ptrace.c
+--- linux-2.6.15.orig/arch/ia64/kernel/ptrace.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/ptrace.c 2006-01-27 14:48:07.000000000 +0300
+@@ -1440,7 +1440,7 @@ sys_ptrace (long request, pid_t pid, uns
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+ {
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child) {
+ if (peek_or_poke)
+ child = find_thread_for_addr(child, addr);
+diff -uprN linux-2.6.15.orig/arch/ia64/kernel/signal.c linux-2.6.15-ve025stab014/arch/ia64/kernel/signal.c
+--- linux-2.6.15.orig/arch/ia64/kernel/signal.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/kernel/signal.c 2006-01-27 14:48:07.000000000 +0300
+@@ -270,7 +270,7 @@ ia64_rt_sigreturn (struct sigscratch *sc
+ si.si_signo = SIGSEGV;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+- si.si_pid = current->pid;
++ si.si_pid = virt_pid(current);
+ si.si_uid = current->uid;
+ si.si_addr = sc;
+ force_sig_info(SIGSEGV, &si, current);
+@@ -375,7 +375,7 @@ force_sigsegv_info (int sig, void __user
+ si.si_signo = SIGSEGV;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+- si.si_pid = current->pid;
++ si.si_pid = virt_pid(current);
+ si.si_uid = current->uid;
+ si.si_addr = addr;
+ force_sig_info(SIGSEGV, &si, current);
+@@ -641,7 +641,7 @@ set_sigdelayed(pid_t pid, int signo, int
+ for (i = 1; i <= 3; ++i) {
+ switch (i) {
+ case 1:
+- t = find_task_by_pid(pid);
++ t = find_task_by_pid_ve(pid);
+ if (t)
+ start_time = start_time_ul(t);
+ break;
+@@ -682,7 +682,7 @@ do_sigdelayed(void)
+ siginfo.si_code = current_thread_info()->sigdelayed.code;
+ siginfo.si_addr = current_thread_info()->sigdelayed.addr;
+ pid = current_thread_info()->sigdelayed.pid;
+- t = find_task_by_pid(pid);
++ t = find_task_by_pid_ve(pid);
+ if (!t)
+ return;
+ if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
+diff -uprN linux-2.6.15.orig/arch/ia64/mm/contig.c linux-2.6.15-ve025stab014/arch/ia64/mm/contig.c
+--- linux-2.6.15.orig/arch/ia64/mm/contig.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/mm/contig.c 2006-01-27 14:48:07.000000000 +0300
+@@ -64,6 +64,7 @@ show_mem (void)
+ printk("%ld pages in page table cache\n",
+ pgtable_quicklist_total_size());
+ }
++EXPORT_SYMBOL(show_mem);
+
+ /* physical address where the bootmem map is located */
+ unsigned long bootmap_start;
+diff -uprN linux-2.6.15.orig/arch/ia64/mm/discontig.c linux-2.6.15-ve025stab014/arch/ia64/mm/discontig.c
+--- linux-2.6.15.orig/arch/ia64/mm/discontig.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/mm/discontig.c 2006-01-27 14:48:07.000000000 +0300
+@@ -594,6 +594,7 @@ void show_mem(void)
+ pgtable_quicklist_total_size());
+ printk("%d free buffer pages\n", nr_free_buffer_pages());
+ }
++EXPORT_SYMBOL(show_mem);
+
+ /**
+ * call_pernode_memory - use SRAT to call callback functions with node info
+diff -uprN linux-2.6.15.orig/arch/ia64/mm/fault.c linux-2.6.15-ve025stab014/arch/ia64/mm/fault.c
+--- linux-2.6.15.orig/arch/ia64/mm/fault.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/mm/fault.c 2006-01-27 14:48:05.000000000 +0300
+@@ -116,7 +116,6 @@ ia64_do_page_fault (unsigned long addres
+ if ((vma->vm_flags & mask) != mask)
+ goto bad_area;
+
+- survive:
+ /*
+ * If for any reason at all we couldn't handle the fault, make
+ * sure we exit gracefully rather than endlessly redo the
+@@ -241,13 +240,13 @@ ia64_do_page_fault (unsigned long addres
+
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (current->pid == 1) {
+- yield();
+- down_read(&mm->mmap_sem);
+- goto survive;
+- }
+- printk(KERN_CRIT "VM: killing process %s\n", current->comm);
+- if (user_mode(regs))
+- do_exit(SIGKILL);
++ if (user_mode(regs)) {
++ /*
++ * 0-order allocation always success if something really
++ * fatal not happen: beancounter overdraft or OOM.
++ */
++ force_sig(SIGKILL, current);
++ return;
++ }
+ goto no_context;
+ }
+diff -uprN linux-2.6.15.orig/arch/ia64/mm/init.c linux-2.6.15-ve025stab014/arch/ia64/mm/init.c
+--- linux-2.6.15.orig/arch/ia64/mm/init.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ia64/mm/init.c 2006-01-27 14:48:05.000000000 +0300
+@@ -37,6 +37,8 @@
+ #include <asm/unistd.h>
+ #include <asm/mca.h>
+
++#include <ub/ub_vmpages.h>
++
+ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+ DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
+@@ -96,7 +98,7 @@ check_pgt_cache(void)
+ preempt_disable();
+ while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
+ while (pages_to_free--) {
+- free_page((unsigned long)pgtable_quicklist_alloc());
++ free_page((unsigned long)pgtable_quicklist_alloc(0));
+ }
+ preempt_enable();
+ preempt_disable();
+@@ -146,6 +148,10 @@ ia64_init_addr_space (void)
+
+ ia64_set_rbs_bot();
+
++ if (ub_memory_charge(current->mm, PAGE_SIZE, VM_DATA_DEFAULT_FLAGS,
++ NULL, UB_SOFT))
++ goto skip;
++
+ /*
+ * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
+ * the problem. When the process attempts to write to the register backing store
+@@ -166,8 +172,11 @@ ia64_init_addr_space (void)
+ return;
+ }
+ up_write(&current->mm->mmap_sem);
+- }
++ } else
++ ub_memory_uncharge(current->mm, PAGE_SIZE,
++ VM_DATA_DEFAULT_FLAGS, NULL);
+
++skip:
+ /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
+ if (!(current->personality & MMAP_PAGE_ZERO)) {
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+diff -uprN linux-2.6.15.orig/arch/powerpc/Kconfig linux-2.6.15-ve025stab014/arch/powerpc/Kconfig
+--- linux-2.6.15.orig/arch/powerpc/Kconfig 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/Kconfig 2006-01-27 14:48:07.000000000 +0300
+@@ -912,6 +912,8 @@ source "arch/powerpc/platforms/iseries/K
+
+ source "lib/Kconfig"
+
++source "kernel/ub/Kconfig"
++
+ menu "Instrumentation Support"
+ depends on EXPERIMENTAL
+
+@@ -930,6 +932,8 @@ endmenu
+
+ source "arch/powerpc/Kconfig.debug"
+
++source "kernel/Kconfig.openvz"
++
+ source "security/Kconfig"
+
+ config KEYS_COMPAT
+diff -uprN linux-2.6.15.orig/arch/powerpc/kernel/irq.c linux-2.6.15-ve025stab014/arch/powerpc/kernel/irq.c
+--- linux-2.6.15.orig/arch/powerpc/kernel/irq.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/kernel/irq.c 2006-01-27 14:48:07.000000000 +0300
+@@ -57,6 +57,8 @@
+ #include <linux/kallsyms.h>
+ #endif
+
++#include <ub/beancounter.h>
++
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+ #include <asm/io.h>
+@@ -199,7 +201,11 @@ void fixup_irqs(cpumask_t map)
+ void do_IRQ(struct pt_regs *regs)
+ {
+ struct paca_struct *lpaca;
++ struct ve_struct *ve;
++ struct user_beancounter *ub;
+
++ ve = set_exec_env(get_ve0());
++ ub = set_exec_ub(get_ub0());
+ irq_enter();
+
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+@@ -228,6 +234,8 @@ void do_IRQ(struct pt_regs *regs)
+ process_hvlpevents(regs);
+
+ irq_exit();
++ (void)set_exec_ub(ub);
++ (void)set_exec_env(ve);
+
+ if (lpaca->lppaca.int_dword.fields.decr_int) {
+ lpaca->lppaca.int_dword.fields.decr_int = 0;
+@@ -244,7 +252,11 @@ void do_IRQ(struct pt_regs *regs)
+ #ifdef CONFIG_IRQSTACKS
+ struct thread_info *curtp, *irqtp;
+ #endif
++ struct ve_struct *ve;
++ struct user_beancounter *ub;
+
++ ve = set_exec_env(get_ve0());
++ ub = set_exec_ub(get_ub0());
+ irq_enter();
+
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+@@ -293,6 +305,8 @@ void do_IRQ(struct pt_regs *regs)
+ /* That's not SMP safe ... but who cares ? */
+ ppc_spurious_interrupts++;
+ irq_exit();
++ (void)set_exec_ub(ub);
++ (void)set_exec_env(ve);
+ }
+
+ #endif /* CONFIG_PPC_ISERIES */
+diff -uprN linux-2.6.15.orig/arch/powerpc/kernel/misc_32.S linux-2.6.15-ve025stab014/arch/powerpc/kernel/misc_32.S
+--- linux-2.6.15.orig/arch/powerpc/kernel/misc_32.S 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/kernel/misc_32.S 2006-01-27 14:48:07.000000000 +0300
+@@ -967,7 +967,7 @@ _GLOBAL(_get_SP)
+ * Create a kernel thread
+ * kernel_thread(fn, arg, flags)
+ */
+-_GLOBAL(kernel_thread)
++_GLOBAL(ppc_kernel_thread)
+ stwu r1,-16(r1)
+ stw r30,8(r1)
+ stw r31,12(r1)
+diff -uprN linux-2.6.15.orig/arch/powerpc/kernel/misc_64.S linux-2.6.15-ve025stab014/arch/powerpc/kernel/misc_64.S
+--- linux-2.6.15.orig/arch/powerpc/kernel/misc_64.S 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/kernel/misc_64.S 2006-01-27 14:48:07.000000000 +0300
+@@ -677,7 +677,7 @@ _GLOBAL(scom970_write)
+ * Create a kernel thread
+ * kernel_thread(fn, arg, flags)
+ */
+-_GLOBAL(kernel_thread)
++_GLOBAL(ppc_kernel_thread)
+ std r29,-24(r1)
+ std r30,-16(r1)
+ stdu r1,-STACK_FRAME_OVERHEAD(r1)
+diff -uprN linux-2.6.15.orig/arch/powerpc/kernel/process.c linux-2.6.15-ve025stab014/arch/powerpc/kernel/process.c
+--- linux-2.6.15.orig/arch/powerpc/kernel/process.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/kernel/process.c 2006-01-27 14:48:07.000000000 +0300
+@@ -888,3 +888,17 @@ void dump_stack(void)
+ show_stack(current, NULL);
+ }
+ EXPORT_SYMBOL(dump_stack);
++
++long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
++{
++ extern long ppc_kernel_thread(int (*fn)(void *), void *arg,
++ unsigned long flags);
++
++ if (!ve_is_super(get_exec_env())) {
++ printk("kernel_thread call inside VE\n");
++ dump_stack();
++ return -EPERM;
++ }
++
++ return ppc_kernel_thread(fn, arg, flags);
++}
+diff -uprN linux-2.6.15.orig/arch/powerpc/kernel/ptrace32.c linux-2.6.15-ve025stab014/arch/powerpc/kernel/ptrace32.c
+--- linux-2.6.15.orig/arch/powerpc/kernel/ptrace32.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/kernel/ptrace32.c 2006-01-27 14:48:07.000000000 +0300
+@@ -62,7 +62,7 @@ long compat_sys_ptrace(int request, int
+ }
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+diff -uprN linux-2.6.15.orig/arch/powerpc/kernel/syscalls.c linux-2.6.15-ve025stab014/arch/powerpc/kernel/syscalls.c
+--- linux-2.6.15.orig/arch/powerpc/kernel/syscalls.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/kernel/syscalls.c 2006-01-27 14:48:07.000000000 +0300
+@@ -262,7 +262,7 @@ long ppc_newuname(struct new_utsname __u
+ int err = 0;
+
+ down_read(&uts_sem);
+- if (copy_to_user(name, &system_utsname, sizeof(*name)))
++ if (copy_to_user(name, &ve_utsname, sizeof(*name)))
+ err = -EFAULT;
+ up_read(&uts_sem);
+ if (!err)
+@@ -275,7 +275,7 @@ int sys_uname(struct old_utsname __user
+ int err = 0;
+
+ down_read(&uts_sem);
+- if (copy_to_user(name, &system_utsname, sizeof(*name)))
++ if (copy_to_user(name, &ve_utsname, sizeof(*name)))
+ err = -EFAULT;
+ up_read(&uts_sem);
+ if (!err)
+@@ -291,19 +291,19 @@ int sys_olduname(struct oldold_utsname _
+ return -EFAULT;
+
+ down_read(&uts_sem);
+- error = __copy_to_user(&name->sysname, &system_utsname.sysname,
++ error = __copy_to_user(&name->sysname, &ve_utsname.sysname,
+ __OLD_UTS_LEN);
+ error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+- error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
++ error |= __copy_to_user(&name->nodename, &ve_utsname.nodename,
+ __OLD_UTS_LEN);
+ error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+- error |= __copy_to_user(&name->release, &system_utsname.release,
++ error |= __copy_to_user(&name->release, &ve_utsname.release,
+ __OLD_UTS_LEN);
+ error |= __put_user(0, name->release + __OLD_UTS_LEN);
+- error |= __copy_to_user(&name->version, &system_utsname.version,
++ error |= __copy_to_user(&name->version, &ve_utsname.version,
+ __OLD_UTS_LEN);
+ error |= __put_user(0, name->version + __OLD_UTS_LEN);
+- error |= __copy_to_user(&name->machine, &system_utsname.machine,
++ error |= __copy_to_user(&name->machine, &ve_utsname.machine,
+ __OLD_UTS_LEN);
+ error |= override_machine(name->machine);
+ up_read(&uts_sem);
+diff -uprN linux-2.6.15.orig/arch/powerpc/kernel/systbl.S linux-2.6.15-ve025stab014/arch/powerpc/kernel/systbl.S
+--- linux-2.6.15.orig/arch/powerpc/kernel/systbl.S 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/kernel/systbl.S 2006-01-27 14:48:05.000000000 +0300
+@@ -319,3 +319,12 @@ COMPAT_SYS(ioprio_get)
+ SYSCALL(inotify_init)
+ SYSCALL(inotify_add_watch)
+ SYSCALL(inotify_rm_watch)
++
++.rept 410 - (. - sys_call_table)/8
++SYSX(sys_ni_syscall, sys_ni_syscall, sys_ni_syscall)
++.endr
++
++SYSX(sys_getluid, sys_ni_syscall, sys_getluid)
++SYSX(sys_setluid, sys_ni_syscall, sys_setluid)
++SYSX(sys_setublimit, sys_ni_syscall, sys_setublimit)
++SYSX(sys_ubstat, sys_ni_syscall, sys_ubstat)
+diff -uprN linux-2.6.15.orig/arch/powerpc/kernel/time.c linux-2.6.15-ve025stab014/arch/powerpc/kernel/time.c
+--- linux-2.6.15.orig/arch/powerpc/kernel/time.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/kernel/time.c 2006-01-27 14:48:07.000000000 +0300
+@@ -420,12 +420,14 @@ void timer_interrupt(struct pt_regs * re
+ int next_dec;
+ int cpu = smp_processor_id();
+ unsigned long ticks;
++ struct ve_struct *ve;
+
+ #ifdef CONFIG_PPC32
+ if (atomic_read(&ppc_n_lost_interrupts) != 0)
+ do_IRQ(regs);
+ #endif
+
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+
+ profile_tick(CPU_PROFILING, regs);
+@@ -488,6 +490,7 @@ void timer_interrupt(struct pt_regs * re
+ #endif
+
+ irq_exit();
++ (void)set_exec_env(ve);
+ }
+
+ void wakeup_decrementer(void)
+diff -uprN linux-2.6.15.orig/arch/powerpc/mm/fault.c linux-2.6.15-ve025stab014/arch/powerpc/mm/fault.c
+--- linux-2.6.15.orig/arch/powerpc/mm/fault.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/mm/fault.c 2006-01-27 14:48:05.000000000 +0300
+@@ -306,7 +306,6 @@ good_area:
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+- survive:
+ switch (handle_mm_fault(mm, vma, address, is_write)) {
+
+ case VM_FAULT_MINOR:
+@@ -350,14 +349,12 @@ bad_area_nosemaphore:
+ */
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (current->pid == 1) {
+- yield();
+- down_read(&mm->mmap_sem);
+- goto survive;
+- }
+- printk("VM: killing process %s\n", current->comm);
+ if (user_mode(regs))
+- do_exit(SIGKILL);
++ /*
++ * 0-order allocation always success if something really
++ * fatal not happen: beancounter overdraft or OOM. Den
++ */
++ force_sig(SIGKILL, current);
+ return SIGKILL;
+
+ do_sigbus:
+diff -uprN linux-2.6.15.orig/arch/powerpc/mm/init_64.c linux-2.6.15-ve025stab014/arch/powerpc/mm/init_64.c
+--- linux-2.6.15.orig/arch/powerpc/mm/init_64.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/mm/init_64.c 2006-01-27 14:48:05.000000000 +0300
+@@ -225,7 +225,8 @@ void pgtable_cache_init(void)
+ pgtable_cache[i] = kmem_cache_create(name,
+ size, size,
+ SLAB_HWCACHE_ALIGN |
+- SLAB_MUST_HWCACHE_ALIGN,
++ SLAB_MUST_HWCACHE_ALIGN |
++ SLAB_UBC | SLAB_NO_CHARGE,
+ zero_ctor,
+ NULL);
+ if (! pgtable_cache[i])
+diff -uprN linux-2.6.15.orig/arch/powerpc/mm/mem.c linux-2.6.15-ve025stab014/arch/powerpc/mm/mem.c
+--- linux-2.6.15.orig/arch/powerpc/mm/mem.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/mm/mem.c 2006-01-27 14:48:07.000000000 +0300
+@@ -223,6 +223,7 @@ void show_mem(void)
+ printk("%ld pages shared\n", shared);
+ printk("%ld pages swap cached\n", cached);
+ }
++EXPORT_SYMBOL(show_mem);
+
+ /*
+ * Initialize the bootmem system and give it all the memory we
+diff -uprN linux-2.6.15.orig/arch/powerpc/mm/pgtable_32.c linux-2.6.15-ve025stab014/arch/powerpc/mm/pgtable_32.c
+--- linux-2.6.15.orig/arch/powerpc/mm/pgtable_32.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/powerpc/mm/pgtable_32.c 2006-01-27 14:48:05.000000000 +0300
+@@ -84,7 +84,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+ pgd_t *ret;
+
+- ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
++ ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
++ __GFP_ZERO, PGDIR_ORDER);
+ return ret;
+ }
+
+@@ -118,6 +119,7 @@ struct page *pte_alloc_one(struct mm_str
+ #else
+ gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
+ #endif
++ flags |= (__GFP_UBC | __GFP_SOFT_UBC);
+
+ ptepage = alloc_pages(flags, 0);
+ if (ptepage)
+diff -uprN linux-2.6.15.orig/arch/ppc/Kconfig linux-2.6.15-ve025stab014/arch/ppc/Kconfig
+--- linux-2.6.15.orig/arch/ppc/Kconfig 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ppc/Kconfig 2006-01-27 14:48:07.000000000 +0300
+@@ -1422,6 +1422,10 @@ source "arch/powerpc/oprofile/Kconfig"
+
+ source "arch/ppc/Kconfig.debug"
+
++source "kernel/Kconfig.openvz"
++
+ source "security/Kconfig"
+
++source "kernel/ub/Kconfig"
++
+ source "crypto/Kconfig"
+diff -uprN linux-2.6.15.orig/arch/ppc/kernel/misc.S linux-2.6.15-ve025stab014/arch/ppc/kernel/misc.S
+--- linux-2.6.15.orig/arch/ppc/kernel/misc.S 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ppc/kernel/misc.S 2006-01-27 14:48:07.000000000 +0300
+@@ -1076,7 +1076,7 @@ _GLOBAL(_get_SP)
+ * Create a kernel thread
+ * kernel_thread(fn, arg, flags)
+ */
+-_GLOBAL(kernel_thread)
++_GLOBAL(ppc_kernel_thread)
+ stwu r1,-16(r1)
+ stw r30,8(r1)
+ stw r31,12(r1)
+@@ -1403,3 +1403,12 @@ _GLOBAL(sys_call_table)
+ .long sys_inotify_init /* 275 */
+ .long sys_inotify_add_watch
+ .long sys_inotify_rm_watch
++
++ .rept 410-(.-sys_call_table)/4
++ .long sys_ni_syscall
++ .endr
++
++ .long sys_getluid /* 410 */
++ .long sys_setluid
++ .long sys_setublimit
++ .long sys_ubstat
+diff -uprN linux-2.6.15.orig/arch/ppc/kernel/process.c linux-2.6.15-ve025stab014/arch/ppc/kernel/process.c
+--- linux-2.6.15.orig/arch/ppc/kernel/process.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ppc/kernel/process.c 2006-01-27 14:48:07.000000000 +0300
+@@ -849,3 +849,17 @@ unsigned long get_wchan(struct task_stru
+ } while (count++ < 16);
+ return 0;
+ }
++
++long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
++{
++ extern long ppc_kernel_thread(int (*fn)(void *), void *arg,
++ unsigned long flags);
++
++ if (!ve_is_super(get_exec_env())) {
++ printk("kernel_thread call inside VE\n");
++ dump_stack();
++ return -EPERM;
++ }
++
++ return ppc_kernel_thread(fn, arg, flags);
++}
+diff -uprN linux-2.6.15.orig/arch/ppc/kernel/time.c linux-2.6.15-ve025stab014/arch/ppc/kernel/time.c
+--- linux-2.6.15.orig/arch/ppc/kernel/time.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ppc/kernel/time.c 2006-01-27 14:48:07.000000000 +0300
+@@ -58,6 +58,8 @@
+ #include <linux/init.h>
+ #include <linux/profile.h>
+
++#include <ub/beancounter.h>
++
+ #include <asm/io.h>
+ #include <asm/nvram.h>
+ #include <asm/cache.h>
+@@ -136,10 +138,14 @@ void timer_interrupt(struct pt_regs * re
+ unsigned long cpu = smp_processor_id();
+ unsigned jiffy_stamp = last_jiffy_stamp(cpu);
+ extern void do_IRQ(struct pt_regs *);
++ struct ve_struct *ve;
++ struct user_beancounter *ub;
+
+ if (atomic_read(&ppc_n_lost_interrupts) != 0)
+ do_IRQ(regs);
+
++ ve = set_exec_env(get_ve0());
++ ub = set_exec_ub(get_ub0());
+ irq_enter();
+
+ while ((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) <= 0) {
+@@ -192,6 +198,8 @@ void timer_interrupt(struct pt_regs * re
+ ppc_md.heartbeat();
+
+ irq_exit();
++ (void)set_exec_ub(ub);
++ (void)set_exec_env(ve);
+ }
+
+ /*
+diff -uprN linux-2.6.15.orig/arch/ppc/mm/fault.c linux-2.6.15-ve025stab014/arch/ppc/mm/fault.c
+--- linux-2.6.15.orig/arch/ppc/mm/fault.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ppc/mm/fault.c 2006-01-27 14:48:05.000000000 +0300
+@@ -247,7 +247,6 @@ good_area:
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+- survive:
+ switch (handle_mm_fault(mm, vma, address, is_write)) {
+ case VM_FAULT_MINOR:
+ current->min_flt++;
+@@ -290,14 +289,12 @@ bad_area:
+ */
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (current->pid == 1) {
+- yield();
+- down_read(&mm->mmap_sem);
+- goto survive;
+- }
+- printk("VM: killing process %s\n", current->comm);
+ if (user_mode(regs))
+- do_exit(SIGKILL);
++ /*
++ * 0-order allocation always success if something really
++ * fatal not happen: beancounter overdraft or OOM. Den
++ */
++ force_sig(SIGKILL, current);
+ return SIGKILL;
+
+ do_sigbus:
+diff -uprN linux-2.6.15.orig/arch/ppc/mm/init.c linux-2.6.15-ve025stab014/arch/ppc/mm/init.c
+--- linux-2.6.15.orig/arch/ppc/mm/init.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ppc/mm/init.c 2006-01-27 14:48:07.000000000 +0300
+@@ -136,6 +136,7 @@ void show_mem(void)
+ printk("%d pages shared\n",shared);
+ printk("%d pages swap cached\n",cached);
+ }
++EXPORT_SYMBOL(show_mem);
+
+ /* Free up now-unused memory */
+ static void free_sec(unsigned long start, unsigned long end, const char *name)
+diff -uprN linux-2.6.15.orig/arch/ppc/mm/pgtable.c linux-2.6.15-ve025stab014/arch/ppc/mm/pgtable.c
+--- linux-2.6.15.orig/arch/ppc/mm/pgtable.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/ppc/mm/pgtable.c 2006-01-27 14:48:05.000000000 +0300
+@@ -84,7 +84,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+ pgd_t *ret;
+
+- ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
++ ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
++ __GFP_ZERO, PGDIR_ORDER);
+ return ret;
+ }
+
+@@ -118,6 +119,7 @@ struct page *pte_alloc_one(struct mm_str
+ #else
+ gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
+ #endif
++ flags |= (__GFP_UBC | __GFP_SOFT_UBC);
+
+ ptepage = alloc_pages(flags, 0);
+ if (ptepage)
+diff -uprN linux-2.6.15.orig/arch/s390/Kconfig linux-2.6.15-ve025stab014/arch/s390/Kconfig
+--- linux-2.6.15.orig/arch/s390/Kconfig 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/s390/Kconfig 2006-01-27 14:48:07.000000000 +0300
+@@ -485,8 +485,14 @@ source "arch/s390/oprofile/Kconfig"
+
+ source "arch/s390/Kconfig.debug"
+
++menu "OpenVZ"
++source "kernel/Kconfig.openvz"
++endmenu
++
+ source "security/Kconfig"
+
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "kernel/ub/Kconfig"
+diff -uprN linux-2.6.15.orig/arch/s390/kernel/process.c linux-2.6.15-ve025stab014/arch/s390/kernel/process.c
+--- linux-2.6.15.orig/arch/s390/kernel/process.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/s390/kernel/process.c 2006-01-27 14:48:07.000000000 +0300
+@@ -154,9 +154,10 @@ void show_regs(struct pt_regs *regs)
+ struct task_struct *tsk = current;
+
+ printk("CPU: %d %s\n", tsk->thread_info->cpu, print_tainted());
+- printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
+- current->comm, current->pid, (void *) tsk,
+- (void *) tsk->thread.ksp);
++ printk("Process %s (pid: %d, veid: %d, task: %p, ksp: %p)\n",
++ current->comm, current->pid,
++ VEID(VE_TASK_INFO(current)->owner_env),
++ (void *) tsk, (void *) tsk->thread.ksp);
+
+ show_registers(regs);
+ /* Show stack backtrace if pt_regs is from kernel mode */
+@@ -177,6 +178,13 @@ int kernel_thread(int (*fn)(void *), voi
+ {
+ struct pt_regs regs;
+
++ if (!ve_is_super(get_exec_env())) {
++ /* Don't allow kernel_thread() inside VE */
++ printk("kernel_thread call inside VE\n");
++ dump_stack();
++ return -EPERM;
++ }
++
+ memset(&regs, 0, sizeof(regs));
+ regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
+ regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
+diff -uprN linux-2.6.15.orig/arch/s390/kernel/ptrace.c linux-2.6.15-ve025stab014/arch/s390/kernel/ptrace.c
+--- linux-2.6.15.orig/arch/s390/kernel/ptrace.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/s390/kernel/ptrace.c 2006-01-27 14:48:07.000000000 +0300
+@@ -732,7 +732,7 @@ sys_ptrace(long request, long pid, long
+
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+diff -uprN linux-2.6.15.orig/arch/s390/kernel/s390_ext.c linux-2.6.15-ve025stab014/arch/s390/kernel/s390_ext.c
+--- linux-2.6.15.orig/arch/s390/kernel/s390_ext.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/s390/kernel/s390_ext.c 2006-01-27 14:48:07.000000000 +0300
+@@ -114,7 +114,9 @@ void do_extint(struct pt_regs *regs, uns
+ {
+ ext_int_info_t *p;
+ int index;
++ struct ve_struct *envid;
+
++ envid = set_exec_env(get_ve0());
+ irq_enter();
+ asm volatile ("mc 0,0");
+ if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
+@@ -132,6 +134,7 @@ void do_extint(struct pt_regs *regs, uns
+ }
+ }
+ irq_exit();
++ (void)set_exec_env(envid);
+ }
+
+ EXPORT_SYMBOL(register_external_interrupt);
+diff -uprN linux-2.6.15.orig/arch/s390/kernel/smp.c linux-2.6.15-ve025stab014/arch/s390/kernel/smp.c
+--- linux-2.6.15.orig/arch/s390/kernel/smp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/s390/kernel/smp.c 2006-01-27 14:48:07.000000000 +0300
+@@ -533,6 +533,17 @@ int __devinit start_secondary(void *cpuv
+ {
+ /* Setup the cpu */
+ cpu_init();
++
++#ifdef CONFIG_VE
++ /* TSC reset. kill whatever might rely on old values */
++ VE_TASK_INFO(current)->wakeup_stamp = 0;
++ /*
++ * Cosmetic: sleep_time won't be changed afterwards for the idle
++ * thread; keep it 0 rather than -cycles.
++ */
++ VE_TASK_INFO(idle)->sleep_time = 0;
++#endif
++
+ preempt_disable();
+ /* init per CPU timer */
+ init_cpu_timer();
+@@ -802,6 +813,11 @@ void __init smp_prepare_cpus(unsigned in
+ for_each_cpu(cpu)
+ if (cpu != smp_processor_id())
+ smp_create_idle(cpu);
++
++#ifdef CONFIG_VE
++ /* TSC reset. kill whatever might rely on old values */
++ VE_TASK_INFO(current)->wakeup_stamp = 0;
++#endif
+ }
+
+ void __devinit smp_prepare_boot_cpu(void)
+diff -uprN linux-2.6.15.orig/arch/s390/kernel/syscalls.S linux-2.6.15-ve025stab014/arch/s390/kernel/syscalls.S
+--- linux-2.6.15.orig/arch/s390/kernel/syscalls.S 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/s390/kernel/syscalls.S 2006-01-27 14:48:05.000000000 +0300
+@@ -295,3 +295,12 @@ SYSCALL(sys_ioprio_get,sys_ioprio_get,sy
+ SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init)
+ SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper)
+ SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper)
++
++.rept 410-(.-sys_call_table)/4
++ NI_SYSCALL
++.endr
++
++SYSCALL(sys_getluid, sys_getluid, sys_ni_syscall) /* 410 */
++SYSCALL(sys_setluid, sys_setluid, sys_ni_syscall)
++SYSCALL(sys_setublimit, sys_setublimit, sys_ni_syscall)
++SYSCALL(sys_ubstat, sys_ubstat, sys_ni_syscall)
+diff -uprN linux-2.6.15.orig/arch/s390/mm/init.c linux-2.6.15-ve025stab014/arch/s390/mm/init.c
+--- linux-2.6.15.orig/arch/s390/mm/init.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/s390/mm/init.c 2006-01-27 14:48:07.000000000 +0300
+@@ -89,6 +89,7 @@ void show_mem(void)
+ printk("%d pages shared\n",shared);
+ printk("%d pages swap cached\n",cached);
+ }
++EXPORT_SYMBOL(show_mem);
+
+ /* References to section boundaries */
+
+diff -uprN linux-2.6.15.orig/arch/um/drivers/mconsole_kern.c linux-2.6.15-ve025stab014/arch/um/drivers/mconsole_kern.c
+--- linux-2.6.15.orig/arch/um/drivers/mconsole_kern.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/um/drivers/mconsole_kern.c 2006-01-27 14:48:07.000000000 +0300
+@@ -509,7 +509,7 @@ void do_stack(struct mc_request *req)
+ }
+
+ from = current;
+- to = find_task_by_pid(pid_requested);
++ to = find_task_by_pid_all(pid_requested);
+
+ if((to == NULL) || (pid_requested == 0)) {
+ mconsole_reply(req, "Couldn't find that pid", 1, 0);
+diff -uprN linux-2.6.15.orig/arch/um/kernel/skas/process_kern.c linux-2.6.15-ve025stab014/arch/um/kernel/skas/process_kern.c
+--- linux-2.6.15.orig/arch/um/kernel/skas/process_kern.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/um/kernel/skas/process_kern.c 2006-01-27 14:48:07.000000000 +0300
+@@ -213,7 +213,7 @@ void kill_off_processes_skas(void)
+ int pid, me;
+
+ me = os_getpid();
+- for_each_process(p){
++ for_each_process_all(p){
+ if(p->mm == NULL)
+ continue;
+
+diff -uprN linux-2.6.15.orig/arch/um/kernel/tt/process_kern.c linux-2.6.15-ve025stab014/arch/um/kernel/tt/process_kern.c
+--- linux-2.6.15.orig/arch/um/kernel/tt/process_kern.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/um/kernel/tt/process_kern.c 2006-01-27 14:48:07.000000000 +0300
+@@ -303,7 +303,7 @@ void kill_off_processes_tt(void)
+ int me;
+
+ me = os_getpid();
+- for_each_process(p){
++ for_each_process_all(p){
+ if(p->thread.mode.tt.extern_pid != me)
+ os_kill_process(p->thread.mode.tt.extern_pid, 0);
+ }
+@@ -446,7 +446,7 @@ int is_valid_pid(int pid)
+ struct task_struct *task;
+
+ read_lock(&tasklist_lock);
+- for_each_process(task){
++ for_each_process_all(task){
+ if(task->thread.mode.tt.extern_pid == pid){
+ read_unlock(&tasklist_lock);
+ return(1);
+diff -uprN linux-2.6.15.orig/arch/x86_64/Kconfig linux-2.6.15-ve025stab014/arch/x86_64/Kconfig
+--- linux-2.6.15.orig/arch/x86_64/Kconfig 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/Kconfig 2006-01-27 14:48:07.000000000 +0300
+@@ -574,8 +574,14 @@ endmenu
+
+ source "arch/x86_64/Kconfig.debug"
+
++menu "OpenVZ"
++source "kernel/Kconfig.openvz"
++endmenu
++
+ source "security/Kconfig"
+
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "kernel/ub/Kconfig"
+diff -uprN linux-2.6.15.orig/arch/x86_64/ia32/ia32_aout.c linux-2.6.15-ve025stab014/arch/x86_64/ia32/ia32_aout.c
+--- linux-2.6.15.orig/arch/x86_64/ia32/ia32_aout.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/ia32/ia32_aout.c 2006-01-27 14:48:07.000000000 +0300
+@@ -347,14 +347,14 @@ static int load_aout_binary(struct linux
+ if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
+ (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
+ {
+- printk(KERN_NOTICE "executable not page aligned\n");
++ ve_printk(VE_LOG, KERN_NOTICE "executable not page aligned\n");
+ error_time2 = jiffies;
+ }
+
+ if ((fd_offset & ~PAGE_MASK) != 0 &&
+ (jiffies-error_time) > 5*HZ)
+ {
+- printk(KERN_WARNING
++ ve_printk(VE_LOG, KERN_WARNING
+ "fd_offset is not page aligned. Please convert program: %s\n",
+ bprm->file->f_dentry->d_name.name);
+ error_time = jiffies;
+@@ -467,7 +467,7 @@ static int load_aout_library(struct file
+ static unsigned long error_time;
+ if ((jiffies-error_time) > 5*HZ)
+ {
+- printk(KERN_WARNING
++ ve_printk(VE_LOG, KERN_WARNING
+ "N_TXTOFF is not page aligned. Please convert library: %s\n",
+ file->f_dentry->d_name.name);
+ error_time = jiffies;
+diff -uprN linux-2.6.15.orig/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.15-ve025stab014/arch/x86_64/ia32/ia32_binfmt.c
+--- linux-2.6.15.orig/arch/x86_64/ia32/ia32_binfmt.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/ia32/ia32_binfmt.c 2006-01-27 14:48:05.000000000 +0300
+@@ -27,6 +27,8 @@
+ #include <asm/ia32.h>
+ #include <asm/vsyscall32.h>
+
++#include <ub/ub_vmpages.h>
++
+ #define ELF_NAME "elf/i386"
+
+ #define AT_SYSINFO 32
+@@ -350,9 +352,15 @@ int ia32_setup_arg_pages(struct linux_bi
+ bprm->loader += stack_base;
+ bprm->exec += stack_base;
+
++ ret = -ENOMEM;
++ if (ub_memory_charge(mm, IA32_STACK_TOP -
++ (PAGE_MASK & (unsigned long)bprm->p),
++ VM_STACK_FLAGS, NULL, UB_SOFT))
++ goto err_charge;
++
+ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!mpnt)
+- return -ENOMEM;
++ goto err_alloc;
+
+ memset(mpnt, 0, sizeof(*mpnt));
+
+@@ -369,11 +377,8 @@ int ia32_setup_arg_pages(struct linux_bi
+ mpnt->vm_flags = VM_STACK_FLAGS;
+ mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ?
+ PAGE_COPY_EXEC : PAGE_COPY;
+- if ((ret = insert_vm_struct(mm, mpnt))) {
+- up_write(&mm->mmap_sem);
+- kmem_cache_free(vm_area_cachep, mpnt);
+- return ret;
+- }
++ if ((ret = insert_vm_struct(mm, mpnt)))
++ goto err_insert;
+ mm->stack_vm = mm->total_vm = vma_pages(mpnt);
+ }
+
+@@ -388,6 +393,16 @@ int ia32_setup_arg_pages(struct linux_bi
+ up_write(&mm->mmap_sem);
+
+ return 0;
++
++err_insert:
++ up_write(&mm->mmap_sem);
++ kmem_cache_free(vm_area_cachep, mpnt);
++err_alloc:
++ ub_memory_uncharge(mm, IA32_STACK_TOP -
++ (PAGE_MASK & (unsigned long)bprm->p),
++ VM_STACK_FLAGS, NULL);
++err_charge:
++ return ret;
+ }
+ EXPORT_SYMBOL(ia32_setup_arg_pages);
+
+diff -uprN linux-2.6.15.orig/arch/x86_64/ia32/ptrace32.c linux-2.6.15-ve025stab014/arch/x86_64/ia32/ptrace32.c
+--- linux-2.6.15.orig/arch/x86_64/ia32/ptrace32.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/ia32/ptrace32.c 2006-01-27 14:48:07.000000000 +0300
+@@ -206,7 +206,7 @@ static struct task_struct *find_target(i
+
+ *err = -ESRCH;
+ read_lock(&tasklist_lock);
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+diff -uprN linux-2.6.15.orig/arch/x86_64/ia32/sys_ia32.c linux-2.6.15-ve025stab014/arch/x86_64/ia32/sys_ia32.c
+--- linux-2.6.15.orig/arch/x86_64/ia32/sys_ia32.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/ia32/sys_ia32.c 2006-01-27 14:48:07.000000000 +0300
+@@ -505,7 +505,7 @@ int sys32_ni_syscall(int call)
+ static char lastcomm[sizeof(me->comm)];
+
+ if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
+- printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
++ ve_printk(VE_LOG, KERN_INFO "IA32 syscall %d from %s not implemented\n",
+ call, me->comm);
+ strncpy(lastcomm, me->comm, sizeof(lastcomm));
+ }
+@@ -868,13 +868,13 @@ asmlinkage long sys32_olduname(struct ol
+
+ down_read(&uts_sem);
+
+- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
++ error = __copy_to_user(&name->sysname,&ve_utsname.sysname,__OLD_UTS_LEN);
+ __put_user(0,name->sysname+__OLD_UTS_LEN);
+- __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
++ __copy_to_user(&name->nodename,&ve_utsname.nodename,__OLD_UTS_LEN);
+ __put_user(0,name->nodename+__OLD_UTS_LEN);
+- __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
++ __copy_to_user(&name->release,&ve_utsname.release,__OLD_UTS_LEN);
+ __put_user(0,name->release+__OLD_UTS_LEN);
+- __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
++ __copy_to_user(&name->version,&ve_utsname.version,__OLD_UTS_LEN);
+ __put_user(0,name->version+__OLD_UTS_LEN);
+ {
+ char *arch = "x86_64";
+@@ -897,7 +897,7 @@ long sys32_uname(struct old_utsname __us
+ if (!name)
+ return -EFAULT;
+ down_read(&uts_sem);
+- err=copy_to_user(name, &system_utsname, sizeof (*name));
++ err=copy_to_user(name, &ve_utsname, sizeof (*name));
+ up_read(&uts_sem);
+ if (personality(current->personality) == PER_LINUX32)
+ err |= copy_to_user(&name->machine, "i686", 5);
+@@ -1002,7 +1002,7 @@ long sys32_vm86_warning(void)
+ struct task_struct *me = current;
+ static char lastcomm[sizeof(me->comm)];
+ if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
+- printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
++	ve_printk(VE_LOG, KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
+ me->comm);
+ strncpy(lastcomm, me->comm, sizeof(lastcomm));
+ }
+diff -uprN linux-2.6.15.orig/arch/x86_64/ia32/syscall32.c linux-2.6.15-ve025stab014/arch/x86_64/ia32/syscall32.c
+--- linux-2.6.15.orig/arch/x86_64/ia32/syscall32.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/ia32/syscall32.c 2006-01-27 14:48:05.000000000 +0300
+@@ -14,6 +14,8 @@
+ #include <asm/tlbflush.h>
+ #include <asm/ia32_unistd.h>
+
++#include <ub/ub_vmpages.h>
++
+ extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
+ extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
+ extern int sysctl_vsyscall32;
+@@ -47,32 +49,45 @@ int syscall32_setup_pages(struct linux_b
+ int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
++ unsigned long flags;
+ int ret;
+
++ flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | VM_MAYWRITE |
++ mm->def_flags;
++
++ ret = -ENOMEM;
++ if (ub_memory_charge(mm, VSYSCALL32_END - VSYSCALL32_BASE,
++ flags, NULL, UB_SOFT))
++ goto err_charge;
++
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!vma)
+- return -ENOMEM;
++ goto err_alloc;
+
+ memset(vma, 0, sizeof(struct vm_area_struct));
+ /* Could randomize here */
+ vma->vm_start = VSYSCALL32_BASE;
+ vma->vm_end = VSYSCALL32_END;
+ /* MAYWRITE to allow gdb to COW and set breakpoints */
+- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+- vma->vm_flags |= mm->def_flags;
++ vma->vm_flags = flags;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+ vma->vm_ops = &syscall32_vm_ops;
+ vma->vm_mm = mm;
+
+ down_write(&mm->mmap_sem);
+- if ((ret = insert_vm_struct(mm, vma))) {
+- up_write(&mm->mmap_sem);
+- kmem_cache_free(vm_area_cachep, vma);
+- return ret;
+- }
++ if ((ret = insert_vm_struct(mm, vma)))
++ goto err_ins;
+ mm->total_vm += npages;
+ up_write(&mm->mmap_sem);
+ return 0;
++
++err_ins:
++ up_write(&mm->mmap_sem);
++ kmem_cache_free(vm_area_cachep, vma);
++err_alloc:
++ ub_memory_uncharge(mm, VSYSCALL32_END - VSYSCALL32_BASE, flags, NULL);
++err_charge:
++ return ret;
+ }
+
+ static int __init init_syscall32(void)
+diff -uprN linux-2.6.15.orig/arch/x86_64/kernel/apic.c linux-2.6.15-ve025stab014/arch/x86_64/kernel/apic.c
+--- linux-2.6.15.orig/arch/x86_64/kernel/apic.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/kernel/apic.c 2006-01-27 14:48:07.000000000 +0300
+@@ -905,6 +905,7 @@ void smp_local_timer_interrupt(struct pt
+ */
+ void smp_apic_timer_interrupt(struct pt_regs *regs)
+ {
++ struct ve_struct *ve;
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+@@ -920,9 +921,11 @@ void smp_apic_timer_interrupt(struct pt_
+ * Besides, if we don't timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
+ */
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+ smp_local_timer_interrupt(regs);
+ irq_exit();
++ (void)set_exec_env(ve);
+ }
+
+ /*
+diff -uprN linux-2.6.15.orig/arch/x86_64/kernel/entry.S linux-2.6.15-ve025stab014/arch/x86_64/kernel/entry.S
+--- linux-2.6.15.orig/arch/x86_64/kernel/entry.S 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/kernel/entry.S 2006-01-27 14:48:07.000000000 +0300
+@@ -828,7 +828,7 @@ ENTRY(kernel_thread)
+ xorl %r9d,%r9d
+
+ # clone now
+- call do_fork
++ call do_fork_kthread
+ movq %rax,RAX(%rsp)
+ xorl %edi,%edi
+
+diff -uprN linux-2.6.15.orig/arch/x86_64/kernel/irq.c linux-2.6.15-ve025stab014/arch/x86_64/kernel/irq.c
+--- linux-2.6.15.orig/arch/x86_64/kernel/irq.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/kernel/irq.c 2006-01-27 14:48:07.000000000 +0300
+@@ -97,12 +97,14 @@ asmlinkage unsigned int do_IRQ(struct pt
+ {
+ /* high bits used in ret_from_ code */
+ unsigned irq = regs->orig_rax & 0xff;
+-
++ struct ve_struct *ve;
++
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+
+ __do_IRQ(irq, regs);
+ irq_exit();
+-
++ (void)set_exec_env(ve);
+ return 1;
+ }
+
+diff -uprN linux-2.6.15.orig/arch/x86_64/kernel/ldt.c linux-2.6.15-ve025stab014/arch/x86_64/kernel/ldt.c
+--- linux-2.6.15.orig/arch/x86_64/kernel/ldt.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/kernel/ldt.c 2006-01-27 14:48:05.000000000 +0300
+@@ -23,6 +23,8 @@
+ #include <asm/desc.h>
+ #include <asm/proto.h>
+
++#include <ub/ub_mem.h>
++
+ #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+ static void flush_ldt(void *null)
+ {
+@@ -42,9 +44,9 @@ static int alloc_ldt(mm_context_t *pc, u
+ oldsize = pc->size;
+ mincount = (mincount+511)&(~511);
+ if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
++ newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
+ else
+- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
++ newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+ if (!newldt)
+ return -ENOMEM;
+diff -uprN linux-2.6.15.orig/arch/x86_64/kernel/process.c linux-2.6.15-ve025stab014/arch/x86_64/kernel/process.c
+--- linux-2.6.15.orig/arch/x86_64/kernel/process.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/kernel/process.c 2006-01-27 14:48:07.000000000 +0300
+@@ -812,3 +812,20 @@ unsigned long arch_align_stack(unsigned
+ sp -= get_random_int() % 8192;
+ return sp & ~0xf;
+ }
++
++long do_fork_kthread(unsigned long clone_flags,
++ unsigned long stack_start,
++ struct pt_regs *regs,
++ unsigned long stack_size,
++ int __user *parent_tidptr,
++ int __user *child_tidptr)
++{
++ if (ve_is_super(get_exec_env()))
++ return do_fork(clone_flags, stack_start, regs, stack_size,
++ parent_tidptr, child_tidptr);
++
++ /* Don't allow kernel_thread() inside VE */
++ printk("kernel_thread call inside VE\n");
++ dump_stack();
++ return -EPERM;
++}
+diff -uprN linux-2.6.15.orig/arch/x86_64/kernel/sys_x86_64.c linux-2.6.15-ve025stab014/arch/x86_64/kernel/sys_x86_64.c
+--- linux-2.6.15.orig/arch/x86_64/kernel/sys_x86_64.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/kernel/sys_x86_64.c 2006-01-27 14:48:07.000000000 +0300
+@@ -148,7 +148,7 @@ asmlinkage long sys_uname(struct new_uts
+ {
+ int err;
+ down_read(&uts_sem);
+- err = copy_to_user(name, &system_utsname, sizeof (*name));
++ err = copy_to_user(name, &ve_utsname, sizeof (*name));
+ up_read(&uts_sem);
+ if (personality(current->personality) == PER_LINUX32)
+ err |= copy_to_user(&name->machine, "i686", 5);
+diff -uprN linux-2.6.15.orig/arch/x86_64/kernel/time.c linux-2.6.15-ve025stab014/arch/x86_64/kernel/time.c
+--- linux-2.6.15.orig/arch/x86_64/kernel/time.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/kernel/time.c 2006-01-27 14:48:07.000000000 +0300
+@@ -64,6 +64,8 @@ unsigned long vxtime_hz = PIT_TICK_RATE;
+ int report_lost_ticks; /* command line option */
+ unsigned long long monotonic_base;
+
++EXPORT_SYMBOL(cpu_khz);
++
+ struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
+
+ volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+diff -uprN linux-2.6.15.orig/arch/x86_64/kernel/traps.c linux-2.6.15-ve025stab014/arch/x86_64/kernel/traps.c
+--- linux-2.6.15.orig/arch/x86_64/kernel/traps.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/kernel/traps.c 2006-01-27 14:48:07.000000000 +0300
+@@ -279,10 +279,12 @@ void show_registers(struct pt_regs *regs
+
+ rsp = regs->rsp;
+
+- printk("CPU %d ", cpu);
++ printk("CPU: %d ", cpu);
+ __show_regs(regs);
+- printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
+- cur->comm, cur->pid, cur->thread_info, cur);
++ printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n",
++ cur->comm, cur->pid,
++ VEID(VE_TASK_INFO(current)->owner_env),
++ cur->thread_info, cur);
+
+ /*
+ * When in-kernel, we also print out the stack and code at the
+diff -uprN linux-2.6.15.orig/arch/x86_64/mm/fault.c linux-2.6.15-ve025stab014/arch/x86_64/mm/fault.c
+--- linux-2.6.15.orig/arch/x86_64/mm/fault.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/mm/fault.c 2006-01-27 14:48:07.000000000 +0300
+@@ -318,7 +318,7 @@ asmlinkage void __kprobes do_page_fault(
+ local_irq_enable();
+
+ if (unlikely(page_fault_trace))
+- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
++ ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+ regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
+
+ tsk = current;
+@@ -364,7 +364,6 @@ asmlinkage void __kprobes do_page_fault(
+ if (unlikely(in_atomic() || !mm))
+ goto bad_area_nosemaphore;
+
+- again:
+ /* When running in the kernel we expect faults to occur only to
+ * addresses in user space. All other faults represent errors in the
+ * kernel and should generate an OOPS. Unfortunatly, in the case of an
+@@ -468,7 +467,7 @@ bad_area_nosemaphore:
+ return;
+
+ if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
+- printk(
++ ve_printk(VE_LOG,
+ "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
+ tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
+ tsk->comm, tsk->pid, address, regs->rip,
+@@ -533,13 +532,14 @@ no_context:
+ */
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (current->pid == 1) {
+- yield();
+- goto again;
+- }
+- printk("VM: killing process %s\n", tsk->comm);
+- if (error_code & 4)
+- do_exit(SIGKILL);
++ if (error_code & 4) {
++ /*
++	 * 0-order allocations always succeed unless something really
++	 * fatal happens: beancounter overdraft or OOM.
++ */
++ force_sig(SIGKILL, tsk);
++ return;
++ }
+ goto no_context;
+
+ do_sigbus:
+diff -uprN linux-2.6.15.orig/arch/x86_64/mm/init.c linux-2.6.15-ve025stab014/arch/x86_64/mm/init.c
+--- linux-2.6.15.orig/arch/x86_64/mm/init.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/arch/x86_64/mm/init.c 2006-01-27 14:48:07.000000000 +0300
+@@ -81,6 +81,7 @@ void show_mem(void)
+ printk(KERN_INFO "%lu pages shared\n",shared);
+ printk(KERN_INFO "%lu pages swap cached\n",cached);
+ }
++EXPORT_SYMBOL(show_mem);
+
+ /* References to section boundaries */
+
+diff -uprN linux-2.6.15.orig/block/elevator.c linux-2.6.15-ve025stab014/block/elevator.c
+--- linux-2.6.15.orig/block/elevator.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/block/elevator.c 2006-01-27 14:48:07.000000000 +0300
+@@ -646,7 +646,7 @@ void elv_unregister(struct elevator_type
+ * Iterate every thread in the process to remove the io contexts.
+ */
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ struct io_context *ioc = p->io_context;
+ if (ioc && ioc->cic) {
+ ioc->cic->exit(ioc->cic);
+@@ -658,7 +658,7 @@ void elv_unregister(struct elevator_type
+ ioc->aic->dtor(ioc->aic);
+ ioc->aic = NULL;
+ }
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+ read_unlock(&tasklist_lock);
+
+ spin_lock_irq(&elv_list_lock);
+diff -uprN linux-2.6.15.orig/block/genhd.c linux-2.6.15-ve025stab014/block/genhd.c
+--- linux-2.6.15.orig/block/genhd.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/block/genhd.c 2006-01-27 14:48:07.000000000 +0300
+@@ -18,7 +18,7 @@
+
+ #define MAX_PROBE_HASH 255 /* random */
+
+-static struct subsystem block_subsys;
++struct subsystem block_subsys;
+
+ static DECLARE_MUTEX(block_subsys_sem);
+
+@@ -526,7 +526,8 @@ static struct kset_hotplug_ops block_hot
+ };
+
+ /* declare block_subsys. */
+-static decl_subsys(block, &ktype_block, &block_hotplug_ops);
++decl_subsys(block, &ktype_block, &block_hotplug_ops);
++EXPORT_SYMBOL(block_subsys);
+
+
+ /*
+diff -uprN linux-2.6.15.orig/drivers/base/class.c linux-2.6.15-ve025stab014/drivers/base/class.c
+--- linux-2.6.15.orig/drivers/base/class.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/base/class.c 2006-01-27 14:48:07.000000000 +0300
+@@ -74,6 +74,11 @@ static struct kobj_type ktype_class = {
+ /* Hotplug events for classes go to the class_obj subsys */
+ static decl_subsys(class, &ktype_class, NULL);
+
++#ifndef CONFIG_VE
++#define visible_class_subsys class_subsys
++#else
++#define visible_class_subsys (*get_exec_env()->class_subsys)
++#endif
+
+ int class_create_file(struct class * cls, const struct class_attribute * attr)
+ {
+@@ -148,7 +153,7 @@ int class_register(struct class * cls)
+ if (error)
+ return error;
+
+- subsys_set_kset(cls, class_subsys);
++ subsys_set_kset(cls, visible_class_subsys);
+
+ error = subsystem_register(&cls->subsys);
+ if (!error) {
+@@ -422,6 +427,11 @@ static struct kset_hotplug_ops class_hot
+
+ static decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops);
+
++#ifndef CONFIG_VE
++#define visible_class_obj_subsys class_obj_subsys
++#else
++#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys)
++#endif
+
+ static int class_device_add_attrs(struct class_device * cd)
+ {
+@@ -470,7 +480,7 @@ static ssize_t store_uevent(struct class
+
+ void class_device_initialize(struct class_device *class_dev)
+ {
+- kobj_set_kset_s(class_dev, class_obj_subsys);
++ kobj_set_kset_s(class_dev, visible_class_obj_subsys);
+ kobject_init(&class_dev->kobj);
+ INIT_LIST_HEAD(&class_dev->node);
+ }
+@@ -805,12 +815,19 @@ void class_interface_unregister(struct c
+ class_put(parent);
+ }
+
+-
++void prepare_sysfs_classes(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->class_subsys = &class_subsys;
++ get_ve0()->class_obj_subsys = &class_obj_subsys;
++#endif
++}
+
+ int __init classes_init(void)
+ {
+ int retval;
+
++ prepare_sysfs_classes();
+ retval = subsystem_register(&class_subsys);
+ if (retval)
+ return retval;
+@@ -848,3 +865,6 @@ EXPORT_SYMBOL_GPL(class_device_remove_bi
+
+ EXPORT_SYMBOL_GPL(class_interface_register);
+ EXPORT_SYMBOL_GPL(class_interface_unregister);
++
++EXPORT_SYMBOL(class_subsys);
++EXPORT_SYMBOL(class_obj_subsys);
+diff -uprN linux-2.6.15.orig/drivers/char/pty.c linux-2.6.15-ve025stab014/drivers/char/pty.c
+--- linux-2.6.15.orig/drivers/char/pty.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/char/pty.c 2006-01-27 14:48:07.000000000 +0300
+@@ -32,16 +32,30 @@
+ #include <linux/bitops.h>
+ #include <linux/devpts_fs.h>
+
++#include <ub/ub_misc.h>
++
+ /* These are global because they are accessed in tty_io.c */
+ #ifdef CONFIG_UNIX98_PTYS
+ struct tty_driver *ptm_driver;
+-static struct tty_driver *pts_driver;
++struct tty_driver *pts_driver;
++EXPORT_SYMBOL(ptm_driver);
++EXPORT_SYMBOL(pts_driver);
++
++void prepare_pty(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->ptm_driver = ptm_driver;
++ /* don't clean ptm_driver and co. here, they are used in vecalls.c */
++#endif
++}
+ #endif
+
+ static void pty_close(struct tty_struct * tty, struct file * filp)
+ {
+ if (!tty)
+ return;
++
++ ub_pty_uncharge(tty);
+ if (tty->driver->subtype == PTY_TYPE_MASTER) {
+ if (tty->count > 1)
+ printk("master pty_close: count = %d!!\n", tty->count);
+@@ -61,8 +75,12 @@ static void pty_close(struct tty_struct
+ if (tty->driver->subtype == PTY_TYPE_MASTER) {
+ set_bit(TTY_OTHER_CLOSED, &tty->flags);
+ #ifdef CONFIG_UNIX98_PTYS
+- if (tty->driver == ptm_driver)
++ if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) {
++ struct ve_struct *old_env;
++ old_env = set_exec_env(VE_OWNER_TTY(tty));
+ devpts_pty_kill(tty->index);
++ (void)set_exec_env(old_env);
++ }
+ #endif
+ tty_vhangup(tty->link);
+ }
+@@ -212,6 +230,10 @@ static int pty_open(struct tty_struct *t
+ if (tty->link->count != 1)
+ goto out;
+
++ retval = -ENODEV;
++ if (ub_pty_charge(tty))
++ goto out;
++
+ clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
+ set_bit(TTY_THROTTLED, &tty->flags);
+ set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+@@ -239,7 +261,9 @@ static struct tty_operations pty_ops = {
+
+ /* Traditional BSD devices */
+ #ifdef CONFIG_LEGACY_PTYS
+-static struct tty_driver *pty_driver, *pty_slave_driver;
++struct tty_driver *pty_driver, *pty_slave_driver;
++EXPORT_SYMBOL(pty_driver);
++EXPORT_SYMBOL(pty_slave_driver);
+
+ static int pty_bsd_ioctl(struct tty_struct *tty, struct file *file,
+ unsigned int cmd, unsigned long arg)
+@@ -397,6 +421,7 @@ static void __init unix98_pty_init(void)
+ panic("Couldn't register Unix98 pts driver");
+
+ pty_table[1].data = &ptm_driver->refcount;
++ prepare_pty();
+ }
+ #else
+ static inline void unix98_pty_init(void) { }
+diff -uprN linux-2.6.15.orig/drivers/char/snsc_event.c linux-2.6.15-ve025stab014/drivers/char/snsc_event.c
+--- linux-2.6.15.orig/drivers/char/snsc_event.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/char/snsc_event.c 2006-01-27 14:48:07.000000000 +0300
+@@ -206,7 +206,7 @@ scdrv_dispatch_event(char *event, int le
+
+ /* first find init's task */
+ read_lock(&tasklist_lock);
+- for_each_process(p) {
++ for_each_process_all(p) {
+ if (p->pid == 1)
+ break;
+ }
+diff -uprN linux-2.6.15.orig/drivers/char/sysrq.c linux-2.6.15-ve025stab014/drivers/char/sysrq.c
+--- linux-2.6.15.orig/drivers/char/sysrq.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/char/sysrq.c 2006-01-27 14:48:07.000000000 +0300
+@@ -206,7 +206,7 @@ static void send_sig_all(int sig)
+ {
+ struct task_struct *p;
+
+- for_each_process(p) {
++ for_each_process_all(p) {
+ if (p->mm && p->pid != 1)
+ /* Not swapper, init nor kernel thread */
+ force_sig(sig, p);
+diff -uprN linux-2.6.15.orig/drivers/char/tty_io.c linux-2.6.15-ve025stab014/drivers/char/tty_io.c
+--- linux-2.6.15.orig/drivers/char/tty_io.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/char/tty_io.c 2006-01-27 14:48:07.000000000 +0300
+@@ -86,6 +86,7 @@
+ #include <linux/string.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
++#include <linux/ve_owner.h>
+ #include <linux/proc_fs.h>
+ #include <linux/init.h>
+ #include <linux/module.h>
+@@ -105,6 +106,7 @@
+ #include <linux/devfs_fs_kernel.h>
+
+ #include <linux/kmod.h>
++#include <ub/ub_mem.h>
+
+ #undef TTY_DEBUG_HANGUP
+
+@@ -122,11 +124,16 @@ struct termios tty_std_termios = { /* fo
+
+ EXPORT_SYMBOL(tty_std_termios);
+
++/* this lock protects the tty_drivers list; its users do no locking of their own */
++rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED;
++EXPORT_SYMBOL(tty_driver_guard);
++
+ /* This list gets poked at by procfs and various bits of boot up code. This
+ could do with some rationalisation such as pulling the tty proc function
+ into this file */
+
+ LIST_HEAD(tty_drivers); /* linked list of tty drivers */
++EXPORT_SYMBOL(tty_drivers);
+
+ /* Semaphore to protect creating and releasing a tty. This is shared with
+ vt.c for deeply disgusting hack reasons */
+@@ -136,6 +143,15 @@ DECLARE_MUTEX(tty_sem);
+ extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */
+ extern int pty_limit; /* Config limit on Unix98 ptys */
+ static DEFINE_IDR(allocated_ptys);
++#ifdef CONFIG_VE
++#define __ve_allocated_ptys(ve) (*((ve)->allocated_ptys))
++#define ve_allocated_ptys __ve_allocated_ptys(get_exec_env())
++#define ve_ptm_driver (get_exec_env()->ptm_driver)
++#else
++#define __ve_allocated_ptys(ve) allocated_ptys
++#define ve_allocated_ptys allocated_ptys
++#define ve_ptm_driver ptm_driver
++#endif
+ static DECLARE_MUTEX(allocated_ptys_lock);
+ static int ptmx_open(struct inode *, struct file *);
+ #endif
+@@ -156,11 +172,25 @@ static int tty_fasync(int fd, struct fil
+ static void release_mem(struct tty_struct *tty, int idx);
+
+
++DCL_VE_OWNER(TTYDRV, struct tty_driver, owner_env)
++DCL_VE_OWNER(TTY, struct tty_struct, owner_env)
++
++void prepare_tty(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->allocated_ptys = &allocated_ptys;
++ /*
++	 * in this case, tty_register_driver() sets up
++	 * owner_env correctly right from bootup
++ */
++#endif
++}
++
+ static struct tty_struct *alloc_tty_struct(void)
+ {
+ struct tty_struct *tty;
+
+- tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
++ tty = ub_kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+ if (tty)
+ memset(tty, 0, sizeof(struct tty_struct));
+ return tty;
+@@ -627,14 +657,37 @@ static struct tty_driver *get_tty_driver
+ {
+ struct tty_driver *p;
+
++ read_lock(&tty_driver_guard);
+ list_for_each_entry(p, &tty_drivers, tty_drivers) {
+ dev_t base = MKDEV(p->major, p->minor_start);
+ if (device < base || device >= base + p->num)
+ continue;
+ *index = device - base;
+- return p;
++#ifdef CONFIG_VE
++ if (in_interrupt())
++ goto found;
++ if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR
++#ifdef CONFIG_UNIX98_PTYS
++ && (p->major<UNIX98_PTY_MASTER_MAJOR ||
++ p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) &&
++ (p->major<UNIX98_PTY_SLAVE_MAJOR ||
++ p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1)
++#endif
++ ) goto found;
++ if (ve_is_super(VE_OWNER_TTYDRV(p)) &&
++ ve_is_super(get_exec_env()))
++ goto found;
++ if (!ve_accessible_strict(VE_OWNER_TTYDRV(p), get_exec_env()))
++ continue;
++#endif
++ goto found;
+ }
++ read_unlock(&tty_driver_guard);
+ return NULL;
++
++found:
++ read_unlock(&tty_driver_guard);
++ return p;
+ }
+
+ /*
+@@ -862,7 +915,7 @@ static void do_tty_hangup(void *data)
+
+ read_lock(&tasklist_lock);
+ if (tty->session > 0) {
+- do_each_task_pid(tty->session, PIDTYPE_SID, p) {
++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
+ if (p->signal->tty == tty)
+ p->signal->tty = NULL;
+ if (!p->signal->leader)
+@@ -871,7 +924,7 @@ static void do_tty_hangup(void *data)
+ send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ if (tty->pgrp > 0)
+ p->signal->tty_old_pgrp = tty->pgrp;
+- } while_each_task_pid(tty->session, PIDTYPE_SID, p);
++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
+ }
+ read_unlock(&tasklist_lock);
+
+@@ -988,9 +1041,9 @@ void disassociate_ctty(int on_exit)
+
+ /* Now clear signal->tty under the lock */
+ read_lock(&tasklist_lock);
+- do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
++ do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) {
+ p->signal->tty = NULL;
+- } while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
++ } while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p);
+ read_unlock(&tasklist_lock);
+ up(&tty_sem);
+ unlock_kernel();
+@@ -1216,21 +1269,28 @@ static inline void tty_line_name(struct
+ * really quite straightforward. The semaphore locking can probably be
+ * relaxed for the (most common) case of reopening a tty.
+ */
+-static int init_dev(struct tty_driver *driver, int idx,
+- struct tty_struct **ret_tty)
++static int init_dev(struct tty_driver *driver, int idx,
++ struct tty_struct *i_tty, struct tty_struct **ret_tty)
+ {
+ struct tty_struct *tty, *o_tty;
+ struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
+ struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
++ struct ve_struct * owner;
+ int retval=0;
+
+- /* check whether we're reopening an existing tty */
+- if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+- tty = devpts_get_tty(idx);
+- if (tty && driver->subtype == PTY_TYPE_MASTER)
+- tty = tty->link;
+- } else {
+- tty = driver->ttys[idx];
++ owner = VE_OWNER_TTYDRV(driver);
++
++ if (i_tty)
++ tty = i_tty;
++ else {
++ /* check whether we're reopening an existing tty */
++ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
++ tty = devpts_get_tty(idx);
++ if (tty && driver->subtype == PTY_TYPE_MASTER)
++ tty = tty->link;
++ } else {
++ tty = driver->ttys[idx];
++ }
+ }
+ if (tty) goto fast_track;
+
+@@ -1258,6 +1318,7 @@ static int init_dev(struct tty_driver *d
+ tty->driver = driver;
+ tty->index = idx;
+ tty_line_name(driver, idx, tty->name);
++ SET_VE_OWNER_TTY(tty, owner);
+
+ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+ tp_loc = &tty->termios;
+@@ -1268,7 +1329,7 @@ static int init_dev(struct tty_driver *d
+ }
+
+ if (!*tp_loc) {
+- tp = (struct termios *) kmalloc(sizeof(struct termios),
++ tp = (struct termios *) ub_kmalloc(sizeof(struct termios),
+ GFP_KERNEL);
+ if (!tp)
+ goto free_mem_out;
+@@ -1276,7 +1337,7 @@ static int init_dev(struct tty_driver *d
+ }
+
+ if (!*ltp_loc) {
+- ltp = (struct termios *) kmalloc(sizeof(struct termios),
++ ltp = (struct termios *) ub_kmalloc(sizeof(struct termios),
+ GFP_KERNEL);
+ if (!ltp)
+ goto free_mem_out;
+@@ -1291,6 +1352,7 @@ static int init_dev(struct tty_driver *d
+ o_tty->driver = driver->other;
+ o_tty->index = idx;
+ tty_line_name(driver->other, idx, o_tty->name);
++ SET_VE_OWNER_TTY(o_tty, owner);
+
+ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+ o_tp_loc = &o_tty->termios;
+@@ -1302,7 +1364,7 @@ static int init_dev(struct tty_driver *d
+
+ if (!*o_tp_loc) {
+ o_tp = (struct termios *)
+- kmalloc(sizeof(struct termios), GFP_KERNEL);
++ ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
+ if (!o_tp)
+ goto free_mem_out;
+ *o_tp = driver->other->init_termios;
+@@ -1310,7 +1372,7 @@ static int init_dev(struct tty_driver *d
+
+ if (!*o_ltp_loc) {
+ o_ltp = (struct termios *)
+- kmalloc(sizeof(struct termios), GFP_KERNEL);
++ ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
+ if (!o_ltp)
+ goto free_mem_out;
+ memset(o_ltp, 0, sizeof(struct termios));
+@@ -1328,6 +1390,10 @@ static int init_dev(struct tty_driver *d
+ *o_ltp_loc = o_ltp;
+ o_tty->termios = *o_tp_loc;
+ o_tty->termios_locked = *o_ltp_loc;
++#ifdef CONFIG_VE
++ if (driver->other->refcount == 0)
++ (void)get_ve(owner);
++#endif
+ driver->other->refcount++;
+ if (driver->subtype == PTY_TYPE_MASTER)
+ o_tty->count++;
+@@ -1352,6 +1418,10 @@ static int init_dev(struct tty_driver *d
+ *ltp_loc = ltp;
+ tty->termios = *tp_loc;
+ tty->termios_locked = *ltp_loc;
++#ifdef CONFIG_VE
++ if (driver->refcount == 0)
++ (void)get_ve(owner);
++#endif
+ driver->refcount++;
+ tty->count++;
+
+@@ -1462,6 +1532,10 @@ static void release_mem(struct tty_struc
+ }
+ o_tty->magic = 0;
+ o_tty->driver->refcount--;
++#ifdef CONFIG_VE
++ if (o_tty->driver->refcount == 0)
++ put_ve(VE_OWNER_TTY(o_tty));
++#endif
+ file_list_lock();
+ list_del_init(&o_tty->tty_files);
+ file_list_unlock();
+@@ -1484,6 +1558,10 @@ static void release_mem(struct tty_struc
+
+ tty->magic = 0;
+ tty->driver->refcount--;
++#ifdef CONFIG_VE
++ if (tty->driver->refcount == 0)
++ put_ve(VE_OWNER_TTY(tty));
++#endif
+ file_list_lock();
+ list_del_init(&tty->tty_files);
+ file_list_unlock();
+@@ -1507,7 +1585,10 @@ static void release_dev(struct file * fi
+ int idx;
+ char buf[64];
+ unsigned long flags;
+-
++#ifdef CONFIG_UNIX98_PTYS
++ struct idr *idr_alloced;
++#endif
++
+ tty = (struct tty_struct *)filp->private_data;
+ if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev"))
+ return;
+@@ -1522,6 +1603,9 @@ static void release_dev(struct file * fi
+ devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
+ devpts_master = pty_master && devpts;
+ o_tty = tty->link;
++#ifdef CONFIG_UNIX98_PTYS
++ idr_alloced = &__ve_allocated_ptys(tty->owner_env);
++#endif
+
+ #ifdef TTY_PARANOIA_CHECK
+ if (idx < 0 || idx >= tty->driver->num) {
+@@ -1697,13 +1781,13 @@ static void release_dev(struct file * fi
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+- do_each_task_pid(tty->session, PIDTYPE_SID, p) {
++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
+ p->signal->tty = NULL;
+- } while_each_task_pid(tty->session, PIDTYPE_SID, p);
++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
+ if (o_tty)
+- do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
++ do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) {
+ p->signal->tty = NULL;
+- } while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
++ } while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p);
+ read_unlock(&tasklist_lock);
+ }
+
+@@ -1776,7 +1860,7 @@ static void release_dev(struct file * fi
+ /* Make this pty number available for reallocation */
+ if (devpts) {
+ down(&allocated_ptys_lock);
+- idr_remove(&allocated_ptys, idx);
++ idr_remove(idr_alloced, idx);
+ up(&allocated_ptys_lock);
+ }
+ #endif
+@@ -1797,7 +1881,7 @@ static void release_dev(struct file * fi
+ */
+ static int tty_open(struct inode * inode, struct file * filp)
+ {
+- struct tty_struct *tty;
++ struct tty_struct *tty, *c_tty;
+ int noctty, retval;
+ struct tty_driver *driver;
+ int index;
+@@ -1810,6 +1894,7 @@ retry_open:
+ noctty = filp->f_flags & O_NOCTTY;
+ index = -1;
+ retval = 0;
++ c_tty = NULL;
+
+ down(&tty_sem);
+
+@@ -1820,6 +1905,7 @@ retry_open:
+ }
+ driver = current->signal->tty->driver;
+ index = current->signal->tty->index;
++ c_tty = current->signal->tty;
+ filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
+ /* noctty = 1; */
+ goto got_driver;
+@@ -1827,6 +1913,12 @@ retry_open:
+ #ifdef CONFIG_VT
+ if (device == MKDEV(TTY_MAJOR,0)) {
+ extern struct tty_driver *console_driver;
++#ifdef CONFIG_VE
++ if (!ve_is_super(get_exec_env())) {
++ up(&tty_sem);
++ return -ENODEV;
++ }
++#endif
+ driver = console_driver;
+ index = fg_console;
+ noctty = 1;
+@@ -1834,6 +1926,12 @@ retry_open:
+ }
+ #endif
+ if (device == MKDEV(TTYAUX_MAJOR,1)) {
++#ifdef CONFIG_VE
++ if (!ve_is_super(get_exec_env())) {
++ up(&tty_sem);
++ return -ENODEV;
++ }
++#endif
+ driver = console_device(&index);
+ if (driver) {
+ /* Don't let /dev/console block */
+@@ -1851,7 +1949,7 @@ retry_open:
+ return -ENODEV;
+ }
+ got_driver:
+- retval = init_dev(driver, index, &tty);
++ retval = init_dev(driver, index, c_tty, &tty);
+ up(&tty_sem);
+ if (retval)
+ return retval;
+@@ -1920,11 +2018,11 @@ static int ptmx_open(struct inode * inod
+
+ /* find a device that is not in use. */
+ down(&allocated_ptys_lock);
+- if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
++ if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) {
+ up(&allocated_ptys_lock);
+ return -ENOMEM;
+ }
+- idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
++ idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index);
+ if (idr_ret < 0) {
+ up(&allocated_ptys_lock);
+ if (idr_ret == -EAGAIN)
+@@ -1932,14 +2030,14 @@ static int ptmx_open(struct inode * inod
+ return -EIO;
+ }
+ if (index >= pty_limit) {
+- idr_remove(&allocated_ptys, index);
++ idr_remove(&ve_allocated_ptys, index);
+ up(&allocated_ptys_lock);
+ return -EIO;
+ }
+ up(&allocated_ptys_lock);
+
+ down(&tty_sem);
+- retval = init_dev(ptm_driver, index, &tty);
++ retval = init_dev(ve_ptm_driver, index, NULL, &tty);
+ up(&tty_sem);
+
+ if (retval)
+@@ -1954,14 +2052,14 @@ static int ptmx_open(struct inode * inod
+ goto out1;
+
+ check_tty_count(tty, "tty_open");
+- retval = ptm_driver->open(tty, filp);
++ retval = ve_ptm_driver->open(tty, filp);
+ if (!retval)
+ return 0;
+ out1:
+ release_dev(filp);
+ out:
+ down(&allocated_ptys_lock);
+- idr_remove(&allocated_ptys, index);
++ idr_remove(&ve_allocated_ptys, index);
+ up(&allocated_ptys_lock);
+ return retval;
+ }
+@@ -2074,6 +2172,8 @@ static int tioccons(struct file *file)
+ {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
+ if (file->f_op->write == redirected_tty_write) {
+ struct file *f;
+ spin_lock(&redirect_lock);
+@@ -2134,9 +2234,9 @@ static int tiocsctty(struct tty_struct *
+ */
+
+ read_lock(&tasklist_lock);
+- do_each_task_pid(tty->session, PIDTYPE_SID, p) {
++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
+ p->signal->tty = NULL;
+- } while_each_task_pid(tty->session, PIDTYPE_SID, p);
++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
+ read_unlock(&tasklist_lock);
+ } else
+ return -EPERM;
+@@ -2158,7 +2258,7 @@ static int tiocgpgrp(struct tty_struct *
+ */
+ if (tty == real_tty && current->signal->tty != real_tty)
+ return -ENOTTY;
+- return put_user(real_tty->pgrp, p);
++ return put_user(pid_type_to_vpid(PIDTYPE_PGID, real_tty->pgrp), p);
+ }
+
+ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
+@@ -2178,6 +2278,9 @@ static int tiocspgrp(struct tty_struct *
+ return -EFAULT;
+ if (pgrp < 0)
+ return -EINVAL;
++ pgrp = vpid_to_pid(pgrp);
++ if (pgrp < 0)
++ return -EPERM;
+ if (session_of_pgrp(pgrp) != current->signal->session)
+ return -EPERM;
+ real_tty->pgrp = pgrp;
+@@ -2194,7 +2297,7 @@ static int tiocgsid(struct tty_struct *t
+ return -ENOTTY;
+ if (real_tty->session <= 0)
+ return -ENOTTY;
+- return put_user(real_tty->session, p);
++ return put_user(pid_type_to_vpid(PIDTYPE_SID, real_tty->session), p);
+ }
+
+ static int tiocsetd(struct tty_struct *tty, int __user *p)
+@@ -2467,7 +2570,7 @@ static void __do_SAK(void *arg)
+ tty->driver->flush_buffer(tty);
+
+ read_lock(&tasklist_lock);
+- do_each_task_pid(session, PIDTYPE_SID, p) {
++ do_each_task_pid_all(session, PIDTYPE_SID, p) {
+ if (p->signal->tty == tty || session > 0) {
+ printk(KERN_NOTICE "SAK: killed process %d"
+ " (%s): p->signal->session==tty->session\n",
+@@ -2495,7 +2598,7 @@ static void __do_SAK(void *arg)
+ rcu_read_unlock();
+ }
+ task_unlock(p);
+- } while_each_task_pid(session, PIDTYPE_SID, p);
++ } while_each_task_pid_all(session, PIDTYPE_SID, p);
+ read_unlock(&tasklist_lock);
+ #endif
+ }
+@@ -2857,8 +2960,11 @@ int tty_register_driver(struct tty_drive
+
+ if (!driver->put_char)
+ driver->put_char = tty_default_put_char;
+-
++
++ SET_VE_OWNER_TTYDRV(driver, get_exec_env());
++ write_lock_irq(&tty_driver_guard);
+ list_add(&driver->tty_drivers, &tty_drivers);
++ write_unlock_irq(&tty_driver_guard);
+
+ if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
+ for(i = 0; i < driver->num; i++)
+@@ -2885,7 +2991,9 @@ int tty_unregister_driver(struct tty_dri
+ unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
+ driver->num);
+
++ write_lock_irq(&tty_driver_guard);
+ list_del(&driver->tty_drivers);
++ write_unlock_irq(&tty_driver_guard);
+
+ /*
+ * Free the termios and termios_locked structures because
+@@ -3008,6 +3116,7 @@ static int __init tty_init(void)
+
+ vty_init();
+ #endif
++ prepare_tty();
+ return 0;
+ }
+ module_init(tty_init);
+diff -uprN linux-2.6.15.orig/drivers/net/Makefile linux-2.6.15-ve025stab014/drivers/net/Makefile
+--- linux-2.6.15.orig/drivers/net/Makefile 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/net/Makefile 2006-01-27 14:48:08.000000000 +0300
+@@ -15,6 +15,9 @@ obj-$(CONFIG_GIANFAR) += gianfar_driver.
+
+ gianfar_driver-objs := gianfar.o gianfar_ethtool.o gianfar_mii.o
+
++obj-$(CONFIG_VE_NETDEV) += vznetdev.o
++vznetdev-objs := open_vznet.o venet_core.o
++
+ #
+ # link order important here
+ #
+diff -uprN linux-2.6.15.orig/drivers/net/loopback.c linux-2.6.15-ve025stab014/drivers/net/loopback.c
+--- linux-2.6.15.orig/drivers/net/loopback.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/net/loopback.c 2006-01-27 14:48:07.000000000 +0300
+@@ -198,6 +198,30 @@ static struct ethtool_ops loopback_ethto
+ .set_tso = ethtool_op_set_tso,
+ };
+
++static void loopback_destructor(struct net_device *dev)
++{
++ kfree(dev->priv);
++ dev->priv = NULL;
++}
++
++struct net_device templ_loopback_dev = {
++ .name = "lo",
++ .mtu = (16 * 1024) + 20 + 20 + 12,
++ .hard_start_xmit = loopback_xmit,
++ .hard_header = eth_header,
++ .hard_header_cache = eth_header_cache,
++ .header_cache_update = eth_header_cache_update,
++ .hard_header_len = ETH_HLEN, /* 14 */
++ .addr_len = ETH_ALEN, /* 6 */
++ .tx_queue_len = 0,
++ .type = ARPHRD_LOOPBACK, /* 0x0001*/
++ .rebuild_header = eth_rebuild_header,
++ .flags = IFF_LOOPBACK,
++ .features = NETIF_F_SG|NETIF_F_FRAGLIST
++ |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
++ |NETIF_F_LLTX,
++};
++
+ struct net_device loopback_dev = {
+ .name = "lo",
+ .mtu = (16 * 1024) + 20 + 20 + 12,
+@@ -231,9 +255,11 @@ int __init loopback_init(void)
+ memset(stats, 0, sizeof(struct net_device_stats));
+ loopback_dev.priv = stats;
+ loopback_dev.get_stats = &get_stats;
++ loopback_dev.destructor = &loopback_destructor;
+ }
+
+ return register_netdev(&loopback_dev);
+ };
+
+ EXPORT_SYMBOL(loopback_dev);
++EXPORT_SYMBOL(templ_loopback_dev);
+diff -uprN linux-2.6.15.orig/drivers/net/open_vznet.c linux-2.6.15-ve025stab014/drivers/net/open_vznet.c
+--- linux-2.6.15.orig/drivers/net/open_vznet.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/net/open_vznet.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,190 @@
++/*
++ * open_vznet.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++/*
++ * Virtual Networking device used to change VE ownership on packets
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/seq_file.h>
++
++#include <linux/inet.h>
++#include <net/ip.h>
++#include <linux/skbuff.h>
++#include <linux/venet.h>
++
++void veip_stop(struct ve_struct *ve)
++{
++ struct list_head *p, *tmp;
++
++ write_lock_irq(&veip_hash_lock);
++ if (ve->veip == NULL)
++ goto unlock;
++ list_for_each_safe(p, tmp, &ve->veip->ip_lh) {
++ struct ip_entry_struct *ptr;
++ ptr = list_entry(p, struct ip_entry_struct, ve_list);
++ ptr->active_env = NULL;
++ list_del(&ptr->ve_list);
++ list_del(&ptr->ip_hash);
++ kfree(ptr);
++ }
++ veip_put(ve->veip);
++ ve->veip = NULL;
++unlock:
++ write_unlock_irq(&veip_hash_lock);
++}
++
++int veip_start(struct ve_struct *ve)
++{
++ int err;
++
++ err = 0;
++ write_lock_irq(&veip_hash_lock);
++ ve->veip = veip_findcreate(ve->veid);
++ if (ve->veip == NULL)
++ err = -ENOMEM;
++ write_unlock_irq(&veip_hash_lock);
++ return err;
++}
++
++int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr)
++{
++ struct ip_entry_struct *entry, *found;
++ int err;
++
++ entry = kmalloc(sizeof(struct ip_entry_struct), GFP_KERNEL);
++ if (entry == NULL)
++ return -ENOMEM;
++
++ memset(entry, 0, sizeof(struct ip_entry_struct));
++ entry->ip = addr->sin_addr.s_addr;
++
++ write_lock_irq(&veip_hash_lock);
++ err = -EADDRINUSE;
++ found = ip_entry_lookup(entry->ip);
++ if (found != NULL)
++ goto out_unlock;
++ else {
++ ip_entry_hash(entry, ve->veip);
++ found = entry;
++ entry = NULL;
++ }
++ err = 0;
++ found->active_env = ve;
++out_unlock:
++ write_unlock_irq(&veip_hash_lock);
++ if (entry != NULL)
++ kfree(entry);
++ return err;
++}
++
++int veip_entry_del(envid_t veid, struct sockaddr_in *addr)
++{
++ struct ip_entry_struct *found;
++ int err;
++
++ err = -EADDRNOTAVAIL;
++ write_lock_irq(&veip_hash_lock);
++ found = ip_entry_lookup(addr->sin_addr.s_addr);
++ if (found == NULL)
++ goto out;
++ if (found->active_env->veid != veid)
++ goto out;
++
++ err = 0;
++ found->active_env = NULL;
++
++ list_del(&found->ip_hash);
++ list_del(&found->ve_list);
++ kfree(found);
++out:
++ write_unlock_irq(&veip_hash_lock);
++ return err;
++}
++
++static struct ve_struct *venet_find_ve(__u32 ip)
++{
++ struct ip_entry_struct *entry;
++
++ entry = ip_entry_lookup(ip);
++ if (entry == NULL)
++ return NULL;
++
++ return entry->active_env;
++}
++
++int venet_change_skb_owner(struct sk_buff *skb)
++{
++ struct ve_struct *ve, *ve_old;
++ struct iphdr *iph;
++
++ ve_old = skb->owner_env;
++ iph = skb->nh.iph;
++
++ read_lock(&veip_hash_lock);
++ if (!ve_is_super(ve_old)) {
++ /* from VE to host */
++ ve = venet_find_ve(iph->saddr);
++ if (ve == NULL)
++ goto out_drop;
++ if (!ve_accessible_strict(ve, ve_old))
++ goto out_source;
++ skb->owner_env = get_ve0();
++ } else {
++ /* from host to VE */
++ ve = venet_find_ve(iph->daddr);
++ if (ve == NULL)
++ goto out_drop;
++ skb->owner_env = ve;
++ }
++ read_unlock(&veip_hash_lock);
++
++ return 0;
++
++out_drop:
++ read_unlock(&veip_hash_lock);
++ return -ESRCH;
++
++out_source:
++ read_unlock(&veip_hash_lock);
++ if (net_ratelimit()) {
++ printk(KERN_WARNING "Dropped packet, source wrong "
++ "veid=%u src-IP=%u.%u.%u.%u "
++ "dst-IP=%u.%u.%u.%u\n",
++ skb->owner_env->veid,
++ NIPQUAD(skb->nh.iph->saddr),
++ NIPQUAD(skb->nh.iph->daddr));
++ }
++ return -EACCES;
++}
++
++#ifdef CONFIG_PROC_FS
++int veip_seq_show(struct seq_file *m, void *v)
++{
++ struct list_head *p;
++ struct ip_entry_struct *entry;
++ char s[16];
++
++ p = (struct list_head *)v;
++ if (p == ip_entry_hash_table) {
++ seq_puts(m, "Version: 2.5\n");
++ return 0;
++ }
++ entry = list_entry(p, struct ip_entry_struct, ip_hash);
++ sprintf(s, "%u.%u.%u.%u", NIPQUAD(entry->ip));
++ seq_printf(m, "%15s %10u\n", s, 0);
++ return 0;
++}
++#endif
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo Virtual Network Device");
++MODULE_LICENSE("GPL v2");
+diff -uprN linux-2.6.15.orig/drivers/net/tun.c linux-2.6.15-ve025stab014/drivers/net/tun.c
+--- linux-2.6.15.orig/drivers/net/tun.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/net/tun.c 2006-01-27 14:48:08.000000000 +0300
+@@ -62,6 +62,7 @@
+
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
++#include <ub/beancounter.h>
+
+ #ifdef TUN_DEBUG
+ static int debug;
+@@ -90,6 +91,7 @@ static int tun_net_close(struct net_devi
+ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct tun_struct *tun = netdev_priv(dev);
++ struct user_beancounter *ub;
+
+ DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
+
+@@ -114,6 +116,18 @@ static int tun_net_xmit(struct sk_buff *
+ }
+ }
+
++ ub = netdev_bc(dev)->exec_ub;
++ if (ub && (skb_bc(skb)->charged == 0)) {
++ unsigned long charge;
++ charge = skb_charge_fullsize(skb);
++ if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1))
++ goto drop;
++ get_beancounter(ub);
++ skb_bc(skb)->ub = ub;
++ skb_bc(skb)->charged = charge;
++ skb_bc(skb)->resource = UB_OTHERSOCKBUF;
++ }
++
+ /* Queue packet */
+ skb_queue_tail(&tun->readq, skb);
+ dev->trans_start = jiffies;
+@@ -407,12 +421,15 @@ static ssize_t tun_chr_readv(struct file
+ tun->dev->name, addr[0], addr[1], addr[2],
+ addr[3], addr[4], addr[5]);
+ ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
++
++ /* skb will be uncharged in kfree_skb() */
+ kfree_skb(skb);
+ break;
+ } else {
+ DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n",
+ tun->dev->name, addr[0], addr[1], addr[2],
+ addr[3], addr[4], addr[5]);
++ /* skb will be uncharged in kfree_skb() */
+ kfree_skb(skb);
+ continue;
+ }
+@@ -476,7 +493,8 @@ static int tun_set_iff(struct file *file
+
+ /* Check permissions */
+ if (tun->owner != -1 &&
+- current->euid != tun->owner && !capable(CAP_NET_ADMIN))
++ current->euid != tun->owner &&
++ !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
+ return -EPERM;
+ }
+ else if (__dev_get_by_name(ifr->ifr_name))
+diff -uprN linux-2.6.15.orig/drivers/net/venet_core.c linux-2.6.15-ve025stab014/drivers/net/venet_core.c
+--- linux-2.6.15.orig/drivers/net/venet_core.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/net/venet_core.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,625 @@
++/*
++ * venet_core.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++/*
++ * Common part for Virtuozzo virtual network devices
++ */
++
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/interrupt.h>
++#include <linux/fs.h>
++#include <linux/types.h>
++#include <linux/string.h>
++#include <linux/socket.h>
++#include <linux/errno.h>
++#include <linux/fcntl.h>
++#include <linux/in.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/tcp.h>
++#include <linux/proc_fs.h>
++#include <linux/seq_file.h>
++
++#include <asm/system.h>
++#include <asm/uaccess.h>
++#include <asm/io.h>
++#include <asm/unistd.h>
++
++#include <linux/inet.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <net/ip.h>
++#include <linux/skbuff.h>
++#include <net/sock.h>
++#include <linux/if_ether.h> /* For the statistics structure. */
++#include <linux/if_arp.h> /* For ARPHRD_ETHER */
++#include <linux/venet.h>
++#include <linux/ve_proto.h>
++#include <linux/vzctl.h>
++#include <linux/vzctl_venet.h>
++
++struct list_head ip_entry_hash_table[VEIP_HASH_SZ];
++rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED;
++LIST_HEAD(veip_lh);
++
++#define ip_entry_hash_function(ip) (ntohl(ip) & (VEIP_HASH_SZ - 1))
++
++void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip)
++{
++ list_add(&entry->ip_hash,
++ ip_entry_hash_table + ip_entry_hash_function(entry->ip));
++ list_add(&entry->ve_list, &veip->ip_lh);
++}
++
++void veip_put(struct veip_struct *veip)
++{
++ if (!list_empty(&veip->ip_lh))
++ return;
++ if (!list_empty(&veip->src_lh))
++ return;
++ if (!list_empty(&veip->dst_lh))
++ return;
++
++ list_del(&veip->list);
++ kfree(veip);
++}
++
++struct ip_entry_struct *ip_entry_lookup(u32 addr)
++{
++ struct ip_entry_struct *entry;
++ struct list_head *tmp;
++
++ list_for_each(tmp, ip_entry_hash_table + ip_entry_hash_function(addr)) {
++ entry = list_entry(tmp, struct ip_entry_struct, ip_hash);
++ if (entry->ip != addr)
++ continue;
++ return entry;
++ }
++ return NULL;
++}
++
++struct veip_struct *veip_find(envid_t veid)
++{
++ struct veip_struct *ptr;
++ list_for_each_entry(ptr, &veip_lh, list) {
++ if (ptr->veid != veid)
++ continue;
++ return ptr;
++ }
++ return NULL;
++}
++
++struct veip_struct *veip_findcreate(envid_t veid)
++{
++ struct veip_struct *ptr;
++
++ ptr = veip_find(veid);
++ if (ptr != NULL)
++ return ptr;
++
++ ptr = kmalloc(sizeof(struct veip_struct), GFP_ATOMIC);
++ if (ptr == NULL)
++ return NULL;
++ memset(ptr, 0, sizeof(struct veip_struct));
++ INIT_LIST_HEAD(&ptr->ip_lh);
++ INIT_LIST_HEAD(&ptr->src_lh);
++ INIT_LIST_HEAD(&ptr->dst_lh);
++ list_add(&ptr->list, &veip_lh);
++ ptr->veid = veid;
++ return ptr;
++}
++
++/*
++ * Device functions
++ */
++
++static int venet_open(struct net_device *dev)
++{
++ if (!try_module_get(THIS_MODULE))
++ return -EBUSY;
++ return 0;
++}
++
++static int venet_close(struct net_device *master)
++{
++ module_put(THIS_MODULE);
++ return 0;
++}
++
++static void venet_destructor(struct net_device *dev)
++{
++ kfree(dev->priv);
++ dev->priv = NULL;
++}
++
++/*
++ * The higher levels take care of making this non-reentrant (it's
++ * called with bh's disabled).
++ */
++static int venet_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++ struct net_device_stats *stats = (struct net_device_stats *)dev->priv;
++ struct net_device *rcv = NULL;
++ struct iphdr *iph;
++ int length;
++
++ /*
++ * Optimise so buffers with skb->free=1 are not copied but
++ * instead are lobbed from tx queue to rx queue
++ */
++ if (atomic_read(&skb->users) != 1) {
++ struct sk_buff *skb2 = skb;
++ skb = skb_clone(skb, GFP_ATOMIC); /* Clone the buffer */
++ if (skb == NULL) {
++ kfree_skb(skb2);
++ goto out;
++ }
++ kfree_skb(skb2);
++ } else
++ skb_orphan(skb);
++
++ if (skb->protocol != __constant_htons(ETH_P_IP))
++ goto outf;
++
++ iph = skb->nh.iph;
++ if (MULTICAST(iph->daddr))
++ goto outf;
++
++ if (venet_change_skb_owner(skb) < 0)
++ goto outf;
++
++ rcv = VE_OWNER_SKB(skb)->_venet_dev;
++ if (!rcv)
++ /* VE going down */
++ goto outf;
++
++ dev_hold(rcv);
++
++ if (!(rcv->flags & IFF_UP)) {
++ /* Target VE does not want to receive packets */
++ dev_put(rcv);
++ goto outf;
++ }
++
++ skb->pkt_type = PACKET_HOST;
++ skb->dev = rcv;
++
++ skb->mac.raw = skb->data;
++ memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len);
++
++ dst_release(skb->dst);
++ skb->dst = NULL;
++#ifdef CONFIG_NETFILTER
++ nf_conntrack_put(skb->nfct);
++ skb->nfct = NULL;
++#ifdef CONFIG_NETFILTER_DEBUG
++ skb->nf_debug = 0;
++#endif
++#endif
++ length = skb->len;
++
++ netif_rx(skb);
++
++ stats->tx_bytes += length;
++ stats->tx_packets++;
++ if (rcv) {
++ struct net_device_stats *rcv_stats =
++ (struct net_device_stats *)rcv->priv;
++ rcv_stats->rx_bytes += length;
++ rcv_stats->rx_packets++;
++ dev_put(rcv);
++ }
++
++ return 0;
++
++outf:
++ kfree_skb(skb);
++ ++stats->tx_dropped;
++out:
++ return 0;
++}
++
++static struct net_device_stats *get_stats(struct net_device *dev)
++{
++ return (struct net_device_stats *)dev->priv;
++}
++
++/* Initialize the rest of the venet device. */
++int venet_init_dev(struct net_device *dev)
++{
++ dev->hard_start_xmit = venet_xmit;
++ dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
++ if (dev->priv == NULL)
++ return -ENOMEM;
++ memset(dev->priv, 0, sizeof(struct net_device_stats));
++ dev->get_stats = get_stats;
++ dev->open = venet_open;
++ dev->stop = venet_close;
++ dev->destructor = venet_destructor;
++
++ /*
++ * Fill in the generic fields of the device structure.
++ */
++ dev->type = ARPHRD_VOID;
++ dev->hard_header_len = ETH_HLEN;
++ dev->mtu = 1500; /* eth_mtu */
++ dev->tx_queue_len = 0;
++
++ memset(dev->broadcast, 0xFF, ETH_ALEN);
++
++ /* New-style flags. */
++ dev->flags = IFF_BROADCAST|IFF_NOARP|IFF_POINTOPOINT;
++ return 0;
++}
++
++static void venet_setup(struct net_device *dev)
++{
++ dev->init = venet_init_dev;
++ /*
++	 * No other features are set, because:
++	 * - checksumming is required, and nobody else will do our job
++ */
++ dev->features |= NETIF_F_VENET;
++}
++
++#ifdef CONFIG_PROC_FS
++static int veinfo_seq_show(struct seq_file *m, void *v)
++{
++ struct ve_struct *ve = (struct ve_struct *)v;
++ struct list_head *tmp;
++
++ seq_printf(m, "%10u %5u %5u", ve->veid,
++ ve->class_id, atomic_read(&ve->pcounter));
++ read_lock(&veip_hash_lock);
++ if (ve->veip == NULL)
++ goto unlock;
++ list_for_each(tmp, &ve->veip->ip_lh) {
++ char ip[16];
++ struct ip_entry_struct *entry;
++
++ entry = list_entry(tmp, struct ip_entry_struct, ve_list);
++ if (entry->active_env == NULL)
++ continue;
++
++ sprintf(ip, "%u.%u.%u.%u", NIPQUAD(entry->ip));
++ seq_printf(m, " %15s", ip);
++ }
++unlock:
++ read_unlock(&veip_hash_lock);
++ seq_putc(m, '\n');
++ return 0;
++}
++
++static void *ve_seq_start(struct seq_file *m, loff_t *pos)
++{
++ struct ve_struct *ve, *curve;
++ loff_t l;
++
++ curve = get_exec_env();
++ read_lock(&ve_list_guard);
++ if (!ve_is_super(curve)) {
++ if (*pos != 0)
++ return NULL;
++ return curve;
++ }
++ for (ve = ve_list_head, l = *pos;
++ ve != NULL && l > 0;
++ ve = ve->next, l--);
++ return ve;
++}
++
++static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ struct ve_struct *ve = (struct ve_struct *)v;
++
++ if (!ve_is_super(get_exec_env()))
++ return NULL;
++ (*pos)++;
++ return ve->next;
++}
++
++static void ve_seq_stop(struct seq_file *m, void *v)
++{
++ read_unlock(&ve_list_guard);
++}
++
++
++static struct seq_operations veinfo_seq_op = {
++ start: ve_seq_start,
++ next: ve_seq_next,
++ stop: ve_seq_stop,
++ show: veinfo_seq_show
++};
++
++static int veinfo_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &veinfo_seq_op);
++}
++
++static struct file_operations proc_veinfo_operations = {
++ open: veinfo_open,
++ read: seq_read,
++ llseek: seq_lseek,
++ release: seq_release
++};
++
++static void *veip_seq_start(struct seq_file *m, loff_t *pos)
++{
++ loff_t l;
++ struct list_head *p;
++ int i;
++
++ l = *pos;
++ write_lock_irq(&veip_hash_lock);
++ if (l == 0)
++ return ip_entry_hash_table;
++ for (i = 0; i < VEIP_HASH_SZ; i++) {
++ list_for_each(p, ip_entry_hash_table + i) {
++ if (--l == 0)
++ return p;
++ }
++ }
++ return NULL;
++}
++
++static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ struct list_head *p;
++
++ p = (struct list_head *)v;
++ while (1) {
++ p = p->next;
++ if (p < ip_entry_hash_table ||
++ p >= ip_entry_hash_table + VEIP_HASH_SZ) {
++ (*pos)++;
++ return p;
++ }
++ if (++p >= ip_entry_hash_table + VEIP_HASH_SZ)
++ return NULL;
++ }
++ return NULL;
++}
++
++static void veip_seq_stop(struct seq_file *m, void *v)
++{
++ write_unlock_irq(&veip_hash_lock);
++}
++
++static struct seq_operations veip_seq_op = {
++ start: veip_seq_start,
++ next: veip_seq_next,
++ stop: veip_seq_stop,
++ show: veip_seq_show
++};
++
++static int veip_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &veip_seq_op);
++}
++
++static struct file_operations proc_veip_operations = {
++ open: veip_open,
++ read: seq_read,
++ llseek: seq_lseek,
++ release: seq_release
++};
++#endif
++
++int real_ve_ip_map(envid_t veid, int op, struct sockaddr *uservaddr, int addrlen)
++{
++ int err;
++ struct sockaddr_in addr;
++ struct ve_struct *ve;
++
++ err = -EPERM;
++ if (!capable(CAP_SETVEID))
++ goto out;
++
++ err = -EINVAL;
++ if (addrlen != sizeof(struct sockaddr_in))
++ goto out;
++
++ err = move_addr_to_kernel(uservaddr, addrlen, &addr);
++ if (err < 0)
++ goto out;
++
++ switch (op)
++ {
++ case VE_IP_ADD:
++ ve = get_ve_by_id(veid);
++ err = -ESRCH;
++ if (!ve)
++ goto out;
++
++ down_read(&ve->op_sem);
++ if (ve->is_running)
++ err = veip_entry_add(ve, &addr);
++ up_read(&ve->op_sem);
++ put_ve(ve);
++ break;
++
++ case VE_IP_DEL:
++ err = veip_entry_del(veid, &addr);
++ break;
++ default:
++ err = -EINVAL;
++ }
++
++out:
++ return err;
++}
++
++int venet_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ int err;
++
++ err = -ENOTTY;
++ switch(cmd) {
++ case VENETCTL_VE_IP_MAP: {
++ struct vzctl_ve_ip_map s;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++ err = real_ve_ip_map(s.veid, s.op, s.addr, s.addrlen);
++ }
++ break;
++ }
++ return err;
++}
++
++static struct vzioctlinfo venetcalls = {
++ type: VENETCTLTYPE,
++ func: venet_ioctl,
++ owner: THIS_MODULE,
++};
++
++int venet_dev_start(struct ve_struct *env)
++{
++ struct net_device *dev_venet;
++ int err;
++
++ dev_venet = alloc_netdev(0, "venet%d", venet_setup);
++ if (!dev_venet)
++ return -ENOMEM;
++ err = dev_alloc_name(dev_venet, dev_venet->name);
++	if (err < 0)
++ goto err;
++ if ((err = register_netdev(dev_venet)) != 0)
++ goto err;
++ env->_venet_dev = dev_venet;
++ return 0;
++err:
++ free_netdev(dev_venet);
++ printk(KERN_ERR "VENET initialization error err=%d\n", err);
++ return err;
++}
++
++static int venet_start(unsigned int hooknum, void *data)
++{
++ struct ve_struct *env;
++ int err;
++
++ env = ((struct ve_hook_init_data *)data)->env;
++ if (env->veip)
++ return -EEXIST;
++ if (!ve_is_super(env) && !try_module_get(THIS_MODULE))
++ return 0;
++
++ err = veip_start(env);
++ if (err)
++ goto err;
++
++ err = venet_dev_start(env);
++ if (err)
++ goto err_free;
++ return 0;
++
++err_free:
++ veip_stop(env);
++err:
++ if (!ve_is_super(env))
++ module_put(THIS_MODULE);
++ return err;
++}
++
++static int venet_stop(unsigned int hooknum, void *data)
++{
++ struct ve_struct *env;
++
++ if (hooknum == VE_HOOK_INIT)
++ env = ((struct ve_hook_init_data *)data)->env;
++ else
++ env = (struct ve_struct *)data;
++ veip_stop(env);
++ if (!ve_is_super(env))
++ module_put(THIS_MODULE);
++ return 0;
++}
++
++#define VE_HOOK_PRI_NET 0
++
++static struct ve_hook venet_ve_hook_init = {
++ hook: venet_start,
++ undo: venet_stop,
++ hooknum: VE_HOOK_INIT,
++ priority: VE_HOOK_PRI_NET
++};
++
++static struct ve_hook venet_ve_hook_fini = {
++ hook: venet_stop,
++ hooknum: VE_HOOK_FINI,
++ priority: VE_HOOK_PRI_NET
++};
++
++__init int venet_init(void)
++{
++ struct ve_hook_init_data vhd;
++#ifdef CONFIG_PROC_FS
++ struct proc_dir_entry *de;
++#endif
++ int i, err;
++
++ if (get_ve0()->_venet_dev != NULL)
++ return -EEXIST;
++
++ for (i = 0; i < VEIP_HASH_SZ; i++)
++ INIT_LIST_HEAD(ip_entry_hash_table + i);
++
++ vhd.env = get_ve0();
++ err = venet_start(VE_HOOK_INIT, (void *)&vhd);
++ if (err)
++ return err;
++
++#ifdef CONFIG_PROC_FS
++ de = create_proc_glob_entry("vz/veinfo",
++ S_IFREG|S_IRUSR, NULL);
++ if (de)
++ de->proc_fops = &proc_veinfo_operations;
++ else
++ printk(KERN_WARNING "venet: can't make veinfo proc entry\n");
++
++ de = create_proc_entry("vz/veip", S_IFREG|S_IRUSR, NULL);
++ if (de)
++ de->proc_fops = &proc_veip_operations;
++ else
++ printk(KERN_WARNING "venet: can't make veip proc entry\n");
++#endif
++
++ ve_hook_register(&venet_ve_hook_init);
++ ve_hook_register(&venet_ve_hook_fini);
++ vzioctl_register(&venetcalls);
++ return 0;
++}
++
++__exit void venet_exit(void)
++{
++ struct net_device *dev_venet;
++
++ vzioctl_unregister(&venetcalls);
++ ve_hook_unregister(&venet_ve_hook_fini);
++ ve_hook_unregister(&venet_ve_hook_init);
++#ifdef CONFIG_PROC_FS
++ remove_proc_entry("vz/veip", NULL);
++ remove_proc_entry("vz/veinfo", NULL);
++#endif
++
++ dev_venet = get_ve0()->_venet_dev;
++ if (dev_venet != NULL) {
++ get_ve0()->_venet_dev = NULL;
++ unregister_netdev(dev_venet);
++ free_netdev(dev_venet);
++ }
++ veip_stop(get_ve0());
++}
++
++module_init(venet_init);
++module_exit(venet_exit);
+diff -uprN linux-2.6.15.orig/drivers/pci/probe.c linux-2.6.15-ve025stab014/drivers/pci/probe.c
+--- linux-2.6.15.orig/drivers/pci/probe.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/pci/probe.c 2006-01-27 14:48:08.000000000 +0300
+@@ -21,6 +21,7 @@ LIST_HEAD(pci_root_buses);
+ EXPORT_SYMBOL(pci_root_buses);
+
+ LIST_HEAD(pci_devices);
++EXPORT_SYMBOL(pci_devices);
+
+ #ifdef HAVE_PCI_LEGACY
+ /**
+diff -uprN linux-2.6.15.orig/drivers/s390/cio/cio.c linux-2.6.15-ve025stab014/drivers/s390/cio/cio.c
+--- linux-2.6.15.orig/drivers/s390/cio/cio.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/drivers/s390/cio/cio.c 2006-01-27 14:48:08.000000000 +0300
+@@ -604,7 +604,11 @@ do_IRQ (struct pt_regs *regs)
+ struct tpi_info *tpi_info;
+ struct subchannel *sch;
+ struct irb *irb;
++ struct ve_struct *ve;
++ struct user_beancounter *ub;
+
++ ve = set_exec_env(get_ve0());
++ ub = set_exec_ub(get_ub0());
+ irq_enter ();
+ asm volatile ("mc 0,0");
+ if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
+@@ -651,6 +655,8 @@ do_IRQ (struct pt_regs *regs)
+ */
+ } while (!MACHINE_IS_VM && tpi (NULL) != 0);
+ irq_exit ();
++ (void)set_exec_ub(ub);
++ (void)set_exec_env(ve);
+ }
+
+ #ifdef CONFIG_CCW_CONSOLE
+diff -uprN linux-2.6.15.orig/fs/Kconfig linux-2.6.15-ve025stab014/fs/Kconfig
+--- linux-2.6.15.orig/fs/Kconfig 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/Kconfig 2006-01-27 14:48:08.000000000 +0300
+@@ -403,6 +403,38 @@ config QFMT_V2
+ This quota format allows using quotas with 32-bit UIDs/GIDs. If you
+ need this functionality say Y here.
+
++config SIM_FS
++ tristate "VPS filesystem"
++ depends on VZ_QUOTA
++ default m
++ help
++	  This file system is a part of Virtuozzo. It introduces a fake
++	  superblock and block device into a VE to hide the real device and
++	  show statfs results taken from the quota.
++
++config VZ_QUOTA
++ tristate "Virtuozzo Disk Quota support"
++ depends on QUOTA
++ default m
++ help
++	  Virtuozzo Disk Quota imposes a disk quota on directories together
++	  with all of their files and subdirectories. Such a quota is used to
++	  account for and limit disk usage by a Virtuozzo VPS, but it may
++	  also be used separately.
++
++config VZ_QUOTA_UNLOAD
++ bool "Unloadable Virtuozzo Disk Quota module"
++ depends on VZ_QUOTA=m
++ default n
++ help
++	  Make the Virtuozzo Disk Quota module unloadable.
++	  This does not work reliably yet.
++
++config VZ_QUOTA_UGID
++ bool "Per-user and per-group quota in Virtuozzo quota partitions"
++ depends on VZ_QUOTA!=n
++ default y
++
+ config QUOTACTL
+ bool
+ depends on XFS_QUOTA || QUOTA
+diff -uprN linux-2.6.15.orig/fs/Makefile linux-2.6.15-ve025stab014/fs/Makefile
+--- linux-2.6.15.orig/fs/Makefile 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/Makefile 2006-01-27 14:48:08.000000000 +0300
+@@ -39,9 +39,15 @@ obj-$(CONFIG_QUOTA) += dquot.o
+ obj-$(CONFIG_QFMT_V1) += quota_v1.o
+ obj-$(CONFIG_QFMT_V2) += quota_v2.o
+ obj-$(CONFIG_QUOTACTL) += quota.o
++obj-$(CONFIG_VZ_QUOTA) += vzdquota.o
++vzdquota-y += vzdquot.o vzdq_mgmt.o vzdq_ops.o vzdq_tree.o
++vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_ugid.o
++vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
+
+ obj-$(CONFIG_DNOTIFY) += dnotify.o
+
++obj-$(CONFIG_SIM_FS) += simfs.o
++
+ obj-$(CONFIG_PROC_FS) += proc/
+ obj-y += partitions/
+ obj-$(CONFIG_SYSFS) += sysfs/
+diff -uprN linux-2.6.15.orig/fs/binfmt_elf.c linux-2.6.15-ve025stab014/fs/binfmt_elf.c
+--- linux-2.6.15.orig/fs/binfmt_elf.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/binfmt_elf.c 2006-01-27 14:48:08.000000000 +0300
+@@ -355,7 +355,7 @@ static unsigned long load_elf_interp(str
+ eppnt = elf_phdata;
+ for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
+ if (eppnt->p_type == PT_LOAD) {
+- int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
++ int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECPRIO;
+ int elf_prot = 0;
+ unsigned long vaddr = 0;
+ unsigned long k, map_addr;
+@@ -828,7 +828,7 @@ static int load_elf_binary(struct linux_
+ if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
+ if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
+
+- elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
++ elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE|MAP_EXECPRIO;
+
+ vaddr = elf_ppnt->p_vaddr;
+ if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
+@@ -1270,10 +1270,10 @@ static void fill_prstatus(struct elf_prs
+ prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
+ prstatus->pr_sigpend = p->pending.signal.sig[0];
+ prstatus->pr_sighold = p->blocked.sig[0];
+- prstatus->pr_pid = p->pid;
+- prstatus->pr_ppid = p->parent->pid;
+- prstatus->pr_pgrp = process_group(p);
+- prstatus->pr_sid = p->signal->session;
++ prstatus->pr_pid = virt_pid(p);
++ prstatus->pr_ppid = virt_pid(p->parent);
++ prstatus->pr_pgrp = virt_pgid(p);
++ prstatus->pr_sid = virt_sid(p);
+ if (thread_group_leader(p)) {
+ /*
+ * This is the record for the group leader. Add in the
+@@ -1316,10 +1316,10 @@ static int fill_psinfo(struct elf_prpsin
+ psinfo->pr_psargs[i] = ' ';
+ psinfo->pr_psargs[len] = 0;
+
+- psinfo->pr_pid = p->pid;
+- psinfo->pr_ppid = p->parent->pid;
+- psinfo->pr_pgrp = process_group(p);
+- psinfo->pr_sid = p->signal->session;
++ psinfo->pr_pid = virt_pid(p);
++ psinfo->pr_ppid = virt_pid(p->parent);
++ psinfo->pr_pgrp = virt_pgid(p);
++ psinfo->pr_sid = virt_sid(p);
+
+ i = p->state ? ffz(~p->state) + 1 : 0;
+ psinfo->pr_state = i;
+@@ -1452,7 +1452,7 @@ static int elf_core_dump(long signr, str
+ if (signr) {
+ struct elf_thread_status *tmp;
+ read_lock(&tasklist_lock);
+- do_each_thread(g,p)
++ do_each_thread_ve(g,p)
+ if (current->mm == p->mm && current != p) {
+ tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
+ if (!tmp) {
+@@ -1464,7 +1464,7 @@ static int elf_core_dump(long signr, str
+ tmp->thread = p;
+ list_add(&tmp->list, &thread_list);
+ }
+- while_each_thread(g,p);
++ while_each_thread_ve(g,p);
+ read_unlock(&tasklist_lock);
+ list_for_each(t, &thread_list) {
+ struct elf_thread_status *tmp;
+diff -uprN linux-2.6.15.orig/fs/block_dev.c linux-2.6.15-ve025stab014/fs/block_dev.c
+--- linux-2.6.15.orig/fs/block_dev.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/block_dev.c 2006-01-27 14:48:08.000000000 +0300
+@@ -561,9 +561,16 @@ static int do_open(struct block_device *
+ {
+ struct module *owner = NULL;
+ struct gendisk *disk;
+- int ret = -ENXIO;
++ int ret;
+ int part;
+
++#ifdef CONFIG_VE
++ ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev,
++ file->f_mode&(FMODE_READ|FMODE_WRITE));
++ if (ret)
++ return ret;
++#endif
++ ret = -ENXIO;
+ file->f_mapping = bdev->bd_inode->i_mapping;
+ lock_kernel();
+ disk = get_gendisk(bdev->bd_dev, &part);
+@@ -832,7 +839,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
+ * namespace if possible and return it. Return ERR_PTR(error)
+ * otherwise.
+ */
+-struct block_device *lookup_bdev(const char *path)
++struct block_device *lookup_bdev(const char *path, int mode)
+ {
+ struct block_device *bdev;
+ struct inode *inode;
+@@ -850,6 +857,11 @@ struct block_device *lookup_bdev(const c
+ error = -ENOTBLK;
+ if (!S_ISBLK(inode->i_mode))
+ goto fail;
++#ifdef CONFIG_VE
++ error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode);
++ if (error)
++ goto fail;
++#endif
+ error = -EACCES;
+ if (nd.mnt->mnt_flags & MNT_NODEV)
+ goto fail;
+@@ -881,12 +893,13 @@ struct block_device *open_bdev_excl(cons
+ mode_t mode = FMODE_READ;
+ int error = 0;
+
+- bdev = lookup_bdev(path);
++ if (!(flags & MS_RDONLY))
++ mode |= FMODE_WRITE;
++
++ bdev = lookup_bdev(path, mode);
+ if (IS_ERR(bdev))
+ return bdev;
+
+- if (!(flags & MS_RDONLY))
+- mode |= FMODE_WRITE;
+ error = blkdev_get(bdev, mode, 0);
+ if (error)
+ return ERR_PTR(error);
+diff -uprN linux-2.6.15.orig/fs/char_dev.c linux-2.6.15-ve025stab014/fs/char_dev.c
+--- linux-2.6.15.orig/fs/char_dev.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/char_dev.c 2006-01-27 14:48:08.000000000 +0300
+@@ -292,6 +292,13 @@ int chrdev_open(struct inode * inode, st
+ struct cdev *new = NULL;
+ int ret = 0;
+
++#ifdef CONFIG_VE
++ ret = get_device_perms_ve(S_IFCHR, inode->i_rdev,
++ filp->f_mode&(FMODE_READ|FMODE_WRITE));
++ if (ret)
++ return ret;
++#endif
++
+ spin_lock(&cdev_lock);
+ p = inode->i_cdev;
+ if (!p) {
+diff -uprN linux-2.6.15.orig/fs/dcache.c linux-2.6.15-ve025stab014/fs/dcache.c
+--- linux-2.6.15.orig/fs/dcache.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/dcache.c 2006-01-27 14:48:08.000000000 +0300
+@@ -28,11 +28,15 @@
+ #include <linux/module.h>
+ #include <linux/mount.h>
+ #include <linux/file.h>
++#include <linux/namei.h>
+ #include <asm/uaccess.h>
+ #include <linux/security.h>
+ #include <linux/seqlock.h>
+ #include <linux/swap.h>
+ #include <linux/bootmem.h>
++#include <linux/kernel_stat.h>
++
++#include <ub/ub_dcache.h>
+
+ /* #define DCACHE_DEBUG 1 */
+
+@@ -44,7 +48,7 @@ static seqlock_t rename_lock __cacheline
+
+ EXPORT_SYMBOL(dcache_lock);
+
+-static kmem_cache_t *dentry_cache;
++kmem_cache_t *dentry_cache;
+
+ #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
+
+@@ -114,6 +118,75 @@ static inline void dentry_iput(struct de
+ }
+ }
+
++struct dcache_shrinker {
++ struct list_head list;
++ struct dentry *dentry;
++};
++
++DECLARE_WAIT_QUEUE_HEAD(dcache_shrinker_wq);
++
++/* called under dcache_lock */
++static void dcache_shrinker_add(struct dcache_shrinker *ds,
++ struct dentry *parent, struct dentry *dentry)
++{
++ struct super_block *sb;
++
++ sb = parent->d_sb;
++ ds->dentry = parent;
++ list_add(&ds->list, &sb->s_dshrinkers);
++}
++
++/* called under dcache_lock */
++static void dcache_shrinker_del(struct dcache_shrinker *ds)
++{
++ if (ds == NULL || list_empty(&ds->list))
++ return;
++
++ list_del_init(&ds->list);
++ wake_up_all(&dcache_shrinker_wq);
++}
++
++/* called under dcache_lock, drops inside */
++static void dcache_shrinker_wait(struct super_block *sb)
++{
++ DECLARE_WAITQUEUE(wq, current);
++
++ __set_current_state(TASK_UNINTERRUPTIBLE);
++ add_wait_queue(&dcache_shrinker_wq, &wq);
++ spin_unlock(&dcache_lock);
++
++ schedule();
++ remove_wait_queue(&dcache_shrinker_wq, &wq);
++ __set_current_state(TASK_RUNNING);
++}
++
++void dcache_shrinker_wait_sb(struct super_block *sb)
++{
++ /* the root dentry can be held in dput_recursive */
++ spin_lock(&dcache_lock);
++ while (!list_empty(&sb->s_dshrinkers)) {
++ dcache_shrinker_wait(sb);
++ spin_lock(&dcache_lock);
++ }
++ spin_unlock(&dcache_lock);
++}
++
++/* dcache_lock protects shrinker's list */
++static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck)
++{
++ struct super_block *sb;
++ struct dcache_shrinker *ds;
++
++ sb = parent->d_sb;
++ list_for_each_entry(ds, &sb->s_dshrinkers, list) {
++ /* is one of dcache shrinkers working on the dentry? */
++ if (ds->dentry == parent) {
++ *racecheck = 1;
++ break;
++ }
++ }
++}
++
+ /*
+ * This is dput
+ *
+@@ -132,8 +205,9 @@ static inline void dentry_iput(struct de
+ */
+
+ /*
+- * dput - release a dentry
+- * @dentry: dentry to release
++ * dput_recursive - go upward through the dentry tree and release dentries
++ * @dentry: starting dentry
++ * @ds: shrinker to be added to active list (see shrink_dcache_parent)
+ *
+ * Release a dentry. This will drop the usage count and if appropriate
+ * call the dentry unlink method as well as removing it from the queues and
+@@ -142,18 +216,15 @@ static inline void dentry_iput(struct de
+ *
+ * no dcache lock, please.
+ */
+-
+-void dput(struct dentry *dentry)
++static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds)
+ {
+- if (!dentry)
+- return;
+-
+-repeat:
+ if (atomic_read(&dentry->d_count) == 1)
+ might_sleep();
+ if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
+ return;
++ dcache_shrinker_del(ds);
+
++repeat:
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+@@ -185,6 +256,7 @@ unhash_it:
+
+ kill_it: {
+ struct dentry *parent;
++ struct dcache_shrinker lds;
+
+ /* If dentry was on d_lru list
+ * delete it from there
+@@ -194,18 +266,50 @@ kill_it: {
+ dentry_stat.nr_unused--;
+ }
+ list_del(&dentry->d_child);
++ parent = dentry->d_parent;
++ dcache_shrinker_add(&lds, parent, dentry);
+ dentry_stat.nr_dentry--; /* For d_free, below */
+ /*drops the locks, at that point nobody can reach this dentry */
+ dentry_iput(dentry);
+- parent = dentry->d_parent;
+ d_free(dentry);
+- if (dentry == parent)
++ if (unlikely(dentry == parent)) {
++ spin_lock(&dcache_lock);
++ dcache_shrinker_del(&lds);
++ spin_unlock(&dcache_lock);
+ return;
++ }
+ dentry = parent;
+- goto repeat;
++ spin_lock(&dcache_lock);
++ dcache_shrinker_del(&lds);
++ if (atomic_dec_and_test(&dentry->d_count))
++ goto repeat;
++ spin_unlock(&dcache_lock);
+ }
+ }
+
++/*
++ * dput - release a dentry
++ * @dentry: dentry to release
++ *
++ * Release a dentry. This will drop the usage count and if appropriate
++ * call the dentry unlink method as well as removing it from the queues and
++ * releasing its resources. If the parent dentries were scheduled for release
++ * they too may now get deleted.
++ *
++ * no dcache lock, please.
++ */
++
++void dput(struct dentry *dentry)
++{
++ if (!dentry)
++ return;
++
++ spin_lock(&dcache_lock);
++ ub_dentry_uncharge(dentry);
++ spin_unlock(&dcache_lock);
++ dput_recursive(dentry, NULL);
++}
++
+ /**
+ * d_invalidate - invalidate a dentry
+ * @dentry: dentry to invalidate
+@@ -272,6 +376,8 @@ static inline struct dentry * __dget_loc
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ }
++
++ ub_dentry_charge_nofail(dentry);
+ return dentry;
+ }
+
+@@ -362,19 +468,27 @@ restart:
+ * removed.
+ * Called with dcache_lock, drops it and then regains.
+ */
+-static inline void prune_one_dentry(struct dentry * dentry)
++static void prune_one_dentry(struct dentry * dentry)
+ {
+ struct dentry * parent;
++ struct dcache_shrinker ds;
+
+ __d_drop(dentry);
+ list_del(&dentry->d_child);
++ parent = dentry->d_parent;
++ dcache_shrinker_add(&ds, parent, dentry);
+ dentry_stat.nr_dentry--; /* For d_free, below */
+ dentry_iput(dentry);
+ parent = dentry->d_parent;
+ d_free(dentry);
+ if (parent != dentry)
+- dput(parent);
++ /*
++ * dentry is not in use, only child (not outside)
++ * references change, so parent->d_inuse does not change
++ */
++ dput_recursive(parent, &ds);
+ spin_lock(&dcache_lock);
++ dcache_shrinker_del(&ds);
+ }
+
+ /**
+@@ -557,13 +671,12 @@ positive:
+ * drop the lock and return early due to latency
+ * constraints.
+ */
+-static int select_parent(struct dentry * parent)
++static int select_parent(struct dentry * parent, int * racecheck)
+ {
+ struct dentry *this_parent = parent;
+ struct list_head *next;
+ int found = 0;
+
+- spin_lock(&dcache_lock);
+ repeat:
+ next = this_parent->d_subdirs.next;
+ resume:
+@@ -605,6 +718,9 @@ dentry->d_parent->d_name.name, dentry->d
+ #endif
+ goto repeat;
+ }
++
++ if (!found && racecheck != NULL)
++ shrink_dcache_racecheck(dentry, racecheck);
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+@@ -619,7 +735,6 @@ this_parent->d_parent->d_name.name, this
+ goto resume;
+ }
+ out:
+- spin_unlock(&dcache_lock);
+ return found;
+ }
+
+@@ -632,10 +747,66 @@ out:
+
+ void shrink_dcache_parent(struct dentry * parent)
+ {
+- int found;
++ int found, r;
++
++ while (1) {
++ spin_lock(&dcache_lock);
++ found = select_parent(parent, NULL);
++ if (found)
++ goto found;
+
+- while ((found = select_parent(parent)) != 0)
++ /*
++		 * Try again with a dput_recursive() race check;
++		 * it returns quickly if everything was really shrunk.
++ */
++ r = 0;
++ found = select_parent(parent, &r);
++ if (found)
++ goto found;
++ if (!r)
++ break;
++
++ /* drops the lock inside */
++ dcache_shrinker_wait(parent->d_sb);
++ continue;
++
++found:
++ spin_unlock(&dcache_lock);
+ prune_dcache(found);
++ }
++ spin_unlock(&dcache_lock);
++}
++
++/*
++ * Move any unused anon dentries to the end of the unused list.
++ * called under dcache_lock
++ */
++static int select_anon(struct hlist_head *head, int *racecheck)
++{
++ struct hlist_node *lp;
++ int found = 0;
++
++ hlist_for_each(lp, head) {
++ struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
++ if (!list_empty(&this->d_lru)) {
++ dentry_stat.nr_unused--;
++ list_del_init(&this->d_lru);
++ }
++
++ /*
++ * move only zero ref count dentries to the end
++ * of the unused list for prune_dcache
++ */
++ if (!atomic_read(&this->d_count)) {
++ list_add_tail(&this->d_lru, &dentry_unused);
++ dentry_stat.nr_unused++;
++ found++;
++ }
++
++ if (!found && racecheck != NULL)
++ shrink_dcache_racecheck(this, racecheck);
++ }
++ return found;
+ }
+
+ /**
+@@ -648,33 +819,36 @@ void shrink_dcache_parent(struct dentry
+ * done under dcache_lock.
+ *
+ */
+-void shrink_dcache_anon(struct hlist_head *head)
++void shrink_dcache_anon(struct super_block *sb)
+ {
+- struct hlist_node *lp;
+- int found;
+- do {
+- found = 0;
++ int found, r;
++
++ while (1) {
+ spin_lock(&dcache_lock);
+- hlist_for_each(lp, head) {
+- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
+- if (!list_empty(&this->d_lru)) {
+- dentry_stat.nr_unused--;
+- list_del_init(&this->d_lru);
+- }
++ found = select_anon(&sb->s_anon, NULL);
++ if (found)
++ goto found;
+
+- /*
+- * move only zero ref count dentries to the end
+- * of the unused list for prune_dcache
+- */
+- if (!atomic_read(&this->d_count)) {
+- list_add_tail(&this->d_lru, &dentry_unused);
+- dentry_stat.nr_unused++;
+- found++;
+- }
+- }
++ /*
++		 * Try again with a dput_recursive() race check;
++		 * it returns quickly if everything was really shrunk.
++ */
++ r = 0;
++ found = select_anon(&sb->s_anon, &r);
++ if (found)
++ goto found;
++ if (!r)
++ break;
++
++ /* drops the lock inside */
++ dcache_shrinker_wait(sb);
++ continue;
++
++found:
+ spin_unlock(&dcache_lock);
+ prune_dcache(found);
+- } while(found);
++ }
++ spin_unlock(&dcache_lock);
+ }
+
+ /*
+@@ -691,12 +865,18 @@ void shrink_dcache_anon(struct hlist_hea
+ */
+ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+ {
++ int res = -1;
++
++ KSTAT_PERF_ENTER(shrink_dcache)
+ if (nr) {
+ if (!(gfp_mask & __GFP_FS))
+- return -1;
++ goto out;
+ prune_dcache(nr);
+ }
+- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
++ res = (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
++out:
++ KSTAT_PERF_LEAVE(shrink_dcache)
++ return res;
+ }
+
+ /**
+@@ -716,19 +896,20 @@ struct dentry *d_alloc(struct dentry * p
+
+ dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
+ if (!dentry)
+- return NULL;
++ goto err_alloc;
+
+ if (name->len > DNAME_INLINE_LEN-1) {
+ dname = kmalloc(name->len + 1, GFP_KERNEL);
+- if (!dname) {
+- kmem_cache_free(dentry_cache, dentry);
+- return NULL;
+- }
++ if (!dname)
++ goto err_name;
+ } else {
+ dname = dentry->d_iname;
+ }
+ dentry->d_name.name = dname;
+
++ if (ub_dentry_alloc(dentry))
++ goto err_charge;
++
+ dentry->d_name.len = name->len;
+ dentry->d_name.hash = name->hash;
+ memcpy(dname, name->name, name->len);
+@@ -757,12 +938,23 @@ struct dentry *d_alloc(struct dentry * p
+ }
+
+ spin_lock(&dcache_lock);
+- if (parent)
++ if (parent) {
+ list_add(&dentry->d_child, &parent->d_subdirs);
++ if (parent->d_flags & DCACHE_VIRTUAL)
++ dentry->d_flags |= DCACHE_VIRTUAL;
++ }
+ dentry_stat.nr_dentry++;
+ spin_unlock(&dcache_lock);
+
+ return dentry;
++
++err_charge:
++ if (name->len > DNAME_INLINE_LEN - 1)
++ kfree(dname);
++err_name:
++ kmem_cache_free(dentry_cache, dentry);
++err_alloc:
++ return NULL;
+ }
+
+ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
+@@ -1041,7 +1233,6 @@ struct dentry * __d_lookup(struct dentry
+ unsigned int hash = name->hash;
+ const unsigned char *str = name->name;
+ struct hlist_head *head = d_hash(parent,hash);
+- struct dentry *found = NULL;
+ struct hlist_node *node;
+ struct dentry *dentry;
+
+@@ -1082,7 +1273,7 @@ struct dentry * __d_lookup(struct dentry
+
+ if (!d_unhashed(dentry)) {
+ atomic_inc(&dentry->d_count);
+- found = dentry;
++ goto found;
+ }
+ spin_unlock(&dentry->d_lock);
+ break;
+@@ -1091,7 +1282,18 @@ next:
+ }
+ rcu_read_unlock();
+
+- return found;
++ return NULL;
++
++found:
++ /*
++ * d_lock and rcu_read_lock
++ * are dropped in ub_dentry_charge()
++ */
++ if (ub_dentry_charge(dentry)) {
++ dput(dentry);
++ dentry = NULL;
++ }
++ return dentry;
+ }
+
+ /**
+@@ -1338,6 +1540,32 @@ already_unhashed:
+ }
+
+ /**
++ * __d_path_add_deleted - prepend "(deleted) " text
++ * @end: a pointer to the character after free space at the beginning of the
++ * buffer
++ * @buflen: remaining free space
++ */
++static inline char * __d_path_add_deleted(char * end, int buflen)
++{
++ buflen -= 10;
++ if (buflen < 0)
++ return ERR_PTR(-ENAMETOOLONG);
++ end -= 10;
++ memcpy(end, "(deleted) ", 10);
++ return end;
++}
++
++/**
++ * d_root_check - checks if dentry is accessible from current's fs root
++ * @dentry: dentry to be verified
++ * @vfsmnt: vfsmnt to which the dentry belongs
++ */
++int d_root_check(struct dentry *dentry, struct vfsmount *vfsmnt)
++{
++ return PTR_ERR(d_path(dentry, vfsmnt, NULL, 0));
++}
++
++/**
+ * d_path - return the path of a dentry
+ * @dentry: dentry to report
+ * @vfsmnt: vfsmnt to which the dentry belongs
+@@ -1358,36 +1586,35 @@ static char * __d_path( struct dentry *d
+ char *buffer, int buflen)
+ {
+ char * end = buffer+buflen;
+- char * retval;
++ char * retval = NULL;
+ int namelen;
++ int deleted;
++ struct vfsmount *oldvfsmnt;
+
+- *--end = '\0';
+- buflen--;
+- if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
+- buflen -= 10;
+- end -= 10;
+- if (buflen < 0)
++ oldvfsmnt = vfsmnt;
++ deleted = (!IS_ROOT(dentry) && d_unhashed(dentry));
++ if (buffer != NULL) {
++ *--end = '\0';
++ buflen--;
++
++ if (buflen < 1)
+ goto Elong;
+- memcpy(end, " (deleted)", 10);
++ /* Get '/' right */
++ retval = end-1;
++ *retval = '/';
+ }
+
+- if (buflen < 1)
+- goto Elong;
+- /* Get '/' right */
+- retval = end-1;
+- *retval = '/';
+-
+ for (;;) {
+ struct dentry * parent;
+
+ if (dentry == root && vfsmnt == rootmnt)
+ break;
+ if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+- /* Global root? */
++ /* root of a tree? */
+ spin_lock(&vfsmount_lock);
+ if (vfsmnt->mnt_parent == vfsmnt) {
+ spin_unlock(&vfsmount_lock);
+- goto global_root;
++ goto other_root;
+ }
+ dentry = vfsmnt->mnt_mountpoint;
+ vfsmnt = vfsmnt->mnt_parent;
+@@ -1396,27 +1623,51 @@ static char * __d_path( struct dentry *d
+ }
+ parent = dentry->d_parent;
+ prefetch(parent);
++ if (buffer != NULL) {
++ namelen = dentry->d_name.len;
++ buflen -= namelen + 1;
++ if (buflen < 0)
++ goto Elong;
++ end -= namelen;
++ memcpy(end, dentry->d_name.name, namelen);
++ *--end = '/';
++ retval = end;
++ }
++ dentry = parent;
++ }
++ /* the given root point is reached */
++finish:
++ if (buffer != NULL && deleted)
++ retval = __d_path_add_deleted(end, buflen);
++ return retval;
++
++other_root:
++ /*
++ * We traversed the tree upward and reached a root, but the given
++ * lookup terminal point wasn't encountered. It means either that the
++ * dentry is out of our scope or belongs to an abstract space like
++ * sock_mnt or pipe_mnt. Check for it.
++ *
++ * There are different options to check it.
++ * We may assume that any dentry tree is unreachable unless it's
++ * connected to `root' (defined as fs root of init aka child reaper)
++ * and expose all paths that are not connected to it.
++ * The other option is to allow exposing of known abstract spaces
++ * explicitly and hide the path information for other cases.
++	 * This approach is safer, so let's take it. 2001/04/22 SAW
++ */
++ if (!(oldvfsmnt->mnt_sb->s_flags & MS_NOUSER))
++ return ERR_PTR(-EINVAL);
++ if (buffer != NULL) {
+ namelen = dentry->d_name.len;
+- buflen -= namelen + 1;
++ buflen -= namelen;
+ if (buflen < 0)
+ goto Elong;
+- end -= namelen;
+- memcpy(end, dentry->d_name.name, namelen);
+- *--end = '/';
+- retval = end;
+- dentry = parent;
++ retval -= namelen-1; /* hit the slash */
++ memcpy(retval, dentry->d_name.name, namelen);
+ }
++ goto finish;
+
+- return retval;
+-
+-global_root:
+- namelen = dentry->d_name.len;
+- buflen -= namelen;
+- if (buflen < 0)
+- goto Elong;
+- retval -= namelen-1; /* hit the slash */
+- memcpy(retval, dentry->d_name.name, namelen);
+- return retval;
+ Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+ }
+@@ -1441,6 +1692,228 @@ char * d_path(struct dentry *dentry, str
+ return res;
+ }
+
++#ifdef CONFIG_VE
++#include <net/sock.h>
++#include <linux/ip.h>
++#include <linux/file.h>
++#include <linux/namespace.h>
++#include <linux/vzratelimit.h>
++
++static void mark_sub_tree_virtual(struct dentry *d)
++{
++ struct dentry *orig_root;
++
++ orig_root = d;
++ while (1) {
++ spin_lock(&d->d_lock);
++ d->d_flags |= DCACHE_VIRTUAL;
++ spin_unlock(&d->d_lock);
++
++ if (!list_empty(&d->d_subdirs)) {
++ d = list_entry(d->d_subdirs.next,
++ struct dentry, d_child);
++ continue;
++ }
++ if (d == orig_root)
++ break;
++ while (d == list_entry(d->d_parent->d_subdirs.prev,
++ struct dentry, d_child)) {
++ d = d->d_parent;
++ if (d == orig_root)
++ goto out;
++ }
++ d = list_entry(d->d_child.next,
++ struct dentry, d_child);
++ }
++out:
++ return;
++}
++
++void mark_tree_virtual(struct vfsmount *m, struct dentry *d)
++{
++ struct vfsmount *orig_rootmnt;
++
++ spin_lock(&dcache_lock);
++ spin_lock(&vfsmount_lock);
++ orig_rootmnt = m;
++ while (1) {
++ mark_sub_tree_virtual(d);
++ if (!list_empty(&m->mnt_mounts)) {
++ m = list_entry(m->mnt_mounts.next,
++ struct vfsmount, mnt_child);
++ d = m->mnt_root;
++ continue;
++ }
++ if (m == orig_rootmnt)
++ break;
++ while (m == list_entry(m->mnt_parent->mnt_mounts.prev,
++ struct vfsmount, mnt_child)) {
++ m = m->mnt_parent;
++ if (m == orig_rootmnt)
++ goto out;
++ }
++ m = list_entry(m->mnt_child.next,
++ struct vfsmount, mnt_child);
++ d = m->mnt_root;
++ }
++out:
++ spin_unlock(&vfsmount_lock);
++ spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(mark_tree_virtual);
++
++static struct vz_rate_info area_ri = { 20, 10*HZ };
++#define VE_AREA_ACC_CHECK 0x0001
++#define VE_AREA_ACC_DENY 0x0002
++#define VE_AREA_EXEC_CHECK 0x0010
++#define VE_AREA_EXEC_DENY 0x0020
++#define VE0_AREA_ACC_CHECK 0x0100
++#define VE0_AREA_ACC_DENY 0x0200
++#define VE0_AREA_EXEC_CHECK 0x1000
++#define VE0_AREA_EXEC_DENY 0x2000
++int ve_area_access_check = 0;
++
++static void print_connection_info(struct task_struct *tsk)
++{
++ struct files_struct *files;
++ struct fdtable *fdt;
++ int fd;
++
++ files = get_files_struct(tsk);
++ if (!files)
++ return;
++
++ spin_lock(&files->file_lock);
++ fdt = files_fdtable(files);
++ for (fd = 0; fd < fdt->max_fds; fd++) {
++ struct file *file;
++ struct inode *inode;
++ struct socket *socket;
++ struct sock *sk;
++ struct inet_sock *inet;
++
++ file = fdt->fd[fd];
++ if (file == NULL)
++ continue;
++
++ inode = file->f_dentry->d_inode;
++ if (!S_ISSOCK(inode->i_mode))
++ continue;
++
++ socket = SOCKET_I(inode);
++ if (socket == NULL)
++ continue;
++
++ sk = socket->sk;
++ if (sk->sk_family != PF_INET || sk->sk_type != SOCK_STREAM)
++ continue;
++
++ inet = inet_sk(sk);
++ printk(KERN_ALERT "connection from %u.%u.%u.%u:%u to port %u\n",
++ NIPQUAD(inet->daddr), ntohs(inet->dport),
++ inet->num);
++ }
++ spin_unlock(&files->file_lock);
++ put_files_struct(files);
++}
++
++static void check_alert(struct vfsmount *vfsmnt, struct dentry *dentry,
++ char *str)
++{
++ struct task_struct *tsk;
++ unsigned long page;
++ struct super_block *sb;
++ char *p;
++
++ if (!vz_ratelimit(&area_ri))
++ return;
++
++ tsk = current;
++ p = ERR_PTR(-ENOMEM);
++ page = __get_free_page(GFP_KERNEL);
++ if (page) {
++ spin_lock(&dcache_lock);
++ p = __d_path(dentry, vfsmnt, tsk->fs->root, tsk->fs->rootmnt,
++ (char *)page, PAGE_SIZE);
++ spin_unlock(&dcache_lock);
++ }
++ if (IS_ERR(p))
++ p = "(undefined)";
++
++ sb = dentry->d_sb;
++ printk(KERN_ALERT "%s check alert! file:[%s] from %d/%s, dev%x\n"
++ "Task %d/%d[%s] from VE%d, execenv %d\n",
++ str, p, VE_OWNER_FSTYPE(sb->s_type)->veid,
++ sb->s_type->name, sb->s_dev,
++ tsk->pid, virt_pid(tsk), tsk->comm,
++ VE_TASK_INFO(tsk)->owner_env->veid,
++ get_exec_env()->veid);
++
++ free_page(page);
++
++ print_connection_info(tsk);
++
++ read_lock(&tasklist_lock);
++ tsk = tsk->real_parent;
++ get_task_struct(tsk);
++ read_unlock(&tasklist_lock);
++
++ printk(KERN_ALERT "Parent %d/%d[%s] from VE%d\n",
++ tsk->pid, virt_pid(tsk), tsk->comm,
++ VE_TASK_INFO(tsk)->owner_env->veid);
++
++ print_connection_info(tsk);
++ put_task_struct(tsk);
++ dump_stack();
++}
++#endif
++
++int check_area_access_ve(struct dentry *dentry, struct vfsmount *mnt)
++{
++#ifdef CONFIG_VE
++ int check, alert, deny;
++
++ if (ve_is_super(get_exec_env())) {
++ check = ve_area_access_check & VE0_AREA_ACC_CHECK;
++ alert = dentry->d_flags & DCACHE_VIRTUAL;
++ deny = ve_area_access_check & VE0_AREA_ACC_DENY;
++ } else {
++ check = ve_area_access_check & VE_AREA_ACC_CHECK;
++ alert = !(dentry->d_flags & DCACHE_VIRTUAL);
++ deny = ve_area_access_check & VE_AREA_ACC_DENY;
++ }
++
++ if (check && alert)
++ check_alert(mnt, dentry, "Access");
++ if (deny && alert)
++ return -EACCES;
++#endif
++ return 0;
++}
++
++int check_area_execute_ve(struct dentry *dentry, struct vfsmount *mnt)
++{
++#ifdef CONFIG_VE
++ int check, alert, deny;
++
++ if (ve_is_super(get_exec_env())) {
++ check = ve_area_access_check & VE0_AREA_EXEC_CHECK;
++ alert = dentry->d_flags & DCACHE_VIRTUAL;
++ deny = ve_area_access_check & VE0_AREA_EXEC_DENY;
++ } else {
++ check = ve_area_access_check & VE_AREA_EXEC_CHECK;
++ alert = !(dentry->d_flags & DCACHE_VIRTUAL);
++ deny = ve_area_access_check & VE_AREA_EXEC_DENY;
++ }
++
++ if (check && alert)
++ check_alert(mnt, dentry, "Exec");
++ if (deny && alert)
++ return -EACCES;
++#endif
++ return 0;
++}
++
+ /*
+ * NOTE! The user-level library version returns a
+ * character pointer. The kernel system call just
+@@ -1577,10 +2050,12 @@ resume:
+ goto repeat;
+ }
+ atomic_dec(&dentry->d_count);
++ ub_dentry_uncharge(dentry);
+ }
+ if (this_parent != root) {
+ next = this_parent->d_child.next;
+ atomic_dec(&this_parent->d_count);
++ ub_dentry_uncharge(this_parent);
+ this_parent = this_parent->d_parent;
+ goto resume;
+ }
+@@ -1729,7 +2204,8 @@ void __init vfs_caches_init(unsigned lon
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+
+ filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
++ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC,
++ filp_ctor, filp_dtor);
+
+ dcache_init(mempages);
+ inode_init(mempages);
+diff -uprN linux-2.6.15.orig/fs/devpts/inode.c linux-2.6.15-ve025stab014/fs/devpts/inode.c
+--- linux-2.6.15.orig/fs/devpts/inode.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/devpts/inode.c 2006-01-27 14:48:08.000000000 +0300
+@@ -12,6 +12,7 @@
+
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <linux/ve.h>
+ #include <linux/fs.h>
+ #include <linux/sched.h>
+ #include <linux/namei.h>
+@@ -21,16 +22,17 @@
+
+ #define DEVPTS_SUPER_MAGIC 0x1cd1
+
++struct devpts_config devpts_config = {.mode = 0600};
++
++#ifndef CONFIG_VE
+ static struct vfsmount *devpts_mnt;
+ static struct dentry *devpts_root;
+-
+-static struct {
+- int setuid;
+- int setgid;
+- uid_t uid;
+- gid_t gid;
+- umode_t mode;
+-} config = {.mode = 0600};
++#define config devpts_config
++#else
++#define devpts_mnt (get_exec_env()->devpts_mnt)
++#define devpts_root (get_exec_env()->devpts_root)
++#define config (*(get_exec_env()->devpts_config))
++#endif
+
+ static int devpts_remount(struct super_block *sb, int *flags, char *data)
+ {
+@@ -56,7 +58,8 @@ static int devpts_remount(struct super_b
+ } else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1)
+ mode = n & ~S_IFMT;
+ else {
+- printk("devpts: called with bogus options\n");
++ ve_printk(VE_LOG,
++ "devpts: called with bogus options\n");
+ return -EINVAL;
+ }
+ }
+@@ -121,6 +124,8 @@ static struct file_system_type devpts_fs
+ .kill_sb = kill_anon_super,
+ };
+
++EXPORT_SYMBOL(devpts_fs_type);
++
+ /*
+ * The normal naming convention is simply /dev/pts/<number>; this conforms
+ * to the System V naming convention
+@@ -212,6 +217,7 @@ static int __init init_devpts_fs(void)
+
+ static void __exit exit_devpts_fs(void)
+ {
++ /* the code is never called, the argument is irrelevant */
+ unregister_filesystem(&devpts_fs_type);
+ mntput(devpts_mnt);
+ }
+diff -uprN linux-2.6.15.orig/fs/exec.c linux-2.6.15-ve025stab014/fs/exec.c
+--- linux-2.6.15.orig/fs/exec.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/exec.c 2006-01-27 14:48:08.000000000 +0300
+@@ -53,6 +53,8 @@
+ #include <asm/uaccess.h>
+ #include <asm/mmu_context.h>
+
++#include <ub/ub_vmpages.h>
++
+ #ifdef CONFIG_KMOD
+ #include <linux/kmod.h>
+ #endif
+@@ -308,6 +310,10 @@ void install_arg_page(struct vm_area_str
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t * pte;
+ spinlock_t *ptl;
++ struct page_beancounter *pb;
++
++ if (unlikely(pb_alloc(&pb)))
++ goto out_nopb;
+
+ if (unlikely(anon_vma_prepare(vma)))
+ goto out;
+@@ -321,15 +327,21 @@ void install_arg_page(struct vm_area_str
+ goto out;
+ }
+ inc_mm_counter(mm, anon_rss);
++ inc_vma_rss(vma);
+ lru_cache_add_active(page);
+ set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
+ page, vma->vm_page_prot))));
++ pb_add_ref(page, mm, &pb);
++ ub_unused_privvm_dec(mm, vma);
++ pb_free(&pb);
+ page_add_anon_rmap(page, vma, address);
+ pte_unmap_unlock(pte, ptl);
+
+ /* no need for flush_tlb */
+ return;
+ out:
++ pb_free(&pb);
++out_nopb:
+ __free_page(page);
+ force_sig(SIGKILL, current);
+ }
+@@ -404,9 +416,13 @@ int setup_arg_pages(struct linux_binprm
+ bprm->loader += stack_base;
+ bprm->exec += stack_base;
+
+- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
++ if (ub_memory_charge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags,
++ NULL, UB_SOFT))
++ goto fail_charge;
++
++ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | __GFP_SOFT_UBC);
+ if (!mpnt)
+- return -ENOMEM;
++ goto fail_alloc;
+
+ memset(mpnt, 0, sizeof(*mpnt));
+
+@@ -450,6 +466,11 @@ int setup_arg_pages(struct linux_binprm
+ up_write(&mm->mmap_sem);
+
+ return 0;
++
++fail_alloc:
++ ub_memory_uncharge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags, NULL);
++fail_charge:
++ return -ENOMEM;
+ }
+
+ EXPORT_SYMBOL(setup_arg_pages);
+@@ -657,7 +678,7 @@ static inline int de_thread(struct task_
+ */
+ if (!thread_group_leader(current)) {
+ struct task_struct *parent;
+- struct dentry *proc_dentry1, *proc_dentry2;
++ struct dentry *proc_dentry1[2], *proc_dentry2[2];
+ unsigned long ptrace;
+
+ /*
+@@ -671,8 +692,8 @@ static inline int de_thread(struct task_
+
+ spin_lock(&leader->proc_lock);
+ spin_lock(&current->proc_lock);
+- proc_dentry1 = proc_pid_unhash(current);
+- proc_dentry2 = proc_pid_unhash(leader);
++ proc_pid_unhash(current, proc_dentry1);
++ proc_pid_unhash(leader, proc_dentry2);
+ write_lock_irq(&tasklist_lock);
+
+ BUG_ON(leader->tgid != current->tgid);
+@@ -891,6 +912,7 @@ int flush_old_exec(struct linux_binprm *
+ suid_keys(current);
+ current->mm->dumpable = suid_dumpable;
+ }
++ current->mm->vps_dumpable = 1;
+
+ /* An exec changes our domain. We are no longer part of the thread
+ group */
+@@ -1282,7 +1304,7 @@ static void format_corename(char *corena
+ case 'p':
+ pid_in_pattern = 1;
+ rc = snprintf(out_ptr, out_end - out_ptr,
+- "%d", current->tgid);
++ "%d", virt_tgid(current));
+ if (rc > out_end - out_ptr)
+ goto out;
+ out_ptr += rc;
+@@ -1326,7 +1348,7 @@ static void format_corename(char *corena
+ case 'h':
+ down_read(&uts_sem);
+ rc = snprintf(out_ptr, out_end - out_ptr,
+- "%s", system_utsname.nodename);
++ "%s", ve_utsname.nodename);
+ up_read(&uts_sem);
+ if (rc > out_end - out_ptr)
+ goto out;
+@@ -1354,7 +1376,7 @@ static void format_corename(char *corena
+ if (!pid_in_pattern
+ && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
+ rc = snprintf(out_ptr, out_end - out_ptr,
+- ".%d", current->tgid);
++ ".%d", virt_tgid(current));
+ if (rc > out_end - out_ptr)
+ goto out;
+ out_ptr += rc;
+@@ -1380,7 +1402,7 @@ static void zap_threads (struct mm_struc
+ }
+
+ read_lock(&tasklist_lock);
+- do_each_thread(g,p)
++ do_each_thread_ve(g,p)
+ if (mm == p->mm && p != tsk) {
+ force_sig_specific(SIGKILL, p);
+ mm->core_waiters++;
+@@ -1388,7 +1410,7 @@ static void zap_threads (struct mm_struc
+ unlikely(p->parent->mm == mm))
+ traced = 1;
+ }
+- while_each_thread(g,p);
++ while_each_thread_ve(g,p);
+
+ read_unlock(&tasklist_lock);
+
+@@ -1400,12 +1422,12 @@ static void zap_threads (struct mm_struc
+ * coredump to finish. Detach them so they can both die.
+ */
+ write_lock_irq(&tasklist_lock);
+- do_each_thread(g,p) {
++ do_each_thread_ve(g,p) {
+ if (mm == p->mm && p != tsk &&
+ p->ptrace && p->parent->mm == mm) {
+ __ptrace_unlink(p);
+ }
+- } while_each_thread(g,p);
++ } while_each_thread_ve(g,p);
+ write_unlock_irq(&tasklist_lock);
+ }
+ }
+@@ -1441,7 +1463,8 @@ int do_coredump(long signr, int exit_cod
+ if (!binfmt || !binfmt->core_dump)
+ goto fail;
+ down_write(&mm->mmap_sem);
+- if (!mm->dumpable) {
++ if (!mm->dumpable ||
++ (!mm->vps_dumpable && !ve_is_super(get_exec_env()))) {
+ up_write(&mm->mmap_sem);
+ goto fail;
+ }
+diff -uprN linux-2.6.15.orig/fs/ext2/namei.c linux-2.6.15-ve025stab014/fs/ext2/namei.c
+--- linux-2.6.15.orig/fs/ext2/namei.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/ext2/namei.c 2006-01-27 14:48:08.000000000 +0300
+@@ -31,6 +31,7 @@
+ */
+
+ #include <linux/pagemap.h>
++#include <linux/quotaops.h>
+ #include "ext2.h"
+ #include "xattr.h"
+ #include "acl.h"
+@@ -276,6 +277,8 @@ static int ext2_unlink(struct inode * di
+ struct page * page;
+ int err = -ENOENT;
+
++ DQUOT_INIT(inode);
++
+ de = ext2_find_entry (dir, dentry, &page);
+ if (!de)
+ goto out;
+@@ -318,6 +321,9 @@ static int ext2_rename (struct inode * o
+ struct ext2_dir_entry_2 * old_de;
+ int err = -ENOENT;
+
++ if (new_inode)
++ DQUOT_INIT(new_inode);
++
+ old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
+ if (!old_de)
+ goto out;
+diff -uprN linux-2.6.15.orig/fs/ext2/super.c linux-2.6.15-ve025stab014/fs/ext2/super.c
+--- linux-2.6.15.orig/fs/ext2/super.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/ext2/super.c 2006-01-27 14:48:08.000000000 +0300
+@@ -1200,7 +1200,7 @@ static struct file_system_type ext2_fs_t
+ .name = "ext2",
+ .get_sb = ext2_get_sb,
+ .kill_sb = kill_block_super,
+- .fs_flags = FS_REQUIRES_DEV,
++ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED,
+ };
+
+ static int __init init_ext2_fs(void)
+diff -uprN linux-2.6.15.orig/fs/ext3/inode.c linux-2.6.15-ve025stab014/fs/ext3/inode.c
+--- linux-2.6.15.orig/fs/ext3/inode.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/ext3/inode.c 2006-01-27 14:48:05.000000000 +0300
+@@ -2728,6 +2728,10 @@ int ext3_write_inode(struct inode *inode
+ {
+ if (current->flags & PF_MEMALLOC)
+ return 0;
++#ifdef CONFIG_USER_RESOURCE
++ if (test_ti_thread_flag(current->thread_info, TIF_MEMDIE))
++ return 0;
++#endif
+
+ if (ext3_journal_current_handle()) {
+ jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
+diff -uprN linux-2.6.15.orig/fs/ext3/super.c linux-2.6.15-ve025stab014/fs/ext3/super.c
+--- linux-2.6.15.orig/fs/ext3/super.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/ext3/super.c 2006-01-27 14:48:08.000000000 +0300
+@@ -2627,7 +2627,7 @@ static struct file_system_type ext3_fs_t
+ .name = "ext3",
+ .get_sb = ext3_get_sb,
+ .kill_sb = kill_block_super,
+- .fs_flags = FS_REQUIRES_DEV,
++ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED,
+ };
+
+ static int __init init_ext3_fs(void)
+diff -uprN linux-2.6.15.orig/fs/fcntl.c linux-2.6.15-ve025stab014/fs/fcntl.c
+--- linux-2.6.15.orig/fs/fcntl.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/fcntl.c 2006-01-27 14:48:08.000000000 +0300
+@@ -17,6 +17,7 @@
+ #include <linux/ptrace.h>
+ #include <linux/signal.h>
+ #include <linux/rcupdate.h>
++#include <linux/ve_owner.h>
+
+ #include <asm/poll.h>
+ #include <asm/siginfo.h>
+@@ -250,6 +251,7 @@ static int setfl(int fd, struct file * f
+ static void f_modown(struct file *filp, unsigned long pid,
+ uid_t uid, uid_t euid, int force)
+ {
++ pid = comb_vpid_to_pid(pid);
+ write_lock_irq(&filp->f_owner.lock);
+ if (force || !filp->f_owner.pid) {
+ filp->f_owner.pid = pid;
+@@ -316,7 +318,7 @@ static long do_fcntl(int fd, unsigned in
+ * current syscall conventions, the only way
+ * to fix this will be in libc.
+ */
+- err = filp->f_owner.pid;
++ err = comb_pid_to_vpid(filp->f_owner.pid);
+ force_successful_syscall_return();
+ break;
+ case F_SETOWN:
+@@ -468,23 +470,29 @@ static void send_sigio_to_task(struct ta
+ void send_sigio(struct fown_struct *fown, int fd, int band)
+ {
+ struct task_struct *p;
++ struct file *f;
++ struct ve_struct *ve;
+ int pid;
+
+ read_lock(&fown->lock);
+ pid = fown->pid;
+ if (!pid)
+ goto out_unlock_fown;
++
++ /* hack: fown's are always embedded in struct file */
++ f = container_of(fown, struct file, f_owner);
++ ve = VE_OWNER_FILP(f);
+
+ read_lock(&tasklist_lock);
+ if (pid > 0) {
+- p = find_task_by_pid(pid);
+- if (p) {
++ p = find_task_by_pid_all(pid);
++ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
+ send_sigio_to_task(p, fown, fd, band);
+ }
+ } else {
+- do_each_task_pid(-pid, PIDTYPE_PGID, p) {
++ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
+ send_sigio_to_task(p, fown, fd, band);
+- } while_each_task_pid(-pid, PIDTYPE_PGID, p);
++ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
+ }
+ read_unlock(&tasklist_lock);
+ out_unlock_fown:
+@@ -501,6 +509,8 @@ static void send_sigurg_to_task(struct t
+ int send_sigurg(struct fown_struct *fown)
+ {
+ struct task_struct *p;
++ struct file *f;
++ struct ve_struct *ve;
+ int pid, ret = 0;
+
+ read_lock(&fown->lock);
+@@ -509,17 +519,19 @@ int send_sigurg(struct fown_struct *fown
+ goto out_unlock_fown;
+
+ ret = 1;
++ f = container_of(fown, struct file, f_owner);
++ ve = VE_OWNER_FILP(f);
+
+ read_lock(&tasklist_lock);
+ if (pid > 0) {
+- p = find_task_by_pid(pid);
+- if (p) {
++ p = find_task_by_pid_all(pid);
++ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
+ send_sigurg_to_task(p, fown);
+ }
+ } else {
+- do_each_task_pid(-pid, PIDTYPE_PGID, p) {
++ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
+ send_sigurg_to_task(p, fown);
+- } while_each_task_pid(-pid, PIDTYPE_PGID, p);
++ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
+ }
+ read_unlock(&tasklist_lock);
+ out_unlock_fown:
+diff -uprN linux-2.6.15.orig/fs/file.c linux-2.6.15-ve025stab014/fs/file.c
+--- linux-2.6.15.orig/fs/file.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/file.c 2006-01-27 14:48:05.000000000 +0300
+@@ -18,6 +18,8 @@
+ #include <linux/rcupdate.h>
+ #include <linux/workqueue.h>
+
++#include <ub/ub_mem.h>
++
+ struct fdtable_defer {
+ spinlock_t lock;
+ struct work_struct wq;
+@@ -44,9 +46,9 @@ struct file ** alloc_fd_array(int num)
+ int size = num * sizeof(struct file *);
+
+ if (size <= PAGE_SIZE)
+- new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
++ new_fds = (struct file **) ub_kmalloc(size, GFP_KERNEL);
+ else
+- new_fds = (struct file **) vmalloc(size);
++ new_fds = (struct file **) ub_vmalloc(size);
+ return new_fds;
+ }
+
+@@ -212,9 +214,9 @@ fd_set * alloc_fdset(int num)
+ int size = num / 8;
+
+ if (size <= PAGE_SIZE)
+- new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
++ new_fdset = (fd_set *) ub_kmalloc(size, GFP_KERNEL);
+ else
+- new_fdset = (fd_set *) vmalloc(size);
++ new_fdset = (fd_set *) ub_vmalloc(size);
+ return new_fdset;
+ }
+
+diff -uprN linux-2.6.15.orig/fs/file_table.c linux-2.6.15-ve025stab014/fs/file_table.c
+--- linux-2.6.15.orig/fs/file_table.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/file_table.c 2006-01-27 14:48:08.000000000 +0300
+@@ -8,6 +8,7 @@
+ #include <linux/string.h>
+ #include <linux/slab.h>
+ #include <linux/file.h>
++#include <linux/ve_owner.h>
+ #include <linux/init.h>
+ #include <linux/module.h>
+ #include <linux/smp_lock.h>
+@@ -19,6 +20,8 @@
+ #include <linux/cdev.h>
+ #include <linux/fsnotify.h>
+
++#include <ub/ub_misc.h>
++
+ /* sysctl tunables... */
+ struct files_stat_struct files_stat = {
+ .max_files = NR_FILE
+@@ -57,6 +60,8 @@ void filp_dtor(void *objp, struct kmem_c
+ static inline void file_free_rcu(struct rcu_head *head)
+ {
+ struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
++ ub_file_uncharge(f);
++ put_ve(VE_OWNER_FILP(f));
+ kmem_cache_free(filp_cachep, f);
+ }
+
+@@ -86,6 +91,11 @@ struct file *get_empty_filp(void)
+ goto fail;
+
+ memset(f, 0, sizeof(*f));
++ SET_VE_OWNER_FILP(f, get_ve(get_exec_env()));
++
++ if (ub_file_charge(f))
++ goto fail_ch;
++
+ if (security_file_alloc(f))
+ goto fail_sec;
+
+@@ -111,6 +121,10 @@ fail_sec:
+ file_free(f);
+ fail:
+ return NULL;
++
++fail_ch:
++ kmem_cache_free(filp_cachep, f);
++ return NULL;
+ }
+
+ EXPORT_SYMBOL(get_empty_filp);
+diff -uprN linux-2.6.15.orig/fs/filesystems.c linux-2.6.15-ve025stab014/fs/filesystems.c
+--- linux-2.6.15.orig/fs/filesystems.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/filesystems.c 2006-01-27 14:48:08.000000000 +0300
+@@ -13,6 +13,7 @@
+ #include <linux/init.h>
+ #include <linux/module.h>
+ #include <linux/sched.h> /* for 'current' */
++#include <linux/ve_owner.h>
+ #include <asm/uaccess.h>
+
+ /*
+@@ -22,8 +23,8 @@
+ * During the unload module must call unregister_filesystem().
+ * We can access the fields of list element if:
+ * 1) spinlock is held or
+- * 2) we hold the reference to the module.
+- * The latter can be guaranteed by call of try_module_get(); if it
++ * 2) we hold the reference to the element.
++ * The latter can be guaranteed by a call to try_get_filesystem(); if it
+ * returned 0 we must skip the element, otherwise we got the reference.
+ * Once the reference is obtained we can drop the spinlock.
+ */
+@@ -31,23 +32,51 @@
+ static struct file_system_type *file_systems;
+ static DEFINE_RWLOCK(file_systems_lock);
+
++int try_get_filesystem(struct file_system_type *fs)
++{
++ if (try_module_get(fs->owner)) {
++#ifdef CONFIG_VE
++ get_ve(VE_OWNER_FSTYPE(fs));
++#endif
++ return 1;
++ }
++ return 0;
++}
++
+ /* WARNING: This can be used only if we _already_ own a reference */
+ void get_filesystem(struct file_system_type *fs)
+ {
++#ifdef CONFIG_VE
++ get_ve(VE_OWNER_FSTYPE(fs));
++#endif
+ __module_get(fs->owner);
+ }
+
+ void put_filesystem(struct file_system_type *fs)
+ {
+ module_put(fs->owner);
++#ifdef CONFIG_VE
++ put_ve(VE_OWNER_FSTYPE(fs));
++#endif
++}
++
++static inline int check_ve_fstype(struct file_system_type *p,
++ struct ve_struct *env)
++{
++ return ((p->fs_flags & FS_VIRTUALIZED) ||
++ ve_accessible_strict(VE_OWNER_FSTYPE(p), env));
+ }
+
+-static struct file_system_type **find_filesystem(const char *name)
++static struct file_system_type **find_filesystem(const char *name,
++ struct ve_struct *env)
+ {
+ struct file_system_type **p;
+- for (p=&file_systems; *p; p=&(*p)->next)
++ for (p=&file_systems; *p; p=&(*p)->next) {
++ if (!check_ve_fstype(*p, env))
++ continue;
+ if (strcmp((*p)->name,name) == 0)
+ break;
++ }
+ return p;
+ }
+
+@@ -74,8 +103,10 @@ int register_filesystem(struct file_syst
+ if (fs->next)
+ return -EBUSY;
+ INIT_LIST_HEAD(&fs->fs_supers);
++ if (VE_OWNER_FSTYPE(fs) == NULL)
++ SET_VE_OWNER_FSTYPE(fs, get_ve0());
+ write_lock(&file_systems_lock);
+- p = find_filesystem(fs->name);
++ p = find_filesystem(fs->name, VE_OWNER_FSTYPE(fs));
+ if (*p)
+ res = -EBUSY;
+ else
+@@ -132,11 +163,14 @@ static int fs_index(const char __user *
+
+ err = -EINVAL;
+ read_lock(&file_systems_lock);
+- for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
++ for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next) {
++ if (!check_ve_fstype(tmp, get_exec_env()))
++ continue;
+ if (strcmp(tmp->name,name) == 0) {
+ err = index;
+ break;
+ }
++ index++;
+ }
+ read_unlock(&file_systems_lock);
+ putname(name);
+@@ -149,9 +183,15 @@ static int fs_name(unsigned int index, c
+ int len, res;
+
+ read_lock(&file_systems_lock);
+- for (tmp = file_systems; tmp; tmp = tmp->next, index--)
+- if (index <= 0 && try_module_get(tmp->owner))
+- break;
++ for (tmp = file_systems; tmp; tmp = tmp->next) {
++ if (!check_ve_fstype(tmp, get_exec_env()))
++ continue;
++ if (!index) {
++ if (try_get_filesystem(tmp))
++ break;
++ } else
++ index--;
++ }
+ read_unlock(&file_systems_lock);
+ if (!tmp)
+ return -EINVAL;
+@@ -169,8 +209,9 @@ static int fs_maxindex(void)
+ int index;
+
+ read_lock(&file_systems_lock);
+- for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
+- ;
++ for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next)
++ if (check_ve_fstype(tmp, get_exec_env()))
++ index++;
+ read_unlock(&file_systems_lock);
+ return index;
+ }
+@@ -206,9 +247,10 @@ int get_filesystem_list(char * buf)
+ read_lock(&file_systems_lock);
+ tmp = file_systems;
+ while (tmp && len < PAGE_SIZE - 80) {
+- len += sprintf(buf+len, "%s\t%s\n",
+- (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
+- tmp->name);
++ if (check_ve_fstype(tmp, get_exec_env()))
++ len += sprintf(buf+len, "%s\t%s\n",
++ (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
++ tmp->name);
+ tmp = tmp->next;
+ }
+ read_unlock(&file_systems_lock);
+@@ -220,14 +262,14 @@ struct file_system_type *get_fs_type(con
+ struct file_system_type *fs;
+
+ read_lock(&file_systems_lock);
+- fs = *(find_filesystem(name));
+- if (fs && !try_module_get(fs->owner))
++ fs = *(find_filesystem(name, get_exec_env()));
++ if (fs && !try_get_filesystem(fs))
+ fs = NULL;
+ read_unlock(&file_systems_lock);
+ if (!fs && (request_module("%s", name) == 0)) {
+ read_lock(&file_systems_lock);
+- fs = *(find_filesystem(name));
+- if (fs && !try_module_get(fs->owner))
++ fs = *(find_filesystem(name, get_exec_env()));
++ if (fs && !try_get_filesystem(fs))
+ fs = NULL;
+ read_unlock(&file_systems_lock);
+ }
+@@ -235,3 +277,5 @@ struct file_system_type *get_fs_type(con
+ }
+
+ EXPORT_SYMBOL(get_fs_type);
++EXPORT_SYMBOL(get_filesystem);
++EXPORT_SYMBOL(put_filesystem);
+diff -uprN linux-2.6.15.orig/fs/hugetlbfs/inode.c linux-2.6.15-ve025stab014/fs/hugetlbfs/inode.c
+--- linux-2.6.15.orig/fs/hugetlbfs/inode.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/hugetlbfs/inode.c 2006-01-27 14:48:08.000000000 +0300
+@@ -802,7 +802,7 @@ struct file *hugetlb_zero_setup(size_t s
+ struct inode *inode;
+ struct dentry *dentry, *root;
+ struct qstr quick_string;
+- char buf[16];
++ char buf[64];
+
+ if (!can_do_hugetlb_shm())
+ return ERR_PTR(-EPERM);
+@@ -814,7 +814,8 @@ struct file *hugetlb_zero_setup(size_t s
+ return ERR_PTR(-ENOMEM);
+
+ root = hugetlbfs_vfsmount->mnt_root;
+- snprintf(buf, 16, "%lu", hugetlbfs_counter());
++ snprintf(buf, sizeof(buf), "VE%d-%lu",
++ get_exec_env()->veid, hugetlbfs_counter());
+ quick_string.name = buf;
+ quick_string.len = strlen(quick_string.name);
+ quick_string.hash = 0;
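
The hugetlbfs hunk widens the on-stack name buffer from 16 to 64 bytes because the per-mapping name now carries a "VE<id>-" prefix. A small stand-alone check of the arithmetic (the veid and counter values here are made up):

#include <stdio.h>

int main(void)
{
    char small[16], big[64];
    unsigned long counter = 123456789UL;
    int veid = 12345;

    /* Old format: a bare counter comfortably fits in 16 bytes. */
    snprintf(small, sizeof(small), "%lu", counter);

    /* New format adds a "VE<id>-" prefix; 16 bytes could truncate,
     * hence the move to a 64-byte buffer in the patch. */
    int need = snprintf(big, sizeof(big), "VE%d-%lu", veid, counter);

    printf("old name: %s\n", small);
    printf("new name: %s (needs %d bytes incl. NUL)\n", big, need + 1);
    return 0;
}
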
+diff -uprN linux-2.6.15.orig/fs/inode.c linux-2.6.15-ve025stab014/fs/inode.c
+--- linux-2.6.15.orig/fs/inode.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/inode.c 2006-01-27 14:48:08.000000000 +0300
+@@ -9,6 +9,7 @@
+ #include <linux/mm.h>
+ #include <linux/dcache.h>
+ #include <linux/init.h>
++#include <linux/kernel_stat.h>
+ #include <linux/quotaops.h>
+ #include <linux/slab.h>
+ #include <linux/writeback.h>
+@@ -97,13 +98,15 @@ DECLARE_MUTEX(iprune_sem);
+ */
+ struct inodes_stat_t inodes_stat;
+
+-static kmem_cache_t * inode_cachep;
++kmem_cache_t *inode_cachep;
++
++static struct address_space_operations vfs_empty_aops;
++struct inode_operations vfs_empty_iops;
++static struct file_operations vfs_empty_fops;
++EXPORT_SYMBOL(vfs_empty_iops);
+
+ static struct inode *alloc_inode(struct super_block *sb)
+ {
+- static struct address_space_operations empty_aops;
+- static struct inode_operations empty_iops;
+- static struct file_operations empty_fops;
+ struct inode *inode;
+
+ if (sb->s_op->alloc_inode)
+@@ -118,8 +121,8 @@ static struct inode *alloc_inode(struct
+ inode->i_blkbits = sb->s_blocksize_bits;
+ inode->i_flags = 0;
+ atomic_set(&inode->i_count, 1);
+- inode->i_op = &empty_iops;
+- inode->i_fop = &empty_fops;
++ inode->i_op = &vfs_empty_iops;
++ inode->i_fop = &vfs_empty_fops;
+ inode->i_nlink = 1;
+ atomic_set(&inode->i_writecount, 0);
+ inode->i_size = 0;
+@@ -143,7 +146,7 @@ static struct inode *alloc_inode(struct
+ return NULL;
+ }
+
+- mapping->a_ops = &empty_aops;
++ mapping->a_ops = &vfs_empty_aops;
+ mapping->host = inode;
+ mapping->flags = 0;
+ mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
+@@ -477,6 +480,7 @@ static void prune_icache(int nr_to_scan)
+ */
+ static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+ {
++ KSTAT_PERF_ENTER(shrink_icache)
+ if (nr) {
+ /*
+ * Nasty deadlock avoidance. We may hold various FS locks,
+@@ -487,6 +491,7 @@ static int shrink_icache_memory(int nr,
+ return -1;
+ prune_icache(nr);
+ }
++ KSTAT_PERF_LEAVE(shrink_icache)
+ return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+ }
+
+@@ -736,7 +741,7 @@ EXPORT_SYMBOL(iunique);
+ struct inode *igrab(struct inode *inode)
+ {
+ spin_lock(&inode_lock);
+- if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
++ if (inode && !(inode->i_state & (I_FREEING|I_WILL_FREE)))
+ __iget(inode);
+ else
+ /*
+diff -uprN linux-2.6.15.orig/fs/ioprio.c linux-2.6.15-ve025stab014/fs/ioprio.c
+--- linux-2.6.15.orig/fs/ioprio.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/ioprio.c 2006-01-27 14:48:08.000000000 +0300
+@@ -77,18 +77,18 @@ asmlinkage long sys_ioprio_set(int which
+ if (!who)
+ p = current;
+ else
+- p = find_task_by_pid(who);
++ p = find_task_by_pid_all(who);
+ if (p)
+ ret = set_task_ioprio(p, ioprio);
+ break;
+ case IOPRIO_WHO_PGRP:
+ if (!who)
+ who = process_group(current);
+- do_each_task_pid(who, PIDTYPE_PGID, p) {
++ do_each_task_pid_all(who, PIDTYPE_PGID, p) {
+ ret = set_task_ioprio(p, ioprio);
+ if (ret)
+ break;
+- } while_each_task_pid(who, PIDTYPE_PGID, p);
++ } while_each_task_pid_all(who, PIDTYPE_PGID, p);
+ break;
+ case IOPRIO_WHO_USER:
+ if (!who)
+@@ -99,13 +99,13 @@ asmlinkage long sys_ioprio_set(int which
+ if (!user)
+ break;
+
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ if (p->uid != who)
+ continue;
+ ret = set_task_ioprio(p, ioprio);
+ if (ret)
+ break;
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+
+ if (who)
+ free_uid(user);
+@@ -130,19 +130,19 @@ asmlinkage long sys_ioprio_get(int which
+ if (!who)
+ p = current;
+ else
+- p = find_task_by_pid(who);
++ p = find_task_by_pid_ve(who);
+ if (p)
+ ret = p->ioprio;
+ break;
+ case IOPRIO_WHO_PGRP:
+ if (!who)
+ who = process_group(current);
+- do_each_task_pid(who, PIDTYPE_PGID, p) {
++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
+ if (ret == -ESRCH)
+ ret = p->ioprio;
+ else
+ ret = ioprio_best(ret, p->ioprio);
+- } while_each_task_pid(who, PIDTYPE_PGID, p);
++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p);
+ break;
+ case IOPRIO_WHO_USER:
+ if (!who)
+@@ -153,14 +153,14 @@ asmlinkage long sys_ioprio_get(int which
+ if (!user)
+ break;
+
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ if (p->uid != user->uid)
+ continue;
+ if (ret == -ESRCH)
+ ret = p->ioprio;
+ else
+ ret = ioprio_best(ret, p->ioprio);
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+
+ if (who)
+ free_uid(user);
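
In fs/ioprio.c the generic task iterators are replaced by _all/_ve variants so that priority changes walk every task on the host while priority queries resolve PIDs only among tasks of the caller's container. A rough user-space model of the _ve-style walk; the task list, the veid field and the min-based ioprio_best() are simplifications, not the kernel's data structures.

#include <stdio.h>

struct task {
    int pid;
    int veid;     /* container the task lives in */
    int ioprio;
};

/* Model of ioprio_best(): return the stronger (numerically lower) priority. */
static int ioprio_best(int a, int b)
{
    return a < b ? a : b;
}

/* "_ve" style walk: aggregate ioprio only over tasks in the caller's VE,
 * the way sys_ioprio_get() does after the patch. */
static int best_ioprio_in_ve(const struct task *t, int n, int veid)
{
    int i, ret = -1;   /* -1 models -ESRCH: nothing found yet */

    for (i = 0; i < n; i++) {
        if (t[i].veid != veid)
            continue;          /* invisible from this container */
        ret = (ret == -1) ? t[i].ioprio : ioprio_best(ret, t[i].ioprio);
    }
    return ret;
}

int main(void)
{
    struct task tasks[] = {
        { 100, 0,   4 },   /* host task */
        { 210, 101, 2 },   /* VE 101 */
        { 211, 101, 6 },   /* VE 101 */
        { 305, 102, 0 },   /* VE 102 */
    };

    printf("best ioprio seen from VE 101: %d\n", best_ioprio_in_ve(tasks, 4, 101));
    printf("best ioprio seen from VE 102: %d\n", best_ioprio_in_ve(tasks, 4, 102));
    return 0;
}
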
+diff -uprN linux-2.6.15.orig/fs/jbd/transaction.c linux-2.6.15-ve025stab014/fs/jbd/transaction.c
+--- linux-2.6.15.orig/fs/jbd/transaction.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/jbd/transaction.c 2006-01-27 14:48:05.000000000 +0300
+@@ -1379,6 +1379,9 @@ int journal_stop(handle_t *handle)
+ * to wait for the commit to complete.
+ */
+ if (handle->h_sync && !(current->flags & PF_MEMALLOC))
++#ifdef CONFIG_USER_RESOURCE
++ if (!test_ti_thread_flag(current->thread_info, TIF_MEMDIE))
++#endif
+ err = log_wait_commit(journal, tid);
+ } else {
+ spin_unlock(&transaction->t_handle_lock);
+diff -uprN linux-2.6.15.orig/fs/lockd/clntproc.c linux-2.6.15-ve025stab014/fs/lockd/clntproc.c
+--- linux-2.6.15.orig/fs/lockd/clntproc.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/lockd/clntproc.c 2006-01-27 14:48:08.000000000 +0300
+@@ -127,10 +127,10 @@ static void nlmclnt_setlockargs(struct n
+ nlmclnt_next_cookie(&argp->cookie);
+ argp->state = nsm_local_state;
+ memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
+- lock->caller = system_utsname.nodename;
++ lock->caller = ve_utsname.nodename;
+ lock->oh.data = req->a_owner;
+ lock->oh.len = sprintf(req->a_owner, "%d@%s",
+- current->pid, system_utsname.nodename);
++ current->pid, ve_utsname.nodename);
+ locks_copy_lock(&lock->fl, fl);
+ }
+
+@@ -151,7 +151,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca
+ {
+ locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
+ memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
+- call->a_args.lock.caller = system_utsname.nodename;
++ call->a_args.lock.caller = ve_utsname.nodename;
+ call->a_args.lock.oh.len = lock->oh.len;
+
+ /* set default data area */
+diff -uprN linux-2.6.15.orig/fs/lockd/mon.c linux-2.6.15-ve025stab014/fs/lockd/mon.c
+--- linux-2.6.15.orig/fs/lockd/mon.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/lockd/mon.c 2006-01-27 14:48:08.000000000 +0300
+@@ -148,7 +148,7 @@ xdr_encode_common(struct rpc_rqst *rqstp
+ */
+ sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
+ if (!(p = xdr_encode_string(p, buffer))
+- || !(p = xdr_encode_string(p, system_utsname.nodename)))
++ || !(p = xdr_encode_string(p, ve_utsname.nodename)))
+ return ERR_PTR(-EIO);
+ *p++ = htonl(argp->prog);
+ *p++ = htonl(argp->vers);
+diff -uprN linux-2.6.15.orig/fs/locks.c linux-2.6.15-ve025stab014/fs/locks.c
+--- linux-2.6.15.orig/fs/locks.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/locks.c 2006-01-27 14:48:08.000000000 +0300
+@@ -129,6 +129,8 @@
+ #include <asm/semaphore.h>
+ #include <asm/uaccess.h>
+
++#include <ub/ub_misc.h>
++
+ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
+ #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
+ #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE)
+@@ -148,11 +150,28 @@ static LIST_HEAD(blocked_list);
+ static kmem_cache_t *filelock_cache;
+
+ /* Allocate an empty lock structure. */
+-static struct file_lock *locks_alloc_lock(void)
++static struct file_lock *locks_alloc_lock(int charge)
+ {
+- return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
++ struct file_lock *fl;
++
++ fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
++#ifdef CONFIG_USER_RESOURCE
++ if (fl == NULL)
++ goto out;
++ fl->fl_charged = 0;
++ if (!charge)
++ goto out;
++ if (!ub_flock_charge(fl, 1))
++ goto out;
++
++ kmem_cache_free(filelock_cache, fl);
++ fl = NULL;
++out:
++#endif
++ return fl;
+ }
+
++
+ /* Free a lock which is not in use. */
+ static inline void locks_free_lock(struct file_lock *fl)
+ {
+@@ -181,6 +200,7 @@ static inline void locks_free_lock(struc
+ fl->fl_lmops = NULL;
+ }
+
++ ub_flock_uncharge(fl);
+ kmem_cache_free(filelock_cache, fl);
+ }
+
+@@ -263,7 +283,7 @@ static int flock_make_lock(struct file *
+ if (type < 0)
+ return type;
+
+- fl = locks_alloc_lock();
++ fl = locks_alloc_lock(type != F_UNLCK);
+ if (fl == NULL)
+ return -ENOMEM;
+
+@@ -451,7 +471,7 @@ static int lease_init(struct file *filp,
+ /* Allocate a file_lock initialised to this type of lease */
+ static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
+ {
+- struct file_lock *fl = locks_alloc_lock();
++ struct file_lock *fl = locks_alloc_lock(1);
+ int error;
+
+ if (fl == NULL)
+@@ -785,8 +805,11 @@ static int __posix_lock_file(struct inod
+ * We may need two file_lock structures for this operation,
+ * so we get them in advance to avoid races.
+ */
+- new_fl = locks_alloc_lock();
+- new_fl2 = locks_alloc_lock();
++ if (request->fl_type != F_UNLCK)
++ new_fl = locks_alloc_lock(1);
++ else
++ new_fl = NULL;
++ new_fl2 = locks_alloc_lock(0);
+
+ lock_kernel();
+ if (request->fl_type != F_UNLCK) {
+@@ -814,7 +837,7 @@ static int __posix_lock_file(struct inod
+ goto out;
+
+ error = -ENOLCK; /* "no luck" */
+- if (!(new_fl && new_fl2))
++ if (!((request->fl_type == F_UNLCK || new_fl) && new_fl2))
+ goto out;
+
+ /*
+@@ -920,19 +943,30 @@ static int __posix_lock_file(struct inod
+ if (!added) {
+ if (request->fl_type == F_UNLCK)
+ goto out;
++ error = -ENOLCK;
++ if (right && (left == right) && ub_flock_charge(new_fl, 1))
++ goto out;
+ locks_copy_lock(new_fl, request);
+ locks_insert_lock(before, new_fl);
+ new_fl = NULL;
++ error = 0;
+ }
+ if (right) {
+ if (left == right) {
+ /* The new lock breaks the old one in two pieces,
+ * so we have to use the second new lock.
+ */
++ error = -ENOLCK;
++ if (added && ub_flock_charge(new_fl2,
++ request->fl_type != F_UNLCK))
++ goto out;
++ /* FIXME move all fl_charged manipulations in ub code */
++ set_flock_charged(new_fl2);
+ left = new_fl2;
+ new_fl2 = NULL;
+ locks_copy_lock(left, right);
+ locks_insert_lock(before, left);
++ error = 0;
+ }
+ right->fl_start = request->fl_end + 1;
+ locks_wake_up_blocks(right);
+@@ -1574,7 +1608,7 @@ int fcntl_getlk(struct file *filp, struc
+
+ flock.l_type = F_UNLCK;
+ if (fl != NULL) {
+- flock.l_pid = fl->fl_pid;
++ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
+ #if BITS_PER_LONG == 32
+ /*
+ * Make sure we can represent the posix lock via
+@@ -1606,7 +1640,7 @@ out:
+ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
+ struct flock __user *l)
+ {
+- struct file_lock *file_lock = locks_alloc_lock();
++ struct file_lock *file_lock = locks_alloc_lock(0);
+ struct flock flock;
+ struct inode *inode;
+ int error;
+@@ -1728,7 +1762,7 @@ int fcntl_getlk64(struct file *filp, str
+
+ flock.l_type = F_UNLCK;
+ if (fl != NULL) {
+- flock.l_pid = fl->fl_pid;
++ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
+ flock.l_start = fl->fl_start;
+ flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
+ fl->fl_end - fl->fl_start + 1;
+@@ -1749,7 +1783,7 @@ out:
+ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
+ struct flock64 __user *l)
+ {
+- struct file_lock *file_lock = locks_alloc_lock();
++ struct file_lock *file_lock = locks_alloc_lock(0);
+ struct flock64 flock;
+ struct inode *inode;
+ int error;
+@@ -1981,7 +2015,9 @@ EXPORT_SYMBOL(posix_unblock_lock);
+ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
+ {
+ struct inode *inode = NULL;
++ unsigned int fl_pid;
+
++ fl_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
+ if (fl->fl_file != NULL)
+ inode = fl->fl_file->f_dentry->d_inode;
+
+@@ -2023,16 +2059,16 @@ static void lock_get_status(char* out, s
+ }
+ if (inode) {
+ #ifdef WE_CAN_BREAK_LSLK_NOW
+- out += sprintf(out, "%d %s:%ld ", fl->fl_pid,
++ out += sprintf(out, "%d %s:%ld ", fl_pid,
+ inode->i_sb->s_id, inode->i_ino);
+ #else
+ /* userspace relies on this representation of dev_t ;-( */
+- out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid,
++ out += sprintf(out, "%d %02x:%02x:%ld ", fl_pid,
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev), inode->i_ino);
+ #endif
+ } else {
+- out += sprintf(out, "%d <none>:0 ", fl->fl_pid);
++ out += sprintf(out, "%d <none>:0 ", fl_pid);
+ }
+ if (IS_POSIX(fl)) {
+ if (fl->fl_end == OFFSET_MAX)
+@@ -2081,11 +2117,17 @@ int get_locks_status(char *buffer, char
+ char *q = buffer;
+ off_t pos = 0;
+ int i = 0;
++ struct ve_struct *env;
+
+ lock_kernel();
++ env = get_exec_env();
+ list_for_each(tmp, &file_lock_list) {
+ struct list_head *btmp;
+ struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
++
++ if (!ve_accessible(VE_OWNER_FILP(fl->fl_file), env))
++ continue;
++
+ lock_get_status(q, fl, ++i, "");
+ move_lock_status(&q, &pos, offset);
+
+@@ -2243,7 +2285,7 @@ EXPORT_SYMBOL(steal_locks);
+ static int __init filelock_init(void)
+ {
+ filelock_cache = kmem_cache_create("file_lock_cache",
+- sizeof(struct file_lock), 0, SLAB_PANIC,
++ sizeof(struct file_lock), 0, SLAB_PANIC | SLAB_UBC,
+ init_once, NULL);
+ return 0;
+ }
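
The fs/locks.c changes charge each granted lock to the owning container's beancounter: locks_alloc_lock() takes a charge flag, backs the slab allocation out if ub_flock_charge() refuses, and locks_free_lock() uncharges. Below is a compact user-space sketch of that allocate-charge-or-back-out pattern, with a toy counter standing in for the beancounter; names and limits are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

/* Toy per-container resource counter, standing in for a beancounter. */
struct beancounter {
    long held;
    long limit;
};

static int bc_charge(struct beancounter *bc, long amount)
{
    if (bc->held + amount > bc->limit)
        return -1;            /* charge refused */
    bc->held += amount;
    return 0;
}

static void bc_uncharge(struct beancounter *bc, long amount)
{
    bc->held -= amount;
}

struct file_lock {
    struct beancounter *bc;   /* who was charged, if anyone */
};

/* Mirrors the locks_alloc_lock(charge) shape from the patch:
 * allocate first, then try to charge, and undo the allocation
 * if the container is over its lock quota. */
static struct file_lock *lock_alloc(struct beancounter *bc, int charge)
{
    struct file_lock *fl = malloc(sizeof(*fl));

    if (!fl)
        return NULL;
    fl->bc = NULL;
    if (charge) {
        if (bc_charge(bc, 1)) {
            free(fl);         /* quota exceeded: back out */
            return NULL;
        }
        fl->bc = bc;
    }
    return fl;
}

static void lock_free(struct file_lock *fl)
{
    if (fl->bc)
        bc_uncharge(fl->bc, 1);
    free(fl);
}

int main(void)
{
    struct beancounter ve_locks = { 0, 2 };   /* allow two charged locks */
    struct file_lock *a = lock_alloc(&ve_locks, 1);
    struct file_lock *b = lock_alloc(&ve_locks, 1);
    struct file_lock *c = lock_alloc(&ve_locks, 1);

    printf("a=%p b=%p c=%p (c should be NULL)\n", (void *)a, (void *)b, (void *)c);
    lock_free(a);
    lock_free(b);
    printf("held after free: %ld\n", ve_locks.held);
    return 0;
}
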
+diff -uprN linux-2.6.15.orig/fs/namei.c linux-2.6.15-ve025stab014/fs/namei.c
+--- linux-2.6.15.orig/fs/namei.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/namei.c 2006-01-27 14:48:08.000000000 +0300
+@@ -701,7 +701,14 @@ static inline void follow_dotdot(struct
+ read_unlock(&current->fs->lock);
+ break;
+ }
+- read_unlock(&current->fs->lock);
++#ifdef CONFIG_VE
++ if (nd->dentry == get_exec_env()->fs_root &&
++ nd->mnt == get_exec_env()->fs_rootmnt) {
++ read_unlock(&current->fs->lock);
++ break;
++ }
++#endif
++ read_unlock(&current->fs->lock);
+ spin_lock(&dcache_lock);
+ if (nd->dentry != nd->mnt->mnt_root) {
+ nd->dentry = dget(nd->dentry->d_parent);
+@@ -742,6 +749,10 @@ static int do_lookup(struct nameidata *n
+ if (dentry->d_op && dentry->d_op->d_revalidate)
+ goto need_revalidate;
+ done:
++ if ((nd->flags & LOOKUP_STRICT) && d_mountpoint(dentry)) {
++ dput(dentry);
++ return -ENOENT;
++ }
+ path->mnt = mnt;
+ path->dentry = dentry;
+ __follow_mount(path);
+@@ -861,6 +872,9 @@ static fastcall int __link_path_walk(con
+ goto out_dput;
+
+ if (inode->i_op->follow_link) {
++ err = -ENOENT;
++ if (lookup_flags & LOOKUP_STRICT)
++ goto out_dput;
+ err = do_follow_link(&next, nd);
+ if (err)
+ goto return_err;
+@@ -907,6 +921,7 @@ last_component:
+ break;
+ inode = next.dentry->d_inode;
+ if ((lookup_flags & LOOKUP_FOLLOW)
++ && !(lookup_flags & LOOKUP_STRICT)
+ && inode && inode->i_op && inode->i_op->follow_link) {
+ err = do_follow_link(&next, nd);
+ if (err)
+@@ -947,6 +962,11 @@ return_reval:
+ break;
+ }
+ return_base:
++ if (!(nd->flags & LOOKUP_NOAREACHECK)) {
++ err = check_area_access_ve(nd->dentry, nd->mnt);
++ if (err)
++ break;
++ }
+ return 0;
+ out_dput:
+ dput_path(&next, nd);
+@@ -2278,6 +2298,9 @@ int vfs_rename(struct inode *old_dir, st
+ int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
+ const char *old_name;
+
++ if (DQUOT_RENAME(old_dentry->d_inode, old_dir, new_dir))
++ return -EXDEV;
++
+ if (old_dentry->d_inode == new_dentry->d_inode)
+ return 0;
+
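
The follow_dotdot() hunk above adds a second barrier so that ".." stops at the container's private fs_root exactly as it stops at the host root. The sketch below models only that clamping behaviour on a plain path string; real lookup of course works on dentries and vfsmounts, not strings.

#include <stdio.h>
#include <string.h>

/* Toy model of the follow_dotdot() barrier: a ".." component never
 * climbs above the container's root, just as the patched kernel stops
 * at get_exec_env()->fs_root. Depth 0 represents the VE root. */
static int final_depth(const char *path, int start_depth)
{
    char buf[256];
    char *tok;
    int depth = start_depth;

    strncpy(buf, path, sizeof(buf) - 1);
    buf[sizeof(buf) - 1] = '\0';

    for (tok = strtok(buf, "/"); tok; tok = strtok(NULL, "/")) {
        if (strcmp(tok, "..") == 0) {
            if (depth > 0)
                depth--;          /* still below the VE root */
            /* at the VE root ".." is a no-op instead of escaping */
        } else if (strcmp(tok, ".") != 0) {
            depth++;
        }
    }
    return depth;
}

int main(void)
{
    printf("depth after 'a/b' from the root:          %d\n", final_depth("a/b", 0));
    printf("depth after '../../../../x' from depth 2: %d\n", final_depth("../../../../x", 2));
    return 0;
}
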
+diff -uprN linux-2.6.15.orig/fs/namespace.c linux-2.6.15-ve025stab014/fs/namespace.c
+--- linux-2.6.15.orig/fs/namespace.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/namespace.c 2006-01-27 14:48:08.000000000 +0300
+@@ -39,13 +39,15 @@ static inline int sysfs_init(void)
+
+ /* spinlock for vfsmount related operations, inplace of dcache_lock */
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
++EXPORT_SYMBOL(vfsmount_lock);
+
+ static int event;
+
+ static struct list_head *mount_hashtable;
+ static int hash_mask __read_mostly, hash_bits __read_mostly;
+ static kmem_cache_t *mnt_cache;
+-static struct rw_semaphore namespace_sem;
++struct rw_semaphore namespace_sem;
++EXPORT_SYMBOL(namespace_sem);
+
+ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
+ {
+@@ -366,10 +368,32 @@ static int show_vfsmnt(struct seq_file *
+ { 0, NULL }
+ };
+ struct proc_fs_info *fs_infop;
++ char *path_buf, *path;
+
+- mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
++ /* skip FS_NOMOUNT mounts (rootfs) */
++ if (mnt->mnt_sb->s_flags & MS_NOUSER)
++ return 0;
++
++ path_buf = (char *) __get_free_page(GFP_KERNEL);
++ if (!path_buf)
++ return -ENOMEM;
++ path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
++ if (IS_ERR(path)) {
++ free_page((unsigned long) path_buf);
++ /*
++ * This means that the file position will be incremented, i.e.
++ * the total number of "invisible" vfsmnt will leak.
++ */
++ return 0;
++ }
++
++ if (ve_is_super(get_exec_env()))
++ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
++ else
++ mangle(m, mnt->mnt_sb->s_type->name);
+ seq_putc(m, ' ');
+- seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
++ mangle(m, path);
++ free_page((unsigned long) path_buf);
+ seq_putc(m, ' ');
+ mangle(m, mnt->mnt_sb->s_type->name);
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
+@@ -469,6 +493,7 @@ void release_mounts(struct list_head *he
+ mntput(mnt);
+ }
+ }
++EXPORT_SYMBOL(release_mounts);
+
+ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
+ {
+@@ -493,6 +518,7 @@ void umount_tree(struct vfsmount *mnt, i
+ change_mnt_propagation(p, MS_PRIVATE);
+ }
+ }
++EXPORT_SYMBOL(umount_tree);
+
+ static int do_umount(struct vfsmount *mnt, int flags)
+ {
+@@ -603,7 +629,7 @@ asmlinkage long sys_umount(char __user *
+ goto dput_and_out;
+
+ retval = -EPERM;
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ goto dput_and_out;
+
+ retval = do_umount(nd.mnt, flags);
+@@ -627,7 +653,7 @@ asmlinkage long sys_oldumount(char __use
+
+ static int mount_is_safe(struct nameidata *nd)
+ {
+- if (capable(CAP_SYS_ADMIN))
++ if (capable(CAP_VE_SYS_ADMIN))
+ return 0;
+ return -EPERM;
+ #ifdef notyet
+@@ -912,7 +938,7 @@ static int do_remount(struct nameidata *
+ int err;
+ struct super_block *sb = nd->mnt->mnt_sb;
+
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+
+ if (!check_mnt(nd->mnt))
+@@ -946,7 +972,7 @@ static int do_move_mount(struct nameidat
+ struct nameidata old_nd, parent_nd;
+ struct vfsmount *p;
+ int err = 0;
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ if (!old_name || !*old_name)
+ return -EINVAL;
+@@ -1026,7 +1052,7 @@ static int do_new_mount(struct nameidata
+ return -EINVAL;
+
+ /* we need capabilities... */
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+
+ mnt = do_kern_mount(type, flags, name, data);
+@@ -1067,6 +1093,10 @@ int do_add_mount(struct vfsmount *newmnt
+ if ((err = graft_tree(newmnt, nd)))
+ goto unlock;
+
++ if (newmnt->mnt_mountpoint->d_flags & DCACHE_VIRTUAL)
++ /* unaccessible yet - no lock */
++ newmnt->mnt_root->d_flags |= DCACHE_VIRTUAL;
++
+ if (fslist) {
+ /* add to the specified expiration list */
+ spin_lock(&vfsmount_lock);
+@@ -1494,7 +1524,7 @@ static void chroot_fs_refs(struct nameid
+ struct fs_struct *fs;
+
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_ve(g, p) {
+ task_lock(p);
+ fs = p->fs;
+ if (fs) {
+@@ -1509,7 +1539,7 @@ static void chroot_fs_refs(struct nameid
+ put_fs_struct(fs);
+ } else
+ task_unlock(p);
+- } while_each_thread(g, p);
++ } while_each_thread_ve(g, p);
+ read_unlock(&tasklist_lock);
+ }
+
+@@ -1658,10 +1688,10 @@ static void __init init_mount_tree(void)
+
+ init_task.namespace = namespace;
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ get_namespace(namespace);
+ p->namespace = namespace;
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+ read_unlock(&tasklist_lock);
+
+ set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
+@@ -1677,7 +1707,8 @@ void __init mnt_init(unsigned long mempa
+ init_rwsem(&namespace_sem);
+
+ mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
+- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
++ 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC,
++ NULL, NULL);
+
+ mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
+
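
In fs/namespace.c the mount-related operations are gated on CAP_VE_SYS_ADMIN, and show_vfsmnt() prints a container-relative path (via d_path) plus the fstype instead of the host's real device name. A toy illustration of the two views of the same mount; the field names and example paths are invented for the sketch.

#include <stdio.h>

/* Toy model of the show_vfsmnt() change: inside a container the
 * device name is replaced by the filesystem type and the mountpoint
 * is printed relative to the container's root, so the host's block
 * devices and directory layout stay hidden. */
struct mount {
    const char *devname;     /* e.g. /dev/sda5 */
    const char *fstype;      /* e.g. ext3      */
    const char *host_path;   /* mountpoint as the host sees it */
    const char *ve_path;     /* same mountpoint, relative to the VE root */
};

static void show_mount(const struct mount *m, int viewer_is_host)
{
    if (viewer_is_host)
        printf("%s %s %s rw\n", m->devname, m->host_path, m->fstype);
    else
        printf("%s %s %s rw\n", m->fstype, m->ve_path, m->fstype);
}

int main(void)
{
    struct mount root = { "/dev/sda5", "ext3", "/vz/private/101", "/" };

    printf("host /proc/mounts line:\n");
    show_mount(&root, 1);
    printf("VE 101 /proc/mounts line:\n");
    show_mount(&root, 0);
    return 0;
}
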
+diff -uprN linux-2.6.15.orig/fs/nfs/nfsroot.c linux-2.6.15-ve025stab014/fs/nfs/nfsroot.c
+--- linux-2.6.15.orig/fs/nfs/nfsroot.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/nfs/nfsroot.c 2006-01-27 14:48:08.000000000 +0300
+@@ -310,7 +310,7 @@ static int __init root_nfs_name(char *na
+ /* Override them by options set on kernel command-line */
+ root_nfs_parse(name, buf);
+
+- cp = system_utsname.nodename;
++ cp = ve_utsname.nodename;
+ if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
+ printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
+ return -1;
+diff -uprN linux-2.6.15.orig/fs/open.c linux-2.6.15-ve025stab014/fs/open.c
+--- linux-2.6.15.orig/fs/open.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/open.c 2006-01-27 14:48:07.000000000 +0300
+@@ -23,6 +23,7 @@
+ #include <linux/fs.h>
+ #include <linux/personality.h>
+ #include <linux/pagemap.h>
++#include <linux/faudit.h>
+ #include <linux/syscalls.h>
+ #include <linux/rcupdate.h>
+
+@@ -119,6 +120,34 @@ static int vfs_statfs64(struct super_blo
+ return 0;
+ }
+
++static int faudit_statfs(struct vfsmount *mnt, struct dentry *dentry,
++ struct statfs *buf)
++{
++ struct faudit_stat_arg arg;
++
++ arg.mnt = mnt;
++ arg.dentry = dentry;
++ arg.stat = buf;
++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS, &arg)
++ != NOTIFY_DONE)
++ return arg.err;
++ return 0;
++}
++
++static int faudit_statfs64(struct vfsmount *mnt, struct dentry *dentry,
++ struct statfs64 *buf)
++{
++ struct faudit_stat_arg arg;
++
++ arg.mnt = mnt;
++ arg.dentry = dentry;
++ arg.stat = buf;
++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS64,
++ &arg) != NOTIFY_DONE)
++ return arg.err;
++ return 0;
++}
++
+ asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
+ {
+ struct nameidata nd;
+@@ -128,6 +157,8 @@ asmlinkage long sys_statfs(const char __
+ if (!error) {
+ struct statfs tmp;
+ error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
++ if (!error)
++ error = faudit_statfs(nd.mnt, nd.dentry, &tmp);
+ if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
+ error = -EFAULT;
+ path_release(&nd);
+@@ -147,6 +178,8 @@ asmlinkage long sys_statfs64(const char
+ if (!error) {
+ struct statfs64 tmp;
+ error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
++ if (!error)
++ error = faudit_statfs64(nd.mnt, nd.dentry, &tmp);
+ if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
+ error = -EFAULT;
+ path_release(&nd);
+@@ -166,6 +199,8 @@ asmlinkage long sys_fstatfs(unsigned int
+ if (!file)
+ goto out;
+ error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
++ if (!error)
++ error = faudit_statfs(file->f_vfsmnt, file->f_dentry, &tmp);
+ if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
+ error = -EFAULT;
+ fput(file);
+@@ -187,6 +222,8 @@ asmlinkage long sys_fstatfs64(unsigned i
+ if (!file)
+ goto out;
+ error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
++ if (!error)
++ error = faudit_statfs64(file->f_vfsmnt, file->f_dentry, &tmp);
+ if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
+ error = -EFAULT;
+ fput(file);
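
The fs/open.c hunks route every statfs result through a VITYPE_FAUDIT notifier so that a module can rewrite or veto the numbers a container sees. The stand-alone sketch below models that post-filter hook with a plain callback; the constants, struct and signatures are simplified stand-ins, not the kernel's notifier API.

#include <stdio.h>

#define NOTIFY_DONE 0
#define NOTIFY_STOP 1

struct statfs_buf {
    long blocks;
    long bfree;
};

typedef int (*statfs_hook)(struct statfs_buf *buf, int *err);

/* Pretend this container is limited to 1000 blocks. */
static int quota_hook(struct statfs_buf *buf, int *err)
{
    if (buf->blocks > 1000) {
        buf->blocks = 1000;
        if (buf->bfree > 1000)
            buf->bfree = 1000;
    }
    *err = 0;
    return NOTIFY_STOP;      /* handled: caller uses *err as the result */
}

static int do_statfs(struct statfs_buf *buf, statfs_hook hook)
{
    int err = 0;

    /* "Real" filesystem answer. */
    buf->blocks = 500000;
    buf->bfree = 123456;

    /* Post-filter, as faudit_statfs() does after vfs_statfs_native(). */
    if (hook && hook(buf, &err) != NOTIFY_DONE)
        return err;
    return 0;
}

int main(void)
{
    struct statfs_buf buf;

    do_statfs(&buf, NULL);
    printf("host view:      blocks=%ld free=%ld\n", buf.blocks, buf.bfree);
    do_statfs(&buf, quota_hook);
    printf("container view: blocks=%ld free=%ld\n", buf.blocks, buf.bfree);
    return 0;
}
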
+diff -uprN linux-2.6.15.orig/fs/partitions/check.c linux-2.6.15-ve025stab014/fs/partitions/check.c
+--- linux-2.6.15.orig/fs/partitions/check.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/partitions/check.c 2006-01-27 14:48:08.000000000 +0300
+@@ -124,6 +124,7 @@ char *disk_name(struct gendisk *hd, int
+
+ return buf;
+ }
++EXPORT_SYMBOL(disk_name);
+
+ const char *bdevname(struct block_device *bdev, char *buf)
+ {
+diff -uprN linux-2.6.15.orig/fs/proc/array.c linux-2.6.15-ve025stab014/fs/proc/array.c
+--- linux-2.6.15.orig/fs/proc/array.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/proc/array.c 2006-01-27 14:48:08.000000000 +0300
+@@ -76,6 +76,8 @@
+ #include <linux/cpuset.h>
+ #include <linux/rcupdate.h>
+
++#include <ub/beancounter.h>
++
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+ #include <asm/io.h>
+@@ -161,8 +163,13 @@ static inline char * task_state(struct t
+ struct group_info *group_info;
+ int g;
+ struct fdtable *fdt = NULL;
++ pid_t pid, ppid, tgid;
++
++ pid = get_task_pid(p);
++ tgid = get_task_tgid(p);
+
+ read_lock(&tasklist_lock);
++ ppid = get_task_ppid(p);
+ buffer += sprintf(buffer,
+ "State:\t%s\n"
+ "SleepAVG:\t%lu%%\n"
+@@ -174,9 +181,9 @@ static inline char * task_state(struct t
+ "Gid:\t%d\t%d\t%d\t%d\n",
+ get_task_state(p),
+ (p->sleep_avg/1024)*100/(1020000000/1024),
+- p->tgid,
+- p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
+- pid_alive(p) && p->ptrace ? p->parent->pid : 0,
++ tgid,
++ pid, ppid,
++ pid_alive(p) && p->ptrace ? get_task_pid(p->parent) : 0,
+ p->uid, p->euid, p->suid, p->fsuid,
+ p->gid, p->egid, p->sgid, p->fsgid);
+ read_unlock(&tasklist_lock);
+@@ -199,6 +206,14 @@ static inline char * task_state(struct t
+ put_group_info(group_info);
+
+ buffer += sprintf(buffer, "\n");
++
++#ifdef CONFIG_VE
++ buffer += sprintf(buffer,
++ "envID:\t%d\n"
++ "VPid:\t%d\n",
++ VE_TASK_INFO(p)->owner_env->veid,
++ virt_pid(p));
++#endif
+ return buffer;
+ }
+
+@@ -293,10 +308,27 @@ static inline char *task_cap(struct task
+ cap_t(p->cap_effective));
+ }
+
++#ifdef CONFIG_USER_RESOURCE
++static inline void ub_dump_task_info(struct task_struct *tsk,
++ char *stsk, int ltsk, char *smm, int lmm)
++{
++ print_ub_uid(tsk->task_bc.task_ub, stsk, ltsk);
++ task_lock(tsk);
++ if (tsk->mm)
++ print_ub_uid(tsk->mm->mm_ub, smm, lmm);
++ else
++ strncpy(smm, "N/A", lmm);
++ task_unlock(tsk);
++}
++#endif
++
+ int proc_pid_status(struct task_struct *task, char * buffer)
+ {
+ char * orig = buffer;
+ struct mm_struct *mm = get_task_mm(task);
++#ifdef CONFIG_USER_RESOURCE
++ char tsk_ub_info[64], mm_ub_info[64];
++#endif
+
+ buffer = task_name(task, buffer);
+ buffer = task_state(task, buffer);
+@@ -311,6 +343,14 @@ int proc_pid_status(struct task_struct *
+ #if defined(CONFIG_ARCH_S390)
+ buffer = task_show_regs(task, buffer);
+ #endif
++#ifdef CONFIG_USER_RESOURCE
++ ub_dump_task_info(task,
++ tsk_ub_info, sizeof(tsk_ub_info),
++ mm_ub_info, sizeof(mm_ub_info));
++
++ buffer += sprintf(buffer, "TaskUB:\t%s\n", tsk_ub_info);
++ buffer += sprintf(buffer, "MMUB:\t%s\n", mm_ub_info);
++#endif
+ return buffer - orig;
+ }
+
+@@ -333,6 +373,10 @@ static int do_task_stat(struct task_stru
+ unsigned long it_real_value = 0;
+ struct task_struct *t;
+ char tcomm[sizeof(task->comm)];
++#ifdef CONFIG_USER_RESOURCE
++ char ub_task_info[64];
++ char ub_mm_info[64];
++#endif
+
+ state = *get_task_state(task);
+ vsize = eip = esp = 0;
+@@ -370,11 +414,12 @@ static int do_task_stat(struct task_stru
+ }
+ if (task->signal) {
+ if (task->signal->tty) {
+- tty_pgrp = task->signal->tty->pgrp;
++ tty_pgrp = pid_type_to_vpid(PIDTYPE_PGID,
++ task->signal->tty->pgrp);
+ tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
+ }
+- pgid = process_group(task);
+- sid = task->signal->session;
++ pgid = get_task_pgid(task);
++ sid = get_task_sid(task);
+ cmin_flt = task->signal->cmin_flt;
+ cmaj_flt = task->signal->cmaj_flt;
+ cutime = task->signal->cutime;
+@@ -388,7 +433,7 @@ static int do_task_stat(struct task_stru
+ }
+ it_real_value = task->signal->it_real_value;
+ }
+- ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
++ ppid = get_task_ppid(task);
+ read_unlock(&tasklist_lock);
+
+ if (!whole || num_threads<2)
+@@ -407,14 +452,34 @@ static int do_task_stat(struct task_stru
+
+ /* Temporary variable needed for gcc-2.96 */
+ /* convert timespec -> nsec*/
++#ifndef CONFIG_VE
+ start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
+ + task->start_time.tv_nsec;
++#else
++ start_time = (unsigned long long)(task->start_time.tv_sec -
++ get_exec_env()->init_entry->start_time.tv_sec) *
++ NSEC_PER_SEC + task->start_time.tv_nsec -
++ get_exec_env()->init_entry->start_time.tv_nsec;
++#endif
+ /* convert nsec -> ticks */
+ start_time = nsec_to_clock_t(start_time);
+
++#ifdef CONFIG_USER_RESOURCE
++ ub_dump_task_info(task,
++ ub_task_info, sizeof(ub_task_info),
++ ub_mm_info, sizeof(ub_mm_info));
++#endif
++
+ res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
+ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
+-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
++%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu"
++#ifdef CONFIG_VE
++"0 0 0 0 0 0 0 0 %d %u"
++#endif
++#ifdef CONFIG_USER_RESOURCE
++ " %s %s"
++#endif
++ "\n",
+ task->pid,
+ tcomm,
+ state,
+@@ -459,7 +524,16 @@ static int do_task_stat(struct task_stru
+ task->exit_signal,
+ task_cpu(task),
+ task->rt_priority,
+- task->policy);
++ task->policy
++#ifdef CONFIG_VE
++ , virt_pid(task),
++ VEID(VE_TASK_INFO(task)->owner_env)
++#endif
++#ifdef CONFIG_USER_RESOURCE
++ , ub_task_info,
++ ub_mm_info
++#endif
++ );
+ if(mm)
+ mmput(mm);
+ return res;
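
fs/proc/array.c now reports virtualised pid/ppid/pgid values and appends envID/VPid (and, with CONFIG_USER_RESOURCE, beancounter ids) to /proc/<pid>/status. A minimal model of the "same task, different pid depending on the viewer" idea; get_task_pid() is reduced here to a simple field lookup, and the numbers are invented.

#include <stdio.h>

struct task {
    int global_pid;   /* kernel-wide pid */
    int virt_pid;     /* pid inside the owning container */
    int veid;
};

/* Stand-in for get_task_pid(): viewers inside a VE see the virtual pid,
 * the host (VE 0) keeps seeing the global one. */
static int pid_as_seen_from(const struct task *t, int viewer_veid)
{
    return viewer_veid == 0 ? t->global_pid : t->virt_pid;
}

int main(void)
{
    struct task apache = { 28401, 1342, 101 };

    printf("Pid shown on the host: %d\n", pid_as_seen_from(&apache, 0));
    printf("Pid shown in VE 101:   %d\n", pid_as_seen_from(&apache, 101));
    printf("extra status lines:    envID:\t%d\n", apache.veid);
    printf("                       VPid:\t%d\n", apache.virt_pid);
    return 0;
}
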
+diff -uprN linux-2.6.15.orig/fs/proc/base.c linux-2.6.15-ve025stab014/fs/proc/base.c
+--- linux-2.6.15.orig/fs/proc/base.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/proc/base.c 2006-01-27 14:48:08.000000000 +0300
+@@ -290,22 +290,25 @@ static int proc_fd_link(struct inode *in
+ struct files_struct *files;
+ struct file *file;
+ int fd = proc_type(inode) - PROC_TID_FD_DIR;
++ int err = -ENOENT;
+
+ files = get_files_struct(task);
+ if (files) {
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+- *mnt = mntget(file->f_vfsmnt);
+- *dentry = dget(file->f_dentry);
+- rcu_read_unlock();
+- put_files_struct(files);
+- return 0;
++ if (d_root_check(file->f_dentry, file->f_vfsmnt)) {
++ err = -EACCES;
++ } else {
++ *mnt = mntget(file->f_vfsmnt);
++ *dentry = dget(file->f_dentry);
++ err = 0;
++ }
+ }
+ rcu_read_unlock();
+ put_files_struct(files);
+ }
+- return -ENOENT;
++ return err;
+ }
+
+ static struct fs_struct *get_fs_struct(struct task_struct *task)
+@@ -325,10 +328,12 @@ static int proc_cwd_link(struct inode *i
+ int result = -ENOENT;
+ if (fs) {
+ read_lock(&fs->lock);
+- *mnt = mntget(fs->pwdmnt);
+- *dentry = dget(fs->pwd);
++ result = d_root_check(fs->pwd, fs->pwdmnt);
++ if (!result) {
++ *mnt = mntget(fs->pwdmnt);
++ *dentry = dget(fs->pwd);
++ }
+ read_unlock(&fs->lock);
+- result = 0;
+ put_fs_struct(fs);
+ }
+ return result;
+@@ -1302,6 +1307,10 @@ static struct inode *proc_pid_make_inode
+ struct inode * inode;
+ struct proc_inode *ei;
+
++ if (!ve_accessible(VE_TASK_INFO(task)->owner_env,
++ VE_OWNER_FSTYPE(sb->s_type)))
++ return NULL;
++
+ /* We need a new inode */
+
+ inode = new_inode(sb);
+@@ -1405,6 +1414,10 @@ static void pid_base_iput(struct dentry
+ spin_lock(&task->proc_lock);
+ if (task->proc_dentry == dentry)
+ task->proc_dentry = NULL;
++#ifdef CONFIG_VE
++ if (VE_TASK_INFO(task)->glob_proc_dentry == dentry)
++ VE_TASK_INFO(task)->glob_proc_dentry = NULL;
++#endif
+ spin_unlock(&task->proc_lock);
+ iput(inode);
+ }
+@@ -1878,14 +1891,14 @@ static int proc_self_readlink(struct den
+ int buflen)
+ {
+ char tmp[30];
+- sprintf(tmp, "%d", current->tgid);
++ sprintf(tmp, "%d", get_task_tgid(current));
+ return vfs_readlink(dentry,buffer,buflen,tmp);
+ }
+
+ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+ {
+ char tmp[30];
+- sprintf(tmp, "%d", current->tgid);
++ sprintf(tmp, "%d", get_task_tgid(current));
+ return ERR_PTR(vfs_follow_link(nd,tmp));
+ }
+
+@@ -1910,11 +1923,8 @@ static struct inode_operations proc_self
+ * of PIDTYPE_PID.
+ */
+
+-struct dentry *proc_pid_unhash(struct task_struct *p)
++struct dentry *__proc_pid_unhash(struct task_struct *p, struct dentry *proc_dentry)
+ {
+- struct dentry *proc_dentry;
+-
+- proc_dentry = p->proc_dentry;
+ if (proc_dentry != NULL) {
+
+ spin_lock(&dcache_lock);
+@@ -1932,6 +1942,14 @@ struct dentry *proc_pid_unhash(struct ta
+ return proc_dentry;
+ }
+
++void proc_pid_unhash(struct task_struct *p, struct dentry *pd[2])
++{
++ pd[0] = __proc_pid_unhash(p, p->proc_dentry);
++#ifdef CONFIG_VE
++ pd[1] = __proc_pid_unhash(p, VE_TASK_INFO(p)->glob_proc_dentry);
++#endif
++}
++
+ /**
+ * proc_pid_flush - recover memory used by stale /proc/@pid/x entries
+ * @proc_dentry: directoy to prune.
+@@ -1939,7 +1957,7 @@ struct dentry *proc_pid_unhash(struct ta
+ * Shrink the /proc directory that was used by the just killed thread.
+ */
+
+-void proc_pid_flush(struct dentry *proc_dentry)
++void __proc_pid_flush(struct dentry *proc_dentry)
+ {
+ might_sleep();
+ if(proc_dentry != NULL) {
+@@ -1948,12 +1966,21 @@ void proc_pid_flush(struct dentry *proc_
+ }
+ }
+
++void proc_pid_flush(struct dentry *proc_dentry[2])
++{
++ __proc_pid_flush(proc_dentry[0]);
++#ifdef CONFIG_VE
++ __proc_pid_flush(proc_dentry[1]);
++#endif
++}
++
+ /* SMP-safe */
+ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+ {
+ struct task_struct *task;
+ struct inode *inode;
+ struct proc_inode *ei;
++ struct dentry *pd[2];
+ unsigned tgid;
+ int died;
+
+@@ -1977,7 +2004,19 @@ struct dentry *proc_pid_lookup(struct in
+ goto out;
+
+ read_lock(&tasklist_lock);
+- task = find_task_by_pid(tgid);
++ task = find_task_by_pid_ve(tgid);
++ /* In theory we are allowed to lookup both /proc/VIRT_PID and
++ * /proc/GLOBAL_PID inside VE. However, current /proc implementation
++ * cannot maintain two references to one task, so that we have
++ * to prohibit /proc/GLOBAL_PID.
++ */
++ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tgid)) {
++ /* However, VE_ENTERed tasks are exception, they use global
++ * pids.
++ */
++ if (virt_pid(task) != tgid)
++ task = NULL;
++ }
+ if (task)
+ get_task_struct(task);
+ read_unlock(&tasklist_lock);
+@@ -2006,16 +2045,23 @@ struct dentry *proc_pid_lookup(struct in
+ died = 0;
+ d_add(dentry, inode);
+ spin_lock(&task->proc_lock);
++#ifdef CONFIG_VE
++ if (ve_is_super(VE_OWNER_FSTYPE(inode->i_sb->s_type)))
++ VE_TASK_INFO(task)->glob_proc_dentry = dentry;
++ else
++ task->proc_dentry = dentry;
++#else
+ task->proc_dentry = dentry;
++#endif
+ if (!pid_alive(task)) {
+- dentry = proc_pid_unhash(task);
++ proc_pid_unhash(task, pd);
+ died = 1;
+ }
+ spin_unlock(&task->proc_lock);
+
+ put_task_struct(task);
+ if (died) {
+- proc_pid_flush(dentry);
++ proc_pid_flush(pd);
+ goto out;
+ }
+ return NULL;
+@@ -2036,7 +2082,12 @@ static struct dentry *proc_task_lookup(s
+ goto out;
+
+ read_lock(&tasklist_lock);
+- task = find_task_by_pid(tid);
++ task = find_task_by_pid_ve(tid);
++ /* See comment above in similar place. */
++ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tid)) {
++ if (virt_pid(task) != tid)
++ task = NULL;
++ }
+ if (task)
+ get_task_struct(task);
+ read_unlock(&tasklist_lock);
+@@ -2080,7 +2131,8 @@ out:
+ * tasklist lock while doing this, and we must release it before
+ * we actually do the filldir itself, so we use a temp buffer..
+ */
+-static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
++static int get_tgid_list(int index, unsigned long version, unsigned int *tgids,
++ struct ve_struct *ve)
+ {
+ struct task_struct *p;
+ int nr_tgids = 0;
+@@ -2089,7 +2141,11 @@ static int get_tgid_list(int index, unsi
+ read_lock(&tasklist_lock);
+ p = NULL;
+ if (version) {
+- p = find_task_by_pid(version);
++ struct ve_struct *oldve;
++
++ oldve = set_exec_env(ve);
++ p = find_task_by_pid_ve(version);
++ (void)set_exec_env(oldve);
+ if (p && !thread_group_leader(p))
+ p = NULL;
+ }
+@@ -2097,10 +2153,10 @@ static int get_tgid_list(int index, unsi
+ if (p)
+ index = 0;
+ else
+- p = next_task(&init_task);
++ p = __first_task_ve(ve);
+
+- for ( ; p != &init_task; p = next_task(p)) {
+- int tgid = p->pid;
++ for ( ; p != NULL; p = __next_task_ve(ve, p)) {
++ int tgid = get_task_pid_ve(p, ve);
+ if (!pid_alive(p))
+ continue;
+ if (--index >= 0)
+@@ -2133,7 +2189,7 @@ static int get_tid_list(int index, unsig
+ * via next_thread().
+ */
+ if (pid_alive(task)) do {
+- int tid = task->pid;
++ int tid = get_task_pid(task);
+
+ if (--index >= 0)
+ continue;
+@@ -2170,7 +2226,8 @@ int proc_pid_readdir(struct file * filp,
+ next_tgid = filp->f_version;
+ filp->f_version = 0;
+ for (;;) {
+- nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
++ nr_tgids = get_tgid_list(nr, next_tgid, tgid_array,
++ filp->f_dentry->d_sb->s_type->owner_env);
+ if (!nr_tgids) {
+ /* no more entries ! */
+ break;
+diff -uprN linux-2.6.15.orig/fs/proc/generic.c linux-2.6.15-ve025stab014/fs/proc/generic.c
+--- linux-2.6.15.orig/fs/proc/generic.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/proc/generic.c 2006-01-27 14:48:08.000000000 +0300
+@@ -10,7 +10,9 @@
+
+ #include <linux/errno.h>
+ #include <linux/time.h>
++#include <linux/fs.h>
+ #include <linux/proc_fs.h>
++#include <linux/ve_owner.h>
+ #include <linux/stat.h>
+ #include <linux/module.h>
+ #include <linux/mount.h>
+@@ -27,6 +29,8 @@ static ssize_t proc_file_write(struct fi
+ size_t count, loff_t *ppos);
+ static loff_t proc_file_lseek(struct file *, loff_t, int);
+
++static DEFINE_RWLOCK(proc_tree_lock);
++
+ int proc_match(int len, const char *name, struct proc_dir_entry *de)
+ {
+ if (de->namelen != len)
+@@ -227,6 +231,7 @@ proc_file_lseek(struct file *file, loff_
+ return retval;
+ }
+
++#ifndef CONFIG_VE
+ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
+ {
+ struct inode *inode = dentry->d_inode;
+@@ -259,9 +264,12 @@ static int proc_getattr(struct vfsmount
+ generic_fillattr(inode, stat);
+ return 0;
+ }
++#endif
+
+ static struct inode_operations proc_file_inode_operations = {
++#ifndef CONFIG_VE
+ .setattr = proc_notify_change,
++#endif
+ };
+
+ /*
+@@ -269,14 +277,20 @@ static struct inode_operations proc_file
+ * returns the struct proc_dir_entry for "/proc/tty/driver", and
+ * returns "serial" in residual.
+ */
+-static int xlate_proc_name(const char *name,
++static int __xlate_proc_name(struct proc_dir_entry *root, const char *name,
+ struct proc_dir_entry **ret, const char **residual)
+ {
+ const char *cp = name, *next;
+ struct proc_dir_entry *de;
+ int len;
+
+- de = &proc_root;
++ if (*ret) {
++ de_get(*ret);
++ return 0;
++ }
++
++ read_lock(&proc_tree_lock);
++ de = root;
+ while (1) {
+ next = strchr(cp, '/');
+ if (!next)
+@@ -287,15 +301,35 @@ static int xlate_proc_name(const char *n
+ if (proc_match(len, cp, de))
+ break;
+ }
+- if (!de)
++ if (!de) {
++ read_unlock(&proc_tree_lock);
+ return -ENOENT;
++ }
+ cp += len + 1;
+ }
+ *residual = cp;
+- *ret = de;
++ *ret = de_get(de);
++ read_unlock(&proc_tree_lock);
+ return 0;
+ }
+
++#ifndef CONFIG_VE
++#define xlate_proc_loc_name xlate_proc_name
++#else
++static int xlate_proc_loc_name(const char *name,
++ struct proc_dir_entry **ret, const char **residual)
++{
++ return __xlate_proc_name(get_exec_env()->proc_root,
++ name, ret, residual);
++}
++#endif
++
++static int xlate_proc_name(const char *name,
++ struct proc_dir_entry **ret, const char **residual)
++{
++ return __xlate_proc_name(&proc_root, name, ret, residual);
++}
++
+ static DEFINE_IDR(proc_inum_idr);
+ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
+
+@@ -367,6 +401,20 @@ static struct dentry_operations proc_den
+ .d_delete = proc_delete_dentry,
+ };
+
++static struct proc_dir_entry *__proc_lookup(struct proc_dir_entry *dir,
++ struct dentry *d)
++{
++ struct proc_dir_entry *de;
++
++ for (de = dir->subdir; de; de = de->next) {
++ if (de->namelen != d->d_name.len)
++ continue;
++ if (!memcmp(d->d_name.name, de->name, de->namelen))
++ break;
++ }
++ return de_get(de);
++}
++
+ /*
+ * Don't create negative dentries here, return -ENOENT by hand
+ * instead.
+@@ -374,34 +422,147 @@ static struct dentry_operations proc_den
+ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+ {
+ struct inode *inode = NULL;
+- struct proc_dir_entry * de;
++ struct proc_dir_entry *lde, *gde;
+ int error = -ENOENT;
+
+ lock_kernel();
+- de = PDE(dir);
+- if (de) {
+- for (de = de->subdir; de ; de = de->next) {
+- if (de->namelen != dentry->d_name.len)
+- continue;
+- if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
+- unsigned int ino = de->low_ino;
++ lde = LPDE(dir);
+
+- error = -EINVAL;
+- inode = proc_get_inode(dir->i_sb, ino, de);
+- break;
+- }
+- }
+- }
++ if (!lde)
++ goto out;
++
++ read_lock(&proc_tree_lock);
++ lde = __proc_lookup(lde, dentry);
++#ifdef CONFIG_VE
++ gde = GPDE(dir);
++ if (gde)
++ gde = __proc_lookup(gde, dentry);
++#else
++ gde = NULL;
++#endif
++ read_unlock(&proc_tree_lock);
++
++ /*
++ * There are following possible cases after lookup:
++ *
++ * lde gde
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * NULL NULL ENOENT
++ * loc NULL found in local tree
++ * loc glob found in both trees
++ * NULL glob found in global tree
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ *
++ * We initialized inode as follows after lookup:
++ *
++ * inode->lde inode->gde
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * loc NULL in local tree
++ * loc glob both trees
++ * glob glob global tree
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * i.e. inode->lde is always initialized
++ */
++
++ if (lde == NULL && gde == NULL)
++ goto out;
++
++ if (lde != NULL)
++ inode = proc_get_inode(dir->i_sb, lde->low_ino, lde);
++ else
++ inode = proc_get_inode(dir->i_sb, gde->low_ino, gde);
++
++ /*
++ * We can sleep in proc_get_inode(), but since we have i_sem
++ * being taken, no one can setup GPDE/LPDE on this inode.
++ */
++ if (!inode)
++ goto out_put;
++
++#ifdef CONFIG_VE
++ GPDE(inode) = de_get(gde);
++ if (gde)
++ __module_get(gde->owner);
++
++ /* if dentry is found in both trees and it is a directory
++ * then inode's nlink count must be altered, because local
++ * and global subtrees may differ.
++ * on the other hand, they may intersect, so actual nlink
++ * value is difficult to calculate - upper estimate is used
++ * instead of it.
++ * dentry found in global tree only must not be writable
++ * in non-super ve.
++ */
++ if (lde && gde && lde != gde && gde->nlink > 1)
++ inode->i_nlink += gde->nlink - 2;
++ if (lde == NULL && !ve_is_super(
++ VE_OWNER_FSTYPE(dir->i_sb->s_type)))
++ inode->i_mode &= ~S_IWUGO;
++#endif
+ unlock_kernel();
++ dentry->d_op = &proc_dentry_operations;
++ d_add(dentry, inode);
++ de_put(lde);
++ de_put(gde);
++ return NULL;
+
+- if (inode) {
+- dentry->d_op = &proc_dentry_operations;
+- d_add(dentry, inode);
+- return NULL;
+- }
++out_put:
++ de_put(lde);
++ de_put(gde);
++out:
++ unlock_kernel();
+ return ERR_PTR(error);
+ }
+
++struct proc_dir_reader {
++ struct list_head list;
++ struct proc_dir_entry *next;
++};
++
++static LIST_HEAD(proc_dir_readers);
++static DEFINE_SPINLOCK(proc_dir_readers_lock);
++
++static inline void add_reader(struct proc_dir_reader *r,
++ struct proc_dir_entry *cur)
++{
++ r->next = cur->next;
++ spin_lock(&proc_dir_readers_lock);
++ list_add(&r->list, &proc_dir_readers);
++ spin_unlock(&proc_dir_readers_lock);
++}
++
++static inline struct proc_dir_entry *del_reader(struct proc_dir_reader *r)
++{
++ spin_lock(&proc_dir_readers_lock);
++ list_del(&r->list);
++ spin_unlock(&proc_dir_readers_lock);
++ return r->next;
++}
++
++static void notify_readers(struct proc_dir_entry *de)
++{
++ struct proc_dir_reader *r;
++
++ /* lockless since proc_tree_lock is taken for writing */
++ list_for_each_entry(r, &proc_dir_readers, list)
++ if (r->next == de)
++ r->next = de->next;
++}
++
++static inline int in_tree(struct proc_dir_entry *de, struct proc_dir_entry *dir)
++{
++ struct proc_dir_entry *gde;
++
++ for (gde = dir->subdir; gde; gde = gde->next) {
++ if (de->namelen != gde->namelen)
++ continue;
++ if (memcmp(de->name, gde->name, gde->namelen))
++ continue;
++ return 1;
++ }
++ return 0;
++}
++
+ /*
+ * This returns non-zero if at EOF, so that the /proc
+ * root directory can use this and check if it should
+@@ -419,6 +580,7 @@ int proc_readdir(struct file * filp,
+ int i;
+ struct inode *inode = filp->f_dentry->d_inode;
+ int ret = 0;
++ struct proc_dir_reader this;
+
+ lock_kernel();
+
+@@ -445,13 +607,12 @@ int proc_readdir(struct file * filp,
+ filp->f_pos++;
+ /* fall through */
+ default:
++ read_lock(&proc_tree_lock);
+ de = de->subdir;
+ i -= 2;
+ for (;;) {
+- if (!de) {
+- ret = 1;
+- goto out;
+- }
++ if (!de)
++ goto chk_global;
+ if (!i)
+ break;
+ de = de->next;
+@@ -459,12 +620,60 @@ int proc_readdir(struct file * filp,
+ }
+
+ do {
+- if (filldir(dirent, de->name, de->namelen, filp->f_pos,
+- de->low_ino, de->mode >> 12) < 0)
++ de_get(de);
++ add_reader(&this, de);
++ read_unlock(&proc_tree_lock);
++ ret = filldir(dirent, de->name, de->namelen,
++ filp->f_pos, de->low_ino,
++ de->mode >> 12);
++ read_lock(&proc_tree_lock);
++ de_put(de);
++ de = del_reader(&this);
++ if (ret < 0) {
++ read_unlock(&proc_tree_lock);
++ ret = 0;
+ goto out;
++ }
+ filp->f_pos++;
+- de = de->next;
+ } while (de);
++chk_global:
++#ifdef CONFIG_VE
++ de = GPDE(inode);
++ if (de == NULL)
++ goto done;
++
++ de = de->subdir;
++ while (de) {
++ if (in_tree(de, LPDE(inode))) {
++ de = de->next;
++ continue;
++ }
++
++ if (i > 0) {
++ i--;
++ de = de->next;
++ continue;
++ }
++
++ de_get(de);
++ add_reader(&this, de);
++ read_unlock(&proc_tree_lock);
++ ret = filldir(dirent, de->name, de->namelen,
++ filp->f_pos, de->low_ino,
++ de->mode >> 12);
++ read_lock(&proc_tree_lock);
++ de_put(de);
++ de = del_reader(&this);
++ if (ret < 0) {
++ read_unlock(&proc_tree_lock);
++ ret = 0;
++ goto out;
++ }
++ filp->f_pos++;
++ }
++done:
++#endif
++ read_unlock(&proc_tree_lock);
+ }
+ ret = 1;
+ out: unlock_kernel();
+@@ -486,8 +695,10 @@ static struct file_operations proc_dir_o
+ */
+ static struct inode_operations proc_dir_inode_operations = {
+ .lookup = proc_lookup,
++#ifndef CONFIG_VE
+ .getattr = proc_getattr,
+ .setattr = proc_notify_change,
++#endif
+ };
+
+ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
+@@ -497,10 +708,20 @@ static int proc_register(struct proc_dir
+ i = get_inode_number();
+ if (i == 0)
+ return -EAGAIN;
++
++ write_lock(&proc_tree_lock);
++ if (dir->deleted) {
++ write_unlock(&proc_tree_lock);
++ release_inode_number(i);
++ return -ENOENT;
++ }
++
+ dp->low_ino = i;
+ dp->next = dir->subdir;
+- dp->parent = dir;
++ dp->parent = de_get(dir);
+ dir->subdir = dp;
++ write_unlock(&proc_tree_lock);
++
+ if (S_ISDIR(dp->mode)) {
+ if (dp->proc_iops == NULL) {
+ dp->proc_fops = &proc_dir_operations;
+@@ -554,24 +775,26 @@ static struct proc_dir_entry *proc_creat
+ mode_t mode,
+ nlink_t nlink)
+ {
+- struct proc_dir_entry *ent = NULL;
++ struct proc_dir_entry *ent;
+ const char *fn = name;
+ int len;
+
+ /* make sure name is valid */
+- if (!name || !strlen(name)) goto out;
++ if (!name || !strlen(name))
++ goto out;
+
+- if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
++ if (xlate_proc_loc_name(name, parent, &fn) != 0)
+ goto out;
+
+ /* At this point there must not be any '/' characters beyond *fn */
+ if (strchr(fn, '/'))
+- goto out;
++ goto out_put;
+
+ len = strlen(fn);
+
+ ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
+- if (!ent) goto out;
++ if (!ent)
++ goto out_put;
+
+ memset(ent, 0, sizeof(struct proc_dir_entry));
+ memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
+@@ -579,8 +802,13 @@ static struct proc_dir_entry *proc_creat
+ ent->namelen = len;
+ ent->mode = mode;
+ ent->nlink = nlink;
+- out:
++ atomic_set(&ent->count, 1);
+ return ent;
++
++out_put:
++ de_put(*parent);
++out:
++ return NULL;
+ }
+
+ struct proc_dir_entry *proc_symlink(const char *name,
+@@ -604,6 +832,7 @@ struct proc_dir_entry *proc_symlink(cons
+ kfree(ent);
+ ent = NULL;
+ }
++ de_put(parent);
+ }
+ return ent;
+ }
+@@ -622,6 +851,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
+ kfree(ent);
+ ent = NULL;
+ }
++ de_put(parent);
+ }
+ return ent;
+ }
+@@ -660,9 +890,28 @@ struct proc_dir_entry *create_proc_entry
+ kfree(ent);
+ ent = NULL;
+ }
++ de_put(parent);
+ }
+ return ent;
+ }
++EXPORT_SYMBOL(remove_proc_glob_entry);
++
++struct proc_dir_entry *create_proc_glob_entry(const char *name, mode_t mode,
++ struct proc_dir_entry *parent)
++{
++ const char *path;
++ struct proc_dir_entry *ent;
++
++ path = name;
++ if (xlate_proc_name(path, &parent, &name) != 0)
++ return NULL;
++
++ ent = create_proc_entry(name, mode, parent);
++ de_put(parent);
++ return ent;
++}
++
++EXPORT_SYMBOL(create_proc_glob_entry);
+
+ void free_proc_entry(struct proc_dir_entry *de)
+ {
+@@ -682,20 +931,21 @@ void free_proc_entry(struct proc_dir_ent
+ * Remove a /proc entry and free it if it's not currently in use.
+ * If it is in use, we set the 'deleted' flag.
+ */
+-void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
++static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+ {
+ struct proc_dir_entry **p;
+ struct proc_dir_entry *de;
+ const char *fn = name;
+ int len;
+
+- if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
+- goto out;
+ len = strlen(fn);
++ write_lock(&proc_tree_lock);
+ for (p = &parent->subdir; *p; p=&(*p)->next ) {
+ if (!proc_match(len, fn, *p))
+ continue;
++
+ de = *p;
++ notify_readers(de);
+ *p = de->next;
+ de->next = NULL;
+ if (S_ISDIR(de->mode))
+@@ -703,15 +953,43 @@ void remove_proc_entry(const char *name,
+ proc_kill_inodes(de);
+ de->nlink = 0;
+ WARN_ON(de->subdir);
+- if (!atomic_read(&de->count))
+- free_proc_entry(de);
+- else {
+- de->deleted = 1;
+- printk("remove_proc_entry: %s/%s busy, count=%d\n",
+- parent->name, de->name, atomic_read(&de->count));
+- }
++ de->deleted = 1;
++ de_put(de);
++ de_put(parent);
+ break;
+ }
+-out:
+- return;
++ write_unlock(&proc_tree_lock);
++}
++
++void remove_proc_loc_entry(const char *name, struct proc_dir_entry *parent)
++{
++ const char *path;
++
++ path = name;
++ if (xlate_proc_loc_name(path, &parent, &name) != 0)
++ return;
++
++ __remove_proc_entry(name, parent);
++ de_put(parent);
++}
++
++void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent)
++{
++ const char *path;
++
++ path = name;
++ if (xlate_proc_name(path, &parent, &name) != 0)
++ return;
++
++ __remove_proc_entry(name, parent);
++ de_put(parent);
++}
++
++void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
++{
++ remove_proc_loc_entry(name, parent);
++#ifdef CONFIG_VE
++ if (ve_is_super(get_exec_env()))
++ remove_proc_glob_entry(name, parent);
++#endif
+ }
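
The fs/proc/generic.c rework gives every VE a local /proc tree overlaid on the global one: lookup consults the local tree first, falls back to the global tree, and entries found only globally become read-only inside a container (see the lde/gde table in the hunk above). A small user-space model of that precedence; the single-linked entry list and names are simplified.

#include <stdio.h>
#include <string.h>

struct pde {
    const char *name;
    struct pde *next;
};

static struct pde *find(struct pde *head, const char *name)
{
    for (; head; head = head->next)
        if (strcmp(head->name, name) == 0)
            return head;
    return NULL;
}

/* Mirrors the lde/gde decision table from proc_lookup(). */
static void lookup(struct pde *local, struct pde *global, const char *name)
{
    struct pde *lde = find(local, name);
    struct pde *gde = find(global, name);

    if (!lde && !gde)
        printf("%-10s -> ENOENT\n", name);
    else if (lde)
        printf("%-10s -> local tree%s\n", name, gde ? " (shadows global)" : "");
    else
        printf("%-10s -> global tree (read-only inside a VE)\n", name);
}

int main(void)
{
    struct pde g_cpuinfo = { "cpuinfo", NULL };
    struct pde g_meminfo = { "meminfo", &g_cpuinfo };  /* global: meminfo, cpuinfo */
    struct pde l_meminfo = { "meminfo", NULL };        /* per-VE override */

    lookup(&l_meminfo, &g_meminfo, "meminfo");
    lookup(&l_meminfo, &g_meminfo, "cpuinfo");
    lookup(&l_meminfo, &g_meminfo, "vz");
    return 0;
}
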
+diff -uprN linux-2.6.15.orig/fs/proc/inode.c linux-2.6.15-ve025stab014/fs/proc/inode.c
+--- linux-2.6.15.orig/fs/proc/inode.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/proc/inode.c 2006-01-27 14:48:08.000000000 +0300
+@@ -8,6 +8,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
++#include <linux/ve_owner.h>
+ #include <linux/string.h>
+ #include <linux/stat.h>
+ #include <linux/file.h>
+@@ -21,34 +22,25 @@
+
+ extern void free_proc_entry(struct proc_dir_entry *);
+
+-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
+-{
+- if (de)
+- atomic_inc(&de->count);
+- return de;
+-}
+-
+ /*
+ * Decrements the use count and checks for deferred deletion.
+ */
+-static void de_put(struct proc_dir_entry *de)
++void de_put(struct proc_dir_entry *de)
+ {
+ if (de) {
+- lock_kernel();
+ if (!atomic_read(&de->count)) {
+ printk("de_put: entry %s already free!\n", de->name);
+- unlock_kernel();
+ return;
+ }
+
+ if (atomic_dec_and_test(&de->count)) {
+- if (de->deleted) {
+- printk("de_put: deferred delete of %s\n",
++ if (unlikely(!de->deleted)) {
++ printk("de_put: early delete of %s\n",
+ de->name);
+- free_proc_entry(de);
++ return;
+ }
++ free_proc_entry(de);
+ }
+- unlock_kernel();
+ }
+ }
+
+@@ -68,12 +60,19 @@ static void proc_delete_inode(struct ino
+ put_task_struct(tsk);
+
+ /* Let go of any associated proc directory entry */
+- de = PROC_I(inode)->pde;
++ de = LPDE(inode);
+ if (de) {
+ if (de->owner)
+ module_put(de->owner);
+ de_put(de);
+ }
++#ifdef CONFIG_VE
++ de = GPDE(inode);
++ if (de) {
++ module_put(de->owner);
++ de_put(de);
++ }
++#endif
+ clear_inode(inode);
+ }
+
+@@ -100,6 +99,9 @@ static struct inode *proc_alloc_inode(st
+ ei->pde = NULL;
+ inode = &ei->vfs_inode;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
++#ifdef CONFIG_VE
++ GPDE(inode) = NULL;
++#endif
+ return inode;
+ }
+
+@@ -213,6 +215,12 @@ int proc_fill_super(struct super_block *
+ s->s_root = d_alloc_root(root_inode);
+ if (!s->s_root)
+ goto out_no_root;
++#ifdef CONFIG_VE
++ LPDE(root_inode) = de_get(get_exec_env()->proc_root);
++ GPDE(root_inode) = &proc_root;
++#else
++ LPDE(root_inode) = &proc_root;
++#endif
+ return 0;
+
+ out_no_root:
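
fs/proc/inode.c turns proc_dir_entry removal into "mark deleted, drop the creator's reference": de_put() frees the entry only when the last reference goes away and the deleted flag is already set, so an open /proc file can outlive remove_proc_entry(). A stand-alone sketch of that life cycle, with the entry reduced to the two fields that matter.

#include <stdio.h>
#include <stdlib.h>

struct entry {
    int count;        /* reference count */
    int deleted;      /* set by removal, consulted on last put */
    const char *name;
};

static struct entry *de_get(struct entry *de)
{
    if (de)
        de->count++;
    return de;
}

static void de_put(struct entry *de)
{
    if (!de)
        return;
    if (--de->count == 0) {
        if (!de->deleted) {
            printf("de_put: early delete of %s\n", de->name);
            return;
        }
        printf("freeing %s\n", de->name);
        free(de);
    }
}

int main(void)
{
    struct entry *de = malloc(sizeof(*de));

    de->count = 1;          /* creator's reference */
    de->deleted = 0;
    de->name = "net/dev";

    de_get(de);             /* an open /proc file pins the entry */
    de->deleted = 1;        /* remove_proc_entry(): mark deleted and ... */
    de_put(de);             /* ... drop the creator's reference */
    printf("still alive, count=%d\n", de->count);
    de_put(de);             /* last user goes away -> freed here */
    return 0;
}
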
+diff -uprN linux-2.6.15.orig/fs/proc/proc_misc.c linux-2.6.15-ve025stab014/fs/proc/proc_misc.c
+--- linux-2.6.15.orig/fs/proc/proc_misc.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/proc/proc_misc.c 2006-01-27 14:48:08.000000000 +0300
+@@ -31,6 +31,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/swap.h>
+ #include <linux/slab.h>
++#include <linux/virtinfo.h>
+ #include <linux/smp.h>
+ #include <linux/signal.h>
+ #include <linux/module.h>
+@@ -52,8 +53,10 @@
+ #include <asm/div64.h>
+ #include "internal.h"
+
+-#define LOAD_INT(x) ((x) >> FSHIFT)
+-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
++#ifdef CONFIG_FAIRSCHED
++#include <linux/fairsched.h>
++#endif
++
+ /*
+ * Warning: stuff below (imported functions) assumes that its output will fit
+ * into one page. For some of those functions it may be wrong. Moreover, we
+@@ -84,15 +87,33 @@ static int loadavg_read_proc(char *page,
+ {
+ int a, b, c;
+ int len;
+-
+- a = avenrun[0] + (FIXED_1/200);
+- b = avenrun[1] + (FIXED_1/200);
+- c = avenrun[2] + (FIXED_1/200);
++ unsigned long __nr_running;
++ int __nr_threads;
++ unsigned long *__avenrun;
++ struct ve_struct *ve;
++
++ ve = get_exec_env();
++
++ if (ve_is_super(ve)) {
++ __avenrun = &avenrun[0];
++ __nr_running = nr_running();
++ __nr_threads = nr_threads;
++ }
++#ifdef CONFIG_VE
++ else {
++ __avenrun = &ve->avenrun[0];
++ __nr_running = nr_running_ve(ve);
++ __nr_threads = atomic_read(&ve->pcounter);
++ }
++#endif
++ a = __avenrun[0] + (FIXED_1/200);
++ b = __avenrun[1] + (FIXED_1/200);
++ c = __avenrun[2] + (FIXED_1/200);
+ len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
+ LOAD_INT(a), LOAD_FRAC(a),
+ LOAD_INT(b), LOAD_FRAC(b),
+ LOAD_INT(c), LOAD_FRAC(c),
+- nr_running(), nr_threads, last_pid);
++ __nr_running, __nr_threads, last_pid);
+ return proc_calc_metrics(page, start, off, count, eof, len);
+ }
+
+@@ -105,6 +126,13 @@ static int uptime_read_proc(char *page,
+ cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
+
+ do_posix_clock_monotonic_gettime(&uptime);
++#ifdef CONFIG_VE
++ if (!ve_is_super(get_exec_env())) {
++ set_normalized_timespec(&uptime,
++ uptime.tv_sec - get_exec_env()->start_timespec.tv_sec,
++ uptime.tv_nsec - get_exec_env()->start_timespec.tv_nsec);
++ }
++#endif
+ cputime_to_timespec(idletime, &idle);
+ len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
+ (unsigned long) uptime.tv_sec,
+@@ -118,35 +146,37 @@ static int uptime_read_proc(char *page,
+ static int meminfo_read_proc(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ {
+- struct sysinfo i;
++ struct meminfo mi;
+ int len;
+- struct page_state ps;
+- unsigned long inactive;
+- unsigned long active;
+- unsigned long free;
+- unsigned long committed;
+- unsigned long allowed;
++ unsigned long dummy;
+ struct vmalloc_info vmi;
+- long cached;
+
+- get_page_state(&ps);
+- get_zone_counts(&active, &inactive, &free);
++ get_page_state(&mi.ps);
++ get_zone_counts(&mi.active, &mi.inactive, &dummy);
+
+ /*
+ * display in kilobytes.
+ */
+ #define K(x) ((x) << (PAGE_SHIFT - 10))
+- si_meminfo(&i);
+- si_swapinfo(&i);
+- committed = atomic_read(&vm_committed_space);
+- allowed = ((totalram_pages - hugetlb_total_pages())
+- * sysctl_overcommit_ratio / 100) + total_swap_pages;
++ si_meminfo(&mi.si);
++ si_swapinfo(&mi.si);
++ mi.committed_space = atomic_read(&vm_committed_space);
++ mi.swapcache = total_swapcache_pages;
++ mi.cache = get_page_cache_size() - mi.swapcache - mi.si.bufferram;
++ if (mi.cache < 0)
++ mi.cache = 0;
+
+- cached = get_page_cache_size() - total_swapcache_pages - i.bufferram;
+- if (cached < 0)
+- cached = 0;
++ mi.vmalloc_total = (VMALLOC_END - VMALLOC_START) >> PAGE_SHIFT;
++ mi.allowed = ((totalram_pages - hugetlb_total_pages())
++ * sysctl_overcommit_ratio / 100) + total_swap_pages;
+
+ get_vmalloc_info(&vmi);
++ mi.vmalloc_used = vmi.used >> PAGE_SHIFT;
++ mi.vmalloc_largest = vmi.largest_chunk >> PAGE_SHIFT;
++
++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
++ & NOTIFY_FAIL)
++ return -ENOMSG;
+
+ /*
+ * Tagged format, for easy grepping and expansion.
+@@ -175,29 +205,29 @@ static int meminfo_read_proc(char *page,
+ "VmallocTotal: %8lu kB\n"
+ "VmallocUsed: %8lu kB\n"
+ "VmallocChunk: %8lu kB\n",
+- K(i.totalram),
+- K(i.freeram),
+- K(i.bufferram),
+- K(cached),
+- K(total_swapcache_pages),
+- K(active),
+- K(inactive),
+- K(i.totalhigh),
+- K(i.freehigh),
+- K(i.totalram-i.totalhigh),
+- K(i.freeram-i.freehigh),
+- K(i.totalswap),
+- K(i.freeswap),
+- K(ps.nr_dirty),
+- K(ps.nr_writeback),
+- K(ps.nr_mapped),
+- K(ps.nr_slab),
+- K(allowed),
+- K(committed),
+- K(ps.nr_page_table_pages),
+- (unsigned long)VMALLOC_TOTAL >> 10,
+- vmi.used >> 10,
+- vmi.largest_chunk >> 10
++ K(mi.si.totalram),
++ K(mi.si.freeram),
++ K(mi.si.bufferram),
++ K(mi.cache),
++ K(mi.swapcache),
++ K(mi.active),
++ K(mi.inactive),
++ K(mi.si.totalhigh),
++ K(mi.si.freehigh),
++ K(mi.si.totalram-mi.si.totalhigh),
++ K(mi.si.freeram-mi.si.freehigh),
++ K(mi.si.totalswap),
++ K(mi.si.freeswap),
++ K(mi.ps.nr_dirty),
++ K(mi.ps.nr_writeback),
++ K(mi.ps.nr_mapped),
++ K(mi.ps.nr_slab),
++ K(mi.allowed),
++ K(mi.committed_space),
++ K(mi.ps.nr_page_table_pages),
++ K(mi.vmalloc_total),
++ K(mi.vmalloc_used),
++ K(mi.vmalloc_largest)
+ );
+
+ len += hugetlb_report_meminfo(page + len);
+@@ -337,18 +367,15 @@ static struct file_operations proc_slabi
+ .release = seq_release,
+ };
+
+-static int show_stat(struct seq_file *p, void *v)
++static void show_stat_ve0(struct seq_file *p)
+ {
+ int i;
+- unsigned long jif;
++ struct page_state page_state;
+ cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
+ u64 sum = 0;
+
+ user = nice = system = idle = iowait =
+ irq = softirq = steal = cputime64_zero;
+- jif = - wall_to_monotonic.tv_sec;
+- if (wall_to_monotonic.tv_nsec)
+- --jif;
+
+ for_each_cpu(i) {
+ int j;
+@@ -402,9 +429,84 @@ static int show_stat(struct seq_file *p,
+ for (i = 0; i < NR_IRQS; i++)
+ seq_printf(p, " %u", kstat_irqs(i));
+ #endif
++ get_full_page_state(&page_state);
++ seq_printf(p, "\nswap %lu %lu\n", page_state.pswpin, page_state.pswpout);
++}
++
++#ifdef CONFIG_VE
++static void show_stat_ve(struct seq_file *p, struct ve_struct *env)
++{
++ int i;
++ u64 user, nice, system;
++ cycles_t idle, iowait;
++ cpumask_t ve_cpus;
++
++ ve_cpu_online_map(env, &ve_cpus);
++
++ user = nice = system = idle = iowait = 0;
++ for_each_cpu_mask(i, ve_cpus) {
++ user += VE_CPU_STATS(env, i)->user;
++ nice += VE_CPU_STATS(env, i)->nice;
++ system += VE_CPU_STATS(env, i)->system;
++ idle += ve_sched_get_idle_time(env, i);
++ iowait += ve_sched_get_iowait_time(env, i);
++ }
++
++ seq_printf(p, "cpu %llu %llu %llu %llu %llu 0 0 0\n",
++ (unsigned long long)cputime64_to_clock_t(user),
++ (unsigned long long)cputime64_to_clock_t(nice),
++ (unsigned long long)cputime64_to_clock_t(system),
++ (unsigned long long)cycles_to_clocks(idle),
++ (unsigned long long)cycles_to_clocks(iowait));
++
++ for_each_cpu_mask(i, ve_cpus) {
++ user = VE_CPU_STATS(env, i)->user;
++ nice = VE_CPU_STATS(env, i)->nice;
++ system = VE_CPU_STATS(env, i)->system;
++ idle = ve_sched_get_idle_time(env, i);
++ iowait = ve_sched_get_iowait_time(env, i);
++ seq_printf(p, "cpu%d %llu %llu %llu %llu %llu 0 0 0\n",
++ i,
++ (unsigned long long)cputime64_to_clock_t(user),
++ (unsigned long long)cputime64_to_clock_t(nice),
++ (unsigned long long)cputime64_to_clock_t(system),
++ (unsigned long long)cycles_to_clocks(idle),
++ (unsigned long long)cycles_to_clocks(iowait));
++ }
++ seq_printf(p, "intr 0\nswap 0 0\n");
++}
++#endif
++
++int show_stat(struct seq_file *p, void *v)
++{
++ extern unsigned long total_forks;
++ unsigned long seq, jif;
++ struct ve_struct *env;
++ unsigned long __nr_running, __nr_iowait;
++
++ do {
++ seq = read_seqbegin(&xtime_lock);
++ jif = - wall_to_monotonic.tv_sec;
++ if (wall_to_monotonic.tv_nsec)
++ --jif;
++ } while (read_seqretry(&xtime_lock, seq));
++
++ env = get_exec_env();
++ if (ve_is_super(env)) {
++ show_stat_ve0(p);
++ __nr_running = nr_running();
++ __nr_iowait = nr_iowait();
++ }
++#ifdef CONFIG_VE
++ else {
++ show_stat_ve(p, env);
++ __nr_running = nr_running_ve(env);
++ __nr_iowait = nr_iowait_ve(env);
++ }
++#endif
+
+ seq_printf(p,
+- "\nctxt %llu\n"
++ "ctxt %llu\n"
+ "btime %lu\n"
+ "processes %lu\n"
+ "procs_running %lu\n"
+@@ -412,8 +514,8 @@ static int show_stat(struct seq_file *p,
+ nr_context_switches(),
+ (unsigned long)jif,
+ total_forks,
+- nr_running(),
+- nr_iowait());
++ __nr_running,
++ __nr_iowait);
+
+ return 0;
+ }
+@@ -510,7 +612,8 @@ static int cmdline_read_proc(char *page,
+ {
+ int len;
+
+- len = sprintf(page, "%s\n", saved_command_line);
++ len = sprintf(page, "%s\n",
++ ve_is_super(get_exec_env()) ? saved_command_line : "");
+ return proc_calc_metrics(page, start, off, count, eof, len);
+ }
+
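
The loadavg hunk above only changes which avenrun[] array and task counters get sampled; the /proc text itself is still produced by the kernel's LOAD_INT/LOAD_FRAC fixed-point macros (dropped from this file, presumably because the patchset moves them to a shared header). A minimal standalone sketch of that formatting, using the stock FSHIFT/FIXED_1 values and a made-up load figure:

/* Fixed-point /proc/loadavg formatting, illustrated in userspace. */
#include <stdio.h>

#define FSHIFT   11                    /* bits of fractional precision */
#define FIXED_1  (1 << FSHIFT)         /* 1.0 in fixed point */
#define LOAD_INT(x)  ((x) >> FSHIFT)
#define LOAD_FRAC(x) (LOAD_INT(((x) & (FIXED_1 - 1)) * 100))

int main(void)
{
	/* a 1-minute load of 0.42, stored the way avenrun[] stores it */
	unsigned long avenrun1 = (unsigned long)(0.42 * FIXED_1);
	unsigned long a = avenrun1 + (FIXED_1 / 200);   /* round to nearest 0.01 */

	printf("%lu.%02lu\n", LOAD_INT(a), LOAD_FRAC(a));   /* prints 0.42 */
	return 0;
}

The FIXED_1/200 term keeps the same rounding whether the value comes from the global avenrun[] or from the per-VE copy selected above.
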
+diff -uprN linux-2.6.15.orig/fs/proc/proc_tty.c linux-2.6.15-ve025stab014/fs/proc/proc_tty.c
+--- linux-2.6.15.orig/fs/proc/proc_tty.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/proc/proc_tty.c 2006-01-27 14:48:08.000000000 +0300
+@@ -6,6 +6,7 @@
+
+ #include <asm/uaccess.h>
+
++#include <linux/ve_owner.h>
+ #include <linux/init.h>
+ #include <linux/errno.h>
+ #include <linux/time.h>
+@@ -106,24 +107,35 @@ static int show_tty_driver(struct seq_fi
+ /* iterator */
+ static void *t_start(struct seq_file *m, loff_t *pos)
+ {
+- struct list_head *p;
++ struct tty_driver *drv;
++
+ loff_t l = *pos;
+- list_for_each(p, &tty_drivers)
++ read_lock(&tty_driver_guard);
++ list_for_each_entry(drv, &tty_drivers, tty_drivers) {
++ if (!ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
++ continue;
+ if (!l--)
+- return list_entry(p, struct tty_driver, tty_drivers);
++ return drv;
++ }
+ return NULL;
+ }
+
+ static void *t_next(struct seq_file *m, void *v, loff_t *pos)
+ {
+- struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
++ struct tty_driver *drv;
++
+ (*pos)++;
+- return p==&tty_drivers ? NULL :
+- list_entry(p, struct tty_driver, tty_drivers);
++ drv = (struct tty_driver *)v;
++ list_for_each_entry_continue(drv, &tty_drivers, tty_drivers) {
++ if (ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
++ return drv;
++ }
++ return NULL;
+ }
+
+ static void t_stop(struct seq_file *m, void *v)
+ {
++ read_unlock(&tty_driver_guard);
+ }
+
+ static struct seq_operations tty_drivers_op = {
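
The t_start()/t_next() rewrite above is the standard shape of a filtered seq_file iterator: both positioning and advancing skip entries the calling environment is not allowed to see, with the whole walk bracketed by tty_driver_guard. A standalone sketch of the same pattern, using an array in place of the kernel list and a plain owner field in place of ve_accessible_strict():

#include <stdio.h>
#include <stddef.h>

struct drv { const char *name; int owner; };

static struct drv drivers[] = {
	{ "tty", 0 }, { "pts-42", 42 }, { "serial", 0 }, { "pts-7", 7 },
};
#define NDRV (sizeof(drivers) / sizeof(drivers[0]))

/* start: return the pos'th entry visible to this viewer */
static struct drv *t_start(int viewer, long pos)
{
	for (size_t i = 0; i < NDRV; i++) {
		if (drivers[i].owner != viewer)
			continue;                /* filtered out */
		if (!pos--)
			return &drivers[i];
	}
	return NULL;
}

/* next: continue from v, returning the next visible entry */
static struct drv *t_next(int viewer, struct drv *v)
{
	for (struct drv *p = v + 1; p < drivers + NDRV; p++)
		if (p->owner == viewer)
			return p;
	return NULL;
}

int main(void)
{
	for (struct drv *p = t_start(0, 0); p; p = t_next(0, p))
		printf("%s\n", p->name);         /* only entries owned by 0 */
	return 0;
}
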
+diff -uprN linux-2.6.15.orig/fs/proc/root.c linux-2.6.15-ve025stab014/fs/proc/root.c
+--- linux-2.6.15.orig/fs/proc/root.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/proc/root.c 2006-01-27 14:48:08.000000000 +0300
+@@ -18,7 +18,10 @@
+ #include <linux/bitops.h>
+ #include <linux/smp_lock.h>
+
+-struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
++#ifndef CONFIG_VE
++struct proc_dir_entry *proc_net, *proc_net_stat;
++#endif
++struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
+
+ #ifdef CONFIG_SYSCTL
+ struct proc_dir_entry *proc_sys_root;
+@@ -36,6 +39,8 @@ static struct file_system_type proc_fs_t
+ .kill_sb = kill_anon_super,
+ };
+
++EXPORT_SYMBOL(proc_fs_type);
++
+ extern int __init proc_init_inodecache(void);
+ void __init proc_root_init(void)
+ {
+@@ -155,7 +160,9 @@ EXPORT_SYMBOL(create_proc_entry);
+ EXPORT_SYMBOL(remove_proc_entry);
+ EXPORT_SYMBOL(proc_root);
+ EXPORT_SYMBOL(proc_root_fs);
++#ifndef CONFIG_VE
+ EXPORT_SYMBOL(proc_net);
+ EXPORT_SYMBOL(proc_net_stat);
++#endif
+ EXPORT_SYMBOL(proc_bus);
+ EXPORT_SYMBOL(proc_root_driver);
+diff -uprN linux-2.6.15.orig/fs/proc/task_mmu.c linux-2.6.15-ve025stab014/fs/proc/task_mmu.c
+--- linux-2.6.15.orig/fs/proc/task_mmu.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/proc/task_mmu.c 2006-01-27 14:48:08.000000000 +0300
+@@ -90,9 +90,12 @@ int proc_exe_link(struct inode *inode, s
+ }
+
+ if (vma) {
+- *mnt = mntget(vma->vm_file->f_vfsmnt);
+- *dentry = dget(vma->vm_file->f_dentry);
+- result = 0;
++ result = d_root_check(vma->vm_file->f_dentry,
++ vma->vm_file->f_vfsmnt);
++ if (!result) {
++ *mnt = mntget(vma->vm_file->f_vfsmnt);
++ *dentry = dget(vma->vm_file->f_dentry);
++ }
+ }
+
+ up_read(&mm->mmap_sem);
+diff -uprN linux-2.6.15.orig/fs/proc/task_nommu.c linux-2.6.15-ve025stab014/fs/proc/task_nommu.c
+--- linux-2.6.15.orig/fs/proc/task_nommu.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/proc/task_nommu.c 2006-01-27 14:48:08.000000000 +0300
+@@ -126,9 +126,12 @@ int proc_exe_link(struct inode *inode, s
+ }
+
+ if (vma) {
+- *mnt = mntget(vma->vm_file->f_vfsmnt);
+- *dentry = dget(vma->vm_file->f_dentry);
+- result = 0;
++ result = d_root_check(vma->vm_file->f_dentry,
++ vma->vm_file->f_vfsmnt);
++ if (!result) {
++ *mnt = mntget(vma->vm_file->f_vfsmnt);
++ *dentry = dget(vma->vm_file->f_dentry);
++ }
+ }
+
+ up_read(&mm->mmap_sem);
+diff -uprN linux-2.6.15.orig/fs/quota.c linux-2.6.15-ve025stab014/fs/quota.c
+--- linux-2.6.15.orig/fs/quota.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/quota.c 2006-01-27 14:48:08.000000000 +0300
+@@ -80,11 +80,11 @@ static int generic_quotactl_valid(struct
+ if (cmd == Q_GETQUOTA) {
+ if (((type == USRQUOTA && current->euid != id) ||
+ (type == GRPQUOTA && !in_egroup_p(id))) &&
+- !capable(CAP_SYS_ADMIN))
++ !capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ }
+ else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+
+ return 0;
+@@ -131,10 +131,10 @@ static int xqm_quotactl_valid(struct sup
+ if (cmd == Q_XGETQUOTA) {
+ if (((type == XQM_USRQUOTA && current->euid != id) ||
+ (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
+- !capable(CAP_SYS_ADMIN))
++ !capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ }
+
+@@ -215,7 +215,7 @@ restart:
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+ down_read(&sb->s_umount);
+- if (sb->s_root && sb->s_qcop->quota_sync)
++ if (sb->s_root && sb->s_qcop && sb->s_qcop->quota_sync)
+ quota_sync_sb(sb, type);
+ up_read(&sb->s_umount);
+ spin_lock(&sb_lock);
+@@ -357,7 +357,7 @@ asmlinkage long sys_quotactl(unsigned in
+ tmp = getname(special);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+- bdev = lookup_bdev(tmp);
++ bdev = lookup_bdev(tmp, FMODE_QUOTACTL);
+ putname(tmp);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
+diff -uprN linux-2.6.15.orig/fs/reiserfs/namei.c linux-2.6.15-ve025stab014/fs/reiserfs/namei.c
+--- linux-2.6.15.orig/fs/reiserfs/namei.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/reiserfs/namei.c 2006-01-27 14:48:08.000000000 +0300
+@@ -868,6 +868,9 @@ static int reiserfs_rmdir(struct inode *
+ INITIALIZE_PATH(path);
+ struct reiserfs_dir_entry de;
+
++ inode = dentry->d_inode;
++ DQUOT_INIT(inode);
++
+ /* we will be doing 2 balancings and update 2 stat data, we change quotas
+ * of the owner of the directory and of the owner of the parent directory.
+ * The quota structure is possibly deleted only on last iput => outside
+@@ -892,8 +895,6 @@ static int reiserfs_rmdir(struct inode *
+ goto end_rmdir;
+ }
+
+- inode = dentry->d_inode;
+-
+ reiserfs_update_inode_transaction(inode);
+ reiserfs_update_inode_transaction(dir);
+
+@@ -956,6 +957,7 @@ static int reiserfs_unlink(struct inode
+ unsigned long savelink;
+
+ inode = dentry->d_inode;
++ DQUOT_INIT(inode);
+
+ /* in this transaction we can be doing at max two balancings and update
+ * two stat datas, we change quotas of the owner of the directory and of
+@@ -1263,6 +1265,8 @@ static int reiserfs_rename(struct inode
+
+ old_inode = old_dentry->d_inode;
+ new_dentry_inode = new_dentry->d_inode;
++ if (new_dentry_inode)
++ DQUOT_INIT(new_dentry_inode);
+
+ // make sure, that oldname still exists and points to an object we
+ // are going to rename
+diff -uprN linux-2.6.15.orig/fs/select.c linux-2.6.15-ve025stab014/fs/select.c
+--- linux-2.6.15.orig/fs/select.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/select.c 2006-01-27 14:48:05.000000000 +0300
+@@ -24,6 +24,8 @@
+ #include <linux/fs.h>
+ #include <linux/rcupdate.h>
+
++#include <ub/ub_mem.h>
++
+ #include <asm/uaccess.h>
+
+ #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
+@@ -276,7 +278,7 @@ int do_select(int n, fd_set_bits *fds, l
+
+ static void *select_bits_alloc(int size)
+ {
+- return kmalloc(6 * size, GFP_KERNEL);
++ return ub_kmalloc(6 * size, GFP_KERNEL);
+ }
+
+ static void select_bits_free(void *bits, int size)
+@@ -498,7 +500,7 @@ asmlinkage long sys_poll(struct pollfd _
+ err = -ENOMEM;
+ while(i!=0) {
+ struct poll_list *pp;
+- pp = kmalloc(sizeof(struct poll_list)+
++ pp = ub_kmalloc(sizeof(struct poll_list)+
+ sizeof(struct pollfd)*
+ (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
+ GFP_KERNEL);
+diff -uprN linux-2.6.15.orig/fs/seq_file.c linux-2.6.15-ve025stab014/fs/seq_file.c
+--- linux-2.6.15.orig/fs/seq_file.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/seq_file.c 2006-01-27 14:48:08.000000000 +0300
+@@ -345,6 +345,8 @@ int seq_path(struct seq_file *m,
+ if (m->count < m->size) {
+ char *s = m->buf + m->count;
+ char *p = d_path(dentry, mnt, s, m->size - m->count);
++ if (IS_ERR(p) && PTR_ERR(p) != -ENAMETOOLONG)
++ return 0;
+ if (!IS_ERR(p)) {
+ while (s <= p) {
+ char c = *p++;
+diff -uprN linux-2.6.15.orig/fs/simfs.c linux-2.6.15-ve025stab014/fs/simfs.c
+--- linux-2.6.15.orig/fs/simfs.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/simfs.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,319 @@
++/*
++ * fs/simfs.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/fs.h>
++#include <linux/file.h>
++#include <linux/init.h>
++#include <linux/namei.h>
++#include <linux/err.h>
++#include <linux/module.h>
++#include <linux/mount.h>
++#include <linux/vzquota.h>
++#include <linux/statfs.h>
++#include <linux/virtinfo.h>
++#include <linux/faudit.h>
++#include <linux/genhd.h>
++
++#include <asm/unistd.h>
++#include <asm/uaccess.h>
++
++#define SIMFS_GET_LOWER_FS_SB(sb) sb->s_root->d_sb
++
++static struct super_operations sim_super_ops;
++
++static int sim_getattr(struct vfsmount *mnt, struct dentry *dentry,
++ struct kstat *stat)
++{
++ struct super_block *sb;
++ struct inode *inode;
++
++ inode = dentry->d_inode;
++ if (!inode->i_op->getattr) {
++ generic_fillattr(inode, stat);
++ if (!stat->blksize) {
++ unsigned blocks;
++
++ sb = inode->i_sb;
++ blocks = (stat->size + sb->s_blocksize-1) >>
++ sb->s_blocksize_bits;
++ stat->blocks = (sb->s_blocksize / 512) * blocks;
++ stat->blksize = sb->s_blocksize;
++ }
++ } else {
++ int err;
++
++ err = inode->i_op->getattr(mnt, dentry, stat);
++ if (err)
++ return err;
++ }
++
++ sb = mnt->mnt_sb;
++ if (sb->s_op == &sim_super_ops)
++ stat->dev = sb->s_dev;
++ return 0;
++}
++
++static void quota_get_stat(struct super_block *sb, struct kstatfs *buf)
++{
++ int err;
++ struct dq_stat qstat;
++ struct virt_info_quota q;
++ long free_file, adj_file;
++ s64 blk, free_blk, adj_blk;
++ int bsize_bits;
++
++ q.super = sb;
++ q.qstat = &qstat;
++ err = virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_GETSTAT, &q);
++ if (err != NOTIFY_OK)
++ return;
++
++ bsize_bits = ffs(buf->f_bsize) - 1;
++ free_blk = (s64)(qstat.bsoftlimit - qstat.bcurrent) >> bsize_bits;
++ if (free_blk < 0)
++ free_blk = 0;
++ /*
++ * In the regular case, we always set buf->f_bfree and buf->f_blocks to
++ * the values reported by quota. In case of real disk space shortage,
++ * we adjust the values. We want this adjustment to look as if the
++ * total disk space were reduced, not as if the usage were increased.
++ * -- SAW
++ */
++ adj_blk = 0;
++ if (buf->f_bfree < free_blk)
++ adj_blk = free_blk - buf->f_bfree;
++ buf->f_bfree = (long)(free_blk - adj_blk);
++
++ if (free_blk < buf->f_bavail)
++ buf->f_bavail = (long)free_blk; /* min(f_bavail, free_blk) */
++
++ blk = (qstat.bsoftlimit >> bsize_bits) - adj_blk;
++ buf->f_blocks = blk > LONG_MAX ? LONG_MAX : blk;
++
++ free_file = qstat.isoftlimit - qstat.icurrent;
++ if (free_file < 0)
++ free_file = 0;
++ if (buf->f_ffree == -1)
++ /*
++	 * One filesystem uses -1 to report that it does not keep a separate
++	 * limit on the number of inodes.
++	 * Maybe because -1 is a good stand-in for the maximum value of a
++	 * signed long, maybe because it's just nice to have an exceptional
++	 * case... Guess what that filesystem is :-)
++ * -- SAW
++ */
++ buf->f_ffree = free_file;
++ adj_file = 0;
++ if (buf->f_ffree < free_file)
++ adj_file = free_file - buf->f_ffree;
++ buf->f_ffree = free_file - adj_file;
++ buf->f_files = qstat.isoftlimit - adj_file;
++}
++
++static int sim_statfs(struct super_block *sb, struct statfs *buf)
++{
++ int err;
++ struct super_block *lsb;
++ struct kstatfs statbuf;
++
++ err = 0;
++ if (sb->s_op != &sim_super_ops)
++ goto out;
++
++ lsb = SIMFS_GET_LOWER_FS_SB(sb);
++
++ err = -ENOSYS;
++ if (lsb && lsb->s_op && lsb->s_op->statfs)
++ err = lsb->s_op->statfs(lsb, &statbuf);
++ if (err)
++ goto out;
++
++ quota_get_stat(sb, &statbuf);
++
++ buf->f_files = statbuf.f_files;
++ buf->f_ffree = statbuf.f_ffree;
++ buf->f_blocks = statbuf.f_blocks;
++ buf->f_bfree = statbuf.f_bfree;
++ buf->f_bavail = statbuf.f_bavail;
++out:
++ return err;
++}
++
++static int sim_statfs64(struct super_block *sb, struct statfs64 *buf)
++{
++ int err;
++ struct super_block *lsb;
++ struct kstatfs statbuf;
++
++ err = 0;
++ if (sb->s_op != &sim_super_ops)
++ goto out;
++
++ lsb = SIMFS_GET_LOWER_FS_SB(sb);
++
++ err = -ENOSYS;
++ if (lsb && lsb->s_op && lsb->s_op->statfs)
++ err = lsb->s_op->statfs(lsb, &statbuf);
++ if (err)
++ goto out;
++
++ quota_get_stat(sb, &statbuf);
++
++ buf->f_files = (__u64)statbuf.f_files;
++ buf->f_ffree = (__u64)statbuf.f_ffree;
++ buf->f_blocks = (__u64)statbuf.f_blocks;
++ buf->f_bfree = (__u64)statbuf.f_bfree;
++ buf->f_bavail = (__u64)statbuf.f_bavail;
++out:
++ return err;
++}
++
++static int sim_systemcall(struct vnotifier_block *me, unsigned long n,
++ void *d, int old_ret)
++{
++ int err;
++ struct faudit_stat_arg *arg;
++
++ arg = (struct faudit_stat_arg *)d;
++ switch (n) {
++ case VIRTINFO_FAUDIT_STAT:
++ err = sim_getattr(arg->mnt, arg->dentry,
++ (struct kstat *)arg->stat);
++ break;
++ case VIRTINFO_FAUDIT_STATFS:
++ err = sim_statfs(arg->mnt->mnt_sb,
++ (struct statfs *)arg->stat);
++ break;
++ case VIRTINFO_FAUDIT_STATFS64:
++ err = sim_statfs64(arg->mnt->mnt_sb,
++ (struct statfs64 *)arg->stat);
++ break;
++ default:
++ return old_ret;
++ }
++ arg->err = err;
++ return (err ? NOTIFY_BAD : NOTIFY_OK);
++}
++
++static struct inode *sim_quota_root(struct super_block *sb)
++{
++ return sb->s_root->d_inode;
++}
++
++void sim_put_super(struct super_block *sb)
++{
++ struct virt_info_quota viq;
++
++ viq.super = sb;
++ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_OFF, &viq);
++ bdput(sb->s_bdev);
++}
++
++static struct super_operations sim_super_ops = {
++ .get_quota_root = sim_quota_root,
++ .put_super = sim_put_super,
++};
++
++static int sim_fill_super(struct super_block *s, void *data)
++{
++ int err;
++ struct nameidata *nd;
++
++ err = set_anon_super(s, NULL);
++ if (err)
++ goto out;
++
++ err = 0;
++ nd = (struct nameidata *)data;
++ s->s_root = dget(nd->dentry);
++ s->s_op = &sim_super_ops;
++out:
++ return err;
++}
++
++struct super_block *sim_get_sb(struct file_system_type *type,
++ int flags, const char *dev_name, void *opt)
++{
++ int err;
++ struct nameidata nd;
++ struct super_block *sb;
++ struct block_device *bd;
++ struct virt_info_quota viq;
++ static struct hd_struct fake_hds;
++
++ sb = ERR_PTR(-EINVAL);
++ if (opt == NULL)
++ goto out;
++
++ err = path_lookup(opt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
++ sb = ERR_PTR(err);
++ if (err)
++ goto out;
++
++ sb = sget(type, NULL, sim_fill_super, &nd);
++ if (IS_ERR(sb))
++ goto out_path;
++
++ bd = bdget(sb->s_dev);
++ if (!bd)
++ goto out_killsb;
++
++ sb->s_bdev = bd;
++ bd->bd_part = &fake_hds;
++ viq.super = sb;
++ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_ON, &viq);
++out_path:
++ path_release(&nd);
++out:
++ return sb;
++
++out_killsb:
++ up_write(&sb->s_umount);
++ deactivate_super(sb);
++ sb = ERR_PTR(-ENODEV);
++ goto out_path;
++}
++
++static struct file_system_type sim_fs_type = {
++ .owner = THIS_MODULE,
++ .name = "simfs",
++ .get_sb = sim_get_sb,
++ .kill_sb = kill_anon_super,
++};
++
++static struct vnotifier_block sim_syscalls = {
++ .notifier_call = sim_systemcall,
++};
++
++static int __init init_simfs(void)
++{
++ int err;
++
++ err = register_filesystem(&sim_fs_type);
++ if (err)
++ return err;
++
++ virtinfo_notifier_register(VITYPE_FAUDIT, &sim_syscalls);
++ return 0;
++}
++
++static void __exit exit_simfs(void)
++{
++ virtinfo_notifier_unregister(VITYPE_FAUDIT, &sim_syscalls);
++ unregister_filesystem(&sim_fs_type);
++}
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Open Virtuozzo Simulation of File System");
++MODULE_LICENSE("GPL v2");
++
++module_init(init_simfs);
++module_exit(exit_simfs);
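
The statfs adjustment in quota_get_stat() above is easiest to follow with numbers. The standalone sketch below repeats the same arithmetic with made-up figures: report the space the quota still allows, and when the underlying filesystem is genuinely shorter on space, take the shortfall off the advertised total so df shows a smaller disk rather than inflated usage.

#include <stdio.h>

int main(void)
{
	long      bsize     = 4096;            /* f_bsize */
	long long bsoft     = 10LL << 30;      /* quota soft limit, bytes (10 GiB) */
	long long bcur      =  6LL << 30;      /* current usage, bytes     (6 GiB) */
	long long fs_bfree  = 524288;          /* real free blocks         (2 GiB) */
	long long fs_bavail = 500000;

	long long free_blk = (bsoft - bcur) / bsize;   /* quota still allows 4 GiB */
	if (free_blk < 0)
		free_blk = 0;

	long long adj = 0;                     /* real disk space shortage */
	if (fs_bfree < free_blk)
		adj = free_blk - fs_bfree;

	long long f_bfree  = free_blk - adj;
	long long f_bavail = free_blk < fs_bavail ? free_blk : fs_bavail;
	long long f_blocks = bsoft / bsize - adj;

	/* prints blocks=2097152 bfree=524288 bavail=500000 */
	printf("blocks=%lld bfree=%lld bavail=%lld\n", f_blocks, f_bfree, f_bavail);
	return 0;
}
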
+diff -uprN linux-2.6.15.orig/fs/stat.c linux-2.6.15-ve025stab014/fs/stat.c
+--- linux-2.6.15.orig/fs/stat.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/stat.c 2006-01-27 14:48:07.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/namei.h>
+ #include <linux/security.h>
+ #include <linux/syscalls.h>
++#include <linux/faudit.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/unistd.h>
+@@ -42,11 +43,19 @@ int vfs_getattr(struct vfsmount *mnt, st
+ {
+ struct inode *inode = dentry->d_inode;
+ int retval;
++ struct faudit_stat_arg arg;
+
+ retval = security_inode_getattr(mnt, dentry);
+ if (retval)
+ return retval;
+
++ arg.mnt = mnt;
++ arg.dentry = dentry;
++ arg.stat = stat;
++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT, &arg)
++ != NOTIFY_DONE)
++ return arg.err;
++
+ if (inode->i_op->getattr)
+ return inode->i_op->getattr(mnt, dentry, stat);
+
+diff -uprN linux-2.6.15.orig/fs/super.c linux-2.6.15-ve025stab014/fs/super.c
+--- linux-2.6.15.orig/fs/super.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/super.c 2006-01-27 14:48:08.000000000 +0300
+@@ -23,6 +23,7 @@
+ #include <linux/config.h>
+ #include <linux/module.h>
+ #include <linux/slab.h>
++#include <linux/ve_owner.h>
+ #include <linux/init.h>
+ #include <linux/smp_lock.h>
+ #include <linux/acct.h>
+@@ -69,6 +70,7 @@ static struct super_block *alloc_super(v
+ INIT_LIST_HEAD(&s->s_io);
+ INIT_LIST_HEAD(&s->s_files);
+ INIT_LIST_HEAD(&s->s_instances);
++ INIT_LIST_HEAD(&s->s_dshrinkers);
+ INIT_HLIST_HEAD(&s->s_anon);
+ INIT_LIST_HEAD(&s->s_inodes);
+ init_rwsem(&s->s_umount);
+@@ -231,8 +233,9 @@ void generic_shutdown_super(struct super
+ if (root) {
+ sb->s_root = NULL;
+ shrink_dcache_parent(root);
+- shrink_dcache_anon(&sb->s_anon);
++ shrink_dcache_anon(sb);
+ dput(root);
++ dcache_shrinker_wait_sb(sb);
+ fsync_super(sb);
+ lock_super(sb);
+ sb->s_flags &= ~MS_ACTIVE;
+@@ -480,11 +483,20 @@ asmlinkage long sys_ustat(unsigned dev,
+ struct super_block *s;
+ struct ustat tmp;
+ struct kstatfs sbuf;
+- int err = -EINVAL;
++ dev_t kdev;
++ int err;
++
++ kdev = new_decode_dev(dev);
++#ifdef CONFIG_VE
++ err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ);
++ if (err)
++ goto out;
++#endif
+
+- s = user_get_super(new_decode_dev(dev));
+- if (s == NULL)
+- goto out;
++ err = -EINVAL;
++ s = user_get_super(kdev);
++ if (s == NULL)
++ goto out;
+ err = vfs_statfs(s, &sbuf);
+ drop_super(s);
+ if (err)
+@@ -598,6 +610,13 @@ void emergency_remount(void)
+ static struct idr unnamed_dev_idr;
+ static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
+
++/* for compatibility with coreutils still unaware of new minor sizes */
++int unnamed_dev_majors[] = {
++ 0, 144, 145, 146, 242, 243, 244, 245,
++ 246, 247, 248, 249, 250, 251, 252, 253
++};
++EXPORT_SYMBOL(unnamed_dev_majors);
++
+ int set_anon_super(struct super_block *s, void *data)
+ {
+ int dev;
+@@ -615,13 +634,13 @@ int set_anon_super(struct super_block *s
+ else if (error)
+ return -EAGAIN;
+
+- if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
++ if ((dev & MAX_ID_MASK) >= (1 << MINORBITS)) {
+ spin_lock(&unnamed_dev_lock);
+ idr_remove(&unnamed_dev_idr, dev);
+ spin_unlock(&unnamed_dev_lock);
+ return -EMFILE;
+ }
+- s->s_dev = MKDEV(0, dev & MINORMASK);
++ s->s_dev = make_unnamed_dev(dev);
+ return 0;
+ }
+
+@@ -629,8 +648,9 @@ EXPORT_SYMBOL(set_anon_super);
+
+ void kill_anon_super(struct super_block *sb)
+ {
+- int slot = MINOR(sb->s_dev);
++ int slot;
+
++ slot = unnamed_dev_idx(sb->s_dev);
+ generic_shutdown_super(sb);
+ spin_lock(&unnamed_dev_lock);
+ idr_remove(&unnamed_dev_idr, slot);
+diff -uprN linux-2.6.15.orig/fs/sysfs/bin.c linux-2.6.15-ve025stab014/fs/sysfs/bin.c
+--- linux-2.6.15.orig/fs/sysfs/bin.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/sysfs/bin.c 2006-01-27 14:48:08.000000000 +0300
+@@ -120,6 +120,9 @@ static int open(struct inode * inode, st
+ struct bin_attribute * attr = to_bin_attr(file->f_dentry);
+ int error = -EINVAL;
+
++ if (!ve_sysfs_alowed())
++ return 0;
++
+ if (!kobj || !attr)
+ goto Done;
+
+@@ -196,6 +199,9 @@ int sysfs_create_bin_file(struct kobject
+
+ int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+ {
++ if (!ve_sysfs_alowed())
++ return 0;
++
+ sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
+ return 0;
+ }
+diff -uprN linux-2.6.15.orig/fs/sysfs/dir.c linux-2.6.15-ve025stab014/fs/sysfs/dir.c
+--- linux-2.6.15.orig/fs/sysfs/dir.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/sysfs/dir.c 2006-01-27 14:48:08.000000000 +0300
+@@ -140,6 +140,9 @@ int sysfs_create_dir(struct kobject * ko
+ struct dentry * parent;
+ int error = 0;
+
++ if (!ve_sysfs_alowed())
++ return 0;
++
+ BUG_ON(!kobj);
+
+ if (kobj->parent)
+@@ -274,10 +277,14 @@ void sysfs_remove_subdir(struct dentry *
+
+ void sysfs_remove_dir(struct kobject * kobj)
+ {
+- struct dentry * dentry = dget(kobj->dentry);
++ struct dentry * dentry;
+ struct sysfs_dirent * parent_sd;
+ struct sysfs_dirent * sd, * tmp;
+
++ if (!ve_sysfs_alowed())
++ return;
++
++ dentry = dget(kobj->dentry);
+ if (!dentry)
+ return;
+
+@@ -305,6 +312,9 @@ int sysfs_rename_dir(struct kobject * ko
+ int error = 0;
+ struct dentry * new_dentry, * parent;
+
++ if (!ve_sysfs_alowed())
++ return 0;
++
+ if (!strcmp(kobject_name(kobj), new_name))
+ return -EINVAL;
+
+diff -uprN linux-2.6.15.orig/fs/sysfs/file.c linux-2.6.15-ve025stab014/fs/sysfs/file.c
+--- linux-2.6.15.orig/fs/sysfs/file.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/sysfs/file.c 2006-01-27 14:48:08.000000000 +0300
+@@ -380,6 +380,9 @@ int sysfs_add_file(struct dentry * dir,
+
+ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
+ {
++ if (!ve_sysfs_alowed())
++ return 0;
++
+ BUG_ON(!kobj || !kobj->dentry || !attr);
+
+ return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
+@@ -398,6 +401,9 @@ int sysfs_update_file(struct kobject * k
+ struct dentry * victim;
+ int res = -ENOENT;
+
++ if (!ve_sysfs_alowed())
++ return 0;
++
+ down(&dir->d_inode->i_sem);
+ victim = lookup_one_len(attr->name, dir, strlen(attr->name));
+ if (!IS_ERR(victim)) {
+@@ -473,6 +479,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
+
+ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
+ {
++ if (!ve_sysfs_alowed())
++ return;
++
+ sysfs_hash_and_remove(kobj->dentry,attr->name);
+ }
+
+diff -uprN linux-2.6.15.orig/fs/sysfs/group.c linux-2.6.15-ve025stab014/fs/sysfs/group.c
+--- linux-2.6.15.orig/fs/sysfs/group.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/sysfs/group.c 2006-01-27 14:48:08.000000000 +0300
+@@ -46,6 +46,9 @@ int sysfs_create_group(struct kobject *
+ struct dentry * dir;
+ int error;
+
++ if (!ve_sysfs_alowed())
++ return 0;
++
+ BUG_ON(!kobj || !kobj->dentry);
+
+ if (grp->name) {
+@@ -68,6 +71,9 @@ void sysfs_remove_group(struct kobject *
+ {
+ struct dentry * dir;
+
++ if (!ve_sysfs_alowed())
++ return;
++
+ if (grp->name)
+ dir = lookup_one_len(grp->name, kobj->dentry,
+ strlen(grp->name));
+diff -uprN linux-2.6.15.orig/fs/sysfs/inode.c linux-2.6.15-ve025stab014/fs/sysfs/inode.c
+--- linux-2.6.15.orig/fs/sysfs/inode.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/sysfs/inode.c 2006-01-27 14:48:08.000000000 +0300
+@@ -8,13 +8,12 @@
+
+ #undef DEBUG
+
++#include <linux/config.h>
+ #include <linux/pagemap.h>
+ #include <linux/namei.h>
+ #include <linux/backing-dev.h>
+ #include "sysfs.h"
+
+-extern struct super_block * sysfs_sb;
+-
+ static struct address_space_operations sysfs_aops = {
+ .readpage = simple_readpage,
+ .prepare_write = simple_prepare_write,
+diff -uprN linux-2.6.15.orig/fs/sysfs/mount.c linux-2.6.15-ve025stab014/fs/sysfs/mount.c
+--- linux-2.6.15.orig/fs/sysfs/mount.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/sysfs/mount.c 2006-01-27 14:48:08.000000000 +0300
+@@ -7,6 +7,7 @@
+ #include <linux/fs.h>
+ #include <linux/mount.h>
+ #include <linux/pagemap.h>
++#include <linux/module.h>
+ #include <linux/init.h>
+
+ #include "sysfs.h"
+@@ -14,8 +15,11 @@
+ /* Random magic number */
+ #define SYSFS_MAGIC 0x62656572
+
++#ifndef CONFIG_VE_SYSFS
+ struct vfsmount *sysfs_mount;
+ struct super_block * sysfs_sb = NULL;
++#endif
++
+ kmem_cache_t *sysfs_dir_cachep;
+
+ static struct super_operations sysfs_ops = {
+@@ -78,6 +82,8 @@ static struct file_system_type sysfs_fs_
+ .kill_sb = kill_litter_super,
+ };
+
++EXPORT_SYMBOL(sysfs_fs_type);
++
+ int __init sysfs_init(void)
+ {
+ int err = -ENOMEM;
+diff -uprN linux-2.6.15.orig/fs/sysfs/symlink.c linux-2.6.15-ve025stab014/fs/sysfs/symlink.c
+--- linux-2.6.15.orig/fs/sysfs/symlink.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/sysfs/symlink.c 2006-01-27 14:48:08.000000000 +0300
+@@ -86,6 +86,9 @@ int sysfs_create_link(struct kobject * k
+
+ BUG_ON(!kobj || !kobj->dentry || !name);
+
++ if (!ve_sysfs_alowed())
++ return 0;
++
+ down(&dentry->d_inode->i_sem);
+ error = sysfs_add_link(dentry, name, target);
+ up(&dentry->d_inode->i_sem);
+@@ -101,6 +104,9 @@ int sysfs_create_link(struct kobject * k
+
+ void sysfs_remove_link(struct kobject * kobj, const char * name)
+ {
++	if (!ve_sysfs_alowed())
++ return;
++
+ sysfs_hash_and_remove(kobj->dentry,name);
+ }
+
+diff -uprN linux-2.6.15.orig/fs/sysfs/sysfs.h linux-2.6.15-ve025stab014/fs/sysfs/sysfs.h
+--- linux-2.6.15.orig/fs/sysfs/sysfs.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/sysfs/sysfs.h 2006-01-27 14:48:08.000000000 +0300
+@@ -1,5 +1,14 @@
+
+-extern struct vfsmount * sysfs_mount;
++#ifndef CONFIG_VE_SYSFS
++extern struct vfsmount *sysfs_mount;
++extern struct super_block *sysfs_sb;
++#define ve_sysfs_alowed() (ve_is_super(get_exec_env()))
++#else
++#define sysfs_mount (get_exec_env()->sysfs_mnt)
++#define sysfs_sb (get_exec_env()->sysfs_sb)
++#define ve_sysfs_alowed() (1)
++#endif
++
+ extern kmem_cache_t *sysfs_dir_cachep;
+
+ extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
+@@ -19,7 +28,6 @@ extern void sysfs_drop_dentry(struct sys
+ extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
+
+ extern struct rw_semaphore sysfs_rename_sem;
+-extern struct super_block * sysfs_sb;
+ extern struct file_operations sysfs_dir_operations;
+ extern struct file_operations sysfs_file_operations;
+ extern struct file_operations bin_fops;
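
The sysfs.h hunk above turns sysfs_mount/sysfs_sb into either plain globals (no per-VE sysfs, so only the host environment may touch sysfs) or fields of the current execution environment (every environment gets its own instance and the check collapses to a constant). A minimal sketch of that compile-time pattern; the env structure, the get_exec_env() stub and the sysfs_allowed() name are stand-ins for illustration, not the patchset's own definitions:

#include <stdio.h>

struct env { int sysfs_ready; };

static struct env host_env = { 1 };
static struct env *current_env = &host_env;
static struct env *get_exec_env(void) { return current_env; }

#ifndef CONFIG_VE_SYSFS
static int sysfs_ready = 1;                            /* one global instance */
#define sysfs_allowed()  (get_exec_env() == &host_env) /* host only */
#else
#define sysfs_ready      (get_exec_env()->sysfs_ready) /* per-environment */
#define sysfs_allowed()  1
#endif

int main(void)
{
	if (sysfs_allowed())
		printf("sysfs_ready=%d\n", sysfs_ready);
	return 0;
}
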
+diff -uprN linux-2.6.15.orig/fs/vzdq_file.c linux-2.6.15-ve025stab014/fs/vzdq_file.c
+--- linux-2.6.15.orig/fs/vzdq_file.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/vzdq_file.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,852 @@
++/*
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file implements the Virtuozzo quota files as proc entries.
++ * It is required for the standard quota tools to work correctly, since they
++ * expect the aquota.user and aquota.group files.
++ */
++
++#include <linux/ctype.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/module.h>
++#include <linux/proc_fs.h>
++#include <linux/sysctl.h>
++#include <linux/mount.h>
++#include <linux/namespace.h>
++#include <linux/quotaio_v2.h>
++#include <asm/uaccess.h>
++
++#include <linux/ve.h>
++#include <linux/ve_proto.h>
++#include <linux/vzdq_tree.h>
++#include <linux/vzquota.h>
++
++/* ----------------------------------------------------------------------
++ *
++ * File read operation
++ *
++ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c,
++ * perhaps) abuse vz_quota_sem.
++ * Taking a global semaphore for lengthy and user-controlled operations inside
++ * VPSs is not a good idea in general.
++ * In this case, the reasons for taking this semaphore are completely unclear,
++ * especially taking into account that the only function that has comments
++ * about the necessity to be called under this semaphore
++ * (create_proc_quotafile) is actually called OUTSIDE it.
++ *
++ * --------------------------------------------------------------------- */
++
++#define DQBLOCK_SIZE 1024
++#define DQUOTBLKNUM 21U
++#define DQTREE_DEPTH 4
++#define TREENUM_2_BLKNUM(num) (((num) + 1) << 1)
++#define ISINDBLOCK(num) ((num)%2 != 0)
++#define FIRST_DATABLK 2 /* first even number */
++#define LAST_IND_LEVEL (DQTREE_DEPTH - 1)
++#define CONVERT_LEVEL(level) ((level) * (QUOTAID_EBITS/QUOTAID_BBITS))
++#define GETLEVINDX(ind, lev) (((ind) >> QUOTAID_BBITS*(lev)) \
++ & QUOTATREE_BMASK)
++
++#if (QUOTAID_EBITS / QUOTAID_BBITS) != (QUOTATREE_DEPTH / DQTREE_DEPTH)
++#error xBITS and DQTREE_DEPTH do not correspond
++#endif
++
++#define BLOCK_NOT_FOUND 1
++
++/* data for quota file -- one per proc entry */
++struct quotatree_data {
++ struct list_head list;
++ struct vz_quota_master *qmblk;
++ int type; /* type of the tree */
++};
++
++/* serialized by vz_quota_sem */
++static LIST_HEAD(qf_data_head);
++
++static const u_int32_t vzquota_magics[] = V2_INITQMAGICS;
++static const u_int32_t vzquota_versions[] = V2_INITQVERSIONS;
++
++static inline loff_t get_depoff(int depth)
++{
++ loff_t res = 1;
++ while (depth) {
++ res += (1 << ((depth - 1)*QUOTAID_EBITS + 1));
++ depth--;
++ }
++ return res;
++}
++
++static inline loff_t get_blknum(loff_t num, int depth)
++{
++ loff_t res;
++ res = (num << 1) + get_depoff(depth);
++ return res;
++}
++
++static int get_depth(loff_t num)
++{
++ int i;
++ for (i = 0; i < DQTREE_DEPTH; i++) {
++ if (num >= get_depoff(i) && (i == DQTREE_DEPTH - 1
++ || num < get_depoff(i + 1)))
++ return i;
++ }
++ return -1;
++}
++
++static inline loff_t get_offset(loff_t num)
++{
++ loff_t res, tmp;
++
++ tmp = get_depth(num);
++ if (tmp < 0)
++ return -1;
++ num -= get_depoff(tmp);
++ BUG_ON(num < 0);
++ res = num >> 1;
++
++ return res;
++}
++
++static inline loff_t get_quot_blk_num(struct quotatree_tree *tree, int level)
++{
++ /* return maximum available block num */
++ return tree->levels[level].freenum;
++}
++
++static inline loff_t get_block_num(struct quotatree_tree *tree)
++{
++ loff_t ind_blk_num, quot_blk_num, max_ind, max_quot;
++
++ quot_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH) - 1);
++ max_quot = TREENUM_2_BLKNUM(quot_blk_num);
++ ind_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH - 1));
++ max_ind = (quot_blk_num) ? get_blknum(ind_blk_num, LAST_IND_LEVEL)
++ : get_blknum(ind_blk_num, 0);
++
++ return (max_ind > max_quot) ? max_ind + 1 : max_quot + 1;
++}
++
++/* Fill in the quota file header, as read back through the proc file */
++static int read_header(void *buf, struct quotatree_tree *tree,
++ struct dq_info *dq_ugid_info, int type)
++{
++ struct v2_disk_dqheader *dqh;
++ struct v2_disk_dqinfo *dq_disk_info;
++
++ dqh = buf;
++ dq_disk_info = buf + sizeof(struct v2_disk_dqheader);
++
++ dqh->dqh_magic = vzquota_magics[type];
++ dqh->dqh_version = vzquota_versions[type];
++
++ dq_disk_info->dqi_bgrace = dq_ugid_info[type].bexpire;
++ dq_disk_info->dqi_igrace = dq_ugid_info[type].iexpire;
++ dq_disk_info->dqi_flags = 0; /* no flags */
++ dq_disk_info->dqi_blocks = get_block_num(tree);
++ dq_disk_info->dqi_free_blk = 0; /* first block in the file */
++ dq_disk_info->dqi_free_entry = FIRST_DATABLK;
++
++ return 0;
++}
++
++static int get_block_child(int depth, struct quotatree_node *p, u_int32_t *buf)
++{
++ int i, j, lev_num;
++
++ lev_num = QUOTATREE_DEPTH/DQTREE_DEPTH - 1;
++ for (i = 0; i < BLOCK_SIZE/sizeof(u_int32_t); i++) {
++ struct quotatree_node *next, *parent;
++
++ parent = p;
++ next = p;
++ for (j = lev_num; j >= 0; j--) {
++ if (!next->blocks[GETLEVINDX(i,j)]) {
++ buf[i] = 0;
++ goto bad_branch;
++ }
++ parent = next;
++ next = next->blocks[GETLEVINDX(i,j)];
++ }
++ buf[i] = (depth == DQTREE_DEPTH - 1) ?
++ TREENUM_2_BLKNUM(parent->num)
++ : get_blknum(next->num, depth + 1);
++
++ bad_branch:
++ ;
++ }
++
++ return 0;
++}
++
++/*
++ * Fill in an index block of the virtual quota file (into the buffer)
++ * @buf has length 256*sizeof(u_int32_t) bytes
++ */
++static int read_index_block(int num, u_int32_t *buf,
++ struct quotatree_tree *tree)
++{
++ struct quotatree_node *p;
++ u_int32_t index;
++ loff_t off;
++ int depth, res;
++
++ res = BLOCK_NOT_FOUND;
++ index = 0;
++ depth = get_depth(num);
++ off = get_offset(num);
++ if (depth < 0 || off < 0)
++ return -EINVAL;
++
++ list_for_each_entry(p, &tree->levels[CONVERT_LEVEL(depth)].usedlh,
++ list) {
++ if (p->num >= off)
++ res = 0;
++ if (p->num != off)
++ continue;
++ get_block_child(depth, p, buf);
++ break;
++ }
++
++ return res;
++}
++
++static inline void convert_quot_format(struct v2_disk_dqblk *dq,
++ struct vz_quota_ugid *vzq)
++{
++ dq->dqb_id = vzq->qugid_id;
++ dq->dqb_ihardlimit = vzq->qugid_stat.ihardlimit;
++ dq->dqb_isoftlimit = vzq->qugid_stat.isoftlimit;
++ dq->dqb_curinodes = vzq->qugid_stat.icurrent;
++ dq->dqb_bhardlimit = vzq->qugid_stat.bhardlimit / QUOTABLOCK_SIZE;
++ dq->dqb_bsoftlimit = vzq->qugid_stat.bsoftlimit / QUOTABLOCK_SIZE;
++ dq->dqb_curspace = vzq->qugid_stat.bcurrent;
++ dq->dqb_btime = vzq->qugid_stat.btime;
++ dq->dqb_itime = vzq->qugid_stat.itime;
++}
++
++static int read_dquot(loff_t num, void *buf, struct quotatree_tree *tree)
++{
++ int res, i, entries = 0;
++ struct v2_disk_dqdbheader *dq_header;
++ struct quotatree_node *p;
++ struct v2_disk_dqblk *blk = buf + sizeof(struct v2_disk_dqdbheader);
++
++ res = BLOCK_NOT_FOUND;
++ dq_header = buf;
++ memset(dq_header, 0, sizeof(*dq_header));
++
++ list_for_each_entry(p, &(tree->levels[QUOTATREE_DEPTH - 1].usedlh),
++ list) {
++ if (TREENUM_2_BLKNUM(p->num) >= num)
++ res = 0;
++ if (TREENUM_2_BLKNUM(p->num) != num)
++ continue;
++
++ for (i = 0; i < QUOTATREE_BSIZE; i++) {
++ if (!p->blocks[i])
++ continue;
++ convert_quot_format(blk + entries,
++ (struct vz_quota_ugid *)p->blocks[i]);
++ entries++;
++ res = 0;
++ }
++ break;
++ }
++ dq_header->dqdh_entries = entries;
++
++ return res;
++}
++
++static int read_block(int num, void *buf, struct quotatree_tree *tree,
++ struct dq_info *dq_ugid_info, int magic)
++{
++ int res;
++
++ memset(buf, 0, DQBLOCK_SIZE);
++ if (!num)
++ res = read_header(buf, tree, dq_ugid_info, magic);
++ else if (ISINDBLOCK(num))
++ res = read_index_block(num, (u_int32_t*)buf, tree);
++ else
++ res = read_dquot(num, buf, tree);
++
++ return res;
++}
++
++/*
++ * FIXME: this function can handle quota files up to 2GB only.
++ */
++static int read_proc_quotafile(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ off_t blk_num, blk_off, buf_off;
++ char *tmp;
++ size_t buf_size;
++ struct quotatree_data *qtd;
++ struct quotatree_tree *tree;
++ struct dq_info *dqi;
++ int res;
++
++ qtd = data;
++ down(&vz_quota_sem);
++ down(&qtd->qmblk->dq_sem);
++
++ res = 0;
++ tree = QUGID_TREE(qtd->qmblk, qtd->type);
++ if (!tree) {
++ *eof = 1;
++ goto out_dq;
++ }
++
++ res = -ENOMEM;
++ tmp = kmalloc(DQBLOCK_SIZE, GFP_KERNEL);
++ if (!tmp)
++ goto out_dq;
++
++ dqi = &qtd->qmblk->dq_ugid_info[qtd->type];
++
++ buf_off = 0;
++ buf_size = count;
++ blk_num = off / DQBLOCK_SIZE;
++ blk_off = off % DQBLOCK_SIZE;
++
++ while (buf_size > 0) {
++ off_t len;
++
++ len = min((size_t)(DQBLOCK_SIZE-blk_off), buf_size);
++ res = read_block(blk_num, tmp, tree, dqi, qtd->type);
++ if (res < 0)
++ goto out_err;
++ if (res == BLOCK_NOT_FOUND) {
++ *eof = 1;
++ break;
++ }
++ memcpy(page + buf_off, tmp + blk_off, len);
++
++ blk_num++;
++ buf_size -= len;
++ blk_off = 0;
++ buf_off += len;
++ }
++ res = buf_off;
++
++out_err:
++ kfree(tmp);
++ *start = NULL + count;
++out_dq:
++ up(&qtd->qmblk->dq_sem);
++ up(&vz_quota_sem);
++
++ return res;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * /proc/vz/vzaquota/QID/aquota.* files
++ *
++ * FIXME: this code lacks serialization of read/readdir/lseek.
++ * However, fixing it should wait for the mainstream issue of the apparently
++ * non-atomic read and update of the file position in sys_read.
++ *
++ * --------------------------------------------------------------------- */
++
++static inline unsigned long vzdq_aquot_getino(dev_t dev)
++{
++ return 0xec000000UL + dev;
++}
++
++static inline dev_t vzdq_aquot_getidev(struct inode *inode)
++{
++ return (dev_t)(unsigned long)PROC_I(inode)->op.proc_get_link;
++}
++
++static inline void vzdq_aquot_setidev(struct inode *inode, dev_t dev)
++{
++ PROC_I(inode)->op.proc_get_link = (void *)(unsigned long)dev;
++}
++
++static ssize_t vzdq_aquotf_read(struct file *file,
++ char __user *buf, size_t size, loff_t *ppos)
++{
++ char *page;
++ size_t bufsize;
++ ssize_t l, l2, copied;
++ char *start;
++ struct inode *inode;
++ struct block_device *bdev;
++ struct super_block *sb;
++ struct quotatree_data data;
++ int eof, err;
++
++ err = -ENOMEM;
++ page = (char *)__get_free_page(GFP_KERNEL);
++ if (page == NULL)
++ goto out_err;
++
++ err = -ENODEV;
++ inode = file->f_dentry->d_inode;
++ bdev = bdget(vzdq_aquot_getidev(inode));
++ if (bdev == NULL)
++ goto out_err;
++ sb = get_super(bdev);
++ bdput(bdev);
++ if (sb == NULL)
++ goto out_err;
++ data.qmblk = vzquota_find_qmblk(sb);
++ data.type = PROC_I(inode)->type - 1;
++ drop_super(sb);
++ if (data.qmblk == NULL || data.qmblk == VZ_QUOTA_BAD)
++ goto out_err;
++
++ copied = 0;
++ l = l2 = 0;
++ while (1) {
++ bufsize = min(size, (size_t)PAGE_SIZE);
++ if (bufsize <= 0)
++ break;
++
++ l = read_proc_quotafile(page, &start, *ppos, bufsize,
++ &eof, &data);
++ if (l <= 0)
++ break;
++
++ l2 = copy_to_user(buf, page, l);
++ copied += l - l2;
++ if (l2)
++ break;
++
++ buf += l;
++ size -= l;
++ *ppos += (unsigned long)start;
++ l = l2 = 0;
++ }
++
++ qmblk_put(data.qmblk);
++ free_page((unsigned long)page);
++ if (copied)
++ return copied;
++ else if (l2) /* last copy_to_user failed */
++ return -EFAULT;
++ else /* read error or EOF */
++ return l;
++
++out_err:
++ if (page != NULL)
++ free_page((unsigned long)page);
++ return err;
++}
++
++static struct file_operations vzdq_aquotf_file_operations = {
++ .read = &vzdq_aquotf_read,
++};
++
++static struct inode_operations vzdq_aquotf_inode_operations = {
++};
++
++
++/* ----------------------------------------------------------------------
++ *
++ * /proc/vz/vzaquota/QID directory
++ *
++ * --------------------------------------------------------------------- */
++
++static int vzdq_aquotq_readdir(struct file *file, void *data, filldir_t filler)
++{
++ loff_t n;
++ int err;
++
++ n = file->f_pos;
++ for (err = 0; !err; n++) {
++ switch (n) {
++ case 0:
++ err = (*filler)(data, ".", 1, n,
++ file->f_dentry->d_inode->i_ino,
++ DT_DIR);
++ break;
++ case 1:
++ err = (*filler)(data, "..", 2, n,
++ parent_ino(file->f_dentry), DT_DIR);
++ break;
++ case 2:
++ err = (*filler)(data, "aquota.user", 11, n,
++ file->f_dentry->d_inode->i_ino
++ + USRQUOTA + 1,
++ DT_REG);
++ break;
++ case 3:
++ err = (*filler)(data, "aquota.group", 12, n,
++ file->f_dentry->d_inode->i_ino
++ + GRPQUOTA + 1,
++ DT_REG);
++ break;
++ default:
++ goto out;
++ }
++ }
++out:
++ file->f_pos = n;
++ return err;
++}
++
++struct vzdq_aquotq_lookdata {
++ dev_t dev;
++ int type;
++};
++
++static int vzdq_aquotq_looktest(struct inode *inode, void *data)
++{
++ struct vzdq_aquotq_lookdata *d;
++
++ d = data;
++ return inode->i_op == &vzdq_aquotf_inode_operations &&
++ vzdq_aquot_getidev(inode) == d->dev &&
++ PROC_I(inode)->type == d->type + 1;
++}
++
++static int vzdq_aquotq_lookset(struct inode *inode, void *data)
++{
++ struct vzdq_aquotq_lookdata *d;
++
++ d = data;
++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
++ inode->i_ino = vzdq_aquot_getino(d->dev) + d->type + 1;
++ inode->i_mode = S_IFREG | S_IRUSR;
++ inode->i_uid = 0;
++ inode->i_gid = 0;
++ inode->i_nlink = 1;
++ inode->i_op = &vzdq_aquotf_inode_operations;
++ inode->i_fop = &vzdq_aquotf_file_operations;
++ PROC_I(inode)->type = d->type + 1;
++ vzdq_aquot_setidev(inode, d->dev);
++ return 0;
++}
++
++static struct dentry *vzdq_aquotq_lookup(struct inode *dir,
++ struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct inode *inode;
++ struct vzdq_aquotq_lookdata d;
++ int k;
++
++ if (dentry->d_name.len == 11) {
++ if (memcmp(dentry->d_name.name, "aquota.user", 11))
++ goto out;
++ k = USRQUOTA;
++ } else if (dentry->d_name.len == 12) {
++		if (memcmp(dentry->d_name.name, "aquota.group", 12))
++ goto out;
++ k = GRPQUOTA;
++ } else
++ goto out;
++ d.dev = vzdq_aquot_getidev(dir);
++ d.type = k;
++ inode = iget5_locked(dir->i_sb, dir->i_ino + k + 1,
++ vzdq_aquotq_looktest, vzdq_aquotq_lookset, &d);
++ if (inode == NULL)
++ goto out;
++ unlock_new_inode(inode);
++ d_add(dentry, inode);
++ return NULL;
++
++out:
++ return ERR_PTR(-ENOENT);
++}
++
++static struct file_operations vzdq_aquotq_file_operations = {
++ .read = &generic_read_dir,
++ .readdir = &vzdq_aquotq_readdir,
++};
++
++static struct inode_operations vzdq_aquotq_inode_operations = {
++ .lookup = &vzdq_aquotq_lookup,
++};
++
++
++/* ----------------------------------------------------------------------
++ *
++ * /proc/vz/vzaquota directory
++ *
++ * --------------------------------------------------------------------- */
++
++struct vzdq_aquot_de {
++ struct list_head list;
++ struct vfsmount *mnt;
++};
++
++static int vzdq_aquot_buildmntlist(struct ve_struct *ve,
++ struct list_head *head)
++{
++ struct vfsmount *rmnt, *mnt;
++ struct vzdq_aquot_de *p;
++ int err;
++
++#ifdef CONFIG_VE
++ rmnt = mntget(ve->fs_rootmnt);
++#else
++ read_lock(&current->fs->lock);
++ rmnt = mntget(current->fs->rootmnt);
++ read_unlock(&current->fs->lock);
++#endif
++ mnt = rmnt;
++ spin_lock(&vfsmount_lock);
++ while (1) {
++ list_for_each_entry(p, head, list) {
++ if (p->mnt->mnt_sb == mnt->mnt_sb)
++ goto skip;
++ }
++
++ err = -ENOMEM;
++ p = kmalloc(sizeof(*p), GFP_KERNEL);
++ if (p == NULL)
++ goto out;
++ p->mnt = mntget(mnt);
++ list_add_tail(&p->list, head);
++
++skip:
++ err = 0;
++ if (list_empty(&mnt->mnt_mounts)) {
++ while (1) {
++ if (mnt == rmnt)
++ goto out;
++ if (mnt->mnt_child.next !=
++ &mnt->mnt_parent->mnt_mounts)
++ break;
++ mnt = mnt->mnt_parent;
++ }
++ mnt = list_entry(mnt->mnt_child.next,
++ struct vfsmount, mnt_child);
++ } else
++ mnt = list_entry(mnt->mnt_mounts.next,
++ struct vfsmount, mnt_child);
++ }
++out:
++ spin_unlock(&vfsmount_lock);
++ mntput(rmnt);
++ return err;
++}
++
++static void vzdq_aquot_releasemntlist(struct ve_struct *ve,
++ struct list_head *head)
++{
++ struct vzdq_aquot_de *p;
++
++ while (!list_empty(head)) {
++ p = list_entry(head->next, typeof(*p), list);
++ mntput(p->mnt);
++ list_del(&p->list);
++ kfree(p);
++ }
++}
++
++static int vzdq_aquotd_readdir(struct file *file, void *data, filldir_t filler)
++{
++ struct ve_struct *ve, *old_ve;
++ struct list_head mntlist;
++ struct vzdq_aquot_de *de;
++ struct super_block *sb;
++ struct vz_quota_master *qmblk;
++ loff_t i, n;
++ char buf[24];
++ int l, err;
++
++ i = 0;
++ n = file->f_pos;
++ ve = VE_OWNER_FSTYPE(file->f_dentry->d_sb->s_type);
++ old_ve = set_exec_env(ve);
++
++ INIT_LIST_HEAD(&mntlist);
++#ifdef CONFIG_VE
++ /*
++	 * The only reason for disabling readdir for the host system is that
++	 * this readdir can be slow and CPU-consuming with a large number of VPSs
++ * (or just mount points).
++ */
++ err = ve_is_super(ve);
++#else
++ err = 0;
++#endif
++ if (!err) {
++ err = vzdq_aquot_buildmntlist(ve, &mntlist);
++ if (err)
++ goto out_err;
++ }
++
++ if (i >= n) {
++ if ((*filler)(data, ".", 1, i,
++ file->f_dentry->d_inode->i_ino, DT_DIR))
++ goto out_fill;
++ }
++ i++;
++
++ if (i >= n) {
++ if ((*filler)(data, "..", 2, i,
++ parent_ino(file->f_dentry), DT_DIR))
++ goto out_fill;
++ }
++ i++;
++
++ list_for_each_entry (de, &mntlist, list) {
++ sb = de->mnt->mnt_sb;
++#ifdef CONFIG_VE
++ if (get_device_perms_ve(S_IFBLK, sb->s_dev, FMODE_QUOTACTL))
++ continue;
++#endif
++ qmblk = vzquota_find_qmblk(sb);
++ if (qmblk == NULL || qmblk == VZ_QUOTA_BAD)
++ continue;
++
++ qmblk_put(qmblk);
++ i++;
++ if (i <= n)
++ continue;
++
++ l = sprintf(buf, "%08x", new_encode_dev(sb->s_dev));
++ if ((*filler)(data, buf, l, i - 1,
++ vzdq_aquot_getino(sb->s_dev), DT_DIR))
++ break;
++ }
++
++out_fill:
++ err = 0;
++ file->f_pos = i;
++out_err:
++ vzdq_aquot_releasemntlist(ve, &mntlist);
++ (void)set_exec_env(old_ve);
++ return err;
++}
++
++static int vzdq_aquotd_looktest(struct inode *inode, void *data)
++{
++ return inode->i_op == &vzdq_aquotq_inode_operations &&
++ vzdq_aquot_getidev(inode) == (dev_t)(unsigned long)data;
++}
++
++static int vzdq_aquotd_lookset(struct inode *inode, void *data)
++{
++ dev_t dev;
++
++ dev = (dev_t)(unsigned long)data;
++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
++ inode->i_ino = vzdq_aquot_getino(dev);
++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++ inode->i_uid = 0;
++ inode->i_gid = 0;
++ inode->i_nlink = 2;
++ inode->i_op = &vzdq_aquotq_inode_operations;
++ inode->i_fop = &vzdq_aquotq_file_operations;
++ vzdq_aquot_setidev(inode, dev);
++ return 0;
++}
++
++static struct dentry *vzdq_aquotd_lookup(struct inode *dir,
++ struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct ve_struct *ve, *old_ve;
++ const unsigned char *s;
++ int l;
++ dev_t dev;
++ struct inode *inode;
++
++ ve = VE_OWNER_FSTYPE(dir->i_sb->s_type);
++ old_ve = set_exec_env(ve);
++#ifdef CONFIG_VE
++ /*
++ * Lookup is much lighter than readdir, so it can be allowed for the
++	 * host system. But it would be strange to allow lookup without
++	 * readdir...
++ */
++ if (ve_is_super(ve))
++ goto out;
++#endif
++
++ dev = 0;
++ l = dentry->d_name.len;
++ if (l <= 0)
++ goto out;
++ for (s = dentry->d_name.name; l > 0; s++, l--) {
++ if (!isxdigit(*s))
++ goto out;
++ if (dev & ~(~0UL >> 4))
++ goto out;
++ dev <<= 4;
++ if (isdigit(*s))
++ dev += *s - '0';
++ else if (islower(*s))
++ dev += *s - 'a' + 10;
++ else
++ dev += *s - 'A' + 10;
++ }
++ dev = new_decode_dev(dev);
++
++#ifdef CONFIG_VE
++ if (get_device_perms_ve(S_IFBLK, dev, FMODE_QUOTACTL))
++ goto out;
++#endif
++
++ inode = iget5_locked(dir->i_sb, vzdq_aquot_getino(dev),
++ vzdq_aquotd_looktest, vzdq_aquotd_lookset,
++ (void *)(unsigned long)dev);
++ if (inode == NULL)
++ goto out;
++ unlock_new_inode(inode);
++
++ d_add(dentry, inode);
++ (void)set_exec_env(old_ve);
++ return NULL;
++
++out:
++ (void)set_exec_env(old_ve);
++ return ERR_PTR(-ENOENT);
++}
++
++static struct file_operations vzdq_aquotd_file_operations = {
++ .read = &generic_read_dir,
++ .readdir = &vzdq_aquotd_readdir,
++};
++
++static struct inode_operations vzdq_aquotd_inode_operations = {
++ .lookup = &vzdq_aquotd_lookup,
++};
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Initialization and deinitialization
++ *
++ * --------------------------------------------------------------------- */
++
++/*
++ * FIXME: creation of proc entries here is unsafe with respect to module
++ * unloading.
++ */
++void vzaquota_init(void)
++{
++ struct proc_dir_entry *de;
++
++ de = create_proc_glob_entry("vz/vzaquota",
++ S_IFDIR | S_IRUSR | S_IXUSR, NULL);
++ if (de != NULL) {
++ de->proc_iops = &vzdq_aquotd_inode_operations;
++ de->proc_fops = &vzdq_aquotd_file_operations;
++ } else
++ printk("VZDQ: vz/vzaquota creation failed\n");
++#if defined(CONFIG_SYSCTL)
++ de = create_proc_glob_entry("sys/fs/quota",
++ S_IFDIR | S_IRUSR | S_IXUSR, NULL);
++ if (de == NULL)
++ printk("VZDQ: sys/fs/quota creation failed\n");
++#endif
++}
++
++void vzaquota_fini(void)
++{
++}
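
The /proc/vz/vzaquota directory above names each per-device subdirectory with the hex form of the device number: readdir prints "%08x" of new_encode_dev(sb->s_dev), and vzdq_aquotd_lookup() parses that string back one nibble at a time before handing it to new_decode_dev(). A standalone sketch of the round-trip; the encode/decode helpers mirror the kernel's new_encode_dev()/new_decode_dev() bit layout and are reimplemented here only for the example:

#include <ctype.h>
#include <stdio.h>

static unsigned int new_encode_dev(unsigned major, unsigned minor)
{
	return (minor & 0xff) | (major << 8) | ((minor & ~0xffU) << 12);
}

static void new_decode_dev(unsigned int dev, unsigned *major, unsigned *minor)
{
	*major = (dev & 0xfff00) >> 8;
	*minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
}

int main(void)
{
	char name[16];
	unsigned maj = 253, min = 7;

	/* readdir side: directory entry name for device 253:7 */
	snprintf(name, sizeof(name), "%08x", new_encode_dev(maj, min));

	/* lookup side: hex digits only, accumulated one nibble at a time */
	unsigned int dev = 0;
	for (const char *s = name; *s; s++) {
		if (!isxdigit((unsigned char)*s))
			return 1;
		dev <<= 4;
		dev += isdigit((unsigned char)*s) ? *s - '0'
		     : islower((unsigned char)*s) ? *s - 'a' + 10
		                                  : *s - 'A' + 10;
	}

	unsigned M, m;
	new_decode_dev(dev, &M, &m);
	printf("%s -> %u:%u\n", name, M, m);   /* prints 0000fd07 -> 253:7 */
	return 0;
}
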
+diff -uprN linux-2.6.15.orig/fs/vzdq_mgmt.c linux-2.6.15-ve025stab014/fs/vzdq_mgmt.c
+--- linux-2.6.15.orig/fs/vzdq_mgmt.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/vzdq_mgmt.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,714 @@
++/*
++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ */
++
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/list.h>
++#include <asm/semaphore.h>
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/dcache.h>
++#include <linux/mount.h>
++#include <linux/namei.h>
++#include <linux/writeback.h>
++#include <linux/gfp.h>
++#include <asm/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/quota.h>
++#include <linux/vzctl_quota.h>
++#include <linux/vzquota.h>
++
++
++/* ----------------------------------------------------------------------
++ * Switching quota on.
++ * --------------------------------------------------------------------- */
++
++/*
++ * check limits copied from user
++ */
++int vzquota_check_sane_limits(struct dq_stat *qstat)
++{
++ int err;
++
++ err = -EINVAL;
++
++	/* softlimit must be less than hardlimit */
++ if (qstat->bsoftlimit > qstat->bhardlimit)
++ goto out;
++
++ if (qstat->isoftlimit > qstat->ihardlimit)
++ goto out;
++
++ err = 0;
++out:
++ return err;
++}
++
++/*
++ * check usage values copied from user
++ */
++int vzquota_check_sane_values(struct dq_stat *qstat)
++{
++ int err;
++
++ err = -EINVAL;
++
++ /* expiration time must not be set if softlimit was not exceeded */
++ if (qstat->bcurrent < qstat->bsoftlimit && qstat->btime != (time_t)0)
++ goto out;
++
++ if (qstat->icurrent < qstat->isoftlimit && qstat->itime != (time_t)0)
++ goto out;
++
++ err = vzquota_check_sane_limits(qstat);
++out:
++ return err;
++}
++
++/*
++ * create new quota master block
++ * this function should:
++ * - copy limits and usage parameters from the user buffer;
++ * - allocate and initialize the quota block and insert it into the hash;
++ */
++static int vzquota_create(unsigned int quota_id, struct vz_quota_stat *u_qstat)
++{
++ int err;
++ struct vz_quota_stat qstat;
++ struct vz_quota_master *qmblk;
++
++ down(&vz_quota_sem);
++
++ err = -EFAULT;
++ if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
++ goto out;
++
++ err = -EINVAL;
++ if (quota_id == 0)
++ goto out;
++
++ if (vzquota_check_sane_values(&qstat.dq_stat))
++ goto out;
++ err = 0;
++ qmblk = vzquota_alloc_master(quota_id, &qstat);
++
++ if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */
++ err = PTR_ERR(qmblk);
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++/**
++ * vzquota_on - turn quota on
++ *
++ * This function should:
++ * - find and get refcnt of directory entry for quota root and corresponding
++ * mountpoint;
++ * - find corresponding quota block and mark it with given path;
++ * - check quota tree;
++ * - initialize quota for the tree root.
++ */
++static int vzquota_on(unsigned int quota_id, const char *quota_root)
++{
++ int err;
++ struct nameidata nd;
++ struct vz_quota_master *qmblk;
++ struct super_block *dqsb;
++
++ dqsb = NULL;
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EBUSY;
++ if (qmblk->dq_state != VZDQ_STARTING)
++ goto out;
++
++ err = user_path_walk(quota_root, &nd);
++ if (err)
++ goto out;
++ /* init path must be a directory */
++ err = -ENOTDIR;
++ if (!S_ISDIR(nd.dentry->d_inode->i_mode))
++ goto out_path;
++
++ qmblk->dq_root_dentry = nd.dentry;
++ qmblk->dq_root_mnt = nd.mnt;
++ qmblk->dq_sb = nd.dentry->d_inode->i_sb;
++ err = vzquota_get_super(qmblk->dq_sb);
++ if (err)
++ goto out_super;
++
++ /*
++ * Serialization with quota initialization and operations is performed
++ * through generation check: generation is memorized before qmblk is
++ * found and compared under inode_qmblk_lock with assignment.
++ *
++ * Note that the dentry tree is shrunk only for high-level logical
++ * serialization, purely as a courtesy to the user: to have consistent
++	 * quota statistics, files should be closed, etc., when quota is turned on.
++ */
++ err = vzquota_on_qmblk(qmblk->dq_sb, qmblk->dq_root_dentry->d_inode,
++ qmblk);
++ if (err)
++ goto out_init;
++ qmblk->dq_state = VZDQ_WORKING;
++
++ up(&vz_quota_sem);
++ return 0;
++
++out_init:
++ dqsb = qmblk->dq_sb;
++out_super:
++ /* clear for qmblk_put/quota_free_master */
++ qmblk->dq_sb = NULL;
++ qmblk->dq_root_dentry = NULL;
++ qmblk->dq_root_mnt = NULL;
++out_path:
++ path_release(&nd);
++out:
++ if (dqsb)
++ vzquota_put_super(dqsb);
++ up(&vz_quota_sem);
++ return err;
++}
++
++
++/* ----------------------------------------------------------------------
++ * Switching quota off.
++ * --------------------------------------------------------------------- */
++
++/*
++ * destroy quota block by ID
++ */
++static int vzquota_destroy(unsigned int quota_id)
++{
++ int err;
++ struct vz_quota_master *qmblk;
++ struct dentry *dentry;
++ struct vfsmount *mnt;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EBUSY;
++ if (qmblk->dq_state == VZDQ_WORKING)
++ goto out; /* quota_off first */
++
++ list_del_init(&qmblk->dq_hash);
++ dentry = qmblk->dq_root_dentry;
++ qmblk->dq_root_dentry = NULL;
++ mnt = qmblk->dq_root_mnt;
++ qmblk->dq_root_mnt = NULL;
++
++ if (qmblk->dq_sb)
++ vzquota_put_super(qmblk->dq_sb);
++ up(&vz_quota_sem);
++
++ qmblk_put(qmblk);
++ dput(dentry);
++ mntput(mnt);
++ return 0;
++
++out:
++ up(&vz_quota_sem);
++ return err;
++}
++
++/**
++ * vzquota_off - turn quota off
++ */
++static int vzquota_sync_list(struct list_head *lh,
++ struct vz_quota_master *qmblk)
++{
++ int err;
++ LIST_HEAD(list);
++ struct vz_quota_ilink *qlnk;
++ struct inode *inode;
++ struct writeback_control wbc;
++
++ memset(&wbc, 0, sizeof(wbc));
++ wbc.sync_mode = WB_SYNC_ALL;
++
++ err = 0;
++ do {
++ inode = NULL;
++ list_for_each_entry (qlnk, lh, list) {
++ inode = igrab(QLNK_INODE(qlnk));
++ if (inode)
++ break;
++ }
++ if (inode == NULL)
++ break;
++
++ list_move(&qlnk->list, &list);
++ inode_qmblk_unlock(qmblk->dq_sb);
++
++ wbc.nr_to_write = LONG_MAX;
++ err = sync_inode(inode, &wbc);
++ iput(inode);
++
++ inode_qmblk_lock(qmblk->dq_sb);
++ } while (!err);
++
++ list_splice(&list, lh);
++ return err;
++}
++
++static int vzquota_sync_inodes(struct vz_quota_master *qmblk)
++{
++ int err;
++ LIST_HEAD(qlnk_list);
++
++ list_splice_init(&qmblk->dq_ilink_list, &qlnk_list);
++ err = vzquota_sync_list(&qlnk_list, qmblk);
++ if (!err && !list_empty(&qmblk->dq_ilink_list))
++ err = -EBUSY;
++ list_splice(&qlnk_list, &qmblk->dq_ilink_list);
++
++ return err;
++}
++
++static int vzquota_off(unsigned int quota_id)
++{
++ int err;
++ struct vz_quota_master *qmblk;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EALREADY;
++ if (qmblk->dq_state != VZDQ_WORKING)
++ goto out;
++
++ inode_qmblk_lock(qmblk->dq_sb); /* protects dq_ilink_list also */
++ err = vzquota_sync_inodes(qmblk);
++ if (err)
++ goto out_unlock;
++ inode_qmblk_unlock(qmblk->dq_sb);
++
++ err = vzquota_off_qmblk(qmblk->dq_sb, qmblk);
++ if (err)
++ goto out;
++
++ /* vzquota_destroy will free resources */
++ qmblk->dq_state = VZDQ_STOPING;
++out:
++ up(&vz_quota_sem);
++
++ return err;
++
++out_unlock:
++ inode_qmblk_unlock(qmblk->dq_sb);
++ goto out;
++}
++
++
++/* ----------------------------------------------------------------------
++ * Other VZQUOTA ioctl's.
++ * --------------------------------------------------------------------- */
++
++/*
++ * this function should:
++ * - set new limits/buffer under quota master block lock
++ * - if the new softlimit is less than the usage, set the expiration time
++ * - no need to alloc ugid hash table - we'll do that on demand
++ */
++int vzquota_update_limit(struct dq_stat *_qstat,
++ struct dq_stat *qstat)
++{
++ int err;
++
++ err = -EINVAL;
++ if (vzquota_check_sane_limits(qstat))
++ goto out;
++
++ err = 0;
++
++ /* limits */
++ _qstat->bsoftlimit = qstat->bsoftlimit;
++ _qstat->bhardlimit = qstat->bhardlimit;
++ /*
++ * If the soft limit is exceeded, administrator can override the moment
++ * when the grace period for limit exceeding ends.
++ * Specifying the moment may be useful if the soft limit is set to be
++	 * lower than the current usage. In that case, if the grace
++ * period end isn't specified, the grace period will start from the
++ * moment of the first write operation.
++ * There is a race with the user level. Soft limit may be already
++ * exceeded before the limit change, and grace period end calculated by
++	 * the kernel will be overridden. User level may check if the limit is
++ * already exceeded, but check and set calls are not atomic.
++	 * This race isn't dangerous. Under normal circumstances, the
++	 * difference between the grace period ends calculated by the kernel
++	 * and by the user level should not be greater than the difference
++	 * between the moments of the check and set calls, i.e. not bigger
++	 * than the quota timer resolution of 1 sec.
++ */
++ if (qstat->btime != (time_t)0 &&
++ _qstat->bcurrent >= _qstat->bsoftlimit)
++ _qstat->btime = qstat->btime;
++
++ _qstat->isoftlimit = qstat->isoftlimit;
++ _qstat->ihardlimit = qstat->ihardlimit;
++ if (qstat->itime != (time_t)0 &&
++ _qstat->icurrent >= _qstat->isoftlimit)
++ _qstat->itime = qstat->itime;
++
++out:
++ return err;
++}
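
The two conditionals above implement a single rule; isolated, it looks like the
user-space sketch below. The function name and types are illustrative, not part
of the patch.

#include <stdio.h>
#include <time.h>

/* An administrator-supplied grace-period end is accepted only when one was
 * actually supplied and the (new) soft limit is already exceeded. */
static time_t merge_grace_end(time_t kernel_end, time_t requested_end,
			      unsigned long long usage,
			      unsigned long long softlimit)
{
	if (requested_end != (time_t)0 && usage >= softlimit)
		return requested_end;	/* override accepted */
	return kernel_end;		/* keep the kernel-computed value */
}

int main(void)
{
	/* usage below the soft limit: the supplied end (1000) is ignored */
	printf("%ld\n", (long)merge_grace_end(0, 1000, 512, 1024));
	/* usage above the soft limit: the supplied end is taken */
	printf("%ld\n", (long)merge_grace_end(0, 1000, 2048, 1024));
	return 0;
}
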
++
++/*
++ * set new quota limits.
++ * this function should:
++ * - copy new limits from user level;
++ * - find the quota block;
++ * - set new limits and flags.
++ */
++static int vzquota_setlimit(unsigned int quota_id,
++ struct vz_quota_stat *u_qstat)
++{
++ int err;
++ struct vz_quota_stat qstat;
++ struct vz_quota_master *qmblk;
++
++ down(&vz_quota_sem); /* for hash list protection */
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EFAULT;
++ if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
++ goto out;
++
++ qmblk_data_write_lock(qmblk);
++ err = vzquota_update_limit(&qmblk->dq_stat, &qstat.dq_stat);
++ if (err == 0)
++ qmblk->dq_info = qstat.dq_info;
++ qmblk_data_write_unlock(qmblk);
++
++out:
++ up(&vz_quota_sem);
++ return err;
++}
++
++/*
++ * get quota limits.
++ * very simple - just return stat buffer to user
++ */
++static int vzquota_getstat(unsigned int quota_id,
++ struct vz_quota_stat *u_qstat)
++{
++ int err;
++ struct vz_quota_stat qstat;
++ struct vz_quota_master *qmblk;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ qmblk_data_read_lock(qmblk);
++ /* copy whole buffer under lock */
++ memcpy(&qstat.dq_stat, &qmblk->dq_stat, sizeof(qstat.dq_stat));
++ memcpy(&qstat.dq_info, &qmblk->dq_info, sizeof(qstat.dq_info));
++ qmblk_data_read_unlock(qmblk);
++
++ err = copy_to_user(u_qstat, &qstat, sizeof(qstat));
++ if (err)
++ err = -EFAULT;
++
++out:
++ up(&vz_quota_sem);
++ return err;
++}
++
++/*
++ * This is a system call to control per-VE disk quota.
++ * Note this call is allowed to run ONLY from VE0.
++ */
++long do_vzquotactl(int cmd, unsigned int quota_id,
++ struct vz_quota_stat *qstat, const char *ve_root)
++{
++ int ret;
++
++ ret = -EPERM;
++ /* access allowed only from root of VE0 */
++ if (!capable(CAP_SYS_RESOURCE) ||
++ !capable(CAP_SYS_ADMIN))
++ goto out;
++
++ switch (cmd) {
++ case VZ_DQ_CREATE:
++ ret = vzquota_create(quota_id, qstat);
++ break;
++ case VZ_DQ_DESTROY:
++ ret = vzquota_destroy(quota_id);
++ break;
++ case VZ_DQ_ON:
++ ret = vzquota_on(quota_id, ve_root);
++ break;
++ case VZ_DQ_OFF:
++ ret = vzquota_off(quota_id);
++ break;
++ case VZ_DQ_SETLIMIT:
++ ret = vzquota_setlimit(quota_id, qstat);
++ break;
++ case VZ_DQ_GETSTAT:
++ ret = vzquota_getstat(quota_id, qstat);
++ break;
++
++ default:
++ ret = -EINVAL;
++ goto out;
++ }
++
++out:
++ return ret;
++}
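
Taken together, the handlers dispatched above enforce a simple lifecycle on the
master block: a block is created in VZDQ_STARTING state (vzquota_alloc_master is
outside this hunk, so that initial state is an assumption here), VZ_DQ_ON moves
it to VZDQ_WORKING, VZ_DQ_OFF moves it to VZDQ_STOPING, and VZ_DQ_DESTROY
refuses to touch a WORKING block. A minimal sketch of that state machine, with
hypothetical names mirroring the VZDQ_* constants:

/* illustrative lifecycle check, not kernel code */
enum demo_state { DEMO_STARTING, DEMO_WORKING, DEMO_STOPING };

static int demo_on(enum demo_state *s)		/* VZ_DQ_ON */
{
	if (*s != DEMO_STARTING)
		return -1;			/* -EBUSY in the code above */
	*s = DEMO_WORKING;
	return 0;
}

static int demo_off(enum demo_state *s)		/* VZ_DQ_OFF */
{
	if (*s != DEMO_WORKING)
		return -1;			/* -EALREADY in the code above */
	*s = DEMO_STOPING;
	return 0;
}

static int demo_destroy(enum demo_state *s)	/* VZ_DQ_DESTROY */
{
	return (*s == DEMO_WORKING) ? -1 : 0;	/* quota_off first */
}

int main(void)
{
	enum demo_state s = DEMO_STARTING;
	/* the legal order: on -> off -> destroy; returns 0 */
	return demo_on(&s) || demo_off(&s) || demo_destroy(&s);
}
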
++
++
++/* ----------------------------------------------------------------------
++ * Proc filesystem routines
++ * ---------------------------------------------------------------------*/
++
++#if defined(CONFIG_PROC_FS)
++
++#define QUOTA_UINT_LEN 15
++#define QUOTA_TIME_LEN_FMT_UINT "%11u"
++#define QUOTA_NUM_LEN_FMT_UINT "%15u"
++#define QUOTA_NUM_LEN_FMT_ULL "%15Lu"
++#define QUOTA_TIME_LEN_FMT_STR "%11s"
++#define QUOTA_NUM_LEN_FMT_STR "%15s"
++#define QUOTA_PROC_MAX_LINE_LEN 2048
++
++/*
++ * prints the /proc/vz/vzquota header line
++ */
++static int print_proc_header(char * buffer)
++{
++ return sprintf(buffer,
++ "%-11s"
++ QUOTA_NUM_LEN_FMT_STR
++ QUOTA_NUM_LEN_FMT_STR
++ QUOTA_NUM_LEN_FMT_STR
++ QUOTA_TIME_LEN_FMT_STR
++ QUOTA_TIME_LEN_FMT_STR
++ "\n",
++ "qid: path",
++ "usage", "softlimit", "hardlimit", "time", "expire");
++}
++
++/*
++ * prints proc master record id, dentry path
++ */
++static int print_proc_master_id(char * buffer, char * path_buf,
++ struct vz_quota_master * qp)
++{
++ char *path;
++ int over;
++
++ path = NULL;
++ switch (qp->dq_state) {
++ case VZDQ_WORKING:
++ if (!path_buf) {
++ path = "";
++ break;
++ }
++ path = d_path(qp->dq_root_dentry,
++ qp->dq_root_mnt, path_buf, PAGE_SIZE);
++ if (IS_ERR(path)) {
++ path = "";
++ break;
++ }
++ /* do not print large path, truncate it */
++ over = strlen(path) -
++ (QUOTA_PROC_MAX_LINE_LEN - 3 - 3 -
++ QUOTA_UINT_LEN);
++ if (over > 0) {
++ path += over - 3;
++			path[0] = path[1] = path[2] = '.';
++ }
++ break;
++ case VZDQ_STARTING:
++ path = "-- started --";
++ break;
++ case VZDQ_STOPING:
++ path = "-- stopped --";
++ break;
++ }
++
++ return sprintf(buffer, "%u: %s\n", qp->dq_id, path);
++}
++
++/*
++ * prints struct vz_quota_stat data
++ */
++static int print_proc_stat(char * buffer, struct dq_stat *qs,
++ struct dq_info *qi)
++{
++ return sprintf(buffer,
++ "%11s"
++ QUOTA_NUM_LEN_FMT_ULL
++ QUOTA_NUM_LEN_FMT_ULL
++ QUOTA_NUM_LEN_FMT_ULL
++ QUOTA_TIME_LEN_FMT_UINT
++ QUOTA_TIME_LEN_FMT_UINT
++ "\n"
++ "%11s"
++ QUOTA_NUM_LEN_FMT_UINT
++ QUOTA_NUM_LEN_FMT_UINT
++ QUOTA_NUM_LEN_FMT_UINT
++ QUOTA_TIME_LEN_FMT_UINT
++ QUOTA_TIME_LEN_FMT_UINT
++ "\n",
++ "1k-blocks",
++ qs->bcurrent >> 10,
++ qs->bsoftlimit >> 10,
++ qs->bhardlimit >> 10,
++ (unsigned int)qs->btime,
++ (unsigned int)qi->bexpire,
++ "inodes",
++ qs->icurrent,
++ qs->isoftlimit,
++ qs->ihardlimit,
++ (unsigned int)qs->itime,
++ (unsigned int)qi->iexpire);
++}
++
++
++/*
++ * for /proc filesystem output
++ */
++static int vzquota_read_proc(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ int len, i;
++ off_t printed = 0;
++ char *p = page;
++ struct vz_quota_master *qp;
++ struct vz_quota_ilink *ql2;
++ struct list_head *listp;
++ char *path_buf;
++
++ path_buf = (char*)__get_free_page(GFP_KERNEL);
++ if (path_buf == NULL)
++ return -ENOMEM;
++
++ len = print_proc_header(p);
++ printed += len;
++ if (off < printed) /* keep header in output */ {
++ *start = p + off;
++ p += len;
++ }
++
++ down(&vz_quota_sem);
++
++ /* traverse master hash table for all records */
++ for (i = 0; i < vzquota_hash_size; i++) {
++ list_for_each(listp, &vzquota_hash_table[i]) {
++ qp = list_entry(listp,
++ struct vz_quota_master, dq_hash);
++
++ /* Skip other VE's information if not root of VE0 */
++ if ((!capable(CAP_SYS_ADMIN) ||
++ !capable(CAP_SYS_RESOURCE))) {
++ ql2 = INODE_QLNK(current->fs->root->d_inode);
++ if (ql2 == NULL || qp != ql2->qmblk)
++ continue;
++ }
++ /*
++ * Now print the next record
++ */
++ len = 0;
++ /* we print quotaid and path only in VE0 */
++ if (capable(CAP_SYS_ADMIN))
++ len += print_proc_master_id(p+len,path_buf, qp);
++ len += print_proc_stat(p+len, &qp->dq_stat,
++ &qp->dq_info);
++ printed += len;
++ /* skip unnecessary lines */
++ if (printed <= off)
++ continue;
++ p += len;
++ /* provide start offset */
++ if (*start == NULL)
++ *start = p + (off - printed);
++ /* have we printed all requested size? */
++ if (PAGE_SIZE - (p - page) < QUOTA_PROC_MAX_LINE_LEN ||
++ (p - *start) >= count)
++ goto out;
++ }
++ }
++
++ *eof = 1; /* checked all hash */
++out:
++ up(&vz_quota_sem);
++
++ len = 0;
++ if (*start != NULL) {
++ len = (p - *start);
++ if (len > count)
++ len = count;
++ }
++
++ if (path_buf)
++ free_page((unsigned long) path_buf);
++
++ return len;
++}
++
++/*
++ * Register procfs read callback
++ */
++int vzquota_proc_init(void)
++{
++ struct proc_dir_entry *de;
++
++ de = create_proc_entry("vz/vzquota", S_IFREG|S_IRUSR, NULL);
++ if (de == NULL) {
++		/* create the "vz" subdirectory if it does not exist */
++ de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
++ if (de == NULL)
++ goto out_err;
++ de = create_proc_entry("vzquota", S_IFREG|S_IRUSR, de);
++ if (de == NULL)
++ goto out_err;
++ }
++ de->read_proc = vzquota_read_proc;
++ de->data = NULL;
++ return 0;
++out_err:
++ return -EBUSY;
++}
++
++void vzquota_proc_release(void)
++{
++ /* Unregister procfs read callback */
++ remove_proc_entry("vz/vzquota", NULL);
++}
++
++#endif
+diff -uprN linux-2.6.15.orig/fs/vzdq_ops.c linux-2.6.15-ve025stab014/fs/vzdq_ops.c
+--- linux-2.6.15.orig/fs/vzdq_ops.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/vzdq_ops.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,565 @@
++/*
++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ */
++
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/types.h>
++#include <asm/semaphore.h>
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/quota.h>
++#include <linux/vzquota.h>
++
++
++/* ----------------------------------------------------------------------
++ * Quota superblock operations - helper functions.
++ * --------------------------------------------------------------------- */
++
++static inline void vzquota_incr_inodes(struct dq_stat *dqstat,
++ unsigned long number)
++{
++ dqstat->icurrent += number;
++}
++
++static inline void vzquota_incr_space(struct dq_stat *dqstat,
++ __u64 number)
++{
++ dqstat->bcurrent += number;
++}
++
++static inline void vzquota_decr_inodes(struct dq_stat *dqstat,
++ unsigned long number)
++{
++ if (dqstat->icurrent > number)
++ dqstat->icurrent -= number;
++ else
++ dqstat->icurrent = 0;
++ if (dqstat->icurrent < dqstat->isoftlimit)
++ dqstat->itime = (time_t) 0;
++}
++
++static inline void vzquota_decr_space(struct dq_stat *dqstat,
++ __u64 number)
++{
++ if (dqstat->bcurrent > number)
++ dqstat->bcurrent -= number;
++ else
++ dqstat->bcurrent = 0;
++ if (dqstat->bcurrent < dqstat->bsoftlimit)
++ dqstat->btime = (time_t) 0;
++}
++
++/*
++ * TODO: use a better printk() message, or a /proc/vzquotamsg interface
++ * similar to /proc/kmsg
++ */
++static inline void vzquota_warn(struct dq_info *dq_info, int dq_id, int flag,
++ const char *fmt)
++{
++	/* warning already printed for this masterblock */
++	if (dq_info->flags & flag)
++ return;
++ printk(fmt, dq_id);
++ dq_info->flags |= flag;
++}
++
++/*
++ * ignore_hardlimit -
++ *
++ * Intended to allow the superuser of VE0 to override hardlimits.
++ *
++ * ignore_hardlimit() has a very bad feature:
++ *
++ * writepage() operation for writable mapping of a file with holes
++ *	may trigger get_block() with the wrong current task and, as a consequence,
++ * opens a possibility to overcommit hardlimits
++ */
++/* for the reason above, it is disabled now */
++static inline int ignore_hardlimit(struct dq_info *dqstat)
++{
++#if 0
++ return ve_is_super(get_exec_env()) &&
++ capable(CAP_SYS_RESOURCE) &&
++ (dqstat->options & VZ_QUOTA_OPT_RSQUASH);
++#else
++ return 0;
++#endif
++}
++
++static int vzquota_check_inodes(struct dq_info *dq_info,
++ struct dq_stat *dqstat,
++ unsigned long number, int dq_id)
++{
++ if (number == 0)
++ return QUOTA_OK;
++
++ if (dqstat->icurrent + number > dqstat->ihardlimit &&
++ !ignore_hardlimit(dq_info)) {
++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
++ "VZ QUOTA: file hardlimit reached for id=%d\n");
++ return NO_QUOTA;
++ }
++
++ if (dqstat->icurrent + number > dqstat->isoftlimit) {
++ if (dqstat->itime == (time_t)0) {
++ vzquota_warn(dq_info, dq_id, 0,
++ "VZ QUOTA: file softlimit exceeded "
++ "for id=%d\n");
++ dqstat->itime = CURRENT_TIME_SEC.tv_sec +
++ dq_info->iexpire;
++ } else if (CURRENT_TIME_SEC.tv_sec >= dqstat->itime &&
++ !ignore_hardlimit(dq_info)) {
++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
++ "VZ QUOTA: file softlimit expired "
++ "for id=%d\n");
++ return NO_QUOTA;
++ }
++ }
++
++ return QUOTA_OK;
++}
++
++static int vzquota_check_space(struct dq_info *dq_info,
++ struct dq_stat *dqstat,
++ __u64 number, int dq_id, char prealloc)
++{
++ if (number == 0)
++ return QUOTA_OK;
++
++ if (dqstat->bcurrent + number > dqstat->bhardlimit &&
++ !ignore_hardlimit(dq_info)) {
++ if (!prealloc)
++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
++ "VZ QUOTA: disk hardlimit reached "
++ "for id=%d\n");
++ return NO_QUOTA;
++ }
++
++ if (dqstat->bcurrent + number > dqstat->bsoftlimit) {
++ if (dqstat->btime == (time_t)0) {
++ if (!prealloc) {
++ vzquota_warn(dq_info, dq_id, 0,
++ "VZ QUOTA: disk softlimit exceeded "
++ "for id=%d\n");
++ dqstat->btime = CURRENT_TIME_SEC.tv_sec
++ + dq_info->bexpire;
++ } else {
++ /*
++				 * Original Linux quota doesn't allow
++				 * preallocation to exceed the softlimit,
++				 * so such an attempt always fails here
++ */
++ return NO_QUOTA;
++ }
++ } else if (CURRENT_TIME_SEC.tv_sec >= dqstat->btime &&
++ !ignore_hardlimit(dq_info)) {
++ if (!prealloc)
++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
++ "VZ QUOTA: disk quota "
++ "softlimit expired "
++ "for id=%d\n");
++ return NO_QUOTA;
++ }
++ }
++
++ return QUOTA_OK;
++}
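
Stripped of the warning machinery, ignore_hardlimit() and the prealloc special
case, the block-allocation decision above reduces to the following user-space
sketch (field names mirror struct dq_stat / struct dq_info, but these are
simplified stand-ins, not the kernel structures):

#include <time.h>

#define DEMO_QUOTA_OK	0
#define DEMO_NO_QUOTA	1

struct demo_stat {
	unsigned long long bcurrent, bsoftlimit, bhardlimit;
	time_t btime;
};

static int demo_check_space(struct demo_stat *st, unsigned long long number,
			    time_t now, time_t bexpire)
{
	if (st->bcurrent + number > st->bhardlimit)
		return DEMO_NO_QUOTA;		/* hard limit: always refuse */

	if (st->bcurrent + number > st->bsoftlimit) {
		if (st->btime == (time_t)0)
			st->btime = now + bexpire;	/* first overrun: start grace */
		else if (now >= st->btime)
			return DEMO_NO_QUOTA;		/* grace period expired */
	}
	return DEMO_QUOTA_OK;
}

int main(void)
{
	struct demo_stat st = { .bcurrent = 900, .bsoftlimit = 1000,
				.bhardlimit = 2000, .btime = 0 };
	/* 150 blocks cross the soft limit: allowed, grace period starts */
	return demo_check_space(&st, 150, time(NULL), 3600);
}
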
++
++static int vzquota_check_ugid_inodes(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid[],
++ int type, unsigned long number)
++{
++ struct dq_info *dqinfo;
++ struct dq_stat *dqstat;
++
++ if (qugid[type] == NULL)
++ return QUOTA_OK;
++ if (qugid[type] == VZ_QUOTA_UGBAD)
++ return NO_QUOTA;
++
++ if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA))
++ return QUOTA_OK;
++ if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA))
++ return QUOTA_OK;
++ if (number == 0)
++ return QUOTA_OK;
++
++ dqinfo = &qmblk->dq_ugid_info[type];
++ dqstat = &qugid[type]->qugid_stat;
++
++ if (dqstat->ihardlimit != 0 &&
++ dqstat->icurrent + number > dqstat->ihardlimit)
++ return NO_QUOTA;
++
++ if (dqstat->isoftlimit != 0 &&
++ dqstat->icurrent + number > dqstat->isoftlimit) {
++ if (dqstat->itime == (time_t)0)
++ dqstat->itime = CURRENT_TIME_SEC.tv_sec +
++ dqinfo->iexpire;
++ else if (CURRENT_TIME_SEC.tv_sec >= dqstat->itime)
++ return NO_QUOTA;
++ }
++
++ return QUOTA_OK;
++}
++
++static int vzquota_check_ugid_space(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid[],
++ int type, __u64 number, char prealloc)
++{
++ struct dq_info *dqinfo;
++ struct dq_stat *dqstat;
++
++ if (qugid[type] == NULL)
++ return QUOTA_OK;
++ if (qugid[type] == VZ_QUOTA_UGBAD)
++ return NO_QUOTA;
++
++ if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA))
++ return QUOTA_OK;
++ if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA))
++ return QUOTA_OK;
++ if (number == 0)
++ return QUOTA_OK;
++
++ dqinfo = &qmblk->dq_ugid_info[type];
++ dqstat = &qugid[type]->qugid_stat;
++
++ if (dqstat->bhardlimit != 0 &&
++ dqstat->bcurrent + number > dqstat->bhardlimit)
++ return NO_QUOTA;
++
++ if (dqstat->bsoftlimit != 0 &&
++ dqstat->bcurrent + number > dqstat->bsoftlimit) {
++ if (dqstat->btime == (time_t)0) {
++ if (!prealloc)
++ dqstat->btime = CURRENT_TIME_SEC.tv_sec
++ + dqinfo->bexpire;
++ else
++ /*
++			 * Original Linux quota doesn't allow
++			 * preallocation to exceed the softlimit,
++			 * so such an attempt always fails here
++ */
++ return NO_QUOTA;
++ } else if (CURRENT_TIME_SEC.tv_sec >= dqstat->btime)
++ return NO_QUOTA;
++ }
++
++ return QUOTA_OK;
++}
++
++/* ----------------------------------------------------------------------
++ * Quota superblock operations
++ * --------------------------------------------------------------------- */
++
++/*
++ * S_NOQUOTA note.
++ * In the current kernel (2.6.8.1), S_NOQUOTA flag is set only for
++ * - quota file (absent in our case)
++ * - after explicit DQUOT_DROP (earlier than clear_inode) in functions like
++ * filesystem-specific new_inode, before the inode gets outside links.
++ * For the latter case, the only quota operation that might need to care
++ * about S_NOQUOTA is vzquota_drop, but there S_NOQUOTA has already been
++ * checked in DQUOT_DROP().
++ * So, S_NOQUOTA may be ignored for now in the VZDQ code.
++ *
++ * The above note is not entirely correct.
++ * Both for ext2 and ext3 filesystems, DQUOT_FREE_INODE is called from
++ * delete_inode if new_inode fails (for example, because of inode quota
++ * limits), so S_NOQUOTA check is needed in free_inode.
++ * This seems to be the dark corner of the current quota API.
++ */
++
++/*
++ * Initialize quota operations for the specified inode.
++ */
++static int vzquota_initialize(struct inode *inode, int type)
++{
++ vzquota_inode_init_call(inode);
++ return 0; /* ignored by caller */
++}
++
++/*
++ * Release quota for the specified inode.
++ */
++static int vzquota_drop(struct inode *inode)
++{
++ vzquota_inode_drop_call(inode);
++ return 0; /* ignored by caller */
++}
++
++/*
++ * Allocate block callback.
++ *
++ * If (prealloc) is set, the disk-quota-exceeded warning is not printed.
++ * See the Linux quota code for the reason why.
++ *
++ * Return:
++ * QUOTA_OK == 0 on SUCCESS
++ * NO_QUOTA == 1 if allocation should fail
++ */
++static int vzquota_alloc_space(struct inode *inode,
++ qsize_t number, int prealloc)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++ int ret = QUOTA_OK;
++
++ qmblk = vzquota_inode_data(inode, &data);
++ if (qmblk == VZ_QUOTA_BAD)
++ return NO_QUOTA;
++ if (qmblk != NULL) {
++#ifdef CONFIG_VZ_QUOTA_UGID
++ int cnt;
++ struct vz_quota_ugid * qugid[MAXQUOTAS];
++#endif
++
++ /* checking first */
++ ret = vzquota_check_space(&qmblk->dq_info, &qmblk->dq_stat,
++ number, qmblk->dq_id, prealloc);
++ if (ret == NO_QUOTA)
++ goto no_quota;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
++ ret = vzquota_check_ugid_space(qmblk, qugid,
++ cnt, number, prealloc);
++ if (ret == NO_QUOTA)
++ goto no_quota;
++ }
++ /* check ok, may increment */
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ if (qugid[cnt] == NULL)
++ continue;
++ vzquota_incr_space(&qugid[cnt]->qugid_stat, number);
++ }
++#endif
++ vzquota_incr_space(&qmblk->dq_stat, number);
++ vzquota_data_unlock(inode, &data);
++ }
++
++ inode_add_bytes(inode, number);
++ might_sleep();
++ return QUOTA_OK;
++
++no_quota:
++ vzquota_data_unlock(inode, &data);
++ return NO_QUOTA;
++}
++
++/*
++ * Allocate inodes callback.
++ *
++ * Return:
++ * QUOTA_OK == 0 on SUCCESS
++ * NO_QUOTA == 1 if allocation should fail
++ */
++static int vzquota_alloc_inode(const struct inode *inode, unsigned long number)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++ int ret = QUOTA_OK;
++
++ qmblk = vzquota_inode_data((struct inode *)inode, &data);
++ if (qmblk == VZ_QUOTA_BAD)
++ return NO_QUOTA;
++ if (qmblk != NULL) {
++#ifdef CONFIG_VZ_QUOTA_UGID
++ int cnt;
++ struct vz_quota_ugid *qugid[MAXQUOTAS];
++#endif
++
++ /* checking first */
++ ret = vzquota_check_inodes(&qmblk->dq_info, &qmblk->dq_stat,
++ number, qmblk->dq_id);
++ if (ret == NO_QUOTA)
++ goto no_quota;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
++ ret = vzquota_check_ugid_inodes(qmblk, qugid,
++ cnt, number);
++ if (ret == NO_QUOTA)
++ goto no_quota;
++ }
++ /* check ok, may increment */
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ if (qugid[cnt] == NULL)
++ continue;
++ vzquota_incr_inodes(&qugid[cnt]->qugid_stat, number);
++ }
++#endif
++ vzquota_incr_inodes(&qmblk->dq_stat, number);
++ vzquota_data_unlock((struct inode *)inode, &data);
++ }
++
++ might_sleep();
++ return QUOTA_OK;
++
++no_quota:
++ vzquota_data_unlock((struct inode *)inode, &data);
++ return NO_QUOTA;
++}
++
++/*
++ * Free space callback.
++ */
++static int vzquota_free_space(struct inode *inode, qsize_t number)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++
++ qmblk = vzquota_inode_data(inode, &data);
++ if (qmblk == VZ_QUOTA_BAD)
++ return NO_QUOTA; /* isn't checked by the caller */
++ if (qmblk != NULL) {
++#ifdef CONFIG_VZ_QUOTA_UGID
++ int cnt;
++ struct vz_quota_ugid * qugid;
++#endif
++
++ vzquota_decr_space(&qmblk->dq_stat, number);
++#ifdef CONFIG_VZ_QUOTA_UGID
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ qugid = INODE_QLNK(inode)->qugid[cnt];
++ if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
++ continue;
++ vzquota_decr_space(&qugid->qugid_stat, number);
++ }
++#endif
++ vzquota_data_unlock(inode, &data);
++ }
++ inode_sub_bytes(inode, number);
++ might_sleep();
++ return QUOTA_OK;
++}
++
++/*
++ * Free inodes callback.
++ */
++static int vzquota_free_inode(const struct inode *inode, unsigned long number)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++
++ if (IS_NOQUOTA(inode))
++ return QUOTA_OK;
++
++ qmblk = vzquota_inode_data((struct inode *)inode, &data);
++ if (qmblk == VZ_QUOTA_BAD)
++ return NO_QUOTA;
++ if (qmblk != NULL) {
++#ifdef CONFIG_VZ_QUOTA_UGID
++ int cnt;
++ struct vz_quota_ugid * qugid;
++#endif
++
++ vzquota_decr_inodes(&qmblk->dq_stat, number);
++#ifdef CONFIG_VZ_QUOTA_UGID
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ qugid = INODE_QLNK(inode)->qugid[cnt];
++ if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
++ continue;
++ vzquota_decr_inodes(&qugid->qugid_stat, number);
++ }
++#endif
++ vzquota_data_unlock((struct inode *)inode, &data);
++ }
++ might_sleep();
++ return QUOTA_OK;
++}
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++
++/*
++ * helper function for quota_transfer
++ * check that we can add inode to this quota_id
++ */
++static int vzquota_transfer_check(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid[],
++ unsigned int type, __u64 size)
++{
++ if (vzquota_check_ugid_space(qmblk, qugid, type, size, 0) != QUOTA_OK ||
++ vzquota_check_ugid_inodes(qmblk, qugid, type, 1) != QUOTA_OK)
++ return -1;
++ return 0;
++}
++
++int vzquota_transfer_usage(struct inode *inode,
++ int mask,
++ struct vz_quota_ilink *qlnk)
++{
++ struct vz_quota_ugid *qugid_old;
++ __u64 space;
++ int i;
++
++ space = inode_get_bytes(inode);
++ for (i = 0; i < MAXQUOTAS; i++) {
++ if (!(mask & (1 << i)))
++ continue;
++ if (vzquota_transfer_check(qlnk->qmblk, qlnk->qugid, i, space))
++ return -1;
++ }
++
++ for (i = 0; i < MAXQUOTAS; i++) {
++ if (!(mask & (1 << i)))
++ continue;
++ qugid_old = INODE_QLNK(inode)->qugid[i];
++ vzquota_decr_space(&qugid_old->qugid_stat, space);
++ vzquota_decr_inodes(&qugid_old->qugid_stat, 1);
++ vzquota_incr_space(&qlnk->qugid[i]->qugid_stat, space);
++ vzquota_incr_inodes(&qlnk->qugid[i]->qugid_stat, 1);
++ }
++ return 0;
++}
++
++/*
++ * Transfer the inode between different user/group quotas.
++ */
++static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
++{
++ return vzquota_inode_transfer_call(inode, iattr) ?
++ NO_QUOTA : QUOTA_OK;
++}
++
++#else /* CONFIG_VZ_QUOTA_UGID */
++
++static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
++{
++ return QUOTA_OK;
++}
++
++#endif
++
++/*
++ * Called under following semaphores:
++ * old_d->d_inode->i_sb->s_vfs_rename_sem
++ * old_d->d_inode->i_sem
++ * new_d->d_inode->i_sem
++ * [not verified --SAW]
++ */
++static int vzquota_rename(struct inode *inode,
++ struct inode *old_dir, struct inode *new_dir)
++{
++ return vzquota_rename_check(inode, old_dir, new_dir) ?
++ NO_QUOTA : QUOTA_OK;
++}
++
++/*
++ * Structure of superblock diskquota operations.
++ */
++struct dquot_operations vz_quota_operations = {
++ initialize: vzquota_initialize,
++ drop: vzquota_drop,
++ alloc_space: vzquota_alloc_space,
++ alloc_inode: vzquota_alloc_inode,
++ free_space: vzquota_free_space,
++ free_inode: vzquota_free_inode,
++ transfer: vzquota_transfer,
++ rename: vzquota_rename
++};
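
A side note on syntax: the "field: value" initializers in the table above are
the old GCC extension that 2.6-era kernel code still used; C99 designated
initializers (".field = value") express exactly the same thing. A tiny
self-contained illustration, unrelated to the kernel structures:

#include <stdio.h>

struct demo_ops {
	int (*open)(void);
	int (*close)(void);
};

static int demo_open(void)  { return 0; }
static int demo_close(void) { return 0; }

/* old GCC form:  { open: demo_open, close: demo_close }
 * C99 form: */
static struct demo_ops ops = {
	.open	= demo_open,
	.close	= demo_close,
};

int main(void)
{
	return ops.open() + ops.close();
}
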
+diff -uprN linux-2.6.15.orig/fs/vzdq_tree.c linux-2.6.15-ve025stab014/fs/vzdq_tree.c
+--- linux-2.6.15.orig/fs/vzdq_tree.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/vzdq_tree.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,286 @@
++/*
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains Virtuozzo quota tree implementation
++ */
++
++#include <linux/errno.h>
++#include <linux/slab.h>
++#include <linux/vzdq_tree.h>
++
++struct quotatree_tree *quotatree_alloc(void)
++{
++ int l;
++ struct quotatree_tree *tree;
++
++ tree = kmalloc(sizeof(struct quotatree_tree), GFP_KERNEL);
++ if (tree == NULL)
++ goto out;
++
++ for (l = 0; l < QUOTATREE_DEPTH; l++) {
++ INIT_LIST_HEAD(&tree->levels[l].usedlh);
++ INIT_LIST_HEAD(&tree->levels[l].freelh);
++ tree->levels[l].freenum = 0;
++ }
++ tree->root = NULL;
++ tree->leaf_num = 0;
++out:
++ return tree;
++}
++
++static struct quotatree_node *
++quotatree_follow(struct quotatree_tree *tree, quotaid_t id, int level,
++ struct quotatree_find_state *st)
++{
++ void **block;
++ struct quotatree_node *parent;
++ int l, index;
++
++ parent = NULL;
++ block = (void **)&tree->root;
++ l = 0;
++ while (l < level && *block != NULL) {
++ index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;
++ parent = *block;
++ block = parent->blocks + index;
++ l++;
++ }
++ if (st != NULL) {
++ st->block = block;
++ st->level = l;
++ }
++
++ return parent;
++}
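
quotatree_follow() walks a fixed-depth radix tree: each level consumes one
QUOTAID_BBITS-wide slice of the quota id, selected with QUOTATREE_BSHIFT(l) and
QUOTATREE_BMASK. Those constants live in the vzdq_tree header, which is not part
of this hunk, so the values below (8 bits per level, depth 4 for a 32-bit id)
are only an illustrative assumption:

#include <stdio.h>

#define DEMO_BBITS	8
#define DEMO_DEPTH	4
#define DEMO_BMASK	((1u << DEMO_BBITS) - 1)
#define DEMO_BSHIFT(l)	((DEMO_DEPTH - (l) - 1) * DEMO_BBITS)

int main(void)
{
	unsigned int id = 10042, l;

	/* same per-level index computation as quotatree_follow() above */
	for (l = 0; l < DEMO_DEPTH; l++)
		printf("level %u -> slot %u\n",
		       l, (id >> DEMO_BSHIFT(l)) & DEMO_BMASK);
	return 0;
}
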
++
++void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
++ struct quotatree_find_state *st)
++{
++ quotatree_follow(tree, id, QUOTATREE_DEPTH, st);
++ if (st->level == QUOTATREE_DEPTH)
++ return *st->block;
++ else
++ return NULL;
++}
++
++void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index)
++{
++ int i, count;
++ struct quotatree_node *p;
++ void *leaf;
++
++ if (QTREE_LEAFNUM(tree) <= index)
++ return NULL;
++
++ count = 0;
++ list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) {
++ for (i = 0; i < QUOTATREE_BSIZE; i++) {
++ leaf = p->blocks[i];
++ if (leaf == NULL)
++ continue;
++ if (count == index)
++ return leaf;
++ count++;
++ }
++ }
++ return NULL;
++}
++
++/* returns the data leaf (vz_quota_ugid) following an _existing_ ugid (@id)
++ * in the tree... */
++void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id)
++{
++ int off;
++ struct quotatree_node *parent, *p;
++ struct list_head *lh;
++
++	/* get the parent referring to the correct last-level quota tree node */
++ parent = quotatree_follow(tree, id, QUOTATREE_DEPTH, NULL);
++ if (!parent)
++ return NULL;
++
++ off = (id & QUOTATREE_BMASK) + 1; /* next ugid */
++ lh = &parent->list;
++ do {
++ p = list_entry(lh, struct quotatree_node, list);
++ for ( ; off < QUOTATREE_BSIZE; off++)
++ if (p->blocks[off])
++ return p->blocks[off];
++ off = 0;
++ lh = lh->next;
++ } while (lh != &QTREE_LEAFLVL(tree)->usedlh);
++
++ return NULL;
++}
++
++int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
++ struct quotatree_find_state *st, void *data)
++{
++ struct quotatree_node *p;
++ int l, index;
++
++ while (st->level < QUOTATREE_DEPTH) {
++ l = st->level;
++ if (!list_empty(&tree->levels[l].freelh)) {
++ p = list_entry(tree->levels[l].freelh.next,
++ struct quotatree_node, list);
++ list_del(&p->list);
++ } else {
++ p = kmalloc(sizeof(struct quotatree_node), GFP_KERNEL);
++ if (p == NULL)
++ return -ENOMEM;
++			/* save the block number in the l-th level;
++			 * it is used for quota file generation */
++ p->num = tree->levels[l].freenum++;
++ }
++ list_add(&p->list, &tree->levels[l].usedlh);
++ memset(p->blocks, 0, sizeof(p->blocks));
++ *st->block = p;
++
++ index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;
++ st->block = p->blocks + index;
++ st->level++;
++ }
++ tree->leaf_num++;
++ *st->block = data;
++
++ return 0;
++}
++
++static struct quotatree_node *
++quotatree_remove_ptr(struct quotatree_tree *tree, quotaid_t id,
++ int level)
++{
++ struct quotatree_node *parent;
++ struct quotatree_find_state st;
++
++ parent = quotatree_follow(tree, id, level, &st);
++ if (st.level == QUOTATREE_DEPTH)
++ tree->leaf_num--;
++ *st.block = NULL;
++ return parent;
++}
++
++void quotatree_remove(struct quotatree_tree *tree, quotaid_t id)
++{
++ struct quotatree_node *p;
++ int level, i;
++
++ p = quotatree_remove_ptr(tree, id, QUOTATREE_DEPTH);
++ for (level = QUOTATREE_DEPTH - 1; level >= QUOTATREE_CDEPTH; level--) {
++ for (i = 0; i < QUOTATREE_BSIZE; i++)
++ if (p->blocks[i] != NULL)
++ return;
++ list_move(&p->list, &tree->levels[level].freelh);
++ p = quotatree_remove_ptr(tree, id, level);
++ }
++}
++
++#if 0
++static void quotatree_walk(struct quotatree_tree *tree,
++ struct quotatree_node *node_start,
++ quotaid_t id_start,
++ int level_start, int level_end,
++ int (*callback)(struct quotatree_tree *,
++ quotaid_t id,
++ int level,
++ void *ptr,
++ void *data),
++ void *data)
++{
++ struct quotatree_node *p;
++ int l, shift, index;
++ quotaid_t id;
++ struct quotatree_find_state st;
++
++ p = node_start;
++ l = level_start;
++ shift = (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
++ id = id_start;
++ index = 0;
++
++ /*
++ * Invariants:
++ * shift == (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
++ * id & ((1 << shift) - 1) == 0
++ * p is l-level node corresponding to id
++ */
++ do {
++ if (!p)
++ break;
++
++ if (l < level_end) {
++ for (; index < QUOTATREE_BSIZE; index++)
++ if (p->blocks[index] != NULL)
++ break;
++ if (index < QUOTATREE_BSIZE) {
++ /* descend */
++ p = p->blocks[index];
++ l++;
++ shift -= QUOTAID_BBITS;
++ id += (quotaid_t)index << shift;
++ index = 0;
++ continue;
++ }
++ }
++
++ if ((*callback)(tree, id, l, p, data))
++ break;
++
++ /* ascend and to the next node */
++ p = quotatree_follow(tree, id, l, &st);
++
++ index = ((id >> shift) & QUOTATREE_BMASK) + 1;
++ l--;
++ shift += QUOTAID_BBITS;
++ id &= ~(((quotaid_t)1 << shift) - 1);
++ } while (l >= level_start);
++}
++#endif
++
++static void free_list(struct list_head *node_list)
++{
++ struct quotatree_node *p, *tmp;
++
++ list_for_each_entry_safe(p, tmp, node_list, list) {
++ list_del(&p->list);
++ kfree(p);
++ }
++}
++
++static inline void quotatree_free_nodes(struct quotatree_tree *tree)
++{
++ int i;
++
++ for (i = 0; i < QUOTATREE_DEPTH; i++) {
++ free_list(&tree->levels[i].usedlh);
++ free_list(&tree->levels[i].freelh);
++ }
++}
++
++static void quotatree_free_leafs(struct quotatree_tree *tree,
++ void (*dtor)(void *))
++{
++ int i;
++ struct quotatree_node *p;
++
++ list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) {
++ for (i = 0; i < QUOTATREE_BSIZE; i++) {
++ if (p->blocks[i] == NULL)
++ continue;
++
++ dtor(p->blocks[i]);
++ }
++ }
++}
++
++void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *))
++{
++ quotatree_free_leafs(tree, dtor);
++ quotatree_free_nodes(tree);
++ kfree(tree);
++}
+diff -uprN linux-2.6.15.orig/fs/vzdq_ugid.c linux-2.6.15-ve025stab014/fs/vzdq_ugid.c
+--- linux-2.6.15.orig/fs/vzdq_ugid.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/vzdq_ugid.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,1116 @@
++/*
++ * Copyright (C) 2002 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains Virtuozzo UID/GID disk quota implementation
++ */
++
++#include <linux/config.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/smp_lock.h>
++#include <linux/rcupdate.h>
++#include <asm/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/quota.h>
++#include <linux/quotaio_v2.h>
++#include <linux/virtinfo.h>
++
++#include <linux/vzctl.h>
++#include <linux/vzctl_quota.h>
++#include <linux/vzquota.h>
++
++/*
++ * XXX
++ * maybe something is needed for sb->s_dquot->info[]?
++ */
++
++#define USRQUOTA_MASK (1 << USRQUOTA)
++#define GRPQUOTA_MASK (1 << GRPQUOTA)
++#define QTYPE2MASK(type) (1 << (type))
++
++static kmem_cache_t *vz_quota_ugid_cachep;
++
++/* guard to protect vz_quota_master from being destroyed in quota_on/off.
++ * Also protects the lists in the hash table */
++extern struct semaphore vz_quota_sem;
++
++inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid)
++{
++ if (qugid != VZ_QUOTA_UGBAD)
++ atomic_inc(&qugid->qugid_count);
++ return qugid;
++}
++
++/* we don't limit users with zero limits */
++static inline int vzquota_fake_stat(struct dq_stat *stat)
++{
++ return stat->bhardlimit == 0 && stat->bsoftlimit == 0 &&
++ stat->ihardlimit == 0 && stat->isoftlimit == 0;
++}
++
++/* callback function for quotatree_free() */
++static inline void vzquota_free_qugid(void *ptr)
++{
++ kmem_cache_free(vz_quota_ugid_cachep, ptr);
++}
++
++/*
++ * destroy the ugid if it has zero refcount, limits and usage;
++ * must be called under qmblk->dq_sem
++ */
++void vzquota_put_ugid(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid)
++{
++ if (qugid == VZ_QUOTA_UGBAD)
++ return;
++ qmblk_data_read_lock(qmblk);
++ if (atomic_dec_and_test(&qugid->qugid_count) &&
++ (qmblk->dq_flags & VZDQUG_FIXED_SET) == 0 &&
++ vzquota_fake_stat(&qugid->qugid_stat) &&
++ qugid->qugid_stat.bcurrent == 0 &&
++ qugid->qugid_stat.icurrent == 0) {
++ quotatree_remove(QUGID_TREE(qmblk, qugid->qugid_type),
++ qugid->qugid_id);
++ qmblk->dq_ugid_count--;
++ vzquota_free_qugid(qugid);
++ }
++ qmblk_data_read_unlock(qmblk);
++}
++
++/*
++ * Get a ugid block by its index, as if the blocks were stored in an array.
++ * In reality this is not an array but the chain of the tree's leaves.
++ * Returns NULL if the index is out of range.
++ * qmblk semaphore is required to protect the tree.
++ */
++static inline struct vz_quota_ugid *
++vzquota_get_byindex(struct vz_quota_master *qmblk, unsigned int index, int type)
++{
++ return quotatree_leaf_byindex(QUGID_TREE(qmblk, type), index);
++}
++
++/*
++ * get next element from ugid "virtual array"
++ * the ugid must be in the current array, and the array may not change between
++ * two accesses (guaranteed by the "stopped" quota state and quota semaphore)
++ * qmblk semaphore is required to protect the tree
++ */
++static inline struct vz_quota_ugid *
++vzquota_get_next(struct vz_quota_master *qmblk, struct vz_quota_ugid *qugid)
++{
++ return quotatree_get_next(QUGID_TREE(qmblk, qugid->qugid_type),
++ qugid->qugid_id);
++}
++
++/*
++ * requires dq_sem
++ */
++struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
++ unsigned int quota_id, int type, int flags)
++{
++ struct vz_quota_ugid *qugid;
++ struct quotatree_tree *tree;
++ struct quotatree_find_state st;
++
++ tree = QUGID_TREE(qmblk, type);
++ qugid = quotatree_find(tree, quota_id, &st);
++ if (qugid)
++ goto success;
++
++ /* caller does not want alloc */
++ if (flags & VZDQUG_FIND_DONT_ALLOC)
++ goto fail;
++
++ if (flags & VZDQUG_FIND_FAKE)
++ goto doit;
++
++ /* check limit */
++ if (qmblk->dq_ugid_count >= qmblk->dq_ugid_max)
++ goto fail;
++
++ /* see comment at VZDQUG_FIXED_SET define */
++ if (qmblk->dq_flags & VZDQUG_FIXED_SET)
++ goto fail;
++
++doit:
++ /* alloc new structure */
++ qugid = kmem_cache_alloc(vz_quota_ugid_cachep,
++ SLAB_NOFS | __GFP_NOFAIL);
++ if (qugid == NULL)
++ goto fail;
++
++ /* initialize new structure */
++ qugid->qugid_id = quota_id;
++ memset(&qugid->qugid_stat, 0, sizeof(qugid->qugid_stat));
++ qugid->qugid_type = type;
++ atomic_set(&qugid->qugid_count, 0);
++
++ /* insert in tree */
++ if (quotatree_insert(tree, quota_id, &st, qugid) < 0)
++ goto fail_insert;
++ qmblk->dq_ugid_count++;
++
++success:
++ vzquota_get_ugid(qugid);
++ return qugid;
++
++fail_insert:
++ vzquota_free_qugid(qugid);
++fail:
++ return VZ_QUOTA_UGBAD;
++}
++
++/*
++ * takes dq_sem, may schedule
++ */
++struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
++ unsigned int quota_id, int type, int flags)
++{
++ struct vz_quota_ugid *qugid;
++
++ down(&qmblk->dq_sem);
++ qugid = __vzquota_find_ugid(qmblk, quota_id, type, flags);
++ up(&qmblk->dq_sem);
++
++ return qugid;
++}
++
++/*
++ * destroy all ugid records on given quota master
++ */
++void vzquota_kill_ugid(struct vz_quota_master *qmblk)
++{
++ BUG_ON((qmblk->dq_gid_tree == NULL && qmblk->dq_uid_tree != NULL) ||
++ (qmblk->dq_uid_tree == NULL && qmblk->dq_gid_tree != NULL));
++
++ if (qmblk->dq_uid_tree != NULL) {
++ quotatree_free(qmblk->dq_uid_tree, vzquota_free_qugid);
++ quotatree_free(qmblk->dq_gid_tree, vzquota_free_qugid);
++ }
++}
++
++
++/* ----------------------------------------------------------------------
++ * Management interface to ugid quota for (super)users.
++ * --------------------------------------------------------------------- */
++
++/**
++ * vzquota_find_qmblk - helper to emulate quota on virtual filesystems
++ *
++ * This function finds a quota master block corresponding to the root of
++ * a virtual filesystem.
++ * Returns a quota master block with reference taken, or %NULL if not under
++ * quota, or %VZ_QUOTA_BAD if quota inconsistency is found (and all allocation
++ * operations will fail).
++ *
++ * Note: this function uses vzquota_inode_qmblk().
++ * The latter is a rather confusing function: it returns qmblk that used to be
++ * on the inode some time ago (without guarantee that it still has any
++ * relations to the inode). So, vzquota_find_qmblk() leaves it up to the
++ * caller to think whether the inode could have changed its qmblk and what to
++ * do in that case.
++ * Currently, the callers appear to not care :(
++ */
++struct vz_quota_master *vzquota_find_qmblk(struct super_block *sb)
++{
++ struct inode *qrinode;
++ struct vz_quota_master *qmblk;
++
++ qmblk = NULL;
++ qrinode = NULL;
++ if (sb->s_op->get_quota_root != NULL)
++ qrinode = sb->s_op->get_quota_root(sb);
++ if (qrinode != NULL)
++ qmblk = vzquota_inode_qmblk(qrinode);
++ return qmblk;
++}
++
++static int vzquota_initialize2(struct inode *inode, int type)
++{
++ return QUOTA_OK;
++}
++
++static int vzquota_drop2(struct inode *inode)
++{
++ return QUOTA_OK;
++}
++
++static int vzquota_alloc_space2(struct inode *inode,
++ qsize_t number, int prealloc)
++{
++ inode_add_bytes(inode, number);
++ return QUOTA_OK;
++}
++
++static int vzquota_alloc_inode2(const struct inode *inode, unsigned long number)
++{
++ return QUOTA_OK;
++}
++
++static int vzquota_free_space2(struct inode *inode, qsize_t number)
++{
++ inode_sub_bytes(inode, number);
++ return QUOTA_OK;
++}
++
++static int vzquota_free_inode2(const struct inode *inode, unsigned long number)
++{
++ return QUOTA_OK;
++}
++
++static int vzquota_transfer2(struct inode *inode, struct iattr *iattr)
++{
++ return QUOTA_OK;
++}
++
++struct dquot_operations vz_quota_operations2 = {
++ initialize: vzquota_initialize2,
++ drop: vzquota_drop2,
++ alloc_space: vzquota_alloc_space2,
++ alloc_inode: vzquota_alloc_inode2,
++ free_space: vzquota_free_space2,
++ free_inode: vzquota_free_inode2,
++ transfer: vzquota_transfer2
++};
++
++static int vz_quota_on(struct super_block *sb, int type,
++ int format_id, char *path)
++{
++ struct vz_quota_master *qmblk;
++ int mask, mask2;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ mask = 0;
++ mask2 = 0;
++ sb->dq_op = &vz_quota_operations2;
++ sb->s_qcop = &vz_quotactl_operations;
++ if (type == USRQUOTA) {
++ mask = DQUOT_USR_ENABLED;
++ mask2 = VZDQ_USRQUOTA;
++ }
++ if (type == GRPQUOTA) {
++ mask = DQUOT_GRP_ENABLED;
++ mask2 = VZDQ_GRPQUOTA;
++ }
++ err = -EBUSY;
++ if (qmblk->dq_flags & mask2)
++ goto out;
++
++ err = 0;
++ qmblk->dq_flags |= mask2;
++ sb->s_dquot.flags |= mask;
++
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++static int vz_quota_off(struct super_block *sb, int type)
++{
++ struct vz_quota_master *qmblk;
++ int mask2;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ mask2 = 0;
++ if (type == USRQUOTA)
++ mask2 = VZDQ_USRQUOTA;
++ if (type == GRPQUOTA)
++ mask2 = VZDQ_GRPQUOTA;
++ err = -EINVAL;
++ if (!(qmblk->dq_flags & mask2))
++ goto out;
++
++ qmblk->dq_flags &= ~mask2;
++ err = 0;
++
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++static int vz_quota_sync(struct super_block *sb, int type)
++{
++ return 0; /* vz quota is always uptodate */
++}
++
++static int vz_get_dqblk(struct super_block *sb, int type,
++ qid_t id, struct if_dqblk *di)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid *ugid;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ err = 0;
++ ugid = vzquota_find_ugid(qmblk, id, type, VZDQUG_FIND_DONT_ALLOC);
++ if (ugid != VZ_QUOTA_UGBAD) {
++ qmblk_data_read_lock(qmblk);
++ di->dqb_bhardlimit = ugid->qugid_stat.bhardlimit >> 10;
++ di->dqb_bsoftlimit = ugid->qugid_stat.bsoftlimit >> 10;
++ di->dqb_curspace = ugid->qugid_stat.bcurrent;
++ di->dqb_ihardlimit = ugid->qugid_stat.ihardlimit;
++ di->dqb_isoftlimit = ugid->qugid_stat.isoftlimit;
++ di->dqb_curinodes = ugid->qugid_stat.icurrent;
++ di->dqb_btime = ugid->qugid_stat.btime;
++ di->dqb_itime = ugid->qugid_stat.itime;
++ qmblk_data_read_unlock(qmblk);
++ di->dqb_valid = QIF_ALL;
++ vzquota_put_ugid(qmblk, ugid);
++ } else {
++ memset(di, 0, sizeof(*di));
++ di->dqb_valid = QIF_ALL;
++ }
++
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++/* must be called under vz_quota_sem */
++static int __vz_set_dqblk(struct vz_quota_master *qmblk,
++ int type, qid_t id, struct if_dqblk *di)
++{
++ struct vz_quota_ugid *ugid;
++
++ ugid = vzquota_find_ugid(qmblk, id, type, 0);
++ if (ugid == VZ_QUOTA_UGBAD)
++ return -ESRCH;
++
++ qmblk_data_write_lock(qmblk);
++ /*
++ * Subtle compatibility breakage.
++ *
++ * Some old non-vz kernel quota didn't start grace period
++ * if the new soft limit happens to be below the usage.
++ * Non-vz kernel quota in 2.4.20 starts the grace period
++ * (if it hasn't been started).
++ * Current non-vz kernel performs even more complicated
++ * manipulations...
++ *
++ * Also, current non-vz kernels have inconsistency related to
++ * the grace time start. In regular operations the grace period
++ * is started if the usage is greater than the soft limit (and,
++ * strangely, is cancelled if the usage is less).
++ * However, set_dqblk starts the grace period if the usage is greater
++ * or equal to the soft limit.
++ *
++ * Here we try to mimic the behavior of the current non-vz kernel.
++ */
++ if (di->dqb_valid & QIF_BLIMITS) {
++ ugid->qugid_stat.bhardlimit =
++ (__u64)di->dqb_bhardlimit << 10;
++ ugid->qugid_stat.bsoftlimit =
++ (__u64)di->dqb_bsoftlimit << 10;
++ if (di->dqb_bsoftlimit == 0 ||
++ ugid->qugid_stat.bcurrent < ugid->qugid_stat.bsoftlimit)
++ ugid->qugid_stat.btime = 0;
++ else if (!(di->dqb_valid & QIF_BTIME))
++ ugid->qugid_stat.btime = CURRENT_TIME_SEC.tv_sec
++ + qmblk->dq_ugid_info[type].bexpire;
++ else
++ ugid->qugid_stat.btime = di->dqb_btime;
++ }
++ if (di->dqb_valid & QIF_ILIMITS) {
++ ugid->qugid_stat.ihardlimit = di->dqb_ihardlimit;
++ ugid->qugid_stat.isoftlimit = di->dqb_isoftlimit;
++ if (di->dqb_isoftlimit == 0 ||
++ ugid->qugid_stat.icurrent < ugid->qugid_stat.isoftlimit)
++ ugid->qugid_stat.itime = 0;
++ else if (!(di->dqb_valid & QIF_ITIME))
++ ugid->qugid_stat.itime = CURRENT_TIME_SEC.tv_sec
++ + qmblk->dq_ugid_info[type].iexpire;
++ else
++ ugid->qugid_stat.itime = di->dqb_itime;
++ }
++ qmblk_data_write_unlock(qmblk);
++ vzquota_put_ugid(qmblk, ugid);
++
++ return 0;
++}
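
The QIF_BLIMITS branch above (and the QIF_ILIMITS branch, symmetrically)
implements the grace-start rule described in the comment. Restated as a
stand-alone function with illustrative names:

#include <time.h>

static time_t demo_new_btime(unsigned long long usage,
			     unsigned long long new_softlimit,
			     int btime_supplied, time_t supplied_btime,
			     time_t now, time_t bexpire)
{
	if (new_softlimit == 0 || usage < new_softlimit)
		return 0;		/* not over the new soft limit: no grace period */
	if (!btime_supplied)
		return now + bexpire;	/* over the limit: start the grace period now */
	return supplied_btime;		/* caller provided an explicit grace end */
}

int main(void)
{
	/* usage 2048 over the new soft limit 1024, no explicit end supplied:
	 * a fresh grace end is computed, so the result is non-zero */
	return demo_new_btime(2048, 1024, 0, 0, time(NULL), 3600) == 0;
}
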
++
++static int vz_set_dqblk(struct super_block *sb, int type,
++ qid_t id, struct if_dqblk *di)
++{
++ struct vz_quota_master *qmblk;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++ err = __vz_set_dqblk(qmblk, type, id, di);
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++static int vz_get_dqinfo(struct super_block *sb, int type,
++ struct if_dqinfo *ii)
++{
++ struct vz_quota_master *qmblk;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ err = 0;
++ ii->dqi_bgrace = qmblk->dq_ugid_info[type].bexpire;
++ ii->dqi_igrace = qmblk->dq_ugid_info[type].iexpire;
++ ii->dqi_flags = 0;
++ ii->dqi_valid = IIF_ALL;
++
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++/* must be called under vz_quota_sem */
++static int __vz_set_dqinfo(struct vz_quota_master *qmblk,
++ int type, struct if_dqinfo *ii)
++{
++ if (ii->dqi_valid & IIF_FLAGS)
++ if (ii->dqi_flags & DQF_MASK)
++ return -EINVAL;
++
++ if (ii->dqi_valid & IIF_BGRACE)
++ qmblk->dq_ugid_info[type].bexpire = ii->dqi_bgrace;
++ if (ii->dqi_valid & IIF_IGRACE)
++ qmblk->dq_ugid_info[type].iexpire = ii->dqi_igrace;
++ return 0;
++}
++
++static int vz_set_dqinfo(struct super_block *sb, int type,
++ struct if_dqinfo *ii)
++{
++ struct vz_quota_master *qmblk;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++ err = __vz_set_dqinfo(qmblk, type, ii);
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++#ifdef CONFIG_QUOTA_COMPAT
++
++#define Q_GETQUOTI_SIZE 1024
++
++#define UGID2DQBLK(dst, src) \
++ do { \
++ (dst).dqb_ihardlimit = (src)->qugid_stat.ihardlimit; \
++ (dst).dqb_isoftlimit = (src)->qugid_stat.isoftlimit; \
++ (dst).dqb_curinodes = (src)->qugid_stat.icurrent; \
++ /* in 1K blocks */ \
++ (dst).dqb_bhardlimit = (src)->qugid_stat.bhardlimit >> 10; \
++ /* in 1K blocks */ \
++ (dst).dqb_bsoftlimit = (src)->qugid_stat.bsoftlimit >> 10; \
++ /* in bytes, 64 bit */ \
++ (dst).dqb_curspace = (src)->qugid_stat.bcurrent; \
++ (dst).dqb_btime = (src)->qugid_stat.btime; \
++ (dst).dqb_itime = (src)->qugid_stat.itime; \
++ } while (0)
++
++static int vz_get_quoti(struct super_block *sb, int type, qid_t idx,
++ struct v2_disk_dqblk *dqblk)
++{
++ struct vz_quota_master *qmblk;
++ struct v2_disk_dqblk data;
++ struct vz_quota_ugid *ugid;
++ int count;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ down(&qmblk->dq_sem);
++ for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0;
++ ugid != NULL && count < Q_GETQUOTI_SIZE;
++ count++)
++ {
++ qmblk_data_read_lock(qmblk);
++ UGID2DQBLK(data, ugid);
++ qmblk_data_read_unlock(qmblk);
++ data.dqb_id = ugid->qugid_id;
++ if (copy_to_user(dqblk, &data, sizeof(data)))
++ goto fault;
++ dqblk++;
++
++ /* Find next entry */
++ ugid = vzquota_get_next(qmblk, ugid);
++ BUG_ON(ugid != NULL && ugid->qugid_type != type);
++ }
++ err = count;
++out_ugid:
++ up(&qmblk->dq_sem);
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++
++ return err;
++
++fault:
++ err = count ? count : -EFAULT;
++ goto out_ugid;
++}
++
++#endif
++
++struct quotactl_ops vz_quotactl_operations = {
++ quota_on: vz_quota_on,
++ quota_off: vz_quota_off,
++ quota_sync: vz_quota_sync,
++ get_info: vz_get_dqinfo,
++ set_info: vz_set_dqinfo,
++ get_dqblk: vz_get_dqblk,
++ set_dqblk: vz_set_dqblk,
++#ifdef CONFIG_QUOTA_COMPAT
++ get_quoti: vz_get_quoti
++#endif
++};
++
++
++/* ----------------------------------------------------------------------
++ * Management interface for host system admins.
++ * --------------------------------------------------------------------- */
++
++static int quota_ugid_addstat(unsigned int quota_id, unsigned int ugid_size,
++ struct vz_quota_iface *u_ugid_buf)
++{
++ struct vz_quota_master *qmblk;
++ int ret;
++
++ down(&vz_quota_sem);
++
++ ret = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ ret = -EBUSY;
++ if (qmblk->dq_state != VZDQ_STARTING)
++ goto out; /* working quota doesn't accept new ugids */
++
++ ret = 0;
++ /* start to add ugids */
++ for (ret = 0; ret < ugid_size; ret++) {
++ struct vz_quota_iface ugid_buf;
++ struct vz_quota_ugid *ugid;
++
++ if (copy_from_user(&ugid_buf, u_ugid_buf, sizeof(ugid_buf)))
++ break;
++
++ if (ugid_buf.qi_type >= MAXQUOTAS)
++ break; /* bad quota type - this is the only check */
++
++ ugid = vzquota_find_ugid(qmblk,
++ ugid_buf.qi_id, ugid_buf.qi_type, 0);
++ if (ugid == VZ_QUOTA_UGBAD) {
++ qmblk->dq_flags |= VZDQUG_FIXED_SET;
++ break; /* limit reached */
++ }
++
++ /* update usage/limits
++ * we can copy the data without the lock, because the data
++ * cannot be modified in VZDQ_STARTING state */
++ ugid->qugid_stat = ugid_buf.qi_stat;
++
++ vzquota_put_ugid(qmblk, ugid);
++
++ u_ugid_buf++; /* next user buffer */
++ }
++out:
++ up(&vz_quota_sem);
++
++ return ret;
++}
++
++static int quota_ugid_setgrace(unsigned int quota_id,
++ struct dq_info u_dq_info[])
++{
++ struct vz_quota_master *qmblk;
++ struct dq_info dq_info[MAXQUOTAS];
++ struct dq_info *target;
++ int err, type;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EBUSY;
++ if (qmblk->dq_state != VZDQ_STARTING)
++ goto out; /* working quota doesn't accept changing options */
++
++ err = -EFAULT;
++ if (copy_from_user(dq_info, u_dq_info, sizeof(dq_info)))
++ goto out;
++
++ err = 0;
++
++ /* update in qmblk */
++ for (type = 0; type < MAXQUOTAS; type ++) {
++ target = &qmblk->dq_ugid_info[type];
++ target->bexpire = dq_info[type].bexpire;
++ target->iexpire = dq_info[type].iexpire;
++ }
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int do_quota_ugid_getstat(struct vz_quota_master *qmblk, int index, int size,
++ struct vz_quota_iface *u_ugid_buf)
++{
++ int type, count;
++ struct vz_quota_ugid *ugid;
++
++ if (QTREE_LEAFNUM(qmblk->dq_uid_tree) +
++ QTREE_LEAFNUM(qmblk->dq_gid_tree)
++ <= index)
++ return 0;
++
++ count = 0;
++
++ type = index < QTREE_LEAFNUM(qmblk->dq_uid_tree) ? USRQUOTA : GRPQUOTA;
++ if (type == GRPQUOTA)
++ index -= QTREE_LEAFNUM(qmblk->dq_uid_tree);
++
++	/* loop through the uid quota and then the gid quota */
++repeat:
++ for (ugid = vzquota_get_byindex(qmblk, index, type);
++ ugid != NULL && count < size;
++ ugid = vzquota_get_next(qmblk, ugid), count++)
++ {
++ struct vz_quota_iface ugid_buf;
++
++		/* form the interface buffer and send it to user level */
++ qmblk_data_read_lock(qmblk);
++ memcpy(&ugid_buf.qi_stat, &ugid->qugid_stat,
++ sizeof(ugid_buf.qi_stat));
++ qmblk_data_read_unlock(qmblk);
++ ugid_buf.qi_id = ugid->qugid_id;
++ ugid_buf.qi_type = ugid->qugid_type;
++
++ if (copy_to_user(u_ugid_buf, &ugid_buf, sizeof(ugid_buf)))
++ goto fault;
++ u_ugid_buf++; /* next portion of user buffer */
++ }
++
++ if (type == USRQUOTA && count < size) {
++ type = GRPQUOTA;
++ index = 0;
++ goto repeat;
++ }
++
++ return count;
++
++fault:
++ return count ? count : -EFAULT;
++}
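
do_quota_ugid_getstat() addresses both trees through one flat index: positions
[0, nr_uid_leaves) map to the UID tree and the rest map to the GID tree. A small
sketch of that convention (names are illustrative):

#define DEMO_USRQUOTA	0
#define DEMO_GRPQUOTA	1

static int demo_split_index(unsigned int index,
			    unsigned int nr_uid, unsigned int nr_gid,
			    unsigned int *local_index)
{
	if (index >= nr_uid + nr_gid)
		return -1;			/* out of range: nothing to report */
	if (index < nr_uid) {
		*local_index = index;
		return DEMO_USRQUOTA;
	}
	*local_index = index - nr_uid;
	return DEMO_GRPQUOTA;
}

int main(void)
{
	unsigned int li;
	/* with 5 uid leaves and 3 gid leaves, flat index 6 is gid leaf #1 */
	return !(demo_split_index(6, 5, 3, &li) == DEMO_GRPQUOTA && li == 1);
}
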
++
++static int quota_ugid_getstat(unsigned int quota_id,
++ int index, int size, struct vz_quota_iface *u_ugid_buf)
++{
++ struct vz_quota_master *qmblk;
++ int err;
++
++ if (index < 0 || size < 0)
++ return -EINVAL;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ down(&qmblk->dq_sem);
++ err = do_quota_ugid_getstat(qmblk, index, size, u_ugid_buf);
++ up(&qmblk->dq_sem);
++
++out:
++ up(&vz_quota_sem);
++ return err;
++}
++
++static int quota_ugid_getgrace(unsigned int quota_id,
++ struct dq_info u_dq_info[])
++{
++ struct vz_quota_master *qmblk;
++ struct dq_info dq_info[MAXQUOTAS];
++ struct dq_info *target;
++ int err, type;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = 0;
++ /* update from qmblk */
++ for (type = 0; type < MAXQUOTAS; type ++) {
++ target = &qmblk->dq_ugid_info[type];
++ dq_info[type].bexpire = target->bexpire;
++ dq_info[type].iexpire = target->iexpire;
++ dq_info[type].flags = target->flags;
++ }
++
++ if (copy_to_user(u_dq_info, dq_info, sizeof(dq_info)))
++ err = -EFAULT;
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int quota_ugid_getconfig(unsigned int quota_id,
++ struct vz_quota_ugid_stat *info)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid_stat kinfo;
++ int err;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = 0;
++ kinfo.limit = qmblk->dq_ugid_max;
++ kinfo.count = qmblk->dq_ugid_count;
++ kinfo.flags = qmblk->dq_flags;
++
++ if (copy_to_user(info, &kinfo, sizeof(kinfo)))
++ err = -EFAULT;
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int quota_ugid_setconfig(unsigned int quota_id,
++ struct vz_quota_ugid_stat *info)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid_stat kinfo;
++ int err;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EFAULT;
++ if (copy_from_user(&kinfo, info, sizeof(kinfo)))
++ goto out;
++
++ err = 0;
++ qmblk->dq_ugid_max = kinfo.limit;
++ if (qmblk->dq_state == VZDQ_STARTING) {
++ qmblk->dq_flags = kinfo.flags;
++ if (qmblk->dq_flags & VZDQUG_ON)
++ qmblk->dq_flags |= VZDQ_USRQUOTA | VZDQ_GRPQUOTA;
++ }
++
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int quota_ugid_setlimit(unsigned int quota_id,
++ struct vz_quota_ugid_setlimit *u_lim)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid_setlimit lim;
++ int err;
++
++ down(&vz_quota_sem);
++
++ err = -ESRCH;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EFAULT;
++ if (copy_from_user(&lim, u_lim, sizeof(lim)))
++ goto out;
++
++ err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb);
++
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int quota_ugid_setinfo(unsigned int quota_id,
++ struct vz_quota_ugid_setinfo *u_info)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid_setinfo info;
++ int err;
++
++ down(&vz_quota_sem);
++
++ err = -ESRCH;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EFAULT;
++ if (copy_from_user(&info, u_info, sizeof(info)))
++ goto out;
++
++ err = __vz_set_dqinfo(qmblk, info.type, &info.dqi);
++
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++/*
++ * This is a system call to maintain UGID quotas
++ * Note this call is allowed to run ONLY from VE0
++ */
++long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub)
++{
++ int ret;
++
++ ret = -EPERM;
++ /* access allowed only from root of VE0 */
++ if (!capable(CAP_SYS_RESOURCE) ||
++ !capable(CAP_SYS_ADMIN))
++ goto out;
++
++ switch (qub->cmd) {
++ case VZ_DQ_UGID_GETSTAT:
++ ret = quota_ugid_getstat(qub->quota_id,
++ qub->ugid_index, qub->ugid_size,
++ (struct vz_quota_iface *)qub->addr);
++ break;
++ case VZ_DQ_UGID_ADDSTAT:
++ ret = quota_ugid_addstat(qub->quota_id, qub->ugid_size,
++ (struct vz_quota_iface *)qub->addr);
++ break;
++ case VZ_DQ_UGID_GETGRACE:
++ ret = quota_ugid_getgrace(qub->quota_id,
++ (struct dq_info *)qub->addr);
++ break;
++ case VZ_DQ_UGID_SETGRACE:
++ ret = quota_ugid_setgrace(qub->quota_id,
++ (struct dq_info *)qub->addr);
++ break;
++ case VZ_DQ_UGID_GETCONFIG:
++ ret = quota_ugid_getconfig(qub->quota_id,
++ (struct vz_quota_ugid_stat *)qub->addr);
++ break;
++ case VZ_DQ_UGID_SETCONFIG:
++ ret = quota_ugid_setconfig(qub->quota_id,
++ (struct vz_quota_ugid_stat *)qub->addr);
++ break;
++ case VZ_DQ_UGID_SETLIMIT:
++ ret = quota_ugid_setlimit(qub->quota_id,
++ (struct vz_quota_ugid_setlimit *)
++ qub->addr);
++ break;
++ case VZ_DQ_UGID_SETINFO:
++ ret = quota_ugid_setinfo(qub->quota_id,
++ (struct vz_quota_ugid_setinfo *)
++ qub->addr);
++ break;
++ default:
++ ret = -EINVAL;
++ goto out;
++ }
++out:
++ return ret;
++}
++
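++/*
++ * ugid_quota_on_sb - install the per-UID/GID quota operations on a virtual
++ * superblock whose quota root lives on a VZ-quota-controlled filesystem,
++ * and propagate the user/group enable flags from the master block.
++ */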
++static void ugid_quota_on_sb(struct super_block *sb)
++{
++ struct super_block *real_sb;
++ struct vz_quota_master *qmblk;
++
++ if (!sb->s_op->get_quota_root)
++ return;
++
++ real_sb = sb->s_op->get_quota_root(sb)->i_sb;
++ if (real_sb->dq_op != &vz_quota_operations)
++ return;
++
++ sb->dq_op = &vz_quota_operations2;
++ sb->s_qcop = &vz_quotactl_operations;
++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
++ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
++ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
++
++ qmblk = vzquota_find_qmblk(sb);
++ if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD))
++ return;
++ down(&vz_quota_sem);
++ if (qmblk->dq_flags & VZDQ_USRQUOTA)
++ sb->s_dquot.flags |= DQUOT_USR_ENABLED;
++ if (qmblk->dq_flags & VZDQ_GRPQUOTA)
++ sb->s_dquot.flags |= DQUOT_GRP_ENABLED;
++ up(&vz_quota_sem);
++ qmblk_put(qmblk);
++}
++
++static void ugid_quota_off_sb(struct super_block *sb)
++{
++	/* quota can't be turned off on a mounted super block */
++ BUG_ON(sb->s_root != NULL);
++}
++
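++/*
++ * ugid_notifier_call - virtinfo hook that switches per-UID/GID quota
++ * handling on or off for a superblock when the corresponding VZ quota
++ * events are delivered.
++ */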
++static int ugid_notifier_call(struct vnotifier_block *self,
++ unsigned long n, void *data, int old_ret)
++{
++ struct virt_info_quota *viq;
++
++ viq = (struct virt_info_quota *)data;
++
++ switch (n) {
++ case VIRTINFO_QUOTA_ON:
++ ugid_quota_on_sb(viq->super);
++ break;
++ case VIRTINFO_QUOTA_OFF:
++ ugid_quota_off_sb(viq->super);
++ break;
++ case VIRTINFO_QUOTA_GETSTAT:
++ break;
++ default:
++ return old_ret;
++ }
++ return NOTIFY_OK;
++}
++
++static struct vnotifier_block ugid_notifier_block = {
++ .notifier_call = ugid_notifier_call,
++};
++
++/* ----------------------------------------------------------------------
++ * Init/exit.
++ * --------------------------------------------------------------------- */
++
++struct quota_format_type vz_quota_empty_v2_format = {
++ qf_fmt_id: QFMT_VFS_V0,
++ qf_ops: NULL,
++ qf_owner: THIS_MODULE
++};
++
++int vzquota_ugid_init()
++{
++ int err;
++
++ vz_quota_ugid_cachep = kmem_cache_create("vz_quota_ugid",
++ sizeof(struct vz_quota_ugid),
++ 0, SLAB_HWCACHE_ALIGN,
++ NULL, NULL);
++ if (vz_quota_ugid_cachep == NULL)
++ goto err_slab;
++
++ err = register_quota_format(&vz_quota_empty_v2_format);
++ if (err)
++ goto err_reg;
++
++ virtinfo_notifier_register(VITYPE_QUOTA, &ugid_notifier_block);
++ return 0;
++
++err_reg:
++ kmem_cache_destroy(vz_quota_ugid_cachep);
++ return err;
++
++err_slab:
++ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
++ return -ENOMEM;
++}
++
++void vzquota_ugid_release()
++{
++ virtinfo_notifier_unregister(VITYPE_QUOTA, &ugid_notifier_block);
++ unregister_quota_format(&vz_quota_empty_v2_format);
++
++ if (kmem_cache_destroy(vz_quota_ugid_cachep))
++ printk(KERN_ERR "VZQUOTA: kmem_cache_destroy failed\n");
++}
+diff -uprN linux-2.6.15.orig/fs/vzdquot.c linux-2.6.15-ve025stab014/fs/vzdquot.c
+--- linux-2.6.15.orig/fs/vzdquot.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/fs/vzdquot.c 2006-01-27 14:48:09.000000000 +0300
+@@ -0,0 +1,1705 @@
++/*
++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains the core of Virtuozzo disk quota implementation:
++ * maintenance of VZDQ information in inodes,
++ * external interfaces,
++ * module entry.
++ */
++
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/list.h>
++#include <asm/atomic.h>
++#include <linux/spinlock.h>
++#include <asm/semaphore.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/dcache.h>
++#include <linux/quota.h>
++#include <linux/rcupdate.h>
++#include <linux/module.h>
++#include <asm/uaccess.h>
++#include <linux/vzctl.h>
++#include <linux/vzctl_quota.h>
++#include <linux/vzquota.h>
++#include <linux/virtinfo.h>
++#include <linux/vzdq_tree.h>
++
++/* ----------------------------------------------------------------------
++ *
++ * Locking
++ *
++ * ---------------------------------------------------------------------- */
++
++/*
++ * Serializes on/off and all other do_vzquotactl operations.
++ * Protects qmblk hash.
++ */
++struct semaphore vz_quota_sem;
++
++/*
++ * Data access locks
++ * inode_qmblk
++ * protects qmblk pointers in all inodes and qlnk content in general
++ * (but not qmblk content);
++ * also protects related qmblk invalidation procedures;
++ * can't be per-inode because of vzquota_dtree_qmblk complications
++ * and problems with serialization with quota_on,
++ * but can be per-superblock;
++ * qmblk_data
++ * protects qmblk fields (such as current usage)
++ * quota_data
++ * protects charge/uncharge operations, thus, implies
++ * qmblk_data lock and, if CONFIG_VZ_QUOTA_UGID, inode_qmblk lock
++ * (to protect ugid pointers).
++ *
++ * Lock order:
++ * inode_qmblk_lock -> dcache_lock
++ * inode_qmblk_lock -> qmblk_data
++ */
++static spinlock_t vzdq_qmblk_lock = SPIN_LOCK_UNLOCKED;
++
++inline void inode_qmblk_lock(struct super_block *sb)
++{
++ spin_lock(&vzdq_qmblk_lock);
++}
++
++inline void inode_qmblk_unlock(struct super_block *sb)
++{
++ spin_unlock(&vzdq_qmblk_lock);
++}
++
++inline void qmblk_data_read_lock(struct vz_quota_master *qmblk)
++{
++ spin_lock(&qmblk->dq_data_lock);
++}
++
++inline void qmblk_data_read_unlock(struct vz_quota_master *qmblk)
++{
++ spin_unlock(&qmblk->dq_data_lock);
++}
++
++inline void qmblk_data_write_lock(struct vz_quota_master *qmblk)
++{
++ spin_lock(&qmblk->dq_data_lock);
++}
++
++inline void qmblk_data_write_unlock(struct vz_quota_master *qmblk)
++{
++ spin_unlock(&qmblk->dq_data_lock);
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Master hash table handling.
++ *
++ * SMP not safe, serialized by vz_quota_sem within quota syscalls
++ *
++ * --------------------------------------------------------------------- */
++
++static kmem_cache_t *vzquota_cachep;
++
++/*
++ * Hash function.
++ */
++#define QHASH_BITS 6
++#define VZ_QUOTA_HASH_SIZE (1 << QHASH_BITS)
++#define QHASH_MASK (VZ_QUOTA_HASH_SIZE - 1)
++
++struct list_head vzquota_hash_table[VZ_QUOTA_HASH_SIZE];
++int vzquota_hash_size = VZ_QUOTA_HASH_SIZE;
++
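++/* Fold the quota id down to QHASH_BITS bits to select a hash chain. */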
++static inline int vzquota_hash_func(unsigned int qid)
++{
++ return (((qid >> QHASH_BITS) ^ qid) & QHASH_MASK);
++}
++
++/**
++ * vzquota_alloc_master - alloc and instantiate master quota record
++ *
++ * Returns:
++ * pointer to newly created record if SUCCESS
++ * -ENOMEM if out of memory
++ *	-EEXIST if a record with the given quota_id already exists
++ */
++struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
++ struct vz_quota_stat *qstat)
++{
++ int err;
++ struct vz_quota_master *qmblk;
++
++ err = -EEXIST;
++ if (vzquota_find_master(quota_id) != NULL)
++ goto out;
++
++ err = -ENOMEM;
++ qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
++ if (qmblk == NULL)
++ goto out;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ qmblk->dq_uid_tree = quotatree_alloc();
++ if (!qmblk->dq_uid_tree)
++ goto out_free;
++
++ qmblk->dq_gid_tree = quotatree_alloc();
++ if (!qmblk->dq_gid_tree)
++ goto out_free_tree;
++#endif
++
++ qmblk->dq_state = VZDQ_STARTING;
++ init_MUTEX(&qmblk->dq_sem);
++ spin_lock_init(&qmblk->dq_data_lock);
++
++ qmblk->dq_id = quota_id;
++ qmblk->dq_stat = qstat->dq_stat;
++ qmblk->dq_info = qstat->dq_info;
++ qmblk->dq_root_dentry = NULL;
++ qmblk->dq_root_mnt = NULL;
++ qmblk->dq_sb = NULL;
++ qmblk->dq_ugid_count = 0;
++ qmblk->dq_ugid_max = 0;
++ qmblk->dq_flags = 0;
++ memset(qmblk->dq_ugid_info, 0, sizeof(qmblk->dq_ugid_info));
++ INIT_LIST_HEAD(&qmblk->dq_ilink_list);
++
++ atomic_set(&qmblk->dq_count, 1);
++
++ /* insert in hash chain */
++ list_add(&qmblk->dq_hash,
++ &vzquota_hash_table[vzquota_hash_func(quota_id)]);
++
++ /* success */
++ return qmblk;
++
++out_free_tree:
++ quotatree_free(qmblk->dq_uid_tree, NULL);
++out_free:
++ kmem_cache_free(vzquota_cachep, qmblk);
++out:
++ return ERR_PTR(err);
++}
++
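++/*
++ * vzquota_alloc_fake - allocate a placeholder master block (VZDQ_NOQUOT),
++ * used as the superblock's "no quota" marker for inodes outside any
++ * quota tree.
++ */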
++static struct vz_quota_master *vzquota_alloc_fake(void)
++{
++ struct vz_quota_master *qmblk;
++
++ qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
++ if (qmblk == NULL)
++ return NULL;
++ memset(qmblk, 0, sizeof(*qmblk));
++ qmblk->dq_state = VZDQ_STOPING;
++ qmblk->dq_flags = VZDQ_NOQUOT;
++ spin_lock_init(&qmblk->dq_data_lock);
++ INIT_LIST_HEAD(&qmblk->dq_ilink_list);
++ atomic_set(&qmblk->dq_count, 1);
++ return qmblk;
++}
++
++/**
++ * vzquota_find_master - find master record with given id
++ *
++ * Returns qmblk without touching its refcounter.
++ * Called under vz_quota_sem.
++ */
++struct vz_quota_master *vzquota_find_master(unsigned int quota_id)
++{
++ int i;
++ struct vz_quota_master *qp;
++
++ i = vzquota_hash_func(quota_id);
++ list_for_each_entry(qp, &vzquota_hash_table[i], dq_hash) {
++ if (qp->dq_id == quota_id)
++ return qp;
++ }
++ return NULL;
++}
++
++/**
++ * vzquota_free_master - release resources taken by qmblk, freeing memory
++ *
++ * qmblk is assumed to be already taken out from the hash.
++ * Should be called outside vz_quota_sem.
++ */
++void vzquota_free_master(struct vz_quota_master *qmblk)
++{
++#ifdef CONFIG_VZ_QUOTA_UGID
++ vzquota_kill_ugid(qmblk);
++#endif
++ BUG_ON(!list_empty(&qmblk->dq_ilink_list));
++ kmem_cache_free(vzquota_cachep, qmblk);
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Passing quota information through current
++ *
++ * Used in inode -> qmblk lookup at inode creation stage (since at that
++ * time there are no links between the inode being created and its parent
++ * directory).
++ *
++ * --------------------------------------------------------------------- */
++
++#define VZDQ_CUR_MAGIC 0x57d0fee2
++
++static inline int vzquota_cur_qmblk_check(void)
++{
++ return current->magic == VZDQ_CUR_MAGIC;
++}
++
++static inline struct inode *vzquota_cur_qmblk_fetch(void)
++{
++ return current->ino;
++}
++
++static inline void vzquota_cur_qmblk_set(struct inode *data)
++{
++ struct task_struct *tsk;
++
++ tsk = current;
++ tsk->magic = VZDQ_CUR_MAGIC;
++ tsk->ino = data;
++}
++
++#if 0
++static inline void vzquota_cur_qmblk_reset(void)
++{
++ current->magic = 0;
++}
++#endif
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Superblock quota operations
++ *
++ * --------------------------------------------------------------------- */
++
++/*
++ * Kernel structure abuse.
++ * We use the files[0] pointer as an int variable:
++ * a reference counter of how many quota blocks use this superblock.
++ * files[1] is used for a generations structure which helps us track
++ * when traversal of dentries is really required.
++ */
++#define __VZ_QUOTA_NOQUOTA(sb) sb->s_dquot.vzdq_master
++#define __VZ_QUOTA_TSTAMP(sb) ((struct timeval *)\
++ &sb->s_dquot.dqio_sem)
++
++#if defined(VZ_QUOTA_UNLOAD)
++
++#define __VZ_QUOTA_SBREF(sb) sb->s_dquot.vzdq_count
++
++struct dquot_operations *orig_dq_op;
++struct quotactl_ops *orig_dq_cop;
++
++/**
++ * vzquota_get_super - account for a new quota tree under the superblock
++ *
++ * One superblock can have multiple directory subtrees with different VZ
++ * quotas. We keep a counter of such subtrees and either install the VZ
++ * quota operations or restore the default ones.
++ *
++ * Called under vz_quota_sem (from quota_on).
++ */
++int vzquota_get_super(struct super_block *sb)
++{
++ if (sb->dq_op != &vz_quota_operations) {
++ down(&sb->s_dquot.dqonoff_sem);
++ if (sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) {
++ up(&sb->s_dquot.dqonoff_sem);
++ return -EEXIST;
++ }
++ if (orig_dq_op == NULL && sb->dq_op != NULL)
++ orig_dq_op = sb->dq_op;
++ sb->dq_op = &vz_quota_operations;
++ if (orig_dq_cop == NULL && sb->s_qcop != NULL)
++ orig_dq_cop = sb->s_qcop;
++ /* XXX this may race with sys_quotactl */
++#ifdef CONFIG_VZ_QUOTA_UGID
++ sb->s_qcop = &vz_quotactl_operations;
++#else
++ sb->s_qcop = NULL;
++#endif
++ do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
++
++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
++ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
++ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
++ /*
++		 * To get quotaops.h to call us we need to mark the superblock
++		 * as having quota. These flags mark the moment when
++		 * our dq_op starts to be called.
++ *
++ * The ordering of dq_op and s_dquot.flags assignment
++ * needs to be enforced, but other CPUs do not do rmb()
++ * between s_dquot.flags and dq_op accesses.
++ */
++ wmb(); synchronize_sched();
++ sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
++ __module_get(THIS_MODULE);
++ up(&sb->s_dquot.dqonoff_sem);
++ }
++ /* protected by vz_quota_sem */
++ __VZ_QUOTA_SBREF(sb)++;
++ return 0;
++}
++
++/**
++ * vzquota_put_super - release the superblock when one quota tree goes away
++ *
++ * Called under vz_quota_sem.
++ */
++void vzquota_put_super(struct super_block *sb)
++{
++ int count;
++
++ count = --__VZ_QUOTA_SBREF(sb);
++ if (count == 0) {
++ down(&sb->s_dquot.dqonoff_sem);
++ sb->s_dquot.flags = 0;
++ wmb(); synchronize_sched();
++ sema_init(&sb->s_dquot.dqio_sem, 1);
++ sb->s_qcop = orig_dq_cop;
++ sb->dq_op = orig_dq_op;
++ inode_qmblk_lock(sb);
++ quota_gen_put(SB_QGEN(sb));
++ SB_QGEN(sb) = NULL;
++ /* release qlnk's without qmblk */
++ remove_inode_quota_links_list(&non_vzquota_inodes_lh,
++ sb, NULL);
++ /*
++ * Races with quota initialization:
++ * after this inode_qmblk_unlock all inode's generations are
++ * invalidated, quota_inode_qmblk checks superblock operations.
++ */
++ inode_qmblk_unlock(sb);
++ /*
++ * Module refcounting: in theory, this is the best place
++ * to call module_put(THIS_MODULE).
++ * In reality, it can't be done because we can't be sure that
++ * other CPUs do not enter our code segment through dq_op
++ * cached long time ago. Quotaops interface isn't supposed to
++ * go into modules currently (that is, into unloadable
++ * modules). By omitting module_put, our module isn't
++ * unloadable.
++ */
++ up(&sb->s_dquot.dqonoff_sem);
++ }
++}
++
++#else
++
++struct vzquota_new_sop {
++ struct super_operations new_op;
++ struct super_operations *old_op;
++};
++
++/**
++ * vzquota_shutdown_super - callback on umount
++ */
++void vzquota_shutdown_super(struct super_block *sb)
++{
++ struct vz_quota_master *qmblk;
++ struct vzquota_new_sop *sop;
++
++ qmblk = __VZ_QUOTA_NOQUOTA(sb);
++ __VZ_QUOTA_NOQUOTA(sb) = NULL;
++ if (qmblk != NULL)
++ qmblk_put(qmblk);
++ sop = container_of(sb->s_op, struct vzquota_new_sop, new_op);
++ sb->s_op = sop->old_op;
++ kfree(sop);
++ (*sb->s_op->put_super)(sb);
++}
++
++/**
++ * vzquota_get_super - account for a new quota tree under the superblock
++ *
++ * One superblock can have multiple directory subtrees with different VZ
++ * quotas.
++ *
++ * Called under vz_quota_sem (from vzquota_on).
++ */
++int vzquota_get_super(struct super_block *sb)
++{
++ struct vz_quota_master *qnew;
++ struct vzquota_new_sop *sop;
++ int err;
++
++ down(&sb->s_dquot.dqonoff_sem);
++ err = -EEXIST;
++ if ((sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) &&
++ sb->dq_op != &vz_quota_operations)
++ goto out_up;
++
++ /*
++ * This allocation code should be under sb->dq_op check below, but
++ * it doesn't really matter...
++ */
++ if (__VZ_QUOTA_NOQUOTA(sb) == NULL) {
++ qnew = vzquota_alloc_fake();
++ if (qnew == NULL)
++ goto out_up;
++ __VZ_QUOTA_NOQUOTA(sb) = qnew;
++ }
++
++ if (sb->dq_op != &vz_quota_operations) {
++ sop = kmalloc(sizeof(*sop), GFP_KERNEL);
++ if (sop == NULL) {
++ vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb));
++ __VZ_QUOTA_NOQUOTA(sb) = NULL;
++ goto out_up;
++ }
++ memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op));
++ sop->new_op.put_super = &vzquota_shutdown_super;
++ sop->old_op = sb->s_op;
++ sb->s_op = &sop->new_op;
++
++ sb->dq_op = &vz_quota_operations;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ sb->s_qcop = &vz_quotactl_operations;
++#else
++ sb->s_qcop = NULL;
++#endif
++ do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
++
++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
++ /* these 2 list heads are checked in sync_dquots() */
++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
++ sb->s_dquot.info[USRQUOTA].dqi_format =
++ &vz_quota_empty_v2_format;
++ sb->s_dquot.info[GRPQUOTA].dqi_format =
++ &vz_quota_empty_v2_format;
++
++ /*
++		 * To get quotaops.h to call us we need to mark the superblock
++		 * as having quota. These flags mark the moment when
++		 * our dq_op starts to be called.
++ *
++ * The ordering of dq_op and s_dquot.flags assignment
++ * needs to be enforced, but other CPUs do not do rmb()
++ * between s_dquot.flags and dq_op accesses.
++ */
++ wmb(); synchronize_sched();
++ sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
++ }
++ err = 0;
++
++out_up:
++ up(&sb->s_dquot.dqonoff_sem);
++ return err;
++}
++
++/**
++ * vzquota_put_super - one quota tree less on this superblock
++ *
++ * Called under vz_quota_sem.
++ */
++void vzquota_put_super(struct super_block *sb)
++{
++ /*
++ * Even if this put is the last one,
++ * sb->s_dquot.flags can't be cleared, because otherwise vzquota_drop
++ * won't be called and the remaining qmblk references won't be put.
++ */
++}
++
++#endif
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Helpers for inode -> qmblk link maintenance
++ *
++ * --------------------------------------------------------------------- */
++
++#define __VZ_QUOTA_EMPTY ((void *)0xbdbdbdbd)
++#define VZ_QUOTA_IS_NOQUOTA(qm, sb) ((qm)->dq_flags & VZDQ_NOQUOT)
++#define VZ_QUOTA_EMPTY_IOPS (&vfs_empty_iops)
++extern struct inode_operations vfs_empty_iops;
++
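++/*
++ * An inode's qlnk is considered actual if it points to VZ_QUOTA_BAD or to
++ * a master block that has not been invalidated (VZDQ_NOACT clear);
++ * an empty qlnk is never actual.
++ */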
++static int VZ_QUOTA_IS_ACTUAL(struct inode *inode)
++{
++ struct vz_quota_master *qmblk;
++
++ qmblk = INODE_QLNK(inode)->qmblk;
++ if (qmblk == VZ_QUOTA_BAD)
++ return 1;
++ if (qmblk == __VZ_QUOTA_EMPTY)
++ return 0;
++ if (qmblk->dq_flags & VZDQ_NOACT)
++ /* not actual (invalidated) qmblk */
++ return 0;
++ return 1;
++}
++
++static inline int vzquota_qlnk_is_empty(struct vz_quota_ilink *qlnk)
++{
++ return qlnk->qmblk == __VZ_QUOTA_EMPTY;
++}
++
++static inline void vzquota_qlnk_set_empty(struct vz_quota_ilink *qlnk)
++{
++ qlnk->qmblk = __VZ_QUOTA_EMPTY;
++ qlnk->origin = VZ_QUOTAO_SETE;
++}
++
++void vzquota_qlnk_init(struct vz_quota_ilink *qlnk)
++{
++ memset(qlnk, 0, sizeof(*qlnk));
++ INIT_LIST_HEAD(&qlnk->list);
++ vzquota_qlnk_set_empty(qlnk);
++ qlnk->origin = VZ_QUOTAO_INIT;
++}
++
++void vzquota_qlnk_destroy(struct vz_quota_ilink *qlnk)
++{
++ might_sleep();
++ if (vzquota_qlnk_is_empty(qlnk))
++ return;
++#if defined(CONFIG_VZ_QUOTA_UGID)
++ if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) {
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid *quid, *qgid;
++ qmblk = qlnk->qmblk;
++ quid = qlnk->qugid[USRQUOTA];
++ qgid = qlnk->qugid[GRPQUOTA];
++ if (quid != NULL || qgid != NULL) {
++ down(&qmblk->dq_sem);
++ if (qgid != NULL)
++ vzquota_put_ugid(qmblk, qgid);
++ if (quid != NULL)
++ vzquota_put_ugid(qmblk, quid);
++ up(&qmblk->dq_sem);
++ }
++ }
++#endif
++ if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qlnk->qmblk);
++ qlnk->origin = VZ_QUOTAO_DESTR;
++}
++
++/**
++ * vzquota_qlnk_swap - swap inode's and temporary vz_quota_ilink contents
++ * @qlt: temporary
++ * @qli: inode's
++ *
++ * Locking is provided by the caller (depending on the context).
++ * After swap, @qli is inserted into the corresponding dq_ilink_list,
++ * @qlt list is reinitialized.
++ */
++static void vzquota_qlnk_swap(struct vz_quota_ilink *qlt,
++ struct vz_quota_ilink *qli)
++{
++ struct vz_quota_master *qb;
++ struct vz_quota_ugid *qu;
++ int i;
++
++ qb = qlt->qmblk;
++ qlt->qmblk = qli->qmblk;
++ qli->qmblk = qb;
++ list_del_init(&qli->list);
++ if (qb != __VZ_QUOTA_EMPTY && qb != VZ_QUOTA_BAD)
++ list_add(&qli->list, &qb->dq_ilink_list);
++ INIT_LIST_HEAD(&qlt->list);
++ qli->origin = VZ_QUOTAO_SWAP;
++
++ for (i = 0; i < MAXQUOTAS; i++) {
++ qu = qlt->qugid[i];
++ qlt->qugid[i] = qli->qugid[i];
++ qli->qugid[i] = qu;
++ }
++}
++
++/**
++ * vzquota_qlnk_reinit_locked - destroy qlnk content, called under locks
++ *
++ * Called under dcache_lock and inode_qmblk locks.
++ * Returns 1 if locks were dropped inside, 0 if atomic.
++ */
++static int vzquota_qlnk_reinit_locked(struct vz_quota_ilink *qlnk,
++ struct inode *inode)
++{
++ if (vzquota_qlnk_is_empty(qlnk))
++ return 0;
++ if (qlnk->qmblk == VZ_QUOTA_BAD) {
++ vzquota_qlnk_set_empty(qlnk);
++ return 0;
++ }
++ spin_unlock(&dcache_lock);
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(qlnk);
++ vzquota_qlnk_init(qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ spin_lock(&dcache_lock);
++ return 1;
++}
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++/**
++ * vzquota_qlnk_reinit_attr - destroy and reinit qlnk content
++ *
++ * Similar to vzquota_qlnk_reinit_locked, called under different locks.
++ */
++static int vzquota_qlnk_reinit_attr(struct vz_quota_ilink *qlnk,
++ struct inode *inode,
++ struct vz_quota_master *qmblk)
++{
++ if (vzquota_qlnk_is_empty(qlnk))
++ return 0;
++ /* may be optimized if qlnk->qugid all NULLs */
++ qmblk_data_write_unlock(qmblk);
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(qlnk);
++ vzquota_qlnk_init(qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ qmblk_data_write_lock(qmblk);
++ return 1;
++}
++#endif
++
++/**
++ * vzquota_qlnk_fill - fill vz_quota_ilink content
++ * @qlnk: vz_quota_ilink to fill
++ * @inode: inode for which @qlnk is filled (i_sb, i_uid, i_gid)
++ * @qmblk: qmblk to which this @qlnk will belong
++ *
++ * Called under dcache_lock and inode_qmblk locks.
++ * Returns 1 if locks were dropped inside, 0 if atomic.
++ * @qlnk is expected to be empty.
++ */
++static int vzquota_qlnk_fill(struct vz_quota_ilink *qlnk,
++ struct inode *inode,
++ struct vz_quota_master *qmblk)
++{
++ if (qmblk != VZ_QUOTA_BAD)
++ qmblk_get(qmblk);
++ qlnk->qmblk = qmblk;
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++ if (qmblk != VZ_QUOTA_BAD &&
++ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
++ (qmblk->dq_flags & VZDQUG_ON)) {
++ struct vz_quota_ugid *quid, *qgid;
++
++ spin_unlock(&dcache_lock);
++ inode_qmblk_unlock(inode->i_sb);
++
++ down(&qmblk->dq_sem);
++ quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0);
++ qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0);
++ up(&qmblk->dq_sem);
++
++ inode_qmblk_lock(inode->i_sb);
++ spin_lock(&dcache_lock);
++ qlnk->qugid[USRQUOTA] = quid;
++ qlnk->qugid[GRPQUOTA] = qgid;
++ return 1;
++ }
++#endif
++
++ return 0;
++}
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++/**
++ * vzquota_qlnk_fill_attr - fill vz_quota_ilink content for uid, gid
++ *
++ * This function is a helper for vzquota_transfer, and differs from
++ * vzquota_qlnk_fill only by locking.
++ */
++static int vzquota_qlnk_fill_attr(struct vz_quota_ilink *qlnk,
++ struct inode *inode,
++ struct iattr *iattr,
++ int mask,
++ struct vz_quota_master *qmblk)
++{
++ qmblk_get(qmblk);
++ qlnk->qmblk = qmblk;
++
++ if (mask) {
++ struct vz_quota_ugid *quid, *qgid;
++
++ quid = qgid = NULL; /* to make gcc happy */
++ if (!(mask & (1 << USRQUOTA)))
++ quid = vzquota_get_ugid(INODE_QLNK(inode)->
++ qugid[USRQUOTA]);
++ if (!(mask & (1 << GRPQUOTA)))
++ qgid = vzquota_get_ugid(INODE_QLNK(inode)->
++ qugid[GRPQUOTA]);
++
++ qmblk_data_write_unlock(qmblk);
++ inode_qmblk_unlock(inode->i_sb);
++
++ down(&qmblk->dq_sem);
++ if (mask & (1 << USRQUOTA))
++ quid = __vzquota_find_ugid(qmblk, iattr->ia_uid,
++ USRQUOTA, 0);
++ if (mask & (1 << GRPQUOTA))
++ qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid,
++ GRPQUOTA, 0);
++ up(&qmblk->dq_sem);
++
++ inode_qmblk_lock(inode->i_sb);
++ qmblk_data_write_lock(qmblk);
++ qlnk->qugid[USRQUOTA] = quid;
++ qlnk->qugid[GRPQUOTA] = qgid;
++ return 1;
++ }
++
++ return 0;
++}
++#endif
++
++/**
++ * __vzquota_inode_init - make sure inode's qlnk is initialized
++ *
++ * May be called if qlnk is already initialized, detects this situation itself.
++ * Called under inode_qmblk_lock.
++ */
++static void __vzquota_inode_init(struct inode *inode, unsigned char origin)
++{
++ if (inode->i_dquot[USRQUOTA] == NODQUOT) {
++ vzquota_qlnk_init(INODE_QLNK(inode));
++ inode->i_dquot[USRQUOTA] = (void *)~(unsigned long)NODQUOT;
++ }
++ INODE_QLNK(inode)->origin = origin;
++}
++
++/**
++ * vzquota_inode_drop - destroy VZ quota information in the inode
++ *
++ * Inode must not be externally accessible or dirty.
++ */
++static void vzquota_inode_drop(struct inode *inode)
++{
++ struct vz_quota_ilink qlnk;
++
++ vzquota_qlnk_init(&qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ vzquota_qlnk_swap(&qlnk, INODE_QLNK(inode));
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DRCAL;
++ inode->i_dquot[USRQUOTA] = NODQUOT;
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(&qlnk);
++}
++
++/**
++ * vzquota_inode_qmblk_set - initialize inode's qlnk
++ * @inode: inode to be initialized
++ * @qmblk: quota master block to which this inode should belong (may be BAD)
++ * @qlnk: placeholder to store data to resolve locking issues
++ *
++ * Returns 1 if locks were dropped and rechecks possibly needed, 0 otherwise.
++ * Called under dcache_lock and inode_qmblk locks.
++ * @qlnk will be destroyed in the caller chain.
++ *
++ * It is not mandatory to restart parent checks since quota on/off currently
++ * shrinks the dentry tree and checks that there are no outside references.
++ * But if at some point that shrink is removed, restarts will be required.
++ * Additionally, the restarts prevent inconsistencies if the dentry tree
++ * changes (an inode is moved). This is not a big deal, but anyway...
++ */
++static int vzquota_inode_qmblk_set(struct inode *inode,
++ struct vz_quota_master *qmblk,
++ struct vz_quota_ilink *qlnk)
++{
++ if (qmblk == NULL) {
++ printk(KERN_ERR "VZDQ: NULL in set, "
++ "orig %u, dev %s, inode %lu, fs %s\n",
++ INODE_QLNK(inode)->origin,
++ inode->i_sb->s_id, inode->i_ino,
++ inode->i_sb->s_type->name);
++ printk(KERN_ERR "current %d (%s), VE %d\n",
++ current->pid, current->comm,
++ VEID(get_exec_env()));
++ dump_stack();
++ qmblk = VZ_QUOTA_BAD;
++ }
++ while (1) {
++ if (vzquota_qlnk_is_empty(qlnk) &&
++ vzquota_qlnk_fill(qlnk, inode, qmblk))
++ return 1;
++ if (qlnk->qmblk == qmblk)
++ break;
++ if (vzquota_qlnk_reinit_locked(qlnk, inode))
++ return 1;
++ }
++ vzquota_qlnk_swap(qlnk, INODE_QLNK(inode));
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_QSET;
++ return 0;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * vzquota_inode_qmblk (inode -> qmblk lookup) parts
++ *
++ * --------------------------------------------------------------------- */
++
++static int vzquota_dparents_check_attach(struct inode *inode)
++{
++ if (!list_empty(&inode->i_dentry))
++ return 0;
++ printk(KERN_ERR "VZDQ: no parent for "
++ "dev %s, inode %lu, fs %s\n",
++ inode->i_sb->s_id,
++ inode->i_ino,
++ inode->i_sb->s_type->name);
++ return -1;
++}
++
++static struct inode *vzquota_dparents_check_actual(struct inode *inode)
++{
++ struct dentry *de;
++
++ list_for_each_entry(de, &inode->i_dentry, d_alias) {
++ if (de->d_parent == de) /* detached dentry, perhaps */
++ continue;
++ /* first access to parent, make sure its qlnk initialized */
++ __vzquota_inode_init(de->d_parent->d_inode, VZ_QUOTAO_ACT);
++ if (!VZ_QUOTA_IS_ACTUAL(de->d_parent->d_inode))
++ return de->d_parent->d_inode;
++ }
++ return NULL;
++}
++
++static struct vz_quota_master *vzquota_dparents_check_same(struct inode *inode)
++{
++ struct dentry *de;
++ struct vz_quota_master *qmblk;
++
++ qmblk = NULL;
++ list_for_each_entry(de, &inode->i_dentry, d_alias) {
++ if (de->d_parent == de) /* detached dentry, perhaps */
++ continue;
++ if (qmblk == NULL) {
++ qmblk = INODE_QLNK(de->d_parent->d_inode)->qmblk;
++ continue;
++ }
++ if (INODE_QLNK(de->d_parent->d_inode)->qmblk != qmblk) {
++ printk(KERN_WARNING "VZDQ: multiple quotas for "
++ "dev %s, inode %lu, fs %s\n",
++ inode->i_sb->s_id,
++ inode->i_ino,
++ inode->i_sb->s_type->name);
++ qmblk = VZ_QUOTA_BAD;
++ break;
++ }
++ }
++ if (qmblk == NULL) {
++ printk(KERN_WARNING "VZDQ: not attached to tree, "
++ "dev %s, inode %lu, fs %s\n",
++ inode->i_sb->s_id,
++ inode->i_ino,
++ inode->i_sb->s_type->name);
++ qmblk = VZ_QUOTA_BAD;
++ }
++ return qmblk;
++}
++
++static void vzquota_dbranch_actualize(struct inode *inode,
++ struct inode *refinode)
++{
++ struct inode *pinode;
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ilink qlnk;
++
++ vzquota_qlnk_init(&qlnk);
++
++start:
++ if (inode == inode->i_sb->s_root->d_inode) {
++ /* filesystem root */
++ atomic_inc(&inode->i_count);
++ do {
++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
++ } while (vzquota_inode_qmblk_set(inode, qmblk, &qlnk));
++ goto out;
++ }
++
++ if (!vzquota_dparents_check_attach(inode)) {
++ pinode = vzquota_dparents_check_actual(inode);
++ if (pinode != NULL) {
++ inode = pinode;
++ goto start;
++ }
++ }
++
++ atomic_inc(&inode->i_count);
++ while (1) {
++ if (VZ_QUOTA_IS_ACTUAL(inode)) /* actualized without us */
++ break;
++ /*
++ * Need to check parents again if we have slept inside
++ * vzquota_inode_qmblk_set() in the loop.
++ * If the state of parents is different, just return and repeat
++ * the actualizing process again from the inode passed to
++ * vzquota_inode_qmblk_recalc().
++ */
++ if (!vzquota_dparents_check_attach(inode)) {
++ if (vzquota_dparents_check_actual(inode) != NULL)
++ break;
++ qmblk = vzquota_dparents_check_same(inode);
++ } else
++ qmblk = VZ_QUOTA_BAD;
++ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)){/* success */
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_ACT;
++ break;
++ }
++ }
++
++out:
++ spin_unlock(&dcache_lock);
++ inode_qmblk_unlock(refinode->i_sb);
++ vzquota_qlnk_destroy(&qlnk);
++ iput(inode);
++ inode_qmblk_lock(refinode->i_sb);
++ spin_lock(&dcache_lock);
++}
++
++static void vzquota_dtree_qmblk_recalc(struct inode *inode,
++ struct vz_quota_ilink *qlnk)
++{
++ struct inode *pinode;
++ struct vz_quota_master *qmblk;
++
++ if (inode == inode->i_sb->s_root->d_inode) {
++ /* filesystem root */
++ do {
++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
++ } while (vzquota_inode_qmblk_set(inode, qmblk, qlnk));
++ return;
++ }
++
++start:
++ if (VZ_QUOTA_IS_ACTUAL(inode))
++ return;
++ /*
++ * Here qmblk is (re-)initialized for all ancestors.
++ * This is not a very efficient procedure, but it guarantees that
++ * the quota tree is consistent (that is, the inode doesn't have two
++ * ancestors with different qmblk).
++ */
++ if (!vzquota_dparents_check_attach(inode)) {
++ pinode = vzquota_dparents_check_actual(inode);
++ if (pinode != NULL) {
++ vzquota_dbranch_actualize(pinode, inode);
++ goto start;
++ }
++ qmblk = vzquota_dparents_check_same(inode);
++ } else
++ qmblk = VZ_QUOTA_BAD;
++
++ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
++ goto start;
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DTREE;
++}
++
++static void vzquota_det_qmblk_recalc(struct inode *inode,
++ struct vz_quota_ilink *qlnk)
++{
++ struct inode *parent;
++ struct vz_quota_master *qmblk;
++ char *msg;
++ int cnt;
++ time_t timeout;
++
++ cnt = 0;
++ parent = NULL;
++start:
++ /*
++ * qmblk of detached inodes shouldn't be considered as not actual.
++ * They are not in any dentry tree, so quota on/off shouldn't affect
++ * them.
++ */
++ if (!vzquota_qlnk_is_empty(INODE_QLNK(inode)))
++ return;
++
++ timeout = 3;
++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
++ msg = "detached inode not in creation";
++ if (inode->i_op != VZ_QUOTA_EMPTY_IOPS)
++ goto fail;
++ qmblk = VZ_QUOTA_BAD;
++ msg = "unexpected creation context";
++ if (!vzquota_cur_qmblk_check())
++ goto fail;
++ timeout = 0;
++ parent = vzquota_cur_qmblk_fetch();
++ msg = "uninitialized parent";
++ if (vzquota_qlnk_is_empty(INODE_QLNK(parent)))
++ goto fail;
++ msg = "parent not in tree";
++ if (list_empty(&parent->i_dentry))
++ goto fail;
++ msg = "parent has 0 refcount";
++ if (!atomic_read(&parent->i_count))
++ goto fail;
++ msg = "parent has different sb";
++ if (parent->i_sb != inode->i_sb)
++ goto fail;
++ if (!VZ_QUOTA_IS_ACTUAL(parent)) {
++ vzquota_dbranch_actualize(parent, inode);
++ goto start;
++ }
++
++ qmblk = INODE_QLNK(parent)->qmblk;
++set:
++ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
++ goto start;
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DET;
++ return;
++
++fail:
++ {
++ struct timeval tv, tvo;
++ do_gettimeofday(&tv);
++ memcpy(&tvo, __VZ_QUOTA_TSTAMP(inode->i_sb), sizeof(tvo));
++ tv.tv_sec -= tvo.tv_sec;
++ if (tv.tv_usec < tvo.tv_usec) {
++ tv.tv_sec--;
++ tv.tv_usec += USEC_PER_SEC - tvo.tv_usec;
++ } else
++ tv.tv_usec -= tvo.tv_usec;
++ if (tv.tv_sec < timeout)
++ goto set;
++ printk(KERN_ERR "VZDQ: %s, orig %u,"
++ " dev %s, inode %lu, fs %s\n",
++ msg, INODE_QLNK(inode)->origin,
++ inode->i_sb->s_id, inode->i_ino,
++ inode->i_sb->s_type->name);
++ if (!cnt++) {
++ printk(KERN_ERR "current %d (%s), VE %d,"
++ " time %ld.%06ld\n",
++ current->pid, current->comm,
++ VEID(get_exec_env()),
++ tv.tv_sec, tv.tv_usec);
++ dump_stack();
++ }
++ if (parent != NULL)
++ printk(KERN_ERR "VZDQ: parent of %lu is %lu\n",
++ inode->i_ino, parent->i_ino);
++ }
++ goto set;
++}
++
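++/*
++ * vzquota_inode_qmblk_recalc - recompute the inode's qmblk: via the dentry
++ * tree for attached inodes, or via the creation context stored in current
++ * for detached ones.
++ */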
++static void vzquota_inode_qmblk_recalc(struct inode *inode,
++ struct vz_quota_ilink *qlnk)
++{
++ spin_lock(&dcache_lock);
++ if (!list_empty(&inode->i_dentry))
++ vzquota_dtree_qmblk_recalc(inode, qlnk);
++ else
++ vzquota_det_qmblk_recalc(inode, qlnk);
++ spin_unlock(&dcache_lock);
++}
++
++/**
++ * vzquota_inode_qmblk - obtain inode's qmblk
++ *
++ * Returns qmblk with refcounter taken, %NULL if not under
++ * VZ quota or %VZ_QUOTA_BAD.
++ *
++ * FIXME: This function should be removed when vzquota_find_qmblk /
++ * get_quota_root / vzquota_dstat code is cleaned up.
++ */
++struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ilink qlnk;
++
++ might_sleep();
++
++ if (inode->i_sb->dq_op != &vz_quota_operations)
++ return NULL;
++#if defined(VZ_QUOTA_UNLOAD)
++#error Make sure qmblk does not disappear
++#endif
++
++ vzquota_qlnk_init(&qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
++
++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
++ !VZ_QUOTA_IS_ACTUAL(inode))
++ vzquota_inode_qmblk_recalc(inode, &qlnk);
++
++ qmblk = INODE_QLNK(inode)->qmblk;
++ if (qmblk != VZ_QUOTA_BAD) {
++ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb))
++ qmblk_get(qmblk);
++ else
++ qmblk = NULL;
++ }
++
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(&qlnk);
++ return qmblk;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Calls from quota operations
++ *
++ * --------------------------------------------------------------------- */
++
++/**
++ * vzquota_inode_init_call - call from DQUOT_INIT
++ */
++void vzquota_inode_init_call(struct inode *inode)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++
++ /* initializes inode's quota inside */
++ qmblk = vzquota_inode_data(inode, &data);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ vzquota_data_unlock(inode, &data);
++
++ /*
++ * The check is needed for repeated new_inode() calls from a single
++ * ext3 call like create or mkdir in case of -ENOSPC.
++ */
++ spin_lock(&dcache_lock);
++ if (!list_empty(&inode->i_dentry))
++ vzquota_cur_qmblk_set(inode);
++ spin_unlock(&dcache_lock);
++}
++
++/**
++ * vzquota_inode_drop_call - call from DQUOT_DROP
++ */
++void vzquota_inode_drop_call(struct inode *inode)
++{
++ vzquota_inode_drop(inode);
++}
++
++/**
++ * vzquota_inode_data - initialize (if necessary) and lock inode quota pointers
++ * @inode: the inode
++ * @data: storage space
++ *
++ * Returns: qmblk is NULL or VZ_QUOTA_BAD or actualized qmblk.
++ * On return if qmblk is neither NULL nor VZ_QUOTA_BAD:
++ * qmblk in inode's qlnk is the same as returned,
++ * ugid pointers inside inode's qlnk are valid,
++ * some locks are taken (and should be released by vzquota_data_unlock).
++ * If qmblk is NULL or VZ_QUOTA_BAD, locks are NOT taken.
++ */
++struct vz_quota_master *vzquota_inode_data(struct inode *inode,
++ struct vz_quota_datast *data)
++{
++ struct vz_quota_master *qmblk;
++
++ might_sleep();
++
++ vzquota_qlnk_init(&data->qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
++
++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
++ !VZ_QUOTA_IS_ACTUAL(inode))
++ vzquota_inode_qmblk_recalc(inode, &data->qlnk);
++
++ qmblk = INODE_QLNK(inode)->qmblk;
++ if (qmblk != VZ_QUOTA_BAD) {
++ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) {
++ /*
++ * Note that in the current implementation,
++ * inode_qmblk_lock can theoretically be dropped here.
++ * This place is serialized with quota_off because
++ * quota_off fails when there are extra dentry
++ * references and syncs inodes before removing quota
++ * information from them.
++ * However, quota usage information should stop being
++ * updated immediately after vzquota_off.
++ */
++ qmblk_data_write_lock(qmblk);
++ } else {
++ inode_qmblk_unlock(inode->i_sb);
++ qmblk = NULL;
++ }
++ } else {
++ inode_qmblk_unlock(inode->i_sb);
++ }
++ return qmblk;
++}
++
++void vzquota_data_unlock(struct inode *inode,
++ struct vz_quota_datast *data)
++{
++ qmblk_data_write_unlock(INODE_QLNK(inode)->qmblk);
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(&data->qlnk);
++}
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++/**
++ * vzquota_inode_transfer_call - call from vzquota_transfer
++ */
++int vzquota_inode_transfer_call(struct inode *inode, struct iattr *iattr)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++ struct vz_quota_ilink qlnew;
++ int mask;
++ int ret;
++
++ might_sleep();
++ vzquota_qlnk_init(&qlnew);
++start:
++ qmblk = vzquota_inode_data(inode, &data);
++ ret = NO_QUOTA;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out_destr;
++ ret = QUOTA_OK;
++ if (qmblk == NULL)
++ goto out_destr;
++ qmblk_get(qmblk);
++
++ ret = QUOTA_OK;
++ if (!(qmblk->dq_flags & VZDQUG_ON))
++ /* no ugid quotas */
++ goto out_unlock;
++
++ mask = 0;
++ if ((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid)
++ mask |= 1 << USRQUOTA;
++ if ((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid)
++ mask |= 1 << GRPQUOTA;
++ while (1) {
++ if (vzquota_qlnk_is_empty(&qlnew) &&
++ vzquota_qlnk_fill_attr(&qlnew, inode, iattr, mask, qmblk))
++ break;
++ if (qlnew.qmblk == INODE_QLNK(inode)->qmblk &&
++ qlnew.qmblk == qmblk)
++ goto finish;
++ if (vzquota_qlnk_reinit_attr(&qlnew, inode, qmblk))
++ break;
++ }
++
++ /* prepare for restart */
++ vzquota_data_unlock(inode, &data);
++ qmblk_put(qmblk);
++ goto start;
++
++finish:
++ /* all references obtained successfully */
++ ret = vzquota_transfer_usage(inode, mask, &qlnew);
++ if (!ret) {
++ vzquota_qlnk_swap(&qlnew, INODE_QLNK(inode));
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_TRANS;
++ }
++out_unlock:
++ vzquota_data_unlock(inode, &data);
++ qmblk_put(qmblk);
++out_destr:
++ vzquota_qlnk_destroy(&qlnew);
++ return ret;
++}
++#endif
++
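++/*
++ * vzquota_rename_check - verify that a rename does not move an inode
++ * between different quota masters; returns 0 if allowed, -1 otherwise.
++ * Renaming the quota root itself between non-quota directories is allowed.
++ */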
++int vzquota_rename_check(struct inode *inode,
++ struct inode *old_dir, struct inode *new_dir)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ilink qlnk1, qlnk2;
++ int c, ret;
++
++ if (inode->i_sb != old_dir->i_sb || inode->i_sb != new_dir->i_sb)
++ return -1;
++
++ might_sleep();
++
++ vzquota_qlnk_init(&qlnk1);
++ vzquota_qlnk_init(&qlnk2);
++ inode_qmblk_lock(inode->i_sb);
++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
++ __vzquota_inode_init(old_dir, VZ_QUOTAO_INICAL);
++ __vzquota_inode_init(new_dir, VZ_QUOTAO_INICAL);
++
++ do {
++ c = 0;
++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
++ !VZ_QUOTA_IS_ACTUAL(inode)) {
++ vzquota_inode_qmblk_recalc(inode, &qlnk1);
++ c++;
++ }
++ if (vzquota_qlnk_is_empty(INODE_QLNK(new_dir)) ||
++ !VZ_QUOTA_IS_ACTUAL(new_dir)) {
++ vzquota_inode_qmblk_recalc(new_dir, &qlnk2);
++ c++;
++ }
++ } while (c);
++
++ ret = 0;
++ qmblk = INODE_QLNK(inode)->qmblk;
++ if (qmblk != INODE_QLNK(new_dir)->qmblk) {
++ ret = -1;
++ if (qmblk != VZ_QUOTA_BAD &&
++ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
++ qmblk->dq_root_dentry->d_inode == inode &&
++ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(new_dir)->qmblk,
++ inode->i_sb) &&
++ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(old_dir)->qmblk,
++ inode->i_sb))
++ /* quota root rename is allowed */
++ ret = 0;
++ }
++
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(&qlnk2);
++ vzquota_qlnk_destroy(&qlnk1);
++ return ret;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * qmblk-related parts of on/off operations
++ *
++ * --------------------------------------------------------------------- */
++
++/**
++ * vzquota_check_dtree - check dentry tree if quota on/off is allowed
++ *
++ * This function doesn't allow quota to be turned on/off if some dentries in
++ * the tree have external references.
++ * In addition to technical reasons, it enforces user-space correctness:
++ * current usage (taken from or reported to the user space) can be meaningful
++ * and accurate only if the tree is not being modified.
++ * Side effect: additional vfsmount structures referencing the tree (bind
++ * mounts of tree nodes to some other places) are not allowed at on/off time.
++ */
++int vzquota_check_dtree(struct vz_quota_master *qmblk, int off)
++{
++ struct dentry *dentry;
++ int err, count;
++
++ err = -EBUSY;
++ dentry = qmblk->dq_root_dentry;
++
++ if (d_unhashed(dentry))
++ goto unhashed;
++
++ /* attempt to shrink */
++ if (!list_empty(&dentry->d_subdirs)) {
++ spin_unlock(&dcache_lock);
++ inode_qmblk_unlock(dentry->d_sb);
++ shrink_dcache_parent(dentry);
++ inode_qmblk_lock(dentry->d_sb);
++ spin_lock(&dcache_lock);
++ if (!list_empty(&dentry->d_subdirs))
++ goto out;
++
++ count = 1;
++ if (dentry == dentry->d_sb->s_root)
++ count += 2; /* sb and mnt refs */
++ if (atomic_read(&dentry->d_count) < count) {
++ printk(KERN_ERR "%s: too small count %d vs %d.\n",
++ __FUNCTION__,
++ atomic_read(&dentry->d_count), count);
++ goto out;
++ }
++ if (atomic_read(&dentry->d_count) > count)
++ goto out;
++ }
++
++ err = 0;
++out:
++ return err;
++
++unhashed:
++ /*
++ * Quota root is removed.
++ * Allow to turn quota off, but not on.
++ */
++ if (off)
++ err = 0;
++ goto out;
++}
++
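++/*
++ * vzquota_on_qmblk - bind @qmblk to the quota root @inode (after checking
++ * that the dentry tree has no external references) and replace the
++ * superblock's "no quota" placeholder with a fresh one.
++ */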
++int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
++ struct vz_quota_master *qmblk)
++{
++ struct vz_quota_ilink qlnk;
++ struct vz_quota_master *qold, *qnew;
++ int err;
++
++ might_sleep();
++
++ qold = NULL;
++ qnew = vzquota_alloc_fake();
++ if (qnew == NULL)
++ return -ENOMEM;
++
++ vzquota_qlnk_init(&qlnk);
++ inode_qmblk_lock(sb);
++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
++
++ spin_lock(&dcache_lock);
++ while (1) {
++ err = vzquota_check_dtree(qmblk, 0);
++ if (err)
++ break;
++ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk))
++ break;
++ }
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_ON;
++ spin_unlock(&dcache_lock);
++
++ if (!err) {
++ qold = __VZ_QUOTA_NOQUOTA(sb);
++ qold->dq_flags |= VZDQ_NOACT;
++ __VZ_QUOTA_NOQUOTA(sb) = qnew;
++ }
++
++ inode_qmblk_unlock(sb);
++ vzquota_qlnk_destroy(&qlnk);
++ if (qold != NULL)
++ qmblk_put(qold);
++
++ return err;
++}
++
++int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk)
++{
++ int ret;
++
++ ret = 0;
++ inode_qmblk_lock(sb);
++
++ spin_lock(&dcache_lock);
++ if (vzquota_check_dtree(qmblk, 1))
++ ret = -EBUSY;
++ spin_unlock(&dcache_lock);
++
++ if (!ret)
++ qmblk->dq_flags |= VZDQ_NOACT | VZDQ_NOQUOT;
++ inode_qmblk_unlock(sb);
++ return ret;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * External interfaces
++ *
++ * ---------------------------------------------------------------------*/
++
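++/*
++ * vzquota_ioctl - entry point for the VZCTL_QUOTA_* ioctls; dispatches
++ * VZCTL_QUOTA_NEW_CTL to do_vzquotactl() and VZCTL_QUOTA_UGID_CTL to
++ * do_vzquotaugidctl().
++ */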
++static int vzquota_ioctl(struct inode *ino, struct file *file,
++ unsigned int cmd, unsigned long arg)
++{
++ int err;
++ struct vzctl_quotactl qb;
++ struct vzctl_quotaugidctl qub;
++
++ switch (cmd) {
++ case VZCTL_QUOTA_CTL:
++ err = -ENOTTY;
++ break;
++ case VZCTL_QUOTA_NEW_CTL:
++ err = -EFAULT;
++ if (copy_from_user(&qb, (void *)arg, sizeof(qb)))
++ break;
++ err = do_vzquotactl(qb.cmd, qb.quota_id,
++ qb.qstat, qb.ve_root);
++ break;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ case VZCTL_QUOTA_UGID_CTL:
++ err = -EFAULT;
++ if (copy_from_user(&qub, (void *)arg, sizeof(qub)))
++ break;
++ err = do_vzquotaugidctl(&qub);
++ break;
++#endif
++ default:
++ err = -ENOTTY;
++ }
++ might_sleep(); /* debug */
++ return err;
++}
++
++static struct vzioctlinfo vzdqcalls = {
++ .type = VZDQCTLTYPE,
++ .func = vzquota_ioctl,
++ .owner = THIS_MODULE,
++};
++
++/**
++ * vzquota_dstat - get quota usage info for virtual superblock
++ */
++static int vzquota_dstat(struct super_block *super, struct dq_stat *qstat)
++{
++ struct vz_quota_master *qmblk;
++
++ qmblk = vzquota_find_qmblk(super);
++ if (qmblk == NULL)
++ return -ENOENT;
++ if (qmblk == VZ_QUOTA_BAD) {
++ memset(qstat, 0, sizeof(*qstat));
++ return 0;
++ }
++
++ qmblk_data_read_lock(qmblk);
++ memcpy(qstat, &qmblk->dq_stat, sizeof(*qstat));
++ qmblk_data_read_unlock(qmblk);
++ qmblk_put(qmblk);
++ return 0;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Init/exit helpers
++ *
++ * ---------------------------------------------------------------------*/
++
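++/*
++ * vzquota_cache_init - create the vz_quota_master slab cache and
++ * initialize the master hash table chains.
++ */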
++static int vzquota_cache_init(void)
++{
++ int i;
++
++ vzquota_cachep = kmem_cache_create("vz_quota_master",
++ sizeof(struct vz_quota_master),
++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
++ if (vzquota_cachep == NULL) {
++ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
++ goto nomem2;
++ }
++ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
++ INIT_LIST_HEAD(&vzquota_hash_table[i]);
++
++ return 0;
++
++nomem2:
++ return -ENOMEM;
++}
++
++static void vzquota_cache_release(void)
++{
++ int i;
++
++ /* sanity check */
++ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
++ if (!list_empty(&vzquota_hash_table[i]))
++ BUG();
++
++ /* release caches */
++ if (kmem_cache_destroy(vzquota_cachep))
++ printk(KERN_ERR
++ "VZQUOTA: vz_quota_master kmem_cache_destroy failed\n");
++ vzquota_cachep = NULL;
++}
++
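++/*
++ * quota_notifier_call - virtinfo notifier: pin the module and reset the
++ * superblock quota info on VIRTINFO_QUOTA_ON, drop the reference on
++ * VIRTINFO_QUOTA_OFF, and report usage via vzquota_dstat() on GETSTAT.
++ */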
++static int quota_notifier_call(struct vnotifier_block *self,
++ unsigned long n, void *data, int err)
++{
++ struct virt_info_quota *viq;
++ struct super_block *sb;
++
++ viq = (struct virt_info_quota *)data;
++ switch (n) {
++ case VIRTINFO_QUOTA_ON:
++ err = NOTIFY_BAD;
++ if (!try_module_get(THIS_MODULE))
++ break;
++ sb = viq->super;
++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
++ err = NOTIFY_OK;
++ break;
++ case VIRTINFO_QUOTA_OFF:
++ module_put(THIS_MODULE);
++ err = NOTIFY_OK;
++ break;
++ case VIRTINFO_QUOTA_GETSTAT:
++ err = NOTIFY_BAD;
++ if (vzquota_dstat(viq->super, viq->qstat))
++ break;
++ err = NOTIFY_OK;
++ break;
++ }
++ return err;
++}
++
++struct vnotifier_block quota_notifier_block = {
++ .notifier_call = quota_notifier_call,
++ .priority = INT_MAX,
++};
++
++/* ----------------------------------------------------------------------
++ *
++ * Init/exit procedures
++ *
++ * ---------------------------------------------------------------------*/
++
++static int __init vzquota_init(void)
++{
++ int err;
++
++ if ((err = vzquota_cache_init()) != 0)
++ goto out_cache;
++
++ if ((err = vzquota_proc_init()) != 0)
++ goto out_proc;
++
++#ifdef CONFIG_VZ_QUOTA_UGID
++ if ((err = vzquota_ugid_init()) != 0)
++ goto out_ugid;
++#endif
++
++ init_MUTEX(&vz_quota_sem);
++ vzioctl_register(&vzdqcalls);
++ virtinfo_notifier_register(VITYPE_QUOTA, &quota_notifier_block);
++#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS)
++ vzaquota_init();
++#endif
++
++ return 0;
++
++#ifdef CONFIG_VZ_QUOTA_UGID
++out_ugid:
++ vzquota_proc_release();
++#endif
++out_proc:
++ vzquota_cache_release();
++out_cache:
++ return err;
++}
++
++#if defined(VZ_QUOTA_UNLOAD)
++static void __exit vzquota_release(void)
++{
++ virtinfo_notifier_unregister(VITYPE_QUOTA, &quota_notifier_block);
++ vzioctl_unregister(&vzdqcalls);
++#ifdef CONFIG_VZ_QUOTA_UGID
++#ifdef CONFIG_PROC_FS
++ vzaquota_fini();
++#endif
++ vzquota_ugid_release();
++#endif
++ vzquota_proc_release();
++ vzquota_cache_release();
++}
++#endif
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo Disk Quota");
++MODULE_LICENSE("GPL v2");
++
++module_init(vzquota_init)
++#if defined(VZ_QUOTA_UNLOAD)
++module_exit(vzquota_release)
++#endif
+diff -uprN linux-2.6.15.orig/include/asm-i386/elf.h linux-2.6.15-ve025stab014/include/asm-i386/elf.h
+--- linux-2.6.15.orig/include/asm-i386/elf.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-i386/elf.h 2006-01-27 14:48:08.000000000 +0300
+@@ -108,7 +108,7 @@ typedef struct user_fxsr_struct elf_fpxr
+ For the moment, we have only optimizations for the Intel generations,
+ but that could change... */
+
+-#define ELF_PLATFORM (system_utsname.machine)
++#define ELF_PLATFORM (ve_utsname.machine)
+
+ #ifdef __KERNEL__
+ #define SET_PERSONALITY(ex, ibcs2) do { } while (0)
+diff -uprN linux-2.6.15.orig/include/asm-i386/mman.h linux-2.6.15-ve025stab014/include/asm-i386/mman.h
+--- linux-2.6.15.orig/include/asm-i386/mman.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-i386/mman.h 2006-01-27 14:48:05.000000000 +0300
+@@ -22,6 +22,7 @@
+ #define MAP_NORESERVE 0x4000 /* don't check for reservations */
+ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
+ #define MAP_NONBLOCK 0x10000 /* do not block on IO */
++#define MAP_EXECPRIO 0x20000 /* do soft ubc charge */
+
+ #define MS_ASYNC 1 /* sync memory asynchronously */
+ #define MS_INVALIDATE 2 /* invalidate the caches */
+diff -uprN linux-2.6.15.orig/include/asm-i386/timex.h linux-2.6.15-ve025stab014/include/asm-i386/timex.h
+--- linux-2.6.15.orig/include/asm-i386/timex.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-i386/timex.h 2006-01-27 14:48:08.000000000 +0300
+@@ -36,13 +36,17 @@ static inline cycles_t get_cycles (void)
+ {
+ unsigned long long ret=0;
+
+-#ifndef CONFIG_X86_TSC
+- if (!cpu_has_tsc)
+- return 0;
+-#endif
+-
+ #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+ rdtscll(ret);
++#elif defined(CONFIG_VE)
++ /*
++ * get_cycles is used in the following calculations:
++	 * - VPS idle and iowait times in kernel/sched.h
++	 * - task's sleep time to be shown with SysRq-t
++ * - kstat latencies in linux/vzstat.h
++ * - sched latency via wakeup_stamp in linux/ve_task.h
++ */
++#warning "some of VPS statistics won't be correct without get_cycles() (kstat_lat, ve_idle, etc)"
+ #endif
+ return ret;
+ }
+diff -uprN linux-2.6.15.orig/include/asm-i386/unistd.h linux-2.6.15-ve025stab014/include/asm-i386/unistd.h
+--- linux-2.6.15.orig/include/asm-i386/unistd.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-i386/unistd.h 2006-01-27 14:48:05.000000000 +0300
+@@ -299,8 +299,11 @@
+ #define __NR_inotify_init 291
+ #define __NR_inotify_add_watch 292
+ #define __NR_inotify_rm_watch 293
+-
+-#define NR_syscalls 294
++#define __NR_getluid 510
++#define __NR_setluid 511
++#define __NR_setublimit 512
++#define __NR_ubstat 513
++#define NR_syscalls 513
+
+ /*
+ * user-visible error numbers are in the range -1 - -128: see
+diff -uprN linux-2.6.15.orig/include/asm-ia64/mman.h linux-2.6.15-ve025stab014/include/asm-ia64/mman.h
+--- linux-2.6.15.orig/include/asm-ia64/mman.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-ia64/mman.h 2006-01-27 14:48:05.000000000 +0300
+@@ -30,6 +30,7 @@
+ #define MAP_NORESERVE 0x04000 /* don't check for reservations */
+ #define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */
+ #define MAP_NONBLOCK 0x10000 /* do not block on IO */
++#define MAP_EXECPRIO 0x20000 /* soft ubc charge */
+
+ #define MS_ASYNC 1 /* sync memory asynchronously */
+ #define MS_INVALIDATE 2 /* invalidate the caches */
+diff -uprN linux-2.6.15.orig/include/asm-ia64/pgalloc.h linux-2.6.15-ve025stab014/include/asm-ia64/pgalloc.h
+--- linux-2.6.15.orig/include/asm-ia64/pgalloc.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-ia64/pgalloc.h 2006-01-27 14:48:05.000000000 +0300
+@@ -20,6 +20,8 @@
+ #include <linux/page-flags.h>
+ #include <linux/threads.h>
+
++#include <ub/ub_mem.h>
++
+ #include <asm/mmu_context.h>
+
+ DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist);
+@@ -38,7 +40,7 @@ static inline long pgtable_quicklist_tot
+ return ql_size;
+ }
+
+-static inline void *pgtable_quicklist_alloc(void)
++static inline void *pgtable_quicklist_alloc(int charge)
+ {
+ unsigned long *ret = NULL;
+
+@@ -46,13 +48,19 @@ static inline void *pgtable_quicklist_al
+
+ ret = pgtable_quicklist;
+ if (likely(ret != NULL)) {
++ if (ub_page_charge(virt_to_page(ret), 0,
++ charge ? __GFP_UBC|__GFP_SOFT_UBC : 0))
++ goto out;
++
+ pgtable_quicklist = (unsigned long *)(*ret);
+ ret[0] = 0;
+ --pgtable_quicklist_size;
++out:
+ preempt_enable();
+ } else {
+ preempt_enable();
+- ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
++ ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO |
++ (charge ? __GFP_UBC | __GFP_SOFT_UBC : 0));
+ }
+
+ return ret;
+@@ -70,6 +78,7 @@ static inline void pgtable_quicklist_fre
+ #endif
+
+ preempt_disable();
++ ub_page_uncharge(virt_to_page(pgtable_entry), 0);
+ *(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist;
+ pgtable_quicklist = (unsigned long *)pgtable_entry;
+ ++pgtable_quicklist_size;
+@@ -78,7 +87,7 @@ static inline void pgtable_quicklist_fre
+
+ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+- return pgtable_quicklist_alloc();
++ return pgtable_quicklist_alloc(1);
+ }
+
+ static inline void pgd_free(pgd_t * pgd)
+@@ -95,7 +104,7 @@ pgd_populate(struct mm_struct *mm, pgd_t
+
+ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+- return pgtable_quicklist_alloc();
++ return pgtable_quicklist_alloc(1);
+ }
+
+ static inline void pud_free(pud_t * pud)
+@@ -113,7 +122,7 @@ pud_populate(struct mm_struct *mm, pud_t
+
+ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+- return pgtable_quicklist_alloc();
++ return pgtable_quicklist_alloc(1);
+ }
+
+ static inline void pmd_free(pmd_t * pmd)
+@@ -138,13 +147,13 @@ pmd_populate_kernel(struct mm_struct *mm
+ static inline struct page *pte_alloc_one(struct mm_struct *mm,
+ unsigned long addr)
+ {
+- return virt_to_page(pgtable_quicklist_alloc());
++ return virt_to_page(pgtable_quicklist_alloc(1));
+ }
+
+ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long addr)
+ {
+- return pgtable_quicklist_alloc();
++ return pgtable_quicklist_alloc(0);
+ }
+
+ static inline void pte_free(struct page *pte)
+diff -uprN linux-2.6.15.orig/include/asm-ia64/processor.h linux-2.6.15-ve025stab014/include/asm-ia64/processor.h
+--- linux-2.6.15.orig/include/asm-ia64/processor.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-ia64/processor.h 2006-01-27 14:48:08.000000000 +0300
+@@ -306,7 +306,7 @@ struct thread_struct {
+ regs->loadrs = 0; \
+ regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */ \
+ regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \
+- if (unlikely(!current->mm->dumpable)) { \
++ if (unlikely(!current->mm->dumpable || !current->mm->vps_dumpable)) { \
+ /* \
+ * Zap scratch regs to avoid leaking bits between processes with different \
+ * uid/privileges. \
+diff -uprN linux-2.6.15.orig/include/asm-ia64/unistd.h linux-2.6.15-ve025stab014/include/asm-ia64/unistd.h
+--- linux-2.6.15.orig/include/asm-ia64/unistd.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-ia64/unistd.h 2006-01-27 14:48:05.000000000 +0300
+@@ -269,12 +269,17 @@
+ #define __NR_inotify_init 1277
+ #define __NR_inotify_add_watch 1278
+ #define __NR_inotify_rm_watch 1279
++#define __NR_getluid 1505
++#define __NR_setluid 1506
++#define __NR_setublimit 1507
++#define __NR_ubstat 1508
+
+ #ifdef __KERNEL__
+
+ #include <linux/config.h>
+
+-#define NR_syscalls 256 /* length of syscall table */
++/* length of syscall table */
++#define NR_syscalls (__NR_ubstat - __NR_ni_syscall + 1)
+
+ #define __ARCH_WANT_SYS_RT_SIGACTION
+
+diff -uprN linux-2.6.15.orig/include/asm-powerpc/pgalloc.h linux-2.6.15-ve025stab014/include/asm-powerpc/pgalloc.h
+--- linux-2.6.15.orig/include/asm-powerpc/pgalloc.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-powerpc/pgalloc.h 2006-01-27 14:48:05.000000000 +0300
+@@ -32,7 +32,8 @@ extern kmem_cache_t *pgtable_cache[];
+
+ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+- return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
++ return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM],
++ GFP_KERNEL_UBC | __GFP_SOFT_UBC);
+ }
+
+ static inline void pgd_free(pgd_t *pgd)
+@@ -47,7 +48,7 @@ static inline void pgd_free(pgd_t *pgd)
+ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+ return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
+- GFP_KERNEL|__GFP_REPEAT);
++ GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
+ }
+
+ static inline void pud_free(pud_t *pud)
+@@ -83,7 +84,7 @@ static inline void pmd_populate_kernel(s
+ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+ return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
+- GFP_KERNEL|__GFP_REPEAT);
++ GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
+ }
+
+ static inline void pmd_free(pmd_t *pmd)
+@@ -91,17 +92,21 @@ static inline void pmd_free(pmd_t *pmd)
+ kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
+ }
+
++static inline pte_t *__pte_alloc(gfp_t flags)
++{
++ return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], flags);
++}
++
+ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
+ {
+- return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
+- GFP_KERNEL|__GFP_REPEAT);
++ return __pte_alloc(GFP_KERNEL | __GFP_REPEAT);
+ }
+
+ static inline struct page *pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+ {
+- return virt_to_page(pte_alloc_one_kernel(mm, address));
++ return virt_to_page(__pte_alloc(GFP_KERNEL_UBC | __GFP_SOFT_UBC));
+ }
+
+ static inline void pte_free_kernel(pte_t *pte)
+diff -uprN linux-2.6.15.orig/include/asm-powerpc/unistd.h linux-2.6.15-ve025stab014/include/asm-powerpc/unistd.h
+--- linux-2.6.15.orig/include/asm-powerpc/unistd.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-powerpc/unistd.h 2006-01-27 14:48:05.000000000 +0300
+@@ -297,7 +297,12 @@
+ #define __NR_inotify_add_watch 276
+ #define __NR_inotify_rm_watch 277
+
+-#define __NR_syscalls 278
++#define __NR_getluid 410
++#define __NR_setluid 411
++#define __NR_setublimit 412
++#define __NR_ubstat 413
++
++#define __NR_syscalls 414
+
+ #ifdef __KERNEL__
+ #define __NR__exit __NR_exit
+diff -uprN linux-2.6.15.orig/include/asm-s390/pgalloc.h linux-2.6.15-ve025stab014/include/asm-s390/pgalloc.h
+--- linux-2.6.15.orig/include/asm-s390/pgalloc.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-s390/pgalloc.h 2006-01-27 14:48:05.000000000 +0300
+@@ -34,12 +34,12 @@ static inline pgd_t *pgd_alloc(struct mm
+ int i;
+
+ #ifndef __s390x__
+- pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,1);
++ pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 1);
+ if (pgd != NULL)
+ for (i = 0; i < USER_PTRS_PER_PGD; i++)
+ pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE));
+ #else /* __s390x__ */
+- pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,2);
++ pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 2);
+ if (pgd != NULL)
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ pgd_clear(pgd + i);
+@@ -72,7 +72,7 @@ static inline pmd_t * pmd_alloc_one(stru
+ pmd_t *pmd;
+ int i;
+
+- pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, 2);
++ pmd = (pmd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 2);
+ if (pmd != NULL) {
+ for (i=0; i < PTRS_PER_PMD; i++)
+ pmd_clear(pmd+i);
+@@ -118,16 +118,13 @@ pmd_populate(struct mm_struct *mm, pmd_t
+ pmd_populate_kernel(mm, pmd, (pte_t *)((page-mem_map) << PAGE_SHIFT));
+ }
+
+-/*
+- * page table entry allocation/free routines.
+- */
+-static inline pte_t *
+-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
++static inline pte_t *pte_alloc(struct mm_struct *mm, unsigned long vmaddr,
++ gfp_t mask)
+ {
+ pte_t *pte;
+ int i;
+
+- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
++ pte = (pte_t *)__get_free_page(mask);
+ if (pte != NULL) {
+ for (i=0; i < PTRS_PER_PTE; i++) {
+ pte_clear(mm, vmaddr, pte+i);
+@@ -137,10 +134,20 @@ pte_alloc_one_kernel(struct mm_struct *m
+ return pte;
+ }
+
++/*
++ * page table entry allocation/free routines.
++ */
++static inline pte_t *
++pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
++{
++ return pte_alloc(mm, vmaddr, GFP_KERNEL | __GFP_REPEAT);
++}
++
+ static inline struct page *
+ pte_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
+ {
+- pte_t *pte = pte_alloc_one_kernel(mm, vmaddr);
++ pte_t *pte = pte_alloc(mm, vmaddr, GFP_KERNEL_UBC | __GFP_SOFT_UBC |
++ __GFP_REPEAT);
+ if (pte)
+ return virt_to_page(pte);
+ return 0;
+diff -uprN linux-2.6.15.orig/include/asm-s390/unistd.h linux-2.6.15-ve025stab014/include/asm-s390/unistd.h
+--- linux-2.6.15.orig/include/asm-s390/unistd.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-s390/unistd.h 2006-01-27 14:48:05.000000000 +0300
+@@ -279,8 +279,12 @@
+ #define __NR_inotify_init 284
+ #define __NR_inotify_add_watch 285
+ #define __NR_inotify_rm_watch 286
++#define __NR_getluid 410
++#define __NR_setluid 411
++#define __NR_setublimit 412
++#define __NR_ubstat 413
+
+-#define NR_syscalls 287
++#define NR_syscalls 414
+
+ /*
+ * There are some system calls that are not present on 64 bit, some
+diff -uprN linux-2.6.15.orig/include/asm-x86_64/mman.h linux-2.6.15-ve025stab014/include/asm-x86_64/mman.h
+--- linux-2.6.15.orig/include/asm-x86_64/mman.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-x86_64/mman.h 2006-01-27 14:48:05.000000000 +0300
+@@ -23,6 +23,7 @@
+ #define MAP_NORESERVE 0x4000 /* don't check for reservations */
+ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
+ #define MAP_NONBLOCK 0x10000 /* do not block on IO */
++#define MAP_EXECPRIO 0x20000 /* soft ubc charge */
+
+ #define MS_ASYNC 1 /* sync memory asynchronously */
+ #define MS_INVALIDATE 2 /* invalidate the caches */
+diff -uprN linux-2.6.15.orig/include/asm-x86_64/pgalloc.h linux-2.6.15-ve025stab014/include/asm-x86_64/pgalloc.h
+--- linux-2.6.15.orig/include/asm-x86_64/pgalloc.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-x86_64/pgalloc.h 2006-01-27 14:48:05.000000000 +0300
+@@ -31,12 +31,14 @@ static inline void pmd_free(pmd_t *pmd)
+
+ static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
+ {
+- return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
++ return (pmd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
++ __GFP_SOFT_UBC);
+ }
+
+ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+- return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
++ return (pud_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
++ __GFP_SOFT_UBC);
+ }
+
+ static inline void pud_free (pud_t *pud)
+@@ -48,7 +50,8 @@ static inline void pud_free (pud_t *pud)
+ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+ unsigned boundary;
+- pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
++ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL_UBC|__GFP_REPEAT|
++ __GFP_SOFT_UBC);
+ if (!pgd)
+ return NULL;
+ /*
+@@ -77,7 +80,8 @@ static inline pte_t *pte_alloc_one_kerne
+
+ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+ {
+- void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
++ void *p = (void *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
++ __GFP_SOFT_UBC);
+ if (!p)
+ return NULL;
+ return virt_to_page(p);
+diff -uprN linux-2.6.15.orig/include/asm-x86_64/unistd.h linux-2.6.15-ve025stab014/include/asm-x86_64/unistd.h
+--- linux-2.6.15.orig/include/asm-x86_64/unistd.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/asm-x86_64/unistd.h 2006-01-27 14:48:05.000000000 +0300
+@@ -571,8 +571,16 @@ __SYSCALL(__NR_inotify_init, sys_inotify
+ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
+ #define __NR_inotify_rm_watch 255
+ __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
++#define __NR_getluid 500
++__SYSCALL(__NR_getluid, sys_getluid)
++#define __NR_setluid 501
++__SYSCALL(__NR_setluid, sys_setluid)
++#define __NR_setublimit 502
++__SYSCALL(__NR_setublimit, sys_setublimit)
++#define __NR_ubstat 503
++__SYSCALL(__NR_ubstat, sys_ubstat)
+
+-#define __NR_syscall_max __NR_inotify_rm_watch
++#define __NR_syscall_max __NR_ubstat
+ #ifndef __NO_STUBS
+
+ /* user-visible error numbers are in the range -1 - -4095 */
+diff -uprN linux-2.6.15.orig/include/linux/capability.h linux-2.6.15-ve025stab014/include/linux/capability.h
+--- linux-2.6.15.orig/include/linux/capability.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/capability.h 2006-01-27 14:48:08.000000000 +0300
+@@ -145,12 +145,9 @@ typedef __u32 kernel_cap_t;
+
+ #define CAP_NET_BROADCAST 11
+
+-/* Allow interface configuration */
+ /* Allow administration of IP firewall, masquerading and accounting */
+ /* Allow setting debug option on sockets */
+ /* Allow modification of routing tables */
+-/* Allow setting arbitrary process / process group ownership on
+- sockets */
+ /* Allow binding to any address for transparent proxying */
+ /* Allow setting TOS (type of service) */
+ /* Allow setting promiscuous mode */
+@@ -199,24 +196,19 @@ typedef __u32 kernel_cap_t;
+
+ /* Allow configuration of the secure attention key */
+ /* Allow administration of the random device */
+-/* Allow examination and configuration of disk quotas */
+ /* Allow configuring the kernel's syslog (printk behaviour) */
+ /* Allow setting the domainname */
+ /* Allow setting the hostname */
+ /* Allow calling bdflush() */
+-/* Allow mount() and umount(), setting up new smb connection */
++/* Allow setting up new smb connection */
+ /* Allow some autofs root ioctls */
+ /* Allow nfsservctl */
+ /* Allow VM86_REQUEST_IRQ */
+ /* Allow to read/write pci config on alpha */
+ /* Allow irix_prctl on mips (setstacksize) */
+ /* Allow flushing all cache on m68k (sys_cacheflush) */
+-/* Allow removing semaphores */
+-/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
+- and shared memory */
+ /* Allow locking/unlocking of shared memory segment */
+ /* Allow turning swap on/off */
+-/* Allow forged pids on socket credentials passing */
+ /* Allow setting readahead and flushing buffers on block devices */
+ /* Allow setting geometry in floppy driver */
+ /* Allow turning DMA on/off in xd driver */
+@@ -287,7 +279,52 @@ typedef __u32 kernel_cap_t;
+
+ #define CAP_AUDIT_CONTROL 30
+
++/*
++ * Important note: the VZ capabilities intersect with CAP_AUDIT
++ * for compatibility reasons; this is harmless.
++ * Both VZ and Audit/SELinux caps are disabled in VPSs.
++ */
++
++/* Allow access to all information. Otherwise some structures will be
++   hidden to keep different Virtual Environments on the same node from
++   interacting with each other */
++#define CAP_SETVEID 29
++
++#define CAP_VE_ADMIN 30
++
+ #ifdef __KERNEL__
++
++#include <linux/config.h>
++
++#ifdef CONFIG_VE
++
++/* Replacement for CAP_NET_ADMIN:
++   rights delegated to the Virtual Environment for its network administration.
++ For now the following rights have been delegated:
++
++ Allow setting arbitrary process / process group ownership on sockets
++ Allow interface configuration
++ */
++#define CAP_VE_NET_ADMIN CAP_VE_ADMIN
++
++/* Replacement for CAP_SYS_ADMIN:
++   rights delegated to the Virtual Environment for its administration.
++ For now the following rights have been delegated:
++ */
++/* Allow mount/umount/remount */
++/* Allow examination and configuration of disk quotas */
++/* Allow removing semaphores */
++/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
++ and shared memory */
++/* Allow locking/unlocking of shared memory segment */
++/* Allow forged pids on socket credentials passing */
++
++#define CAP_VE_SYS_ADMIN CAP_VE_ADMIN
++#else
++#define CAP_VE_NET_ADMIN CAP_NET_ADMIN
++#define CAP_VE_SYS_ADMIN CAP_SYS_ADMIN
++#endif
++
+ /*
+ * Bounding set
+ */
+@@ -351,9 +388,14 @@ static inline kernel_cap_t cap_invert(ke
+ #define cap_issubset(a,set) (!(cap_t(a) & ~cap_t(set)))
+
+ #define cap_clear(c) do { cap_t(c) = 0; } while(0)
++#ifndef CONFIG_VE
+ #define cap_set_full(c) do { cap_t(c) = ~0; } while(0)
++#else
++#define cap_set_full(c) \
++ do {cap_t(c) = ve_is_super(get_exec_env()) ? ~0 : \
++ get_exec_env()->cap_default; } while(0)
++#endif
+ #define cap_mask(c,mask) do { cap_t(c) &= cap_t(mask); } while(0)
+-
+ #define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK)
+
+ #endif /* __KERNEL__ */
+diff -uprN linux-2.6.15.orig/include/linux/dcache.h linux-2.6.15-ve025stab014/include/linux/dcache.h
+--- linux-2.6.15.orig/include/linux/dcache.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/dcache.h 2006-01-27 14:48:08.000000000 +0300
+@@ -10,6 +10,8 @@
+ #include <linux/rcupdate.h>
+ #include <asm/bug.h>
+
++#include <ub/ub_dcache.h>
++
+ struct nameidata;
+ struct vfsmount;
+
+@@ -105,6 +107,9 @@ struct dentry {
+ struct rcu_head d_rcu;
+ struct dcookie_struct *d_cookie; /* cookie, if any */
+ int d_mounted;
++#ifdef CONFIG_USER_RESOURCE
++ struct dentry_beancounter dentry_bc;
++#endif
+ unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
+ };
+
+@@ -155,7 +160,11 @@ d_iput: no no no yes
+
+ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
+ #define DCACHE_UNHASHED 0x0010
++#define DCACHE_VIRTUAL 0x0100 /* ve accessible */
++
++extern void mark_tree_virtual(struct vfsmount *m, struct dentry *d);
+
++extern kmem_cache_t *dentry_cache;
+ extern spinlock_t dcache_lock;
+
+ /**
+@@ -209,7 +218,8 @@ extern struct dentry * d_alloc_anon(stru
+ extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
+ extern void shrink_dcache_sb(struct super_block *);
+ extern void shrink_dcache_parent(struct dentry *);
+-extern void shrink_dcache_anon(struct hlist_head *);
++extern void shrink_dcache_anon(struct super_block *);
++extern void dcache_shrinker_wait_sb(struct super_block *sb);
+ extern int d_invalidate(struct dentry *);
+
+ /* only used at mount-time */
+@@ -271,6 +281,7 @@ extern struct dentry * __d_lookup(struct
+ /* validate "insecure" dentry pointer */
+ extern int d_validate(struct dentry *, struct dentry *);
+
++extern int d_root_check(struct dentry *, struct vfsmount *);
+ extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
+
+ /* Allocation counts.. */
+@@ -291,6 +302,8 @@ extern char * d_path(struct dentry *, st
+ static inline struct dentry *dget(struct dentry *dentry)
+ {
+ if (dentry) {
++ if (ub_dget_testone(dentry))
++ BUG();
+ BUG_ON(!atomic_read(&dentry->d_count));
+ atomic_inc(&dentry->d_count);
+ }
+@@ -334,6 +347,8 @@ extern struct dentry *lookup_create(stru
+
+ extern int sysctl_vfs_cache_pressure;
+
++extern int check_area_access_ve(struct dentry *, struct vfsmount *);
++extern int check_area_execute_ve(struct dentry *, struct vfsmount *);
+ #endif /* __KERNEL__ */
+
+ #endif /* __LINUX_DCACHE_H */
+diff -uprN linux-2.6.15.orig/include/linux/devpts_fs.h linux-2.6.15-ve025stab014/include/linux/devpts_fs.h
+--- linux-2.6.15.orig/include/linux/devpts_fs.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/devpts_fs.h 2006-01-27 14:48:08.000000000 +0300
+@@ -21,6 +21,15 @@ int devpts_pty_new(struct tty_struct *tt
+ struct tty_struct *devpts_get_tty(int number); /* get tty structure */
+ void devpts_pty_kill(int number); /* unlink */
+
++struct devpts_config {
++ int setuid;
++ int setgid;
++ uid_t uid;
++ gid_t gid;
++ umode_t mode;
++};
++
++extern struct devpts_config devpts_config;
+ #else
+
+ /* Dummy stubs in the no-pty case */
+diff -uprN linux-2.6.15.orig/include/linux/faudit.h linux-2.6.15-ve025stab014/include/linux/faudit.h
+--- linux-2.6.15.orig/include/linux/faudit.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/faudit.h 2006-01-27 14:48:07.000000000 +0300
+@@ -0,0 +1,38 @@
++/*
++ * include/linux/faudit.h
++ *
++ * Copyright (C) 2005 SWSoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __FAUDIT_H_
++#define __FAUDIT_H_
++
++#include <linux/config.h>
++#include <linux/virtinfo.h>
++
++struct vfsmount;
++struct dentry;
++struct pt_regs;
++
++struct faudit_regs_arg {
++ int err;
++ struct pt_regs *regs;
++};
++
++struct faudit_stat_arg {
++ int err;
++ struct vfsmount *mnt;
++ struct dentry *dentry;
++ void *stat;
++};
++
++#define VIRTINFO_FAUDIT (0)
++#define VIRTINFO_FAUDIT_STAT (VIRTINFO_FAUDIT + 0)
++#define VIRTINFO_FAUDIT_STATFS (VIRTINFO_FAUDIT + 1)
++#define VIRTINFO_FAUDIT_STATFS64 (VIRTINFO_FAUDIT + 2)
++
++#endif
+diff -uprN linux-2.6.15.orig/include/linux/fs.h linux-2.6.15-ve025stab014/include/linux/fs.h
+--- linux-2.6.15.orig/include/linux/fs.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/fs.h 2006-01-27 14:48:08.000000000 +0300
+@@ -7,6 +7,7 @@
+ */
+
+ #include <linux/config.h>
++#include <linux/ve_owner.h>
+ #include <linux/limits.h>
+ #include <linux/ioctl.h>
+ #include <linux/rcuref.h>
+@@ -64,6 +65,7 @@ extern int dir_notify_enable;
+ #define FMODE_LSEEK 4
+ #define FMODE_PREAD 8
+ #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */
++#define FMODE_QUOTACTL 4
+
+ #define RW_MASK 1
+ #define RWA_MASK 2
+@@ -83,6 +85,7 @@ extern int dir_notify_enable;
+ /* public flags for file_system_type */
+ #define FS_REQUIRES_DEV 1
+ #define FS_BINARY_MOUNTDATA 2
++#define FS_VIRTUALIZED 64 /* Can mount this fstype inside ve */
+ #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
+ #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
+ * as nfs_rename() will be cleaned up
+@@ -301,6 +304,9 @@ struct iattr {
+ * Includes for diskquotas.
+ */
+ #include <linux/quota.h>
++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
++#include <linux/vzquota_qlnk.h>
++#endif
+
+ /*
+ * oh the beauties of C type declarations.
+@@ -464,6 +470,9 @@ struct inode {
+ #ifdef CONFIG_QUOTA
+ struct dquot *i_dquot[MAXQUOTAS];
+ #endif
++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
++ struct vz_quota_ilink i_qlnk;
++#endif
+ /* These three should probably be a union */
+ struct list_head i_devices;
+ struct pipe_inode_info *i_pipe;
+@@ -498,6 +507,8 @@ struct inode {
+ #endif
+ };
+
++extern kmem_cache_t *inode_cachep;
++
+ /*
+ * NOTE: in a 32bit arch with a preemptable kernel and
+ * an UP compile the i_size_read/write must be atomic
+@@ -617,7 +628,10 @@ struct file {
+ spinlock_t f_ep_lock;
+ #endif /* #ifdef CONFIG_EPOLL */
+ struct address_space *f_mapping;
++ struct ve_struct *owner_env;
+ };
++DCL_VE_OWNER_PROTO(FILP, struct file, owner_env)
++
+ extern spinlock_t files_lock;
+ #define file_list_lock() spin_lock(&files_lock);
+ #define file_list_unlock() spin_unlock(&files_lock);
+@@ -681,6 +695,9 @@ struct file_lock {
+ struct file *fl_file;
+ unsigned char fl_flags;
+ unsigned char fl_type;
++#ifdef CONFIG_USER_RESOURCE
++ unsigned char fl_charged;
++#endif
+ loff_t fl_start;
+ loff_t fl_end;
+
+@@ -803,6 +820,7 @@ struct super_block {
+ struct list_head s_io; /* parked for writeback */
+ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
+ struct list_head s_files;
++ struct list_head s_dshrinkers; /* active dcache shrinkers */
+
+ struct block_device *s_bdev;
+ struct list_head s_instances;
+@@ -1059,6 +1077,8 @@ struct super_operations {
+
+ ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
+ ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
++
++ struct inode *(*get_quota_root)(struct super_block *);
+ };
+
+ /* Inode state bits. Protected by inode_lock. */
+@@ -1221,8 +1241,14 @@ struct file_system_type {
+ struct module *owner;
+ struct file_system_type * next;
+ struct list_head fs_supers;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(FSTYPE, struct file_system_type, owner_env)
++
++void get_filesystem(struct file_system_type *fs);
++void put_filesystem(struct file_system_type *fs);
++
+ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data,
+ int (*fill_super)(struct super_block *, void *, int));
+@@ -1260,6 +1286,7 @@ extern struct vfsmount *kern_mount(struc
+ extern int may_umount_tree(struct vfsmount *);
+ extern int may_umount(struct vfsmount *);
+ extern void umount_tree(struct vfsmount *, int, struct list_head *);
++#define kern_umount mntput
+ extern void release_mounts(struct list_head *);
+ extern long do_mount(char *, char *, char *, unsigned long, void *);
+ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
+@@ -1365,7 +1392,7 @@ extern int chrdev_open(struct inode *, s
+ #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
+ extern const char *__bdevname(dev_t, char *buffer);
+ extern const char *bdevname(struct block_device *bdev, char *buffer);
+-extern struct block_device *lookup_bdev(const char *);
++extern struct block_device *lookup_bdev(const char *, int mode);
+ extern struct block_device *open_bdev_excl(const char *, int, void *);
+ extern void close_bdev_excl(struct block_device *);
+
+diff -uprN linux-2.6.15.orig/include/linux/genhd.h linux-2.6.15-ve025stab014/include/linux/genhd.h
+--- linux-2.6.15.orig/include/linux/genhd.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/genhd.h 2006-01-27 14:48:08.000000000 +0300
+@@ -421,6 +421,7 @@ static inline struct block_device *bdget
+ return bdget(MKDEV(disk->major, disk->first_minor) + index);
+ }
+
++extern struct subsystem block_subsys;
+ #endif
+
+ #endif
+diff -uprN linux-2.6.15.orig/include/linux/gfp.h linux-2.6.15-ve025stab014/include/linux/gfp.h
+--- linux-2.6.15.orig/include/linux/gfp.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/gfp.h 2006-01-27 14:48:05.000000000 +0300
+@@ -47,6 +47,8 @@ struct vm_area_struct;
+ #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
+ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
++#define __GFP_UBC ((__force gfp_t)0x40000u)/* charge kmem in buddy and slab */
++#define __GFP_SOFT_UBC ((__force gfp_t)0x80000u)/* use soft charging */
+
+ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
+ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+@@ -55,13 +57,16 @@ struct vm_area_struct;
+ #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
+ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
+ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
+- __GFP_NOMEMALLOC|__GFP_HARDWALL)
++ __GFP_NOMEMALLOC|__GFP_HARDWALL| \
++ __GFP_UBC|__GFP_SOFT_UBC)
+
+ #define GFP_ATOMIC (__GFP_HIGH)
+ #define GFP_NOIO (__GFP_WAIT)
+ #define GFP_NOFS (__GFP_WAIT | __GFP_IO)
+ #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)
++#define GFP_KERNEL_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC)
+ #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
++#define GFP_USER_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | __GFP_UBC)
+ #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
+ __GFP_HIGHMEM)
+
+diff -uprN linux-2.6.15.orig/include/linux/inetdevice.h linux-2.6.15-ve025stab014/include/linux/inetdevice.h
+--- linux-2.6.15.orig/include/linux/inetdevice.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/inetdevice.h 2006-01-27 14:48:08.000000000 +0300
+@@ -34,6 +34,12 @@ struct ipv4_devconf
+ };
+
+ extern struct ipv4_devconf ipv4_devconf;
++extern struct ipv4_devconf ipv4_devconf_dflt;
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_ipv4_devconf (*(get_exec_env()->_ipv4_devconf))
++#else
++#define ve_ipv4_devconf ipv4_devconf
++#endif
+
+ struct in_device
+ {
+@@ -60,29 +66,29 @@ struct in_device
+ };
+
+ #define IN_DEV_FORWARD(in_dev) ((in_dev)->cnf.forwarding)
+-#define IN_DEV_MFORWARD(in_dev) (ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
+-#define IN_DEV_RPFILTER(in_dev) (ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
+-#define IN_DEV_SOURCE_ROUTE(in_dev) (ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
+-#define IN_DEV_BOOTP_RELAY(in_dev) (ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
+-
+-#define IN_DEV_LOG_MARTIANS(in_dev) (ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
+-#define IN_DEV_PROXY_ARP(in_dev) (ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
+-#define IN_DEV_SHARED_MEDIA(in_dev) (ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
+-#define IN_DEV_TX_REDIRECTS(in_dev) (ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
+-#define IN_DEV_SEC_REDIRECTS(in_dev) (ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
++#define IN_DEV_MFORWARD(in_dev) (ve_ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
++#define IN_DEV_RPFILTER(in_dev) (ve_ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
++#define IN_DEV_SOURCE_ROUTE(in_dev) (ve_ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
++#define IN_DEV_BOOTP_RELAY(in_dev) (ve_ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
++
++#define IN_DEV_LOG_MARTIANS(in_dev) (ve_ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
++#define IN_DEV_PROXY_ARP(in_dev) (ve_ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
++#define IN_DEV_SHARED_MEDIA(in_dev) (ve_ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
++#define IN_DEV_TX_REDIRECTS(in_dev) (ve_ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
++#define IN_DEV_SEC_REDIRECTS(in_dev) (ve_ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
+ #define IN_DEV_IDTAG(in_dev) ((in_dev)->cnf.tag)
+ #define IN_DEV_MEDIUM_ID(in_dev) ((in_dev)->cnf.medium_id)
+ #define IN_DEV_PROMOTE_SECONDARIES(in_dev) (ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries)
+
+ #define IN_DEV_RX_REDIRECTS(in_dev) \
+ ((IN_DEV_FORWARD(in_dev) && \
+- (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
++ (ve_ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
+ || (!IN_DEV_FORWARD(in_dev) && \
+- (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
++ (ve_ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
+
+-#define IN_DEV_ARPFILTER(in_dev) (ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
+-#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
+-#define IN_DEV_ARP_IGNORE(in_dev) (max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
++#define IN_DEV_ARPFILTER(in_dev) (ve_ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
++#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ve_ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
++#define IN_DEV_ARP_IGNORE(in_dev) (max(ve_ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
+
+ struct in_ifaddr
+ {
+@@ -113,6 +119,7 @@ extern u32 inet_select_addr(const struc
+ extern u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope);
+ extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask);
+ extern void inet_forward_change(void);
++extern void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy);
+
+ static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa)
+ {
+@@ -180,6 +187,10 @@ static inline void in_dev_put(struct in_
+ #define __in_dev_put(idev) atomic_dec(&(idev)->refcnt)
+ #define in_dev_hold(idev) atomic_inc(&(idev)->refcnt)
+
++struct ve_struct;
++extern int devinet_sysctl_init(struct ve_struct *);
++extern void devinet_sysctl_fini(struct ve_struct *);
++extern void devinet_sysctl_free(struct ve_struct *);
+ #endif /* __KERNEL__ */
+
+ static __inline__ __u32 inet_make_mask(int logmask)
+diff -uprN linux-2.6.15.orig/include/linux/kdev_t.h linux-2.6.15-ve025stab014/include/linux/kdev_t.h
+--- linux-2.6.15.orig/include/linux/kdev_t.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/kdev_t.h 2006-01-27 14:48:08.000000000 +0300
+@@ -87,6 +87,57 @@ static inline unsigned sysv_minor(u32 de
+ return dev & 0x3ffff;
+ }
+
++#define UNNAMED_MAJOR_COUNT 16
++
++#if UNNAMED_MAJOR_COUNT > 1
++
++extern int unnamed_dev_majors[UNNAMED_MAJOR_COUNT];
++
++static inline dev_t make_unnamed_dev(int idx)
++{
++ /*
++ * Here we transfer bits from 8 to 8+log2(UNNAMED_MAJOR_COUNT) of the
++	 * unnamed device index into the major number.
++ */
++ return MKDEV(unnamed_dev_majors[(idx >> 8) & (UNNAMED_MAJOR_COUNT - 1)],
++ idx & ~((UNNAMED_MAJOR_COUNT - 1) << 8));
++}
++
++static inline int unnamed_dev_idx(dev_t dev)
++{
++ int i;
++ for (i = 0; i < UNNAMED_MAJOR_COUNT &&
++ MAJOR(dev) != unnamed_dev_majors[i]; i++);
++ return MINOR(dev) | (i << 8);
++}
++
++static inline int is_unnamed_dev(dev_t dev)
++{
++ int i;
++ for (i = 0; i < UNNAMED_MAJOR_COUNT &&
++ MAJOR(dev) != unnamed_dev_majors[i]; i++);
++ return i < UNNAMED_MAJOR_COUNT;
++}
++
++#else /* UNNAMED_MAJOR_COUNT */
++
++static inline dev_t make_unnamed_dev(int idx)
++{
++ return MKDEV(0, idx);
++}
++
++static inline int unnamed_dev_idx(dev_t dev)
++{
++ return MINOR(dev);
++}
++
++static inline int is_unnamed_dev(dev_t dev)
++{
++ return MAJOR(dev) == 0;
++}
++
++#endif /* UNNAMED_MAJOR_COUNT */
++
+
+ #else /* __KERNEL__ */
+
+diff -uprN linux-2.6.15.orig/include/linux/kernel.h linux-2.6.15-ve025stab014/include/linux/kernel.h
+--- linux-2.6.15.orig/include/linux/kernel.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/kernel.h 2006-01-27 14:48:08.000000000 +0300
+@@ -128,6 +128,9 @@ asmlinkage int vprintk(const char *fmt,
+ __attribute__ ((format (printf, 1, 0)));
+ asmlinkage int printk(const char * fmt, ...)
+ __attribute__ ((format (printf, 1, 2)));
++asmlinkage int ve_printk(int, const char * fmt, ...)
++ __attribute__ ((format (printf, 2, 3)));
++void prepare_printk(void);
+ #else
+ static inline int vprintk(const char *s, va_list args)
+ __attribute__ ((format (printf, 1, 0)));
+@@ -135,8 +138,16 @@ static inline int vprintk(const char *s,
+ static inline int printk(const char *s, ...)
+ __attribute__ ((format (printf, 1, 2)));
+ static inline int printk(const char *s, ...) { return 0; }
++static inline int ve_printk(int d, const char *s, ...)
++	__attribute__ ((format (printf, 2, 3)));
++static inline int ve_printk(int d, const char *s, ...) { return 0; }
++#define prepare_printk() do { } while (0)
+ #endif
+
++#define VE0_LOG 1
++#define VE_LOG 2
++#define VE_LOG_BOTH (VE0_LOG | VE_LOG)
++
+ unsigned long int_sqrt(unsigned long);
+
+ static inline int __attribute_pure__ long_log2(unsigned long x)
+@@ -171,6 +182,7 @@ extern int oops_in_progress; /* If set,
+ extern __deprecated_for_modules int panic_timeout;
+ extern int panic_on_oops;
+ extern int tainted;
++extern int kernel_text_csum_broken;
+ extern const char *print_tainted(void);
+ extern void add_taint(unsigned);
+
+diff -uprN linux-2.6.15.orig/include/linux/kmem_cache.h linux-2.6.15-ve025stab014/include/linux/kmem_cache.h
+--- linux-2.6.15.orig/include/linux/kmem_cache.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/kmem_cache.h 2006-01-27 14:48:05.000000000 +0300
+@@ -0,0 +1,188 @@
++#ifndef __KMEM_CACHE_H__
++#define __KMEM_CACHE_H__
++
++#include <linux/threads.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/list.h>
++#include <linux/mm.h>
++#include <asm/atomic.h>
++
++/*
++ * SLAB_DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
++ * SLAB_RED_ZONE & SLAB_POISON.
++ * 0 for faster, smaller code (especially in the critical paths).
++ *
++ * SLAB_STATS - 1 to collect stats for /proc/slabinfo.
++ * 0 for faster, smaller code (especially in the critical paths).
++ *
++ * SLAB_FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
++ */
++
++#ifdef CONFIG_DEBUG_SLAB
++#define SLAB_DEBUG 1
++#define SLAB_STATS 1
++#define SLAB_FORCED_DEBUG 1
++#else
++#define SLAB_DEBUG 0
++#define SLAB_STATS 0 /* must be off, see kmem_cache.h */
++#define SLAB_FORCED_DEBUG 0
++#endif
++
++/*
++ * struct array_cache
++ *
++ * Purpose:
++ * - LIFO ordering, to hand out cache-warm objects from _alloc
++ * - reduce the number of linked list operations
++ * - reduce spinlock operations
++ *
++ * The limit is stored in the per-cpu structure to reduce the data cache
++ * footprint.
++ *
++ */
++struct array_cache {
++ unsigned int avail;
++ unsigned int limit;
++ unsigned int batchcount;
++ unsigned int touched;
++ spinlock_t lock;
++ void *entry[0]; /*
++ * Must have this definition in here for the proper
++ * alignment of array_cache. Also simplifies accessing
++ * the entries.
++ * [0] is for gcc 2.95. It should really be [].
++ */
++};
++
++/* bootstrap: The caches do not work without cpuarrays anymore,
++ * but the cpuarrays are allocated from the generic caches...
++ */
++#define BOOT_CPUCACHE_ENTRIES 1
++struct arraycache_init {
++ struct array_cache cache;
++ void * entries[BOOT_CPUCACHE_ENTRIES];
++};
++
++/*
++ * The slab lists for all objects.
++ */
++struct kmem_list3 {
++ struct list_head slabs_partial; /* partial list first, better asm code */
++ struct list_head slabs_full;
++ struct list_head slabs_free;
++ unsigned long free_objects;
++ unsigned long next_reap;
++ int free_touched;
++ unsigned int free_limit;
++ spinlock_t list_lock;
++ struct array_cache *shared; /* shared per node */
++ struct array_cache **alien; /* on other nodes */
++};
++
++/*
++ * kmem_cache_t
++ *
++ * manages a cache.
++ */
++
++struct kmem_cache {
++/* 1) per-cpu data, touched during every alloc/free */
++ struct array_cache *array[NR_CPUS];
++ unsigned int batchcount;
++ unsigned int limit;
++ unsigned int shared;
++ unsigned int objsize;
++/* 2) touched by every alloc & free from the backend */
++ struct kmem_list3 *nodelists[MAX_NUMNODES];
++ unsigned int flags; /* constant flags */
++ unsigned int num; /* # of objs per slab */
++ spinlock_t spinlock;
++
++/* 3) cache_grow/shrink */
++ /* order of pgs per slab (2^n) */
++ unsigned int gfporder;
++
++ /* force GFP flags, e.g. GFP_DMA */
++ gfp_t gfpflags;
++
++ size_t colour; /* cache colouring range */
++ unsigned int colour_off; /* colour offset */
++ unsigned int colour_next; /* cache colouring */
++ kmem_cache_t *slabp_cache;
++ unsigned int slab_size;
++ unsigned int dflags; /* dynamic flags */
++
++ /* constructor func */
++ void (*ctor)(void *, kmem_cache_t *, unsigned long);
++
++ /* de-constructor func */
++ void (*dtor)(void *, kmem_cache_t *, unsigned long);
++
++/* 4) cache creation/removal */
++ const char *name;
++ struct list_head next;
++
++/* 5) statistics */
++#if SLAB_STATS
++ unsigned long num_active;
++ unsigned long num_allocations;
++ unsigned long high_mark;
++ unsigned long grown;
++ unsigned long reaped;
++ unsigned long errors;
++ unsigned long max_freeable;
++ unsigned long node_allocs;
++ unsigned long node_frees;
++ atomic_t allochit;
++ atomic_t allocmiss;
++ atomic_t freehit;
++ atomic_t freemiss;
++#endif
++#if SLAB_DEBUG
++ int dbghead;
++ int reallen;
++#endif
++#ifdef CONFIG_USER_RESOURCE
++ unsigned int objuse;
++#endif
++};
++
++struct slab;
++
++/* Functions and macros for storing/retrieving the cachep and or slab from the
++ * global 'mem_map'. These are used to find the slab an obj belongs to.
++ * With kfree(), these are used to find the cache which an obj belongs to.
++ */
++static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
++{
++ page->lru.next = (struct list_head *)cache;
++}
++
++static inline struct kmem_cache *page_get_cache(struct page *page)
++{
++ return (struct kmem_cache *)page->lru.next;
++}
++
++static inline void page_set_slab(struct page *page, struct slab *slab)
++{
++ page->lru.prev = (struct list_head *)slab;
++}
++
++static inline struct slab *page_get_slab(struct page *page)
++{
++ return (struct slab *)page->lru.prev;
++}
++
++#define SET_PAGE_CACHE(pg,x) page_set_cache(pg,x)
++#define GET_PAGE_CACHE(pg) page_get_cache(pg)
++#define SET_PAGE_SLAB(pg,x) page_set_slab(pg,x)
++#define GET_PAGE_SLAB(pg) page_get_slab(pg)
++
++#define CFLGS_OFF_SLAB (0x80000000UL)
++#define CFLGS_ENVIDS (0x04000000UL)
++#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
++#define ENVIDS(x) ((x)->flags & CFLGS_ENVIDS)
++
++#define kmem_mark_nocharge(c) do { (c)->flags |= SLAB_NO_CHARGE; } while (0)
++#endif /* __KMEM_CACHE_H__ */
+diff -uprN linux-2.6.15.orig/include/linux/kmem_slab.h linux-2.6.15-ve025stab014/include/linux/kmem_slab.h
+--- linux-2.6.15.orig/include/linux/kmem_slab.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/kmem_slab.h 2006-01-27 14:48:05.000000000 +0300
+@@ -0,0 +1,71 @@
++#ifndef __KMEM_SLAB_H__
++#define __KMEM_SLAB_H__
++
++/*
++ * kmem_bufctl_t:
++ *
++ * Bufctl's are used for linking objs within a slab
++ * linked offsets.
++ *
++ * This implementation relies on "struct page" for locating the cache &
++ * slab an object belongs to.
++ * This allows the bufctl structure to be small (one int), but limits
++ * the number of objects a slab (not a cache) can contain when off-slab
++ * bufctls are used. The limit is the size of the largest general cache
++ * that does not use off-slab slabs.
++ * For 32bit archs with 4 kB pages, this is 56.
++ * This is not serious, as it is only for large objects, when it is unwise
++ * to have too many per slab.
++ * Note: This limit can be raised by introducing a general cache whose size
++ * is less than 512 (PAGE_SIZE<<3), but greater than 256.
++ */
++
++typedef unsigned int kmem_bufctl_t;
++#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
++#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
++#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
++
++/*
++ * struct slab
++ *
++ * Manages the objs in a slab. Placed either at the beginning of mem allocated
++ * for a slab, or allocated from a general cache.
++ * Slabs are chained into three lists: fully used, partial, fully free slabs.
++ */
++struct slab {
++ struct list_head list;
++ unsigned long colouroff;
++ void *s_mem; /* including colour offset */
++ unsigned int inuse; /* num of objs active in slab */
++ kmem_bufctl_t free;
++ unsigned short nodeid;
++};
++
++/*
++ * struct slab_rcu
++ *
++ * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
++ * arrange for kmem_freepages to be called via RCU. This is useful if
++ * we need to approach a kernel structure obliquely, from its address
++ * obtained without the usual locking. We can lock the structure to
++ * stabilize it and check it's still at the given address, only if we
++ * can be sure that the memory has not been meanwhile reused for some
++ * other kind of object (which our subsystem's lock might corrupt).
++ *
++ * rcu_read_lock before reading the address, then rcu_read_unlock after
++ * taking the spinlock within the structure expected at that address.
++ *
++ * We assume struct slab_rcu can overlay struct slab when destroying.
++ */
++struct slab_rcu {
++ struct rcu_head head;
++ kmem_cache_t *cachep;
++ void *addr;
++};
++
++static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
++{
++ return (kmem_bufctl_t *)(slabp+1);
++}
++
++#endif /* __KMEM_SLAB_H__ */
+diff -uprN linux-2.6.15.orig/include/linux/list.h linux-2.6.15-ve025stab014/include/linux/list.h
+--- linux-2.6.15.orig/include/linux/list.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/list.h 2006-01-27 14:48:08.000000000 +0300
+@@ -409,6 +409,20 @@ static inline void list_splice_init(stru
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+ /**
++ * list_for_each_entry_continue_reverse - iterate backwards over list of given
++ * type continuing after existing point
++ * @pos: the type * to use as a loop counter.
++ * @head: the head for your list.
++ * @member: the name of the list_struct within the struct.
++ */
++#define list_for_each_entry_continue_reverse(pos, head, member) \
++ for (pos = list_entry(pos->member.prev, typeof(*pos), member), \
++ prefetch(pos->member.prev); \
++ &pos->member != (head); \
++ pos = list_entry(pos->member.prev, typeof(*pos), member), \
++ prefetch(pos->member.prev))
++
++/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+diff -uprN linux-2.6.15.orig/include/linux/major.h linux-2.6.15-ve025stab014/include/linux/major.h
+--- linux-2.6.15.orig/include/linux/major.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/major.h 2006-01-27 14:48:08.000000000 +0300
+@@ -165,4 +165,7 @@
+
+ #define VIOTAPE_MAJOR 230
+
++#define UNNAMED_EXTRA_MAJOR 130
++#define UNNAMED_EXTRA_MAJOR_COUNT 120
++
+ #endif
+diff -uprN linux-2.6.15.orig/include/linux/mm.h linux-2.6.15-ve025stab014/include/linux/mm.h
+--- linux-2.6.15.orig/include/linux/mm.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/mm.h 2006-01-27 14:48:05.000000000 +0300
+@@ -39,6 +39,27 @@ extern int sysctl_legacy_va_layout;
+
+ #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+
++#include <linux/mm_counter.h>
++
++#ifdef CONFIG_USER_RESOURCE
++#define set_vma_rss(vma, v) set_mm_counter(vma, vm_rss, v)
++#define get_vma_rss(vma) get_mm_counter(vma, vm_rss)
++#define inc_vma_rss(vma) inc_mm_counter(vma, vm_rss)
++#define dec_vma_rss(vma) dec_mm_counter(vma, vm_rss)
++#define add_vma_rss(vma, v) add_mm_counter(vma, vm_rss, v)
++#define sub_vma_rss(vma, v) do { \
++ if (unlikely(dec_mm_counter_chk(vma, vm_rss, v))) \
++ warn_bad_rss(vma, v); \
++ } while (0)
++#else
++#define set_vma_rss(vma, v) do { } while (0)
++#define get_vma_rss(vma) (0)
++#define inc_vma_rss(vma) do { } while (0)
++#define dec_vma_rss(vma) do { } while (0)
++#define add_vma_rss(vma, v) do { } while (0)
++#define sub_vma_rss(vma, v) do { } while (0)
++#endif
++
+ /*
+ * Linux kernel virtual memory manager primitives.
+ * The idea being to have a "virtual" mm in the same way
+@@ -109,6 +130,9 @@ struct vm_area_struct {
+ #ifdef CONFIG_NUMA
+ struct mempolicy *vm_policy; /* NUMA policy for the VMA */
+ #endif
++#ifdef CONFIG_USER_RESOURCE
++ mm_counter_t _vm_rss;
++#endif
+ };
+
+ /*
+@@ -259,6 +283,12 @@ struct page {
+ void *virtual; /* Kernel virtual address (NULL if
+ not kmapped, ie. highmem) */
+ #endif /* WANT_PAGE_VIRTUAL */
++#ifdef CONFIG_USER_RESOURCE
++ union {
++ struct user_beancounter *page_ub;
++ struct page_beancounter *page_pb;
++ } bc;
++#endif
+ };
+
+ #define page_private(page) ((page)->u.private)
+@@ -631,10 +661,8 @@ struct page *shmem_nopage(struct vm_area
+ int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
+ struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
+ unsigned long addr);
+-int shmem_lock(struct file *file, int lock, struct user_struct *user);
+ #else
+ #define shmem_nopage filemap_nopage
+-#define shmem_lock(a, b, c) ({0;}) /* always in memory, no need to lock */
+ #define shmem_set_policy(a, b) (0)
+ #define shmem_get_policy(a, b) (NULL)
+ #endif
+@@ -677,7 +705,7 @@ void free_pgd_range(struct mmu_gather **
+ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
+ unsigned long floor, unsigned long ceiling);
+ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+- struct vm_area_struct *vma);
++ struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
+ int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
+ unsigned long size, pgprot_t prot);
+ void unmap_mapping_range(struct address_space *mapping,
+@@ -964,6 +992,7 @@ struct page *follow_page(struct vm_area_
+ #define FOLL_TOUCH 0x02 /* mark page accessed */
+ #define FOLL_GET 0x04 /* do get_page on page */
+ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */
++#define FOLL_KERN 0x10 /* lookup kernel page */
+
+ #ifdef CONFIG_PROC_FS
+ void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
+diff -uprN linux-2.6.15.orig/include/linux/mm_counter.h linux-2.6.15-ve025stab014/include/linux/mm_counter.h
+--- linux-2.6.15.orig/include/linux/mm_counter.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/mm_counter.h 2006-01-27 14:48:05.000000000 +0300
+@@ -0,0 +1,44 @@
++#ifndef __MM_COUNTER_H_
++#define __MM_COUNTER_H_
++#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
++/*
++ * The mm counters are not protected by its page_table_lock,
++ * so must be incremented atomically.
++ */
++#ifdef ATOMIC64_INIT
++#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
++#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
++#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
++#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
++#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
++#define dec_mm_counter_chk(mm, member, value) atomic64_add_negative(-(value), &(mm)->_##member)
++typedef atomic64_t mm_counter_t;
++#else /* !ATOMIC64_INIT */
++/*
++ * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
++ * that is, at 16TB if using 4kB page size.
++ */
++#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
++#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
++#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
++#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
++#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
++#define dec_mm_counter_chk(mm, member, value) atomic_add_negative(-(value), &(mm)->_##member)
++typedef atomic_t mm_counter_t;
++#endif /* !ATOMIC64_INIT */
++
++#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
++/*
++ * The mm counters are protected by its page_table_lock,
++ * so can be incremented directly.
++ */
++#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
++#define get_mm_counter(mm, member) ((mm)->_##member)
++#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
++#define inc_mm_counter(mm, member) (mm)->_##member++
++#define dec_mm_counter(mm, member) (mm)->_##member--
++#define dec_mm_counter_chk(mm, member, value) (((mm)->_##member -= (value)) < 0)
++typedef unsigned long mm_counter_t;
++
++#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
++#endif
+diff -uprN linux-2.6.15.orig/include/linux/namei.h linux-2.6.15-ve025stab014/include/linux/namei.h
+--- linux-2.6.15.orig/include/linux/namei.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/namei.h 2006-01-27 14:48:08.000000000 +0300
+@@ -48,12 +48,15 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
+ #define LOOKUP_PARENT 16
+ #define LOOKUP_NOALT 32
+ #define LOOKUP_REVAL 64
++#define LOOKUP_STRICT 128 /* no symlinks or other filesystems */
++
+ /*
+ * Intent data
+ */
+ #define LOOKUP_OPEN (0x0100)
+ #define LOOKUP_CREATE (0x0200)
+ #define LOOKUP_ACCESS (0x0400)
++#define LOOKUP_NOAREACHECK (0x0800) /* no area check on lookup */
+
+ extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
+ #define user_path_walk(name,nd) \
+diff -uprN linux-2.6.15.orig/include/linux/namespace.h linux-2.6.15-ve025stab014/include/linux/namespace.h
+--- linux-2.6.15.orig/include/linux/namespace.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/namespace.h 2006-01-27 14:48:08.000000000 +0300
+@@ -13,6 +13,8 @@ struct namespace {
+ int event;
+ };
+
++extern struct rw_semaphore namespace_sem;
++
+ extern int copy_namespace(int, struct task_struct *);
+ extern void __put_namespace(struct namespace *namespace);
+
+diff -uprN linux-2.6.15.orig/include/linux/netdevice.h linux-2.6.15-ve025stab014/include/linux/netdevice.h
+--- linux-2.6.15.orig/include/linux/netdevice.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netdevice.h 2006-01-27 14:48:08.000000000 +0300
+@@ -37,6 +37,7 @@
+ #include <linux/config.h>
+ #include <linux/device.h>
+ #include <linux/percpu.h>
++#include <linux/ctype.h>
+
+ struct divert_blk;
+ struct vlan_group;
+@@ -233,6 +234,11 @@ enum netdev_state_t
+ __LINK_STATE_LINKWATCH_PENDING
+ };
+
++struct netdev_bc {
++ struct user_beancounter *exec_ub, *owner_ub;
++};
++
++#define netdev_bc(dev) (&(dev)->dev_bc)
+
+ /*
+ * This structure holds at boot time configured netdevice settings. They
+@@ -309,6 +315,7 @@ struct net_device
+ #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
+ #define NETIF_F_LLTX 4096 /* LockLess TX */
+ #define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/
++#define NETIF_F_VENET 0x80000000 /* Device is VENET device */
+
+ struct net_device *next_sched;
+
+@@ -431,6 +438,7 @@ struct net_device
+ enum { NETREG_UNINITIALIZED=0,
+ NETREG_REGISTERING, /* called register_netdevice */
+ NETREG_REGISTERED, /* completed register todo */
++ NETREG_REGISTER_ERR, /* register todo failed */
+ NETREG_UNREGISTERING, /* called unregister_netdevice */
+ NETREG_UNREGISTERED, /* completed unregister todo */
+ NETREG_RELEASED, /* called free_netdev */
+@@ -500,8 +508,18 @@ struct net_device
+ struct divert_blk *divert;
+ #endif /* CONFIG_NET_DIVERT */
+
++ unsigned orig_mtu; /* MTU value before move to VE */
++ struct ve_struct *owner_env; /* Owner VE of the interface */
++ struct netdev_bc dev_bc;
++
+ /* class/net/name entry */
+ struct class_device class_dev;
++
++#ifdef CONFIG_VE
++ /* List entry in global devices list to keep track of their names
++ * assignment */
++ struct list_head dev_global_list_entry;
++#endif
+ };
+
+ #define NETDEV_ALIGN 32
+@@ -535,9 +553,22 @@ struct packet_type {
+ #include <linux/notifier.h>
+
+ extern struct net_device loopback_dev; /* The loopback */
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define visible_loopback_dev (*get_exec_env()->_loopback_dev)
++#define dev_base (get_exec_env()->_net_dev_base)
++#define visible_dev_head(x) (&(x)->_net_dev_head)
++#define visible_dev_index_head(x) (&(x)->_net_dev_index_head)
++#else
++#define visible_loopback_dev loopback_dev
+ extern struct net_device *dev_base; /* All devices */
++#define visible_dev_head(x) NULL
++#define visible_dev_index_head(x) NULL
++#endif
+ extern rwlock_t dev_base_lock; /* Device list lock */
+
++struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env);
++struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env);
++
+ extern int netdev_boot_setup_check(struct net_device *dev);
+ extern unsigned long netdev_boot_base(const char *prefix, int unit);
+ extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
+@@ -554,6 +585,7 @@ extern int dev_alloc_name(struct net_de
+ extern int dev_open(struct net_device *dev);
+ extern int dev_close(struct net_device *dev);
+ extern int dev_queue_xmit(struct sk_buff *skb);
++extern int dev_set_mtu(struct net_device *dev, int new_mtu);
+ extern int register_netdevice(struct net_device *dev);
+ extern int unregister_netdevice(struct net_device *dev);
+ extern void free_netdev(struct net_device *dev);
+@@ -946,6 +978,29 @@ extern void dev_seq_stop(struct seq_file
+
+ extern void linkwatch_run_queue(void);
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static inline int ve_is_dev_movable(struct net_device *dev)
++{
++ if (strcmp(dev->name, "lo") == 0)
++ return 0;
++ else if ((strncmp(dev->name, "venet", 5) == 0) &&
++ (strlen(dev->name) > 5) && isdigit(dev->name[5]))
++ return 0;
++ else if ((strncmp(dev->name, "tun", 3) == 0) &&
++ (strlen(dev->name) > 3) && isdigit(dev->name[3]))
++ return 0;
++ else if ((strncmp(dev->name, "tap", 3) == 0) &&
++ (strlen(dev->name) > 3) && isdigit(dev->name[3]))
++ return 0;
++ return 1;
++}
++#else
++static inline int ve_is_dev_movable(struct net_device *dev)
++{
++ return 0;
++}
++#endif
++
+ #endif /* __KERNEL__ */
+
+ #endif /* _LINUX_DEV_H */
+diff -uprN linux-2.6.15.orig/include/linux/netfilter/nf_conntrack_ftp.h linux-2.6.15-ve025stab014/include/linux/netfilter/nf_conntrack_ftp.h
+--- linux-2.6.15.orig/include/linux/netfilter/nf_conntrack_ftp.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter/nf_conntrack_ftp.h 2006-01-27 14:48:08.000000000 +0300
+@@ -32,13 +32,22 @@ struct ip_conntrack_expect;
+
+ /* For NAT to hook in when we find a packet which describes what other
+ * connection we should expect. */
+-extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
++typedef unsigned int (*ip_nat_helper_ftp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ enum ip_ct_ftp_type type,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp,
+ u32 *seq);
++extern ip_nat_helper_ftp_hook ip_nat_ftp_hook;
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ip_nat_ftp_hook \
++ ((ip_nat_helper_ftp_hook) \
++ (get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook))
++#else
++#define ve_ip_nat_ftp_hook ip_nat_ftp_hook
++#endif
+ #endif /* __KERNEL__ */
+
+ #endif /* _NF_CONNTRACK_FTP_H */
+diff -uprN linux-2.6.15.orig/include/linux/netfilter.h linux-2.6.15-ve025stab014/include/linux/netfilter.h
+--- linux-2.6.15.orig/include/linux/netfilter.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter.h 2006-01-27 14:48:08.000000000 +0300
+@@ -39,6 +39,8 @@
+ #define NFC_ALTERED 0x8000
+ #endif
+
++#define NFC_IPT_MASK (0x00FFFFFF)
++
+ #ifdef __KERNEL__
+ #include <linux/config.h>
+ #ifdef CONFIG_NETFILTER
+@@ -107,12 +109,21 @@ struct nf_info
+ int nf_register_hook(struct nf_hook_ops *reg);
+ void nf_unregister_hook(struct nf_hook_ops *reg);
+
++int virt_nf_register_hook(struct nf_hook_ops *reg);
++int virt_nf_unregister_hook(struct nf_hook_ops *reg);
++
+ /* Functions to register get/setsockopt ranges (non-inclusive). You
+ need to check permissions yourself! */
+ int nf_register_sockopt(struct nf_sockopt_ops *reg);
+ void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
+
++#ifdef CONFIG_VE_IPTABLES
++#define ve_nf_hooks \
++ ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
++#else
+ extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
++#define ve_nf_hooks nf_hooks
++#endif
+
+ /* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
+ * disappear once iptables is replaced with pkttables. Please DO NOT use them
+@@ -202,13 +213,13 @@ __ret;})
+ #else
+ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
+ ({int __ret; \
+-if (list_empty(&nf_hooks[pf][hook]) || \
++if (list_empty(&ve_nf_hooks[pf][hook]) || \
+ (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN)) == 1) \
+ __ret = (okfn)(skb); \
+ __ret;})
+ #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
+ ({int __ret; \
+-if (list_empty(&nf_hooks[pf][hook]) || \
++if (list_empty(&ve_nf_hooks[pf][hook]) || \
+ (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1) \
+ __ret = (okfn)(skb); \
+ __ret;})
+diff -uprN linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack.h
+--- linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack.h 2006-01-27 14:48:08.000000000 +0300
+@@ -71,6 +71,11 @@ do { \
+
+ struct ip_conntrack_helper;
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/ve.h>
++#include <linux/ve_owner.h>
++#endif
++
+ struct ip_conntrack
+ {
+ /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
+@@ -122,8 +127,15 @@ struct ip_conntrack
+ /* Traversed often, so hopefully in different cacheline to top */
+ /* These are my tuples; original and reply */
+ struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_struct *ct_owner_env;
++#endif
+ };
+
++#ifdef CONFIG_VE_IPTABLES
++DCL_VE_OWNER_PROTO(CT, struct ip_conntrack, ct_owner_env)
++#endif
++
+ struct ip_conntrack_expect
+ {
+ /* Internal linked list (global expectation list) */
+@@ -235,7 +247,15 @@ extern void ip_conntrack_tcp_update(stru
+ enum ip_conntrack_dir dir);
+
+ /* Call me when a conntrack is destroyed. */
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ip_conntrack_destroyed \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_destroyed)
++#else
+ extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
++#define ve_ip_conntrack_destroyed ip_conntrack_destroyed
++#endif
++
+
+ /* Fake conntrack entry for untracked connections */
+ extern struct ip_conntrack ip_conntrack_untracked;
+@@ -264,7 +284,7 @@ extern void ip_conntrack_proto_put(struc
+ extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
+
+ extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
+- struct ip_conntrack_tuple *);
++ struct ip_conntrack_tuple *, struct user_beancounter *);
+
+ extern void ip_conntrack_free(struct ip_conntrack *ct);
+
+@@ -294,6 +314,7 @@ static inline int is_dying(struct ip_con
+ }
+
+ extern unsigned int ip_conntrack_htable_size;
++extern int ip_conntrack_disable_ve0;
+
+ #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
+
+diff -uprN linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack_core.h
+--- linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack_core.h 2006-01-27 14:48:08.000000000 +0300
+@@ -3,7 +3,6 @@
+ #include <linux/netfilter.h>
+
+ #define MAX_IP_CT_PROTO 256
+-extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+
+ /* This header is used to share core functionality between the
+ standalone connection tracking module, and the compatibility layer's use
+@@ -54,8 +53,26 @@ static inline int ip_conntrack_confirm(s
+
+ extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp);
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ip_ct_protos \
++ (get_exec_env()->_ip_conntrack->_ip_ct_protos)
++#define ve_ip_conntrack_hash \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_hash)
++#define ve_ip_conntrack_expect_list \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_expect_list)
++#define ve_ip_conntrack_vmalloc \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_vmalloc)
++#else
++extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+ extern struct list_head *ip_conntrack_hash;
+ extern struct list_head ip_conntrack_expect_list;
++#define ve_ip_ct_protos ip_ct_protos
++#define ve_ip_conntrack_hash ip_conntrack_hash
++#define ve_ip_conntrack_expect_list ip_conntrack_expect_list
++#define ve_ip_conntrack_vmalloc ip_conntrack_vmalloc
++#endif /* CONFIG_VE_IPTABLES */
++
+ extern rwlock_t ip_conntrack_lock;
+ #endif /* _IP_CONNTRACK_CORE_H */
+
+diff -uprN linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack_helper.h
+--- linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2006-01-27 14:48:08.000000000 +0300
+@@ -31,6 +31,9 @@ struct ip_conntrack_helper
+ extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
+ extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
+
++extern int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *);
++extern void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
++
+ /* Allocate space for an expectation: this is mandatory before calling
+ ip_conntrack_expect_related. You will have to call put afterwards. */
+ extern struct ip_conntrack_expect *
+@@ -41,4 +44,5 @@ extern void ip_conntrack_expect_put(stru
+ extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
+ extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
+
++extern struct list_head helpers;
+ #endif /*_IP_CONNTRACK_HELPER_H*/
+diff -uprN linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack_irc.h
+--- linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2006-01-27 14:48:08.000000000 +0300
+@@ -14,16 +14,26 @@
+ #ifndef _IP_CONNTRACK_IRC_H
+ #define _IP_CONNTRACK_IRC_H
+
++#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
++
+ /* This structure exists only once per master */
+ struct ip_ct_irc_master {
+ };
+
+ #ifdef __KERNEL__
+-extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
+- enum ip_conntrack_info ctinfo,
+- unsigned int matchoff,
+- unsigned int matchlen,
+- struct ip_conntrack_expect *exp);
++typedef unsigned int (*ip_nat_helper_irc_hook)(struct sk_buff **,
++ enum ip_conntrack_info, unsigned int, unsigned int,
++ struct ip_conntrack_expect *);
++
++extern ip_nat_helper_irc_hook ip_nat_irc_hook;
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ip_nat_irc_hook \
++ ((ip_nat_helper_irc_hook) \
++ (get_exec_env()->_ip_conntrack->_ip_nat_irc_hook))
++#else
++#define ve_ip_nat_irc_hook ip_nat_irc_hook
++#endif
+
+ #define IRC_PORT 6667
+
+diff -uprN linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+--- linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2006-01-27 14:48:08.000000000 +0300
+@@ -67,6 +67,7 @@ struct ip_conntrack_protocol
+ /* Protocol registration. */
+ extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
+ extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
++
+ /* Existing built-in protocols */
+ extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
+ extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
+@@ -74,6 +75,41 @@ extern struct ip_conntrack_protocol ip_c
+ extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
+ extern int ip_conntrack_protocol_tcp_init(void);
+
++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_SYSCTL)
++#include <linux/sched.h>
++#define ve_ip_ct_tcp_timeouts \
++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeouts)
++#define ve_ip_ct_udp_timeout \
++ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout)
++#define ve_ip_ct_udp_timeout_stream \
++ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout_stream)
++#define ve_ip_ct_icmp_timeout \
++ (get_exec_env()->_ip_conntrack->_ip_ct_icmp_timeout)
++#define ve_ip_ct_generic_timeout \
++ (get_exec_env()->_ip_conntrack->_ip_ct_generic_timeout)
++#define ve_ip_ct_log_invalid \
++ (get_exec_env()->_ip_conntrack->_ip_ct_log_invalid)
++#define ve_ip_ct_tcp_timeout_max_retrans \
++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeout_max_retrans)
++#define ve_ip_ct_tcp_loose \
++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_loose)
++#define ve_ip_ct_tcp_be_liberal \
++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_be_liberal)
++#define ve_ip_ct_tcp_max_retrans \
++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_max_retrans)
++#else
++#define ve_ip_ct_tcp_timeouts *tcp_timeouts
++#define ve_ip_ct_udp_timeout ip_ct_udp_timeout
++#define ve_ip_ct_udp_timeout_stream ip_ct_udp_timeout_stream
++#define ve_ip_ct_icmp_timeout ip_ct_icmp_timeout
++#define ve_ip_ct_generic_timeout ip_ct_generic_timeout
++#define ve_ip_ct_log_invalid ip_ct_log_invalid
++#define ve_ip_ct_tcp_timeout_max_retrans ip_ct_tcp_timeout_max_retrans
++#define ve_ip_ct_tcp_loose ip_ct_tcp_loose
++#define ve_ip_ct_tcp_be_liberal ip_ct_tcp_be_liberal
++#define ve_ip_ct_tcp_max_retrans ip_ct_tcp_max_retrans
++#endif
++
+ /* Log invalid packets */
+ extern unsigned int ip_ct_log_invalid;
+
+@@ -85,10 +121,10 @@ extern int ip_ct_port_nfattr_to_tuple(st
+ #ifdef CONFIG_SYSCTL
+ #ifdef DEBUG_INVALID_PACKETS
+ #define LOG_INVALID(proto) \
+- (ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
++ (ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW)
+ #else
+ #define LOG_INVALID(proto) \
+- ((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
++ ((ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW) \
+ && net_ratelimit())
+ #endif
+ #else
+diff -uprN linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_nat_core.h linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_nat_core.h
+--- linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_nat_core.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_nat_core.h 2006-01-27 14:48:08.000000000 +0300
+@@ -15,4 +15,5 @@ extern int ip_nat_icmp_reply_translation
+ struct ip_conntrack *ct,
+ enum ip_nat_manip_type manip,
+ enum ip_conntrack_dir dir);
++
+ #endif /* _IP_NAT_CORE_H */
+diff -uprN linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_nat_rule.h linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_nat_rule.h
+--- linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_nat_rule.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_nat_rule.h 2006-01-27 14:48:08.000000000 +0300
+@@ -6,7 +6,7 @@
+
+ #ifdef __KERNEL__
+
+-extern int ip_nat_rule_init(void) __init;
++extern int ip_nat_rule_init(void);
+ extern void ip_nat_rule_cleanup(void);
+ extern int ip_nat_rule_find(struct sk_buff **pskb,
+ unsigned int hooknum,
+diff -uprN linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_tables.h
+--- linux-2.6.15.orig/include/linux/netfilter_ipv4/ip_tables.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netfilter_ipv4/ip_tables.h 2006-01-27 14:48:08.000000000 +0300
+@@ -430,9 +430,15 @@ struct ipt_target
+ extern int ipt_register_target(struct ipt_target *target);
+ extern void ipt_unregister_target(struct ipt_target *target);
+
++extern int virt_ipt_register_target(struct ipt_target *target);
++extern void virt_ipt_unregister_target(struct ipt_target *target);
++
+ extern int ipt_register_match(struct ipt_match *match);
+ extern void ipt_unregister_match(struct ipt_match *match);
+
++extern int virt_ipt_register_match(struct ipt_match *match);
++extern void virt_ipt_unregister_match(struct ipt_match *match);
++
+ /* Furniture shopping... */
+ struct ipt_table
+ {
+@@ -486,6 +492,10 @@ extern unsigned int ipt_do_table(struct
+ struct ipt_table *table,
+ void *userdata);
+
++extern struct ipt_table *virt_ipt_register_table(struct ipt_table *table,
++ const struct ipt_replace *repl);
++extern void virt_ipt_unregister_table(struct ipt_table *table);
++
+ #define IPT_ALIGN(s) (((s) + (__alignof__(struct ipt_entry)-1)) & ~(__alignof__(struct ipt_entry)-1))
+ #endif /*__KERNEL__*/
+ #endif /* _IPTABLES_H */
+diff -uprN linux-2.6.15.orig/include/linux/netlink.h linux-2.6.15-ve025stab014/include/linux/netlink.h
+--- linux-2.6.15.orig/include/linux/netlink.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/netlink.h 2006-01-27 14:48:05.000000000 +0300
+@@ -160,7 +160,8 @@ extern int netlink_unregister_notifier(s
+
+ /* finegrained unicast helpers: */
+ struct sock *netlink_getsockbyfilp(struct file *filp);
+-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo);
++int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
++ long timeo, struct sock *ssk);
+ void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
+ int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol);
+
+diff -uprN linux-2.6.15.orig/include/linux/nfcalls.h linux-2.6.15-ve025stab014/include/linux/nfcalls.h
+--- linux-2.6.15.orig/include/linux/nfcalls.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/nfcalls.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,210 @@
++/*
++ * include/linux/nfcalls.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_NFCALLS_H
++#define _LINUX_NFCALLS_H
++
++#include <linux/rcupdate.h>
++
++extern struct module no_module;
++
++#define DECL_KSYM_MODULE(name) \
++ extern struct module *vz_mod_##name
++#define DECL_KSYM_CALL(type, name, args) \
++ extern type (*vz_##name) args
++
++#define INIT_KSYM_MODULE(name) \
++ struct module *vz_mod_##name = &no_module; \
++ EXPORT_SYMBOL(vz_mod_##name)
++#define INIT_KSYM_CALL(type, name, args) \
++ type (*vz_##name) args; \
++ EXPORT_SYMBOL(vz_##name)
++
++#define __KSYMERRCALL(err, type, mod, name, args) \
++({ \
++ type ret = (type)err; \
++ if (!__vzksym_module_get(vz_mod_##mod)) { \
++ if (vz_##name) \
++ ret = ((*vz_##name)args); \
++ __vzksym_module_put(vz_mod_##mod); \
++ } \
++ ret; \
++})
++#define __KSYMSAFECALL_VOID(mod, name, args) \
++do { \
++ if (!__vzksym_module_get(vz_mod_##mod)) { \
++ if (vz_##name) \
++ ((*vz_##name)args); \
++ __vzksym_module_put(vz_mod_##mod); \
++ } \
++} while (0)
++
++#define KSYMERRCALL(err, mod, name, args) \
++ __KSYMERRCALL(err, int, mod, name, args)
++#define KSYMSAFECALL(type, mod, name, args) \
++ __KSYMERRCALL(0, type, mod, name, args)
++#define KSYMSAFECALL_VOID(mod, name, args) \
++ __KSYMSAFECALL_VOID(mod, name, args)
++
++#ifdef CONFIG_VE
++/* should be called _after_ KSYMRESOLVE's */
++#define KSYMMODRESOLVE(name) \
++ __vzksym_modresolve(&vz_mod_##name, THIS_MODULE)
++#define KSYMMODUNRESOLVE(name) \
++ __vzksym_modunresolve(&vz_mod_##name)
++
++#define KSYMRESOLVE(name) \
++ vz_##name = &name
++#define KSYMUNRESOLVE(name) \
++ vz_##name = NULL
++#else
++#define KSYMRESOLVE(name) do { } while (0)
++#define KSYMUNRESOLVE(name) do { } while (0)
++#define KSYMMODRESOLVE(name) do { } while (0)
++#define KSYMMODUNRESOLVE(name) do { } while (0)
++#endif
++
++static inline void __vzksym_modresolve(struct module **modp, struct module *mod)
++{
++ /*
++	 * we want to be sure that pointer updates are visible first:
++	 * 1. wmb() is here only to be on the safe side
++ * (note, no rmb() in KSYMSAFECALL)
++ * 2. synchronize_sched() guarantees that updates are visible
++ * on all cpus and allows us to remove rmb() in KSYMSAFECALL
++ */
++ wmb(); synchronize_sched();
++ *modp = mod;
++	/* just to make sure our changes are visible as soon as possible */
++ wmb(); synchronize_sched();
++}
++
++static inline void __vzksym_modunresolve(struct module **modp)
++{
++ /*
++ * try_module_get() in KSYMSAFECALL should fail at this moment since
++	 * THIS_MODULE is in the unloading state (we should be called from fini),
++	 * so there is no need to synchronize pointer/ve_module updates.
++ */
++ *modp = &no_module;
++ /*
++	 * synchronize_sched() guarantees here that everyone sees the
++	 * updated module pointer before the module really goes away
++ */
++ synchronize_sched();
++}
++
++static inline int __vzksym_module_get(struct module *mod)
++{
++ /*
++ * we want to avoid rmb(), so use synchronize_sched() in KSYMUNRESOLVE
++ * and smp_read_barrier_depends() here...
++ */
++ smp_read_barrier_depends(); /* for module loading */
++ if (!try_module_get(mod))
++ return -EBUSY;
++
++ return 0;
++}
++
++static inline void __vzksym_module_put(struct module *mod)
++{
++ module_put(mod);
++}
++
++#if defined(CONFIG_VE_IPTABLES)
++DECL_KSYM_MODULE(ip_tables);
++DECL_KSYM_MODULE(iptable_filter);
++DECL_KSYM_MODULE(iptable_mangle);
++DECL_KSYM_MODULE(ipt_limit);
++DECL_KSYM_MODULE(ipt_multiport);
++DECL_KSYM_MODULE(ipt_tos);
++DECL_KSYM_MODULE(ipt_TOS);
++DECL_KSYM_MODULE(ipt_REJECT);
++DECL_KSYM_MODULE(ipt_TCPMSS);
++DECL_KSYM_MODULE(ipt_tcpmss);
++DECL_KSYM_MODULE(ipt_ttl);
++DECL_KSYM_MODULE(ipt_LOG);
++DECL_KSYM_MODULE(ipt_length);
++DECL_KSYM_MODULE(ip_conntrack);
++DECL_KSYM_MODULE(ip_conntrack_ftp);
++DECL_KSYM_MODULE(ip_conntrack_irc);
++DECL_KSYM_MODULE(ipt_conntrack);
++DECL_KSYM_MODULE(ipt_state);
++DECL_KSYM_MODULE(ipt_helper);
++DECL_KSYM_MODULE(ip_nat);
++DECL_KSYM_MODULE(iptable_nat);
++DECL_KSYM_MODULE(ip_nat_ftp);
++DECL_KSYM_MODULE(ip_nat_irc);
++
++struct sk_buff;
++
++DECL_KSYM_CALL(int, init_netfilter, (void));
++DECL_KSYM_CALL(int, init_iptables, (void));
++DECL_KSYM_CALL(int, init_iptable_filter, (void));
++DECL_KSYM_CALL(int, init_iptable_mangle, (void));
++DECL_KSYM_CALL(int, init_iptable_limit, (void));
++DECL_KSYM_CALL(int, init_iptable_multiport, (void));
++DECL_KSYM_CALL(int, init_iptable_tos, (void));
++DECL_KSYM_CALL(int, init_iptable_TOS, (void));
++DECL_KSYM_CALL(int, init_iptable_REJECT, (void));
++DECL_KSYM_CALL(int, init_iptable_TCPMSS, (void));
++DECL_KSYM_CALL(int, init_iptable_tcpmss, (void));
++DECL_KSYM_CALL(int, init_iptable_ttl, (void));
++DECL_KSYM_CALL(int, init_iptable_LOG, (void));
++DECL_KSYM_CALL(int, init_iptable_length, (void));
++DECL_KSYM_CALL(int, init_iptable_conntrack, (void));
++DECL_KSYM_CALL(int, init_iptable_ftp, (void));
++DECL_KSYM_CALL(int, init_iptable_irc, (void));
++DECL_KSYM_CALL(int, init_iptable_conntrack_match, (void));
++DECL_KSYM_CALL(int, init_iptable_state, (void));
++DECL_KSYM_CALL(int, init_iptable_helper, (void));
++DECL_KSYM_CALL(int, ip_nat_init, (void));
++DECL_KSYM_CALL(int, init_iptable_nat, (void));
++DECL_KSYM_CALL(int, init_iptable_nat_ftp, (void));
++DECL_KSYM_CALL(int, init_iptable_nat_irc, (void));
++DECL_KSYM_CALL(void, fini_iptable_nat_irc, (void));
++DECL_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
++DECL_KSYM_CALL(void, fini_iptable_nat, (void));
++DECL_KSYM_CALL(void, ip_nat_cleanup, (void));
++DECL_KSYM_CALL(void, fini_iptable_helper, (void));
++DECL_KSYM_CALL(void, fini_iptable_state, (void));
++DECL_KSYM_CALL(void, fini_iptable_conntrack_match, (void));
++DECL_KSYM_CALL(void, fini_iptable_irc, (void));
++DECL_KSYM_CALL(void, fini_iptable_ftp, (void));
++DECL_KSYM_CALL(void, fini_iptable_conntrack, (void));
++DECL_KSYM_CALL(void, fini_iptable_length, (void));
++DECL_KSYM_CALL(void, fini_iptable_LOG, (void));
++DECL_KSYM_CALL(void, fini_iptable_ttl, (void));
++DECL_KSYM_CALL(void, fini_iptable_tcpmss, (void));
++DECL_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
++DECL_KSYM_CALL(void, fini_iptable_REJECT, (void));
++DECL_KSYM_CALL(void, fini_iptable_TOS, (void));
++DECL_KSYM_CALL(void, fini_iptable_tos, (void));
++DECL_KSYM_CALL(void, fini_iptable_multiport, (void));
++DECL_KSYM_CALL(void, fini_iptable_limit, (void));
++DECL_KSYM_CALL(void, fini_iptable_filter, (void));
++DECL_KSYM_CALL(void, fini_iptable_mangle, (void));
++DECL_KSYM_CALL(void, fini_iptables, (void));
++DECL_KSYM_CALL(void, fini_netfilter, (void));
++
++DECL_KSYM_CALL(void, ipt_flush_table, (struct ipt_table *table));
++#endif /* CONFIG_VE_IPTABLES */
++
++#ifdef CONFIG_VE_CALLS_MODULE
++DECL_KSYM_MODULE(vzmon);
++DECL_KSYM_CALL(int, real_get_device_perms_ve,
++ (int dev_type, dev_t dev, int access_mode));
++DECL_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
++DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
++DECL_KSYM_CALL(void, real_update_load_avg_ve, (void));
++#endif
++
++#endif /* _LINUX_NFCALLS_H */
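(Editorial sketch, not taken from the patch: how the nfcalls.h indirection is typically wired up. The core keeps the storage, the providing module publishes its entry point and then its module pointer, and call sites go through KSYMERRCALL so they degrade gracefully when the module is absent. The iptable_foo / init_iptable_foo names below are purely illustrative.)

/* Core side (built-in): storage for the module pointer and the hook. */
INIT_KSYM_MODULE(iptable_foo);
INIT_KSYM_CALL(int, init_iptable_foo, (void));

/* Module side: publish the hook first, the owning module second. */
static int init_iptable_foo(void)
{
	return 0;			/* per-VE initialization work */
}

static int __init foo_init(void)
{
	KSYMRESOLVE(init_iptable_foo);
	KSYMMODRESOLVE(iptable_foo);	/* must follow the KSYMRESOLVE calls */
	return 0;
}

static void __exit foo_exit(void)
{
	KSYMMODUNRESOLVE(iptable_foo);
	KSYMUNRESOLVE(init_iptable_foo);
}

/* Caller side: 1 is returned as the fallback when the hook cannot be invoked. */
static int example_call_into_foo(void)
{
	return KSYMERRCALL(1, iptable_foo, init_iptable_foo, ());
}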
+diff -uprN linux-2.6.15.orig/include/linux/notifier.h linux-2.6.15-ve025stab014/include/linux/notifier.h
+--- linux-2.6.15.orig/include/linux/notifier.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/notifier.h 2006-01-27 14:48:06.000000000 +0300
+@@ -27,8 +27,9 @@ extern int notifier_call_chain(struct no
+
+ #define NOTIFY_DONE 0x0000 /* Don't care */
+ #define NOTIFY_OK 0x0001 /* Suits me */
++#define NOTIFY_FAIL 0x0002 /* Reject */
+ #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */
+-#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */
++#define NOTIFY_BAD (NOTIFY_STOP_MASK|NOTIFY_FAIL) /* Bad/Veto action */
+ /*
+ * Clean way to return from the notifier and stop further calls.
+ */
+diff -uprN linux-2.6.15.orig/include/linux/pid.h linux-2.6.15-ve025stab014/include/linux/pid.h
+--- linux-2.6.15.orig/include/linux/pid.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/pid.h 2006-01-27 14:48:08.000000000 +0300
+@@ -1,6 +1,18 @@
+ #ifndef _LINUX_PID_H
+ #define _LINUX_PID_H
+
++#define VPID_BIT 10
++#define VPID_DIV (1<<VPID_BIT)
++
++#ifdef CONFIG_VE
++#define __is_virtual_pid(pid) ((pid) & VPID_DIV)
++#define is_virtual_pid(pid) \
++ (__is_virtual_pid(pid) || ((pid)==1 && !ve_is_super(get_exec_env())))
++#else
++#define __is_virtual_pid(pid) 0
++#define is_virtual_pid(pid) 0
++#endif
++
+ enum pid_type
+ {
+ PIDTYPE_PID,
+@@ -15,6 +27,9 @@ struct pid
+ /* Try to keep pid_chain in the same cacheline as nr for find_pid */
+ int nr;
+ struct hlist_node pid_chain;
++#ifdef CONFIG_VE
++ int vnr;
++#endif
+ /* list of pids with the same nr, only one of them is in the hash */
+ struct list_head pid_list;
+ };
+@@ -40,16 +55,89 @@ extern int alloc_pidmap(void);
+ extern void FASTCALL(free_pidmap(int));
+ extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
+
+-#define do_each_task_pid(who, type, task) \
+- if ((task = find_task_by_pid_type(type, who))) { \
++#ifndef CONFIG_VE
++
++#define vpid_to_pid(pid) (pid)
++#define __vpid_to_pid(pid) (pid)
++#define pid_type_to_vpid(type, pid) (pid)
++#define __pid_type_to_vpid(type, pid) (pid)
++
++#define comb_vpid_to_pid(pid) (pid)
++#define comb_pid_to_vpid(pid) (pid)
++
++#else
++
++struct ve_struct;
++extern void free_vpid(int vpid, struct ve_struct *ve);
++extern int alloc_vpid(int pid, int vpid);
++extern int vpid_to_pid(int pid);
++extern int __vpid_to_pid(int pid);
++extern pid_t pid_type_to_vpid(int type, pid_t pid);
++extern pid_t _pid_type_to_vpid(int type, pid_t pid);
++
++static inline int comb_vpid_to_pid(int vpid)
++{
++ int pid = vpid;
++
++ if (vpid > 0) {
++ pid = vpid_to_pid(vpid);
++ if (unlikely(pid < 0))
++ return 0;
++ } else if (vpid < 0) {
++ pid = vpid_to_pid(-vpid);
++ if (unlikely(pid < 0))
++ return 0;
++ pid = -pid;
++ }
++ return pid;
++}
++
++static inline int comb_pid_to_vpid(int pid)
++{
++ int vpid = pid;
++
++ if (pid > 0) {
++ vpid = pid_type_to_vpid(PIDTYPE_PID, pid);
++ if (unlikely(vpid < 0))
++ return 0;
++ } else if (pid < 0) {
++ vpid = pid_type_to_vpid(PIDTYPE_PGID, -pid);
++ if (unlikely(vpid < 0))
++ return 0;
++ vpid = -vpid;
++ }
++ return vpid;
++}
++#endif
++
++#define do_each_task_pid_all(who, type, task) \
++ if ((task = find_task_by_pid_type_all(type, who))) { \
+ prefetch((task)->pids[type].pid_list.next); \
+ do {
+
+-#define while_each_task_pid(who, type, task) \
++#define while_each_task_pid_all(who, type, task) \
+ } while (task = pid_task((task)->pids[type].pid_list.next,\
+ type), \
+ prefetch((task)->pids[type].pid_list.next), \
+ hlist_unhashed(&(task)->pids[type].pid_chain)); \
+ } \
+
++#ifndef CONFIG_VE
++#define __do_each_task_pid_ve(who, type, task, owner) \
++ do_each_task_pid_all(who, type, task)
++#define __while_each_task_pid_ve(who, type, task, owner) \
++ while_each_task_pid_all(who, type, task)
++#else /* CONFIG_VE */
++#define __do_each_task_pid_ve(who, type, task, owner) \
++ do_each_task_pid_all(who, type, task) \
++ if (ve_accessible(VE_TASK_INFO(task)->owner_env, owner))
++#define __while_each_task_pid_ve(who, type, task, owner) \
++ while_each_task_pid_all(who, type, task)
++#endif /* CONFIG_VE */
++
++#define do_each_task_pid_ve(who, type, task) \
++ __do_each_task_pid_ve(who, type, task, get_exec_env());
++#define while_each_task_pid_ve(who, type, task) \
++ __while_each_task_pid_ve(who, type, task, get_exec_env());
++
+ #endif /* _LINUX_PID_H */
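(Editorial sketch, not part of the patch: the pid virtualization convention above marks container-visible pids with the VPID_DIV bit, and comb_vpid_to_pid()/comb_pid_to_vpid() additionally cope with the negated process-group form used by kill(2). example_resolve_user_pid() is a hypothetical helper handling only positive pids.)

/* Hypothetical helper: map a positive, VE-visible pid to a global pid. */
static int example_resolve_user_pid(int user_pid)
{
	int global_pid;

	if (!is_virtual_pid(user_pid))
		return user_pid;	/* caller already sees global pids (VE0) */

	global_pid = comb_vpid_to_pid(user_pid);
	if (!global_pid)
		return -ESRCH;		/* no such task in this VE */
	return global_pid;
}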
+diff -uprN linux-2.6.15.orig/include/linux/proc_fs.h linux-2.6.15-ve025stab014/include/linux/proc_fs.h
+--- linux-2.6.15.orig/include/linux/proc_fs.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/proc_fs.h 2006-01-27 14:48:08.000000000 +0300
+@@ -86,8 +86,14 @@ struct vmcore {
+
+ extern struct proc_dir_entry proc_root;
+ extern struct proc_dir_entry *proc_root_fs;
++#ifdef CONFIG_VE
++#include <linux/sched.h>
++#define proc_net (get_exec_env()->_proc_net)
++#define proc_net_stat (get_exec_env()->_proc_net_stat)
++#else
+ extern struct proc_dir_entry *proc_net;
+ extern struct proc_dir_entry *proc_net_stat;
++#endif
+ extern struct proc_dir_entry *proc_bus;
+ extern struct proc_dir_entry *proc_root_driver;
+ extern struct proc_dir_entry *proc_root_kcore;
+@@ -98,8 +104,8 @@ extern void proc_misc_init(void);
+ struct mm_struct;
+
+ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
+-struct dentry *proc_pid_unhash(struct task_struct *p);
+-void proc_pid_flush(struct dentry *proc_dentry);
++void proc_pid_unhash(struct task_struct *p, struct dentry * [2]);
++void proc_pid_flush(struct dentry *proc_dentry[2]);
+ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
+ unsigned long task_vsize(struct mm_struct *);
+ int task_statm(struct mm_struct *, int *, int *, int *, int *);
+@@ -107,7 +113,11 @@ char *task_mem(struct mm_struct *, char
+
+ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+ struct proc_dir_entry *parent);
++extern struct proc_dir_entry *create_proc_glob_entry(const char *name,
++ mode_t mode,
++ struct proc_dir_entry *parent);
+ extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
++extern void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent);
+
+ extern struct vfsmount *proc_mnt;
+ extern int proc_fill_super(struct super_block *,void *,int);
+@@ -189,6 +199,15 @@ static inline struct proc_dir_entry *pro
+ return res;
+ }
+
++static inline struct proc_dir_entry *proc_glob_fops_create(const char *name,
++ mode_t mode, struct file_operations *fops)
++{
++ struct proc_dir_entry *res = create_proc_glob_entry(name, mode, NULL);
++ if (res)
++ res->proc_fops = fops;
++ return res;
++}
++
+ static inline void proc_net_remove(const char *name)
+ {
+ remove_proc_entry(name,proc_net);
+@@ -201,16 +220,21 @@ static inline void proc_net_remove(const
+ #define proc_bus NULL
+
+ #define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; })
++#define proc_glob_fops_create(name, mode, fops) ({ (void)(mode), NULL; })
+ #define proc_net_create(name, mode, info) ({ (void)(mode), NULL; })
+ static inline void proc_net_remove(const char *name) {}
+
+-static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; }
+-static inline void proc_pid_flush(struct dentry *proc_dentry) { }
++static inline struct dentry *proc_pid_unhash(struct task_struct *p,
++ struct dentry *d[2]) { return NULL; }
++static inline void proc_pid_flush(struct dentry *proc_dentry[2]) { }
+
+ static inline struct proc_dir_entry *create_proc_entry(const char *name,
+ mode_t mode, struct proc_dir_entry *parent) { return NULL; }
++static inline struct proc_dir_entry *create_proc_glob_entry(const char *name,
++ mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+
+ #define remove_proc_entry(name, parent) do {} while (0)
++#define remove_proc_glob_entry(name, parent) do {} while (0)
+
+ static inline struct proc_dir_entry *proc_symlink(const char *name,
+ struct proc_dir_entry *parent,const char *dest) {return NULL;}
+@@ -261,4 +285,18 @@ static inline struct proc_dir_entry *PDE
+ return PROC_I(inode)->pde;
+ }
+
++static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
++{
++ if (de)
++ atomic_inc(&de->count);
++ return de;
++}
++
++extern void de_put(struct proc_dir_entry *);
++
++#define LPDE(inode) (PROC_I((inode))->pde)
++#ifdef CONFIG_VE
++#define GPDE(inode) (*(struct proc_dir_entry **)(&(inode)->i_pipe))
++#endif
++
+ #endif /* _LINUX_PROC_FS_H */
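(Editorial illustration, not from the patch: registering a read-only /proc entry through the new proc_glob_fops_create() helper, which as the name suggests is intended for entries living in the global proc tree rather than a single VE's, and tearing it down with remove_proc_glob_entry(). example_fops stands in for a real struct file_operations.)

static struct file_operations example_fops;	/* placeholder fops */

static int __init example_proc_init(void)
{
	if (proc_glob_fops_create("example_stats", S_IRUGO, &example_fops) == NULL)
		return -ENOMEM;
	return 0;
}

static void __exit example_proc_exit(void)
{
	remove_proc_glob_entry("example_stats", NULL);
}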
+diff -uprN linux-2.6.15.orig/include/linux/quota.h linux-2.6.15-ve025stab014/include/linux/quota.h
+--- linux-2.6.15.orig/include/linux/quota.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/quota.h 2006-01-27 14:48:09.000000000 +0300
+@@ -37,7 +37,6 @@
+
+ #include <linux/errno.h>
+ #include <linux/types.h>
+-#include <linux/spinlock.h>
+
+ #define __DQUOT_VERSION__ "dquot_6.5.1"
+ #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1
+@@ -45,8 +44,6 @@
+ typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
+ typedef __u64 qsize_t; /* Type in which we store sizes */
+
+-extern spinlock_t dq_data_lock;
+-
+ /* Size of blocks in which are counted size limits */
+ #define QUOTABLOCK_BITS 10
+ #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+@@ -133,6 +130,10 @@ struct if_dqinfo {
+
+ #ifdef __KERNEL__
+
++#include <linux/spinlock.h>
++
++extern spinlock_t dq_data_lock;
++
+ #include <linux/dqblk_xfs.h>
+ #include <linux/dqblk_v1.h>
+ #include <linux/dqblk_v2.h>
+@@ -242,6 +243,8 @@ struct quota_format_ops {
+ int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */
+ };
+
++struct inode;
++struct iattr;
+ /* Operations working with dquots */
+ struct dquot_operations {
+ int (*initialize) (struct inode *, int);
+@@ -256,9 +259,11 @@ struct dquot_operations {
+ int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */
+ int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */
+ int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */
++ int (*rename) (struct inode *, struct inode *, struct inode *);
+ };
+
+ /* Operations handling requests from userspace */
++struct v2_disk_dqblk;
+ struct quotactl_ops {
+ int (*quota_on)(struct super_block *, int, int, char *);
+ int (*quota_off)(struct super_block *, int);
+@@ -271,6 +276,9 @@ struct quotactl_ops {
+ int (*set_xstate)(struct super_block *, unsigned int, int);
+ int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
+ int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
++#ifdef CONFIG_QUOTA_COMPAT
++ int (*get_quoti)(struct super_block *, int, unsigned int, struct v2_disk_dqblk *);
++#endif
+ };
+
+ struct quota_format_type {
+@@ -291,6 +299,10 @@ struct quota_info {
+ struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */
+ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
+ struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
++ struct vz_quota_master *vzdq_master;
++ int vzdq_count;
++#endif
+ };
+
+ /* Inline would be better but we need to dereference super_block which is not defined yet */
+diff -uprN linux-2.6.15.orig/include/linux/quotaops.h linux-2.6.15-ve025stab014/include/linux/quotaops.h
+--- linux-2.6.15.orig/include/linux/quotaops.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/quotaops.h 2006-01-27 14:48:08.000000000 +0300
+@@ -171,6 +171,19 @@ static __inline__ int DQUOT_TRANSFER(str
+ return 0;
+ }
+
++static __inline__ int DQUOT_RENAME(struct inode *inode,
++ struct inode *old_dir, struct inode *new_dir)
++{
++ struct dquot_operations *q_op;
++
++ q_op = inode->i_sb->dq_op;
++ if (q_op && q_op->rename) {
++ if (q_op->rename(inode, old_dir, new_dir) == NO_QUOTA)
++ return 1;
++ }
++ return 0;
++}
++
+ /* The following two functions cannot be called inside a transaction */
+ #define DQUOT_SYNC(sb) sync_dquots(sb, -1)
+
+@@ -198,6 +211,7 @@ static __inline__ int DQUOT_OFF(struct s
+ #define DQUOT_SYNC(sb) do { } while(0)
+ #define DQUOT_OFF(sb) do { } while(0)
+ #define DQUOT_TRANSFER(inode, iattr) (0)
++#define DQUOT_RENAME(inode, old_dir, new_dir) (0)
+ static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+ {
+ inode_add_bytes(inode, nr);
+diff -uprN linux-2.6.15.orig/include/linux/sched.h linux-2.6.15-ve025stab014/include/linux/sched.h
+--- linux-2.6.15.orig/include/linux/sched.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/sched.h 2006-01-27 14:48:09.000000000 +0300
+@@ -37,7 +37,10 @@
+
+ #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
+
++#include <ub/ub_task.h>
++
+ struct exec_domain;
++struct ve_struct;
+
+ /*
+ * cloning flags:
+@@ -91,15 +94,34 @@ extern unsigned long avenrun[]; /* Load
+ load += n*(FIXED_1-exp); \
+ load >>= FSHIFT;
+
++#define LOAD_INT(x) ((x) >> FSHIFT)
++#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
++
+ extern unsigned long total_forks;
+ extern int nr_threads;
+ extern int last_pid;
+ DECLARE_PER_CPU(unsigned long, process_counts);
+ extern int nr_processes(void);
++
++extern unsigned long nr_sleeping(void);
++extern unsigned long nr_stopped(void);
++extern unsigned long nr_zombie;
++extern unsigned long nr_dead;
+ extern unsigned long nr_running(void);
+ extern unsigned long nr_uninterruptible(void);
+ extern unsigned long nr_iowait(void);
+
++#ifdef CONFIG_VE
++struct ve_struct;
++extern unsigned long nr_running_ve(struct ve_struct *);
++extern unsigned long nr_iowait_ve(struct ve_struct *);
++extern unsigned long nr_uninterruptible_ve(struct ve_struct *);
++#else
++#define nr_running_ve(ve) 0
++#define nr_iowait_ve(ve) 0
++#define nr_uninterruptible_ve(ve) 0
++#endif
++
+ #include <linux/time.h>
+ #include <linux/param.h>
+ #include <linux/resource.h>
+@@ -249,44 +271,7 @@ arch_get_unmapped_area_topdown(struct fi
+ extern void arch_unmap_area(struct mm_struct *, unsigned long);
+ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
+
+-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+-/*
+- * The mm counters are not protected by its page_table_lock,
+- * so must be incremented atomically.
+- */
+-#ifdef ATOMIC64_INIT
+-#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
+-#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
+-#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
+-#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
+-#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
+-typedef atomic64_t mm_counter_t;
+-#else /* !ATOMIC64_INIT */
+-/*
+- * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
+- * that is, at 16TB if using 4kB page size.
+- */
+-#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
+-#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
+-#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
+-#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
+-#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
+-typedef atomic_t mm_counter_t;
+-#endif /* !ATOMIC64_INIT */
+-
+-#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+-/*
+- * The mm counters are protected by its page_table_lock,
+- * so can be incremented directly.
+- */
+-#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
+-#define get_mm_counter(mm, member) ((mm)->_##member)
+-#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
+-#define inc_mm_counter(mm, member) (mm)->_##member++
+-#define dec_mm_counter(mm, member) (mm)->_##member--
+-typedef unsigned long mm_counter_t;
+-
+-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
++#include <linux/mm_counter.h>
+
+ #define get_mm_rss(mm) \
+ (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
+@@ -341,6 +326,7 @@ struct mm_struct {
+ unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
+
+ unsigned dumpable:2;
++ unsigned vps_dumpable:1;
+ cpumask_t cpu_vm_mask;
+
+ /* Architecture-specific MM context */
+@@ -357,6 +343,8 @@ struct mm_struct {
+ /* aio bits */
+ rwlock_t ioctx_list_lock;
+ struct kioctx *ioctx_list;
++
++ struct user_beancounter *mm_ub;
+ };
+
+ struct sighand_struct {
+@@ -365,6 +353,9 @@ struct sighand_struct {
+ spinlock_t siglock;
+ };
+
++#include <linux/ve.h>
++#include <linux/ve_task.h>
++
+ /*
+ * NOTE! "signal_struct" does not have it's own
+ * locking, because a shared signal_struct always
+@@ -857,6 +848,16 @@ struct task_struct {
+ int cpuset_mems_generation;
+ #endif
+ atomic_t fs_excl; /* holding fs exclusive resources */
++#ifdef CONFIG_USER_RESOURCE
++ struct task_beancounter task_bc;
++#endif
++#ifdef CONFIG_VE
++ struct ve_task_info ve_task_info;
++#endif
++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
++ unsigned long magic;
++ struct inode *ino;
++#endif
+ };
+
+ static inline pid_t process_group(struct task_struct *tsk)
+@@ -948,6 +949,21 @@ static inline int set_cpus_allowed(task_
+ extern unsigned long long sched_clock(void);
+ extern unsigned long long current_sched_time(const task_t *current_task);
+
++static inline unsigned long cycles_to_clocks(cycles_t cycles)
++{
++ extern unsigned long cycles_per_clock;
++ do_div(cycles, cycles_per_clock);
++ return cycles;
++}
++
++static inline u64 cycles_to_jiffies(cycles_t cycles)
++{
++ extern unsigned long cycles_per_jiffy;
++ do_div(cycles, cycles_per_jiffy);
++ return cycles;
++}
++
++
+ /* sched_exec is called by processes performing an exec */
+ #ifdef CONFIG_SMP
+ extern void sched_exec(void);
+@@ -1000,12 +1016,227 @@ extern struct task_struct init_task;
+
+ extern struct mm_struct init_mm;
+
+-#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr)
+-extern struct task_struct *find_task_by_pid_type(int type, int pid);
++#define find_task_by_pid_all(nr) \
++ find_task_by_pid_type_all(PIDTYPE_PID, nr)
++extern struct task_struct *find_task_by_pid_type_all(int type, int pid);
+ extern void set_special_pids(pid_t session, pid_t pgrp);
+ extern void __set_special_pids(pid_t session, pid_t pgrp);
+
++#ifndef CONFIG_VE
++#define find_task_by_pid_ve find_task_by_pid_all
++
++#define get_exec_env() ((struct ve_struct *)NULL)
++#define set_exec_env(new_env) ((struct ve_struct *)NULL)
++
++#define ve_is_super(env) 1
++#define ve_accessible(target, owner) 1
++#define ve_accessible_strict(target, owner) 1
++#define ve_accessible_veid(target, owner) 1
++#define ve_accessible_strict_veid(target, owner) 1
++
++#define VEID(envid) 0
++#define get_ve0() NULL
++
++static inline pid_t virt_pid(struct task_struct *tsk)
++{
++ return tsk->pid;
++}
++
++static inline pid_t virt_tgid(struct task_struct *tsk)
++{
++ return tsk->tgid;
++}
++
++static inline pid_t virt_pgid(struct task_struct *tsk)
++{
++ return tsk->signal->pgrp;
++}
++
++static inline pid_t virt_sid(struct task_struct *tsk)
++{
++ return tsk->signal->session;
++}
++
++#define get_task_pid_ve(tsk, ve) get_task_pid(tsk)
++
++static inline pid_t get_task_pid(struct task_struct *tsk)
++{
++ return tsk->pid;
++}
++
++static inline pid_t get_task_tgid(struct task_struct *tsk)
++{
++ return tsk->tgid;
++}
++
++static inline pid_t get_task_pgid(struct task_struct *tsk)
++{
++ return tsk->signal->pgrp;
++}
++
++static inline pid_t get_task_sid(struct task_struct *tsk)
++{
++ return tsk->signal->session;
++}
++
++static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
++{
++}
++
++static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
++{
++}
++
++static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
++{
++}
++
++static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
++{
++}
++
++static inline pid_t get_task_ppid(struct task_struct *p)
++{
++ return pid_alive(p) ? p->group_leader->real_parent->tgid : 0;
++}
++
++#else /* CONFIG_VE */
++
++#include <asm/current.h>
++#include <linux/ve.h>
++
++extern struct ve_struct ve0;
++
++#define find_task_by_pid_ve(nr) \
++ find_task_by_pid_type_ve(PIDTYPE_PID, nr)
++
++extern struct task_struct *find_task_by_pid_type_ve(int type, int pid);
++
++#define get_ve0() (&ve0)
++#define VEID(envid) ((envid)->veid)
++
++#define get_exec_env() (VE_TASK_INFO(current)->exec_env)
++static inline struct ve_struct *set_exec_env(struct ve_struct *new_env)
++{
++ struct ve_struct *old_env;
++
++ old_env = VE_TASK_INFO(current)->exec_env;
++ VE_TASK_INFO(current)->exec_env = new_env;
++
++ return old_env;
++}
++
++#define ve_is_super(env) ((env) == get_ve0())
++#define ve_accessible_strict(target, owner) ((target) == (owner))
++static inline int ve_accessible(struct ve_struct *target,
++ struct ve_struct *owner) {
++ return ve_is_super(owner) || ve_accessible_strict(target, owner);
++}
++
++#define ve_accessible_strict_veid(target, owner) ((target) == (owner))
++static inline int ve_accessible_veid(envid_t target, envid_t owner)
++{
++ return get_ve0()->veid == owner ||
++ ve_accessible_strict_veid(target, owner);
++}
++
++static inline pid_t virt_pid(struct task_struct *tsk)
++{
++ return tsk->pids[PIDTYPE_PID].vnr;
++}
++
++static inline pid_t virt_tgid(struct task_struct *tsk)
++{
++ return tsk->pids[PIDTYPE_TGID].vnr;
++}
++
++static inline pid_t virt_pgid(struct task_struct *tsk)
++{
++ return tsk->pids[PIDTYPE_PGID].vnr;
++}
++
++static inline pid_t virt_sid(struct task_struct *tsk)
++{
++ return tsk->pids[PIDTYPE_SID].vnr;
++}
++
++static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *env)
++{
++ return ve_is_super(env) ? tsk->pid : virt_pid(tsk);
++}
++
++static inline pid_t get_task_pid(struct task_struct *tsk)
++{
++ return get_task_pid_ve(tsk, get_exec_env());
++}
++
++static inline pid_t get_task_tgid(struct task_struct *tsk)
++{
++ return ve_is_super(get_exec_env()) ? tsk->tgid : virt_tgid(tsk);
++}
++
++static inline pid_t get_task_pgid(struct task_struct *tsk)
++{
++ return ve_is_super(get_exec_env()) ? tsk->signal->pgrp : virt_pgid(tsk);
++}
++
++static inline pid_t get_task_sid(struct task_struct *tsk)
++{
++ return ve_is_super(get_exec_env()) ? tsk->signal->session : virt_sid(tsk);
++}
++
++static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
++{
++ tsk->pids[PIDTYPE_PID].vnr = pid;
++}
++
++static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
++{
++ tsk->pids[PIDTYPE_TGID].vnr = pid;
++}
++
++static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
++{
++ tsk->pids[PIDTYPE_PGID].vnr = pid;
++}
++
++static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
++{
++ tsk->pids[PIDTYPE_SID].vnr = pid;
++}
++
++static inline pid_t get_task_ppid(struct task_struct *p)
++{
++ struct task_struct *parent;
++ struct ve_struct *env;
++
++ if (!pid_alive(p))
++ return 0;
++ env = get_exec_env();
++ if (get_task_pid_ve(p, env) == 1)
++ return 0;
++ parent = p->group_leader->real_parent;
++ return ve_accessible(VE_TASK_INFO(parent)->owner_env, env) ?
++ get_task_tgid(parent) : 1;
++}
++
++void ve_sched_get_cpu_stat(struct ve_struct *envid, cycles_t *idle,
++ cycles_t *strv, unsigned int cpu);
++void ve_sched_attach(struct ve_struct *envid);
++
++#endif /* CONFIG_VE */
++
++
++#ifdef CONFIG_VE
++extern cycles_t ve_sched_get_idle_time(struct ve_struct *, int);
++extern cycles_t ve_sched_get_iowait_time(struct ve_struct *, int);
++#else
++#define ve_sched_get_idle_time(ve, cpu) 0
++#define ve_sched_get_iowait_time(ve, cpu) 0
++#endif
++
+ /* per-UID process charging. */
++extern int set_user(uid_t new_ruid, int dumpclear);
+ extern struct user_struct * alloc_uid(uid_t);
+ static inline struct user_struct *get_uid(struct user_struct *u)
+ {
+@@ -1182,22 +1413,100 @@ extern void wait_task_inactive(task_t *
+ add_parent(p, (p)->parent); \
+ } while (0)
+
+-#define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks)
+-#define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks)
++#define next_task_all(p) list_entry((p)->tasks.next, struct task_struct, tasks)
++#define prev_task_all(p) list_entry((p)->tasks.prev, struct task_struct, tasks)
+
+-#define for_each_process(p) \
+- for (p = &init_task ; (p = next_task(p)) != &init_task ; )
++#define for_each_process_all(p) \
++ for (p = &init_task ; (p = next_task_all(p)) != &init_task ; )
+
+ /*
+ * Careful: do_each_thread/while_each_thread is a double loop so
+ * 'break' will not work as expected - use goto instead.
+ */
+-#define do_each_thread(g, t) \
+- for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
++#define do_each_thread_all(g, t) \
++ for (g = t = &init_task ; (g = t = next_task_all(g)) != &init_task ; ) do
++
++#define while_each_thread_all(g, t) \
++ while ((t = next_thread(t)) != g)
++
++#ifndef CONFIG_VE
++
++#define SET_VE_LINKS(p)
++#define REMOVE_VE_LINKS(p)
++#define for_each_process_ve(p) for_each_process_all(p)
++#define do_each_thread_ve(g, t) do_each_thread_all(g, t)
++#define while_each_thread_ve(g, t) while_each_thread_all(g, t)
++#define first_task_ve() next_task_ve(&init_task)
++#define __first_task_ve(owner) next_task_ve(&init_task)
++#define __next_task_ve(owner, p) next_task_ve(p)
++#define next_task_ve(p) \
++ (next_task_all(p) != &init_task ? next_task_all(p) : NULL)
++
++#else /* CONFIG_VE */
++
++#define SET_VE_LINKS(p) \
++ do { \
++ if (thread_group_leader(p)) \
++ list_add_tail(&VE_TASK_INFO(p)->vetask_list, \
++ &VE_TASK_INFO(p)->owner_env->vetask_lh); \
++ } while (0)
++
++#define REMOVE_VE_LINKS(p) \
++ do { \
++ if (thread_group_leader(p)) \
++ list_del(&VE_TASK_INFO(p)->vetask_list); \
++ } while(0)
++
++static inline task_t* __first_task_ve(struct ve_struct *ve)
++{
++ task_t *tsk;
++
++ if (unlikely(ve_is_super(ve))) {
++ tsk = next_task_all(&init_task);
++ if (tsk == &init_task)
++ tsk = NULL;
++ } else {
++		/* we could probably return ve->init_entry, but this is clearer */
++ BUG_ON(list_empty(&ve->vetask_lh));
++ tsk = VE_TASK_LIST_2_TASK(ve->vetask_lh.next);
++ }
++ return tsk;
++}
++
++static inline task_t* __next_task_ve(struct ve_struct *ve, task_t *tsk)
++{
++ if (unlikely(ve_is_super(ve))) {
++ tsk = next_task_all(tsk);
++ if (tsk == &init_task)
++ tsk = NULL;
++ } else {
++ struct list_head *tmp;
++
++ BUG_ON(VE_TASK_INFO(tsk)->owner_env != ve);
++ tmp = VE_TASK_INFO(tsk)->vetask_list.next;
++ if (tmp == &ve->vetask_lh)
++ tsk = NULL;
++ else
++ tsk = VE_TASK_LIST_2_TASK(tmp);
++ }
++ return tsk;
++}
+
+-#define while_each_thread(g, t) \
++#define first_task_ve() __first_task_ve(get_exec_env())
++#define next_task_ve(p) __next_task_ve(get_exec_env(), p)
++/* no one uses prev_task_ve(), copy next_task_ve() if needed */
++
++#define for_each_process_ve(p) \
++ for (p = first_task_ve(); p != NULL ; p = next_task_ve(p))
++
++#define do_each_thread_ve(g, t) \
++ for (g = t = first_task_ve() ; g != NULL; g = t = next_task_ve(g)) do
++
++#define while_each_thread_ve(g, t) \
+ while ((t = next_thread(t)) != g)
+
++#endif /* CONFIG_VE */
++
+ extern task_t * FASTCALL(next_thread(const task_t *p));
+
+ #define thread_group_leader(p) (p->pid == p->tgid)
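(Editorial illustration only: counting the processes visible to the calling VE with the _ve iteration macros introduced above. Under CONFIG_VE they walk the VE's own task list; without it they fall back to the _all variants. example_nr_processes_ve() is a hypothetical helper.)

/* Hypothetical helper: how many processes does the current VE see? */
static unsigned long example_nr_processes_ve(void)
{
	struct task_struct *p;
	unsigned long nr = 0;

	read_lock(&tasklist_lock);
	for_each_process_ve(p)
		nr++;
	read_unlock(&tasklist_lock);
	return nr;
}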
+diff -uprN linux-2.6.15.orig/include/linux/shm.h linux-2.6.15-ve025stab014/include/linux/shm.h
+--- linux-2.6.15.orig/include/linux/shm.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/shm.h 2006-01-27 14:48:08.000000000 +0300
+@@ -86,6 +86,7 @@ struct shmid_kernel /* private to the ke
+ pid_t shm_cprid;
+ pid_t shm_lprid;
+ struct user_struct *mlock_user;
++ struct ipc_ids *_shm_ids;
+ };
+
+ /* shm_mode upper byte flags */
+diff -uprN linux-2.6.15.orig/include/linux/shmem_fs.h linux-2.6.15-ve025stab014/include/linux/shmem_fs.h
+--- linux-2.6.15.orig/include/linux/shmem_fs.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/shmem_fs.h 2006-01-27 14:48:06.000000000 +0300
+@@ -19,6 +19,9 @@ struct shmem_inode_info {
+ swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */
+ struct list_head swaplist; /* chain of maybes on swap */
+ struct inode vfs_inode;
++#ifdef CONFIG_USER_RESOURCE
++ struct user_beancounter *shmi_ub;
++#endif
+ };
+
+ struct shmem_sb_info {
+diff -uprN linux-2.6.15.orig/include/linux/signal.h linux-2.6.15-ve025stab014/include/linux/signal.h
+--- linux-2.6.15.orig/include/linux/signal.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/signal.h 2006-01-27 14:48:06.000000000 +0300
+@@ -28,6 +28,9 @@ struct sigqueue {
+ int flags;
+ siginfo_t info;
+ struct user_struct *user;
++#ifdef CONFIG_USER_RESOURCE
++ struct user_beancounter *sig_ub;
++#endif
+ };
+
+ /* flags values. */
+diff -uprN linux-2.6.15.orig/include/linux/skbuff.h linux-2.6.15-ve025stab014/include/linux/skbuff.h
+--- linux-2.6.15.orig/include/linux/skbuff.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/skbuff.h 2006-01-27 14:48:08.000000000 +0300
+@@ -19,6 +19,7 @@
+ #include <linux/compiler.h>
+ #include <linux/time.h>
+ #include <linux/cache.h>
++#include <linux/ve_owner.h>
+
+ #include <asm/atomic.h>
+ #include <asm/types.h>
+@@ -212,6 +213,8 @@ enum {
+ * @tc_verd: traffic control verdict
+ */
+
++#include <ub/ub_sk.h>
++
+ struct sk_buff {
+ /* These two members must be first. */
+ struct sk_buff *next;
+@@ -295,13 +298,18 @@ struct sk_buff {
+ *data,
+ *tail,
+ *end;
++ struct skb_beancounter skb_bc;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(SKB, struct sk_buff, owner_env)
++
+ #ifdef __KERNEL__
+ /*
+ * Handling routines are only of interest to the kernel
+ */
+ #include <linux/slab.h>
++#include <ub/ub_net.h>
+
+ #include <asm/system.h>
+
+diff -uprN linux-2.6.15.orig/include/linux/slab.h linux-2.6.15-ve025stab014/include/linux/slab.h
+--- linux-2.6.15.orig/include/linux/slab.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/slab.h 2006-01-27 14:48:06.000000000 +0300
+@@ -48,6 +48,26 @@ typedef struct kmem_cache kmem_cache_t;
+ #define SLAB_PANIC 0x00040000UL /* panic if kmem_cache_create() fails */
+ #define SLAB_DESTROY_BY_RCU 0x00080000UL /* defer freeing pages to RCU */
+
++/*
++ * allocation rules: __GFP_UBC 0
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * cache (SLAB_UBC) charge charge
++ * (usual caches: mm, vma, task_struct, ...)
++ *
++ * cache (SLAB_UBC | SLAB_NO_CHARGE) charge ---
++ * (ub_kmalloc) (kmalloc)
++ *
++ * cache (no UB flags) BUG() ---
++ * (nonub caches, mempools)
++ *
++ * pages charge ---
++ * (ub_vmalloc, (vmalloc,
++ * poll, fdsets, ...) non-ub allocs)
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ */
++#define SLAB_UBC 0x20000000UL /* alloc space for ubs ... */
++#define SLAB_NO_CHARGE 0x40000000UL /* ... but don't charge */
++
+ /* flags passed to a constructor func */
+ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
+ #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */
+diff -uprN linux-2.6.15.orig/include/linux/socket.h linux-2.6.15-ve025stab014/include/linux/socket.h
+--- linux-2.6.15.orig/include/linux/socket.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/socket.h 2006-01-27 14:48:08.000000000 +0300
+@@ -298,6 +298,7 @@ extern int memcpy_toiovec(struct iovec *
+ extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen);
+ extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr);
+ extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
++extern int vz_security_proto_check(int family, int type, int protocol);
+
+ #endif
+ #endif /* not kernel and not glibc */
+diff -uprN linux-2.6.15.orig/include/linux/swap.h linux-2.6.15-ve025stab014/include/linux/swap.h
+--- linux-2.6.15.orig/include/linux/swap.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/swap.h 2006-01-27 14:48:06.000000000 +0300
+@@ -80,6 +80,7 @@ struct address_space;
+ struct sysinfo;
+ struct writeback_control;
+ struct zone;
++struct user_beancounter;
+
+ /*
+ * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
+@@ -119,6 +120,7 @@ enum {
+ /*
+ * The in-memory structure used to track swap areas.
+ */
++struct user_beancounter;
+ struct swap_info_struct {
+ unsigned int flags;
+ int prio; /* swap priority */
+@@ -136,6 +138,9 @@ struct swap_info_struct {
+ unsigned int max;
+ unsigned int inuse_pages;
+ int next; /* next entry on swap list */
++#ifdef CONFIG_USER_SWAP_ACCOUNTING
++ struct user_beancounter **swap_ubs;
++#endif
+ };
+
+ struct swap_list_t {
+@@ -209,7 +214,7 @@ extern long total_swap_pages;
+ extern unsigned int nr_swapfiles;
+ extern struct swap_info_struct swap_info[];
+ extern void si_swapinfo(struct sysinfo *);
+-extern swp_entry_t get_swap_page(void);
++extern swp_entry_t get_swap_page(struct user_beancounter *);
+ extern int swap_duplicate(swp_entry_t);
+ extern int valid_swaphandles(swp_entry_t, unsigned long *);
+ extern void swap_free(swp_entry_t);
+@@ -277,7 +282,7 @@ static inline int remove_exclusive_swap_
+ return 0;
+ }
+
+-static inline swp_entry_t get_swap_page(void)
++static inline swp_entry_t get_swap_page(struct user_beancounter *ub)
+ {
+ swp_entry_t entry;
+ entry.val = 0;
+diff -uprN linux-2.6.15.orig/include/linux/sysctl.h linux-2.6.15-ve025stab014/include/linux/sysctl.h
+--- linux-2.6.15.orig/include/linux/sysctl.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/sysctl.h 2006-01-27 14:48:08.000000000 +0300
+@@ -146,6 +146,7 @@ enum
+ KERN_RANDOMIZE=68, /* int: randomize virtual address space */
+ KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */
+ KERN_SPIN_RETRY=70, /* int: number of spinlock retries */
++ KERN_VIRT_PIDS=202, /* int: VE pids virtualization */
+ };
+
+
+@@ -394,6 +395,7 @@ enum
+
+ enum {
+ NET_IPV4_ROUTE_FLUSH=1,
++ NET_IPV4_ROUTE_SRC_CHECK=188,
+ NET_IPV4_ROUTE_MIN_DELAY=2,
+ NET_IPV4_ROUTE_MAX_DELAY=3,
+ NET_IPV4_ROUTE_GC_THRESH=4,
+@@ -893,6 +895,8 @@ extern int proc_doulongvec_minmax(ctl_ta
+ void __user *, size_t *, loff_t *);
+ extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int,
+ struct file *, void __user *, size_t *, loff_t *);
++extern int proc_doutsstring(ctl_table *table, int write, struct file *,
++ void __user *, size_t *, loff_t *);
+
+ extern int do_sysctl (int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+@@ -947,6 +951,8 @@ extern ctl_handler sysctl_ms_jiffies;
+ */
+
+ /* A sysctl table is an array of struct ctl_table: */
++struct ve_struct;
++
+ struct ctl_table
+ {
+ int ctl_name; /* Binary ID */
+@@ -960,6 +966,7 @@ struct ctl_table
+ struct proc_dir_entry *de; /* /proc control block */
+ void *extra1;
+ void *extra2;
++ struct ve_struct *owner_env;
+ };
+
+ /* struct ctl_table_header is used to maintain dynamic lists of
+@@ -976,6 +983,9 @@ struct ctl_table_header * register_sysct
+ int insert_at_head);
+ void unregister_sysctl_table(struct ctl_table_header * table);
+
++ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr);
++void free_sysctl_clone(ctl_table *clone);
++
+ #else /* __KERNEL__ */
+
+ #endif /* __KERNEL__ */
+diff -uprN linux-2.6.15.orig/include/linux/tty.h linux-2.6.15-ve025stab014/include/linux/tty.h
+--- linux-2.6.15.orig/include/linux/tty.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/tty.h 2006-01-27 14:48:08.000000000 +0300
+@@ -297,8 +297,11 @@ struct tty_struct {
+ spinlock_t read_lock;
+ /* If the tty has a pending do_SAK, queue it here - akpm */
+ struct work_struct SAK_work;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(TTY, struct tty_struct, owner_env)
++
+ /* tty magic number */
+ #define TTY_MAGIC 0x5401
+
+@@ -325,6 +328,7 @@ struct tty_struct {
+ #define TTY_PTY_LOCK 16 /* pty private */
+ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */
+ #define TTY_HUPPED 18 /* Post driver->hangup() */
++#define TTY_CHARGED 19 /* Charged as ub resource */
+
+ #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
+
+diff -uprN linux-2.6.15.orig/include/linux/tty_driver.h linux-2.6.15-ve025stab014/include/linux/tty_driver.h
+--- linux-2.6.15.orig/include/linux/tty_driver.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/tty_driver.h 2006-01-27 14:48:08.000000000 +0300
+@@ -115,6 +115,7 @@
+ * character to the device.
+ */
+
++#include <linux/ve_owner.h>
+ #include <linux/fs.h>
+ #include <linux/list.h>
+ #include <linux/cdev.h>
+@@ -214,9 +215,18 @@ struct tty_driver {
+ unsigned int set, unsigned int clear);
+
+ struct list_head tty_drivers;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(TTYDRV, struct tty_driver, owner_env)
++
++#ifdef CONFIG_LEGACY_PTYS
++extern struct tty_driver *pty_driver;
++extern struct tty_driver *pty_slave_driver;
++#endif
++
+ extern struct list_head tty_drivers;
++extern rwlock_t tty_driver_guard;
+
+ struct tty_driver *alloc_tty_driver(int lines);
+ void put_tty_driver(struct tty_driver *driver);
+diff -uprN linux-2.6.15.orig/include/linux/ve.h linux-2.6.15-ve025stab014/include/linux/ve.h
+--- linux-2.6.15.orig/include/linux/ve.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/ve.h 2006-01-27 14:48:09.000000000 +0300
+@@ -0,0 +1,321 @@
++/*
++ * include/linux/ve.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_VE_H
++#define _LINUX_VE_H
++
++#include <linux/config.h>
++
++#ifndef __ENVID_T_DEFINED__
++typedef unsigned envid_t;
++#define __ENVID_T_DEFINED__
++#endif
++
++#include <linux/types.h>
++#include <linux/capability.h>
++#include <linux/utsname.h>
++#include <linux/sysctl.h>
++#include <linux/vzstat.h>
++#include <linux/kobject.h>
++
++#ifdef VZMON_DEBUG
++# define VZTRACE(fmt,args...) \
++ printk(KERN_DEBUG fmt, ##args)
++#else
++# define VZTRACE(fmt,args...)
++#endif /* VZMON_DEBUG */
++
++struct tty_driver;
++struct devpts_config;
++struct task_struct;
++struct new_utsname;
++struct file_system_type;
++struct icmp_mib;
++struct ip_mib;
++struct tcp_mib;
++struct udp_mib;
++struct linux_mib;
++struct fib_info;
++struct fib_rule;
++struct veip_struct;
++struct ve_monitor;
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++struct fib_table;
++struct devcnfv4_struct;
++#ifdef CONFIG_VE_IPTABLES
++struct ipt_filter_initial_table;
++struct ipt_nat_initial_table;
++struct ipt_table;
++struct ip_conntrack;
++struct nf_hook_ops;
++typedef unsigned int (*ip_nat_helper_func)(void);
++struct ve_ip_conntrack {
++ struct list_head *_ip_conntrack_hash;
++ struct list_head _ip_conntrack_expect_list;
++ struct ip_conntrack_protocol ** _ip_ct_protos;
++ struct list_head _ip_conntrack_helpers;
++ int _ip_conntrack_max;
++ int _ip_conntrack_vmalloc;
++ atomic_t _ip_conntrack_count;
++ void (*_ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
++#ifdef CONFIG_SYSCTL
++ unsigned long _ip_ct_tcp_timeouts[10];
++ unsigned long _ip_ct_udp_timeout;
++ unsigned long _ip_ct_udp_timeout_stream;
++ unsigned long _ip_ct_icmp_timeout;
++ unsigned long _ip_ct_generic_timeout;
++ unsigned int _ip_ct_log_invalid;
++ unsigned long _ip_ct_tcp_timeout_max_retrans;
++ int _ip_ct_tcp_loose;
++ int _ip_ct_tcp_be_liberal;
++ int _ip_ct_tcp_max_retrans;
++ struct ctl_table_header *_ip_ct_sysctl_header;
++ ctl_table *_ip_ct_net_table;
++ ctl_table *_ip_ct_ipv4_table;
++ ctl_table *_ip_ct_netfilter_table;
++ ctl_table *_ip_ct_sysctl_table;
++#endif /*CONFIG_SYSCTL*/
++
++ struct ip_nat_protocol **_ip_nat_protos;
++ ip_nat_helper_func _ip_nat_ftp_hook;
++ ip_nat_helper_func _ip_nat_irc_hook;
++ struct list_head *_ip_nat_bysource;
++ struct ipt_table *_ip_nat_table;
++
++ /* resource accounting */
++ struct user_beancounter *ub;
++};
++#endif
++#endif
++
++#define UIDHASH_BITS_VE 6
++#define UIDHASH_SZ_VE (1 << UIDHASH_BITS_VE)
++
++struct ve_cpu_stats {
++ cycles_t idle_time;
++ cycles_t iowait_time;
++ cycles_t strt_idle_time;
++ cycles_t used_time;
++ seqcount_t stat_lock;
++ int nr_running;
++ int nr_unint;
++ int nr_iowait;
++ cputime64_t user;
++ cputime64_t nice;
++ cputime64_t system;
++} ____cacheline_aligned;
++
++struct ve_struct {
++ struct ve_struct *prev;
++ struct ve_struct *next;
++
++ envid_t veid;
++ struct task_struct *init_entry;
++ struct list_head vetask_lh;
++ kernel_cap_t cap_default;
++ atomic_t pcounter;
++ /* ref counter to ve from ipc */
++ atomic_t counter;
++ unsigned int class_id;
++ struct veip_struct *veip;
++ struct rw_semaphore op_sem;
++ int is_running;
++ int virt_pids;
++
++/* VE's root */
++ struct vfsmount *fs_rootmnt;
++ struct dentry *fs_root;
++
++/* sysctl */
++ struct new_utsname *utsname;
++ struct list_head sysctl_lh;
++ struct ctl_table_header *kern_header;
++ struct ctl_table *kern_table;
++ struct ctl_table_header *quota_header;
++ struct ctl_table *quota_table;
++ struct file_system_type *proc_fstype;
++ struct vfsmount *proc_mnt;
++ struct proc_dir_entry *proc_root;
++ struct proc_dir_entry *proc_sys_root;
++ struct proc_dir_entry *_proc_net;
++ struct proc_dir_entry *_proc_net_stat;
++
++/* SYSV IPC */
++ struct ipc_ids *_shm_ids;
++ struct ipc_ids *_msg_ids;
++ struct ipc_ids *_sem_ids;
++ int _used_sems;
++ int _shm_tot;
++ size_t _shm_ctlmax;
++ size_t _shm_ctlall;
++ int _shm_ctlmni;
++ int _msg_ctlmax;
++ int _msg_ctlmni;
++ int _msg_ctlmnb;
++ int _sem_ctls[4];
++
++/* BSD pty's */
++ struct tty_driver *pty_driver;
++ struct tty_driver *pty_slave_driver;
++
++#ifdef CONFIG_UNIX98_PTYS
++ struct tty_driver *ptm_driver;
++ struct tty_driver *pts_driver;
++ struct idr *allocated_ptys;
++ struct file_system_type *devpts_fstype;
++ struct vfsmount *devpts_mnt;
++ struct dentry *devpts_root;
++ struct devpts_config *devpts_config;
++#endif
++
++ struct file_system_type *shmem_fstype;
++ struct vfsmount *shmem_mnt;
++#ifdef CONFIG_VE_SYSFS
++ struct file_system_type *sysfs_fstype;
++ struct vfsmount *sysfs_mnt;
++ struct super_block *sysfs_sb;
++#endif
++ struct subsystem *class_subsys;
++ struct subsystem *class_obj_subsys;
++ struct class *net_class;
++
++/* User uids hash */
++ struct list_head uidhash_table[UIDHASH_SZ_VE];
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ struct hlist_head _net_dev_head;
++ struct hlist_head _net_dev_index_head;
++ struct net_device *_net_dev_base, **_net_dev_tail;
++ int ifindex;
++ struct net_device *_loopback_dev;
++ struct net_device *_venet_dev;
++ struct ipv4_devconf *_ipv4_devconf;
++ struct ipv4_devconf *_ipv4_devconf_dflt;
++ struct ctl_table_header *forward_header;
++ struct ctl_table *forward_table;
++#endif
++ unsigned long rt_flush_required;
++
++/* per VE CPU stats*/
++ struct timespec start_timespec;
++ u64 start_jiffies;
++ cycles_t start_cycles;
++ unsigned long avenrun[3]; /* loadavg data */
++
++ cycles_t cpu_used_ve;
++ struct kstat_lat_pcpu_struct sched_lat_ve;
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ struct hlist_head *_fib_info_hash;
++ struct hlist_head *_fib_info_laddrhash;
++ int _fib_hash_size;
++ int _fib_info_cnt;
++
++ struct fib_rule *_local_rule;
++ struct fib_rule *_fib_rules;
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++ /* XXX: why a magic constant? */
++ struct fib_table *_fib_tables[256]; /* RT_TABLE_MAX - for now */
++#else
++ struct fib_table *_main_table;
++ struct fib_table *_local_table;
++#endif
++ struct icmp_mib *_icmp_statistics[2];
++ struct ipstats_mib *_ip_statistics[2];
++ struct tcp_mib *_tcp_statistics[2];
++ struct udp_mib *_udp_statistics[2];
++ struct linux_mib *_net_statistics[2];
++ struct venet_stat *stat;
++#ifdef CONFIG_VE_IPTABLES
++/* core/netfilter.c virtualization */
++ void *_nf_hooks;
++ struct ipt_table *_ve_ipt_filter_pf; /* packet_filter struct */
++ struct ipt_table *_ipt_mangle_table;
++ struct list_head *_ipt_target;
++ struct list_head *_ipt_match;
++ struct list_head *_ipt_tables;
++
++ struct ipt_target *_ipt_standard_target;
++ struct ipt_target *_ipt_error_target;
++ struct ipt_match *_tcp_matchstruct;
++ struct ipt_match *_udp_matchstruct;
++ struct ipt_match *_icmp_matchstruct;
++
++ __u64 _iptables_modules;
++ struct ve_ip_conntrack *_ip_conntrack;
++#endif /* CONFIG_VE_IPTABLES */
++#endif
++ wait_queue_head_t *_log_wait;
++ unsigned long *_log_start;
++ unsigned long *_log_end;
++ unsigned long *_logged_chars;
++ char *log_buf;
++#define VE_DEFAULT_LOG_BUF_LEN 4096
++
++ struct ve_cpu_stats ve_cpu_stats[NR_CPUS] ____cacheline_aligned;
++ unsigned long down_at;
++ struct list_head cleanup_list;
++
++ unsigned char sparse_vpid;
++ struct ve_monitor *monitor;
++ struct proc_dir_entry *monitor_proc;
++};
++
++#define VE_CPU_STATS(ve, cpu) (&((ve)->ve_cpu_stats[(cpu)]))
++
++extern int nr_ve;
++
++#ifdef CONFIG_VE
++
++#ifdef CONFIG_VE_CALLS
++#define get_device_perms_ve real_get_device_perms_ve
++#define do_env_cleanup real_do_env_cleanup
++#define do_env_free real_do_env_free
++#define do_update_load_avg_ve real_update_load_avg_ve
++#endif
++
++int get_device_perms_ve(int dev_type, dev_t dev, int access_mode);
++void do_env_cleanup(struct ve_struct *envid);
++void do_update_load_avg_ve(void);
++void do_env_free(struct ve_struct *ptr);
++
++#define ve_utsname (*get_exec_env()->utsname)
++
++static inline struct ve_struct *get_ve(struct ve_struct *ptr)
++{
++ if (ptr != NULL)
++ atomic_inc(&ptr->counter);
++ return ptr;
++}
++
++static inline void put_ve(struct ve_struct *ptr)
++{
++ if (ptr && atomic_dec_and_test(&ptr->counter)) {
++ if (atomic_read(&ptr->pcounter) > 0)
++ BUG();
++ if (ptr->is_running)
++ BUG();
++ do_env_free(ptr);
++ }
++}
++
++#ifdef CONFIG_FAIRSCHED
++#define ve_cpu_online_map(ve, mask) fairsched_cpu_online_map(ve->veid, mask)
++#else
++#define ve_cpu_online_map(ve, mask) do { *(mask) = cpu_online_map; } while (0)
++#endif
++#else /* CONFIG_VE */
++#define ve_utsname system_utsname
++#define get_ve(ve) (NULL)
++#define put_ve(ve) do { } while (0)
++#endif /* CONFIG_VE */
++
++#endif /* _LINUX_VE_H */
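Editor's note: get_ve()/put_ve() above are plain reference counting on struct ve_struct; the last put, with no tasks attached and the VE no longer running, ends up in do_env_free(). A hedged sketch of the intended usage; get_ve_by_id() is declared in linux/ve_proto.h further down, and whether it already returns a counted reference is an assumption here:

#include <linux/ve.h>
#include <linux/ve_proto.h>

static void example_touch_ve(envid_t veid)
{
	struct ve_struct *ve;

	ve = get_ve_by_id(veid);	/* assumed to return a counted reference */
	if (ve == NULL)
		return;
	/* ... inspect ve->veid, ve->is_running, per-VE proc entries, ... */
	put_ve(ve);			/* dropping the last reference may call do_env_free() */
}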
+diff -uprN linux-2.6.15.orig/include/linux/ve_owner.h linux-2.6.15-ve025stab014/include/linux/ve_owner.h
+--- linux-2.6.15.orig/include/linux/ve_owner.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/ve_owner.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,32 @@
++/*
++ * include/linux/ve_owner.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VE_OWNER_H__
++#define __VE_OWNER_H__
++
++#include <linux/config.h>
++#include <linux/vmalloc.h>
++
++
++#define DCL_VE_OWNER(name, type, member)
++	/* DCL_VE_OWNER_PROTO below declares the static inline accessors */
++
++#define DCL_VE_OWNER_PROTO(name, type, member) \
++type; \
++static inline struct ve_struct *VE_OWNER_##name(const type *obj) \
++{ \
++ return obj->member; \
++} \
++static inline void SET_VE_OWNER_##name(type *obj, struct ve_struct *ve) \
++{ \
++ obj->member = ve; \
++}
++
++#endif /* __VE_OWNER_H__ */
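Editor's note: DCL_VE_OWNER_PROTO only stamps out two trivial accessors per annotated structure. For the TTY case used in linux/tty.h above, the macro expands to exactly:

struct tty_struct;
static inline struct ve_struct *VE_OWNER_TTY(const struct tty_struct *obj)
{
	return obj->owner_env;
}
static inline void SET_VE_OWNER_TTY(struct tty_struct *obj, struct ve_struct *ve)
{
	obj->owner_env = ve;
}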
+diff -uprN linux-2.6.15.orig/include/linux/ve_proto.h linux-2.6.15-ve025stab014/include/linux/ve_proto.h
+--- linux-2.6.15.orig/include/linux/ve_proto.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/ve_proto.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,96 @@
++/*
++ * include/linux/ve_proto.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VE_H__
++#define __VE_H__
++
++#ifdef CONFIG_VE
++
++extern struct semaphore ve_call_guard;
++extern rwlock_t ve_call_lock;
++
++#ifdef CONFIG_SYSVIPC
++extern void prepare_ipc(void);
++extern int init_ve_ipc(struct ve_struct *);
++extern void fini_ve_ipc(struct ve_struct *);
++extern void ve_ipc_cleanup(void);
++#endif
++
++#ifdef CONFIG_UNIX98_PTYS
++extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */
++extern struct tty_driver *pts_driver; /* Unix98 pty slaves; for /dev/ptmx */
++#endif
++
++extern rwlock_t tty_driver_guard;
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++void ip_fragment_cleanup(struct ve_struct *envid);
++void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
++struct fib_table * fib_hash_init(int id);
++int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr);
++extern int main_loopback_init(struct net_device*);
++int venet_init(void);
++#endif
++
++extern struct ve_struct *ve_list_head;
++extern rwlock_t ve_list_guard;
++extern struct ve_struct *get_ve_by_id(envid_t);
++extern struct ve_struct *__find_ve_by_id(envid_t);
++
++extern int do_setdevperms(envid_t veid, unsigned type,
++ dev_t dev, unsigned mask);
++
++#ifdef CONFIG_VE_IPTABLES
++extern int init_netfilter(void);
++extern int init_iptables(void);
++extern int init_iptable_filter(void);
++extern int init_iptable_limit(void);
++extern int init_iptable_multiport(void);
++extern int init_iptable_tos(void);
++extern int init_iptable_REJECT(void);
++extern void fini_netfilter(void);
++extern int fini_iptables(void);
++extern int fini_iptable_filter(void);
++extern int fini_iptable_limit(void);
++extern int fini_iptable_multiport(void);
++extern int fini_iptable_tos(void);
++extern int fini_iptable_REJECT(void);
++#endif
++
++#define VE_HOOK_INIT 0
++#define VE_HOOK_FINI 1
++#define VE_MAX_HOOKS 2
++
++typedef int ve_hookfn(unsigned int hooknum, void *data);
++
++struct ve_hook
++{
++ struct list_head list;
++ ve_hookfn *hook;
++ ve_hookfn *undo;
++ struct module *owner;
++ int hooknum;
++ /* Functions are called in ascending priority. */
++ int priority;
++};
++
++extern int ve_hook_register(struct ve_hook *vh);
++extern void ve_hook_unregister(struct ve_hook *vh);
++
++struct ve_hook_init_data
++{
++ struct ve_struct *env;
++ u32 class_id;
++ struct env_create_param *data;
++ int datalen;
++};
++
++#endif
++#endif
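Editor's note: the ve_hook interface lets other code run callbacks around VE lifecycle events (VE_HOOK_INIT / VE_HOOK_FINI). A hedged registration sketch; that VE_HOOK_INIT passes a struct ve_hook_init_data, and that .undo is the rollback path for a failed init, are assumptions the header does not spell out:

#include <linux/module.h>
#include <linux/ve_proto.h>

static int example_ve_init(unsigned int hooknum, void *data)
{
	struct ve_hook_init_data *vhd = data;	/* assumed payload of VE_HOOK_INIT */

	/* per-VE setup for vhd->env goes here */
	return 0;
}

static int example_ve_undo(unsigned int hooknum, void *data)
{
	/* roll back whatever example_ve_init() did */
	return 0;
}

static struct ve_hook example_hook = {
	.hook	  = example_ve_init,
	.undo	  = example_ve_undo,
	.owner	  = THIS_MODULE,
	.hooknum  = VE_HOOK_INIT,
	.priority = 0,
};

static int __init example_init(void)
{
	return ve_hook_register(&example_hook);
}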
+diff -uprN linux-2.6.15.orig/include/linux/ve_task.h linux-2.6.15-ve025stab014/include/linux/ve_task.h
+--- linux-2.6.15.orig/include/linux/ve_task.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/ve_task.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,34 @@
++/*
++ * include/linux/ve_task.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VE_TASK_H__
++#define __VE_TASK_H__
++
++#include <linux/seqlock.h>
++
++struct ve_task_info {
++/* virtualization */
++ struct ve_struct *owner_env;
++ struct ve_struct *exec_env;
++ struct list_head vetask_list;
++ struct dentry *glob_proc_dentry;
++/* statistics: scheduling latency */
++ cycles_t sleep_time;
++ cycles_t sched_time;
++ cycles_t sleep_stamp;
++ cycles_t wakeup_stamp;
++ seqcount_t wakeup_lock;
++};
++
++#define VE_TASK_INFO(task) (&(task)->ve_task_info)
++#define VE_TASK_LIST_2_TASK(lh) \
++ list_entry(lh, struct task_struct, ve_task_info.vetask_list)
++
++#endif /* __VE_TASK_H__ */
+diff -uprN linux-2.6.15.orig/include/linux/venet.h linux-2.6.15-ve025stab014/include/linux/venet.h
+--- linux-2.6.15.orig/include/linux/venet.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/venet.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,68 @@
++/*
++ * include/linux/venet.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _VENET_H
++#define _VENET_H
++
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/vzcalluser.h>
++
++#define VEIP_HASH_SZ 512
++
++struct ve_struct;
++struct venet_stat;
++struct ip_entry_struct
++{
++ __u32 ip;
++ struct ve_struct *active_env;
++ struct venet_stat *stat;
++ struct veip_struct *veip;
++ struct list_head ip_hash;
++ struct list_head ve_list;
++};
++
++struct veip_struct
++{
++ struct list_head src_lh;
++ struct list_head dst_lh;
++ struct list_head ip_lh;
++ struct list_head list;
++ envid_t veid;
++};
++
++/* veip_hash_lock should be taken for write by caller */
++void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip);
++/* veip_hash_lock should be taken for write by caller */
++void ip_entry_unhash(struct ip_entry_struct *entry);
++/* veip_hash_lock should be taken for read by caller */
++struct ip_entry_struct *ip_entry_lookup(u32 addr);
++
++/* veip_hash_lock should be taken for read by caller */
++struct veip_struct *veip_find(envid_t veid);
++/* veip_hash_lock should be taken for write by caller */
++struct veip_struct *veip_findcreate(envid_t veid);
++/* veip_hash_lock should be taken for write by caller */
++void veip_put(struct veip_struct *veip);
++
++int veip_start(struct ve_struct *ve);
++void veip_stop(struct ve_struct *ve);
++int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr);
++int veip_entry_del(envid_t veid, struct sockaddr_in *addr);
++int venet_change_skb_owner(struct sk_buff *skb);
++
++extern struct list_head ip_entry_hash_table[];
++extern rwlock_t veip_hash_lock;
++
++#ifdef CONFIG_PROC_FS
++int veip_seq_show(struct seq_file *m, void *v);
++#endif
++
++#endif
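Editor's note: the comments above spell out the lock discipline for the veip hash: lookups take veip_hash_lock for read, while hash/unhash and find-or-create take it for write. A hedged sketch of adding an entry under that discipline; allocation of the entry, and whether interrupts must be disabled while the lock is held, are assumptions:

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/venet.h>

/* Sketch: hash an already-allocated ip_entry_struct under a VE's veip. */
static int example_bind_ip(struct ip_entry_struct *entry, envid_t veid)
{
	struct veip_struct *veip;
	int err = 0;

	write_lock_irq(&veip_hash_lock);	/* irq disabling is an assumption */
	veip = veip_findcreate(veid);		/* needs the write lock, per the comment */
	if (veip != NULL)
		ip_entry_hash(entry, veip);
	else
		err = -ENOMEM;
	write_unlock_irq(&veip_hash_lock);
	return err;
}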
+diff -uprN linux-2.6.15.orig/include/linux/virtinfo.h linux-2.6.15-ve025stab014/include/linux/virtinfo.h
+--- linux-2.6.15.orig/include/linux/virtinfo.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/virtinfo.h 2006-01-27 14:48:07.000000000 +0300
+@@ -0,0 +1,52 @@
++/*
++ * include/linux/virtinfo.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __LINUX_VIRTINFO_H
++#define __LINUX_VIRTINFO_H
++
++#include <linux/kernel.h>
++#include <linux/page-flags.h>
++#include <linux/rwsem.h>
++#include <linux/notifier.h>
++
++struct vnotifier_block
++{
++ int (*notifier_call)(struct vnotifier_block *self,
++ unsigned long, void *, int);
++ struct vnotifier_block *next;
++ int priority;
++};
++
++void virtinfo_notifier_register(int type, struct vnotifier_block *nb);
++void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb);
++int virtinfo_notifier_call(int type, unsigned long n, void *data);
++
++struct meminfo {
++ struct sysinfo si;
++ unsigned long active, inactive;
++ unsigned long cache, swapcache;
++ unsigned long committed_space;
++ unsigned long allowed;
++ struct page_state ps;
++ unsigned long vmalloc_total, vmalloc_used, vmalloc_largest;
++};
++
++#define VIRTINFO_MEMINFO 0
++#define VIRTINFO_ENOUGHMEM 1
++
++enum virt_info_types {
++ VITYPE_GENERAL,
++ VITYPE_FAUDIT,
++ VITYPE_QUOTA,
++
++ VIRT_TYPES
++};
++
++#endif /* __LINUX_VIRTINFO_H */
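Editor's note: virtinfo notifiers follow the usual notifier-chain shape: a handler registers on one of the VITYPE_* chains and is called with an event number, a payload, and the value returned so far. A hedged registration sketch; that VIRTINFO_MEMINFO carries a struct meminfo, and that handlers normally pass the previous return value through, are assumptions based only on the declarations above:

#include <linux/virtinfo.h>

static int example_meminfo_hook(struct vnotifier_block *self,
				unsigned long event, void *data, int old_ret)
{
	if (event == VIRTINFO_MEMINFO) {
		struct meminfo *mi = data;	/* assumed payload */
		/* e.g. adjust mi->si before it is reported inside a VE */
		(void)mi;
	}
	return old_ret;		/* assumed: keep the previous verdict */
}

static struct vnotifier_block example_nb = {
	.notifier_call	= example_meminfo_hook,
	.priority	= 0,
};

static void example_virtinfo_register(void)
{
	virtinfo_notifier_register(VITYPE_GENERAL, &example_nb);
}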
+diff -uprN linux-2.6.15.orig/include/linux/vmalloc.h linux-2.6.15-ve025stab014/include/linux/vmalloc.h
+--- linux-2.6.15.orig/include/linux/vmalloc.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vmalloc.h 2006-01-27 14:48:08.000000000 +0300
+@@ -18,6 +18,10 @@
+ #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */
+ #endif
+
++/* align size to 2^n page boundary */
++#define POWER2_PAGE_ALIGN(size) \
++ ((typeof(size))(1UL << (PAGE_SHIFT + get_order(size))))
++
+ struct vm_struct {
+ void *addr;
+ unsigned long size;
+@@ -36,6 +40,8 @@ extern void *vmalloc_node(unsigned long
+ extern void *vmalloc_exec(unsigned long size);
+ extern void *vmalloc_32(unsigned long size);
+ extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
++extern void *vmalloc_best(unsigned long size);
++extern void *ub_vmalloc_best(unsigned long size);
+ extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
+ pgprot_t prot);
+ extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
+@@ -52,6 +58,9 @@ extern void vunmap(void *addr);
+ extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
+ extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
+ unsigned long start, unsigned long end);
++extern struct vm_struct * get_vm_area_best(unsigned long size,
++ unsigned long flags);
++extern void vprintstat(void);
+ extern struct vm_struct *get_vm_area_node(unsigned long size,
+ unsigned long flags, int node);
+ extern struct vm_struct *remove_vm_area(void *addr);
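Editor's note: POWER2_PAGE_ALIGN rounds a size up to a power-of-two number of pages: get_order() picks the smallest n with 2^n pages >= size, so the macro returns that whole allocation size. With 4 KiB pages, 5000 bytes becomes 8192 and 20000 bytes becomes 32768. A small userspace re-implementation, only to check the arithmetic (get_order() here is a simplified stand-in for the kernel helper; size 0 is not handled):

#include <stdio.h>

#define PAGE_SHIFT 12			/* assume 4 KiB pages */

/* same computation as get_order(): smallest n with (1 << n) pages >= size */
static int get_order(unsigned long size)
{
	int order = 0;
	unsigned long s = (size - 1) >> PAGE_SHIFT;

	while (s) {
		order++;
		s >>= 1;
	}
	return order;
}

#define POWER2_PAGE_ALIGN(size) \
	((unsigned long)(1UL << (PAGE_SHIFT + get_order(size))))

int main(void)
{
	printf("%lu -> %lu\n", 5000UL,  POWER2_PAGE_ALIGN(5000UL));	/* 8192 */
	printf("%lu -> %lu\n", 20000UL, POWER2_PAGE_ALIGN(20000UL));	/* 32768 */
	return 0;
}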
+diff -uprN linux-2.6.15.orig/include/linux/vzcalluser.h linux-2.6.15-ve025stab014/include/linux/vzcalluser.h
+--- linux-2.6.15.orig/include/linux/vzcalluser.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vzcalluser.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,206 @@
++/*
++ * include/linux/vzcalluser.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_VZCALLUSER_H
++#define _LINUX_VZCALLUSER_H
++
++#include <linux/types.h>
++#include <linux/ioctl.h>
++
++#define KERN_VZ_PRIV_RANGE 51
++
++#ifndef __ENVID_T_DEFINED__
++typedef unsigned envid_t;
++#define __ENVID_T_DEFINED__
++#endif
++
++/*
++ * VE management ioctls
++ */
++
++struct vzctl_old_env_create {
++ envid_t veid;
++ unsigned flags;
++#define VE_CREATE 1 /* Create VE, VE_ENTER added automatically */
++#define VE_EXCLUSIVE 2 /* Fail if exists */
++#define VE_ENTER 4 /* Enter existing VE */
++#define VE_TEST 8 /* Test if VE exists */
++ __u32 addr;
++};
++
++struct vzctl_mark_env_to_down {
++ envid_t veid;
++};
++
++struct vzctl_setdevperms {
++ envid_t veid;
++ unsigned type;
++#define VE_USE_MAJOR 010 /* Test MAJOR supplied in rule */
++#define VE_USE_MINOR 030 /* Test MINOR supplied in rule */
++#define VE_USE_MASK 030 /* Testing mask, VE_USE_MAJOR|VE_USE_MINOR */
++ unsigned dev;
++ unsigned mask;
++};
++
++struct vzctl_ve_netdev {
++ envid_t veid;
++ int op;
++#define VE_NETDEV_ADD 1
++#define VE_NETDEV_DEL 2
++ char *dev_name;
++};
++
++/* these masks represent modules */
++#define VE_IP_IPTABLES_MOD (1U<<0)
++#define VE_IP_FILTER_MOD (1U<<1)
++#define VE_IP_MANGLE_MOD (1U<<2)
++#define VE_IP_MATCH_LIMIT_MOD (1U<<3)
++#define VE_IP_MATCH_MULTIPORT_MOD (1U<<4)
++#define VE_IP_MATCH_TOS_MOD (1U<<5)
++#define VE_IP_TARGET_TOS_MOD (1U<<6)
++#define VE_IP_TARGET_REJECT_MOD (1U<<7)
++#define VE_IP_TARGET_TCPMSS_MOD (1U<<8)
++#define VE_IP_MATCH_TCPMSS_MOD (1U<<9)
++#define VE_IP_MATCH_TTL_MOD (1U<<10)
++#define VE_IP_TARGET_LOG_MOD (1U<<11)
++#define VE_IP_MATCH_LENGTH_MOD (1U<<12)
++#define VE_IP_CONNTRACK_MOD (1U<<14)
++#define VE_IP_CONNTRACK_FTP_MOD (1U<<15)
++#define VE_IP_CONNTRACK_IRC_MOD (1U<<16)
++#define VE_IP_MATCH_CONNTRACK_MOD (1U<<17)
++#define VE_IP_MATCH_STATE_MOD (1U<<18)
++#define VE_IP_MATCH_HELPER_MOD (1U<<19)
++#define VE_IP_NAT_MOD (1U<<20)
++#define VE_IP_NAT_FTP_MOD (1U<<21)
++#define VE_IP_NAT_IRC_MOD (1U<<22)
++
++/* these masks represent modules with their dependences */
++#define VE_IP_IPTABLES (VE_IP_IPTABLES_MOD)
++#define VE_IP_FILTER (VE_IP_FILTER_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MANGLE (VE_IP_MANGLE_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_LIMIT (VE_IP_MATCH_LIMIT_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_MULTIPORT (VE_IP_MATCH_MULTIPORT_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_TOS (VE_IP_MATCH_TOS_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_TARGET_TOS (VE_IP_TARGET_TOS_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_TARGET_REJECT (VE_IP_TARGET_REJECT_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_TARGET_TCPMSS (VE_IP_TARGET_TCPMSS_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_TCPMSS (VE_IP_MATCH_TCPMSS_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_TTL (VE_IP_MATCH_TTL_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_TARGET_LOG (VE_IP_TARGET_LOG_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_LENGTH (VE_IP_MATCH_LENGTH_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_CONNTRACK (VE_IP_CONNTRACK_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_CONNTRACK_FTP (VE_IP_CONNTRACK_FTP_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_CONNTRACK_IRC (VE_IP_CONNTRACK_IRC_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_MATCH_CONNTRACK (VE_IP_MATCH_CONNTRACK_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_MATCH_STATE (VE_IP_MATCH_STATE_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_MATCH_HELPER (VE_IP_MATCH_HELPER_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_NAT (VE_IP_NAT_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_NAT_FTP (VE_IP_NAT_FTP_MOD \
++ | VE_IP_NAT | VE_IP_CONNTRACK_FTP)
++#define VE_IP_NAT_IRC (VE_IP_NAT_IRC_MOD \
++ | VE_IP_NAT | VE_IP_CONNTRACK_IRC)
++
++/* safe iptables mask to be used by default */
++#define VE_IP_DEFAULT \
++ (VE_IP_IPTABLES | \
++ VE_IP_FILTER | VE_IP_MANGLE | \
++ VE_IP_MATCH_LIMIT | VE_IP_MATCH_MULTIPORT | \
++ VE_IP_MATCH_TOS | VE_IP_TARGET_REJECT | \
++ VE_IP_TARGET_TCPMSS | VE_IP_MATCH_TCPMSS | \
++ VE_IP_MATCH_TTL | VE_IP_MATCH_LENGTH)
++
++#define VE_IPT_CMP(x,y) (((x) & (y)) == (y))
++
++struct vzctl_env_create_cid {
++ envid_t veid;
++ unsigned flags;
++ __u32 class_id;
++};
++
++struct vzctl_env_create {
++ envid_t veid;
++ unsigned flags;
++ __u32 class_id;
++};
++
++struct env_create_param {
++ __u64 iptables_mask;
++#define VZCTL_ENV_CREATE_DATA_MINLEN sizeof(__u64)
++#define VZCTL_ENV_CREATE_DATA_MAXLEN sizeof(__u64)
++};
++
++struct vzctl_env_create_data {
++ envid_t veid;
++ unsigned flags;
++ __u32 class_id;
++ struct env_create_param *data;
++ int datalen;
++};
++
++struct vz_load_avg {
++ int val_int;
++ int val_frac;
++};
++
++struct vz_cpu_stat {
++ unsigned long user_jif;
++ unsigned long nice_jif;
++ unsigned long system_jif;
++ unsigned long uptime_jif;
++ cycles_t idle_clk;
++ cycles_t strv_clk;
++ cycles_t uptime_clk;
++ struct vz_load_avg avenrun[3]; /* loadavg data */
++};
++
++struct vzctl_cpustatctl {
++ envid_t veid;
++ struct vz_cpu_stat *cpustat;
++};
++
++#define VZCTLTYPE '.'
++#define VZCTL_OLD_ENV_CREATE _IOW(VZCTLTYPE, 0, \
++ struct vzctl_old_env_create)
++#define VZCTL_MARK_ENV_TO_DOWN _IOW(VZCTLTYPE, 1, \
++ struct vzctl_mark_env_to_down)
++#define VZCTL_SETDEVPERMS _IOW(VZCTLTYPE, 2, \
++ struct vzctl_setdevperms)
++#define VZCTL_ENV_CREATE_CID _IOW(VZCTLTYPE, 4, \
++ struct vzctl_env_create_cid)
++#define VZCTL_ENV_CREATE _IOW(VZCTLTYPE, 5, \
++ struct vzctl_env_create)
++#define VZCTL_GET_CPU_STAT _IOW(VZCTLTYPE, 6, \
++ struct vzctl_cpustatctl)
++#define VZCTL_ENV_CREATE_DATA _IOW(VZCTLTYPE, 10, \
++ struct vzctl_env_create_data)
++#define VZCTL_VE_NETDEV _IOW(VZCTLTYPE, 11, \
++ struct vzctl_ve_netdev)
++
++
++#endif
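Editor's note: these ioctls form the userspace control interface. A hedged sketch of creating a VE via VZCTL_ENV_CREATE_DATA from a userspace tool; the /dev/vzctl device node name is an assumption (it is not defined in this header), as is the header being visible to userspace builds:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vzcalluser.h>

int main(void)
{
	struct env_create_param p = { .iptables_mask = VE_IP_DEFAULT };
	struct vzctl_env_create_data d = {
		.veid     = 101,		/* illustrative VE id */
		.flags    = VE_CREATE,		/* VE_ENTER is implied, per the comment above */
		.class_id = 0,
		.data     = &p,
		.datalen  = sizeof(p),
	};
	int fd = open("/dev/vzctl", O_RDWR);	/* device name is an assumption */

	if (fd < 0 || ioctl(fd, VZCTL_ENV_CREATE_DATA, &d) < 0)
		perror("VZCTL_ENV_CREATE_DATA");
	if (fd >= 0)
		close(fd);
	return 0;
}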
+diff -uprN linux-2.6.15.orig/include/linux/vzctl.h linux-2.6.15-ve025stab014/include/linux/vzctl.h
+--- linux-2.6.15.orig/include/linux/vzctl.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vzctl.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,30 @@
++/*
++ * include/linux/vzctl.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_VZCTL_H
++#define _LINUX_VZCTL_H
++
++#include <linux/list.h>
++
++struct module;
++struct inode;
++struct file;
++struct vzioctlinfo {
++ unsigned type;
++ int (*func)(struct inode *, struct file *,
++ unsigned int, unsigned long);
++ struct module *owner;
++ struct list_head list;
++};
++
++extern void vzioctl_register(struct vzioctlinfo *inf);
++extern void vzioctl_unregister(struct vzioctlinfo *inf);
++
++#endif
+diff -uprN linux-2.6.15.orig/include/linux/vzctl_quota.h linux-2.6.15-ve025stab014/include/linux/vzctl_quota.h
+--- linux-2.6.15.orig/include/linux/vzctl_quota.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vzctl_quota.h 2006-01-27 14:48:09.000000000 +0300
+@@ -0,0 +1,43 @@
++/*
++ * include/linux/vzctl_quota.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __LINUX_VZCTL_QUOTA_H__
++#define __LINUX_VZCTL_QUOTA_H__
++
++/*
++ * Quota management ioctl
++ */
++
++struct vz_quota_stat;
++struct vzctl_quotactl {
++ int cmd;
++ unsigned int quota_id;
++ struct vz_quota_stat *qstat;
++ char *ve_root;
++};
++
++struct vzctl_quotaugidctl {
++ int cmd; /* subcommand */
++	unsigned int quota_id;	/* quota id this applies to */
++	unsigned int ugid_index;/* for reading statistics: index of the first
++				   uid/gid record to read */
++ unsigned int ugid_size; /* size of ugid_buf array */
++ void *addr; /* user-level buffer */
++};
++
++#define VZDQCTLTYPE '+'
++#define VZCTL_QUOTA_CTL _IOWR(VZDQCTLTYPE, 1, \
++ struct vzctl_quotactl)
++#define VZCTL_QUOTA_NEW_CTL _IOWR(VZDQCTLTYPE, 2, \
++ struct vzctl_quotactl)
++#define VZCTL_QUOTA_UGID_CTL _IOWR(VZDQCTLTYPE, 3, \
++ struct vzctl_quotaugidctl)
++
++#endif /* __LINUX_VZCTL_QUOTA_H__ */
+diff -uprN linux-2.6.15.orig/include/linux/vzctl_venet.h linux-2.6.15-ve025stab014/include/linux/vzctl_venet.h
+--- linux-2.6.15.orig/include/linux/vzctl_venet.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vzctl_venet.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,36 @@
++/*
++ * include/linux/vzctl_venet.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _VZCTL_VENET_H
++#define _VZCTL_VENET_H
++
++#include <linux/types.h>
++#include <linux/ioctl.h>
++
++#ifndef __ENVID_T_DEFINED__
++typedef unsigned envid_t;
++#define __ENVID_T_DEFINED__
++#endif
++
++struct vzctl_ve_ip_map {
++ envid_t veid;
++ int op;
++#define VE_IP_ADD 1
++#define VE_IP_DEL 2
++ struct sockaddr *addr;
++ int addrlen;
++};
++
++#define VENETCTLTYPE '('
++
++#define VENETCTL_VE_IP_MAP _IOW(VENETCTLTYPE, 3, \
++ struct vzctl_ve_ip_map)
++
++#endif
+diff -uprN linux-2.6.15.orig/include/linux/vzdq_tree.h linux-2.6.15-ve025stab014/include/linux/vzdq_tree.h
+--- linux-2.6.15.orig/include/linux/vzdq_tree.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vzdq_tree.h 2006-01-27 14:48:09.000000000 +0300
+@@ -0,0 +1,99 @@
++/*
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains Virtuozzo disk quota tree definition
++ */
++
++#ifndef _VZDQ_TREE_H
++#define _VZDQ_TREE_H
++
++#include <linux/list.h>
++#include <asm/string.h>
++
++typedef unsigned int quotaid_t;
++#define QUOTAID_BITS 32
++#define QUOTAID_BBITS 4
++#define QUOTAID_EBITS 8
++
++#if QUOTAID_EBITS % QUOTAID_BBITS
++#error Quota bit assumption failure
++#endif
++
++#define QUOTATREE_BSIZE (1 << QUOTAID_BBITS)
++#define QUOTATREE_BMASK (QUOTATREE_BSIZE - 1)
++#define QUOTATREE_DEPTH ((QUOTAID_BITS + QUOTAID_BBITS - 1) \
++ / QUOTAID_BBITS)
++#define QUOTATREE_EDEPTH ((QUOTAID_BITS + QUOTAID_EBITS - 1) \
++ / QUOTAID_EBITS)
++#define QUOTATREE_BSHIFT(lvl) ((QUOTATREE_DEPTH - (lvl) - 1) * QUOTAID_BBITS)
++
++/*
++ * Depth of keeping unused node (not inclusive).
++ * 0 means release all nodes including root,
++ * QUOTATREE_DEPTH means never release nodes.
++ * Current value: release all nodes strictly after QUOTATREE_EDEPTH
++ * (measured in external shift units).
++ */
++#define QUOTATREE_CDEPTH (QUOTATREE_DEPTH \
++ - 2 * QUOTATREE_DEPTH / QUOTATREE_EDEPTH \
++ + 1)
++
++/*
++ * Levels 0..(QUOTATREE_DEPTH-1) are tree nodes.
++ * On level i the maximal number of nodes is 2^(i*QUOTAID_BBITS),
++ * and each node contains 2^QUOTAID_BBITS pointers.
++ * Level 0 is a (single) tree root node.
++ *
++ * Nodes of level (QUOTATREE_DEPTH-1) contain pointers to caller's data.
++ * Nodes of lower levels contain pointers to nodes.
++ *
++ * A double pointer in the array of an i-level node, pointing to an (i+1)-level
++ * node (such as inside quotatree_find_state), is marked with level (i+1), not i.
++ * Level 0 double pointer is a pointer to root inside tree struct.
++ *
++ * The tree is permanent, i.e. all index blocks allocated are kept alive to
++ * preserve the block numbers in the quota file tree and keep its changes
++ * local.
++ */
++struct quotatree_node {
++ struct list_head list;
++ quotaid_t num;
++ void *blocks[QUOTATREE_BSIZE];
++};
++
++struct quotatree_level {
++ struct list_head usedlh, freelh;
++ quotaid_t freenum;
++};
++
++struct quotatree_tree {
++ struct quotatree_level levels[QUOTATREE_DEPTH];
++ struct quotatree_node *root;
++ unsigned int leaf_num;
++};
++
++struct quotatree_find_state {
++ void **block;
++ int level;
++};
++
++/* number of leafs (objects) and leaf level of the tree */
++#define QTREE_LEAFNUM(tree) ((tree)->leaf_num)
++#define QTREE_LEAFLVL(tree) (&(tree)->levels[QUOTATREE_DEPTH - 1])
++
++struct quotatree_tree *quotatree_alloc(void);
++void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
++ struct quotatree_find_state *st);
++int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
++ struct quotatree_find_state *st, void *data);
++void quotatree_remove(struct quotatree_tree *tree, quotaid_t id);
++void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *));
++void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id);
++void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index);
++
++#endif /* _VZDQ_TREE_H */
++
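Editor's note: quotatree_find() and quotatree_insert() are split so a failed lookup can be turned into an insertion without repeating the tree walk: the quotatree_find_state filled by the former is handed to the latter. A hedged sketch of that find-or-create pattern; locking is left to the caller by this header and is omitted, and the 0-on-success convention for quotatree_insert() is an assumption:

#include <linux/vzdq_tree.h>

/* Sketch: look up a per-UID record, inserting fresh_data on first use. */
static void *example_lookup_or_insert(struct quotatree_tree *tree,
				      quotaid_t id, void *fresh_data)
{
	struct quotatree_find_state st;
	void *data;

	data = quotatree_find(tree, id, &st);
	if (data != NULL)
		return data;		/* already present */
	if (quotatree_insert(tree, id, &st, fresh_data) != 0)
		return NULL;		/* assumed: non-zero means failure */
	return fresh_data;
}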
+diff -uprN linux-2.6.15.orig/include/linux/vzquota.h linux-2.6.15-ve025stab014/include/linux/vzquota.h
+--- linux-2.6.15.orig/include/linux/vzquota.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vzquota.h 2006-01-27 14:48:09.000000000 +0300
+@@ -0,0 +1,288 @@
++/*
++ *
++ * Copyright (C) 2001-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains Virtuozzo disk quota implementation
++ */
++
++#ifndef _VZDQUOTA_H
++#define _VZDQUOTA_H
++
++#include <linux/types.h>
++#include <linux/quota.h>
++
++/* vzquotactl syscall commands */
++#define VZ_DQ_CREATE 5 /* create quota master block */
++#define VZ_DQ_DESTROY 6 /* destroy qmblk */
++#define VZ_DQ_ON 7 /* mark dentry with already created qmblk */
++#define VZ_DQ_OFF 8 /* remove mark, don't destroy qmblk */
++#define VZ_DQ_SETLIMIT 9 /* set new limits */
++#define VZ_DQ_GETSTAT 10 /* get usage statistic */
++/* set of syscalls to maintain UGID quotas */
++#define VZ_DQ_UGID_GETSTAT 1 /* get usage/limits for ugid(s) */
++#define VZ_DQ_UGID_ADDSTAT 2 /* set usage/limits statistic for ugid(s) */
++#define VZ_DQ_UGID_GETGRACE 3 /* get expire times */
++#define VZ_DQ_UGID_SETGRACE 4 /* set expire times */
++#define VZ_DQ_UGID_GETCONFIG 5 /* get ugid_max limit, cnt, flags of qmblk */
++#define VZ_DQ_UGID_SETCONFIG 6 /* set ugid_max limit, flags of qmblk */
++#define VZ_DQ_UGID_SETLIMIT 7 /* set ugid B/I limits */
++#define VZ_DQ_UGID_SETINFO 8 /* set ugid info */
++
++/* common structure for vz and ugid quota */
++struct dq_stat {
++ /* blocks limits */
++ __u64 bhardlimit; /* absolute limit in bytes */
++ __u64 bsoftlimit; /* preferred limit in bytes */
++ time_t btime; /* time limit for excessive disk use */
++ __u64 bcurrent; /* current bytes count */
++ /* inodes limits */
++ __u32 ihardlimit; /* absolute limit on allocated inodes */
++ __u32 isoftlimit; /* preferred inode limit */
++ time_t itime; /* time limit for excessive inode use */
++ __u32 icurrent; /* current # allocated inodes */
++};
++
++/* Values for dq_info->flags */
++#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */
++#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */
++
++struct dq_info {
++ time_t bexpire; /* expire timeout for excessive disk use */
++ time_t iexpire; /* expire timeout for excessive inode use */
++	unsigned flags;		/* see previous defines */
++};
++
++struct vz_quota_stat {
++ struct dq_stat dq_stat;
++ struct dq_info dq_info;
++};
++
++/* UID/GID interface record - for user-kernel level exchange */
++struct vz_quota_iface {
++ unsigned int qi_id; /* UID/GID this applies to */
++ unsigned int qi_type; /* USRQUOTA|GRPQUOTA */
++ struct dq_stat qi_stat; /* limits, options, usage stats */
++};
++
++/* values for flags and dq_flags */
++/* this flag is set if the userspace has been unable to provide usage
++ * information about all ugids
++ * if the flag is set, we don't allocate new UG quota blocks (their
++ * current usage is unknown) or free existing UG quota blocks (not to
++ * lose information that this block is ok) */
++#define VZDQUG_FIXED_SET 0x01
++/* permit to use ugid quota */
++#define VZDQUG_ON 0x02
++#define VZDQ_USRQUOTA 0x10
++#define VZDQ_GRPQUOTA 0x20
++#define VZDQ_NOACT 0x1000 /* not actual */
++#define VZDQ_NOQUOT 0x2000 /* not under quota tree */
++
++struct vz_quota_ugid_stat {
++ unsigned int limit; /* max amount of ugid records */
++ unsigned int count; /* amount of ugid records */
++ unsigned int flags;
++};
++
++struct vz_quota_ugid_setlimit {
++ unsigned int type; /* quota type (USR/GRP) */
++ unsigned int id; /* ugid */
++ struct if_dqblk dqb; /* limits info */
++};
++
++struct vz_quota_ugid_setinfo {
++ unsigned int type; /* quota type (USR/GRP) */
++ struct if_dqinfo dqi; /* grace info */
++};
++
++#ifdef __KERNEL__
++#include <linux/list.h>
++#include <asm/atomic.h>
++#include <asm/semaphore.h>
++#include <linux/time.h>
++#include <linux/vzquota_qlnk.h>
++#include <linux/vzdq_tree.h>
++
++/* Values for dq_info flags */
++#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */
++#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */
++
++/* values for dq_state */
++#define VZDQ_STARTING 0 /* created, not turned on yet */
++#define VZDQ_WORKING 1 /* quota created, turned on */
++#define VZDQ_STOPING 2 /* created, turned on and off */
++
++/* master quota record - one per veid */
++struct vz_quota_master {
++ struct list_head dq_hash; /* next quota in hash list */
++ atomic_t dq_count; /* inode reference count */
++ unsigned int dq_flags; /* see VZDQUG_FIXED_SET */
++ unsigned int dq_state; /* see values above */
++ unsigned int dq_id; /* VEID this applies to */
++ struct dq_stat dq_stat; /* limits, grace, usage stats */
++ struct dq_info dq_info; /* grace times and flags */
++ spinlock_t dq_data_lock; /* for dq_stat */
++
++ struct semaphore dq_sem; /* semaphore to protect
++ ugid tree */
++
++ struct list_head dq_ilink_list; /* list of vz_quota_ilink */
++ struct quotatree_tree *dq_uid_tree; /* vz_quota_ugid tree for UIDs */
++ struct quotatree_tree *dq_gid_tree; /* vz_quota_ugid tree for GIDs */
++ unsigned int dq_ugid_count; /* amount of ugid records */
++ unsigned int dq_ugid_max; /* max amount of ugid records */
++ struct dq_info dq_ugid_info[MAXQUOTAS]; /* ugid grace times */
++
++ struct dentry *dq_root_dentry;/* dentry of fs tree */
++ struct vfsmount *dq_root_mnt; /* vfsmnt of this dentry */
++ struct super_block *dq_sb; /* superblock of our quota root */
++};
++
++/* UID/GID quota record - one per pair (quota_master, uid or gid) */
++struct vz_quota_ugid {
++ unsigned int qugid_id; /* UID/GID this applies to */
++ struct dq_stat qugid_stat; /* limits, options, usage stats */
++ int qugid_type; /* USRQUOTA|GRPQUOTA */
++ atomic_t qugid_count; /* reference count */
++};
++
++#define VZ_QUOTA_UGBAD ((struct vz_quota_ugid *)0xfeafea11)
++
++struct vz_quota_datast {
++ struct vz_quota_ilink qlnk;
++};
++
++#define VIRTINFO_QUOTA_GETSTAT 0
++#define VIRTINFO_QUOTA_ON 1
++#define VIRTINFO_QUOTA_OFF 2
++
++struct virt_info_quota {
++ struct super_block *super;
++ struct dq_stat *qstat;
++};
++
++/*
++ * Interface to VZ quota core
++ */
++#define INODE_QLNK(inode) (&(inode)->i_qlnk)
++#define QLNK_INODE(qlnk) container_of((qlnk), struct inode, i_qlnk)
++
++#define VZ_QUOTA_BAD ((struct vz_quota_master *)0xefefefef)
++
++#define VZ_QUOTAO_SETE 1
++#define VZ_QUOTAO_INIT 2
++#define VZ_QUOTAO_DESTR 3
++#define VZ_QUOTAO_SWAP 4
++#define VZ_QUOTAO_INICAL 5
++#define VZ_QUOTAO_DRCAL 6
++#define VZ_QUOTAO_QSET 7
++#define VZ_QUOTAO_TRANS 8
++#define VZ_QUOTAO_ACT 9
++#define VZ_QUOTAO_DTREE 10
++#define VZ_QUOTAO_DET 11
++#define VZ_QUOTAO_ON 12
++
++extern struct semaphore vz_quota_sem;
++void inode_qmblk_lock(struct super_block *sb);
++void inode_qmblk_unlock(struct super_block *sb);
++void qmblk_data_read_lock(struct vz_quota_master *qmblk);
++void qmblk_data_read_unlock(struct vz_quota_master *qmblk);
++void qmblk_data_write_lock(struct vz_quota_master *qmblk);
++void qmblk_data_write_unlock(struct vz_quota_master *qmblk);
++
++/* for quota operations */
++void vzquota_inode_init_call(struct inode *inode);
++void vzquota_inode_drop_call(struct inode *inode);
++int vzquota_inode_transfer_call(struct inode *, struct iattr *);
++struct vz_quota_master *vzquota_inode_data(struct inode *inode,
++ struct vz_quota_datast *);
++void vzquota_data_unlock(struct inode *inode, struct vz_quota_datast *);
++int vzquota_rename_check(struct inode *inode,
++ struct inode *old_dir, struct inode *new_dir);
++struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode);
++/* for second-level quota */
++struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
++/* for management operations */
++struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
++ struct vz_quota_stat *qstat);
++void vzquota_free_master(struct vz_quota_master *);
++struct vz_quota_master *vzquota_find_master(unsigned int quota_id);
++int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
++ struct vz_quota_master *qmblk);
++int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk);
++int vzquota_get_super(struct super_block *sb);
++void vzquota_put_super(struct super_block *sb);
++
++static inline struct vz_quota_master *qmblk_get(struct vz_quota_master *qmblk)
++{
++ if (!atomic_read(&qmblk->dq_count))
++ BUG();
++ atomic_inc(&qmblk->dq_count);
++ return qmblk;
++}
++
++static inline void __qmblk_put(struct vz_quota_master *qmblk)
++{
++ atomic_dec(&qmblk->dq_count);
++}
++
++static inline void qmblk_put(struct vz_quota_master *qmblk)
++{
++ if (!atomic_dec_and_test(&qmblk->dq_count))
++ return;
++ vzquota_free_master(qmblk);
++}
++
++extern struct list_head vzquota_hash_table[];
++extern int vzquota_hash_size;
++
++/*
++ * Interface to VZ UGID quota
++ */
++extern struct quotactl_ops vz_quotactl_operations;
++extern struct dquot_operations vz_quota_operations2;
++extern struct quota_format_type vz_quota_empty_v2_format;
++
++#define QUGID_TREE(qmblk, type) (((type) == USRQUOTA) ? \
++ qmblk->dq_uid_tree : \
++ qmblk->dq_gid_tree)
++
++#define VZDQUG_FIND_DONT_ALLOC 1
++#define VZDQUG_FIND_FAKE 2
++struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
++ unsigned int quota_id, int type, int flags);
++struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
++ unsigned int quota_id, int type, int flags);
++struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid);
++void vzquota_put_ugid(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid);
++void vzquota_kill_ugid(struct vz_quota_master *qmblk);
++int vzquota_ugid_init(void);
++void vzquota_ugid_release(void);
++int vzquota_transfer_usage(struct inode *inode, int mask,
++ struct vz_quota_ilink *qlnk);
++
++struct vzctl_quotaugidctl;
++long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub);
++
++/*
++ * Other VZ quota parts
++ */
++extern struct dquot_operations vz_quota_operations;
++
++long do_vzquotactl(int cmd, unsigned int quota_id,
++ struct vz_quota_stat *qstat, const char *ve_root);
++int vzquota_proc_init(void);
++void vzquota_proc_release(void);
++struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
++extern struct semaphore vz_quota_sem;
++
++void vzaquota_init(void);
++void vzaquota_fini(void);
++
++#endif /* __KERNEL__ */
++
++#endif /* _VZDQUOTA_H */
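Editor's note: vz_quota_master blocks are reference counted via dq_count: qmblk_get() may only be used on a block that is already referenced, and the final qmblk_put() frees it through vzquota_free_master(). A hedged sketch of looking one up and holding it past the lookup; that vz_quota_sem guards vzquota_find_master() and that the hash itself keeps a reference are assumptions:

#include <linux/vzquota.h>

static void example_poke_quota(unsigned int quota_id)
{
	struct vz_quota_master *qmblk;

	down(&vz_quota_sem);			/* assumed to protect the master hash */
	qmblk = vzquota_find_master(quota_id);
	if (qmblk != NULL)
		qmblk_get(qmblk);		/* keep it after the semaphore is dropped */
	up(&vz_quota_sem);

	if (qmblk == NULL)
		return;
	/* ... read qmblk->dq_stat under qmblk->dq_data_lock ... */
	qmblk_put(qmblk);
}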
+diff -uprN linux-2.6.15.orig/include/linux/vzquota_qlnk.h linux-2.6.15-ve025stab014/include/linux/vzquota_qlnk.h
+--- linux-2.6.15.orig/include/linux/vzquota_qlnk.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vzquota_qlnk.h 2006-01-27 14:48:09.000000000 +0300
+@@ -0,0 +1,25 @@
++/*
++ * include/linux/vzquota_qlnk.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _VZDQUOTA_QLNK_H
++#define _VZDQUOTA_QLNK_H
++
++struct vz_quota_master;
++struct vz_quota_ugid;
++
++/* inode link, used to track inodes using quota via dq_ilink_list */
++struct vz_quota_ilink {
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid *qugid[MAXQUOTAS];
++ struct list_head list;
++ unsigned char origin;
++};
++
++#endif /* _VZDQUOTA_QLNK_H */
+diff -uprN linux-2.6.15.orig/include/linux/vzratelimit.h linux-2.6.15-ve025stab014/include/linux/vzratelimit.h
+--- linux-2.6.15.orig/include/linux/vzratelimit.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vzratelimit.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,28 @@
++/*
++ * include/linux/vzratelimit.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VZ_RATELIMIT_H__
++#define __VZ_RATELIMIT_H__
++
++/*
++ * Generic ratelimiting stuff.
++ */
++
++struct vz_rate_info {
++ int burst;
++	int interval;		/* jiffies per event */
++ int bucket; /* kind of leaky bucket */
++ unsigned long last; /* last event */
++};
++
++/* Return true if rate limit permits. */
++int vz_ratelimit(struct vz_rate_info *p);
++
++#endif /* __VZ_RATELIMIT_H__ */
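Editor's note: vz_rate_info is a small leaky bucket: interval is the cost of one event in jiffies and burst caps how many events may pass back to back. A hedged sketch of throttling a warning with it; relying on the zero-initialised bucket/last fields as a valid starting state is an assumption:

#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/vzratelimit.h>

/* Allow bursts of up to 5 messages, refilling one credit per second. */
static struct vz_rate_info example_ri = {
	.burst    = 5,
	.interval = HZ,
};

static void example_warn(const char *what)
{
	if (vz_ratelimit(&example_ri))
		printk(KERN_WARNING "example: %s\n", what);
}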
+diff -uprN linux-2.6.15.orig/include/linux/vzstat.h linux-2.6.15-ve025stab014/include/linux/vzstat.h
+--- linux-2.6.15.orig/include/linux/vzstat.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/linux/vzstat.h 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,182 @@
++/*
++ * include/linux/vzstat.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VZSTAT_H__
++#define __VZSTAT_H__
++
++struct swap_cache_info_struct {
++ unsigned long add_total;
++ unsigned long del_total;
++ unsigned long find_success;
++ unsigned long find_total;
++ unsigned long noent_race;
++ unsigned long exist_race;
++ unsigned long remove_race;
++};
++
++struct kstat_lat_snap_struct {
++ cycles_t maxlat, totlat;
++ unsigned long count;
++};
++struct kstat_lat_pcpu_snap_struct {
++ cycles_t maxlat, totlat;
++ unsigned long count;
++ seqcount_t lock;
++} ____cacheline_maxaligned_in_smp;
++
++struct kstat_lat_struct {
++ struct kstat_lat_snap_struct cur, last;
++ cycles_t avg[3];
++};
++struct kstat_lat_pcpu_struct {
++ struct kstat_lat_pcpu_snap_struct cur[NR_CPUS];
++ cycles_t max_snap;
++ struct kstat_lat_snap_struct last;
++ cycles_t avg[3];
++};
++
++struct kstat_perf_snap_struct {
++ cycles_t wall_tottime, cpu_tottime;
++ cycles_t wall_maxdur, cpu_maxdur;
++ unsigned long count;
++};
++struct kstat_perf_struct {
++ struct kstat_perf_snap_struct cur, last;
++};
++
++struct kstat_zone_avg {
++ unsigned long free_pages_avg[3],
++ nr_active_avg[3],
++ nr_inactive_avg[3];
++};
++
++#define KSTAT_ALLOCSTAT_NR 5
++
++struct kernel_stat_glob {
++ unsigned long nr_unint_avg[3];
++
++ unsigned long alloc_fails[KSTAT_ALLOCSTAT_NR];
++ struct kstat_lat_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
++ struct kstat_lat_pcpu_struct sched_lat;
++ struct kstat_lat_struct swap_in;
++
++ struct kstat_perf_struct ttfp, cache_reap,
++ refill_inact, shrink_icache, shrink_dcache;
++
++ struct kstat_zone_avg zone_avg[3]; /* MAX_NR_ZONES */
++} ____cacheline_aligned;
++
++extern struct kernel_stat_glob kstat_glob ____cacheline_aligned;
++extern spinlock_t kstat_glb_lock;
++
++#ifdef CONFIG_VE
++#define KSTAT_PERF_ENTER(name) \
++ unsigned long flags; \
++ cycles_t start, sleep_time; \
++ \
++ start = get_cycles(); \
++ sleep_time = VE_TASK_INFO(current)->sleep_time; \
++
++#define KSTAT_PERF_LEAVE(name) \
++ spin_lock_irqsave(&kstat_glb_lock, flags); \
++ kstat_glob.name.cur.count++; \
++ start = get_cycles() - start; \
++ if (kstat_glob.name.cur.wall_maxdur < start) \
++ kstat_glob.name.cur.wall_maxdur = start;\
++ kstat_glob.name.cur.wall_tottime += start; \
++ start -= VE_TASK_INFO(current)->sleep_time - \
++ sleep_time; \
++ if (kstat_glob.name.cur.cpu_maxdur < start) \
++ kstat_glob.name.cur.cpu_maxdur = start; \
++ kstat_glob.name.cur.cpu_tottime += start; \
++ spin_unlock_irqrestore(&kstat_glb_lock, flags); \
++
++#else
++#define KSTAT_PERF_ENTER(name)
++#define KSTAT_PERF_LEAVE(name)
++#endif
++
++/*
++ * Add another statistics reading.
++ * Serialization is the caller's responsibility.
++ */
++static inline void KSTAT_LAT_ADD(struct kstat_lat_struct *p,
++ cycles_t dur)
++{
++ p->cur.count++;
++ if (p->cur.maxlat < dur)
++ p->cur.maxlat = dur;
++ p->cur.totlat += dur;
++}
++
++static inline void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, int cpu,
++ cycles_t dur)
++{
++ struct kstat_lat_pcpu_snap_struct *cur;
++
++ cur = &p->cur[cpu];
++ write_seqcount_begin(&cur->lock);
++ cur->count++;
++ if (cur->maxlat < dur)
++ cur->maxlat = dur;
++ cur->totlat += dur;
++ write_seqcount_end(&cur->lock);
++}
++
++/*
++ * Move current statistics to last, clear last.
++ * Serialization is the caller's responsibility.
++ */
++static inline void KSTAT_LAT_UPDATE(struct kstat_lat_struct *p)
++{
++ cycles_t m;
++ memcpy(&p->last, &p->cur, sizeof(p->last));
++ p->cur.maxlat = 0;
++ m = p->last.maxlat;
++ CALC_LOAD(p->avg[0], EXP_1, m)
++ CALC_LOAD(p->avg[1], EXP_5, m)
++ CALC_LOAD(p->avg[2], EXP_15, m)
++}
++
++static inline void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p)
++{
++ unsigned i, cpu;
++ struct kstat_lat_pcpu_snap_struct snap, *cur;
++ cycles_t m;
++
++ memset(&p->last, 0, sizeof(p->last));
++ for (cpu = 0; cpu < NR_CPUS; cpu++) {
++ cur = &p->cur[cpu];
++ do {
++ i = read_seqcount_begin(&cur->lock);
++ memcpy(&snap, cur, sizeof(snap));
++ } while (read_seqcount_retry(&cur->lock, i));
++ /*
++ * read above and this update of maxlat is not atomic,
++ * but this is OK, since it happens rarely and losing
++ * a couple of peaks is not essential. xemul
++ */
++ cur->maxlat = 0;
++
++ p->last.count += snap.count;
++ p->last.totlat += snap.totlat;
++ if (p->last.maxlat < snap.maxlat)
++ p->last.maxlat = snap.maxlat;
++ }
++
++ m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap);
++ CALC_LOAD(p->avg[0], EXP_1, m);
++ CALC_LOAD(p->avg[1], EXP_5, m);
++ CALC_LOAD(p->avg[2], EXP_15, m);
++ /* reset max_snap to calculate it correctly next time */
++ p->max_snap = 0;
++}
++
++#endif /* __VZSTAT_H__ */
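Editor's note: KSTAT_LAT_ADD()/KSTAT_LAT_PCPU_ADD() only accumulate one duration, measured in CPU cycles, into the current window; the *_UPDATE() helpers later fold that window into the 1/5/15 load-style averages. A hedged sketch of feeding a sample into the global swap-in latency slot, assuming kstat_glb_lock is what the caller uses to serialize updates to kstat_glob, as its name suggests:

#include <linux/sched.h>	/* pulls in cycles_t / get_cycles() */
#include <linux/spinlock.h>
#include <linux/vzstat.h>

static void example_account_swapin(cycles_t start)
{
	cycles_t dur = get_cycles() - start;
	unsigned long flags;

	spin_lock_irqsave(&kstat_glb_lock, flags);
	KSTAT_LAT_ADD(&kstat_glob.swap_in, dur);
	spin_unlock_irqrestore(&kstat_glb_lock, flags);
}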
+diff -uprN linux-2.6.15.orig/include/net/af_unix.h linux-2.6.15-ve025stab014/include/net/af_unix.h
+--- linux-2.6.15.orig/include/net/af_unix.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/af_unix.h 2006-01-27 14:48:08.000000000 +0300
+@@ -19,23 +19,37 @@ extern atomic_t unix_tot_inflight;
+
+ static inline struct sock *first_unix_socket(int *i)
+ {
++ struct sock *s;
++ struct ve_struct *ve;
++
++ ve = get_exec_env();
+ for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
+- if (!hlist_empty(&unix_socket_table[*i]))
+- return __sk_head(&unix_socket_table[*i]);
++ for (s = sk_head(&unix_socket_table[*i]);
++ s != NULL && !ve_accessible(s->sk_owner_env, ve);
++ s = sk_next(s));
++ if (s != NULL)
++ return s;
+ }
+ return NULL;
+ }
+
+ static inline struct sock *next_unix_socket(int *i, struct sock *s)
+ {
+- struct sock *next = sk_next(s);
+- /* More in this chain? */
+- if (next)
+- return next;
++ struct ve_struct *ve;
++
++ ve = get_exec_env();
++ for (s = sk_next(s); s != NULL; s = sk_next(s)) {
++ if (!ve_accessible(s->sk_owner_env, ve))
++ continue;
++ return s;
++ }
+ /* Look for next non-empty chain. */
+ for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
+- if (!hlist_empty(&unix_socket_table[*i]))
+- return __sk_head(&unix_socket_table[*i]);
++ for (s = sk_head(&unix_socket_table[*i]);
++ s != NULL && !ve_accessible(s->sk_owner_env, ve);
++ s = sk_next(s));
++ if (s != NULL)
++ return s;
+ }
+ return NULL;
+ }
+diff -uprN linux-2.6.15.orig/include/net/flow.h linux-2.6.15-ve025stab014/include/net/flow.h
+--- linux-2.6.15.orig/include/net/flow.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/flow.h 2006-01-27 14:48:08.000000000 +0300
+@@ -10,6 +10,7 @@
+ #include <linux/in6.h>
+ #include <asm/atomic.h>
+
++struct ve_struct;
+ struct flowi {
+ int oif;
+ int iif;
+@@ -78,6 +79,9 @@ struct flowi {
+ #define fl_icmp_type uli_u.icmpt.type
+ #define fl_icmp_code uli_u.icmpt.code
+ #define fl_ipsec_spi uli_u.spi
++#ifdef CONFIG_VE
++ struct ve_struct *owner_env;
++#endif
+ } __attribute__((__aligned__(BITS_PER_LONG/8)));
+
+ #define FLOW_DIR_IN 0
+diff -uprN linux-2.6.15.orig/include/net/icmp.h linux-2.6.15-ve025stab014/include/net/icmp.h
+--- linux-2.6.15.orig/include/net/icmp.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/icmp.h 2006-01-27 14:48:08.000000000 +0300
+@@ -34,9 +34,14 @@ struct icmp_err {
+
+ extern struct icmp_err icmp_err_convert[];
+ DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+-#define ICMP_INC_STATS(field) SNMP_INC_STATS(icmp_statistics, field)
+-#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(icmp_statistics, field)
+-#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(icmp_statistics, field)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_icmp_statistics (get_exec_env()->_icmp_statistics)
++#else
++#define ve_icmp_statistics icmp_statistics
++#endif
++#define ICMP_INC_STATS(field) SNMP_INC_STATS(ve_icmp_statistics, field)
++#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_icmp_statistics, field)
++#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_icmp_statistics, field)
+
+ extern void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info);
+ extern int icmp_rcv(struct sk_buff *skb);
+diff -uprN linux-2.6.15.orig/include/net/inet_hashtables.h linux-2.6.15-ve025stab014/include/net/inet_hashtables.h
+--- linux-2.6.15.orig/include/net/inet_hashtables.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/inet_hashtables.h 2006-01-27 14:48:08.000000000 +0300
+@@ -24,6 +24,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/types.h>
+ #include <linux/wait.h>
++#include <linux/ve_owner.h>
+
+ #include <net/inet_connection_sock.h>
+ #include <net/route.h>
+@@ -74,11 +75,13 @@ struct inet_ehash_bucket {
+ * ports are created in O(1) time? I thought so. ;-) -DaveM
+ */
+ struct inet_bind_bucket {
++ struct ve_struct *owner_env;
+ unsigned short port;
+ signed short fastreuse;
+ struct hlist_node node;
+ struct hlist_head owners;
+ };
++DCL_VE_OWNER_PROTO(TB, struct inet_bind_bucket, owner_env)
+
+ #define inet_bind_bucket_for_each(tb, node, head) \
+ hlist_for_each_entry(tb, node, head, node)
+@@ -129,9 +132,10 @@ struct inet_hashinfo {
+ };
+
+ static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
+- const __u32 faddr, const __u16 fport)
++ const __u32 faddr, const __u16 fport,
++ const envid_t veid)
+ {
+- unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
++ int h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16));
+ h ^= h >> 16;
+ h ^= h >> 8;
+ return h;
+@@ -144,8 +148,9 @@ static inline int inet_sk_ehashfn(const
+ const __u16 lport = inet->num;
+ const __u32 faddr = inet->daddr;
+ const __u16 fport = inet->dport;
++ envid_t veid = VEID(VE_OWNER_SK(sk));
+
+- return inet_ehashfn(laddr, lport, faddr, fport);
++ return inet_ehashfn(laddr, lport, faddr, fport, veid);
+ }
+
+ static inline struct inet_ehash_bucket *inet_ehash_bucket(
+@@ -158,37 +163,43 @@ static inline struct inet_ehash_bucket *
+ extern struct inet_bind_bucket *
+ inet_bind_bucket_create(kmem_cache_t *cachep,
+ struct inet_bind_hashbucket *head,
+- const unsigned short snum);
++ const unsigned short snum,
++ struct ve_struct *env);
+ extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
+ struct inet_bind_bucket *tb);
+
+-static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
++static inline int inet_bhashfn(const __u16 lport, const int bhash_size,
++ unsigned veid)
+ {
+- return lport & (bhash_size - 1);
++ return ((lport + (veid ^ (veid >> 16))) & (bhash_size - 1));
+ }
+
+ extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
+ const unsigned short snum);
+
+ /* These can have wildcards, don't try too hard. */
+-static inline int inet_lhashfn(const unsigned short num)
++static inline int inet_lhashfn(const unsigned short num, unsigned veid)
+ {
+- return num & (INET_LHTABLE_SIZE - 1);
++ return ((num + (veid ^ (veid >> 16))) & (INET_LHTABLE_SIZE - 1));
+ }
+
+ static inline int inet_sk_listen_hashfn(const struct sock *sk)
+ {
+- return inet_lhashfn(inet_sk(sk)->num);
++ return inet_lhashfn(inet_sk(sk)->num, VEID(VE_OWNER_SK(sk)));
+ }
+
+ /* Caller must disable local BH processing. */
+ static inline void __inet_inherit_port(struct inet_hashinfo *table,
+ struct sock *sk, struct sock *child)
+ {
+- const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
+- struct inet_bind_hashbucket *head = &table->bhash[bhash];
++ int bhash;
++ struct inet_bind_hashbucket *head;
+ struct inet_bind_bucket *tb;
+
++ bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size,
++ VEID(VE_OWNER_SK(child)));
++ head = &table->bhash[bhash];
++
+ spin_lock(&head->lock);
+ tb = inet_csk(sk)->icsk_bind_hash;
+ sk_add_bind_node(child, &tb->owners);
+@@ -294,7 +305,8 @@ static inline int inet_iif(const struct
+ extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
+ const u32 daddr,
+ const unsigned short hnum,
+- const int dif);
++ const int dif,
++ struct ve_struct *env);
+
+ /* Optimize the common listener case. */
+ static inline struct sock *
+@@ -304,18 +316,21 @@ static inline struct sock *
+ {
+ struct sock *sk = NULL;
+ const struct hlist_head *head;
++ struct ve_struct *env;
+
++ env = get_exec_env();
+ read_lock(&hashinfo->lhash_lock);
+- head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
++ head = &hashinfo->listening_hash[inet_lhashfn(hnum, VEID(env))];
+ if (!hlist_empty(head)) {
+ const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+ if (inet->num == hnum && !sk->sk_node.next &&
++ ve_accessible_strict(VE_OWNER_SK(sk), env) &&
+ (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+ (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+ !sk->sk_bound_dev_if)
+ goto sherry_cache;
+- sk = __inet_lookup_listener(head, daddr, hnum, dif);
++ sk = __inet_lookup_listener(head, daddr, hnum, dif, env);
+ }
+ if (sk) {
+ sherry_cache:
+@@ -342,25 +357,25 @@ sherry_cache:
+ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
+ const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
+ #endif /* __BIG_ENDIAN */
+-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
++#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
+ (((__sk)->sk_hash == (__hash)) && \
+ ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \
+ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
+ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+-#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
++#define INET_TW_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
+ (((__sk)->sk_hash == (__hash)) && \
+ ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \
+ ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
+ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+ #else /* 32-bit arch */
+ #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
+-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
++#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
+ (((__sk)->sk_hash == (__hash)) && \
+ (inet_sk(__sk)->daddr == (__saddr)) && \
+ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
+ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
+ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+-#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \
++#define INET_TW_MATCH_ALLVE(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \
+ (((__sk)->sk_hash == (__hash)) && \
+ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \
+ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
+@@ -368,6 +383,18 @@ sherry_cache:
+ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+ #endif /* 64-bit arch */
+
++#define INET_MATCH(__sk, __hash, __cookie, __saddr, \
++ __daddr, __ports, __dif, __ve) \
++ (INET_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr), \
++ (__daddr), (__ports), (__dif)) \
++ && ve_accessible_strict(VE_OWNER_SK(__sk), (__ve)))
++
++#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, \
++ __daddr, __ports, __dif, __ve) \
++ (INET_TW_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr), \
++ (__daddr), (__ports), (__dif)) \
++ && ve_accessible_strict(inet_twsk(__sk)->tw_owner_env, VEID(__ve)))
++
+ /*
+ * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
+ * not check it for lookups anymore, thanks Alexey. -DaveM
+@@ -387,19 +414,25 @@ static inline struct sock *
+ /* Optimize here for direct hit, only listening connections can
+ * have wildcards anyways.
+ */
+- unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
+- struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
+-
++ unsigned int hash;
++ struct inet_ehash_bucket *head;
++ struct ve_struct *env;
++
++ env = get_exec_env();
++ hash = inet_ehashfn(daddr, hnum, saddr, sport, VEID(env));
++ head = inet_ehash_bucket(hashinfo, hash);
+ prefetch(head->chain.first);
+ read_lock(&head->lock);
+ sk_for_each(sk, node, &head->chain) {
+- if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
++ if (INET_MATCH(sk, hash, acookie, saddr, daddr,
++ ports, dif, env))
+ goto hit; /* You sunk my battleship! */
+ }
+
+ /* Must check for a TIME_WAIT'er before going to listener hash. */
+ sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
+- if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
++ if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr,
++ ports, dif, env))
+ goto hit;
+ }
+ sk = NULL;
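
The hunks above fold the owning VE id into every connection, bind and listen hash, so identical address/port pairs in two VEs land in different buckets, and the new INET_MATCH()/INET_TW_MATCH() variants additionally require VE visibility. A small standalone sketch of just the mixing arithmetic; the wrapper names and sample values below are invented, only the formulas come from the patch:

#include <stdio.h>

/* Same mixing as the patched inet_ehashfn()/inet_bhashfn(); laddr/faddr are
 * IPv4 addresses in host byte order here. */
static unsigned int ehashfn(unsigned int laddr, unsigned short lport,
                            unsigned int faddr, unsigned short fport,
                            unsigned int veid)
{
        unsigned int h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16));
        h ^= h >> 16;
        h ^= h >> 8;
        return h;
}

static int bhashfn(unsigned short lport, int bhash_size, unsigned int veid)
{
        return (lport + (veid ^ (veid >> 16))) & (bhash_size - 1);
}

int main(void)
{
        /* Two VEs binding the same local port usually fall into different
         * bind-hash buckets. */
        printf("VE 101: bucket %d\n", bhashfn(8080, 512, 101));
        printf("VE 102: bucket %d\n", bhashfn(8080, 512, 102));
        printf("ehash : %u\n", ehashfn(0x7f000001, 8080, 0x7f000001, 4242, 101));
        return 0;
}
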
+diff -uprN linux-2.6.15.orig/include/net/inet_timewait_sock.h linux-2.6.15-ve025stab014/include/net/inet_timewait_sock.h
+--- linux-2.6.15.orig/include/net/inet_timewait_sock.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/inet_timewait_sock.h 2006-01-27 14:48:08.000000000 +0300
+@@ -132,6 +132,7 @@ struct inet_timewait_sock {
+ unsigned long tw_ttd;
+ struct inet_bind_bucket *tw_tb;
+ struct hlist_node tw_death_node;
++ envid_t tw_owner_env;
+ };
+
+ static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
+diff -uprN linux-2.6.15.orig/include/net/ip.h linux-2.6.15-ve025stab014/include/net/ip.h
+--- linux-2.6.15.orig/include/net/ip.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/ip.h 2006-01-27 14:48:08.000000000 +0300
+@@ -149,15 +149,25 @@ struct ipv4_config
+
+ extern struct ipv4_config ipv4_config;
+ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+-#define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field)
+-#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field)
+-#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ip_statistics, field)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_ip_statistics (get_exec_env()->_ip_statistics)
++#else
++#define ve_ip_statistics ip_statistics
++#endif
++#define IP_INC_STATS(field) SNMP_INC_STATS(ve_ip_statistics, field)
++#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_ip_statistics, field)
++#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_ip_statistics, field)
+ DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
+-#define NET_INC_STATS(field) SNMP_INC_STATS(net_statistics, field)
+-#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(net_statistics, field)
+-#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(net_statistics, field)
+-#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(net_statistics, field, adnd)
+-#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(net_statistics, field, adnd)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_net_statistics (get_exec_env()->_net_statistics)
++#else
++#define ve_net_statistics net_statistics
++#endif
++#define NET_INC_STATS(field) SNMP_INC_STATS(ve_net_statistics, field)
++#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_net_statistics, field)
++#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_net_statistics, field)
++#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(ve_net_statistics, field, adnd)
++#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(ve_net_statistics, field, adnd)
+
+ extern int sysctl_local_port_range[2];
+ extern int sysctl_ip_default_ttl;
+@@ -375,4 +385,11 @@ extern int ip_misc_proc_init(void);
+
+ extern struct ctl_table ipv4_table[];
+
++#ifdef CONFIG_SYSCTL
++extern int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
++ void __user *buffer, size_t *lenp, loff_t *ppos);
++extern int ipv4_sysctl_forward_strategy(ctl_table *table, int __user *name,
++ int nlen, void __user *oldval, size_t __user *oldlenp,
++ void __user *newval, size_t newlen, void **context);
++#endif
+ #endif /* _IP_H */
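
This is the same pattern already used for the ICMP counters above and repeated for TCP and UDP below: the *_INC_STATS macros stay in place, but the MIB they act on becomes a per-VE copy when CONFIG_VE_NETDEV is enabled. An illustrative userspace model of that redirection; struct env, exec_env and the field names are invented, only the macro-indirection idea comes from the patch:

#include <stdio.h>

struct ip_mib { unsigned long out_requests; };

struct env { int id; struct ip_mib ip_stats; };

static struct env host = { 0 }, ve101 = { 101 };
static struct env *exec_env = &host;            /* stand-in for get_exec_env() */

#define ve_ip_statistics (exec_env->ip_stats)
#define IP_INC_STATS(field) (ve_ip_statistics.field++)

int main(void)
{
        IP_INC_STATS(out_requests);             /* charged to the host   */
        exec_env = &ve101;                      /* "enter" the container */
        IP_INC_STATS(out_requests);             /* charged to VE 101     */
        printf("host=%lu ve101=%lu\n",
               host.ip_stats.out_requests, ve101.ip_stats.out_requests);
        return 0;
}
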
+diff -uprN linux-2.6.15.orig/include/net/ip_fib.h linux-2.6.15-ve025stab014/include/net/ip_fib.h
+--- linux-2.6.15.orig/include/net/ip_fib.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/ip_fib.h 2006-01-27 14:48:08.000000000 +0300
+@@ -168,10 +168,22 @@ struct fib_table {
+ unsigned char tb_data[0];
+ };
+
++struct fn_zone;
++struct fn_hash
++{
++ struct fn_zone *fn_zones[33];
++ struct fn_zone *fn_zone_list;
++};
++
+ #ifndef CONFIG_IP_MULTIPLE_TABLES
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ip_fib_local_table get_exec_env()->_local_table
++#define ip_fib_main_table get_exec_env()->_main_table
++#else
+ extern struct fib_table *ip_fib_local_table;
+ extern struct fib_table *ip_fib_main_table;
++#endif
+
+ static inline struct fib_table *fib_get_table(int id)
+ {
+@@ -203,7 +215,12 @@ static inline void fib_select_default(co
+ #define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL])
+ #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define fib_tables get_exec_env()->_fib_tables
++#else
+ extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
++#endif
++
+ extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
+ extern struct fib_table *__fib_new_table(int id);
+ extern void fib_rule_put(struct fib_rule *r);
+@@ -248,10 +265,19 @@ extern u32 __fib_res_prefsrc(struct fib
+
+ /* Exported by fib_hash.c */
+ extern struct fib_table *fib_hash_init(int id);
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++struct ve_struct;
++extern int init_ve_route(struct ve_struct *ve);
++extern void fini_ve_route(struct ve_struct *ve);
++#else
++#define init_ve_route(ve) (0)
++#define fini_ve_route(ve) do { } while (0)
++#endif
+
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+ /* Exported by fib_rules.c */
+-
++extern int fib_rules_create(void);
++extern void fib_rules_destroy(void);
+ extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
+ extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
+ extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
+diff -uprN linux-2.6.15.orig/include/net/route.h linux-2.6.15-ve025stab014/include/net/route.h
+--- linux-2.6.15.orig/include/net/route.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/route.h 2006-01-27 14:48:08.000000000 +0300
+@@ -200,4 +200,14 @@ static inline struct inet_peer *rt_get_p
+
+ extern ctl_table ipv4_route_table[];
+
++#ifdef CONFIG_SYSCTL
++extern int ipv4_flush_delay;
++extern int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
++ struct file *filp, void __user *buffer, size_t *lenp,
++ loff_t *ppos);
++extern int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
++ int __user *name, int nlen, void __user *oldval,
++ size_t __user *oldlenp, void __user *newval,
++ size_t newlen, void **context);
++#endif
+ #endif /* _ROUTE_H */
+diff -uprN linux-2.6.15.orig/include/net/scm.h linux-2.6.15-ve025stab014/include/net/scm.h
+--- linux-2.6.15.orig/include/net/scm.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/scm.h 2006-01-27 14:48:08.000000000 +0300
+@@ -40,7 +40,7 @@ static __inline__ int scm_send(struct so
+ memset(scm, 0, sizeof(*scm));
+ scm->creds.uid = current->uid;
+ scm->creds.gid = current->gid;
+- scm->creds.pid = current->tgid;
++ scm->creds.pid = virt_tgid(current);
+ if (msg->msg_controllen <= 0)
+ return 0;
+ return __scm_send(sock, msg, scm);
+diff -uprN linux-2.6.15.orig/include/net/sock.h linux-2.6.15-ve025stab014/include/net/sock.h
+--- linux-2.6.15.orig/include/net/sock.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/sock.h 2006-01-27 14:48:08.000000000 +0300
+@@ -55,6 +55,8 @@
+ #include <net/dst.h>
+ #include <net/checksum.h>
+
++#include <ub/ub_net.h>
++
+ /*
+ * This structure really needs to be cleaned up.
+ * Most of it is for TCP, and not used by any of
+@@ -251,8 +253,12 @@ struct sock {
+ int (*sk_backlog_rcv)(struct sock *sk,
+ struct sk_buff *skb);
+ void (*sk_destruct)(struct sock *sk);
++ struct sock_beancounter sk_bc;
++ struct ve_struct *sk_owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(SK, struct sock, sk_owner_env)
++
+ /*
+ * Hashed lists helper routines
+ */
+@@ -485,7 +491,8 @@ static inline void sk_add_backlog(struct
+ })
+
+ extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
+-extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
++extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p,
++ unsigned long amount);
+ extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
+ extern int sk_stream_error(struct sock *sk, int flags, int err);
+ extern void sk_stream_kill_queues(struct sock *sk);
+@@ -706,8 +713,11 @@ static inline void sk_stream_writequeue_
+
+ static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
+ {
+- return (int)skb->truesize <= sk->sk_forward_alloc ||
+- sk_stream_mem_schedule(sk, skb->truesize, 1);
++ if ((int)skb->truesize > sk->sk_forward_alloc &&
++ !sk_stream_mem_schedule(sk, skb->truesize, 1))
++ /* The situation is bad according to mainstream. Den */
++ return 0;
++ return ub_tcprcvbuf_charge(sk, skb) == 0;
+ }
+
+ static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
+@@ -765,6 +775,11 @@ extern struct sk_buff *sock_alloc_send
+ unsigned long size,
+ int noblock,
+ int *errcode);
++extern struct sk_buff *sock_alloc_send_skb2(struct sock *sk,
++ unsigned long size,
++ unsigned long size2,
++ int noblock,
++ int *errcode);
+ extern void *sock_kmalloc(struct sock *sk, int size,
+ gfp_t priority);
+ extern void sock_kfree_s(struct sock *sk, void *mem, int size);
+@@ -1119,6 +1134,10 @@ static inline int sock_queue_rcv_skb(str
+ goto out;
+ }
+
++ err = ub_sockrcvbuf_charge(sk, skb);
++ if (err < 0)
++ goto out;
++
+ /* It would be deadlock, if sock_queue_rcv_skb is used
+ with socket lock! We assume that users of this
+ function are lock free.
+diff -uprN linux-2.6.15.orig/include/net/tcp.h linux-2.6.15-ve025stab014/include/net/tcp.h
+--- linux-2.6.15.orig/include/net/tcp.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/tcp.h 2006-01-27 14:48:08.000000000 +0300
+@@ -40,6 +40,7 @@
+ #include <net/tcp_states.h>
+
+ #include <linux/seq_file.h>
++#include <ub/ub_net.h>
+
+ extern struct inet_hashinfo tcp_hashinfo;
+
+@@ -297,12 +298,17 @@ static inline int between(__u32 seq1, __
+ extern struct proto tcp_prot;
+
+ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
+-#define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field)
+-#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field)
+-#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field)
+-#define TCP_DEC_STATS(field) SNMP_DEC_STATS(tcp_statistics, field)
+-#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
+-#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_tcp_statistics (get_exec_env()->_tcp_statistics)
++#else
++#define ve_tcp_statistics tcp_statistics
++#endif
++#define TCP_INC_STATS(field) SNMP_INC_STATS(ve_tcp_statistics, field)
++#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_tcp_statistics, field)
++#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_tcp_statistics, field)
++#define TCP_DEC_STATS(field) SNMP_DEC_STATS(ve_tcp_statistics, field)
++#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(ve_tcp_statistics, field, val)
++#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(ve_tcp_statistics, field, val)
+
+ extern void tcp_v4_err(struct sk_buff *skb, u32);
+
+diff -uprN linux-2.6.15.orig/include/net/udp.h linux-2.6.15-ve025stab014/include/net/udp.h
+--- linux-2.6.15.orig/include/net/udp.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/net/udp.h 2006-01-27 14:48:08.000000000 +0300
+@@ -40,13 +40,19 @@ extern rwlock_t udp_hash_lock;
+
+ extern int udp_port_rover;
+
+-static inline int udp_lport_inuse(u16 num)
++static inline int udp_hashfn(u16 num, unsigned veid)
++{
++ return ((num + (veid ^ (veid >> 16))) & (UDP_HTABLE_SIZE - 1));
++}
++
++static inline int udp_lport_inuse(u16 num, struct ve_struct *env)
+ {
+ struct sock *sk;
+ struct hlist_node *node;
+
+- sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
+- if (inet_sk(sk)->num == num)
++ sk_for_each(sk, node, &udp_hash[udp_hashfn(num, VEID(env))])
++ if (inet_sk(sk)->num == num &&
++ ve_accessible_strict(sk->sk_owner_env, env))
+ return 1;
+ return 0;
+ }
+@@ -75,9 +81,14 @@ extern unsigned int udp_poll(struct file
+ poll_table *wait);
+
+ DECLARE_SNMP_STAT(struct udp_mib, udp_statistics);
+-#define UDP_INC_STATS(field) SNMP_INC_STATS(udp_statistics, field)
+-#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_statistics, field)
+-#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_statistics, field)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_udp_statistics (get_exec_env()->_udp_statistics)
++#else
++#define ve_udp_statistics udp_statistics
++#endif
++#define UDP_INC_STATS(field) SNMP_INC_STATS(ve_udp_statistics, field)
++#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_udp_statistics, field)
++#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_udp_statistics, field)
+
+ /* /proc */
+ struct udp_seq_afinfo {
+diff -uprN linux-2.6.15.orig/include/ub/beancounter.h linux-2.6.15-ve025stab014/include/ub/beancounter.h
+--- linux-2.6.15.orig/include/ub/beancounter.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/beancounter.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,329 @@
++/*
++ * include/ub/beancounter.h
++ *
++ * Copyright (C) 1999-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * Andrey Savochkin saw@sw-soft.com
++ *
++ */
++
++#ifndef _LINUX_BEANCOUNTER_H
++#define _LINUX_BEANCOUNTER_H
++
++#include <linux/config.h>
++
++/*
++ * Generic ratelimiting stuff.
++ */
++
++struct ub_rate_info {
++ int burst;
++ int interval; /* jiffy_t per event */
++ int bucket; /* kind of leaky bucket */
++ unsigned long last; /* last event */
++};
++
++/* Return true if rate limit permits. */
++int ub_ratelimit(struct ub_rate_info *);
++
++
++/*
++ * This magic is used to distinguish the user beancounter and the pages
++ * beancounter in struct page. page_ub and page_bc are placed in a union,
++ * and MAGIC ensures that we don't use a pbc as a ubc in ub_page_uncharge().
++ */
++#define UB_MAGIC 0x62756275
++
++/*
++ * Resource list.
++ */
++
++#define UB_KMEMSIZE 0 /* Unswappable kernel memory size including
++ * struct task, page directories, etc.
++ */
++#define UB_LOCKEDPAGES 1 /* Mlock()ed pages. */
++#define UB_PRIVVMPAGES 2 /* Total number of pages, counting potentially
++ * private pages as private and used.
++ */
++#define UB_SHMPAGES 3 /* IPC SHM segment size. */
++#define UB_ZSHMPAGES 4 /* Anonymous shared memory. */
++#define UB_NUMPROC 5 /* Number of processes. */
++#define UB_PHYSPAGES 6 /* All resident pages, for swapout guarantee. */
++#define UB_VMGUARPAGES 7 /* Guarantee for memory allocation,
++ * checked against PRIVVMPAGES.
++ */
++#define UB_OOMGUARPAGES 8 /* Guarantees against OOM kill.
++ * Only limit is used, no accounting.
++ */
++#define UB_NUMTCPSOCK 9 /* Number of TCP sockets. */
++#define UB_NUMFLOCK 10 /* Number of file locks. */
++#define UB_NUMPTY 11 /* Number of PTYs. */
++#define UB_NUMSIGINFO 12 /* Number of siginfos. */
++#define UB_TCPSNDBUF 13 /* Total size of tcp send buffers. */
++#define UB_TCPRCVBUF 14 /* Total size of tcp receive buffers. */
++#define UB_OTHERSOCKBUF 15 /* Total size of other socket
++ * send buffers (all buffers for PF_UNIX).
++ */
++#define UB_DGRAMRCVBUF 16 /* Total size of other socket
++ * receive buffers.
++ */
++#define UB_NUMOTHERSOCK 17 /* Number of other sockets. */
++#define UB_DCACHESIZE 18 /* Size of busy dentry/inode cache. */
++#define UB_NUMFILE 19 /* Number of open files. */
++
++#define UB_RESOURCES 24
++
++#define UB_UNUSEDPRIVVM (UB_RESOURCES + 0)
++#define UB_TMPFSPAGES (UB_RESOURCES + 1)
++#define UB_SWAPPAGES (UB_RESOURCES + 2)
++#define UB_HELDPAGES (UB_RESOURCES + 3)
++
++struct ubparm {
++ /*
++ * A barrier above which resource allocations start to fail gracefully.
++ * If the amount of consumed memory is over the barrier, further sbrk()
++ * or mmap() calls fail; the existing processes are not killed.
++ */
++ unsigned long barrier;
++ /* hard resource limit */
++ unsigned long limit;
++ /* consumed resources */
++ unsigned long held;
++ /* maximum amount of consumed resources through the last period */
++ unsigned long maxheld;
++ /* minimum amount of consumed resources through the last period */
++ unsigned long minheld;
++ /* count of failed charges */
++ unsigned long failcnt;
++};
++
++/*
++ * Kernel internal part.
++ */
++
++#ifdef __KERNEL__
++
++#include <ub/ub_debug.h>
++#include <linux/interrupt.h>
++#include <asm/atomic.h>
++#include <linux/spinlock.h>
++#include <linux/cache.h>
++#include <linux/threads.h>
++
++/*
++ * UB_MAXVALUE is essentially LONG_MAX declared in a cross-compiling safe form.
++ */
++#define UB_MAXVALUE ( (1UL << (sizeof(unsigned long)*8-1)) - 1)
++
++
++/*
++ * Resource management structures
++ * Serialization issues:
++ * beancounter list management is protected via ub_hash_lock
++ * task pointers are set only for current task and only once
++ * refcount is managed atomically
++ * value and limit comparison and change are protected by per-ub spinlock
++ */
++
++struct page_beancounter;
++struct task_beancounter;
++struct sock_beancounter;
++
++struct page_private {
++ unsigned long ubp_unused_privvmpages;
++ unsigned long ubp_tmpfs_respages;
++ unsigned long ubp_swap_pages;
++ unsigned long long ubp_held_pages;
++};
++
++struct sock_private {
++ unsigned long ubp_rmem_thres;
++ unsigned long ubp_wmem_pressure;
++ unsigned long ubp_maxadvmss;
++ unsigned long ubp_rmem_pressure;
++#define UB_RMEM_EXPAND 0
++#define UB_RMEM_KEEP 1
++#define UB_RMEM_SHRINK 2
++ struct list_head ubp_other_socks;
++ struct list_head ubp_tcp_socks;
++ atomic_t ubp_orphan_count;
++};
++
++struct ub_perfstat {
++ unsigned long unmap;
++ unsigned long swapin;
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ long pages_charged;
++ long vmalloc_charged;
++ long pbcs;
++#endif
++} ____cacheline_aligned_in_smp;
++
++struct user_beancounter
++{
++ unsigned long ub_magic;
++ atomic_t ub_refcount;
++ struct user_beancounter *ub_next;
++ spinlock_t ub_lock;
++ uid_t ub_uid;
++
++ struct ub_rate_info ub_limit_rl;
++ int ub_oom_noproc;
++
++ struct page_private ppriv;
++#define ub_unused_privvmpages ppriv.ubp_unused_privvmpages
++#define ub_tmpfs_respages ppriv.ubp_tmpfs_respages
++#define ub_swap_pages ppriv.ubp_swap_pages
++#define ub_held_pages ppriv.ubp_held_pages
++ struct sock_private spriv;
++#define ub_rmem_thres spriv.ubp_rmem_thres
++#define ub_maxadvmss spriv.ubp_maxadvmss
++#define ub_rmem_pressure spriv.ubp_rmem_pressure
++#define ub_wmem_pressure spriv.ubp_wmem_pressure
++#define ub_tcp_sk_list spriv.ubp_tcp_socks
++#define ub_other_sk_list spriv.ubp_other_socks
++#define ub_orphan_count spriv.ubp_orphan_count
++
++ struct user_beancounter *parent;
++ void *private_data;
++
++ /* resources statistic and settings */
++ struct ubparm ub_parms[UB_RESOURCES];
++ /* resources statistic for last interval */
++ struct ubparm ub_store[UB_RESOURCES];
++
++ struct ub_perfstat ub_stat[NR_CPUS];
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ struct list_head ub_cclist;
++#endif
++};
++
++enum severity { UB_HARD, UB_SOFT, UB_FORCE };
++
++static inline int ub_barrier_hit(struct user_beancounter *ub, int resource)
++{
++ return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier;
++}
++
++static inline int ub_hfbarrier_hit(struct user_beancounter *ub, int resource)
++{
++ return (ub->ub_parms[resource].held >
++ ((ub->ub_parms[resource].barrier) >> 1));
++}
++
++#ifndef CONFIG_USER_RESOURCE
++
++extern inline struct user_beancounter *get_beancounter_byuid
++ (uid_t uid, int create) { return NULL; }
++extern inline struct user_beancounter *get_beancounter
++ (struct user_beancounter *ub) { return NULL; }
++extern inline void put_beancounter(struct user_beancounter *ub) {;}
++
++static inline void ub_init_cache(unsigned long mempages) { };
++static inline void ub_init_ub0(void) { };
++
++#define get_ub0() NULL
++
++#else /* CONFIG_USER_RESOURCE */
++
++/*
++ * Charge/uncharge operations
++ */
++
++extern int __charge_beancounter_locked(struct user_beancounter *ub,
++ int resource, unsigned long val, enum severity strict);
++
++extern void __uncharge_beancounter_locked(struct user_beancounter *ub,
++ int resource, unsigned long val);
++
++extern void __put_beancounter(struct user_beancounter *ub);
++
++extern void uncharge_warn(struct user_beancounter *ub, int resource,
++ unsigned long val, unsigned long held);
++
++extern const char *ub_rnames[];
++/*
++ * Put a beancounter reference
++ */
++
++static inline void put_beancounter(struct user_beancounter *ub)
++{
++ if (unlikely(ub == NULL))
++ return;
++
++ __put_beancounter(ub);
++}
++
++/*
++ * Create a new beancounter reference
++ */
++extern struct user_beancounter *get_beancounter_byuid(uid_t uid, int create);
++
++static inline
++struct user_beancounter *get_beancounter(struct user_beancounter *ub)
++{
++ if (unlikely(ub == NULL))
++ return NULL;
++
++ atomic_inc(&ub->ub_refcount);
++ return ub;
++}
++
++extern struct user_beancounter *get_subbeancounter_byid(
++ struct user_beancounter *,
++ int id, int create);
++extern struct user_beancounter *subbeancounter_findcreate(
++ struct user_beancounter *p, int id);
++
++extern struct user_beancounter ub0;
++
++extern void ub_init_cache(unsigned long);
++extern void ub_init_ub0(void);
++#define get_ub0() (&ub0)
++
++extern void print_ub_uid(struct user_beancounter *ub, char *buf, int size);
++
++/*
++ * Resource charging
++ * Change user's account and compare against limits
++ */
++
++static inline void ub_adjust_maxheld(struct user_beancounter *ub, int resource)
++{
++ if (ub->ub_parms[resource].maxheld < ub->ub_parms[resource].held)
++ ub->ub_parms[resource].maxheld = ub->ub_parms[resource].held;
++ if (ub->ub_parms[resource].minheld > ub->ub_parms[resource].held)
++ ub->ub_parms[resource].minheld = ub->ub_parms[resource].held;
++}
++
++#endif /* CONFIG_USER_RESOURCE */
++
++#include <ub/ub_decl.h>
++UB_DECLARE_FUNC(int, charge_beancounter(struct user_beancounter *ub,
++ int resource, unsigned long val, enum severity strict));
++UB_DECLARE_VOID_FUNC(uncharge_beancounter(struct user_beancounter *ub,
++ int resource, unsigned long val));
++
++UB_DECLARE_VOID_FUNC(charge_beancounter_notop(struct user_beancounter *ub,
++ int resource, unsigned long val));
++UB_DECLARE_VOID_FUNC(uncharge_beancounter_notop(struct user_beancounter *ub,
++ int resource, unsigned long val));
++
++#ifndef CONFIG_USER_RESOURCE_PROC
++static inline void ub_init_proc(void) { };
++#else
++extern void ub_init_proc(void);
++#endif
++
++#ifdef CONFIG_USER_RSS_ACCOUNTING
++extern void ub_init_pbc(void);
++#else
++static inline void ub_init_pbc(void) { }
++#endif
++#endif /* __KERNEL__ */
++#endif /* _LINUX_BEANCOUNTER_H */
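
beancounter.h above is mostly bookkeeping: each resource carries a barrier, a hard limit, the currently held amount and maxheld/minheld/failcnt history, plus get/put reference helpers. A toy userspace model of just the ubparm bookkeeping, assuming a simple "refuse above the hard limit" policy; the real policy, including the barrier and enum severity handling, lives in kernel/ub/ and is only approximated here:

#include <stdio.h>

struct ubparm { unsigned long barrier, limit, held, maxheld, failcnt; };

static int charge(struct ubparm *p, unsigned long val)
{
        if (p->held + val > p->limit) {         /* over the hard limit: refuse */
                p->failcnt++;
                return -1;
        }
        p->held += val;
        if (p->held > p->maxheld)               /* mirrors ub_adjust_maxheld() */
                p->maxheld = p->held;
        return 0;
}

static void uncharge(struct ubparm *p, unsigned long val)
{
        p->held -= val;
}

int main(void)
{
        struct ubparm kmem = { .barrier = 800, .limit = 1000 };

        charge(&kmem, 600);
        if (charge(&kmem, 600) < 0)             /* would exceed the limit */
                printf("charge refused, failcnt=%lu\n", kmem.failcnt);
        uncharge(&kmem, 600);
        printf("held=%lu maxheld=%lu\n", kmem.held, kmem.maxheld);
        return 0;
}
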
+diff -uprN linux-2.6.15.orig/include/ub/ub_dcache.h linux-2.6.15-ve025stab014/include/ub/ub_dcache.h
+--- linux-2.6.15.orig/include/ub/ub_dcache.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_dcache.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,57 @@
++/*
++ * include/ub/ub_dcache.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_DCACHE_H_
++#define __UB_DCACHE_H_
++
++#include <ub/ub_decl.h>
++
++/*
++ * UB_DCACHESIZE accounting
++ */
++
++struct dentry_beancounter
++{
++ /*
++ * d_inuse =
++ * <number of external refs> +
++ * <number of 'used' childs>
++ * <number of 'used' children>
++ * d_inuse == -1 means that dentry is unused
++ * state change -1 => 0 causes charge
++ * state change 0 => -1 causes uncharge
++ */
++ atomic_t d_inuse;
++ /* charged size, including name length if name is not inline */
++ unsigned long d_ubsize;
++ struct user_beancounter *d_ub;
++};
++
++struct dentry;
++
++UB_DECLARE_FUNC(int, ub_dentry_alloc(struct dentry *d))
++UB_DECLARE_VOID_FUNC(ub_dentry_charge_nofail(struct dentry *d))
++UB_DECLARE_VOID_FUNC(ub_dentry_uncharge(struct dentry *d))
++
++#ifdef CONFIG_USER_RESOURCE
++UB_DECLARE_FUNC(int, ub_dentry_charge(struct dentry *d))
++#define ub_dget_testone(d) (atomic_inc_and_test(&(d)->dentry_bc.d_inuse))
++#define ub_dput_testzero(d) (atomic_add_negative(-1, &(d)->dentry_bc.d_inuse))
++#define INUSE_INIT 0
++#else
++#define ub_dentry_charge(d) ({ \
++ spin_unlock(&d->d_lock); \
++ rcu_read_unlock(); \
++ 0; \
++ })
++#define ub_dget_testone(d) (0)
++#define ub_dput_testzero(d) (0)
++#endif
++#endif
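
The d_inuse comment above describes a small state machine: -1 means the dentry is unused, the -1 -> 0 transition charges UB_DCACHESIZE and the 0 -> -1 transition uncharges it, which is exactly what the atomic ub_dget_testone()/ub_dput_testzero() helpers detect. A userspace sketch with a plain int in place of the atomic_t:

#include <stdio.h>

static int d_inuse = -1;                        /* unused, per the comment above */

static void dget(void)
{
        if (++d_inuse == 0)                     /* like ub_dget_testone() */
                printf("charge dentry\n");
}

static void dput(void)
{
        if (--d_inuse < 0)                      /* like ub_dput_testzero() */
                printf("uncharge dentry\n");
}

int main(void)
{
        dget();         /* first reference: charge      */
        dget();         /* extra reference: no charge   */
        dput();
        dput();         /* last reference: uncharge     */
        return 0;
}
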
+diff -uprN linux-2.6.15.orig/include/ub/ub_debug.h linux-2.6.15-ve025stab014/include/ub/ub_debug.h
+--- linux-2.6.15.orig/include/ub/ub_debug.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_debug.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,95 @@
++/*
++ * include/ub/ub_debug.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_DEBUG_H_
++#define __UB_DEBUG_H_
++
++/*
++ * general debugging
++ */
++
++#define UBD_ALLOC 0x1
++#define UBD_CHARGE 0x2
++#define UBD_LIMIT 0x4
++#define UBD_TRACE 0x8
++
++/*
++ * ub_net debugging
++ */
++
++#define UBD_NET_SOCKET 0x10
++#define UBD_NET_SLEEP 0x20
++#define UBD_NET_SEND 0x40
++#define UBD_NET_RECV 0x80
++
++/*
++ * Main routines
++ */
++
++#define UB_DEBUG (0)
++#define DEBUG_RESOURCE (0ULL)
++
++#define ub_dbg_cond(__cond, __str, args...) \
++ do { \
++ if ((__cond) != 0) \
++ printk(__str, ##args); \
++ } while(0)
++
++#define ub_debug(__section, __str, args...) \
++ ub_dbg_cond(UB_DEBUG & (__section), __str, ##args)
++
++#define ub_debug_resource(__resource, __str, args...) \
++ ub_dbg_cond((UB_DEBUG & UBD_CHARGE) && \
++ (DEBUG_RESOURCE & (1 << (__resource))), \
++ __str, ##args)
++
++#if UB_DEBUG & UBD_TRACE
++#define ub_debug_trace(__cond, __b, __r) \
++ do { \
++ static struct ub_rate_info ri = { __b, __r }; \
++ if ((__cond) != 0 && ub_ratelimit(&ri)) \
++ dump_stack(); \
++ } while(0)
++#else
++#define ub_debug_trace(__cond, __burst, __rate)
++#endif
++
++#include <linux/config.h>
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++#include <linux/list.h>
++#include <linux/kmem_cache.h>
++
++struct user_beancounter;
++struct ub_cache_counter {
++ struct list_head ulist;
++ struct ub_cache_counter *next;
++ struct user_beancounter *ub;
++ kmem_cache_t *cachep;
++ unsigned long counter;
++};
++
++extern spinlock_t cc_lock;
++extern void init_cache_counters(void);
++extern void ub_free_counters(struct user_beancounter *);
++extern void ub_kmemcache_free(kmem_cache_t *cachep);
++
++struct vm_struct;
++extern void inc_vmalloc_charged(struct vm_struct *, int);
++extern void dec_vmalloc_charged(struct vm_struct *);
++#else
++#define init_cache_counters() do { } while (0)
++#define inc_vmalloc_charged(vm, f) do { } while (0)
++#define dec_vmalloc_charged(vm) do { } while (0)
++#define ub_free_counters(ub) do { } while (0)
++#define ub_kmemcache_free(cachep) do { } while (0)
++#endif
++
++#endif
+diff -uprN linux-2.6.15.orig/include/ub/ub_decl.h linux-2.6.15-ve025stab014/include/ub/ub_decl.h
+--- linux-2.6.15.orig/include/ub/ub_decl.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_decl.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,40 @@
++/*
++ * include/ub/ub_decl.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_DECL_H_
++#define __UB_DECL_H_
++
++#include <linux/config.h>
++
++/*
++ * Naming convention:
++ * ub_<section|object>_<operation>
++ */
++
++#ifdef CONFIG_USER_RESOURCE
++
++#define UB_DECLARE_FUNC(ret_type, decl) extern ret_type decl;
++#define UB_DECLARE_VOID_FUNC(decl) extern void decl;
++
++#else /* CONFIG_USER_RESOURCE */
++
++#define UB_DECLARE_FUNC(ret_type, decl) \
++ static inline ret_type decl \
++ { \
++ return (ret_type)0; \
++ }
++#define UB_DECLARE_VOID_FUNC(decl) \
++ static inline void decl \
++ { \
++ }
++
++#endif /* CONFIG_USER_RESOURCE */
++
++#endif
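
ub_decl.h is the switch that lets every UB hook be called unconditionally: with CONFIG_USER_RESOURCE the macro emits a normal extern declaration, without it the hook collapses to an inline stub returning 0, so call sites need no #ifdefs. A standalone illustration; the demo function name is invented, and enabling the toggle would require linking a real definition:

#include <stdio.h>

/* #define CONFIG_USER_RESOURCE */              /* toggle to compare expansions */

#ifdef CONFIG_USER_RESOURCE
#define UB_DECLARE_FUNC(ret_type, decl) extern ret_type decl;
#else
#define UB_DECLARE_FUNC(ret_type, decl) \
        static inline ret_type decl { return (ret_type)0; }
#endif

UB_DECLARE_FUNC(int, ub_demo_charge(int val))

int main(void)
{
        /* With the stub variant this prints 0 and needs no external symbol. */
        printf("%d\n", ub_demo_charge(5));
        return 0;
}
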
+diff -uprN linux-2.6.15.orig/include/ub/ub_hash.h linux-2.6.15-ve025stab014/include/ub/ub_hash.h
+--- linux-2.6.15.orig/include/ub/ub_hash.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_hash.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,41 @@
++/*
++ * include/ub/ub_hash.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_UBHASH_H
++#define _LINUX_UBHASH_H
++
++#ifdef __KERNEL__
++
++#define UB_HASH_SIZE 256
++
++struct ub_hash_slot {
++ struct user_beancounter *ubh_beans;
++};
++
++extern struct ub_hash_slot ub_hash[];
++extern spinlock_t ub_hash_lock;
++
++#ifdef CONFIG_USER_RESOURCE
++
++/*
++ * Iterate over beancounters
++ * @__slot - hash slot
++ * @__ubp - beancounter ptr
++ * Can use break :)
++ */
++#define for_each_beancounter(__slot, __ubp) \
++ for (__slot = 0, __ubp = NULL; \
++ __slot < UB_HASH_SIZE && __ubp == NULL; __slot++) \
++ for (__ubp = ub_hash[__slot].ubh_beans; __ubp; \
++ __ubp = __ubp->ub_next)
++
++#endif /* CONFIG_USER_RESOURCE */
++#endif /* __KERNEL__ */
++#endif /* _LINUX_UBHASH_H */
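
A minimal standalone use of the for_each_beancounter() iterator above; a cut-down user_beancounter with only uid and ub_next is enough to show how the nested loops visit every hash chain, and why a plain break works (it leaves __ubp non-NULL, which also terminates the outer loop):

#include <stdio.h>

#define UB_HASH_SIZE 256

struct user_beancounter { unsigned uid; struct user_beancounter *ub_next; };
struct ub_hash_slot { struct user_beancounter *ubh_beans; };

static struct ub_hash_slot ub_hash[UB_HASH_SIZE];

#define for_each_beancounter(__slot, __ubp) \
        for (__slot = 0, __ubp = NULL; \
             __slot < UB_HASH_SIZE && __ubp == NULL; __slot++) \
                for (__ubp = ub_hash[__slot].ubh_beans; __ubp; \
                     __ubp = __ubp->ub_next)

int main(void)
{
        struct user_beancounter a = { 101, NULL }, b = { 102, &a };
        struct user_beancounter *ub;
        int slot;

        ub_hash[102 & (UB_HASH_SIZE - 1)].ubh_beans = &b;   /* one chain of two */

        for_each_beancounter(slot, ub)
                printf("found beancounter uid=%u\n", ub->uid);
        return 0;
}
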
+diff -uprN linux-2.6.15.orig/include/ub/ub_mem.h linux-2.6.15-ve025stab014/include/ub/ub_mem.h
+--- linux-2.6.15.orig/include/ub/ub_mem.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_mem.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,82 @@
++/*
++ * include/ub/ub_mem.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_SLAB_H_
++#define __UB_SLAB_H_
++
++#include <linux/config.h>
++#include <linux/kmem_slab.h>
++#include <ub/beancounter.h>
++#include <ub/ub_decl.h>
++
++/*
++ * UB_KMEMSIZE accounting
++ */
++
++#ifdef CONFIG_UBC_DEBUG_ITEMS
++#define CHARGE_ORDER(__o) (1 << __o)
++#define CHARGE_SIZE(__s) 1
++#else
++#define CHARGE_ORDER(__o) (PAGE_SIZE << (__o))
++#define CHARGE_SIZE(__s) (__s)
++#endif
++
++#define page_ub(__page) ((__page)->bc.page_ub)
++
++struct mm_struct;
++struct page;
++
++UB_DECLARE_FUNC(struct user_beancounter *, slab_ub(void *obj))
++UB_DECLARE_FUNC(struct user_beancounter *, vmalloc_ub(void *obj))
++UB_DECLARE_FUNC(struct user_beancounter *, mem_ub(void *obj))
++
++UB_DECLARE_FUNC(int, ub_page_charge(struct page *page, int order, int mask))
++UB_DECLARE_VOID_FUNC(ub_page_uncharge(struct page *page, int order))
++UB_DECLARE_FUNC(int, ub_slab_charge(void *objp, int flags))
++UB_DECLARE_VOID_FUNC(ub_slab_uncharge(void *obj))
++
++#define slab_ubcs(cachep, slabp) ((struct user_beancounter **)\
++ (ALIGN((unsigned long)(slab_bufctl(slabp) + (cachep)->num),\
++ sizeof(void *))))
++
++#ifdef CONFIG_USER_RESOURCE
++/* Flags without __GFP_UBC must comply with vmalloc */
++#define ub_vmalloc(size) __vmalloc(size, \
++ GFP_KERNEL | __GFP_HIGHMEM | __GFP_UBC, PAGE_KERNEL)
++#define ub_kmalloc(size, flags) kmalloc(size, ((flags) | __GFP_UBC))
++extern struct user_beancounter *ub_select_worst(long *);
++
++/* stuff needed by mm/slab.c */
++#define UB_ALIGN(flags) (flags & SLAB_UBC ? sizeof(void *) : 1)
++#define UB_EXTRA(flags) (flags & SLAB_UBC ? sizeof(void *) : 0)
++#define set_cache_objuse(cachep) do { \
++ (cachep)->objuse = ((PAGE_SIZE << (cachep)->gfporder) + \
++ (cachep)->num - 1) / (cachep)->num; \
++ if (!OFF_SLAB(cachep)) \
++ break; \
++ (cachep)->objuse += ((cachep)->slabp_cache->objuse + \
++ (cachep)->num - 1) / (cachep)->num; \
++ } while (0)
++#define init_slab_ubps(cachep, slabp) do { \
++ if (!((cachep)->flags & SLAB_UBC)) \
++ break; \
++ memset(slab_ubcs(cachep, slabp), 0, \
++ (cachep)->num * sizeof(void *)); \
++ } while (0)
++#define kmem_obj_memusage(o) (GET_PAGE_CACHE(virt_to_page(o))->objuse)
++#else
++#define ub_vmalloc(size) vmalloc(size)
++#define ub_kmalloc(size, flags) kmalloc(size, flags)
++#define UB_ALIGN(flags) 1
++#define UB_EXTRA(flags) 0
++#define set_cache_objuse(c) do { } while (0)
++#define init_slab_ubps(c, s) do { } while (0)
++#endif
++#endif /* __UB_SLAB_H_ */
+diff -uprN linux-2.6.15.orig/include/ub/ub_misc.h linux-2.6.15-ve025stab014/include/ub/ub_misc.h
+--- linux-2.6.15.orig/include/ub/ub_misc.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_misc.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,49 @@
++/*
++ * include/ub/ub_misc.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_MISC_H_
++#define __UB_MISC_H_
++
++#include <ub/ub_decl.h>
++
++struct tty_struct;
++struct file;
++struct file_lock;
++struct sigqueue;
++
++UB_DECLARE_FUNC(int, ub_file_charge(struct file *f))
++UB_DECLARE_VOID_FUNC(ub_file_uncharge(struct file *f))
++UB_DECLARE_FUNC(int, ub_flock_charge(struct file_lock *fl, int hard))
++UB_DECLARE_VOID_FUNC(ub_flock_uncharge(struct file_lock *fl))
++UB_DECLARE_FUNC(int, ub_siginfo_charge(struct sigqueue *q,
++ struct user_beancounter *ub))
++UB_DECLARE_VOID_FUNC(ub_siginfo_uncharge(struct sigqueue *q))
++UB_DECLARE_FUNC(int, ub_task_charge(struct task_struct *parent,
++ struct task_struct *task))
++UB_DECLARE_VOID_FUNC(ub_task_uncharge(struct task_struct *task))
++UB_DECLARE_FUNC(int, ub_pty_charge(struct tty_struct *tty))
++UB_DECLARE_VOID_FUNC(ub_pty_uncharge(struct tty_struct *tty))
++
++#ifdef CONFIG_USER_RESOURCE
++#define set_flock_charged(fl) do { (fl)->fl_charged = 1; } while (0)
++#define set_mm_ub(mm, tsk) do { \
++ (mm)->mm_ub = get_beancounter(tsk ? \
++ tsk->task_bc.task_ub : get_exec_ub()); \
++ } while (0)
++#define put_mm_ub(mm) do { \
++ put_beancounter((mm)->mm_ub); \
++ (mm)->mm_ub = NULL; \
++ } while (0)
++#else
++#define set_flock_charged(fl) do { } while (0)
++#define set_mm_ub(mm, tsk) do { } while (0)
++#define put_mm_ub(mm) do { } while (0)
++#endif
++#endif
+diff -uprN linux-2.6.15.orig/include/ub/ub_net.h linux-2.6.15-ve025stab014/include/ub/ub_net.h
+--- linux-2.6.15.orig/include/ub/ub_net.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_net.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,141 @@
++/*
++ * include/ub/ub_net.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_NET_H_
++#define __UB_NET_H_
++
++/*
++ * UB_NUMXXXSOCK, UB_XXXBUF accounting
++ */
++
++#include <ub/ub_decl.h>
++#include <ub/ub_sk.h>
++
++#define bid2sid(__bufid) \
++ ((__bufid) == UB_TCPSNDBUF ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK)
++
++#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \
++ ~(SMP_CACHE_BYTES-1)))
++#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE)
++
++
++#define IS_TCP_SOCK(__family, __type) \
++ ((__family) == PF_INET && (__type) == SOCK_STREAM)
++
++UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type))
++UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk))
++UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk))
++UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk))
++UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask))
++UB_DECLARE_VOID_FUNC(ub_skb_free_bc(struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk))
++UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb))
++UB_DECLARE_VOID_FUNC(ub_sock_snd_queue_add(struct sock *sk, int resource,
++ unsigned long size))
++UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo,
++ unsigned long size))
++
++UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge_forced(struct sock *sk,
++ struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge_forced(struct sock *sk,
++ struct sk_buff *skb))
++
++/* Charge size */
++static inline unsigned long skb_charge_datalen(unsigned long chargesize)
++{
++#ifdef CONFIG_USER_RESOURCE
++ unsigned long slabsize;
++
++ chargesize -= sizeof(struct sk_buff);
++ slabsize = 64;
++ do {
++ slabsize <<= 1;
++ } while (slabsize <= chargesize);
++
++ slabsize >>= 1;
++ return (slabsize - sizeof(struct skb_shared_info)) &
++ ~(SMP_CACHE_BYTES-1);
++#else
++ return 0;
++#endif
++}
++
++static inline unsigned long skb_charge_size_gen(unsigned long size)
++{
++#ifdef CONFIG_USER_RESOURCE
++ unsigned int slabsize;
++
++ size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
++ slabsize = 32; /* min size is 64 because of skb_shared_info */
++ do {
++ slabsize <<= 1;
++ } while (slabsize < size);
++
++ return slabsize + sizeof(struct sk_buff);
++#else
++ return 0;
++#endif
++
++}
++
++static inline unsigned long skb_charge_size_const(unsigned long size)
++{
++#ifdef CONFIG_USER_RESOURCE
++ unsigned int ret;
++ if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 64)
++ ret = 64 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 128)
++ ret = 128 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 256)
++ ret = 256 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 512)
++ ret = 512 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 1024)
++ ret = 1024 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 2048)
++ ret = 2048 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 4096)
++ ret = 4096 + sizeof(struct sk_buff);
++ else
++ ret = skb_charge_size_gen(size);
++ return ret;
++#else
++ return 0;
++#endif
++}
++
++
++#define skb_charge_size(__size) \
++ (__builtin_constant_p(__size) ? \
++ skb_charge_size_const(__size) : \
++ skb_charge_size_gen(__size))
++
++UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb))
++UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb,
++ struct sock *sk, unsigned long size, int res))
++
++/* Poll reserv */
++UB_DECLARE_FUNC(int, ub_sock_makewres_other(struct sock *sk, unsigned long sz))
++UB_DECLARE_FUNC(int, ub_sock_makewres_tcp(struct sock *sk, unsigned long size))
++UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk, unsigned long size))
++UB_DECLARE_FUNC(int, ub_sock_getwres_tcp(struct sock *sk, unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk, unsigned long size,
++ unsigned long ressize))
++UB_DECLARE_VOID_FUNC(ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
++ unsigned long ressize))
++UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_other(struct sock *sk,
++ unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz))
++UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk))
++
++#endif
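
skb_charge_size_gen() above rounds the requested data size up to the slab bucket it will actually occupy (aligned data plus skb_shared_info, rounded to the next power of two) and then adds the sk_buff header, so the beancounter is charged for the real allocation. A worked example; the two sizeof() stand-ins and the simplified alignment are assumptions, since the real values depend on the kernel configuration:

#include <stdio.h>

#define SKB_SHARED_INFO_SZ 128  /* assumed stand-in for sizeof(struct skb_shared_info) */
#define SK_BUFF_SZ         240  /* assumed stand-in for sizeof(struct sk_buff) */
#define SKB_DATA_ALIGN(x)  (((x) + 15UL) & ~15UL)       /* simplified alignment */

static unsigned long skb_charge_size_gen(unsigned long size)
{
        unsigned long slabsize;

        size = SKB_DATA_ALIGN(size) + SKB_SHARED_INFO_SZ;
        slabsize = 32;          /* min slab is 64 because of skb_shared_info */
        do {
                slabsize <<= 1;
        } while (slabsize < size);

        return slabsize + SK_BUFF_SZ;
}

int main(void)
{
        /* A 1000-byte payload rounds up to a 2048-byte slab plus the header. */
        printf("charge for 1000 bytes: %lu\n", skb_charge_size_gen(1000));
        return 0;
}
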
+diff -uprN linux-2.6.15.orig/include/ub/ub_oom.h linux-2.6.15-ve025stab014/include/ub/ub_oom.h
+--- linux-2.6.15.orig/include/ub/ub_oom.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_oom.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,45 @@
++#ifndef __UB_OOM_H_
++#define __UB_OOM_H_
++
++#include <ub/ub_decl.h>
++
++/* oom killer */
++UB_DECLARE_VOID_FUNC(ub_oom_init(void))
++UB_DECLARE_FUNC(int, ub_oom_start(void))
++UB_DECLARE_FUNC(struct user_beancounter *, ub_oom_select_worst(void))
++UB_DECLARE_VOID_FUNC(ub_oom_kill_task(struct task_struct *tsk,
++ struct mm_struct *mm))
++UB_DECLARE_VOID_FUNC(ub_oom_stop(void))
++UB_DECLARE_VOID_FUNC(ub_oom_task_exit(struct task_struct *tsk))
++
++#ifdef CONFIG_USER_RESOURCE
++#define ub_oom_task_match(t, ub) (((ub) == NULL) || \
++ ((t)->mm != NULL && (t)->mm->mm_ub == (ub)))
++#define ub_oom_panic(ub) ((ub) == NULL)
++#else
++#define ub_oom_task_match(t, ub) 1
++#define ub_oom_panic(ub) 1
++#endif
++
++/* vmscan stuff */
++struct scan_control;
++struct oom_freeing_stat {
++ unsigned long free;
++ unsigned long swap; /* page reference counters removed */
++ unsigned long write; /* IO started */
++ unsigned long slab; /* slabs shrunk */
++};
++
++#ifdef CONFIG_USER_RESOURCE
++#define ub_oom_inc(sc, res, nr) do { (sc)->oom_stat.res += nr; } while (0)
++#define ub_oom_did_progress(sc) (nr_swap_pages && ( \
++ (sc)->oom_stat.slab || \
++ (sc)->oom_stat.swap || \
++ (sc)->oom_stat.write || \
++ (sc)->oom_stat.free \
++ ))
++#else
++#define ub_oom_inc(sc, res, nr) do { } while (0)
++#define ub_oom_did_progress(sc) 0
++#endif
++#endif
+diff -uprN linux-2.6.15.orig/include/ub/ub_orphan.h linux-2.6.15-ve025stab014/include/ub/ub_orphan.h
+--- linux-2.6.15.orig/include/ub/ub_orphan.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_orphan.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,56 @@
++/*
++ * include/ub/ub_orphan.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_ORPHAN_H_
++#define __UB_ORPHAN_H_
++
++#include <net/tcp.h>
++
++#include "ub/beancounter.h"
++#include "ub/ub_net.h"
++
++
++static inline atomic_t *__ub_get_orphan_count_ptr(struct sock *sk)
++{
++#ifdef CONFIG_USER_RESOURCE
++ if (sock_has_ubc(sk))
++ return &sock_bc(sk)->ub->ub_orphan_count;
++#endif
++ return sk->sk_prot->orphan_count;
++}
++
++static inline void ub_inc_orphan_count(struct sock *sk)
++{
++ atomic_inc(__ub_get_orphan_count_ptr(sk));
++}
++
++static inline void ub_dec_orphan_count(struct sock *sk)
++{
++ atomic_dec(__ub_get_orphan_count_ptr(sk));
++}
++
++static inline int ub_get_orphan_count(struct sock *sk)
++{
++ return atomic_read(__ub_get_orphan_count_ptr(sk));
++}
++
++extern int __ub_too_many_orphans(struct sock *sk, int count);
++static inline int ub_too_many_orphans(struct sock *sk, int count)
++{
++#ifdef CONFIG_USER_RESOURCE
++ if (__ub_too_many_orphans(sk, count))
++ return 1;
++#endif
++ return (ub_get_orphan_count(sk) > sysctl_tcp_max_orphans ||
++ (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
++ atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]));
++}
++
++#endif
+diff -uprN linux-2.6.15.orig/include/ub/ub_page.h linux-2.6.15-ve025stab014/include/ub/ub_page.h
+--- linux-2.6.15.orig/include/ub/ub_page.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_page.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,48 @@
++/*
++ * include/ub/ub_page.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_PAGE_H_
++#define __UB_PAGE_H_
++
++#include <linux/config.h>
++
++/*
++ * Page_beancounters
++ */
++
++struct page;
++struct user_beancounter;
++
++#define PB_MAGIC 0x62700001UL
++
++struct page_beancounter {
++ unsigned long pb_magic;
++ struct page *page;
++ struct user_beancounter *ub;
++ struct page_beancounter *next_hash;
++ unsigned refcount;
++ struct list_head page_list;
++};
++
++#define PB_REFCOUNT_BITS 24
++#define PB_SHIFT_GET(c) ((c) >> PB_REFCOUNT_BITS)
++#define PB_SHIFT_INC(c) ((c) += (1 << PB_REFCOUNT_BITS))
++#define PB_SHIFT_DEC(c) ((c) -= (1 << PB_REFCOUNT_BITS))
++#define PB_COUNT_GET(c) ((c) & ((1 << PB_REFCOUNT_BITS) - 1))
++#define PB_COUNT_INC(c) ((c)++)
++#define PB_COUNT_DEC(c) ((c)--)
++#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c))
++
++#define page_pbc(__page) ((__page)->bc.page_pb)
++
++struct address_space;
++extern int is_shmem_mapping(struct address_space *);
++
++#endif
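
The PB_* macros above pack two counters into one word: the low PB_REFCOUNT_BITS hold one count, the high bits a second one, and PB_REFCOUNT_MAKE() builds the combined value. A subset reproduced verbatim and exercised in userspace:

#include <stdio.h>

#define PB_REFCOUNT_BITS 24
#define PB_SHIFT_GET(c)  ((c) >> PB_REFCOUNT_BITS)
#define PB_SHIFT_INC(c)  ((c) += (1 << PB_REFCOUNT_BITS))
#define PB_COUNT_GET(c)  ((c) & ((1 << PB_REFCOUNT_BITS) - 1))
#define PB_COUNT_INC(c)  ((c)++)
#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c))

int main(void)
{
        unsigned refcount = PB_REFCOUNT_MAKE(2, 5);     /* high part 2, count 5 */

        PB_COUNT_INC(refcount);
        PB_SHIFT_INC(refcount);
        printf("shift=%u count=%u raw=0x%x\n",
               PB_SHIFT_GET(refcount), PB_COUNT_GET(refcount), refcount);
        return 0;
}
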
+diff -uprN linux-2.6.15.orig/include/ub/ub_sk.h linux-2.6.15-ve025stab014/include/ub/ub_sk.h
+--- linux-2.6.15.orig/include/ub/ub_sk.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_sk.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,43 @@
++/*
++ * include/ub/ub_sk.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_SK_H_
++#define __UB_SK_H_
++
++#include <linux/config.h>
++#include <ub/ub_task.h>
++
++struct sock;
++struct sk_buff;
++
++struct skb_beancounter {
++ struct user_beancounter *ub;
++ unsigned long charged:27, resource:5;
++};
++
++struct sock_beancounter {
++ /*
++ * already charged for future sends, to make poll work;
++ * changes are protected by bc spinlock, read is under socket
++ * semaphore for sends and unprotected in poll
++ */
++ unsigned long poll_reserv;
++ unsigned long ub_waitspc; /* space we are waiting for */
++ unsigned long ub_wcharged;
++ struct list_head ub_sock_list;
++ struct user_beancounter *ub;
++};
++
++#define sock_bc(__sk) (&(__sk)->sk_bc)
++#define skb_bc(__skb) (&(__skb)->skb_bc)
++#define skbc_sock(__skbc) (container_of(__skbc, struct sock, sk_bc))
++#define sock_has_ubc(__sk) (sock_bc(__sk)->ub != NULL)
++
++#endif
+diff -uprN linux-2.6.15.orig/include/ub/ub_stat.h linux-2.6.15-ve025stab014/include/ub/ub_stat.h
+--- linux-2.6.15.orig/include/ub/ub_stat.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_stat.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,70 @@
++/*
++ * include/ub/ub_stat.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_STAT_H_
++#define __UB_STAT_H_
++
++/* sys_ubstat commands list */
++#define UBSTAT_READ_ONE 0x010000
++#define UBSTAT_READ_ALL 0x020000
++#define UBSTAT_READ_FULL 0x030000
++#define UBSTAT_UBLIST 0x040000
++#define UBSTAT_UBPARMNUM 0x050000
++#define UBSTAT_GETTIME 0x060000
++
++#define UBSTAT_CMD(func) ((func) & 0xF0000)
++#define UBSTAT_PARMID(func) ((func) & 0x0FFFF)
++
++#define TIME_MAX_SEC (LONG_MAX / HZ)
++#define TIME_MAX_JIF (TIME_MAX_SEC * HZ)
++
++typedef unsigned long ubstattime_t;
++
++typedef struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstattime_t cur_time;
++} ubgettime_t;
++
++typedef struct {
++ long maxinterval;
++ int signum;
++} ubnotifrq_t;
++
++typedef struct {
++ unsigned long maxheld;
++ unsigned long failcnt;
++} ubstatparm_t;
++
++typedef struct {
++ unsigned long barrier;
++ unsigned long limit;
++ unsigned long held;
++ unsigned long maxheld;
++ unsigned long minheld;
++ unsigned long failcnt;
++ unsigned long __unused1;
++ unsigned long __unused2;
++} ubstatparmf_t;
++
++typedef struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstatparmf_t param[0];
++} ubstatfull_t;
++
++#ifdef __KERNEL__
++struct ub_stat_notify {
++ struct list_head list;
++ struct task_struct *task;
++ int signum;
++};
++#endif
++#endif
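
The sys_ubstat command word encodes the operation in the high bits and a parameter id in the low 16 bits; UBSTAT_CMD() and UBSTAT_PARMID() split it apart again. A small userspace check of that encoding (resource id 13 corresponds to UB_TCPSNDBUF in beancounter.h above):

#include <stdio.h>

#define UBSTAT_READ_ONE      0x010000
#define UBSTAT_CMD(func)     ((func) & 0xF0000)
#define UBSTAT_PARMID(func)  ((func) & 0x0FFFF)

int main(void)
{
        /* Ask for a single parameter, e.g. resource id 13 (UB_TCPSNDBUF). */
        unsigned long func = UBSTAT_READ_ONE | 13;

        printf("cmd=0x%lx parm=%lu\n",
               (unsigned long)UBSTAT_CMD(func), (unsigned long)UBSTAT_PARMID(func));
        return 0;
}
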
+diff -uprN linux-2.6.15.orig/include/ub/ub_task.h linux-2.6.15-ve025stab014/include/ub/ub_task.h
+--- linux-2.6.15.orig/include/ub/ub_task.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_task.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,49 @@
++/*
++ * include/ub/ub_task.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_TASK_H_
++#define __UB_TASK_H_
++
++#include <linux/config.h>
++
++struct user_beancounter;
++
++
++#ifdef CONFIG_USER_RESOURCE
++
++struct task_beancounter {
++ struct user_beancounter *exec_ub;
++ struct user_beancounter *task_ub;
++ struct user_beancounter *fork_sub;
++ void *task_fnode, *task_freserv;
++ unsigned long oom_generation;
++ unsigned long task_data[4];
++};
++
++#define get_exec_ub() (current->task_bc.exec_ub)
++#define get_task_ub(__task) ((__task)->task_bc.task_ub)
++#define set_exec_ub(__newub) \
++({ \
++ struct user_beancounter *old; \
++ struct task_beancounter *tbc; \
++ tbc = &current->task_bc; \
++ old = tbc->exec_ub; \
++ tbc->exec_ub = __newub; \
++ old; \
++})
++
++#else /* CONFIG_USER_RESOURCE */
++
++#define get_exec_ub() (NULL)
++#define get_task_ub(task) (NULL)
++#define set_exec_ub(__ub) (NULL)
++
++#endif /* CONFIG_USER_RESOURCE */
++#endif /* __UB_TASK_H_ */
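
set_exec_ub() above swaps the execution beancounter on current and hands back the previous one, so the usual pattern is save, switch, do the work, restore. A userspace model with a plain pointer standing in for current->task_bc.exec_ub; all names below are invented, only the pattern comes from the macro:

#include <stdio.h>

struct user_beancounter { unsigned uid; };

static struct user_beancounter ub0 = { 0 }, ub_guest = { 101 };
static struct user_beancounter *exec_ub = &ub0;

static struct user_beancounter *set_exec_ub(struct user_beancounter *new_ub)
{
        struct user_beancounter *old = exec_ub;
        exec_ub = new_ub;
        return old;
}

int main(void)
{
        struct user_beancounter *old;

        old = set_exec_ub(&ub_guest);   /* charge the following work to the guest */
        printf("charging to uid %u\n", exec_ub->uid);
        set_exec_ub(old);               /* restore on the way out */
        printf("back to uid %u\n", exec_ub->uid);
        return 0;
}
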
+diff -uprN linux-2.6.15.orig/include/ub/ub_tcp.h linux-2.6.15-ve025stab014/include/ub/ub_tcp.h
+--- linux-2.6.15.orig/include/ub/ub_tcp.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_tcp.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,79 @@
++/*
++ * include/ub/ub_tcp.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_TCP_H_
++#define __UB_TCP_H_
++
++/*
++ * UB_NUMXXXSOCK, UB_XXXBUF accounting
++ */
++
++#include <ub/ub_sk.h>
++#include <ub/beancounter.h>
++
++static inline void ub_tcp_update_maxadvmss(struct sock *sk)
++{
++#ifdef CONFIG_USER_RESOURCE
++ if (!sock_has_ubc(sk))
++ return;
++ if (sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss)
++ return;
++
++ sock_bc(sk)->ub->ub_maxadvmss =
++ skb_charge_size(MAX_HEADER + sizeof(struct iphdr)
++ + sizeof(struct tcphdr) + tcp_sk(sk)->advmss);
++#endif
++}
++
++static inline int ub_tcp_rmem_allows_expand(struct sock *sk)
++{
++ if (tcp_memory_pressure)
++ return 0;
++#ifdef CONFIG_USER_RESOURCE
++ if (sock_has_ubc(sk)) {
++ struct user_beancounter *ub;
++
++ ub = sock_bc(sk)->ub;
++ if (ub->ub_rmem_pressure == UB_RMEM_EXPAND)
++ return 1;
++ if (ub->ub_rmem_pressure == UB_RMEM_SHRINK)
++ return 0;
++ return sk->sk_rcvbuf <= ub->ub_rmem_thres;
++ }
++#endif
++ return 1;
++}
++
++static inline int ub_tcp_memory_pressure(struct sock *sk)
++{
++ if (tcp_memory_pressure)
++ return 1;
++#ifdef CONFIG_USER_RESOURCE
++ if (sock_has_ubc(sk))
++ return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND;
++#endif
++ return 0;
++}
++
++static inline int ub_tcp_shrink_rcvbuf(struct sock *sk)
++{
++ if (tcp_memory_pressure)
++ return 1;
++#ifdef CONFIG_USER_RESOURCE
++ if (sock_has_ubc(sk))
++ return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK;
++#endif
++ return 0;
++}
++
++UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk))
++UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk))
++
++#endif
+diff -uprN linux-2.6.15.orig/include/ub/ub_vmpages.h linux-2.6.15-ve025stab014/include/ub/ub_vmpages.h
+--- linux-2.6.15.orig/include/ub/ub_vmpages.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/include/ub/ub_vmpages.h 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,165 @@
++/*
++ * include/ub/ub_vmpages.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_PAGES_H_
++#define __UB_PAGES_H_
++
++#include <linux/linkage.h>
++#include <linux/config.h>
++#include <ub/beancounter.h>
++#include <ub/ub_decl.h>
++
++/*
++ * Check whether vma has private or copy-on-write mapping.
++ * Should match checks in ub_protected_charge().
++ */
++#define VM_UB_PRIVATE(__flags, __file) \
++ ( ((__flags) & VM_WRITE) ? \
++ (__file) == NULL || !((__flags) & VM_SHARED) : \
++ 0 \
++ )
++
++/* Mprotect charging result */
++#define PRIVVM_ERROR -1
++#define PRIVVM_NO_CHARGE 0 /* UB_DECLARE_FUNC retval with ubc off */
++#define PRIVVM_TO_PRIVATE 1
++#define PRIVVM_TO_SHARED 2
++
++UB_DECLARE_FUNC(int, ub_protected_charge(struct mm_struct *mm,
++ unsigned long size,
++ unsigned long newflags,
++ struct vm_area_struct *vma))
++
++UB_DECLARE_VOID_FUNC(ub_unused_privvm_add(struct mm_struct *mm,
++ struct vm_area_struct *vma,
++ unsigned long num))
++#define ub_unused_privvm_inc(mm, vma) ub_unused_privvm_add(mm, vma, 1)
++UB_DECLARE_VOID_FUNC(ub_unused_privvm_sub(struct mm_struct *mm,
++ struct vm_area_struct *vma,
++ unsigned long num))
++#define ub_unused_privvm_dec(mm, vma) ub_unused_privvm_sub(mm, vma, 1)
++
++UB_DECLARE_VOID_FUNC(__ub_unused_privvm_dec(struct mm_struct *mm,
++ long sz))
++
++UB_DECLARE_FUNC(int, ub_memory_charge(struct mm_struct *mm,
++ unsigned long size,
++ unsigned vm_flags,
++ struct file *vm_file,
++ int strict))
++UB_DECLARE_VOID_FUNC(ub_memory_uncharge(struct mm_struct *mm,
++ unsigned long size,
++ unsigned vm_flags,
++ struct file *vm_file))
++
++struct shmem_inode_info;
++UB_DECLARE_FUNC(int, ub_shmpages_charge(struct shmem_inode_info *i, long sz))
++UB_DECLARE_VOID_FUNC(ub_shmpages_uncharge(struct shmem_inode_info *i, long sz))
++UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_inc(struct shmem_inode_info *shi))
++UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
++ unsigned long size))
++#define ub_tmpfs_respages_dec(shi) ub_tmpfs_respages_sub(shi, 1)
++
++#ifdef CONFIG_USER_RESOURCE
++#define shmi_ub_set(shi, ub) do { \
++ (shi)->shmi_ub = get_beancounter(ub); \
++ } while (0)
++#define shmi_ub_put(shi) do { \
++ put_beancounter((shi)->shmi_ub); \
++ (shi)->shmi_ub = NULL; \
++ } while (0)
++#else
++#define shmi_ub_set(shi, ub) do { } while (0)
++#define shmi_ub_put(shi) do { } while (0)
++#endif
++
++UB_DECLARE_FUNC(int, ub_locked_charge(struct mm_struct *mm,
++ unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_locked_uncharge(struct mm_struct *mm,
++ unsigned long size))
++UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi,
++ unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi,
++ unsigned long size))
++
++UB_DECLARE_FUNC(unsigned long, pages_in_vma_range(struct vm_area_struct *vma,
++ unsigned long addr, unsigned long end))
++UB_DECLARE_VOID_FUNC(warn_bad_rss(struct vm_area_struct *vma,
++ unsigned long freed))
++#define pages_in_vma(vma) (pages_in_vma_range(vma, \
++ vma->vm_start, vma->vm_end))
++
++#define UB_PAGE_WEIGHT_SHIFT 24
++#define UB_PAGE_WEIGHT (1 << UB_PAGE_WEIGHT_SHIFT)
++
++struct page_beancounter;
++
++/* Mprotect charging result */
++#define PRIVVM_ERROR -1
++#define PRIVVM_NO_CHARGE 0
++#define PRIVVM_TO_PRIVATE 1
++#define PRIVVM_TO_SHARED 2
++
++extern void fastcall __ub_update_physpages(struct user_beancounter *ub);
++extern void fastcall __ub_update_oomguarpages(struct user_beancounter *ub);
++extern void fastcall __ub_update_privvm(struct user_beancounter *ub);
++
++#ifdef CONFIG_USER_RSS_ACCOUNTING
++#define PB_DECLARE_FUNC(ret, decl) UB_DECLARE_FUNC(ret, decl)
++#define PB_DECLARE_VOID_FUNC(decl) UB_DECLARE_VOID_FUNC(decl)
++#else
++#define PB_DECLARE_FUNC(ret, decl) static inline ret decl {return (ret)0;}
++#define PB_DECLARE_VOID_FUNC(decl) static inline void decl { }
++#endif
++
++PB_DECLARE_FUNC(int, __pb_alloc(struct page_beancounter **pbc, gfp_t mask))
++#define pb_alloc(pbp) (__pb_alloc(pbp, GFP_KERNEL))
++PB_DECLARE_FUNC(int, pb_alloc_list(struct page_beancounter **pbc, int num))
++PB_DECLARE_FUNC(int, pb_alloc_all(struct page_beancounter **pbc))
++PB_DECLARE_FUNC(int, pb_add_ref(struct page *page,
++ struct mm_struct *mm,
++ struct page_beancounter **pbc))
++PB_DECLARE_VOID_FUNC(pb_add_list_ref(struct page *page,
++ struct mm_struct *mm,
++ struct page_beancounter **pbc))
++PB_DECLARE_VOID_FUNC(pb_free_list(struct page_beancounter **pb))
++PB_DECLARE_VOID_FUNC(pb_free(struct page_beancounter **pb))
++PB_DECLARE_VOID_FUNC(pb_remove_ref(struct page *page,
++ struct mm_struct *mm))
++
++PB_DECLARE_FUNC(struct user_beancounter *, pb_grab_page_ub(struct page *page))
++#endif
++
++#ifdef CONFIG_USER_SWAP_ACCOUNTING
++#define SWP_DECLARE_FUNC(ret, decl) UB_DECLARE_FUNC(ret, decl)
++#define SWP_DECLARE_VOID_FUNC(decl) UB_DECLARE_VOID_FUNC(decl)
++#else
++#define SWP_DECLARE_FUNC(ret, decl) static inline ret decl {return (ret)0;}
++#define SWP_DECLARE_VOID_FUNC(decl) static inline void decl { }
++#endif
++
++struct swap_info_struct;
++SWP_DECLARE_FUNC(int, ub_swap_init(struct swap_info_struct *si, pgoff_t n))
++SWP_DECLARE_VOID_FUNC(ub_swap_fini(struct swap_info_struct *si))
++SWP_DECLARE_VOID_FUNC(ub_swapentry_inc(struct swap_info_struct *si, pgoff_t n,
++ struct user_beancounter *ub))
++SWP_DECLARE_VOID_FUNC(ub_swapentry_dec(struct swap_info_struct *si, pgoff_t n))
++
++#ifdef CONFIG_USER_RESOURCE
++#define ub_unmap_inc(mm) do { \
++ (mm)->mm_ub->ub_stat[smp_processor_id()].unmap++; \
++ } while (0)
++#define ub_swapin_inc(mm) do { \
++ (mm)->mm_ub->ub_stat[smp_processor_id()].swapin++; \
++ } while (0)
++#else
++#define ub_unmap_inc(mm) do { } while (0)
++#define ub_swapin_inc(mm) do { } while (0)
++#endif
+diff -uprN linux-2.6.15.orig/init/calibrate.c linux-2.6.15-ve025stab014/init/calibrate.c
+--- linux-2.6.15.orig/init/calibrate.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/init/calibrate.c 2006-01-27 14:48:08.000000000 +0300
+@@ -7,6 +7,7 @@
+ #include <linux/sched.h>
+ #include <linux/delay.h>
+ #include <linux/init.h>
++#include <linux/module.h>
+
+ #include <asm/timex.h>
+
+@@ -105,6 +106,60 @@ static unsigned long __devinit calibrate
+ static unsigned long __devinit calibrate_delay_direct(void) {return 0;}
+ #endif
+
++unsigned long cycles_per_jiffy, cycles_per_clock;
++
++static __devinit void calibrate_cycles(void)
++{
++ unsigned long ticks;
++ cycles_t time;
++
++ ticks = jiffies;
++ while (ticks == jiffies)
++ /* nothing */;
++ time = get_cycles();
++ ticks = jiffies;
++ while (ticks == jiffies)
++ /* nothing */;
++
++ time = get_cycles() - time;
++ cycles_per_jiffy = time;
++ if ((time >> 32) != 0) {
++ printk("CPU too fast! timings are incorrect\n");
++ cycles_per_jiffy = -1;
++ }
++}
++
++EXPORT_SYMBOL(cycles_per_jiffy);
++EXPORT_SYMBOL(cycles_per_clock);
++
++static __devinit void calc_cycles_per_jiffy(void)
++{
++#if defined(__i386__)
++ extern unsigned long fast_gettimeoffset_quotient;
++ unsigned long low, high;
++
++ if (fast_gettimeoffset_quotient != 0) {
++ __asm__("divl %2"
++ :"=a" (low), "=d" (high)
++ :"r" (fast_gettimeoffset_quotient),
++ "0" (0), "1" (1000000/HZ));
++
++ cycles_per_jiffy = low;
++ }
++#endif
++ if (cycles_per_jiffy == 0)
++ calibrate_cycles();
++
++ if (cycles_per_jiffy == 0) {
++ printk(KERN_WARNING "Cycles are stuck! "
++				"Some VPS statistics will not be available.\n");
++ /* to prevent division by zero in cycles_to_(clocks|jiffies) */
++ cycles_per_jiffy = 1;
++ cycles_per_clock = 1;
++ } else
++ cycles_per_clock = cycles_per_jiffy * (HZ / CLOCKS_PER_SEC);
++}
++
+ /*
+ * This is the number of bits of precision for the loops_per_jiffy. Each
+ * bit takes on average 1.5/HZ seconds. This (like the original) is a little
+@@ -170,4 +225,5 @@ void __devinit calibrate_delay(void)
+ loops_per_jiffy);
+ }
+
++ calc_cycles_per_jiffy();
+ }
+diff -uprN linux-2.6.15.orig/init/main.c linux-2.6.15-ve025stab014/init/main.c
+--- linux-2.6.15.orig/init/main.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/init/main.c 2006-01-27 14:48:08.000000000 +0300
+@@ -49,6 +49,8 @@
+ #include <linux/key.h>
+ #include <net/sock.h>
+
++#include <ub/beancounter.h>
++
+ #include <asm/io.h>
+ #include <asm/bugs.h>
+ #include <asm/setup.h>
+@@ -108,6 +110,20 @@ extern void tc_init(void);
+ enum system_states system_state;
+ EXPORT_SYMBOL(system_state);
+
++#ifdef CONFIG_VE
++extern void init_ve_system(void);
++extern void prepare_ve0_process(struct task_struct *tsk);
++extern void prepare_ve0_proc_root(void);
++extern void prepare_ve0_sysctl(void);
++extern void prepare_ve0_loopback(void);
++#else
++#define init_ve_system() do { } while (0)
++#define prepare_ve0_process(tsk) do { } while (0)
++#define prepare_ve0_proc_root() do { } while (0)
++#define prepare_ve0_sysctl() do { } while (0)
++#define prepare_ve0_loopback() do { } while (0)
++#endif
++
+ /*
+ * Boot command-line arguments
+ */
+@@ -451,6 +467,10 @@ asmlinkage void __init start_kernel(void
+ * enable them
+ */
+ lock_kernel();
++ /*
++	 * Prepare ub0 to account for early allocations, if any
++ */
++ ub_init_ub0();
+ page_address_init();
+ printk(KERN_NOTICE);
+ printk(linux_banner);
+@@ -463,6 +483,8 @@ asmlinkage void __init start_kernel(void
+ */
+ smp_prepare_boot_cpu();
+
++ prepare_ve0_process(&init_task);
++
+ /*
+ * Set up the scheduler prior starting any interrupts (such as the
+ * timer interrupt). Full topology setup happens at smp_init()
+@@ -526,6 +548,7 @@ asmlinkage void __init start_kernel(void
+ #endif
+ fork_init(num_physpages);
+ proc_caches_init();
++ ub_init_cache(num_physpages);
+ buffer_init();
+ unnamed_dev_init();
+ key_init();
+@@ -536,7 +559,10 @@ asmlinkage void __init start_kernel(void
+ /* rootfs populating might need page-writeback */
+ page_writeback_init();
+ #ifdef CONFIG_PROC_FS
++ prepare_ve0_proc_root();
++ prepare_ve0_sysctl();
+ proc_root_init();
++ ub_init_proc();
+ #endif
+ cpuset_init();
+
+@@ -544,6 +570,10 @@ asmlinkage void __init start_kernel(void
+
+ acpi_early_init(); /* before LAPIC and SMP init */
+
++#ifdef CONFIG_USER_RESOURCE
++ ub_init_pbc();
++#endif
++
+ /* Do the rest non-__init'ed, we're now alive */
+ rest_init();
+ }
+@@ -605,6 +635,9 @@ static void __init do_initcalls(void)
+ */
+ static void __init do_basic_setup(void)
+ {
++ prepare_ve0_loopback();
++ init_ve_system();
++
+ /* drivers will send hotplug events */
+ init_workqueues();
+ usermodehelper_init();
+diff -uprN linux-2.6.15.orig/ipc/mqueue.c linux-2.6.15-ve025stab014/ipc/mqueue.c
+--- linux-2.6.15.orig/ipc/mqueue.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/ipc/mqueue.c 2006-01-27 14:48:05.000000000 +0300
+@@ -1010,7 +1010,8 @@ retry:
+ goto out;
+ }
+
+- ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT);
++ ret = netlink_attachskb(sock, nc, 0,
++ MAX_SCHEDULE_TIMEOUT, NULL);
+ if (ret == 1)
+ goto retry;
+ if (ret) {
+diff -uprN linux-2.6.15.orig/ipc/msg.c linux-2.6.15-ve025stab014/ipc/msg.c
+--- linux-2.6.15.orig/ipc/msg.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/ipc/msg.c 2006-01-27 14:48:08.000000000 +0300
+@@ -87,6 +87,45 @@ void __init msg_init (void)
+ sysvipc_msg_proc_show);
+ }
+
++#ifdef CONFIG_VE
++void __init prepare_msg(void)
++{
++ get_ve0()->_msg_ids = &msg_ids;
++ get_ve0()->_msg_ctlmax = msg_ctlmax;
++ get_ve0()->_msg_ctlmnb = msg_ctlmnb;
++ get_ve0()->_msg_ctlmni = msg_ctlmni;
++}
++
++#define msg_ids (*(get_exec_env()->_msg_ids))
++#define msg_ctlmax (get_exec_env()->_msg_ctlmax)
++#define msg_ctlmnb (get_exec_env()->_msg_ctlmnb)
++#define msg_ctlmni (get_exec_env()->_msg_ctlmni)
++
++void init_ve_ipc_msg(void)
++{
++ msg_ctlmax = MSGMAX;
++ msg_ctlmnb = MSGMNB;
++ msg_ctlmni = MSGMNI;
++ ipc_init_ids(&msg_ids, MSGMNI);
++}
++
++void cleanup_ve_ipc_msg(void)
++{
++ int i;
++ struct msg_queue *msq;
++
++ down(&msg_ids.sem);
++ for (i = 0; i <= msg_ids.max_id; i++) {
++ msq = msg_lock(i);
++ if (msq == NULL)
++ continue;
++
++ freeque(msq, i);
++ }
++ up(&msg_ids.sem);
++}
++#endif
++
+ static int newque (key_t key, int msgflg)
+ {
+ int id;
+@@ -449,7 +488,7 @@ asmlinkage long sys_msgctl (int msqid, i
+ ipcp = &msq->q_perm;
+ err = -EPERM;
+ if (current->euid != ipcp->cuid &&
+- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
++ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN))
+ /* We _could_ check for CAP_CHOWN above, but we don't */
+ goto out_unlock_up;
+
+@@ -539,7 +578,7 @@ static inline int pipelined_send(struct
+ msr->r_msg = ERR_PTR(-E2BIG);
+ } else {
+ msr->r_msg = NULL;
+- msq->q_lrpid = msr->r_tsk->pid;
++ msq->q_lrpid = virt_pid(msr->r_tsk);
+ msq->q_rtime = get_seconds();
+ wake_up_process(msr->r_tsk);
+ smp_mb();
+@@ -621,7 +660,7 @@ asmlinkage long sys_msgsnd (int msqid, s
+ }
+ }
+
+- msq->q_lspid = current->tgid;
++ msq->q_lspid = virt_tgid(current);
+ msq->q_stime = get_seconds();
+
+ if(!pipelined_send(msq,msg)) {
+@@ -717,7 +756,7 @@ asmlinkage long sys_msgrcv (int msqid, s
+ list_del(&msg->m_list);
+ msq->q_qnum--;
+ msq->q_rtime = get_seconds();
+- msq->q_lrpid = current->tgid;
++ msq->q_lrpid = virt_tgid(current);
+ msq->q_cbytes -= msg->m_ts;
+ atomic_sub(msg->m_ts,&msg_bytes);
+ atomic_dec(&msg_hdrs);
+diff -uprN linux-2.6.15.orig/ipc/msgutil.c linux-2.6.15-ve025stab014/ipc/msgutil.c
+--- linux-2.6.15.orig/ipc/msgutil.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/ipc/msgutil.c 2006-01-27 14:48:06.000000000 +0300
+@@ -17,6 +17,8 @@
+
+ #include "util.h"
+
++#include <ub/ub_mem.h>
++
+ struct msg_msgseg {
+ struct msg_msgseg* next;
+ /* the next part of the message follows immediately */
+@@ -36,7 +38,7 @@ struct msg_msg *load_msg(const void __us
+ if (alen > DATALEN_MSG)
+ alen = DATALEN_MSG;
+
+- msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
++ msg = (struct msg_msg *)ub_kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
+ if (msg == NULL)
+ return ERR_PTR(-ENOMEM);
+
+@@ -56,7 +58,7 @@ struct msg_msg *load_msg(const void __us
+ alen = len;
+ if (alen > DATALEN_SEG)
+ alen = DATALEN_SEG;
+- seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen,
++ seg = (struct msg_msgseg *)ub_kmalloc(sizeof(*seg) + alen,
+ GFP_KERNEL);
+ if (seg == NULL) {
+ err = -ENOMEM;
+diff -uprN linux-2.6.15.orig/ipc/sem.c linux-2.6.15-ve025stab014/ipc/sem.c
+--- linux-2.6.15.orig/ipc/sem.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/ipc/sem.c 2006-01-27 14:48:08.000000000 +0300
+@@ -77,6 +77,7 @@
+ #include <asm/uaccess.h>
+ #include "util.h"
+
++#include <ub/ub_mem.h>
+
+ #define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id))
+ #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
+@@ -123,6 +124,48 @@ void __init sem_init (void)
+ sysvipc_sem_proc_show);
+ }
+
++#ifdef CONFIG_VE
++void __init prepare_sem(void)
++{
++ get_ve0()->_sem_ids = &sem_ids;
++ get_ve0()->_used_sems = used_sems;
++ get_ve0()->_sem_ctls[0] = sem_ctls[0];
++ get_ve0()->_sem_ctls[1] = sem_ctls[1];
++ get_ve0()->_sem_ctls[2] = sem_ctls[2];
++ get_ve0()->_sem_ctls[3] = sem_ctls[3];
++}
++
++#define sem_ids (*(get_exec_env()->_sem_ids))
++#define used_sems (get_exec_env()->_used_sems)
++#define sem_ctls (get_exec_env()->_sem_ctls)
++
++void init_ve_ipc_sem(void)
++{
++ used_sems = 0;
++ sem_ctls[0] = SEMMSL;
++ sem_ctls[1] = SEMMNS;
++ sem_ctls[2] = SEMOPM;
++ sem_ctls[3] = SEMMNI;
++ ipc_init_ids(&sem_ids, SEMMNI);
++}
++
++void cleanup_ve_ipc_sem(void)
++{
++ int i;
++ struct sem_array *sma;
++
++ down(&sem_ids.sem);
++ for (i = 0; i <= sem_ids.max_id; i++) {
++ sma = sem_lock(i);
++ if (sma == NULL)
++ continue;
++
++ freeary(sma, i);
++ }
++ up(&sem_ids.sem);
++}
++#endif
++
+ /*
+ * Lockless wakeup algorithm:
+ * Without the check/retry algorithm a lockless wakeup is possible:
+@@ -742,7 +785,7 @@ static int semctl_main(int semid, int se
+ for (un = sma->undo; un; un = un->id_next)
+ un->semadj[semnum] = 0;
+ curr->semval = val;
+- curr->sempid = current->tgid;
++ curr->sempid = virt_tgid(current);
+ sma->sem_ctime = get_seconds();
+ /* maybe some queued-up processes were waiting for this */
+ update_queue(sma);
+@@ -822,7 +865,7 @@ static int semctl_down(int semid, int se
+ ipcp = &sma->sem_perm;
+
+ if (current->euid != ipcp->cuid &&
+- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
++ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) {
+ err=-EPERM;
+ goto out_unlock;
+ }
+@@ -943,7 +986,8 @@ static inline int get_undo_list(struct s
+ undo_list = current->sysvsem.undo_list;
+ if (!undo_list) {
+ size = sizeof(struct sem_undo_list);
+- undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
++ undo_list = (struct sem_undo_list *) ub_kmalloc(size,
++ GFP_KERNEL);
+ if (undo_list == NULL)
+ return -ENOMEM;
+ memset(undo_list, 0, size);
+@@ -1007,7 +1051,8 @@ static struct sem_undo *find_undo(int se
+ ipc_rcu_getref(sma);
+ sem_unlock(sma);
+
+- new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
++ new = (struct sem_undo *) ub_kmalloc(sizeof(struct sem_undo) +
++ sizeof(short)*nsems, GFP_KERNEL);
+ if (!new) {
+ ipc_lock_by_ptr(&sma->sem_perm);
+ ipc_rcu_putref(sma);
+@@ -1065,7 +1110,7 @@ asmlinkage long sys_semtimedop(int semid
+ if (nsops > sc_semopm)
+ return -E2BIG;
+ if(nsops > SEMOPM_FAST) {
+- sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
++ sops = ub_kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
+ if(sops==NULL)
+ return -ENOMEM;
+ }
+@@ -1149,7 +1194,7 @@ retry_undos:
+ queue.sops = sops;
+ queue.nsops = nsops;
+ queue.undo = un;
+- queue.pid = current->tgid;
++ queue.pid = virt_tgid(current);
+ queue.id = semid;
+ queue.alter = alter;
+ if (alter)
+@@ -1319,7 +1364,7 @@ found:
+ sem->semval = 0;
+ if (sem->semval > SEMVMX)
+ sem->semval = SEMVMX;
+- sem->sempid = current->tgid;
++ sem->sempid = virt_tgid(current);
+ }
+ }
+ sma->sem_otime = get_seconds();
+diff -uprN linux-2.6.15.orig/ipc/shm.c linux-2.6.15-ve025stab014/ipc/shm.c
+--- linux-2.6.15.orig/ipc/shm.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/ipc/shm.c 2006-01-27 14:48:08.000000000 +0300
+@@ -29,9 +29,13 @@
+ #include <linux/audit.h>
+ #include <linux/ptrace.h>
+ #include <linux/seq_file.h>
++#include <linux/shmem_fs.h>
+
+ #include <asm/uaccess.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_vmpages.h>
++
+ #include "util.h"
+
+ #define shm_flags shm_perm.mode
+@@ -50,6 +54,7 @@ static struct ipc_ids shm_ids;
+ static int newseg (key_t key, int shmflg, size_t size);
+ static void shm_open (struct vm_area_struct *shmd);
+ static void shm_close (struct vm_area_struct *shmd);
++static void shm_destroy (struct shmid_kernel *shmd);
+ #ifdef CONFIG_PROC_FS
+ static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
+ #endif
+@@ -69,6 +74,62 @@ void __init shm_init (void)
+ sysvipc_shm_proc_show);
+ }
+
++#ifdef CONFIG_VE
++void __init prepare_shm(void)
++{
++ get_ve0()->_shm_ids = &shm_ids;
++ get_ve0()->_shm_ctlmax = shm_ctlmax;
++ get_ve0()->_shm_ctlall = shm_ctlall;
++ get_ve0()->_shm_ctlmni = shm_ctlmni;
++ get_ve0()->_shm_tot = shm_tot;
++}
++
++#define shm_ids (*(get_exec_env()->_shm_ids))
++#define shm_ctlmax (get_exec_env()->_shm_ctlmax)
++#define shm_ctlall (get_exec_env()->_shm_ctlall)
++#define shm_ctlmni (get_exec_env()->_shm_ctlmni)
++#define shm_total (get_exec_env()->_shm_tot)
++
++void init_ve_ipc_shm(void)
++{
++ shm_ctlmax = SHMMAX;
++ shm_ctlall = SHMALL;
++ shm_ctlmni = SHMMNI;
++ shm_total = 0;
++ ipc_init_ids(&shm_ids, 1);
++}
++
++void cleanup_ve_ipc_shm(void)
++{
++ int i;
++ struct shmid_kernel *shp;
++
++ down(&shm_ids.sem);
++ for (i = 0; i <= shm_ids.max_id; i++) {
++ shp = shm_lock(i);
++ if (shp == NULL)
++ continue;
++
++ if (shp->shm_nattch) {
++ shp->shm_flags |= SHM_DEST;
++ shp->shm_perm.key = IPC_PRIVATE;
++ shm_unlock(shp);
++ } else
++ shm_destroy(shp);
++ }
++ up(&shm_ids.sem);
++}
++#define sb_ve(sb) VE_OWNER_FSTYPE(sb->s_type)
++#define shm_total_sb(sb) (&sb_ve(sb)->_shm_tot)
++#define shm_lock_sb(id, sb) ((struct shmid_kernel *) \
++ ipc_lock(sb_ve(sb)->_shm_ids, id))
++#else
++/* renamed since there is a struct field named shm_tot */
++#define shm_total shm_tot
++#define shm_total_sb(sb) (&shm_tot)
++#define shm_lock_sb(id, sb) shm_lock(id)
++#endif
++
+ static inline int shm_checkid(struct shmid_kernel *s, int id)
+ {
+ if (ipc_checkid(&shm_ids,&s->shm_perm,id))
+@@ -76,9 +137,9 @@ static inline int shm_checkid(struct shm
+ return 0;
+ }
+
+-static inline struct shmid_kernel *shm_rmid(int id)
++static inline struct shmid_kernel *shm_rmid(struct ipc_ids *ids, int id)
+ {
+- return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
++ return (struct shmid_kernel *)ipc_rmid(ids,id);
+ }
+
+ static inline int shm_addid(struct shmid_kernel *shp)
+@@ -88,13 +149,13 @@ static inline int shm_addid(struct shmid
+
+
+
+-static inline void shm_inc (int id) {
++static inline void shm_inc(int id, struct super_block *sb) {
+ struct shmid_kernel *shp;
+
+- if(!(shp = shm_lock(id)))
++ if(!(shp = shm_lock_sb(id, sb)))
+ BUG();
+ shp->shm_atim = get_seconds();
+- shp->shm_lprid = current->tgid;
++ shp->shm_lprid = virt_tgid(current);
+ shp->shm_nattch++;
+ shm_unlock(shp);
+ }
+@@ -102,7 +163,50 @@ static inline void shm_inc (int id) {
+ /* This is called by fork, once for every shm attach. */
+ static void shm_open (struct vm_area_struct *shmd)
+ {
+- shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
++ shm_inc(shmd->vm_file->f_dentry->d_inode->i_ino,
++ shmd->vm_file->f_dentry->d_inode->i_sb);
++}
++
++static int shmem_lock(struct shmid_kernel *shp, int lock,
++ struct user_struct *user)
++{
++ struct file *file = shp->shm_file;
++ struct inode *inode = file->f_dentry->d_inode;
++ struct shmem_inode_info *info = SHMEM_I(inode);
++ unsigned long size;
++
++ size = shp->shm_segsz + PAGE_SIZE - 1;
++
++#ifdef CONFIG_SHMEM
++ spin_lock(&info->lock);
++ if (lock && !(info->flags & VM_LOCKED)) {
++ if (ub_lockedshm_charge(info, size) < 0)
++ goto out_ch;
++
++ if (!user_shm_lock(inode->i_size, user))
++ goto out_user;
++ info->flags |= VM_LOCKED;
++ }
++ if (!lock && (info->flags & VM_LOCKED) && user) {
++ ub_lockedshm_uncharge(info, size);
++ user_shm_unlock(inode->i_size, user);
++ info->flags &= ~VM_LOCKED;
++ }
++ spin_unlock(&info->lock);
++ return 0;
++
++out_user:
++ ub_lockedshm_uncharge(info, size);
++out_ch:
++ spin_unlock(&info->lock);
++ return -ENOMEM;
++#else
++ if (lock && ub_lockedshm_charge(info, size))
++ return -ENOMEM;
++ if (!lock)
++ ub_lockedshm_uncharge(info, size);
++ return 0;
++#endif
+ }
+
+ /*
+@@ -115,15 +219,24 @@ static void shm_open (struct vm_area_str
+ */
+ static void shm_destroy (struct shmid_kernel *shp)
+ {
+- shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+- shm_rmid (shp->id);
++ int numpages, *shm_totalp;
++ struct file *f;
++ struct super_block *sb;
++
++ f = shp->shm_file;
++ sb = f->f_dentry->d_inode->i_sb;
++ numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
++ shm_totalp = shm_total_sb(sb);
++ *shm_totalp -= numpages;
++
++ shm_rmid (shp->_shm_ids, shp->id);
+ shm_unlock(shp);
+ if (!is_file_hugepages(shp->shm_file))
+- shmem_lock(shp->shm_file, 0, shp->mlock_user);
++ shmem_lock(shp, 0, shp->mlock_user);
+ else
+ user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size,
+ shp->mlock_user);
+- fput (shp->shm_file);
++ fput(f);
+ security_shm_free(shp);
+ ipc_rcu_putref(shp);
+ }
+@@ -139,12 +252,24 @@ static void shm_close (struct vm_area_st
+ struct file * file = shmd->vm_file;
+ int id = file->f_dentry->d_inode->i_ino;
+ struct shmid_kernel *shp;
++ struct super_block *sb;
++ struct ipc_ids *ids;
++#ifdef CONFIG_VE
++ struct ve_struct *ve;
++
++ sb = file->f_dentry->d_inode->i_sb;
++ ve = get_ve(sb_ve(sb));
++ ids = ve->_shm_ids;
++#else
++ sb = file->f_dentry->d_inode->i_sb;
++ ids = &shm_ids;
++#endif
+
+- down (&shm_ids.sem);
++ down (&ids->sem);
+ /* remove from the list of attaches of the shm segment */
+- if(!(shp = shm_lock(id)))
++ if(!(shp = shm_lock_sb(id, sb)))
+ BUG();
+- shp->shm_lprid = current->tgid;
++ shp->shm_lprid = virt_tgid(current);
+ shp->shm_dtim = get_seconds();
+ shp->shm_nattch--;
+ if(shp->shm_nattch == 0 &&
+@@ -152,14 +277,18 @@ static void shm_close (struct vm_area_st
+ shm_destroy (shp);
+ else
+ shm_unlock(shp);
+- up (&shm_ids.sem);
++ up(&ids->sem);
++#ifdef CONFIG_VE
++ put_ve(ve);
++#endif
+ }
+
+ static int shm_mmap(struct file * file, struct vm_area_struct * vma)
+ {
+ file_accessed(file);
+ vma->vm_ops = &shm_vm_ops;
+- shm_inc(file->f_dentry->d_inode->i_ino);
++ shm_inc(file->f_dentry->d_inode->i_ino,
++ file->f_dentry->d_inode->i_sb);
+ return 0;
+ }
+
+@@ -183,13 +312,13 @@ static int newseg (key_t key, int shmflg
+ struct shmid_kernel *shp;
+ int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ struct file * file;
+- char name[13];
++ char name[26];
+ int id;
+
+ if (size < SHMMIN || size > shm_ctlmax)
+ return -EINVAL;
+
+- if (shm_tot + numpages >= shm_ctlall)
++ if (shm_total + numpages >= shm_ctlall)
+ return -ENOSPC;
+
+ shp = ipc_rcu_alloc(sizeof(*shp));
+@@ -220,7 +349,11 @@ static int newseg (key_t key, int shmflg
+ if ((shmflg & SHM_NORESERVE) &&
+ sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+ acctflag = 0;
++#ifdef CONFIG_VE
++ sprintf (name, "VE%d.SYSV%08x", get_exec_env()->veid, key);
++#else
+ sprintf (name, "SYSV%08x", key);
++#endif
+ file = shmem_file_setup(name, size, acctflag);
+ }
+ error = PTR_ERR(file);
+@@ -232,13 +365,14 @@ static int newseg (key_t key, int shmflg
+ if(id == -1)
+ goto no_id;
+
+- shp->shm_cprid = current->tgid;
++ shp->shm_cprid = virt_tgid(current);
+ shp->shm_lprid = 0;
+ shp->shm_atim = shp->shm_dtim = 0;
+ shp->shm_ctim = get_seconds();
+ shp->shm_segsz = size;
+ shp->shm_nattch = 0;
+ shp->id = shm_buildid(id,shp->shm_perm.seq);
++ shp->_shm_ids = &shm_ids;
+ shp->shm_file = file;
+ file->f_dentry->d_inode->i_ino = shp->id;
+
+@@ -246,7 +380,7 @@ static int newseg (key_t key, int shmflg
+ if (!(shmflg & SHM_HUGETLB))
+ file->f_op = &shm_file_operations;
+
+- shm_tot += numpages;
++ shm_total += numpages;
+ shm_unlock(shp);
+ return shp->id;
+
+@@ -463,7 +597,7 @@ asmlinkage long sys_shmctl (int shmid, i
+ down(&shm_ids.sem);
+ shm_info.used_ids = shm_ids.in_use;
+ shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
+- shm_info.shm_tot = shm_tot;
++ shm_info.shm_tot = shm_total;
+ shm_info.swap_attempts = 0;
+ shm_info.swap_successes = 0;
+ err = shm_ids.max_id;
+@@ -550,14 +684,14 @@ asmlinkage long sys_shmctl (int shmid, i
+ if(cmd==SHM_LOCK) {
+ struct user_struct * user = current->user;
+ if (!is_file_hugepages(shp->shm_file)) {
+- err = shmem_lock(shp->shm_file, 1, user);
++ err = shmem_lock(shp, 1, user);
+ if (!err) {
+ shp->shm_flags |= SHM_LOCKED;
+ shp->mlock_user = user;
+ }
+ }
+ } else if (!is_file_hugepages(shp->shm_file)) {
+- shmem_lock(shp->shm_file, 0, shp->mlock_user);
++ shmem_lock(shp, 0, shp->mlock_user);
+ shp->shm_flags &= ~SHM_LOCKED;
+ shp->mlock_user = NULL;
+ }
+@@ -587,7 +721,7 @@ asmlinkage long sys_shmctl (int shmid, i
+
+ if (current->euid != shp->shm_perm.uid &&
+ current->euid != shp->shm_perm.cuid &&
+- !capable(CAP_SYS_ADMIN)) {
++ !capable(CAP_VE_SYS_ADMIN)) {
+ err=-EPERM;
+ goto out_unlock_up;
+ }
+@@ -626,7 +760,7 @@ asmlinkage long sys_shmctl (int shmid, i
+ err=-EPERM;
+ if (current->euid != shp->shm_perm.uid &&
+ current->euid != shp->shm_perm.cuid &&
+- !capable(CAP_SYS_ADMIN)) {
++ !capable(CAP_VE_SYS_ADMIN)) {
+ goto out_unlock_up;
+ }
+
+diff -uprN linux-2.6.15.orig/ipc/util.c linux-2.6.15-ve025stab014/ipc/util.c
+--- linux-2.6.15.orig/ipc/util.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/ipc/util.c 2006-01-27 14:48:08.000000000 +0300
+@@ -13,6 +13,7 @@
+ */
+
+ #include <linux/config.h>
++#include <linux/module.h>
+ #include <linux/mm.h>
+ #include <linux/shm.h>
+ #include <linux/init.h>
+@@ -29,6 +30,8 @@
+
+ #include <asm/unistd.h>
+
++#include <ub/ub_mem.h>
++
+ #include "util.h"
+
+ struct ipc_proc_iface {
+@@ -64,7 +67,7 @@ __initcall(ipc_init);
+ * array itself.
+ */
+
+-void __init ipc_init_ids(struct ipc_ids* ids, int size)
++void __ve_init ipc_init_ids(struct ipc_ids* ids, int size)
+ {
+ int i;
+ sema_init(&ids->sem,1);
+@@ -93,6 +96,8 @@ void __init ipc_init_ids(struct ipc_ids*
+ ids->entries->size = size;
+ for(i=0;i<size;i++)
+ ids->entries->p[i] = NULL;
++
++ ids->owner_env = get_exec_env();
+ }
+
+ #ifdef CONFIG_PROC_FS
+@@ -228,7 +233,8 @@ int ipc_addid(struct ipc_ids* ids, struc
+ }
+ return -1;
+ found:
+- ids->in_use++;
++ if (ids->in_use++ == 0)
++ (void)get_ve(ids->owner_env);
+ if (id > ids->max_id)
+ ids->max_id = id;
+
+@@ -275,7 +281,8 @@ struct kern_ipc_perm* ipc_rmid(struct ip
+ ids->entries->p[lid] = NULL;
+ if(p==NULL)
+ BUG();
+- ids->in_use--;
++ if (--ids->in_use == 0)
++ put_ve(ids->owner_env);
+
+ if (lid == ids->max_id) {
+ do {
+@@ -301,9 +308,9 @@ void* ipc_alloc(int size)
+ {
+ void* out;
+ if(size > PAGE_SIZE)
+- out = vmalloc(size);
++ out = ub_vmalloc(size);
+ else
+- out = kmalloc(size, GFP_KERNEL);
++ out = ub_kmalloc(size, GFP_KERNEL);
+ return out;
+ }
+
+@@ -386,14 +393,14 @@ void* ipc_rcu_alloc(int size)
+ * workqueue if necessary (for vmalloc).
+ */
+ if (rcu_use_vmalloc(size)) {
+- out = vmalloc(HDRLEN_VMALLOC + size);
++ out = ub_vmalloc(HDRLEN_VMALLOC + size);
+ if (out) {
+ out += HDRLEN_VMALLOC;
+ container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
+ container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
+ }
+ } else {
+- out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
++ out = ub_kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+ if (out) {
+ out += HDRLEN_KMALLOC;
+ container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
+@@ -602,6 +609,80 @@ int ipc_checkid(struct ipc_ids* ids, str
+ return 0;
+ }
+
++#ifdef CONFIG_VE
++void __init prepare_ipc(void)
++{
++ prepare_msg();
++ prepare_sem();
++ prepare_shm();
++}
++
++int init_ve_ipc(struct ve_struct * envid)
++{
++ envid->_msg_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
++ GFP_KERNEL);
++ if (envid->_msg_ids == NULL)
++ goto out_nomem;
++ envid->_sem_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
++ GFP_KERNEL);
++ if (envid->_sem_ids == NULL)
++ goto out_free_msg;
++ envid->_shm_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
++ GFP_KERNEL);
++ if (envid->_shm_ids == NULL)
++ goto out_free_sem;
++
++ init_ve_ipc_msg();
++ init_ve_ipc_sem();
++ init_ve_ipc_shm();
++ return 0;
++
++out_free_sem:
++ kfree(envid->_sem_ids);
++out_free_msg:
++ kfree(envid->_msg_ids);
++out_nomem:
++ return -ENOMEM;
++}
++
++void ve_ipc_cleanup(void)
++{
++ cleanup_ve_ipc_msg();
++ cleanup_ve_ipc_sem();
++ cleanup_ve_ipc_shm();
++}
++
++void ve_ipc_free(struct ve_struct *envid)
++{
++ if (envid->_msg_ids) {
++ ipc_rcu_putref(envid->_msg_ids->entries);
++ kfree(envid->_msg_ids);
++ envid->_msg_ids = NULL;
++ }
++ if (envid->_sem_ids) {
++ ipc_rcu_putref(envid->_sem_ids->entries);
++ kfree(envid->_sem_ids);
++ envid->_sem_ids = NULL;
++ }
++ if (envid->_shm_ids) {
++ ipc_rcu_putref(envid->_shm_ids->entries);
++ kfree(envid->_shm_ids);
++ envid->_shm_ids = NULL;
++ }
++}
++
++void fini_ve_ipc(struct ve_struct *ptr)
++{
++ ve_ipc_cleanup();
++ ve_ipc_free(ptr);
++}
++
++EXPORT_SYMBOL(init_ve_ipc);
++EXPORT_SYMBOL(ve_ipc_cleanup);
++EXPORT_SYMBOL(ve_ipc_free);
++EXPORT_SYMBOL(fini_ve_ipc);
++#endif /* CONFIG_VE */
++
+ #ifdef __ARCH_WANT_IPC_PARSE_VERSION
+
+
+diff -uprN linux-2.6.15.orig/ipc/util.h linux-2.6.15-ve025stab014/ipc/util.h
+--- linux-2.6.15.orig/ipc/util.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/ipc/util.h 2006-01-27 14:48:08.000000000 +0300
+@@ -15,6 +15,22 @@ void sem_init (void);
+ void msg_init (void);
+ void shm_init (void);
+
++#ifdef CONFIG_VE
++void prepare_msg(void);
++void prepare_sem(void);
++void prepare_shm(void);
++void init_ve_ipc_msg(void);
++void init_ve_ipc_sem(void);
++void init_ve_ipc_shm(void);
++void cleanup_ve_ipc_msg(void);
++void cleanup_ve_ipc_sem(void);
++void cleanup_ve_ipc_shm(void);
++
++#define __ve_init
++#else
++#define __ve_init __init
++#endif
++
+ struct ipc_id_ary {
+ int size;
+ struct kern_ipc_perm *p[0];
+@@ -28,10 +44,11 @@ struct ipc_ids {
+ struct semaphore sem;
+ struct ipc_id_ary nullentry;
+ struct ipc_id_ary* entries;
++ struct ve_struct *owner_env;
+ };
+
+ struct seq_file;
+-void __init ipc_init_ids(struct ipc_ids* ids, int size);
++void __ve_init ipc_init_ids(struct ipc_ids *ids, int size);
+ #ifdef CONFIG_PROC_FS
+ void __init ipc_init_proc_interface(const char *path, const char *header,
+ struct ipc_ids *ids,
+diff -uprN linux-2.6.15.orig/kernel/Kconfig.openvz linux-2.6.15-ve025stab014/kernel/Kconfig.openvz
+--- linux-2.6.15.orig/kernel/Kconfig.openvz 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/Kconfig.openvz 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,48 @@
++# Copyright (C) 2005 SWsoft
++# All rights reserved.
++# Licensing governed by "linux/COPYING.SWsoft" file.
++
++config VE
++ bool "Virtual Environment support"
++ default y
++ help
++	  This option adds support for virtual Linux environments running on the
++	  original box, with a fully supported virtual network driver, tty
++	  subsystem and configurable access to hardware and other resources.
++
++config VE_CALLS
++ tristate "VE calls interface"
++ depends on VE
++ default m
++ help
++ This option controls how to build vzmon code containing VE calls.
++	  By default it is built as the vzmon.o module.
++
++config VE_SYSFS
++ bool "Enable sysfs support in Virtual Environments"
++ depends on VE
++ default y
++ help
++ This option enables sysfs support in Virtual Environments
++
++config VE_NETDEV
++ tristate "VE networking"
++ depends on VE
++ default m
++ help
++ This option controls whether to build VE networking code.
++
++config VE_IPTABLES
++ bool "VE netfiltering"
++ depends on VE && VE_NETDEV && INET && NETFILTER
++ default y
++ help
++ This option controls whether to build VE netfiltering code.
++
++config VZ_WDOG
++ tristate "VE watchdog module"
++ depends on VE
++ default m
++ help
++	  This option controls building of the vzwdog module, which
++	  periodically dumps a lot of useful system info to the console.
+diff -uprN linux-2.6.15.orig/kernel/Makefile linux-2.6.15-ve025stab014/kernel/Makefile
+--- linux-2.6.15.orig/kernel/Makefile 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/Makefile 2006-01-27 14:48:09.000000000 +0300
+@@ -9,6 +9,15 @@ obj-y = sched.o fork.o exec_domain.o
+ rcupdate.o intermodule.o extable.o params.o posix-timers.o \
+ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o
+
++obj-y += ub/
++
++obj-$(CONFIG_VE) += ve.o
++obj-$(CONFIG_VE) += veowner.o
++obj-$(CONFIG_VE_CALLS) += vzdev.o
++obj-$(CONFIG_VZ_WDOG) += vzwdog.o
++obj-$(CONFIG_VE_CALLS) += vzmon.o
++vzmon-objs = vecalls.o
++
+ obj-$(CONFIG_FUTEX) += futex.o
+ obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+ obj-$(CONFIG_SMP) += cpu.o spinlock.o
+@@ -33,6 +42,8 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+ obj-$(CONFIG_SECCOMP) += seccomp.o
+ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+
++obj-$(CONFIG_VE_CALLS) += vzcompat.o
++
+ ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
+ # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
+ # needed for x86 only. Why this used to be enabled for all architectures is beyond
+diff -uprN linux-2.6.15.orig/kernel/capability.c linux-2.6.15-ve025stab014/kernel/capability.c
+--- linux-2.6.15.orig/kernel/capability.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/capability.c 2006-01-27 14:48:08.000000000 +0300
+@@ -24,6 +24,7 @@ EXPORT_SYMBOL(cap_bset);
+ * Locking rule: acquire this prior to tasklist_lock.
+ */
+ static DEFINE_SPINLOCK(task_capability_lock);
++EXPORT_SYMBOL(task_capability_lock);
+
+ /*
+ * For sys_getproccap() and sys_setproccap(), any of the three
+@@ -66,8 +67,8 @@ asmlinkage long sys_capget(cap_user_head
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+- if (pid && pid != current->pid) {
+- target = find_task_by_pid(pid);
++ if (pid && pid != virt_pid(current)) {
++ target = find_task_by_pid_ve(pid);
+ if (!target) {
+ ret = -ESRCH;
+ goto out;
+@@ -99,9 +100,13 @@ static inline int cap_set_pg(int pgrp, k
+ int ret = -EPERM;
+ int found = 0;
+
+- do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
++ pgrp = vpid_to_pid(pgrp);
++ if (pgrp < 0)
++ return ret;
++
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, g) {
+ target = g;
+- while_each_thread(g, target) {
++ while_each_thread_ve(g, target) {
+ if (!security_capset_check(target, effective,
+ inheritable,
+ permitted)) {
+@@ -112,7 +117,7 @@ static inline int cap_set_pg(int pgrp, k
+ }
+ found = 1;
+ }
+- } while_each_task_pid(pgrp, PIDTYPE_PGID, g);
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, g);
+
+ if (!found)
+ ret = 0;
+@@ -131,7 +136,7 @@ static inline int cap_set_all(kernel_cap
+ int ret = -EPERM;
+ int found = 0;
+
+- do_each_thread(g, target) {
++ do_each_thread_ve(g, target) {
+ if (target == current || target->pid == 1)
+ continue;
+ found = 1;
+@@ -140,7 +145,7 @@ static inline int cap_set_all(kernel_cap
+ continue;
+ ret = 0;
+ security_capset_set(target, effective, inheritable, permitted);
+- } while_each_thread(g, target);
++ } while_each_thread_ve(g, target);
+
+ if (!found)
+ ret = 0;
+@@ -187,7 +192,7 @@ asmlinkage long sys_capset(cap_user_head
+ if (get_user(pid, &header->pid))
+ return -EFAULT;
+
+- if (pid && pid != current->pid && !capable(CAP_SETPCAP))
++ if (pid && pid != virt_pid(current) && !capable(CAP_SETPCAP))
+ return -EPERM;
+
+ if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
+@@ -198,8 +203,8 @@ asmlinkage long sys_capset(cap_user_head
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+- if (pid > 0 && pid != current->pid) {
+- target = find_task_by_pid(pid);
++ if (pid > 0 && pid != virt_pid(current)) {
++ target = find_task_by_pid_ve(pid);
+ if (!target) {
+ ret = -ESRCH;
+ goto out;
+diff -uprN linux-2.6.15.orig/kernel/configs.c linux-2.6.15-ve025stab014/kernel/configs.c
+--- linux-2.6.15.orig/kernel/configs.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/configs.c 2006-01-27 14:48:08.000000000 +0300
+@@ -89,8 +89,7 @@ static int __init ikconfig_init(void)
+ struct proc_dir_entry *entry;
+
+ /* create the current config file */
+- entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
+- &proc_root);
++ entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL);
+ if (!entry)
+ return -ENOMEM;
+
+diff -uprN linux-2.6.15.orig/kernel/cpu.c linux-2.6.15-ve025stab014/kernel/cpu.c
+--- linux-2.6.15.orig/kernel/cpu.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/cpu.c 2006-01-27 14:48:08.000000000 +0300
+@@ -95,7 +95,7 @@ static inline void check_for_tasks(int c
+ struct task_struct *p;
+
+ write_lock_irq(&tasklist_lock);
+- for_each_process(p) {
++ for_each_process_all(p) {
+ if (task_cpu(p) == cpu &&
+ (!cputime_eq(p->utime, cputime_zero) ||
+ !cputime_eq(p->stime, cputime_zero)))
+diff -uprN linux-2.6.15.orig/kernel/cpuset.c linux-2.6.15-ve025stab014/kernel/cpuset.c
+--- linux-2.6.15.orig/kernel/cpuset.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/cpuset.c 2006-01-27 14:48:08.000000000 +0300
+@@ -857,7 +857,7 @@ static int attach_task(struct cpuset *cs
+ if (pid) {
+ read_lock(&tasklist_lock);
+
+- tsk = find_task_by_pid(pid);
++ tsk = find_task_by_pid_all(pid);
+ if (!tsk || tsk->flags & PF_EXITING) {
+ read_unlock(&tasklist_lock);
+ return -ESRCH;
+@@ -1259,13 +1259,13 @@ static inline int pid_array_load(pid_t *
+
+ read_lock(&tasklist_lock);
+
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ if (p->cpuset == cs) {
+ pidarray[n++] = p->pid;
+ if (unlikely(n == npids))
+ goto array_full;
+ }
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+
+ array_full:
+ read_unlock(&tasklist_lock);
+diff -uprN linux-2.6.15.orig/kernel/exit.c linux-2.6.15-ve025stab014/kernel/exit.c
+--- linux-2.6.15.orig/kernel/exit.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/exit.c 2006-01-27 14:48:08.000000000 +0300
+@@ -35,6 +35,8 @@
+ #include <asm/pgtable.h>
+ #include <asm/mmu_context.h>
+
++#include <ub/ub_oom.h>
++
+ extern void sem_exit (void);
+ extern struct task_struct *child_reaper;
+
+@@ -55,18 +57,19 @@ static void __unhash_process(struct task
+ }
+
+ REMOVE_LINKS(p);
++ REMOVE_VE_LINKS(p);
+ }
+
+ void release_task(struct task_struct * p)
+ {
+ int zap_leader;
+ task_t *leader;
+- struct dentry *proc_dentry;
++ struct dentry *proc_dentry[2];
+
+ repeat:
+ atomic_dec(&p->user->processes);
+ spin_lock(&p->proc_lock);
+- proc_dentry = proc_pid_unhash(p);
++ proc_pid_unhash(p, proc_dentry);
+ write_lock_irq(&tasklist_lock);
+ if (unlikely(p->ptrace))
+ __ptrace_unlink(p);
+@@ -79,6 +82,8 @@ repeat:
+ * the process by __unhash_process.
+ */
+ __unhash_process(p);
++ nr_zombie--;
++ nr_dead++;
+
+ /*
+ * If we are the last non-leader member of the thread
+@@ -106,6 +111,10 @@ repeat:
+ spin_unlock(&p->proc_lock);
+ proc_pid_flush(proc_dentry);
+ release_thread(p);
++#ifdef CONFIG_VE
++ if (atomic_dec_and_test(&VE_TASK_INFO(p)->owner_env->pcounter))
++ do_env_cleanup(VE_TASK_INFO(p)->owner_env);
++#endif
+ put_task_struct(p);
+
+ p = leader;
+@@ -117,10 +126,10 @@ repeat:
+
+ void unhash_process(struct task_struct *p)
+ {
+- struct dentry *proc_dentry;
++ struct dentry *proc_dentry[2];
+
+ spin_lock(&p->proc_lock);
+- proc_dentry = proc_pid_unhash(p);
++ proc_pid_unhash(p, proc_dentry);
+ write_lock_irq(&tasklist_lock);
+ __unhash_process(p);
+ write_unlock_irq(&tasklist_lock);
+@@ -138,14 +147,16 @@ int session_of_pgrp(int pgrp)
+ struct task_struct *p;
+ int sid = -1;
+
++ WARN_ON(is_virtual_pid(pgrp));
++
+ read_lock(&tasklist_lock);
+- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
+ if (p->signal->session > 0) {
+ sid = p->signal->session;
+ goto out;
+ }
+- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+- p = find_task_by_pid(pgrp);
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
++ p = find_task_by_pid_ve(pgrp);
+ if (p)
+ sid = p->signal->session;
+ out:
+@@ -167,17 +178,19 @@ static int will_become_orphaned_pgrp(int
+ struct task_struct *p;
+ int ret = 1;
+
+- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
++ WARN_ON(is_virtual_pid(pgrp));
++
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
+ if (p == ignored_task
+ || p->exit_state
+- || p->real_parent->pid == 1)
++ || virt_pid(p->real_parent) == 1)
+ continue;
+ if (process_group(p->real_parent) != pgrp
+ && p->real_parent->signal->session == p->signal->session) {
+ ret = 0;
+ break;
+ }
+- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+ return ret; /* (sighing) "Often!" */
+ }
+
+@@ -185,6 +198,8 @@ int is_orphaned_pgrp(int pgrp)
+ {
+ int retval;
+
++ WARN_ON(is_virtual_pid(pgrp));
++
+ read_lock(&tasklist_lock);
+ retval = will_become_orphaned_pgrp(pgrp, NULL);
+ read_unlock(&tasklist_lock);
+@@ -197,7 +212,7 @@ static inline int has_stopped_jobs(int p
+ int retval = 0;
+ struct task_struct *p;
+
+- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
+ if (p->state != TASK_STOPPED)
+ continue;
+
+@@ -213,7 +228,7 @@ static inline int has_stopped_jobs(int p
+
+ retval = 1;
+ break;
+- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+ return retval;
+ }
+
+@@ -260,6 +275,9 @@ void __set_special_pids(pid_t session, p
+ {
+ struct task_struct *curr = current;
+
++ WARN_ON(is_virtual_pid(pgrp));
++ WARN_ON(is_virtual_pid(session));
++
+ if (curr->signal->session != session) {
+ detach_pid(curr, PIDTYPE_SID);
+ curr->signal->session = session;
+@@ -278,6 +296,7 @@ void set_special_pids(pid_t session, pid
+ __set_special_pids(session, pgrp);
+ write_unlock_irq(&tasklist_lock);
+ }
++EXPORT_SYMBOL(set_special_pids);
+
+ /*
+ * Let kernel threads use this to say that they
+@@ -607,13 +626,12 @@ static inline void reparent_thread(task_
+ static inline void forget_original_parent(struct task_struct * father,
+ struct list_head *to_release)
+ {
+- struct task_struct *p, *reaper = father;
++ struct task_struct *p, *tsk_reaper, *reaper = father;
+ struct list_head *_p, *_n;
+
+ do {
+ reaper = next_thread(reaper);
+ if (reaper == father) {
+- reaper = child_reaper;
+ break;
+ }
+ } while (reaper->exit_state);
+@@ -635,9 +653,16 @@ static inline void forget_original_paren
+ /* if father isn't the real parent, then ptrace must be enabled */
+ BUG_ON(father != p->real_parent && !ptrace);
+
++ tsk_reaper = reaper;
++ if (tsk_reaper == father)
++#ifdef CONFIG_VE
++ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
++ if (tsk_reaper == p)
++#endif
++ tsk_reaper = child_reaper;
+ if (father == p->real_parent) {
+- /* reparent with a reaper, real father it's us */
+- choose_new_parent(p, reaper, child_reaper);
++			/* reparent to tsk_reaper; the real father is us */
++ choose_new_parent(p, tsk_reaper, child_reaper);
+ reparent_thread(p, father, 0);
+ } else {
+ /* reparent ptraced task to its real parent */
+@@ -658,7 +683,15 @@ static inline void forget_original_paren
+ }
+ list_for_each_safe(_p, _n, &father->ptrace_children) {
+ p = list_entry(_p,struct task_struct,ptrace_list);
+- choose_new_parent(p, reaper, child_reaper);
++
++ tsk_reaper = reaper;
++ if (tsk_reaper == father)
++#ifdef CONFIG_VE
++ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
++ if (tsk_reaper == p)
++#endif
++ tsk_reaper = child_reaper;
++ choose_new_parent(p, tsk_reaper, child_reaper);
+ reparent_thread(p, father, 1);
+ }
+ }
+@@ -754,6 +787,9 @@ static void exit_notify(struct task_stru
+ && !capable(CAP_KILL))
+ tsk->exit_signal = SIGCHLD;
+
++ if (tsk->exit_signal != -1 && t == child_reaper)
++ /* We dont want people slaying init. */
++		/* We don't want people slaying init. */
+
+ /* If something other than our normal parent is ptracing us, then
+ * send it a SIGCHLD instead of honoring exit_signal. exit_signal
+@@ -772,6 +808,7 @@ static void exit_notify(struct task_stru
+ unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT)))
+ state = EXIT_DEAD;
+ tsk->exit_state = state;
++ nr_zombie++;
+
+ write_unlock_irq(&tasklist_lock);
+
+@@ -786,6 +823,82 @@ static void exit_notify(struct task_stru
+ release_task(tsk);
+ }
+
++#ifdef CONFIG_VE
++/*
++ * Handle exiting of the init process; it's a special case for VE.
++ */
++static void do_initproc_exit(void)
++{
++ struct task_struct *tsk;
++ struct ve_struct *env;
++ struct siginfo info;
++ struct task_struct *g, *p;
++ long delay = 1L;
++
++ tsk = current;
++ env = VE_TASK_INFO(current)->owner_env;
++ if (env->init_entry != tsk)
++ return;
++
++ if (ve_is_super(env) && tsk->pid == 1)
++ panic("Attempted to kill init!");
++
++ memset(&info, 0, sizeof(info));
++ info.si_errno = 0;
++ info.si_code = SI_KERNEL;
++ info.si_pid = virt_pid(tsk);
++ info.si_uid = current->uid;
++ info.si_signo = SIGKILL;
++
++ /*
++ * Here the VE changes its state into "not running".
++ * op_sem taken for write is a barrier to all VE manipulations from
++ * ioctl: it waits for operations currently in progress and blocks all
++ * subsequent operations until is_running is set to 0 and op_sem is
++ * released.
++ */
++ down_write(&env->op_sem);
++ env->is_running = 0;
++ up_write(&env->op_sem);
++
++ /* send kill to all processes of VE */
++ read_lock(&tasklist_lock);
++ do_each_thread_ve(g, p) {
++ force_sig_info(SIGKILL, &info, p);
++ } while_each_thread_ve(g, p);
++ read_unlock(&tasklist_lock);
++
++	/* wait for all of init's children to exit */
++ while (atomic_read(&env->pcounter) > 1) {
++ if (sys_wait4(-1, NULL, __WALL | WNOHANG, NULL) > 0)
++ continue;
++		/* it was ECHILD or there are somehow no more children */
++ if (atomic_read(&env->pcounter) == 1)
++ break;
++
++ /* clear all signals to avoid wakeups */
++ if (signal_pending(tsk))
++ flush_signals(tsk);
++		/* there is still a child that has not been signalled yet */
++ __set_current_state(TASK_INTERRUPTIBLE);
++ schedule_timeout(delay);
++ delay = (delay < HZ) ? (delay << 1) : HZ;
++ read_lock(&tasklist_lock);
++ do_each_thread_ve(g, p) {
++ if (p != tsk)
++ force_sig_info(SIGKILL, &info, p);
++ } while_each_thread_ve(g, p);
++ read_unlock(&tasklist_lock);
++ }
++ env->init_entry = child_reaper;
++ write_lock_irq(&tasklist_lock);
++ REMOVE_LINKS(tsk);
++ tsk->parent = tsk->real_parent = child_reaper;
++ SET_LINKS(tsk);
++ write_unlock_irq(&tasklist_lock);
++}
++#endif
++
+ fastcall NORET_TYPE void do_exit(long code)
+ {
+ struct task_struct *tsk = current;
+@@ -799,8 +912,12 @@ fastcall NORET_TYPE void do_exit(long co
+ panic("Aiee, killing interrupt handler!");
+ if (unlikely(!tsk->pid))
+ panic("Attempted to kill the idle task!");
++#ifdef CONFIG_VE
++ do_initproc_exit();
++#else
+ if (unlikely(tsk->pid == 1))
+ panic("Attempted to kill init!");
++#endif
+ if (tsk->io_context)
+ exit_io_context();
+
+@@ -866,6 +983,7 @@ fastcall NORET_TYPE void do_exit(long co
+ tsk->exit_code = code;
+ proc_exit_connector(tsk);
+ exit_notify(tsk);
++ ub_oom_task_exit(tsk);
+ #ifdef CONFIG_NUMA
+ mpol_free(tsk->mempolicy);
+ tsk->mempolicy = NULL;
+@@ -901,7 +1019,14 @@ asmlinkage long sys_exit(int error_code)
+
+ task_t fastcall *next_thread(const task_t *p)
+ {
+- return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
++ task_t *tsk;
++
++ tsk = pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
++#ifdef CONFIG_VE
++ /* all threads should belong to ONE ve! */
++ BUG_ON(VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(p)->owner_env);
++#endif
++ return tsk;
+ }
+
+ EXPORT_SYMBOL(next_thread);
+@@ -951,14 +1076,19 @@ asmlinkage void sys_exit_group(int error
+ static int eligible_child(pid_t pid, int options, task_t *p)
+ {
+ if (pid > 0) {
+- if (p->pid != pid)
++ if ((is_virtual_pid(pid) ? virt_pid(p) : p->pid) != pid)
+ return 0;
+ } else if (!pid) {
+ if (process_group(p) != process_group(current))
+ return 0;
+ } else if (pid != -1) {
+- if (process_group(p) != -pid)
+- return 0;
++ if (__is_virtual_pid(-pid)) {
++ if (virt_pgid(p) != -pid)
++ return 0;
++ } else {
++ if (process_group(p) != -pid)
++ return 0;
++ }
+ }
+
+ /*
+@@ -1143,7 +1273,7 @@ static int wait_task_zombie(task_t *p, i
+ p->exit_state = EXIT_ZOMBIE;
+ return retval;
+ }
+- retval = p->pid;
++ retval = get_task_pid(p);
+ if (p->real_parent != p->parent) {
+ write_lock_irq(&tasklist_lock);
+ /* Double-check with lock held. */
+@@ -1278,7 +1408,7 @@ bail_ref:
+ if (!retval && infop)
+ retval = put_user(p->uid, &infop->si_uid);
+ if (!retval)
+- retval = p->pid;
++ retval = get_task_pid(p);
+ put_task_struct(p);
+
+ BUG_ON(!retval);
+diff -uprN linux-2.6.15.orig/kernel/fork.c linux-2.6.15-ve025stab014/kernel/fork.c
+--- linux-2.6.15.orig/kernel/fork.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/fork.c 2006-01-27 14:48:08.000000000 +0300
+@@ -20,6 +20,7 @@
+ #include <linux/vmalloc.h>
+ #include <linux/completion.h>
+ #include <linux/namespace.h>
++#include <linux/file.h>
+ #include <linux/personality.h>
+ #include <linux/mempolicy.h>
+ #include <linux/sem.h>
+@@ -51,11 +52,15 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++#include <ub/ub_misc.h>
++
+ /*
+ * Protected counters by write_lock_irq(&tasklist_lock)
+ */
+ unsigned long total_forks; /* Handle normal Linux uptimes. */
+ int nr_threads; /* The idle threads do not count.. */
++EXPORT_SYMBOL(nr_threads);
+
+ int max_threads; /* tunable limit on nr_threads */
+
+@@ -102,6 +107,7 @@ static kmem_cache_t *mm_cachep;
+
+ void free_task(struct task_struct *tsk)
+ {
++ ub_task_uncharge(tsk);
+ free_thread_info(tsk->thread_info);
+ free_task_struct(tsk);
+ }
+@@ -119,6 +125,12 @@ void __put_task_struct(struct task_struc
+ free_uid(tsk->user);
+ put_group_info(tsk->group_info);
+
++#ifdef CONFIG_VE
++ put_ve(VE_TASK_INFO(tsk)->owner_env);
++ write_lock_irq(&tasklist_lock);
++ nr_dead--;
++ write_unlock_irq(&tasklist_lock);
++#endif
+ if (!profile_handoff_task(tsk))
+ free_task(tsk);
+ }
+@@ -132,7 +144,7 @@ void __init fork_init(unsigned long memp
+ /* create a slab on which task_structs can be allocated */
+ task_struct_cachep =
+ kmem_cache_create("task_struct", sizeof(struct task_struct),
+- ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
++ ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_UBC, NULL, NULL);
+ #endif
+
+ /*
+@@ -163,22 +175,33 @@ static struct task_struct *dup_task_stru
+
+ tsk = alloc_task_struct();
+ if (!tsk)
+- return NULL;
++ goto out;
+
+ ti = alloc_thread_info(tsk);
+- if (!ti) {
+- free_task_struct(tsk);
+- return NULL;
+- }
++ if (!ti)
++ goto out_tsk;
+
+ *tsk = *orig;
+ tsk->thread_info = ti;
+ setup_thread_stack(tsk, orig);
+
++ if (test_ti_thread_flag(orig->thread_info, TIF_MEMDIE))
++ goto out_ti;
++
++ if (ub_task_charge(orig, tsk))
++ goto out_ti;
++
+ /* One for us, one for whoever does the "release_task()" (usually parent) */
+ atomic_set(&tsk->usage,2);
+ atomic_set(&tsk->fs_excl, 0);
+ return tsk;
++
++out_ti:
++ free_thread_info(ti);
++out_tsk:
++ free_task_struct(tsk);
++out:
++ return NULL;
+ }
+
+ #ifdef CONFIG_MMU
+@@ -216,7 +239,12 @@ static inline int dup_mmap(struct mm_str
+ -pages);
+ continue;
+ }
++
+ charge = 0;
++ if (ub_memory_charge(mm, mpnt->vm_end - mpnt->vm_start,
++ mpnt->vm_flags & ~VM_LOCKED,
++ mpnt->vm_file, UB_HARD))
++ goto fail_noch;
+ if (mpnt->vm_flags & VM_ACCOUNT) {
+ unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ if (security_vm_enough_memory(len))
+@@ -235,6 +263,7 @@ static inline int dup_mmap(struct mm_str
+ tmp->vm_flags &= ~VM_LOCKED;
+ tmp->vm_mm = mm;
+ tmp->vm_next = NULL;
++ set_vma_rss(tmp, 0);
+ anon_vma_link(tmp);
+ file = tmp->vm_file;
+ if (file) {
+@@ -263,7 +292,7 @@ static inline int dup_mmap(struct mm_str
+ rb_parent = &tmp->vm_rb;
+
+ mm->map_count++;
+- retval = copy_page_range(mm, oldmm, mpnt);
++ retval = copy_page_range(mm, oldmm, tmp, mpnt);
+
+ if (tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
+@@ -280,6 +309,9 @@ out:
+ fail_nomem_policy:
+ kmem_cache_free(vm_area_cachep, tmp);
+ fail_nomem:
++ ub_memory_uncharge(mm, mpnt->vm_end - mpnt->vm_start,
++ mpnt->vm_flags & ~VM_LOCKED, mpnt->vm_file);
++fail_noch:
+ retval = -ENOMEM;
+ vm_unacct_memory(charge);
+ goto out;
+@@ -310,7 +342,8 @@ static inline void mm_free_pgd(struct mm
+
+ #include <linux/init_task.h>
+
+-static struct mm_struct * mm_init(struct mm_struct * mm)
++static struct mm_struct * mm_init(struct mm_struct * mm,
++ struct task_struct *tsk)
+ {
+ atomic_set(&mm->mm_users, 1);
+ atomic_set(&mm->mm_count, 1);
+@@ -325,11 +358,14 @@ static struct mm_struct * mm_init(struct
+ mm->ioctx_list = NULL;
+ mm->free_area_cache = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = ~0UL;
++ set_mm_ub(mm, tsk);
+
+ if (likely(!mm_alloc_pgd(mm))) {
+ mm->def_flags = 0;
+ return mm;
+ }
++
++ put_mm_ub(mm);
+ free_mm(mm);
+ return NULL;
+ }
+@@ -344,7 +380,7 @@ struct mm_struct * mm_alloc(void)
+ mm = allocate_mm();
+ if (mm) {
+ memset(mm, 0, sizeof(*mm));
+- mm = mm_init(mm);
++ mm = mm_init(mm, NULL);
+ }
+ return mm;
+ }
+@@ -359,6 +395,7 @@ void fastcall __mmdrop(struct mm_struct
+ BUG_ON(mm == &init_mm);
+ mm_free_pgd(mm);
+ destroy_context(mm);
++ put_mm_ub(mm);
+ free_mm(mm);
+ }
+
+@@ -478,7 +515,7 @@ static int copy_mm(unsigned long clone_f
+
+ /* Copy the current MM stuff.. */
+ memcpy(mm, oldmm, sizeof(*mm));
+- if (!mm_init(mm))
++ if (!mm_init(mm, tsk))
+ goto fail_nomem;
+
+ if (init_new_context(tsk,mm))
+@@ -848,7 +885,7 @@ asmlinkage long sys_set_tid_address(int
+ {
+ current->clear_child_tid = tidptr;
+
+- return current->pid;
++ return virt_pid(current);
+ }
+
+ /*
+@@ -865,7 +902,7 @@ static task_t *copy_process(unsigned lon
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+ int __user *child_tidptr,
+- int pid)
++ int pid, long pid0)
+ {
+ int retval;
+ struct task_struct *p = NULL;
+@@ -926,12 +963,20 @@ static task_t *copy_process(unsigned lon
+ p->did_exec = 0;
+ copy_flags(clone_flags, p);
+ p->pid = pid;
++#ifdef CONFIG_VE
++ set_virt_pid(p, alloc_vpid(p->pid, pid0 ? : -1));
++ if (virt_pid(p) < 0)
++ goto bad_fork_cleanup_module;
++#endif
+ retval = -EFAULT;
+ if (clone_flags & CLONE_PARENT_SETTID)
+- if (put_user(p->pid, parent_tidptr))
++ if (put_user(virt_pid(p), parent_tidptr))
+ goto bad_fork_cleanup;
+
+ p->proc_dentry = NULL;
++#ifdef CONFIG_VE
++ p->ve_task_info.glob_proc_dentry = NULL;
++#endif
+
+ INIT_LIST_HEAD(&p->children);
+ INIT_LIST_HEAD(&p->sibling);
+@@ -974,8 +1019,13 @@ static task_t *copy_process(unsigned lon
+ #endif
+
+ p->tgid = p->pid;
+- if (clone_flags & CLONE_THREAD)
++ set_virt_tgid(p, virt_pid(p));
++ set_virt_pgid(p, virt_pgid(current));
++ set_virt_sid(p, virt_sid(current));
++ if (clone_flags & CLONE_THREAD) {
+ p->tgid = current->tgid;
++ set_virt_tgid(p, virt_tgid(current));
++ }
+
+ if ((retval = security_task_alloc(p)))
+ goto bad_fork_cleanup_policy;
+@@ -1124,6 +1174,12 @@ static task_t *copy_process(unsigned lon
+ if (unlikely(p->ptrace & PT_PTRACED))
+ __ptrace_link(p, current->parent);
+
++#ifdef CONFIG_VE
++ SET_VE_LINKS(p);
++ atomic_inc(&p->ve_task_info.owner_env->pcounter);
++ get_ve(p->ve_task_info.owner_env);
++ seqcount_init(&p->ve_task_info.wakeup_lock);
++#endif
+ attach_pid(p, PIDTYPE_PID, p->pid);
+ attach_pid(p, PIDTYPE_TGID, p->tgid);
+ if (thread_group_leader(p)) {
+@@ -1174,6 +1230,11 @@ bad_fork_cleanup_policy:
+ mpol_free(p->mempolicy);
+ #endif
+ bad_fork_cleanup:
++#ifdef CONFIG_VE
++ if (virt_pid(p) != p->pid && virt_pid(p) > 0)
++ free_vpid(virt_pid(p), get_exec_env());
++bad_fork_cleanup_module:
++#endif
+ if (p->binfmt)
+ module_put(p->binfmt->module);
+ bad_fork_cleanup_put_domain:
+@@ -1198,7 +1259,7 @@ task_t * __devinit fork_idle(int cpu)
+ task_t *task;
+ struct pt_regs regs;
+
+- task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
++ task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0, 0);
+ if (!task)
+ return ERR_PTR(-ENOMEM);
+ init_idle(task, cpu);
+@@ -1228,12 +1289,13 @@ static inline int fork_traceflag (unsign
+ * It copies the process, and if successful kick-starts
+ * it and waits for it to finish using the VM if required.
+ */
+-long do_fork(unsigned long clone_flags,
++long do_fork_pid(unsigned long clone_flags,
+ unsigned long stack_start,
+ struct pt_regs *regs,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+- int __user *child_tidptr)
++ int __user *child_tidptr,
++ long pid0)
+ {
+ struct task_struct *p;
+ int trace = 0;
+@@ -1247,7 +1309,8 @@ long do_fork(unsigned long clone_flags,
+ clone_flags |= CLONE_PTRACE;
+ }
+
+- p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
++ p = copy_process(clone_flags, stack_start, regs, stack_size,
++ parent_tidptr, child_tidptr, pid, pid0);
+ /*
+ * Do this prior waking up the new thread - the thread pointer
+ * might get invalid after that point, if the thread exits quickly.
+@@ -1255,6 +1318,7 @@ long do_fork(unsigned long clone_flags,
+ if (!IS_ERR(p)) {
+ struct completion vfork;
+
++ pid = virt_pid(p);
+ if (clone_flags & CLONE_VFORK) {
+ p->vfork_done = &vfork;
+ init_completion(&vfork);
+@@ -1290,24 +1354,37 @@ long do_fork(unsigned long clone_flags,
+ return pid;
+ }
+
++EXPORT_SYMBOL(do_fork_pid);
++
++long do_fork(unsigned long clone_flags,
++ unsigned long stack_start,
++ struct pt_regs *regs,
++ unsigned long stack_size,
++ int __user *parent_tidptr,
++ int __user *child_tidptr)
++{
++ return do_fork_pid(clone_flags, stack_start, regs, stack_size,
++ parent_tidptr, child_tidptr, 0);
++}
++
+ void __init proc_caches_init(void)
+ {
+ sighand_cachep = kmem_cache_create("sighand_cache",
+ sizeof(struct sighand_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ signal_cachep = kmem_cache_create("signal_cache",
+ sizeof(struct signal_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ files_cachep = kmem_cache_create("files_cache",
+ sizeof(struct files_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ fs_cachep = kmem_cache_create("fs_cache",
+ sizeof(struct fs_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), 0,
+- SLAB_PANIC, NULL, NULL);
++ SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ mm_cachep = kmem_cache_create("mm_struct",
+ sizeof(struct mm_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ }
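
The fork.c hunks above all follow the same charge-before-commit idiom: a beancounter charge is attempted first (ub_task_charge() in dup_task_struct(), ub_memory_charge() in dup_mmap()), and every later failure unwinds through labelled cleanup so the charge is always returned. A minimal user-space sketch of that unwind pattern, with hypothetical charge()/uncharge() helpers standing in for the ub_* calls:

```c
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for ub_memory_charge()/ub_memory_uncharge(). */
static int charge(size_t len) { return len > (1 << 20) ? -1 : 0; }
static void uncharge(size_t len) { (void)len; }

/* Mirrors the dup_mmap()/dup_task_struct() error paths: charge first,
 * then allocate, and unwind in reverse order on any failure. */
static void *dup_region(size_t len)
{
	void *copy;

	if (charge(len))
		goto fail_noch;		/* nothing to undo yet */

	copy = malloc(len);
	if (!copy)
		goto fail_nomem;	/* allocation failed: return the charge */

	return copy;

fail_nomem:
	uncharge(len);
fail_noch:
	return NULL;
}

int main(void)
{
	void *p = dup_region(4096);

	printf("4 KiB copy: %s\n", p ? "ok" : "failed");
	free(p);
	return 0;
}
```
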
+diff -uprN linux-2.6.15.orig/kernel/irq/handle.c linux-2.6.15-ve025stab014/kernel/irq/handle.c
+--- linux-2.6.15.orig/kernel/irq/handle.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/irq/handle.c 2006-01-27 14:48:06.000000000 +0300
+@@ -14,6 +14,8 @@
+
+ #include "internals.h"
+
++#include <ub/beancounter.h>
++
+ /*
+ * Linux has a controller-independent interrupt architecture.
+ * Every controller has a 'controller-template', that is used
+@@ -80,10 +82,12 @@ fastcall int handle_IRQ_event(unsigned i
+ struct irqaction *action)
+ {
+ int ret, retval = 0, status = 0;
++ struct user_beancounter *ub;
+
+ if (!(action->flags & SA_INTERRUPT))
+ local_irq_enable();
+
++ ub = set_exec_ub(get_ub0());
+ do {
+ ret = action->handler(irq, action->dev_id, regs);
+ if (ret == IRQ_HANDLED)
+@@ -91,6 +95,7 @@ fastcall int handle_IRQ_event(unsigned i
+ retval |= ret;
+ action = action->next;
+ } while (action);
++ (void)set_exec_ub(ub);
+
+ if (status & SA_SAMPLE_RANDOM)
+ add_interrupt_randomness(irq);
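
The handle_IRQ_event() change runs the handler chain under what appears to be the host beancounter: the current exec beancounter is saved with set_exec_ub(get_ub0()) and restored once the loop finishes. A small sketch of that save/switch/restore idiom with a hypothetical context type; the kernel helpers are assumed to behave like the setter below:

```c
#include <stdio.h>

/* Hypothetical execution context, modelled on the exec beancounter. */
struct ctx { const char *name; };

static struct ctx host = { "host" };
static struct ctx *current_ctx = &host;

/* Install a new context and hand back the old one, like set_exec_ub(). */
static struct ctx *set_ctx(struct ctx *new_ctx)
{
	struct ctx *old = current_ctx;

	current_ctx = new_ctx;
	return old;
}

static void handler(void)
{
	printf("handler runs in: %s\n", current_ctx->name);
}

int main(void)
{
	struct ctx guest = { "guest" };
	struct ctx *saved;

	current_ctx = &guest;		/* interrupt arrives while a guest runs */
	saved = set_ctx(&host);		/* charge the handler to the host */
	handler();
	(void)set_ctx(saved);		/* restore the interrupted context */
	printf("back in: %s\n", current_ctx->name);
	return 0;
}
```
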
+diff -uprN linux-2.6.15.orig/kernel/kmod.c linux-2.6.15-ve025stab014/kernel/kmod.c
+--- linux-2.6.15.orig/kernel/kmod.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/kmod.c 2006-01-27 14:48:08.000000000 +0300
+@@ -78,6 +78,10 @@ int request_module(const char *fmt, ...)
+ #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
+ static int kmod_loop_msg;
+
++ /* Don't allow request_module() inside VE. */
++ if (!ve_is_super(get_exec_env()))
++ return -EPERM;
++
+ va_start(args, fmt);
+ ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
+ va_end(args);
+@@ -246,6 +250,9 @@ int call_usermodehelper_keys(char *path,
+ };
+ DECLARE_WORK(work, __call_usermodehelper, &sub_info);
+
++ if (!ve_is_super(get_exec_env()))
++ return -EPERM;
++
+ if (!khelper_wq)
+ return -EBUSY;
+
+diff -uprN linux-2.6.15.orig/kernel/kthread.c linux-2.6.15-ve025stab014/kernel/kthread.c
+--- linux-2.6.15.orig/kernel/kthread.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/kthread.c 2006-01-27 14:48:08.000000000 +0300
+@@ -114,7 +114,7 @@ static void keventd_create_kthread(void
+ create->result = ERR_PTR(pid);
+ } else {
+ wait_for_completion(&create->started);
+- create->result = find_task_by_pid(pid);
++ create->result = find_task_by_pid_all(pid);
+ }
+ complete(&create->done);
+ }
+diff -uprN linux-2.6.15.orig/kernel/panic.c linux-2.6.15-ve025stab014/kernel/panic.c
+--- linux-2.6.15.orig/kernel/panic.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/panic.c 2006-01-27 14:48:08.000000000 +0300
+@@ -23,6 +23,8 @@
+ int panic_timeout;
+ int panic_on_oops;
+ int tainted;
++int kernel_text_csum_broken;
++EXPORT_SYMBOL(kernel_text_csum_broken);
+
+ EXPORT_SYMBOL(panic_timeout);
+
+@@ -155,7 +157,8 @@ const char *print_tainted(void)
+ {
+ static char buf[20];
+ if (tainted) {
+- snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
++ snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c",
++ kernel_text_csum_broken ? 'B' : ' ',
+ tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
+ tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
+ tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
+diff -uprN linux-2.6.15.orig/kernel/pid.c linux-2.6.15-ve025stab014/kernel/pid.c
+--- linux-2.6.15.orig/kernel/pid.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/pid.c 2006-01-27 14:48:08.000000000 +0300
+@@ -27,6 +27,10 @@
+ #include <linux/bootmem.h>
+ #include <linux/hash.h>
+
++#ifdef CONFIG_VE
++static void __free_vpid(int vpid, struct ve_struct *ve);
++#endif
++
+ #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
+ static struct hlist_head *pid_hash[PIDTYPE_MAX];
+ static int pidhash_shift;
+@@ -57,8 +61,14 @@ typedef struct pidmap {
+ void *page;
+ } pidmap_t;
+
++#ifdef CONFIG_VE
++#define PIDMAP_NRFREE (BITS_PER_PAGE/2)
++#else
++#define PIDMAP_NRFREE BITS_PER_PAGE
++#endif
++
+ static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
+- { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
++ { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(PIDMAP_NRFREE), NULL } };
+
+ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
+
+@@ -67,6 +77,8 @@ fastcall void free_pidmap(int pid)
+ pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
+ int offset = pid & BITS_PER_PAGE_MASK;
+
++ BUG_ON(__is_virtual_pid(pid) || pid == 1);
++
+ clear_bit(offset, map->page);
+ atomic_inc(&map->nr_free);
+ }
+@@ -77,6 +89,8 @@ int alloc_pidmap(void)
+ pidmap_t *map;
+
+ pid = last + 1;
++ if (__is_virtual_pid(pid))
++ pid += VPID_DIV;
+ if (pid >= pid_max)
+ pid = RESERVED_PIDS;
+ offset = pid & BITS_PER_PAGE_MASK;
+@@ -106,6 +120,8 @@ int alloc_pidmap(void)
+ return pid;
+ }
+ offset = find_next_offset(map, offset);
++ if (__is_virtual_pid(offset))
++ offset += VPID_DIV;
+ pid = mk_pid(map, offset);
+ /*
+ * find_next_offset() found a bit, the pid from it
+@@ -143,6 +159,7 @@ struct pid * fastcall find_pid(enum pid_
+ }
+ return NULL;
+ }
++EXPORT_SYMBOL(find_pid);
+
+ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
+ {
+@@ -201,13 +218,26 @@ void fastcall detach_pid(task_t *task, e
+ if (tmp != type && find_pid(tmp, nr))
+ return;
+
++#ifdef CONFIG_VE
++ __free_vpid(task->pids[type].vnr, VE_TASK_INFO(task)->owner_env);
++#endif
+ free_pidmap(nr);
+ }
+
+ task_t *find_task_by_pid_type(int type, int nr)
+ {
++ BUG();
++ return NULL;
++}
++
++EXPORT_SYMBOL(find_task_by_pid_type);
++
++task_t *find_task_by_pid_type_all(int type, int nr)
++{
+ struct pid *pid;
+
++ BUG_ON(nr != -1 && is_virtual_pid(nr));
++
+ pid = find_pid(type, nr);
+ if (!pid)
+ return NULL;
+@@ -215,7 +245,35 @@ task_t *find_task_by_pid_type(int type,
+ return pid_task(&pid->pid_list, type);
+ }
+
+-EXPORT_SYMBOL(find_task_by_pid_type);
++EXPORT_SYMBOL(find_task_by_pid_type_all);
++
++#ifdef CONFIG_VE
++
++task_t *find_task_by_pid_type_ve(int type, int nr)
++{
++ task_t *tsk;
++ int gnr = nr;
++ struct pid *pid;
++
++ if (is_virtual_pid(nr)) {
++ gnr = __vpid_to_pid(nr);
++ if (unlikely(gnr == -1))
++ return NULL;
++ }
++
++ pid = find_pid(type, gnr);
++ if (!pid)
++ return NULL;
++
++ tsk = pid_task(&pid->pid_list, type);
++ if (!ve_accessible(VE_TASK_INFO(tsk)->owner_env, get_exec_env()))
++ return NULL;
++ return tsk;
++}
++
++EXPORT_SYMBOL(find_task_by_pid_type_ve);
++
++#endif
+
+ /*
+ * This function switches the PIDs if a non-leader thread calls
+@@ -234,6 +292,9 @@ void switch_exec_pids(task_t *leader, ta
+
+ leader->pid = leader->tgid = thread->pid;
+ thread->pid = thread->tgid;
++ set_virt_tgid(leader, virt_pid(thread));
++ set_virt_pid(leader, virt_pid(thread));
++ set_virt_pid(thread, virt_tgid(thread));
+
+ attach_pid(thread, PIDTYPE_PID, thread->pid);
+ attach_pid(thread, PIDTYPE_TGID, thread->tgid);
+@@ -247,6 +308,337 @@ void switch_exec_pids(task_t *leader, ta
+ attach_pid(leader, PIDTYPE_SID, leader->signal->session);
+ }
+
++#ifdef CONFIG_VE
++
++/* Virtual PID bits.
++ *
++ * At the moment all internal structures in the kernel store the real global pid.
++ * The only place where virtual PIDs are used is the user frontend. We
++ * remap virtual pids obtained from user to global ones (vpid_to_pid) and
++ * map globals to virtuals before showing them to user (virt_pid_type).
++ *
++ * We hold virtual PIDs inside struct pid, so mapping global -> virtual is easy.
++ */
++
++pid_t _pid_type_to_vpid(int type, pid_t pid)
++{
++ struct pid * p;
++
++ if (unlikely(is_virtual_pid(pid)))
++ return -1;
++
++ read_lock(&tasklist_lock);
++ p = find_pid(type, pid);
++ if (p) {
++ pid = p->vnr;
++ } else {
++ pid = -1;
++ }
++ read_unlock(&tasklist_lock);
++ return pid;
++}
++
++pid_t pid_type_to_vpid(int type, pid_t pid)
++{
++ int vpid;
++
++ if (unlikely(pid <= 0))
++ return pid;
++
++ BUG_ON(is_virtual_pid(pid));
++
++ if (ve_is_super(get_exec_env()))
++ return pid;
++
++ vpid = _pid_type_to_vpid(type, pid);
++ if (unlikely(vpid == -1)) {
++ /* It is allowed: global pid can be used everywhere.
++		/* It is allowed: a global pid can be used everywhere.
++		 * This can happen when the kernel remembers stray pids:
++ */
++ vpid = pid;
++ }
++ return vpid;
++}
++
++/* To map virtual pids to global we maintain special hash table.
++ *
++ * Mapping entries are allocated when a process with non-trivial
++ * mapping is forked, which is possible only after the VE has migrated.
++ * Mappings are destroyed when a global pid is removed from the global
++ * pidmap, which means we do not need to refcount mappings.
++ */
++
++static struct hlist_head *vpid_hash;
++
++struct vpid_mapping
++{
++ int vpid;
++ int veid;
++ int pid;
++ struct hlist_node link;
++};
++
++static kmem_cache_t *vpid_mapping_cachep;
++
++static inline int vpid_hashfn(int vnr, int veid)
++{
++ return hash_long((unsigned long)(vnr+(veid<<16)), pidhash_shift);
++}
++
++struct vpid_mapping *__lookup_vpid_mapping(int vnr, int veid)
++{
++ struct hlist_node *elem;
++ struct vpid_mapping *map;
++
++ hlist_for_each_entry(map, elem,
++ &vpid_hash[vpid_hashfn(vnr, veid)], link) {
++ if (map->vpid == vnr && map->veid == veid)
++ return map;
++ }
++ return NULL;
++}
++
++/* __vpid_to_pid() is raw version of vpid_to_pid(). It is to be used
++ * only under tasklist_lock. In some places we must use only this version
++ * (f.e. __kill_pg_info is called under write lock!)
++ *
++ * The caller should pass a virtual pid. This function returns an error
++ * when it is given a global pid.
++ */
++int __vpid_to_pid(int pid)
++{
++ struct vpid_mapping *map;
++
++ if (unlikely(!is_virtual_pid(pid) || ve_is_super(get_exec_env())))
++ return -1;
++
++ if (!get_exec_env()->sparse_vpid) {
++ if (pid != 1)
++ return pid - VPID_DIV;
++ return get_exec_env()->init_entry->pid;
++ }
++
++ map = __lookup_vpid_mapping(pid, VEID(get_exec_env()));
++ if (map)
++ return map->pid;
++ return -1;
++}
++
++int vpid_to_pid(int pid)
++{
++ /* User gave bad pid. It is his problem. */
++ if (unlikely(pid <= 0))
++ return pid;
++
++ if (!is_virtual_pid(pid))
++ return pid;
++
++ read_lock(&tasklist_lock);
++ pid = __vpid_to_pid(pid);
++ read_unlock(&tasklist_lock);
++ return pid;
++}
++
++/* VEs which never migrated have trivial "arithmetic" mapping pid <-> vpid:
++ *
++ * vpid == 1 -> ve->init_task->pid
++ * else pid & ~VPID_DIV
++ *
++ * In this case VE has ve->sparse_vpid = 0 and we do not use vpid hash table.
++ *
++ * When a VE migrates and we see a non-trivial mapping for the first time,
++ * we scan the process table and populate the mapping hash table.
++ */
++
++static int add_mapping(int pid, int vpid, int veid, struct hlist_head *cache)
++{
++ if (pid > 0 && vpid > 0 && !__lookup_vpid_mapping(vpid, veid)) {
++ struct vpid_mapping *m;
++ if (hlist_empty(cache)) {
++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_ATOMIC);
++ if (unlikely(m == NULL))
++ return -ENOMEM;
++ } else {
++ m = hlist_entry(cache->first, struct vpid_mapping, link);
++ hlist_del(&m->link);
++ }
++ m->pid = pid;
++ m->vpid = vpid;
++ m->veid = veid;
++ hlist_add_head(&m->link,
++ &vpid_hash[vpid_hashfn(vpid, veid)]);
++ }
++ return 0;
++}
++
++static int switch_to_sparse_mapping(int pid)
++{
++ struct ve_struct *env = get_exec_env();
++ struct hlist_head cache;
++ task_t *g, *t;
++ int pcount;
++ int err;
++
++ /* Transition happens under write_lock_irq, so we try to make
++	 * it more reliable and fast by preallocating mapping entries.
++	 * pcounter may not be enough: we could have lots of orphaned
++ * process groups and sessions, which also require mappings.
++ */
++ INIT_HLIST_HEAD(&cache);
++ pcount = atomic_read(&env->pcounter);
++ err = -ENOMEM;
++ while (pcount > 0) {
++ struct vpid_mapping *m;
++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
++ if (!m)
++ goto out;
++ hlist_add_head(&m->link, &cache);
++ pcount--;
++ }
++
++ write_lock_irq(&tasklist_lock);
++ err = 0;
++ if (env->sparse_vpid)
++ goto out_unlock;
++
++ err = -ENOMEM;
++ do_each_thread_ve(g, t) {
++ if (t->pid == pid)
++ continue;
++ if (add_mapping(t->pid, virt_pid(t), VEID(env), &cache))
++ goto out_unlock;
++ } while_each_thread_ve(g, t);
++
++ for_each_process_ve(t) {
++ if (t->pid == pid)
++ continue;
++
++ if (add_mapping(t->tgid, virt_tgid(t), VEID(env), &cache))
++ goto out_unlock;
++ if (add_mapping(t->signal->pgrp, virt_pgid(t), VEID(env), &cache))
++ goto out_unlock;
++ if (add_mapping(t->signal->session, virt_sid(t), VEID(env), &cache))
++ goto out_unlock;
++ }
++ env->sparse_vpid = 1;
++ err = 0;
++
++out_unlock:
++ if (err) {
++ int i;
++
++ for (i=0; i<(1<<pidhash_shift); i++) {
++ struct hlist_node *elem, *next;
++ struct vpid_mapping *map;
++
++ hlist_for_each_entry_safe(map, elem, next, &vpid_hash[i], link) {
++ if (map->veid == VEID(env)) {
++ hlist_del(elem);
++ hlist_add_head(elem, &cache);
++ }
++ }
++ }
++ }
++ write_unlock_irq(&tasklist_lock);
++
++out:
++ while (!hlist_empty(&cache)) {
++ struct vpid_mapping *m;
++ m = hlist_entry(cache.first, struct vpid_mapping, link);
++ hlist_del(&m->link);
++ kmem_cache_free(vpid_mapping_cachep, m);
++ }
++ return err;
++}
++
++int alloc_vpid(int pid, int virt_pid)
++{
++ int result;
++ struct vpid_mapping *m;
++ struct ve_struct *env = get_exec_env();
++
++ if (ve_is_super(env) || !env->virt_pids)
++ return pid;
++
++ if (!env->sparse_vpid) {
++ if (virt_pid == -1)
++ return pid + VPID_DIV;
++
++ if (virt_pid == 1 || virt_pid == pid + VPID_DIV)
++ return virt_pid;
++
++ if ((result = switch_to_sparse_mapping(pid)) < 0)
++ return result;
++ }
++
++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
++ if (!m)
++ return -ENOMEM;
++
++ m->pid = pid;
++ m->veid = VEID(env);
++
++ result = (virt_pid == -1) ? pid + VPID_DIV : virt_pid;
++
++ write_lock_irq(&tasklist_lock);
++ if (unlikely(__lookup_vpid_mapping(result, m->veid))) {
++ if (virt_pid > 0) {
++ result = -EEXIST;
++ goto out;
++ }
++
++		/* No luck. Now we search for a vpid that is not in use yet.
++		 * This is a weak spot: it is a plain linear search. */
++ do {
++ result++;
++ if (!__is_virtual_pid(result))
++ result += VPID_DIV;
++ if (result >= pid_max)
++ result = RESERVED_PIDS + VPID_DIV;
++ } while (__lookup_vpid_mapping(result, m->veid) != NULL);
++
++		/* And set last_pid in the hope that future alloc_pidmap()
++		 * calls will avoid collisions */
++ last_pid = result - VPID_DIV;
++ }
++ if (result > 0) {
++ m->vpid = result;
++ hlist_add_head(&m->link,
++ &vpid_hash[vpid_hashfn(result, m->veid)]);
++ }
++out:
++ write_unlock_irq(&tasklist_lock);
++ if (result < 0)
++ kmem_cache_free(vpid_mapping_cachep, m);
++ return result;
++}
++
++static void __free_vpid(int vpid, struct ve_struct *ve)
++{
++ struct vpid_mapping *m;
++
++ if (!ve->sparse_vpid)
++ return;
++
++ if (!__is_virtual_pid(vpid) && (vpid != 1 || ve_is_super(ve)))
++ return;
++
++ m = __lookup_vpid_mapping(vpid, ve->veid);
++ BUG_ON(m == NULL);
++ hlist_del(&m->link);
++ kmem_cache_free(vpid_mapping_cachep, m);
++}
++EXPORT_SYMBOL(alloc_vpid);
++
++void free_vpid(int vpid, struct ve_struct *ve)
++{
++ write_lock_irq(&tasklist_lock);
++ __free_vpid(vpid, ve);
++ write_unlock_irq(&tasklist_lock);
++}
++#endif
++
+ /*
+ * The pid hash table is scaled according to the amount of memory in the
+ * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
+@@ -273,6 +665,14 @@ void __init pidhash_init(void)
+ for (j = 0; j < pidhash_size; j++)
+ INIT_HLIST_HEAD(&pid_hash[i][j]);
+ }
++
++#ifdef CONFIG_VE
++ vpid_hash = alloc_bootmem(pidhash_size * sizeof(struct hlist_head));
++ if (!vpid_hash)
++ panic("Could not alloc vpid_hash!\n");
++ for (j = 0; j < pidhash_size; j++)
++ INIT_HLIST_HEAD(&vpid_hash[j]);
++#endif
+ }
+
+ void __init pidmap_init(void)
+@@ -289,4 +689,12 @@ void __init pidmap_init(void)
+
+ for (i = 0; i < PIDTYPE_MAX; i++)
+ attach_pid(current, i, 0);
++
++#ifdef CONFIG_VE
++ vpid_mapping_cachep =
++ kmem_cache_create("vpid_mapping",
++ sizeof(struct vpid_mapping),
++ __alignof__(struct vpid_mapping),
++ SLAB_PANIC|SLAB_UBC, NULL, NULL);
++#endif
+ }
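
Until a VE migrates, the pid.c code above keeps the pid<->vpid relation purely arithmetic: a virtual pid is the global pid offset by VPID_DIV, with vpid 1 reserved for the VE's init, and only a non-trivial mapping forces the switch to the sparse hash table. A user-space sketch of the trivial mapping, assuming an illustrative VPID_DIV value and init pid (the real constants and the init_entry lookup live in the patched kernel):

```c
#include <stdio.h>

#define VPID_DIV	(1 << 20)	/* illustrative; the kernel defines its own value */
#define VE_INIT_PID	4312		/* hypothetical global pid of the VE's init */

/* Global pid -> virtual pid for a VE that has never migrated. */
static int pid_to_vpid(int pid)
{
	return pid == VE_INIT_PID ? 1 : pid + VPID_DIV;
}

/* Virtual pid -> global pid, the inverse used by __vpid_to_pid(). */
static int vpid_to_pid(int vpid)
{
	return vpid == 1 ? VE_INIT_PID : vpid - VPID_DIV;
}

int main(void)
{
	int pid = 4315;

	printf("pid %d -> vpid %d -> pid %d\n",
	       pid, pid_to_vpid(pid), vpid_to_pid(pid_to_vpid(pid)));
	printf("init: pid %d -> vpid %d\n", VE_INIT_PID, pid_to_vpid(VE_INIT_PID));
	return 0;
}
```
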
+diff -uprN linux-2.6.15.orig/kernel/posix-cpu-timers.c linux-2.6.15-ve025stab014/kernel/posix-cpu-timers.c
+--- linux-2.6.15.orig/kernel/posix-cpu-timers.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/posix-cpu-timers.c 2006-01-27 14:48:08.000000000 +0300
+@@ -20,7 +20,7 @@ static int check_clock(clockid_t which_c
+ return 0;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
+ p->tgid != current->tgid : p->tgid != pid)) {
+ error = -EINVAL;
+@@ -303,7 +303,7 @@ int posix_cpu_clock_get(clockid_t which_
+ */
+ struct task_struct *p;
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ if (p) {
+ if (CPUCLOCK_PERTHREAD(which_clock)) {
+ if (p->tgid == current->tgid) {
+@@ -347,7 +347,7 @@ int posix_cpu_timer_create(struct k_itim
+ if (pid == 0) {
+ p = current;
+ } else {
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ if (p && p->tgid != current->tgid)
+ p = NULL;
+ }
+@@ -355,7 +355,7 @@ int posix_cpu_timer_create(struct k_itim
+ if (pid == 0) {
+ p = current->group_leader;
+ } else {
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ if (p && p->tgid != pid)
+ p = NULL;
+ }
+diff -uprN linux-2.6.15.orig/kernel/posix-timers.c linux-2.6.15-ve025stab014/kernel/posix-timers.c
+--- linux-2.6.15.orig/kernel/posix-timers.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/posix-timers.c 2006-01-27 14:48:08.000000000 +0300
+@@ -31,6 +31,7 @@
+ * POSIX clocks & timers
+ */
+ #include <linux/mm.h>
++#include <linux/module.h>
+ #include <linux/smp_lock.h>
+ #include <linux/interrupt.h>
+ #include <linux/slab.h>
+@@ -48,6 +49,8 @@
+ #include <linux/workqueue.h>
+ #include <linux/module.h>
+
++#include <ub/beancounter.h>
++
+ #ifndef div_long_long_rem
+ #include <asm/div64.h>
+
+@@ -258,7 +261,8 @@ static __init int init_posix_timers(void
+ register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
+
+ posix_timers_cache = kmem_cache_create("posix_timers_cache",
+- sizeof (struct k_itimer), 0, 0, NULL, NULL);
++ sizeof (struct k_itimer), 0,
++ SLAB_UBC, NULL, NULL);
+ idr_init(&posix_timers_id);
+ return 0;
+ }
+@@ -411,6 +415,13 @@ exit:
+
+ int posix_timer_event(struct k_itimer *timr,int si_private)
+ {
++ int ret;
++ struct ve_struct *ve;
++ struct user_beancounter *ub;
++
++ ve = set_exec_env(timr->it_process->ve_task_info.owner_env);
++ ub = set_exec_ub(timr->it_process->task_bc.task_ub);
++
+ memset(&timr->sigq->info, 0, sizeof(siginfo_t));
+ timr->sigq->info.si_sys_private = si_private;
+ /*
+@@ -430,11 +441,11 @@ int posix_timer_event(struct k_itimer *t
+
+ if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
+ struct task_struct *leader;
+- int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
++ ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
+ timr->it_process);
+
+ if (likely(ret >= 0))
+- return ret;
++ goto out;
+
+ timr->it_sigev_notify = SIGEV_SIGNAL;
+ leader = timr->it_process->group_leader;
+@@ -442,8 +453,12 @@ int posix_timer_event(struct k_itimer *t
+ timr->it_process = leader;
+ }
+
+- return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
++ ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
+ timr->it_process);
++out:
++ (void)set_exec_ub(ub);
++ (void)set_exec_env(ve);
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(posix_timer_event);
+
+@@ -518,7 +533,7 @@ static inline struct task_struct * good_
+ struct task_struct *rtn = current->group_leader;
+
+ if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
+- (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
++ (!(rtn = find_task_by_pid_ve(event->sigev_notify_thread_id)) ||
+ rtn->tgid != current->tgid ||
+ (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
+ return NULL;
+@@ -1219,6 +1234,7 @@ int do_posix_clock_monotonic_gettime(str
+ {
+ return do_posix_clock_monotonic_get(CLOCK_MONOTONIC, tp);
+ }
++EXPORT_SYMBOL(do_posix_clock_monotonic_gettime);
+
+ int do_posix_clock_nosettime(clockid_t clockid, struct timespec *tp)
+ {
+diff -uprN linux-2.6.15.orig/kernel/power/process.c linux-2.6.15-ve025stab014/kernel/power/process.c
+--- linux-2.6.15.orig/kernel/power/process.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/power/process.c 2006-01-27 14:48:08.000000000 +0300
+@@ -67,7 +67,7 @@ int freeze_processes(void)
+ do {
+ todo = 0;
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ if (!freezeable(p))
+ continue;
+ if (frozen(p))
+@@ -78,7 +78,7 @@ int freeze_processes(void)
+ signal_wake_up(p, 0);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ todo++;
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+ read_unlock(&tasklist_lock);
+ yield(); /* Yield is okay here */
+ if (todo && time_after(jiffies, start_time + TIMEOUT)) {
+@@ -95,7 +95,7 @@ int freeze_processes(void)
+ */
+ if (todo) {
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p)
++ do_each_thread_all(g, p)
+ if (freezing(p)) {
+ pr_debug(" clean up: %s\n", p->comm);
+ p->flags &= ~PF_FREEZE;
+@@ -103,7 +103,7 @@ int freeze_processes(void)
+ recalc_sigpending_tsk(p);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ }
+- while_each_thread(g, p);
++ while_each_thread_all(g, p);
+ read_unlock(&tasklist_lock);
+ return todo;
+ }
+@@ -119,12 +119,12 @@ void thaw_processes(void)
+
+ printk( "Restarting tasks..." );
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ if (!freezeable(p))
+ continue;
+ if (!thaw_process(p))
+ printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+
+ read_unlock(&tasklist_lock);
+ schedule();
+diff -uprN linux-2.6.15.orig/kernel/power/swsusp.c linux-2.6.15-ve025stab014/kernel/power/swsusp.c
+--- linux-2.6.15.orig/kernel/power/swsusp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/power/swsusp.c 2006-01-27 14:48:06.000000000 +0300
+@@ -71,6 +71,9 @@
+ #include <linux/crypto.h>
+ #include <asm/scatterlist.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_vmpages.h>
++
+ #include "power.h"
+
+ #ifdef CONFIG_HIGHMEM
+@@ -361,7 +364,7 @@ static int write_page(unsigned long addr
+ swp_entry_t entry;
+ int error = 0;
+
+- entry = get_swap_page();
++ entry = get_swap_page(get_ub0());
+ if (swp_offset(entry) &&
+ swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
+ error = rw_swap_page_sync(WRITE, entry,
+diff -uprN linux-2.6.15.orig/kernel/printk.c linux-2.6.15-ve025stab014/kernel/printk.c
+--- linux-2.6.15.orig/kernel/printk.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/printk.c 2006-01-27 14:48:08.000000000 +0300
+@@ -30,6 +30,7 @@
+ #include <linux/smp.h>
+ #include <linux/security.h>
+ #include <linux/bootmem.h>
++#include <linux/vzratelimit.h>
+ #include <linux/syscalls.h>
+
+ #include <asm/uaccess.h>
+@@ -83,7 +84,7 @@ static int console_locked;
+ * It is also used in interesting ways to provide interlocking in
+ * release_console_sem().
+ */
+-static DEFINE_SPINLOCK(logbuf_lock);
++DEFINE_SPINLOCK(logbuf_lock);
+
+ #define LOG_BUF_MASK (log_buf_len-1)
+ #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
+@@ -122,6 +123,31 @@ static char *log_buf = __log_buf;
+ static int log_buf_len = __LOG_BUF_LEN;
+ static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */
+
++#ifdef CONFIG_VE
++
++#define ve_log_wait (*(get_exec_env()->_log_wait))
++#define ve_log_start (*(get_exec_env()->_log_start))
++#define ve_log_end (*(get_exec_env()->_log_end))
++#define ve_logged_chars (*(get_exec_env()->_logged_chars))
++#define ve_log_buf (get_exec_env()->log_buf)
++#define ve_log_buf_len (ve_is_super(get_exec_env()) ? \
++ log_buf_len : VE_DEFAULT_LOG_BUF_LEN)
++#define VE_LOG_BUF_MASK (ve_log_buf_len - 1)
++#define VE_LOG_BUF(idx) (ve_log_buf[(idx) & VE_LOG_BUF_MASK])
++
++#else
++
++#define ve_log_wait log_wait
++#define ve_log_start log_start
++#define ve_log_end log_end
++#define ve_logged_chars logged_chars
++#define ve_log_buf log_buf
++#define ve_log_buf_len log_buf_len
++#define VE_LOG_BUF_MASK LOG_BUF_MASK
++#define VE_LOG_BUF(idx) LOG_BUF(idx)
++
++#endif /* CONFIG_VE */
++
+ /*
+ * Setup a list of consoles. Called from init/main.c
+ */
+@@ -179,18 +205,18 @@ static int __init log_buf_len_setup(char
+
+ spin_lock_irqsave(&logbuf_lock, flags);
+ log_buf_len = size;
+- log_buf = new_log_buf;
++ ve_log_buf = new_log_buf;
+
+- offset = start = min(con_start, log_start);
++ offset = start = min(con_start, ve_log_start);
+ dest_idx = 0;
+- while (start != log_end) {
+- log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
++ while (start != ve_log_end) {
++ ve_log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
+ start++;
+ dest_idx++;
+ }
+- log_start -= offset;
++ ve_log_start -= offset;
+ con_start -= offset;
+- log_end -= offset;
++ ve_log_end -= offset;
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+
+ printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len);
+@@ -223,6 +249,10 @@ int do_syslog(int type, char __user *buf
+ char c;
+ int error = 0;
+
++ if (!ve_is_super(get_exec_env()) &&
++ (type == 6 || type == 7 || type == 8))
++ goto out;
++
+ error = security_syslog(type);
+ if (error)
+ return error;
+@@ -243,15 +273,15 @@ int do_syslog(int type, char __user *buf
+ error = -EFAULT;
+ goto out;
+ }
+- error = wait_event_interruptible(log_wait,
+- (log_start - log_end));
++ error = wait_event_interruptible(ve_log_wait,
++ (ve_log_start - ve_log_end));
+ if (error)
+ goto out;
+ i = 0;
+ spin_lock_irq(&logbuf_lock);
+- while (!error && (log_start != log_end) && i < len) {
+- c = LOG_BUF(log_start);
+- log_start++;
++ while (!error && (ve_log_start != ve_log_end) && i < len) {
++ c = VE_LOG_BUF(ve_log_start);
++ ve_log_start++;
+ spin_unlock_irq(&logbuf_lock);
+ error = __put_user(c,buf);
+ buf++;
+@@ -277,15 +307,17 @@ int do_syslog(int type, char __user *buf
+ error = -EFAULT;
+ goto out;
+ }
++ if (ve_log_buf == NULL)
++ goto out;
+ count = len;
+- if (count > log_buf_len)
+- count = log_buf_len;
++ if (count > ve_log_buf_len)
++ count = ve_log_buf_len;
+ spin_lock_irq(&logbuf_lock);
+- if (count > logged_chars)
+- count = logged_chars;
++ if (count > ve_logged_chars)
++ count = ve_logged_chars;
+ if (do_clear)
+- logged_chars = 0;
+- limit = log_end;
++ ve_logged_chars = 0;
++ limit = ve_log_end;
+ /*
+ * __put_user() could sleep, and while we sleep
+ * printk() could overwrite the messages
+@@ -294,9 +326,9 @@ int do_syslog(int type, char __user *buf
+ */
+ for (i = 0; i < count && !error; i++) {
+ j = limit-1-i;
+- if (j + log_buf_len < log_end)
++ if (j + ve_log_buf_len < ve_log_end)
+ break;
+- c = LOG_BUF(j);
++ c = VE_LOG_BUF(j);
+ spin_unlock_irq(&logbuf_lock);
+ error = __put_user(c,&buf[count-1-i]);
+ cond_resched();
+@@ -320,7 +352,7 @@ int do_syslog(int type, char __user *buf
+ }
+ break;
+ case 5: /* Clear ring buffer */
+- logged_chars = 0;
++ ve_logged_chars = 0;
+ break;
+ case 6: /* Disable logging to console */
+ console_loglevel = minimum_console_loglevel;
+@@ -338,10 +370,10 @@ int do_syslog(int type, char __user *buf
+ error = 0;
+ break;
+ case 9: /* Number of chars in the log buffer */
+- error = log_end - log_start;
++ error = ve_log_end - ve_log_start;
+ break;
+ case 10: /* Size of the log buffer */
+- error = log_buf_len;
++ error = ve_log_buf_len;
+ break;
+ default:
+ error = -EINVAL;
+@@ -365,7 +397,7 @@ static void __call_console_drivers(unsig
+
+ for (con = console_drivers; con; con = con->next) {
+ if ((con->flags & CON_ENABLED) && con->write)
+- con->write(con, &LOG_BUF(start), end - start);
++ con->write(con, &VE_LOG_BUF(start), end - start);
+ }
+ }
+
+@@ -377,11 +409,11 @@ static void _call_console_drivers(unsign
+ {
+ if (msg_log_level < console_loglevel &&
+ console_drivers && start != end) {
+- if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
++ if ((start & VE_LOG_BUF_MASK) > (end & VE_LOG_BUF_MASK)) {
+ /* wrapped write */
+- __call_console_drivers(start & LOG_BUF_MASK,
+- log_buf_len);
+- __call_console_drivers(0, end & LOG_BUF_MASK);
++ __call_console_drivers(start & VE_LOG_BUF_MASK,
++ ve_log_buf_len);
++ __call_console_drivers(0, end & VE_LOG_BUF_MASK);
+ } else {
+ __call_console_drivers(start, end);
+ }
+@@ -405,16 +437,16 @@ static void call_console_drivers(unsigne
+ start_print = start;
+ while (cur_index != end) {
+ if (msg_level < 0 && ((end - cur_index) > 2) &&
+- LOG_BUF(cur_index + 0) == '<' &&
+- LOG_BUF(cur_index + 1) >= '0' &&
+- LOG_BUF(cur_index + 1) <= '7' &&
+- LOG_BUF(cur_index + 2) == '>') {
+- msg_level = LOG_BUF(cur_index + 1) - '0';
++ VE_LOG_BUF(cur_index + 0) == '<' &&
++ VE_LOG_BUF(cur_index + 1) >= '0' &&
++ VE_LOG_BUF(cur_index + 1) <= '7' &&
++ VE_LOG_BUF(cur_index + 2) == '>') {
++ msg_level = VE_LOG_BUF(cur_index + 1) - '0';
+ cur_index += 3;
+ start_print = cur_index;
+ }
+ while (cur_index != end) {
+- char c = LOG_BUF(cur_index);
++ char c = VE_LOG_BUF(cur_index);
+
+ cur_index++;
+ if (c == '\n') {
+@@ -439,14 +471,14 @@ static void call_console_drivers(unsigne
+
+ static void emit_log_char(char c)
+ {
+- LOG_BUF(log_end) = c;
+- log_end++;
+- if (log_end - log_start > log_buf_len)
+- log_start = log_end - log_buf_len;
+- if (log_end - con_start > log_buf_len)
+- con_start = log_end - log_buf_len;
+- if (logged_chars < log_buf_len)
+- logged_chars++;
++ VE_LOG_BUF(ve_log_end) = c;
++ ve_log_end++;
++ if (ve_log_end - ve_log_start > ve_log_buf_len)
++ ve_log_start = ve_log_end - ve_log_buf_len;
++ if (ve_is_super(get_exec_env()) && ve_log_end - con_start > ve_log_buf_len)
++ con_start = ve_log_end - ve_log_buf_len;
++ if (ve_logged_chars < ve_log_buf_len)
++ ve_logged_chars++;
+ }
+
+ /*
+@@ -511,18 +543,68 @@ __attribute__((weak)) unsigned long long
+ * printf(3)
+ */
+
++static inline int ve_log_init(void)
++{
++#ifdef CONFIG_VE
++ if (ve_log_buf != NULL)
++ return 0;
++
++ if (ve_is_super(get_exec_env())) {
++ ve0._log_wait = &log_wait;
++ ve0._log_start = &log_start;
++ ve0._log_end = &log_end;
++ ve0._logged_chars = &logged_chars;
++ ve0.log_buf = log_buf;
++ return 0;
++ }
++
++ ve_log_buf = kmalloc(ve_log_buf_len, GFP_ATOMIC);
++ if (!ve_log_buf)
++ return -ENOMEM;
++
++ memset(ve_log_buf, 0, ve_log_buf_len);
++#endif
++ return 0;
++}
++
+ asmlinkage int printk(const char *fmt, ...)
+ {
+ va_list args;
+ int r;
++ struct ve_struct *ve;
+
+ va_start(args, fmt);
++ ve = set_exec_env(get_ve0());
+ r = vprintk(fmt, args);
++ (void)set_exec_env(ve);
+ va_end(args);
+
+ return r;
+ }
+
++asmlinkage int ve_printk(int dst, const char *fmt, ...)
++{
++ va_list args;
++ int printed_len;
++
++ printed_len = 0;
++ if (ve_is_super(get_exec_env()) || (dst & VE0_LOG)) {
++ struct ve_struct *env;
++ va_start(args, fmt);
++ env = set_exec_env(get_ve0());
++ printed_len = vprintk(fmt, args);
++ (void)set_exec_env(env);
++ va_end(args);
++ }
++ if (!ve_is_super(get_exec_env()) && (dst & VE_LOG)) {
++ va_start(args, fmt);
++ printed_len = vprintk(fmt, args);
++ va_end(args);
++ }
++ return printed_len;
++}
++EXPORT_SYMBOL(ve_printk);
++
+ /* cpu currently holding logbuf_lock */
+ static volatile unsigned int printk_cpu = UINT_MAX;
+
+@@ -533,6 +615,7 @@ asmlinkage int vprintk(const char *fmt,
+ char *p;
+ static char printk_buf[1024];
+ static int log_level_unknown = 1;
++ int err, need_wake;
+
+ preempt_disable();
+ if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
+@@ -544,6 +627,12 @@ asmlinkage int vprintk(const char *fmt,
+ spin_lock_irqsave(&logbuf_lock, flags);
+ printk_cpu = smp_processor_id();
+
++ err = ve_log_init();
++ if (err) {
++ spin_unlock_irqrestore(&logbuf_lock, flags);
++ return err;
++ }
++
+ /* Emit the output into the temporary buffer */
+ printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
+
+@@ -615,7 +704,12 @@ asmlinkage int vprintk(const char *fmt,
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+ goto out;
+ }
+- if (!down_trylock(&console_sem)) {
++ if (!ve_is_super(get_exec_env())) {
++ need_wake = (ve_log_start != ve_log_end);
++ spin_unlock_irqrestore(&logbuf_lock, flags);
++ if (!oops_in_progress && need_wake)
++ wake_up_interruptible(&ve_log_wait);
++ } else if (!down_trylock(&console_sem)) {
+ console_locked = 1;
+ /*
+ * We own the drivers. We can drop the spinlock and let
+@@ -754,12 +848,12 @@ void release_console_sem(void)
+
+ for ( ; ; ) {
+ spin_lock_irqsave(&logbuf_lock, flags);
+- wake_klogd |= log_start - log_end;
+- if (con_start == log_end)
++ wake_klogd |= ve_log_start - ve_log_end;
++ if (con_start == ve_log_end)
+ break; /* Nothing to print */
+ _con_start = con_start;
+- _log_end = log_end;
+- con_start = log_end; /* Flush */
++ _log_end = ve_log_end;
++ con_start = ve_log_end; /* Flush */
+ spin_unlock(&logbuf_lock);
+ call_console_drivers(_con_start, _log_end);
+ local_irq_restore(flags);
+@@ -768,8 +862,8 @@ void release_console_sem(void)
+ console_may_schedule = 0;
+ up(&console_sem);
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+- if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
+- wake_up_interruptible(&log_wait);
++ if (wake_klogd && !oops_in_progress && waitqueue_active(&ve_log_wait))
++ wake_up_interruptible(&ve_log_wait);
+ }
+ EXPORT_SYMBOL(release_console_sem);
+
+@@ -940,7 +1034,7 @@ void register_console(struct console *co
+ * for us.
+ */
+ spin_lock_irqsave(&logbuf_lock, flags);
+- con_start = log_start;
++ con_start = ve_log_start;
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+ }
+ release_console_sem();
+@@ -1049,3 +1143,33 @@ int printk_ratelimit(void)
+ printk_ratelimit_burst);
+ }
+ EXPORT_SYMBOL(printk_ratelimit);
++
++/*
++ * Rate limiting stuff.
++ */
++int vz_ratelimit(struct vz_rate_info *p)
++{
++ unsigned long cjif, djif;
++ unsigned long flags;
++ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
++ long new_bucket;
++
++ spin_lock_irqsave(&ratelimit_lock, flags);
++ cjif = jiffies;
++ djif = cjif - p->last;
++ if (djif < p->interval) {
++ if (p->bucket >= p->burst) {
++ spin_unlock_irqrestore(&ratelimit_lock, flags);
++ return 0;
++ }
++ p->bucket++;
++ } else {
++ new_bucket = p->bucket - (djif / (unsigned)p->interval);
++ if (new_bucket < 0)
++ new_bucket = 0;
++ p->bucket = new_bucket + 1;
++ }
++ p->last = cjif;
++ spin_unlock_irqrestore(&ratelimit_lock, flags);
++ return 1;
++}
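
vz_ratelimit() above is a small token bucket: up to `burst` events are allowed inside one `interval`, and once an interval has passed the bucket is drained by the number of elapsed intervals. The same logic as a standalone sketch, with jiffies replaced by a caller-supplied tick value and the spinlock omitted:

```c
#include <stdio.h>

struct rate_info {
	long interval;		/* ticks per refill period */
	long burst;		/* events allowed inside one period */
	long bucket;		/* events consumed in the current period */
	unsigned long last;	/* tick of the last accepted event */
};

/* Returns 1 if the event may proceed, 0 if it is rate-limited
 * (same contract as vz_ratelimit(); locking left out). */
static int ratelimit(struct rate_info *p, unsigned long now)
{
	unsigned long delta = now - p->last;

	if (delta < (unsigned long)p->interval) {
		if (p->bucket >= p->burst)
			return 0;	/* still inside the period, bucket full */
		p->bucket++;
	} else {
		long drained = p->bucket - (long)(delta / p->interval);

		p->bucket = (drained < 0 ? 0 : drained) + 1;
	}
	p->last = now;
	return 1;
}

int main(void)
{
	struct rate_info r = { .interval = 10, .burst = 2, .bucket = 0, .last = 0 };

	for (unsigned long t = 1; t <= 40; t += 4)
		printf("t=%2lu -> %s\n", t, ratelimit(&r, t) ? "ok" : "limited");
	return 0;
}
```
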
+diff -uprN linux-2.6.15.orig/kernel/ptrace.c linux-2.6.15-ve025stab014/kernel/ptrace.c
+--- linux-2.6.15.orig/kernel/ptrace.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ptrace.c 2006-01-27 14:48:08.000000000 +0300
+@@ -135,7 +135,10 @@ static int may_attach(struct task_struct
+ smp_rmb();
+ if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+ return -EPERM;
+-
++ if (!task->mm->vps_dumpable && !ve_is_super(get_exec_env()))
++ return -EPERM;
++ if (!ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env()))
++ return -EPERM;
+ return security_ptrace(current, task);
+ }
+
+@@ -445,7 +448,7 @@ static int ptrace_get_task_struct(long r
+
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+diff -uprN linux-2.6.15.orig/kernel/sched.c linux-2.6.15-ve025stab014/kernel/sched.c
+--- linux-2.6.15.orig/kernel/sched.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/sched.c 2006-01-27 14:48:08.000000000 +0300
+@@ -219,6 +219,9 @@ struct runqueue {
+ */
+ unsigned long nr_uninterruptible;
+
++ unsigned long nr_sleeping;
++ unsigned long nr_stopped;
++
+ unsigned long expired_timestamp;
+ unsigned long long timestamp_last_tick;
+ task_t *curr, *idle;
+@@ -283,6 +286,11 @@ for (domain = rcu_dereference(cpu_rq(cpu
+ # define finish_arch_switch(prev) do { } while (0)
+ #endif
+
++struct kernel_stat_glob kstat_glob;
++spinlock_t kstat_glb_lock = SPIN_LOCK_UNLOCKED;
++EXPORT_SYMBOL(kstat_glob);
++EXPORT_SYMBOL(kstat_glb_lock);
++
+ #ifndef __ARCH_WANT_UNLOCKED_CTXSW
+ static inline int task_running(runqueue_t *rq, task_t *p)
+ {
+@@ -373,6 +381,185 @@ static inline void task_rq_unlock(runque
+ spin_unlock_irqrestore(&rq->lock, *flags);
+ }
+
++#ifdef CONFIG_VE
++#define ve_nr_iowait_inc(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_iowait++; \
++ } while(0)
++#define ve_nr_iowait_dec(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_iowait--; \
++ } while(0)
++#define ve_nr_unint_inc(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_unint++; \
++ } while(0)
++#define ve_nr_unint_dec(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_unint--; \
++ } while(0)
++
++#define cycles_after(a, b) ((long long)(b) - (long long)(a) < 0)
++
++cycles_t ve_sched_get_idle_time(struct ve_struct *ve, int cpu)
++{
++ struct ve_cpu_stats *ve_stat;
++ unsigned v;
++ cycles_t strt, ret, cycles;
++
++ ve_stat = VE_CPU_STATS(ve, cpu);
++ do {
++ v = read_seqcount_begin(&ve_stat->stat_lock);
++ ret = ve_stat->idle_time;
++ strt = ve_stat->strt_idle_time;
++ if (strt && nr_uninterruptible_ve(ve) == 0) {
++ cycles = get_cycles();
++ if (cycles_after(cycles, strt))
++ ret += cycles - strt;
++ }
++ } while (read_seqcount_retry(&ve_stat->stat_lock, v));
++ return ret;
++}
++EXPORT_SYMBOL(ve_sched_get_idle_time);
++
++cycles_t ve_sched_get_iowait_time(struct ve_struct *ve, int cpu)
++{
++ struct ve_cpu_stats *ve_stat;
++ unsigned v;
++ cycles_t strt, ret, cycles;
++
++ ve_stat = VE_CPU_STATS(ve, cpu);
++ do {
++ v = read_seqcount_begin(&ve_stat->stat_lock);
++ ret = ve_stat->iowait_time;
++ strt = ve_stat->strt_idle_time;
++ if (strt && nr_uninterruptible_ve(ve) > 0) {
++ cycles = get_cycles();
++ if (cycles_after(cycles, strt))
++ ret += cycles - strt;
++ }
++ } while (read_seqcount_retry(&ve_stat->stat_lock, v));
++ return ret;
++}
++
++EXPORT_SYMBOL(ve_sched_get_iowait_time);
++
++static inline void ve_stop_idle(struct ve_struct *ve,
++ unsigned int cpu, cycles_t cycles)
++{
++ struct ve_cpu_stats *ve_stat;
++
++ ve_stat = VE_CPU_STATS(ve, cpu);
++
++ write_seqcount_begin(&ve_stat->stat_lock);
++ if (ve_stat->strt_idle_time) {
++ if (cycles_after(cycles, ve_stat->strt_idle_time)) {
++ if (nr_uninterruptible_ve(ve) == 0)
++ ve_stat->idle_time += cycles -
++ ve_stat->strt_idle_time;
++ else
++ ve_stat->iowait_time += cycles -
++ ve_stat->strt_idle_time;
++ }
++ ve_stat->strt_idle_time = 0;
++ }
++ write_seqcount_end(&ve_stat->stat_lock);
++}
++
++static inline void ve_strt_idle(struct ve_struct *ve,
++ unsigned int cpu, cycles_t cycles)
++{
++ struct ve_cpu_stats *ve_stat;
++
++ ve_stat = VE_CPU_STATS(ve, cpu);
++
++ write_seqcount_begin(&ve_stat->stat_lock);
++ ve_stat->strt_idle_time = cycles;
++ write_seqcount_end(&ve_stat->stat_lock);
++}
++
++#define ve_nr_running_inc(env, cpu, cycles) do { \
++ if (++VE_CPU_STATS((env), (cpu))->nr_running == 1) \
++ ve_stop_idle(env, cpu, cycles); \
++ } while (0)
++#define ve_nr_running_dec(env, cpu, cycles) do { \
++ if (--VE_CPU_STATS((env), (cpu))->nr_running == 0) \
++ ve_strt_idle(env, cpu, cycles); \
++ } while (0)
++
++void ve_sched_attach(struct ve_struct *envid)
++{
++ struct task_struct *tsk;
++ unsigned int cpu;
++ cycles_t cycles;
++
++ tsk = current;
++ preempt_disable();
++ cycles = get_cycles();
++ cpu = task_cpu(tsk);
++ ve_nr_running_dec(VE_TASK_INFO(tsk)->owner_env, cpu, cycles);
++ ve_nr_running_inc(envid, cpu, cycles);
++ preempt_enable();
++}
++EXPORT_SYMBOL(ve_sched_attach);
++
++static inline void write_wakeup_stamp(struct task_struct *p, cycles_t cyc)
++{
++ struct ve_task_info *ti;
++
++ ti = VE_TASK_INFO(p);
++ write_seqcount_begin(&ti->wakeup_lock);
++ ti->wakeup_stamp = cyc;
++ write_seqcount_end(&ti->wakeup_lock);
++}
++
++static inline void update_sched_lat(struct task_struct *t, cycles_t cycles)
++{
++ int cpu;
++ cycles_t ve_wstamp;
++
++ /* safe due to runqueue lock */
++ cpu = smp_processor_id();
++ ve_wstamp = t->ve_task_info.wakeup_stamp;
++
++ if (ve_wstamp && cycles > ve_wstamp) {
++ KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat,
++ cpu, cycles - ve_wstamp);
++ KSTAT_LAT_PCPU_ADD(&t->ve_task_info.exec_env->sched_lat_ve,
++ cpu, cycles - ve_wstamp);
++ }
++}
++
++static inline void update_ve_task_info(task_t *prev, cycles_t cycles)
++{
++#ifdef CONFIG_FAIRSCHED
++ if (prev != this_pcpu()->idle) {
++#else
++ if (prev != this_rq()->idle) {
++#endif
++ VE_CPU_STATS(prev->ve_task_info.owner_env,
++ smp_processor_id())->used_time +=
++ cycles - prev->ve_task_info.sched_time;
++
++ prev->ve_task_info.sched_time = cycles;
++ }
++}
++
++#else
++#define ve_nr_running_inc(env, cpu, cycles) do { } while(0)
++#define ve_nr_running_dec(env, cpu, cycles) do { } while(0)
++#define ve_nr_iowait_inc(env, cpu) do { } while(0)
++#define ve_nr_iowait_dec(env, cpu) do { } while(0)
++#define ve_nr_unint_inc(env, cpu) do { } while(0)
++#define ve_nr_unint_dec(env, cpu) do { } while(0)
++#define update_ve_task_info(prev, cycles) do { } while (0)
++#endif
++
++unsigned long nr_zombie = 0; /* protected by tasklist_lock */
++unsigned long nr_dead = 0;
++EXPORT_SYMBOL(nr_zombie);
++EXPORT_SYMBOL(nr_dead);
++
+ #ifdef CONFIG_SCHEDSTATS
+ /*
+ * bump this up when changing the output format or the meaning of an existing
+@@ -671,7 +858,7 @@ static inline void dec_prio_bias(runqueu
+ rq->prio_bias -= MAX_PRIO - prio;
+ }
+
+-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
++static inline void inc_nr_running(task_t *p, runqueue_t *rq, cycles_t cycles)
+ {
+ rq->nr_running++;
+ if (rt_task(p)) {
+@@ -684,9 +871,11 @@ static inline void inc_nr_running(task_t
+ inc_prio_bias(rq, p->prio);
+ } else
+ inc_prio_bias(rq, p->static_prio);
++
++ ve_nr_running_inc(p->ve_task_info.owner_env, task_cpu(p), cycles);
+ }
+
+-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
++static inline void dec_nr_running(task_t *p, runqueue_t *rq, cycles_t cycles)
+ {
+ rq->nr_running--;
+ if (rt_task(p)) {
+@@ -694,6 +883,8 @@ static inline void dec_nr_running(task_t
+ dec_prio_bias(rq, p->prio);
+ } else
+ dec_prio_bias(rq, p->static_prio);
++
++ ve_nr_running_dec(p->ve_task_info.owner_env, task_cpu(p), cycles);
+ }
+ #else
+ static inline void inc_prio_bias(runqueue_t *rq, int prio)
+@@ -704,14 +895,16 @@ static inline void dec_prio_bias(runqueu
+ {
+ }
+
+-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
++static inline void inc_nr_running(task_t *p, runqueue_t *rq, cycles_t cycles)
+ {
+ rq->nr_running++;
++ ve_nr_running_inc(p->ve_task_info.owner_env, task_cpu(p), cycles);
+ }
+
+-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
++static inline void dec_nr_running(task_t *p, runqueue_t *rq, cycles_t cycles)
+ {
+ rq->nr_running--;
++ ve_nr_running_dec(p->ve_task_info.owner_env, task_cpu(p), cycles);
+ }
+ #endif
+
+@@ -720,8 +913,15 @@ static inline void dec_nr_running(task_t
+ */
+ static inline void __activate_task(task_t *p, runqueue_t *rq)
+ {
++ cycles_t cycles;
++
++#ifdef CONFIG_VE
++ cycles = get_cycles();
++ write_wakeup_stamp(p, cycles);
++ p->ve_task_info.sleep_time += cycles;
++#endif
+ enqueue_task(p, rq->active);
+- inc_nr_running(p, rq);
++ inc_nr_running(p, rq, cycles);
+ }
+
+ /*
+@@ -730,7 +930,7 @@ static inline void __activate_task(task_
+ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
+ {
+ enqueue_task_head(p, rq->active);
+- inc_nr_running(p, rq);
++ inc_nr_running(p, rq, 0);
+ }
+
+ static int recalc_task_prio(task_t *p, unsigned long long now)
+@@ -850,7 +1050,25 @@ static void activate_task(task_t *p, run
+ */
+ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
+ {
+- dec_nr_running(p, rq);
++ cycles_t cycles;
++#ifdef CONFIG_VE
++ unsigned int cpu;
++ struct ve_struct *ve;
++
++ cycles = get_cycles();
++ cpu = task_cpu(p);
++ ve = p->ve_task_info.owner_env;
++
++ p->ve_task_info.sleep_time -= cycles;
++#endif
++ if (p->state == TASK_UNINTERRUPTIBLE)
++ ve_nr_unint_inc(ve, cpu);
++ if (p->state == TASK_INTERRUPTIBLE)
++ rq->nr_sleeping++;
++ if (p->state == TASK_STOPPED)
++ rq->nr_stopped++;
++
++ dec_nr_running(p, rq, cycles);
+ dequeue_task(p, p->array);
+ p->array = NULL;
+ }
+@@ -1353,7 +1571,13 @@ out_set_cpu:
+
+ out_activate:
+ #endif /* CONFIG_SMP */
+- if (old_state == TASK_UNINTERRUPTIBLE) {
++ if (old_state == TASK_INTERRUPTIBLE)
++ rq->nr_sleeping--;
++ else if (old_state == TASK_STOPPED)
++ rq->nr_stopped--;
++ else if (old_state == TASK_UNINTERRUPTIBLE) {
++ ve_nr_unint_dec(p->ve_task_info.owner_env,
++ smp_processor_id());
+ rq->nr_uninterruptible--;
+ /*
+ * Tasks on involuntary sleep don't earn
+@@ -1453,6 +1677,10 @@ void fastcall sched_fork(task_t *p, int
+ p->first_time_slice = 1;
+ current->time_slice >>= 1;
+ p->timestamp = sched_clock();
++#ifdef CONFIG_VE
++	/* cosmetic: sleep till wakeup below */
++ p->ve_task_info.sleep_time -= get_cycles();
++#endif
+ if (unlikely(!current->time_slice)) {
+ /*
+ * This case is rare, it happens when the parent has only
+@@ -1509,7 +1737,7 @@ void fastcall wake_up_new_task(task_t *p
+ list_add_tail(&p->run_list, &current->run_list);
+ p->array = current->array;
+ p->array->nr_active++;
+- inc_nr_running(p, rq);
++ inc_nr_running(p, rq, get_cycles());
+ }
+ set_need_resched();
+ } else
+@@ -1653,7 +1881,7 @@ asmlinkage void schedule_tail(task_t *pr
+ preempt_enable();
+ #endif
+ if (current->set_child_tid)
+- put_user(current->pid, current->set_child_tid);
++ put_user(virt_pid(current), current->set_child_tid);
+ }
+
+ /*
+@@ -1701,6 +1929,7 @@ unsigned long nr_running(void)
+
+ return sum;
+ }
++EXPORT_SYMBOL(nr_running);
+
+ unsigned long nr_uninterruptible(void)
+ {
+@@ -1719,6 +1948,8 @@ unsigned long nr_uninterruptible(void)
+ return sum;
+ }
+
++EXPORT_SYMBOL(nr_uninterruptible);
++
+ unsigned long long nr_context_switches(void)
+ {
+ unsigned long long i, sum = 0;
+@@ -1729,6 +1960,8 @@ unsigned long long nr_context_switches(v
+ return sum;
+ }
+
++EXPORT_SYMBOL(nr_context_switches);
++
+ unsigned long nr_iowait(void)
+ {
+ unsigned long i, sum = 0;
+@@ -1739,6 +1972,79 @@ unsigned long nr_iowait(void)
+ return sum;
+ }
+
++EXPORT_SYMBOL(nr_iowait);
++
++unsigned long nr_stopped(void)
++{
++ unsigned long i, sum = 0;
++
++ for_each_cpu(i)
++ sum += cpu_rq(i)->nr_stopped;
++
++ return sum;
++}
++
++EXPORT_SYMBOL(nr_stopped);
++
++unsigned long nr_sleeping(void)
++{
++ unsigned long i, sum = 0;
++
++ for_each_cpu(i)
++ sum += cpu_rq(i)->nr_sleeping;
++
++ return sum;
++}
++
++EXPORT_SYMBOL(nr_sleeping);
++
++#ifdef CONFIG_VE
++unsigned long nr_running_ve(struct ve_struct *ve)
++{
++ int i;
++ long sum;
++ cpumask_t ve_cpus;
++
++ sum = 0;
++ ve_cpu_online_map(ve, &ve_cpus);
++ for_each_cpu_mask(i, ve_cpus)
++ sum += VE_CPU_STATS(ve, i)->nr_running;
++ return (unsigned long)(sum < 0 ? 0 : sum);
++}
++
++EXPORT_SYMBOL(nr_running_ve);
++
++unsigned long nr_uninterruptible_ve(struct ve_struct *ve)
++{
++ int i;
++ long sum;
++ cpumask_t ve_cpus;
++
++ sum = 0;
++ ve_cpu_online_map(ve, &ve_cpus);
++ for_each_cpu_mask(i, ve_cpus)
++ sum += VE_CPU_STATS(ve, i)->nr_unint;
++ return (unsigned long)(sum < 0 ? 0 : sum);
++}
++
++EXPORT_SYMBOL(nr_uninterruptible_ve);
++
++unsigned long nr_iowait_ve(struct ve_struct *ve)
++{
++ int i;
++ long sum;
++ cpumask_t ve_cpus;
++
++ sum = 0;
++ ve_cpu_online_map(ve, &ve_cpus);
++ for_each_cpu_mask(i, ve_cpus)
++ sum += VE_CPU_STATS(ve, i)->nr_iowait;
++ return (unsigned long)(sum < 0 ? 0 : sum);
++}
++
++EXPORT_SYMBOL(nr_iowait_ve);
++#endif
++
+ #ifdef CONFIG_SMP
+
+ /*
+@@ -1853,10 +2159,14 @@ static inline
+ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
+ runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
+ {
++ cycles_t cycles;
++
++ cycles = get_cycles();
++
+ dequeue_task(p, src_array);
+- dec_nr_running(p, src_rq);
++ dec_nr_running(p, src_rq, cycles);
+ set_task_cpu(p, this_cpu);
+- inc_nr_running(p, this_rq);
++ inc_nr_running(p, this_rq, cycles);
+ enqueue_task(p, this_array);
+ p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
+ + this_rq->timestamp_last_tick;
+@@ -2560,6 +2870,15 @@ unsigned long long current_sched_time(co
+ STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
+ ((rq)->curr->static_prio > (rq)->best_expired_prio))
+
++#ifdef CONFIG_VE
++#define update_ve_cpu_time(p, time, tick) do { \
++ VE_CPU_STATS((p)->ve_task_info.owner_env, \
++ task_cpu(p))->time += tick; \
++ } while (0)
++#else
++#define update_ve_cpu_time(p, time, tick) do { } while (0)
++#endif
++
+ /*
+ * Account user cpu time to a process.
+ * @p: the process that the cpu time gets accounted to
+@@ -2575,10 +2894,13 @@ void account_user_time(struct task_struc
+
+ /* Add user time to cpustat. */
+ tmp = cputime_to_cputime64(cputime);
+- if (TASK_NICE(p) > 0)
++ if (TASK_NICE(p) > 0) {
+ cpustat->nice = cputime64_add(cpustat->nice, tmp);
+- else
++ update_ve_cpu_time(p, nice, tmp);
++ } else {
+ cpustat->user = cputime64_add(cpustat->user, tmp);
++ update_ve_cpu_time(p, user, tmp);
++ }
+ }
+
+ /*
+@@ -2595,9 +2917,11 @@ void account_system_time(struct task_str
+ cputime64_t tmp;
+
+ p->stime = cputime_add(p->stime, cputime);
++ tmp = cputime_to_cputime64(cputime);
++
++ update_ve_cpu_time(p, system, tmp);
+
+ /* Add system time to cpustat. */
+- tmp = cputime_to_cputime64(cputime);
+ if (hardirq_count() - hardirq_offset)
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ else if (softirq_count())
+@@ -3099,11 +3423,30 @@ switch_tasks:
+
+ sched_info_switch(prev, next);
+ if (likely(prev != next)) {
++ cycles_t cycles;
++
++ cycles = get_cycles();
+ next->timestamp = now;
+ rq->nr_switches++;
+ rq->curr = next;
+ ++*switch_count;
+
++#ifdef CONFIG_VE
++ prev->ve_task_info.sleep_stamp = cycles;
++ if (prev->state == TASK_RUNNING && prev != this_rq()->idle)
++ write_wakeup_stamp(prev, cycles);
++ update_sched_lat(next, cycles);
++
++		/* because next & prev are protected by the
++		 * runqueue lock, we need not worry about
++ * wakeup_stamp and sched_time protection
++ * (same thing in 'else' branch below)
++ */
++ update_ve_task_info(prev, cycles);
++ next->ve_task_info.sched_time = cycles;
++ write_wakeup_stamp(next, 0);
++#endif
++
+ prepare_task_switch(rq, next);
+ prev = context_switch(rq, prev, next);
+ barrier();
+@@ -3113,8 +3456,10 @@ switch_tasks:
+ * frame will be invalid.
+ */
+ finish_task_switch(this_rq(), prev);
+- } else
++ } else {
++ update_ve_task_info(prev, get_cycles());
+ spin_unlock_irq(&rq->lock);
++ }
+
+ prev = current;
+ if (unlikely(reacquire_kernel_lock(prev) < 0))
+@@ -3680,7 +4025,7 @@ task_t *idle_task(int cpu)
+ */
+ static inline task_t *find_process_by_pid(pid_t pid)
+ {
+- return pid ? find_task_by_pid(pid) : current;
++ return pid ? find_task_by_pid_ve(pid) : current;
+ }
+
+ /* Actually do priority change: must hold rq lock. */
+@@ -3732,7 +4077,7 @@ recheck:
+ /*
+ * Allow unprivileged RT tasks to decrease priority:
+ */
+- if (!capable(CAP_SYS_NICE)) {
++ if (!capable(CAP_SYS_ADMIN)) {
+ /* can't change policy */
+ if (policy != p->policy &&
+ !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
+@@ -4181,8 +4526,15 @@ void __sched io_schedule(void)
+ {
+ struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+
++#ifdef CONFIG_VE
++ struct ve_struct *ve;
++ ve = current->ve_task_info.owner_env;
++#endif
++
+ atomic_inc(&rq->nr_iowait);
++ ve_nr_iowait_inc(ve, smp_processor_id());
+ schedule();
++ ve_nr_iowait_dec(ve, smp_processor_id());
+ atomic_dec(&rq->nr_iowait);
+ }
+
+@@ -4193,8 +4545,15 @@ long __sched io_schedule_timeout(long ti
+ struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+ long ret;
+
++#ifdef CONFIG_VE
++ struct ve_struct *ve;
++ ve = current->ve_task_info.owner_env;
++#endif
++
+ atomic_inc(&rq->nr_iowait);
++ ve_nr_iowait_inc(ve, smp_processor_id());
+ ret = schedule_timeout(timeout);
++ ve_nr_iowait_dec(ve, smp_processor_id());
+ atomic_dec(&rq->nr_iowait);
+ return ret;
+ }
+@@ -4315,15 +4674,9 @@ static void show_task(task_t *p)
+ else
+ printk("?");
+ #if (BITS_PER_LONG == 32)
+- if (state == TASK_RUNNING)
+- printk(" running ");
+- else
+- printk(" %08lX ", thread_saved_pc(p));
++ printk(" %08lX ", (unsigned long)p);
+ #else
+- if (state == TASK_RUNNING)
+- printk(" running task ");
+- else
+- printk(" %016lx ", thread_saved_pc(p));
++ printk(" %016lx ", (unsigned long)p);
+ #endif
+ #ifdef CONFIG_DEBUG_STACK_USAGE
+ {
+@@ -4362,21 +4715,21 @@ void show_state(void)
+ #if (BITS_PER_LONG == 32)
+ printk("\n"
+ " sibling\n");
+- printk(" task PC pid father child younger older\n");
++ printk(" task taskaddr pid father child younger older\n");
+ #else
+ printk("\n"
+ " sibling\n");
+- printk(" task PC pid father child younger older\n");
++ printk(" task taskaddr pid father child younger older\n");
+ #endif
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ /*
+ * reset the NMI-timeout, listing all files on a slow
+ * console might take alot of time:
+ */
+ touch_nmi_watchdog();
+ show_task(p);
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+
+ read_unlock(&tasklist_lock);
+ }
+@@ -4655,13 +5008,13 @@ static void migrate_live_tasks(int src_c
+
+ write_lock_irq(&tasklist_lock);
+
+- do_each_thread(t, tsk) {
++ do_each_thread_all(t, tsk) {
+ if (tsk == current)
+ continue;
+
+ if (task_cpu(tsk) == src_cpu)
+ move_task_off_dead_cpu(src_cpu, tsk);
+- } while_each_thread(t, tsk);
++ } while_each_thread_all(t, tsk);
+
+ write_unlock_irq(&tasklist_lock);
+ }
+@@ -5680,7 +6033,7 @@ void normalize_rt_tasks(void)
+ runqueue_t *rq;
+
+ read_lock_irq(&tasklist_lock);
+- for_each_process (p) {
++ for_each_process_all (p) {
+ if (!rt_task(p))
+ continue;
+
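
The scheduler hunks above split every accounted tick into both the host-wide cpustat
buckets and the owning container's buckets. Below is a minimal user-space sketch of that
double-bookkeeping pattern; the struct and function names are hypothetical and not taken
from the patch itself:

#include <stdio.h>

/* hypothetical per-container CPU statistics, mirroring the cpustat buckets */
struct ve_cpu_stats {
	unsigned long long user;
	unsigned long long nice;
	unsigned long long system;
};

struct task {
	int nice;                       /* task nice value */
	struct ve_cpu_stats *ve_stats;  /* stats of the container this task runs in */
};

/* global (host-wide) buckets, as kept by the unpatched kernel */
static struct ve_cpu_stats host_stats;

/* account one tick of user time both host-wide and per container */
static void account_user_tick(struct task *p, unsigned long long ticks)
{
	if (p->nice > 0) {
		host_stats.nice += ticks;
		p->ve_stats->nice += ticks;
	} else {
		host_stats.user += ticks;
		p->ve_stats->user += ticks;
	}
}

int main(void)
{
	struct ve_cpu_stats ve = {0};
	struct task t = { .nice = 0, .ve_stats = &ve };

	account_user_tick(&t, 4);
	t.nice = 10;
	account_user_tick(&t, 2);
	printf("host user=%llu nice=%llu; ve user=%llu nice=%llu\n",
	       host_stats.user, host_stats.nice, ve.user, ve.nice);
	return 0;
}

The point of the pattern is that the host totals stay exactly as before while each
container accumulates only its own share.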
+diff -uprN linux-2.6.15.orig/kernel/signal.c linux-2.6.15-ve025stab014/kernel/signal.c
+--- linux-2.6.15.orig/kernel/signal.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/signal.c 2006-01-27 14:48:08.000000000 +0300
+@@ -25,11 +25,14 @@
+ #include <linux/posix-timers.h>
+ #include <linux/signal.h>
+ #include <linux/audit.h>
++#include <linux/kmem_cache.h>
+ #include <asm/param.h>
+ #include <asm/uaccess.h>
+ #include <asm/unistd.h>
+ #include <asm/siginfo.h>
+
++#include <ub/ub_misc.h>
++
+ /*
+ * SLAB caches for signal bits.
+ */
+@@ -270,8 +273,13 @@ static struct sigqueue *__sigqueue_alloc
+ atomic_inc(&t->user->sigpending);
+ if (override_rlimit ||
+ atomic_read(&t->user->sigpending) <=
+- t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
++ t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) {
+ q = kmem_cache_alloc(sigqueue_cachep, flags);
++ if (q && ub_siginfo_charge(q, get_task_ub(t))) {
++ kmem_cache_free(sigqueue_cachep, q);
++ q = NULL;
++ }
++ }
+ if (unlikely(q == NULL)) {
+ atomic_dec(&t->user->sigpending);
+ } else {
+@@ -288,6 +296,7 @@ static inline void __sigqueue_free(struc
+ return;
+ atomic_dec(&q->user->sigpending);
+ free_uid(q->user);
++ ub_siginfo_uncharge(q);
+ kmem_cache_free(sigqueue_cachep, q);
+ }
+
+@@ -513,7 +522,16 @@ static int __dequeue_signal(struct sigpe
+ {
+ int sig = 0;
+
+- sig = next_signal(pending, mask);
++ /* SIGKILL must have priority, otherwise it is quite easy
++	 * to create an unkillable process by sending sig < SIGKILL
++ * to self */
++ if (unlikely(sigismember(&pending->signal, SIGKILL))) {
++ if (!sigismember(mask, SIGKILL))
++ sig = SIGKILL;
++ }
++
++ if (likely(!sig))
++ sig = next_signal(pending, mask);
+ if (sig) {
+ if (current->notifier) {
+ if (sigismember(current->notifier_mask, sig)) {
+@@ -800,7 +818,7 @@ static int send_signal(int sig, struct s
+ q->info.si_signo = sig;
+ q->info.si_errno = 0;
+ q->info.si_code = SI_USER;
+- q->info.si_pid = current->pid;
++ q->info.si_pid = virt_pid(current);
+ q->info.si_uid = current->uid;
+ break;
+ case (unsigned long) SEND_SIG_PRIV:
+@@ -1110,13 +1128,18 @@ int __kill_pg_info(int sig, struct sigin
+ if (pgrp <= 0)
+ return -EINVAL;
+
++ /* Use __vpid_to_pid(). This function is used under write_lock
++ * tasklist_lock. */
++ if (is_virtual_pid(pgrp))
++ pgrp = __vpid_to_pid(pgrp);
++
+ success = 0;
+ retval = -ESRCH;
+- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
+ int err = group_send_sig_info(sig, info, p);
+ success |= !err;
+ retval = err;
+- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+ return success ? 0 : retval;
+ }
+
+@@ -1139,7 +1162,7 @@ kill_proc_info(int sig, struct siginfo *
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ error = -ESRCH;
+ if (p)
+ error = group_send_sig_info(sig, info, p);
+@@ -1158,7 +1181,7 @@ int kill_proc_info_as_uid(int sig, struc
+ return ret;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ if (!p) {
+ ret = -ESRCH;
+ goto out_unlock;
+@@ -1198,8 +1221,8 @@ static int kill_something_info(int sig,
+ struct task_struct * p;
+
+ read_lock(&tasklist_lock);
+- for_each_process(p) {
+- if (p->pid > 1 && p->tgid != current->tgid) {
++ for_each_process_ve(p) {
++ if (virt_pid(p) > 1 && p->tgid != current->tgid) {
+ int err = group_send_sig_info(sig, info, p);
+ ++count;
+ if (err != -EPERM)
+@@ -1467,9 +1490,17 @@ void do_notify_parent(struct task_struct
+ BUG_ON(!tsk->ptrace &&
+ (tsk->group_leader != tsk || !thread_group_empty(tsk)));
+
++#ifdef CONFIG_VE
++	/* Allow sending only SIGCHLD from a VE */
++ if (sig != SIGCHLD &&
++ tsk->ve_task_info.owner_env !=
++ tsk->parent->ve_task_info.owner_env)
++ sig = SIGCHLD;
++#endif
++
+ info.si_signo = sig;
+ info.si_errno = 0;
+- info.si_pid = tsk->pid;
++ info.si_pid = get_task_pid_ve(tsk, tsk->parent->ve_task_info.owner_env);
+ info.si_uid = tsk->uid;
+
+ /* FIXME: find out whether or not this is supposed to be c*time. */
+@@ -1534,7 +1565,7 @@ static void do_notify_parent_cldstop(str
+
+ info.si_signo = SIGCHLD;
+ info.si_errno = 0;
+- info.si_pid = tsk->pid;
++ info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(parent)->owner_env);
+ info.si_uid = tsk->uid;
+
+ /* FIXME: find out whether or not this is supposed to be c*time. */
+@@ -1862,7 +1893,7 @@ relock:
+ info->si_signo = signr;
+ info->si_errno = 0;
+ info->si_code = SI_USER;
+- info->si_pid = current->parent->pid;
++ info->si_pid = virt_pid(current->parent);
+ info->si_uid = current->parent->uid;
+ }
+
+@@ -1893,8 +1924,14 @@ relock:
+ continue;
+
+ /* Init gets no signals it doesn't want. */
+- if (current->pid == 1)
++ if (virt_pid(current) == 1) {
++ /* Allow SIGKILL for non-root VE */
++#ifdef CONFIG_VE
++ if (ve_is_super(get_exec_env()) ||
++ signr != SIGKILL)
++#endif
+ continue;
++ }
+
+ if (sig_kernel_stop(signr)) {
+ /*
+@@ -2245,7 +2282,7 @@ sys_kill(int pid, int sig)
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_USER;
+- info.si_pid = current->tgid;
++ info.si_pid = virt_tgid(current);
+ info.si_uid = current->uid;
+
+ return kill_something_info(sig, &info, pid);
+@@ -2261,12 +2298,12 @@ static int do_tkill(int tgid, int pid, i
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_TKILL;
+- info.si_pid = current->tgid;
++ info.si_pid = virt_tgid(current);
+ info.si_uid = current->uid;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
+- if (p && (tgid <= 0 || p->tgid == tgid)) {
++ p = find_task_by_pid_ve(pid);
++ if (p && (tgid <= 0 || virt_tgid(p) == tgid)) {
+ error = check_kill_permission(sig, &info, p);
+ /*
+ * The null signal is a permissions and process existence
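
The __dequeue_signal() change above gives SIGKILL absolute priority over other pending
signals, so a flood of lower-numbered signals cannot make a task unkillable. A rough
user-space model of that dequeue order, using a plain 64-bit mask instead of the kernel's
sigset_t (all names here are illustrative only):

#include <stdio.h>
#include <stdint.h>

#define SIGKILL 9

/* return the lowest set signal number (1-based), or 0 if none */
static int lowest_signal(uint64_t set)
{
	for (int sig = 1; sig <= 64; sig++)
		if (set & (1ULL << (sig - 1)))
			return sig;
	return 0;
}

/* Dequeue the next deliverable signal, giving SIGKILL absolute priority
 * so lower-numbered pending signals cannot starve it. */
static int dequeue_signal(uint64_t *pending, uint64_t blocked)
{
	uint64_t deliverable = *pending & ~blocked;
	int sig;

	if (deliverable & (1ULL << (SIGKILL - 1)))
		sig = SIGKILL;
	else
		sig = lowest_signal(deliverable);

	if (sig)
		*pending &= ~(1ULL << (sig - 1));
	return sig;
}

int main(void)
{
	uint64_t pending = (1ULL << 0) | (1ULL << (SIGKILL - 1)); /* SIGHUP + SIGKILL */

	printf("first: %d\n", dequeue_signal(&pending, 0));  /* 9 (SIGKILL) */
	printf("second: %d\n", dequeue_signal(&pending, 0)); /* 1 (SIGHUP) */
	return 0;
}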
+diff -uprN linux-2.6.15.orig/kernel/softirq.c linux-2.6.15-ve025stab014/kernel/softirq.c
+--- linux-2.6.15.orig/kernel/softirq.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/softirq.c 2006-01-27 14:48:08.000000000 +0300
+@@ -17,6 +17,8 @@
+ #include <linux/kthread.h>
+ #include <linux/rcupdate.h>
+
++#include <ub/beancounter.h>
++
+ #include <asm/irq.h>
+ /*
+ - No shared variables, all the data are CPU local.
+@@ -73,10 +75,14 @@ static inline void wakeup_softirqd(void)
+
+ asmlinkage void __do_softirq(void)
+ {
++ struct user_beancounter *ub;
+ struct softirq_action *h;
+ __u32 pending;
+ int max_restart = MAX_SOFTIRQ_RESTART;
+ int cpu;
++ struct ve_struct *envid;
++
++ envid = set_exec_env(get_ve0());
+
+ pending = local_softirq_pending();
+
+@@ -90,6 +96,7 @@ restart:
+
+ h = softirq_vec;
+
++ ub = set_exec_ub(get_ub0());
+ do {
+ if (pending & 1) {
+ h->action(h);
+@@ -98,6 +105,7 @@ restart:
+ h++;
+ pending >>= 1;
+ } while (pending);
++ (void)set_exec_ub(ub);
+
+ local_irq_disable();
+
+@@ -108,6 +116,7 @@ restart:
+ if (pending)
+ wakeup_softirqd();
+
++ (void)set_exec_env(envid);
+ __local_bh_enable();
+ }
+
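
__do_softirq() above temporarily switches the execution environment and beancounter to
the host ones and restores the interrupted task's context afterwards. A small sketch of
that save/set/restore pattern, with a hypothetical exec_env type standing in for
ve_struct:

#include <stdio.h>

/* hypothetical "execution environment", analogous to the per-task VE context
 * switched around softirq handlers */
struct exec_env { const char *name; };

static struct exec_env host_env = { "host" };
static struct exec_env *current_env = &host_env;

/* install a new context and return the previous one, like set_exec_env() */
static struct exec_env *set_env(struct exec_env *new_env)
{
	struct exec_env *old = current_env;

	current_env = new_env;
	return old;
}

static void handler(void)
{
	printf("running handler in env '%s'\n", current_env->name);
}

int main(void)
{
	struct exec_env container = { "container-101" };
	struct exec_env *saved;

	current_env = &container;   /* pretend a container task was running */
	saved = set_env(&host_env); /* handlers always run in the host env   */
	handler();
	set_env(saved);             /* restore the interrupted task's env    */
	printf("back in env '%s'\n", current_env->name);
	return 0;
}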
+diff -uprN linux-2.6.15.orig/kernel/sys.c linux-2.6.15-ve025stab014/kernel/sys.c
+--- linux-2.6.15.orig/kernel/sys.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/sys.c 2006-01-27 14:48:08.000000000 +0300
+@@ -11,6 +11,7 @@
+ #include <linux/mman.h>
+ #include <linux/smp_lock.h>
+ #include <linux/notifier.h>
++#include <linux/virtinfo.h>
+ #include <linux/reboot.h>
+ #include <linux/prctl.h>
+ #include <linux/init.h>
+@@ -223,6 +224,94 @@ int unregister_reboot_notifier(struct no
+
+ EXPORT_SYMBOL(unregister_reboot_notifier);
+
++static DECLARE_MUTEX(virtinfo_sem);
++static struct vnotifier_block *virtinfo_chain[VIRT_TYPES];
++
++void virtinfo_notifier_register(int type, struct vnotifier_block *nb)
++{
++ struct vnotifier_block **p;
++
++ down(&virtinfo_sem);
++ for (p = &virtinfo_chain[type];
++ *p != NULL && nb->priority < (*p)->priority;
++ p = &(*p)->next);
++ nb->next = *p;
++ smp_wmb();
++ *p = nb;
++ up(&virtinfo_sem);
++}
++
++EXPORT_SYMBOL(virtinfo_notifier_register);
++
++struct virtinfo_cnt_struct {
++ volatile unsigned long exit[NR_CPUS];
++ volatile unsigned long entry;
++};
++static DEFINE_PER_CPU(struct virtinfo_cnt_struct, virtcnt);
++
++void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb)
++{
++ struct vnotifier_block **p;
++ int entry_cpu, exit_cpu;
++ unsigned long cnt, ent;
++
++ down(&virtinfo_sem);
++ for (p = &virtinfo_chain[type]; *p != nb; p = &(*p)->next);
++ *p = nb->next;
++ smp_mb();
++
++ for_each_cpu_mask(entry_cpu, cpu_possible_map) {
++ while (1) {
++ cnt = 0;
++ for_each_cpu_mask(exit_cpu, cpu_possible_map)
++ cnt +=
++ per_cpu(virtcnt, entry_cpu).exit[exit_cpu];
++ smp_rmb();
++ ent = per_cpu(virtcnt, entry_cpu).entry;
++ if (cnt == ent)
++ break;
++ __set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(HZ / 100);
++ }
++ }
++ up(&virtinfo_sem);
++}
++
++EXPORT_SYMBOL(virtinfo_notifier_unregister);
++
++int virtinfo_notifier_call(int type, unsigned long n, void *data)
++{
++ int ret;
++ int entry_cpu, exit_cpu;
++ struct vnotifier_block *nb;
++
++ entry_cpu = get_cpu();
++ per_cpu(virtcnt, entry_cpu).entry++;
++ smp_wmb();
++ put_cpu();
++
++ nb = virtinfo_chain[type];
++ ret = NOTIFY_DONE;
++ while (nb)
++ {
++ ret = nb->notifier_call(nb, n, data, ret);
++ if(ret & NOTIFY_STOP_MASK) {
++ ret &= ~NOTIFY_STOP_MASK;
++ break;
++ }
++ nb = nb->next;
++ }
++
++ exit_cpu = get_cpu();
++ smp_wmb();
++ per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++;
++ put_cpu();
++
++ return ret;
++}
++
++EXPORT_SYMBOL(virtinfo_notifier_call);
++
+ static int set_one_prio(struct task_struct *p, int niceval, int error)
+ {
+ int no_nice;
+@@ -268,17 +357,19 @@ asmlinkage long sys_setpriority(int whic
+ switch (which) {
+ case PRIO_PROCESS:
+ if (!who)
+- who = current->pid;
+- p = find_task_by_pid(who);
++ who = virt_pid(current);
++ p = find_task_by_pid_ve(who);
+ if (p)
+ error = set_one_prio(p, niceval, error);
+ break;
+ case PRIO_PGRP:
+ if (!who)
+ who = process_group(current);
+- do_each_task_pid(who, PIDTYPE_PGID, p) {
++ else
++ who = vpid_to_pid(who);
++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
+ error = set_one_prio(p, niceval, error);
+- } while_each_task_pid(who, PIDTYPE_PGID, p);
++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p);
+ break;
+ case PRIO_USER:
+ user = current->user;
+@@ -288,10 +379,10 @@ asmlinkage long sys_setpriority(int whic
+ if ((who != current->uid) && !(user = find_user(who)))
+ goto out_unlock; /* No processes for this user */
+
+- do_each_thread(g, p)
++ do_each_thread_ve(g, p)
+ if (p->uid == who)
+ error = set_one_prio(p, niceval, error);
+- while_each_thread(g, p);
++ while_each_thread_ve(g, p);
+ if (who != current->uid)
+ free_uid(user); /* For find_user() */
+ break;
+@@ -321,8 +412,8 @@ asmlinkage long sys_getpriority(int whic
+ switch (which) {
+ case PRIO_PROCESS:
+ if (!who)
+- who = current->pid;
+- p = find_task_by_pid(who);
++ who = virt_pid(current);
++ p = find_task_by_pid_ve(who);
+ if (p) {
+ niceval = 20 - task_nice(p);
+ if (niceval > retval)
+@@ -332,11 +423,13 @@ asmlinkage long sys_getpriority(int whic
+ case PRIO_PGRP:
+ if (!who)
+ who = process_group(current);
+- do_each_task_pid(who, PIDTYPE_PGID, p) {
++ else
++ who = vpid_to_pid(who);
++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
+ niceval = 20 - task_nice(p);
+ if (niceval > retval)
+ retval = niceval;
+- } while_each_task_pid(who, PIDTYPE_PGID, p);
++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p);
+ break;
+ case PRIO_USER:
+ user = current->user;
+@@ -346,13 +439,13 @@ asmlinkage long sys_getpriority(int whic
+ if ((who != current->uid) && !(user = find_user(who)))
+ goto out_unlock; /* No processes for this user */
+
+- do_each_thread(g, p)
++ do_each_thread_ve(g, p)
+ if (p->uid == who) {
+ niceval = 20 - task_nice(p);
+ if (niceval > retval)
+ retval = niceval;
+ }
+- while_each_thread(g, p);
++ while_each_thread_ve(g, p);
+ if (who != current->uid)
+ free_uid(user); /* for find_user() */
+ break;
+@@ -489,6 +582,35 @@ asmlinkage long sys_reboot(int magic1, i
+ magic2 != LINUX_REBOOT_MAGIC2C))
+ return -EINVAL;
+
++#ifdef CONFIG_VE
++ if (!ve_is_super(get_exec_env()))
++ switch (cmd) {
++ case LINUX_REBOOT_CMD_RESTART:
++ case LINUX_REBOOT_CMD_HALT:
++ case LINUX_REBOOT_CMD_POWER_OFF:
++ case LINUX_REBOOT_CMD_RESTART2: {
++ struct siginfo info;
++
++ info.si_errno = 0;
++ info.si_code = SI_KERNEL;
++ info.si_pid = virt_pid(current);
++ info.si_uid = current->uid;
++ info.si_signo = SIGKILL;
++
++ /* Sending to real init is safe */
++ send_sig_info(SIGKILL, &info,
++ get_exec_env()->init_entry);
++ }
++
++ case LINUX_REBOOT_CMD_CAD_ON:
++ case LINUX_REBOOT_CMD_CAD_OFF:
++ return 0;
++
++ default:
++ return -EINVAL;
++ }
++#endif
++
+ lock_kernel();
+ switch (cmd) {
+ case LINUX_REBOOT_CMD_RESTART:
+@@ -672,7 +794,7 @@ asmlinkage long sys_setgid(gid_t gid)
+ return 0;
+ }
+
+-static int set_user(uid_t new_ruid, int dumpclear)
++int set_user(uid_t new_ruid, int dumpclear)
+ {
+ struct user_struct *new_user;
+
+@@ -697,6 +819,7 @@ static int set_user(uid_t new_ruid, int
+ current->uid = new_ruid;
+ return 0;
+ }
++EXPORT_SYMBOL(set_user);
+
+ /*
+ * Unprivileged users may change the real uid to the effective uid
+@@ -1065,7 +1188,12 @@ asmlinkage long sys_times(struct tms __u
+ if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
+ return -EFAULT;
+ }
++#ifndef CONFIG_VE
+ return (long) jiffies_64_to_clock_t(get_jiffies_64());
++#else
++ return (long) jiffies_64_to_clock_t(get_jiffies_64() -
++ get_exec_env()->start_jiffies);
++#endif
+ }
+
+ /*
+@@ -1085,21 +1213,24 @@ asmlinkage long sys_setpgid(pid_t pid, p
+ {
+ struct task_struct *p;
+ int err = -EINVAL;
++ int _pgid;
+
+ if (!pid)
+- pid = current->pid;
++ pid = virt_pid(current);
+ if (!pgid)
+ pgid = pid;
+ if (pgid < 0)
+ return -EINVAL;
+
++ _pgid = vpid_to_pid(pgid);
++
+ /* From this point forward we keep holding onto the tasklist lock
+ * so that our parent does not change from under us. -DaveM
+ */
+ write_lock_irq(&tasklist_lock);
+
+ err = -ESRCH;
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ if (!p)
+ goto out;
+
+@@ -1124,25 +1255,35 @@ asmlinkage long sys_setpgid(pid_t pid, p
+ if (p->signal->leader)
+ goto out;
+
+- if (pgid != pid) {
++ pgid = virt_pid(p);
++ if (_pgid != p->pid) {
+ struct task_struct *p;
+
+- do_each_task_pid(pgid, PIDTYPE_PGID, p) {
+- if (p->signal->session == current->signal->session)
++ do_each_task_pid_ve(_pgid, PIDTYPE_PGID, p) {
++ if (p->signal->session == current->signal->session) {
++ pgid = virt_pgid(p);
+ goto ok_pgid;
+- } while_each_task_pid(pgid, PIDTYPE_PGID, p);
++ }
++ } while_each_task_pid_ve(_pgid, PIDTYPE_PGID, p);
+ goto out;
+ }
+
+ ok_pgid:
+- err = security_task_setpgid(p, pgid);
++ err = security_task_setpgid(p, _pgid);
+ if (err)
+ goto out;
+
+ if (process_group(p) != pgid) {
+ detach_pid(p, PIDTYPE_PGID);
+- p->signal->pgrp = pgid;
+- attach_pid(p, PIDTYPE_PGID, pgid);
++ p->signal->pgrp = _pgid;
++ set_virt_pgid(p, pgid);
++ attach_pid(p, PIDTYPE_PGID, _pgid);
++ if (atomic_read(&p->signal->count) != 1) {
++ task_t *t;
++ for (t = next_thread(p); t != p; t = next_thread(t)) {
++ set_virt_pgid(t, pgid);
++ }
++ }
+ }
+
+ err = 0;
+@@ -1155,19 +1296,19 @@ out:
+ asmlinkage long sys_getpgid(pid_t pid)
+ {
+ if (!pid) {
+- return process_group(current);
++ return virt_pgid(current);
+ } else {
+ int retval;
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+
+ retval = -ESRCH;
+ if (p) {
+ retval = security_task_getpgid(p);
+ if (!retval)
+- retval = process_group(p);
++ retval = virt_pgid(p);
+ }
+ read_unlock(&tasklist_lock);
+ return retval;
+@@ -1179,7 +1320,7 @@ asmlinkage long sys_getpgid(pid_t pid)
+ asmlinkage long sys_getpgrp(void)
+ {
+ /* SMP - assuming writes are word atomic this is fine */
+- return process_group(current);
++ return virt_pgid(current);
+ }
+
+ #endif
+@@ -1187,19 +1328,19 @@ asmlinkage long sys_getpgrp(void)
+ asmlinkage long sys_getsid(pid_t pid)
+ {
+ if (!pid) {
+- return current->signal->session;
++ return virt_sid(current);
+ } else {
+ int retval;
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+
+ retval = -ESRCH;
+ if(p) {
+ retval = security_task_getsid(p);
+ if (!retval)
+- retval = p->signal->session;
++ retval = virt_sid(p);
+ }
+ read_unlock(&tasklist_lock);
+ return retval;
+@@ -1223,9 +1364,19 @@ asmlinkage long sys_setsid(void)
+
+ current->signal->leader = 1;
+ __set_special_pids(current->pid, current->pid);
++ set_virt_pgid(current, virt_pid(current));
++ set_virt_sid(current, virt_pid(current));
+ current->signal->tty = NULL;
+ current->signal->tty_old_pgrp = 0;
+- err = process_group(current);
++ if (atomic_read(&current->signal->count) != 1) {
++ task_t *t;
++ for (t = next_thread(current); t != current; t = next_thread(t)) {
++ set_virt_pgid(t, virt_pid(current));
++ set_virt_sid(t, virt_pid(current));
++ }
++ }
++
++ err = virt_pgid(current);
+ out:
+ write_unlock_irq(&tasklist_lock);
+ up(&tty_sem);
+@@ -1505,7 +1656,7 @@ asmlinkage long sys_newuname(struct new_
+ int errno = 0;
+
+ down_read(&uts_sem);
+- if (copy_to_user(name,&system_utsname,sizeof *name))
++ if (copy_to_user(name,&ve_utsname,sizeof *name))
+ errno = -EFAULT;
+ up_read(&uts_sem);
+ return errno;
+@@ -1516,15 +1667,15 @@ asmlinkage long sys_sethostname(char __u
+ int errno;
+ char tmp[__NEW_UTS_LEN];
+
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ if (len < 0 || len > __NEW_UTS_LEN)
+ return -EINVAL;
+ down_write(&uts_sem);
+ errno = -EFAULT;
+ if (!copy_from_user(tmp, name, len)) {
+- memcpy(system_utsname.nodename, tmp, len);
+- system_utsname.nodename[len] = 0;
++ memcpy(ve_utsname.nodename, tmp, len);
++ ve_utsname.nodename[len] = 0;
+ errno = 0;
+ }
+ up_write(&uts_sem);
+@@ -1540,11 +1691,11 @@ asmlinkage long sys_gethostname(char __u
+ if (len < 0)
+ return -EINVAL;
+ down_read(&uts_sem);
+- i = 1 + strlen(system_utsname.nodename);
++ i = 1 + strlen(ve_utsname.nodename);
+ if (i > len)
+ i = len;
+ errno = 0;
+- if (copy_to_user(name, system_utsname.nodename, i))
++ if (copy_to_user(name, ve_utsname.nodename, i))
+ errno = -EFAULT;
+ up_read(&uts_sem);
+ return errno;
+@@ -1561,7 +1712,7 @@ asmlinkage long sys_setdomainname(char _
+ int errno;
+ char tmp[__NEW_UTS_LEN];
+
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ if (len < 0 || len > __NEW_UTS_LEN)
+ return -EINVAL;
+@@ -1569,8 +1720,8 @@ asmlinkage long sys_setdomainname(char _
+ down_write(&uts_sem);
+ errno = -EFAULT;
+ if (!copy_from_user(tmp, name, len)) {
+- memcpy(system_utsname.domainname, tmp, len);
+- system_utsname.domainname[len] = 0;
++ memcpy(ve_utsname.domainname, tmp, len);
++ ve_utsname.domainname[len] = 0;
+ errno = 0;
+ }
+ up_write(&uts_sem);
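
The virtinfo_notifier_* functions added above keep a chain sorted by priority and let a
callback veto further processing via NOTIFY_STOP_MASK. A compact user-space sketch of the
same chain mechanics, without the per-CPU entry/exit counters used for unregister
synchronization (the struct and function names are made up for the example):

#include <stdio.h>

#define NOTIFY_DONE      0x0000
#define NOTIFY_OK        0x0001
#define NOTIFY_STOP_MASK 0x8000

struct vblock {
	int (*call)(struct vblock *nb, unsigned long event, void *data, int old_ret);
	struct vblock *next;
	int priority;
};

/* insert a block keeping the list sorted by descending priority */
static void chain_register(struct vblock **chain, struct vblock *nb)
{
	struct vblock **p;

	for (p = chain; *p != NULL && nb->priority < (*p)->priority; p = &(*p)->next)
		;
	nb->next = *p;
	*p = nb;
}

/* walk the chain; a callback setting NOTIFY_STOP_MASK ends the walk */
static int chain_call(struct vblock *chain, unsigned long event, void *data)
{
	int ret = NOTIFY_DONE;

	for (struct vblock *nb = chain; nb != NULL; nb = nb->next) {
		ret = nb->call(nb, event, data, ret);
		if (ret & NOTIFY_STOP_MASK) {
			ret &= ~NOTIFY_STOP_MASK;
			break;
		}
	}
	return ret;
}

static int first(struct vblock *nb, unsigned long e, void *d, int r)
{
	printf("high-priority callback\n");
	return NOTIFY_OK | NOTIFY_STOP_MASK;  /* veto further callbacks */
}

static int second(struct vblock *nb, unsigned long e, void *d, int r)
{
	printf("never reached\n");
	return NOTIFY_OK;
}

int main(void)
{
	struct vblock *chain = NULL;
	struct vblock a = { first, NULL, 10 }, b = { second, NULL, 0 };

	chain_register(&chain, &b);
	chain_register(&chain, &a); /* ends up first because of its priority */
	printf("result: %d\n", chain_call(chain, 1, NULL));
	return 0;
}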
+diff -uprN linux-2.6.15.orig/kernel/sysctl.c linux-2.6.15-ve025stab014/kernel/sysctl.c
+--- linux-2.6.15.orig/kernel/sysctl.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/sysctl.c 2006-01-27 14:48:08.000000000 +0300
+@@ -25,6 +25,8 @@
+ #include <linux/slab.h>
+ #include <linux/sysctl.h>
+ #include <linux/proc_fs.h>
++#include <linux/ve_owner.h>
++#include <linux/ve.h>
+ #include <linux/ctype.h>
+ #include <linux/utsname.h>
+ #include <linux/capability.h>
+@@ -67,6 +69,12 @@ extern int min_free_kbytes;
+ extern int printk_ratelimit_jiffies;
+ extern int printk_ratelimit_burst;
+ extern int pid_max_min, pid_max_max;
++#ifdef CONFIG_VE
++int glob_virt_pids = 1;
++EXPORT_SYMBOL(glob_virt_pids);
++#endif
++
++extern int ve_area_access_check; /* fs/namei.c */
+
+ #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+ int unknown_nmi_panic;
+@@ -128,8 +136,6 @@ int randomize_va_space = 1;
+
+ static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
+ ctl_table *, void **);
+-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+- void __user *buffer, size_t *lenp, loff_t *ppos);
+
+ static ctl_table root_table[];
+ static struct ctl_table_header root_table_header =
+@@ -585,6 +591,16 @@ static ctl_table kern_table[] = {
+ .extra1 = &pid_max_min,
+ .extra2 = &pid_max_max,
+ },
++#ifdef CONFIG_VE
++ {
++ .ctl_name = KERN_VIRT_PIDS,
++ .procname = "virt_pids",
++ .data = &glob_virt_pids,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++#endif
+ {
+ .ctl_name = KERN_PANIC_ON_OOPS,
+ .procname = "panic_on_oops",
+@@ -1046,6 +1062,7 @@ int do_sysctl(int __user *name, int nlen
+ {
+ struct list_head *tmp;
+ int error = -ENOTDIR;
++ struct ve_struct *ve;
+
+ if (nlen <= 0 || nlen >= CTL_MAXNAME)
+ return -ENOTDIR;
+@@ -1054,13 +1071,24 @@ int do_sysctl(int __user *name, int nlen
+ if (!oldlenp || get_user(old_len, oldlenp))
+ return -EFAULT;
+ }
++ ve = get_exec_env();
+ spin_lock(&sysctl_lock);
++#ifdef CONFIG_VE
++ tmp = ve->sysctl_lh.next;
++#else
+ tmp = &root_table_header.ctl_entry;
++#endif
+ do {
+- struct ctl_table_header *head =
+- list_entry(tmp, struct ctl_table_header, ctl_entry);
++ struct ctl_table_header *head;
+ void *context = NULL;
+
++#ifdef CONFIG_VE
++ if (tmp == &ve->sysctl_lh)
++ /* second pass over global variables */
++ tmp = &root_table_header.ctl_entry;
++#endif
++
++ head = list_entry(tmp, struct ctl_table_header, ctl_entry);
+ if (!use_table(head))
+ continue;
+
+@@ -1114,10 +1142,14 @@ static int test_perm(int mode, int op)
+ static inline int ctl_perm(ctl_table *table, int op)
+ {
+ int error;
++ int mode = table->mode;
++
+ error = security_sysctl(table, op);
+ if (error)
+ return error;
+- return test_perm(table->mode, op);
++ if (!ve_accessible(table->owner_env, get_exec_env()))
++ mode &= ~0222; /* disable write access */
++ return test_perm(mode, op);
+ }
+
+ static int parse_table(int __user *name, int nlen,
+@@ -1283,6 +1315,8 @@ struct ctl_table_header *register_sysctl
+ int insert_at_head)
+ {
+ struct ctl_table_header *tmp;
++ struct list_head *lh;
++
+ tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
+ if (!tmp)
+ return NULL;
+@@ -1291,17 +1325,52 @@ struct ctl_table_header *register_sysctl
+ tmp->used = 0;
+ tmp->unregistering = NULL;
+ spin_lock(&sysctl_lock);
++#ifdef CONFIG_VE
++ lh = &get_exec_env()->sysctl_lh;
++#else
++ lh = &root_table_header.ctl_entry;
++#endif
+ if (insert_at_head)
+- list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
++ list_add(&tmp->ctl_entry, lh);
+ else
+- list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
++ list_add_tail(&tmp->ctl_entry, lh);
+ spin_unlock(&sysctl_lock);
+ #ifdef CONFIG_PROC_FS
++#ifdef CONFIG_VE
++ register_proc_table(table, get_exec_env()->proc_sys_root, tmp);
++#else
+ register_proc_table(table, proc_sys_root, tmp);
+ #endif
++#endif
+ return tmp;
+ }
+
++void free_sysctl_clone(ctl_table *clone)
++{
++ kfree(clone);
++}
++
++ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr)
++{
++ int i;
++ ctl_table *clone;
++
++ clone = kmalloc(nr * sizeof(ctl_table), GFP_KERNEL);
++ if (clone == NULL)
++ return NULL;
++
++ memcpy(clone, tmpl, nr * sizeof(ctl_table));
++ for (i = 0; i < nr; i++) {
++ if (tmpl[i].ctl_name == 0)
++ continue;
++ clone[i].owner_env = get_exec_env();
++ if (tmpl[i].child == NULL)
++ continue;
++ clone[i].child = clone + (tmpl[i].child - tmpl);
++ }
++ return clone;
++}
++
+ /**
+ * unregister_sysctl_table - unregister a sysctl table hierarchy
+ * @header: the header returned from register_sysctl_table
+@@ -1315,8 +1384,12 @@ void unregister_sysctl_table(struct ctl_
+ spin_lock(&sysctl_lock);
+ start_unregistering(header);
+ #ifdef CONFIG_PROC_FS
++#ifdef CONFIG_VE
++ unregister_proc_table(header->ctl_table, get_exec_env()->proc_sys_root);
++#else
+ unregister_proc_table(header->ctl_table, proc_sys_root);
+ #endif
++#endif
+ spin_unlock(&sysctl_lock);
+ kfree(header);
+ }
+@@ -1402,11 +1475,6 @@ static void unregister_proc_table(ctl_ta
+ * its fields. We are under sysctl_lock here.
+ */
+ de->data = NULL;
+-
+- /* Don't unregister proc entries that are still being used.. */
+- if (atomic_read(&de->count))
+- continue;
+-
+ table->de = NULL;
+ remove_proc_entry(table->procname, root);
+ }
+@@ -1548,7 +1616,7 @@ int proc_dostring(ctl_table *table, int
+ * to observe. Should this be in kernel/sys.c ????
+ */
+
+-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
++int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+ int r;
+@@ -2123,7 +2191,7 @@ int proc_dostring(ctl_table *table, int
+ return -ENOSYS;
+ }
+
+-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
++int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+ return -ENOSYS;
+@@ -2182,7 +2250,6 @@ int proc_doulongvec_ms_jiffies_minmax(ct
+
+ #endif /* CONFIG_PROC_FS */
+
+-
+ /*
+ * General sysctl support routines
+ */
+@@ -2427,6 +2494,14 @@ void unregister_sysctl_table(struct ctl_
+ {
+ }
+
++ctl_table * clone_sysctl_template(ctl_table *tmpl, int nr)
++{
++ return NULL;
++}
++
++void free_sysctl_clone(ctl_table *tmpl)
++{
++}
+ #endif /* CONFIG_SYSCTL */
+
+ /*
+@@ -2439,6 +2514,7 @@ EXPORT_SYMBOL(proc_dointvec_minmax);
+ EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
+ EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
+ EXPORT_SYMBOL(proc_dostring);
++EXPORT_SYMBOL(proc_doutsstring);
+ EXPORT_SYMBOL(proc_doulongvec_minmax);
+ EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
+ EXPORT_SYMBOL(register_sysctl_table);
+@@ -2447,3 +2523,5 @@ EXPORT_SYMBOL(sysctl_jiffies);
+ EXPORT_SYMBOL(sysctl_ms_jiffies);
+ EXPORT_SYMBOL(sysctl_string);
+ EXPORT_SYMBOL(unregister_sysctl_table);
++EXPORT_SYMBOL(clone_sysctl_template);
++EXPORT_SYMBOL(free_sysctl_clone);
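
clone_sysctl_template() above copies a ctl_table array and rebases each child pointer by
its offset into the new array, so the clone's directory entries point into the clone
rather than back into the template. A user-space sketch of that pointer-rebasing idiom on
a simplified table type (the entry layout here is invented for illustration):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* simplified table entry: directory entries point at another entry of the
 * same flat array through 'child' */
struct entry {
	const char *name;
	struct entry *child;  /* points inside the same template array, or NULL */
};

/* Deep-copy a template whose internal pointers reference the template itself:
 * each child pointer is rebased by its offset into the copy. */
static struct entry *clone_template(struct entry *tmpl, int nr)
{
	struct entry *clone = malloc(nr * sizeof(*clone));

	if (clone == NULL)
		return NULL;

	memcpy(clone, tmpl, nr * sizeof(*clone));
	for (int i = 0; i < nr; i++) {
		if (tmpl[i].child == NULL)
			continue;
		clone[i].child = clone + (tmpl[i].child - tmpl); /* rebase by offset */
	}
	return clone;
}

int main(void)
{
	struct entry tmpl[3] = {
		{ "kernel",   NULL },
		{ "hostname", NULL },
		{ NULL,       NULL },  /* terminator */
	};
	struct entry *clone;

	tmpl[0].child = &tmpl[1]; /* "kernel" directory contains "hostname" */
	clone = clone_template(tmpl, 3);
	printf("clone child points into clone: %s\n",
	       (clone[0].child == &clone[1]) ? "yes" : "no");
	free(clone);
	return 0;
}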
+diff -uprN linux-2.6.15.orig/kernel/timer.c linux-2.6.15-ve025stab014/kernel/timer.c
+--- linux-2.6.15.orig/kernel/timer.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/timer.c 2006-01-27 14:48:08.000000000 +0300
+@@ -459,7 +459,11 @@ static inline void __run_timers(tvec_bas
+ spin_unlock_irq(&base->t_base.lock);
+ {
+ int preempt_count = preempt_count();
++ struct ve_struct *ve;
++
++ ve = set_exec_env(get_ve0());
+ fn(data);
++ (void)set_exec_env(ve);
+ if (preempt_count != preempt_count()) {
+ printk(KERN_WARNING "huh, entered %p "
+ "with preempt_count %08x, exited"
+@@ -822,6 +826,23 @@ EXPORT_SYMBOL(avenrun);
+ * calc_load - given tick count, update the avenrun load estimates.
+ * This is called while holding a write_lock on xtime_lock.
+ */
++
++static void calc_load_ve(void)
++{
++ unsigned long flags, nr_unint;
++
++ nr_unint = nr_uninterruptible() * FIXED_1;
++ spin_lock_irqsave(&kstat_glb_lock, flags);
++ CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);
++ CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
++ CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
++ spin_unlock_irqrestore(&kstat_glb_lock, flags);
++
++#ifdef CONFIG_VE
++ do_update_load_avg_ve();
++#endif
++}
++
+ static inline void calc_load(unsigned long ticks)
+ {
+ unsigned long active_tasks; /* fixed-point */
+@@ -834,6 +855,7 @@ static inline void calc_load(unsigned lo
+ CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+ CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+ CALC_LOAD(avenrun[2], EXP_15, active_tasks);
++ calc_load_ve();
+ }
+ }
+
+@@ -941,7 +963,7 @@ asmlinkage unsigned long sys_alarm(unsig
+ */
+ asmlinkage long sys_getpid(void)
+ {
+- return current->tgid;
++ return virt_tgid(current);
+ }
+
+ /*
+@@ -963,28 +985,15 @@ asmlinkage long sys_getpid(void)
+ asmlinkage long sys_getppid(void)
+ {
+ int pid;
+- struct task_struct *me = current;
+- struct task_struct *parent;
+-
+- parent = me->group_leader->real_parent;
+- for (;;) {
+- pid = parent->tgid;
+-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+-{
+- struct task_struct *old = parent;
+
+- /*
+- * Make sure we read the pid before re-reading the
+- * parent pointer:
+- */
+- smp_rmb();
+- parent = me->group_leader->real_parent;
+- if (old != parent)
+- continue;
+-}
+-#endif
+- break;
+- }
++ /* Some smart code used to be here. It was wrong.
++ * ->real_parent could be released before dereference and
++ * we accessed freed kernel memory, which faults with debugging on.
++ * Keep it simple and stupid.
++ */
++ read_lock(&tasklist_lock);
++ pid = virt_tgid(current->group_leader->real_parent);
++ read_unlock(&tasklist_lock);
+ return pid;
+ }
+
+@@ -1115,7 +1124,7 @@ EXPORT_SYMBOL(schedule_timeout_uninterru
+ /* Thread ID - the internal kernel "pid" */
+ asmlinkage long sys_gettid(void)
+ {
+- return current->pid;
++ return virt_pid(current);
+ }
+
+ static long __sched nanosleep_restart(struct restart_block *restart)
+@@ -1183,11 +1192,12 @@ asmlinkage long sys_sysinfo(struct sysin
+ unsigned long mem_total, sav_total;
+ unsigned int mem_unit, bitcount;
+ unsigned long seq;
++ unsigned long *__avenrun;
++ struct timespec tp;
+
+ memset((char *)&val, 0, sizeof(struct sysinfo));
+
+ do {
+- struct timespec tp;
+ seq = read_seqbegin(&xtime_lock);
+
+ /*
+@@ -1204,14 +1214,25 @@ asmlinkage long sys_sysinfo(struct sysin
+ tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
+ tp.tv_sec++;
+ }
+- val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+-
+- val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+- val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+- val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
++ } while (read_seqretry(&xtime_lock, seq));
+
++ if (ve_is_super(get_exec_env())) {
++ val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
++ __avenrun = &avenrun[0];
+ val.procs = nr_threads;
+- } while (read_seqretry(&xtime_lock, seq));
++ }
++#ifdef CONFIG_VE
++ else {
++ struct ve_struct *ve;
++ ve = get_exec_env();
++ __avenrun = &ve->avenrun[0];
++ val.procs = atomic_read(&ve->pcounter);
++ val.uptime = tp.tv_sec - ve->start_timespec.tv_sec;
++ }
++#endif
++ val.loads[0] = __avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
++ val.loads[1] = __avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
++ val.loads[2] = __avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+
+ si_meminfo(&val);
+ si_swapinfo(&val);
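
calc_load_ve() above reuses the kernel's CALC_LOAD fixed-point decay to maintain per-VE
load averages next to the global avenrun[]. The following self-contained sketch shows
that fixed-point exponential average; the constants follow the usual 11-bit avenrun
convention, and the tick loop is only an illustration:

#include <stdio.h>

/* fixed-point load-average update in the style of the kernel's CALC_LOAD:
 * load = load*exp/FIXED_1 + n*(FIXED_1-exp)/FIXED_1, all in 11-bit fixed point */
#define FSHIFT  11
#define FIXED_1 (1 << FSHIFT)
#define EXP_1   1884  /* 2048/exp(5s/60s): one-minute decay per 5-second tick */

#define CALC_LOAD(load, exp, n)            \
	do {                               \
		(load) *= (exp);           \
		(load) += (n) * (FIXED_1 - (exp)); \
		(load) >>= FSHIFT;         \
	} while (0)

int main(void)
{
	unsigned long avenrun = 0;

	/* feed 12 five-second ticks with 3 runnable+uninterruptible tasks */
	for (int tick = 0; tick < 12; tick++)
		CALC_LOAD(avenrun, EXP_1, 3UL * FIXED_1);

	printf("1-minute load ~ %lu.%02lu\n",
	       avenrun >> FSHIFT, ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}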
+diff -uprN linux-2.6.15.orig/kernel/ub/Kconfig linux-2.6.15-ve025stab014/kernel/ub/Kconfig
+--- linux-2.6.15.orig/kernel/ub/Kconfig 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/Kconfig 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,89 @@
++#
++# User resources part (UBC)
++#
++# Copyright (C) 2005 SWsoft
++# All rights reserved.
++#
++# Licensing governed by "linux/COPYING.SWsoft" file.
++
++menu "User resources"
++
++config USER_RESOURCE
++ bool "Enable user resource accounting"
++ default y
++ help
++	  This patch provides accounting and allows configuring
++	  limits for a user's consumption of exhaustible system resources.
++	  The most important resource controlled by this patch is unswappable
++	  memory (either mlock'ed or used by internal kernel structures and
++	  buffers). The main goal of this patch is to protect processes
++	  from running short of important resources because of an accidental
++	  misbehavior of processes or malicious activity aiming to ``kill''
++	  the system. It is worth mentioning that resource limits configured
++	  by setrlimit(2) do not give an acceptable level of protection
++	  because they cover only a small fraction of resources and work on a
++	  per-process basis. Per-process accounting doesn't prevent malicious
++	  users from spawning a lot of resource-consuming processes.
++
++config USER_RSS_ACCOUNTING
++ bool "Account physical memory usage"
++ default y
++ depends on USER_RESOURCE
++ help
++	  This allows estimating per-beancounter physical memory usage.
++	  The implemented algorithm accounts shared pages of memory as well,
++	  dividing them by the number of beancounters which use the page.
++
++config USER_SWAP_ACCOUNTING
++ bool "Account swap usage"
++ default y
++ depends on USER_RESOURCE
++ help
++ This allows accounting of swap usage.
++
++config USER_RESOURCE_PROC
++ bool "Report resource usage in /proc"
++ default y
++ depends on USER_RESOURCE
++ help
++ Allows a system administrator to inspect resource accounts and limits.
++
++config UBC_DEBUG
++ bool "User resources debug features"
++ default n
++ depends on USER_RESOURCE
++ help
++	  Enables debug features for user resource accounting
++
++config UBC_DEBUG_KMEM
++ bool "Debug kmemsize with cache counters"
++ default n
++ depends on UBC_DEBUG
++ help
++	  Adds a /proc/user_beancounters_debug entry with statistics
++	  about the cache usage of each beancounter
++
++config UBC_KEEP_UNUSED
++ bool "Keep unused beancounter alive"
++ default y
++ depends on UBC_DEBUG
++ help
++	  If on, unused beancounters are kept in the hash so their maxheld
++	  values can be inspected.
++
++config UBC_DEBUG_ITEMS
++ bool "Account resources in items rather than in bytes"
++ default y
++ depends on UBC_DEBUG
++ help
++	  When true, some of the resources (e.g. kmemsize) are accounted
++	  in items instead of bytes.
++
++config UBC_UNLIMITED
++ bool "Use unlimited ubc settings"
++ default y
++ depends on UBC_DEBUG
++ help
++	  When ON, all limits and barriers are set to their maximum values.
++
++endmenu
+diff -uprN linux-2.6.15.orig/kernel/ub/Makefile linux-2.6.15-ve025stab014/kernel/ub/Makefile
+--- linux-2.6.15.orig/kernel/ub/Makefile 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/Makefile 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,20 @@
++#
++# User resources part (UBC)
++#
++# Copyright (C) 2005 SWsoft
++# All rights reserved.
++#
++# Licensing governed by "linux/COPYING.SWsoft" file.
++
++obj-y := ub_sys.o
++obj-$(CONFIG_USER_RESOURCE) += beancounter.o
++obj-$(CONFIG_USER_RESOURCE) += ub_dcache.o
++obj-$(CONFIG_USER_RESOURCE) += ub_mem.o
++obj-$(CONFIG_USER_RESOURCE) += ub_misc.o
++obj-$(CONFIG_USER_RESOURCE) += ub_net.o
++obj-$(CONFIG_USER_RESOURCE) += ub_pages.o
++obj-$(CONFIG_USER_RESOURCE) += ub_stat.o
++obj-$(CONFIG_USER_RESOURCE) += ub_oom.o
++
++obj-$(CONFIG_USER_RSS_ACCOUNTING) += ub_page_bc.o
++obj-$(CONFIG_USER_RESOURCE_PROC) += ub_proc.o
+diff -uprN linux-2.6.15.orig/kernel/ub/beancounter.c linux-2.6.15-ve025stab014/kernel/ub/beancounter.c
+--- linux-2.6.15.orig/kernel/ub/beancounter.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/beancounter.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,675 @@
++/*
++ * linux/kernel/ub/beancounter.c
++ *
++ * Copyright (C) 1998 Alan Cox
++ * 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg>
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * TODO:
++ * - more intelligent limit check in mremap(): currently the new size is
++ * charged and _then_ old size is uncharged
++ * (almost done: !move_vma case is completely done,
++ * move_vma in its current implementation requires too many conditions to
++ * do things right, because it may be not only expansion, but shrinking
++ * also, plus do_munmap will require an additional parameter...)
++ * - problem: bad pmd page handling
++ * - consider /proc redesign
++ * - TCP/UDP ports
++ * + consider whether __charge_beancounter_locked should be inline
++ *
++ * Changes:
++ * 1999/08/17 Marcelo Tosatti <marcelo@conectiva.com.br>
++ * - Set "barrier" and "limit" parts of limits atomically.
++ * 1999/10/06 Marcelo Tosatti <marcelo@conectiva.com.br>
++ * - setublimit system call.
++ */
++
++#include <linux/slab.h>
++#include <linux/module.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_hash.h>
++#include <ub/ub_vmpages.h>
++
++static kmem_cache_t *ub_cachep;
++static struct user_beancounter default_beancounter;
++struct user_beancounter ub0;
++
++const char *ub_rnames[] = {
++ "kmemsize", /* 0 */
++ "lockedpages",
++ "privvmpages",
++ "shmpages",
++ "dummy",
++ "numproc", /* 5 */
++ "physpages",
++ "vmguarpages",
++ "oomguarpages",
++ "numtcpsock",
++ "numflock", /* 10 */
++ "numpty",
++ "numsiginfo",
++ "tcpsndbuf",
++ "tcprcvbuf",
++ "othersockbuf", /* 15 */
++ "dgramrcvbuf",
++ "numothersock",
++ "dcachesize",
++ "numfile",
++ "dummy", /* 20 */
++ "dummy",
++ "dummy",
++ "numiptent",
++ "unused_privvmpages", /* UB_RESOURCES */
++ "tmpfs_respages",
++ "swap_pages",
++ "held_pages",
++};
++
++static void init_beancounter_struct(struct user_beancounter *ub);
++static void init_beancounter_store(struct user_beancounter *ub);
++static void init_beancounter_nolimits(struct user_beancounter *ub);
++
++void print_ub_uid(struct user_beancounter *ub, char *buf, int size)
++{
++ if (ub->parent != NULL)
++ snprintf(buf, size, "%u.%u", ub->parent->ub_uid, ub->ub_uid);
++ else
++ snprintf(buf, size, "%u", ub->ub_uid);
++}
++EXPORT_SYMBOL(print_ub_uid);
++
++#define ub_hash_fun(x) ((((x) >> 8) ^ (x)) & (UB_HASH_SIZE - 1))
++#define ub_subhash_fun(p, id) ub_hash_fun((p)->ub_uid + (id) * 17)
++struct ub_hash_slot ub_hash[UB_HASH_SIZE];
++spinlock_t ub_hash_lock;
++EXPORT_SYMBOL(ub_hash);
++EXPORT_SYMBOL(ub_hash_lock);
++
++/*
++ * Per user resource beancounting. Resources are tied to their luid.
++ * The resource structure itself is tagged both to the process and
++ * the charging resources (a socket doesn't want to have to search for
++ * things at irq time for example). Reference counters keep things in
++ * hand.
++ *
++ * The case where a user creates a resource, kills all his processes and
++ * then starts new ones is correctly handled this way. The refcounters
++ * will mean the old entry is still around with resources tied to it.
++ */
++struct user_beancounter *get_beancounter_byuid(uid_t uid, int create)
++{
++ struct user_beancounter *new_ub, *ub;
++ unsigned long flags;
++ struct ub_hash_slot *slot;
++
++ slot = &ub_hash[ub_hash_fun(uid)];
++ new_ub = NULL;
++
++retry:
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ ub = slot->ubh_beans;
++ while (ub != NULL && (ub->ub_uid != uid || ub->parent != NULL))
++ ub = ub->ub_next;
++
++ if (ub != NULL) {
++ /* found */
++ get_beancounter(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ if (new_ub != NULL)
++ kmem_cache_free(ub_cachep, new_ub);
++ return ub;
++ }
++
++ if (!create) {
++ /* no ub found */
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return NULL;
++ }
++
++ if (new_ub != NULL) {
++ /* install new ub */
++ new_ub->ub_next = slot->ubh_beans;
++ slot->ubh_beans = new_ub;
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return new_ub;
++ }
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++
++ /* alloc new ub */
++ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep,
++ GFP_KERNEL);
++ if (new_ub == NULL)
++ return NULL;
++
++ ub_debug(UBD_ALLOC, "Creating ub %p in slot %p\n", new_ub, slot);
++ memcpy(new_ub, &default_beancounter, sizeof(*new_ub));
++ init_beancounter_struct(new_ub);
++ new_ub->ub_uid = uid;
++ goto retry;
++}
++EXPORT_SYMBOL(get_beancounter_byuid);
++
++struct user_beancounter *get_subbeancounter_byid(struct user_beancounter *p,
++ int id, int create)
++{
++ struct user_beancounter *new_ub, *ub;
++ unsigned long flags;
++ struct ub_hash_slot *slot;
++
++ slot = &ub_hash[ub_subhash_fun(p, id)];
++ new_ub = NULL;
++
++retry:
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ ub = slot->ubh_beans;
++ while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
++ ub = ub->ub_next;
++
++ if (ub != NULL) {
++ /* found */
++ get_beancounter(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ if (new_ub != NULL) {
++ put_beancounter(new_ub->parent);
++ kmem_cache_free(ub_cachep, new_ub);
++ }
++ return ub;
++ }
++
++ if (!create) {
++ /* no ub found */
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return NULL;
++ }
++
++ if (new_ub != NULL) {
++ /* install new ub */
++ get_beancounter(new_ub);
++ new_ub->ub_next = slot->ubh_beans;
++ slot->ubh_beans = new_ub;
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return new_ub;
++ }
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++
++ /* alloc new ub */
++ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep,
++ GFP_KERNEL);
++ if (new_ub == NULL)
++ return NULL;
++
++ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", new_ub, slot);
++ memset(new_ub, 0, sizeof(*new_ub));
++ init_beancounter_nolimits(new_ub);
++ init_beancounter_store(new_ub);
++ init_beancounter_struct(new_ub);
++ atomic_set(&new_ub->ub_refcount, 0);
++ new_ub->ub_uid = id;
++ new_ub->parent = get_beancounter(p);
++ goto retry;
++}
++EXPORT_SYMBOL(get_subbeancounter_byid);
++
++struct user_beancounter *subbeancounter_findcreate(struct user_beancounter *p,
++ int id)
++{
++ struct user_beancounter *ub;
++ unsigned long flags;
++ struct ub_hash_slot *slot;
++
++ slot = &ub_hash[ub_subhash_fun(p, id)];
++
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ ub = slot->ubh_beans;
++ while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
++ ub = ub->ub_next;
++
++ if (ub != NULL) {
++ /* found */
++ get_beancounter(ub);
++ goto done;
++ }
++
++ /* alloc new ub */
++ /* Can be called from non-atomic contexts. Den */
++ ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, GFP_ATOMIC);
++ if (ub == NULL)
++ goto done;
++
++ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", ub, slot);
++ memset(ub, 0, sizeof(*ub));
++ init_beancounter_nolimits(ub);
++ init_beancounter_store(ub);
++ init_beancounter_struct(ub);
++ atomic_set(&ub->ub_refcount, 0);
++ ub->ub_uid = id;
++ ub->parent = get_beancounter(p);
++
++ /* install new ub */
++ get_beancounter(ub);
++ ub->ub_next = slot->ubh_beans;
++ slot->ubh_beans = ub;
++
++done:
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return ub;
++}
++EXPORT_SYMBOL(subbeancounter_findcreate);
++#ifndef CONFIG_UBC_KEEP_UNUSED
++
++static int verify_res(struct user_beancounter *ub, int resource,
++ unsigned long held)
++{
++ char id[64];
++
++ if (likely(held == 0))
++ return 1;
++
++ print_ub_uid(ub, id, sizeof(id));
++	printk(KERN_WARNING "Ub %s holds %lu in %s on put\n",
++ id, held, ub_rnames[resource]);
++ return 0;
++}
++
++static inline void verify_held(struct user_beancounter *ub)
++{
++ int i, clean;
++
++ clean = 1;
++ for (i = 0; i < UB_RESOURCES; i++)
++ clean &= verify_res(ub, i, ub->ub_parms[i].held);
++
++ clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages);
++ clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages);
++ clean &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages);
++ clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages);
++
++ ub_debug_trace(!clean, 5, 60*HZ);
++}
++
++static void __unhash_beancounter(struct user_beancounter *ub)
++{
++ struct user_beancounter **ubptr;
++ struct ub_hash_slot *slot;
++
++ if (ub->parent != NULL)
++ slot = &ub_hash[ub_subhash_fun(ub->parent, ub->ub_uid)];
++ else
++ slot = &ub_hash[ub_hash_fun(ub->ub_uid)];
++ ubptr = &slot->ubh_beans;
++
++ while (*ubptr != NULL) {
++ if (*ubptr == ub) {
++ verify_held(ub);
++ *ubptr = ub->ub_next;
++ return;
++ }
++ ubptr = &((*ubptr)->ub_next);
++ }
++ printk(KERN_ERR "Invalid beancounter %p, luid=%d on free, slot %p\n",
++ ub, ub->ub_uid, slot);
++}
++#endif
++
++void __put_beancounter(struct user_beancounter *ub)
++{
++ unsigned long flags;
++ struct user_beancounter *parent;
++
++again:
++ parent = ub->parent;
++ ub_debug(UBD_ALLOC, "__put bc %p (cnt %d) for %.20s pid %d "
++ "cur %08lx cpu %d.\n",
++ ub, atomic_read(&ub->ub_refcount),
++ current->comm, current->pid,
++ (unsigned long)current, smp_processor_id());
++
++	/* equivalent to atomic_dec_and_lock_irqsave() */
++ local_irq_save(flags);
++ if (likely(!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock))) {
++ if (unlikely(atomic_read(&ub->ub_refcount) < 0))
++ printk(KERN_ERR "UB: Bad ub refcount: ub=%p, "
++ "luid=%d, ref=%d\n",
++ ub, ub->ub_uid,
++ atomic_read(&ub->ub_refcount));
++ local_irq_restore(flags);
++ return;
++ }
++
++ if (unlikely(ub == get_ub0())) {
++ printk(KERN_ERR "Trying to put ub0\n");
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return;
++ }
++
++#ifndef CONFIG_UBC_KEEP_UNUSED
++ __unhash_beancounter(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ ub_free_counters(ub);
++ kmem_cache_free(ub_cachep, ub);
++#else
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++#endif
++ ub = parent;
++ if (ub != NULL)
++ goto again;
++}
++EXPORT_SYMBOL(__put_beancounter);
++
++/*
++ * Generic resource charging stuff
++ */
++
++int __charge_beancounter_locked(struct user_beancounter *ub,
++ int resource, unsigned long val, enum severity strict)
++{
++ ub_debug_resource(resource, "Charging %lu for %d of %p with %lu\n",
++ val, resource, ub, ub->ub_parms[resource].held);
++ /*
++ * ub_value <= UB_MAXVALUE, value <= UB_MAXVALUE, and only one addition
++ * at the moment is possible so an overflow is impossible.
++	 * at a time is possible, so an overflow is impossible.
++ ub->ub_parms[resource].held += val;
++
++ switch (strict) {
++ case UB_HARD:
++ if (ub->ub_parms[resource].held >
++ ub->ub_parms[resource].barrier)
++ break;
++ case UB_SOFT:
++ if (ub->ub_parms[resource].held >
++ ub->ub_parms[resource].limit)
++ break;
++ case UB_FORCE:
++ ub_adjust_maxheld(ub, resource);
++ return 0;
++ default:
++ BUG();
++ }
++
++ if (strict == UB_SOFT && ub_ratelimit(&ub->ub_limit_rl))
++ printk(KERN_INFO "Fatal resource shortage: %s, UB %d.\n",
++ ub_rnames[resource], ub->ub_uid);
++ ub->ub_parms[resource].failcnt++;
++ ub->ub_parms[resource].held -= val;
++ return -ENOMEM;
++}
++
++int charge_beancounter(struct user_beancounter *ub,
++ int resource, unsigned long val, enum severity strict)
++{
++ int retval;
++ struct user_beancounter *p, *q;
++ unsigned long flags;
++
++ retval = -EINVAL;
++ if (val > UB_MAXVALUE)
++ goto out;
++
++ local_irq_save(flags);
++ for (p = ub; p != NULL; p = p->parent) {
++ spin_lock(&p->ub_lock);
++ retval = __charge_beancounter_locked(p, resource, val, strict);
++ spin_unlock(&p->ub_lock);
++ if (retval)
++ goto unroll;
++ }
++out_restore:
++ local_irq_restore(flags);
++out:
++ return retval;
++
++unroll:
++ for (q = ub; q != p; q = q->parent) {
++ spin_lock(&q->ub_lock);
++ __uncharge_beancounter_locked(q, resource, val);
++ spin_unlock(&q->ub_lock);
++ }
++ goto out_restore;
++}
++
++EXPORT_SYMBOL(charge_beancounter);
++
++void charge_beancounter_notop(struct user_beancounter *ub,
++ int resource, unsigned long val)
++{
++ struct user_beancounter *p;
++ unsigned long flags;
++
++ local_irq_save(flags);
++ for (p = ub; p->parent != NULL; p = p->parent) {
++ spin_lock(&p->ub_lock);
++ __charge_beancounter_locked(p, resource, val, UB_FORCE);
++ spin_unlock(&p->ub_lock);
++ }
++ local_irq_restore(flags);
++}
++
++EXPORT_SYMBOL(charge_beancounter_notop);
++
++void uncharge_warn(struct user_beancounter *ub, int resource,
++ unsigned long val, unsigned long held)
++{
++ char id[64];
++
++ print_ub_uid(ub, id, sizeof(id));
++ printk(KERN_ERR "Uncharging too much %lu h %lu, res %s ub %s\n",
++ val, held, ub_rnames[resource], id);
++ ub_debug_trace(1, 10, 10*HZ);
++}
++
++void __uncharge_beancounter_locked(struct user_beancounter *ub,
++ int resource, unsigned long val)
++{
++ ub_debug_resource(resource, "Uncharging %lu for %d of %p with %lu\n",
++ val, resource, ub, ub->ub_parms[resource].held);
++ if (ub->ub_parms[resource].held < val) {
++ uncharge_warn(ub, resource,
++ val, ub->ub_parms[resource].held);
++ val = ub->ub_parms[resource].held;
++ }
++ ub->ub_parms[resource].held -= val;
++}
++
++void uncharge_beancounter(struct user_beancounter *ub,
++ int resource, unsigned long val)
++{
++ unsigned long flags;
++ struct user_beancounter *p;
++
++ for (p = ub; p != NULL; p = p->parent) {
++ spin_lock_irqsave(&p->ub_lock, flags);
++ __uncharge_beancounter_locked(p, resource, val);
++ spin_unlock_irqrestore(&p->ub_lock, flags);
++ }
++}
++
++EXPORT_SYMBOL(uncharge_beancounter);
++
++void uncharge_beancounter_notop(struct user_beancounter *ub,
++ int resource, unsigned long val)
++{
++ struct user_beancounter *p;
++ unsigned long flags;
++
++ local_irq_save(flags);
++ for (p = ub; p->parent != NULL; p = p->parent) {
++ spin_lock(&p->ub_lock);
++ __uncharge_beancounter_locked(p, resource, val);
++ spin_unlock(&p->ub_lock);
++ }
++ local_irq_restore(flags);
++}
++
++EXPORT_SYMBOL(uncharge_beancounter_notop);
++
++
++/*
++ * Rate limiting stuff.
++ */
++int ub_ratelimit(struct ub_rate_info *p)
++{
++ unsigned long cjif, djif;
++ unsigned long flags;
++ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
++ long new_bucket;
++
++ spin_lock_irqsave(&ratelimit_lock, flags);
++ cjif = jiffies;
++ djif = cjif - p->last;
++ if (djif < p->interval) {
++ if (p->bucket >= p->burst) {
++ spin_unlock_irqrestore(&ratelimit_lock, flags);
++ return 0;
++ }
++ p->bucket++;
++ } else {
++ new_bucket = p->bucket - (djif / (unsigned)p->interval);
++ if (new_bucket < 0)
++ new_bucket = 0;
++ p->bucket = new_bucket + 1;
++ }
++ p->last = cjif;
++ spin_unlock_irqrestore(&ratelimit_lock, flags);
++ return 1;
++}
++EXPORT_SYMBOL(ub_ratelimit);
++
++
++/*
++ * Initialization
++ *
++ * struct user_beancounter contains
++ * - limits and other configuration settings,
++ * with a copy stored for accounting purposes,
++ * - structural fields: lists, spinlocks and so on.
++ *
++ * Before these parts are initialized, the structure should be memset
++ * to 0 or copied from a known clean structure. That takes care of a lot
++ * of fields not initialized explicitly.
++ */
++
++static void init_beancounter_struct(struct user_beancounter *ub)
++{
++ ub->ub_magic = UB_MAGIC;
++ atomic_set(&ub->ub_refcount, 1);
++ spin_lock_init(&ub->ub_lock);
++ INIT_LIST_HEAD(&ub->ub_tcp_sk_list);
++ INIT_LIST_HEAD(&ub->ub_other_sk_list);
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ INIT_LIST_HEAD(&ub->ub_cclist);
++#endif
++}
++
++static void init_beancounter_store(struct user_beancounter *ub)
++{
++ int k;
++
++ for (k = 0; k < UB_RESOURCES; k++) {
++ memcpy(&ub->ub_store[k], &ub->ub_parms[k],
++ sizeof(struct ubparm));
++ }
++}
++
++static void init_beancounter_nolimits(struct user_beancounter *ub)
++{
++ int k;
++
++ for (k = 0; k < UB_RESOURCES; k++) {
++ ub->ub_parms[k].limit = UB_MAXVALUE;
++		/* FIXME: is this right for physpages and guarantees? */
++ ub->ub_parms[k].barrier = UB_MAXVALUE;
++ }
++
++ /* FIXME: set unlimited rate? */
++ ub->ub_limit_rl.burst = 4;
++ ub->ub_limit_rl.interval = 300*HZ;
++}
++
++static void init_beancounter_syslimits(struct user_beancounter *ub,
++ unsigned long mp)
++{
++ extern int max_threads;
++ int k;
++
++ ub->ub_parms[UB_KMEMSIZE].limit =
++ mp > (192*1024*1024 >> PAGE_SHIFT) ?
++ 32*1024*1024 : (mp << PAGE_SHIFT) / 6;
++ ub->ub_parms[UB_LOCKEDPAGES].limit = 8;
++ ub->ub_parms[UB_PRIVVMPAGES].limit = UB_MAXVALUE;
++ ub->ub_parms[UB_SHMPAGES].limit = 64;
++ ub->ub_parms[UB_NUMPROC].limit = max_threads / 2;
++ ub->ub_parms[UB_NUMTCPSOCK].limit = 1024;
++ ub->ub_parms[UB_TCPSNDBUF].limit = 1024*4*1024; /* 4k per socket */
++ ub->ub_parms[UB_TCPRCVBUF].limit = 1024*6*1024; /* 6k per socket */
++ ub->ub_parms[UB_NUMOTHERSOCK].limit = 256;
++ ub->ub_parms[UB_DGRAMRCVBUF].limit = 256*4*1024; /* 4k per socket */
++ ub->ub_parms[UB_OTHERSOCKBUF].limit = 256*8*1024; /* 8k per socket */
++ ub->ub_parms[UB_NUMFLOCK].limit = 1024;
++ ub->ub_parms[UB_NUMPTY].limit = 16;
++ ub->ub_parms[UB_NUMSIGINFO].limit = 1024;
++ ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024;
++ ub->ub_parms[UB_NUMFILE].limit = 1024;
++
++ for (k = 0; k < UB_RESOURCES; k++)
++ ub->ub_parms[k].barrier = ub->ub_parms[k].limit;
++
++ ub->ub_limit_rl.burst = 4;
++ ub->ub_limit_rl.interval = 300*HZ;
++}
++
++void __init ub_init_ub0(void)
++{
++ struct user_beancounter *ub;
++
++ init_cache_counters();
++ ub = get_ub0();
++ memset(ub, 0, sizeof(*ub));
++ ub->ub_uid = 0;
++ init_beancounter_nolimits(ub);
++ init_beancounter_store(ub);
++ init_beancounter_struct(ub);
++
++ memset(&current->task_bc, 0, sizeof(struct task_beancounter));
++ (void)set_exec_ub(get_ub0());
++ current->task_bc.fork_sub = get_beancounter(get_ub0());
++ init_mm.mm_ub = get_beancounter(ub);
++}
++
++void __init ub_hash_init(void)
++{
++ struct ub_hash_slot *slot;
++
++ spin_lock_init(&ub_hash_lock);
++ /* insert ub0 into the hash */
++ slot = &ub_hash[ub_hash_fun(get_ub0()->ub_uid)];
++ slot->ubh_beans = get_ub0();
++}
++
++void __init ub_init_cache(unsigned long mempages)
++{
++ extern int skbc_cache_init(void);
++ int res;
++
++ res = skbc_cache_init();
++ ub_cachep = kmem_cache_create("user_beancounters",
++ sizeof(struct user_beancounter),
++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
++ if (res < 0 || ub_cachep == NULL)
++ panic("Can't create ubc caches\n");
++
++ memset(&default_beancounter, 0, sizeof(default_beancounter));
++#ifdef CONFIG_UBC_UNLIMITED
++ init_beancounter_nolimits(&default_beancounter);
++#else
++ init_beancounter_syslimits(&default_beancounter, mempages);
++#endif
++ init_beancounter_store(&default_beancounter);
++ init_beancounter_struct(&default_beancounter);
++
++ ub_hash_init();
++}
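
charge_beancounter() above walks the parent chain, charging each level, and on failure
unrolls only the levels that were already charged so the held counters never drift. A
user-space model of that charge-with-rollback scheme on a single resource (the counter
type and the limits are hypothetical):

#include <stdio.h>

/* hypothetical simplified beancounter: one resource, no locking */
struct counter {
	struct counter *parent;
	unsigned long held;
	unsigned long limit;
	unsigned long failcnt;
};

static int charge_one(struct counter *c, unsigned long val)
{
	if (c->held + val > c->limit) {
		c->failcnt++;
		return -1;
	}
	c->held += val;
	return 0;
}

static void uncharge_one(struct counter *c, unsigned long val)
{
	c->held -= (val > c->held) ? c->held : val;  /* clamp, like uncharge_warn */
}

/* Charge the whole parent chain; on failure, roll back the counters that
 * were already charged so the hierarchy stays consistent. */
static int charge(struct counter *c, unsigned long val)
{
	struct counter *p, *q;

	for (p = c; p != NULL; p = p->parent)
		if (charge_one(p, val))
			goto unroll;
	return 0;

unroll:
	for (q = c; q != p; q = q->parent)
		uncharge_one(q, val);
	return -1;
}

int main(void)
{
	struct counter host = { NULL,  0, 100,  0 };
	struct counter ve   = { &host, 0, 1000, 0 };

	printf("charge 50: %d\n", charge(&ve, 50)); /* ok */
	printf("charge 60: %d\n", charge(&ve, 60)); /* host limit hit, rolled back */
	printf("ve held=%lu host held=%lu host fails=%lu\n",
	       ve.held, host.held, host.failcnt);
	return 0;
}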
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_dcache.c linux-2.6.15-ve025stab014/kernel/ub/ub_dcache.c
+--- linux-2.6.15.orig/kernel/ub/ub_dcache.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_dcache.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,325 @@
++/*
++ * kernel/ub/ub_dcache.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/dcache.h>
++#include <linux/slab.h>
++#include <linux/kmem_cache.h>
++#include <linux/fs.h>
++#include <linux/err.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
++#include <ub/ub_dcache.h>
++
++/*
++ * Locking
++ * traverse dcache_lock d_lock
++ * ub_dentry_charge + + +
++ * ub_dentry_uncharge + - +
++ * ub_dentry_charge_nofail + + -
++ *
++ * d_inuse is atomic so that we can inc dentry's parent d_inuse in
++ * ub_dentry_charge with only the dentry's d_lock held.
++ *
++ * Race in uncharge vs charge_nofail is handled with dcache_lock.
++ * Race in charge vs charge_nofail is inessential since they both inc d_inuse.
++ * Race in uncharge vs charge is handled by altering d_inuse under d_lock.
++ *
++ * Race with d_move is handled this way:
++ * - charge_nofail and uncharge are protected by dcache_lock;
++ * - charge works only with dentry and dentry->d_parent->d_inuse, so
++ * it's enough to lock only the dentry.
++ */
++
++/*
++ * Beancounting
++ * UB argument must NOT be NULL
++ */
++
++static int do_charge_dcache(struct user_beancounter *ub, unsigned long size,
++ enum severity sv)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (__charge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size), sv))
++ goto out_mem;
++ if (__charge_beancounter_locked(ub, UB_DCACHESIZE, size, sv))
++ goto out_dcache;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return 0;
++
++out_dcache:
++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
++out_mem:
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return -ENOMEM;
++}
++
++static void do_uncharge_dcache(struct user_beancounter *ub,
++ unsigned long size)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
++ __uncharge_beancounter_locked(ub, UB_DCACHESIZE, size);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++static int charge_dcache(struct user_beancounter *ub, unsigned long size,
++ enum severity sv)
++{
++ struct user_beancounter *p, *q;
++
++ for (p = ub; p != NULL; p = p->parent) {
++ if (do_charge_dcache(p, size, sv))
++ goto unroll;
++ }
++ return 0;
++
++unroll:
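++	/* roll back the charges made so far; p is the level where charging failed */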
++ for (q = ub; q != p; q = q->parent)
++ do_uncharge_dcache(q, size);
++ return -ENOMEM;
++}
++
++void uncharge_dcache(struct user_beancounter *ub, unsigned long size)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_uncharge_dcache(ub, size);
++}
++
++static inline void charge_dcache_forced(struct user_beancounter *ub,
++ unsigned long size)
++{
++ charge_dcache(ub, size, UB_FORCE);
++}
++
++static inline void d_forced_charge(struct dentry_beancounter *d_bc)
++{
++ d_bc->d_ub = get_beancounter(get_exec_ub());
++ if (d_bc->d_ub == NULL)
++ return;
++
++ charge_dcache_forced(d_bc->d_ub, d_bc->d_ubsize);
++}
++
++static inline void d_uncharge(struct dentry_beancounter *d_bc)
++{
++ if (d_bc->d_ub == NULL)
++ return;
++
++ uncharge_dcache(d_bc->d_ub, d_bc->d_ubsize);
++ put_beancounter(d_bc->d_ub);
++ d_bc->d_ub = NULL;
++}
++
++/*
++ * Alloc / free dentry_beancounter
++ */
++
++static inline int d_alloc_beancounter(struct dentry *d)
++{
++ return 0;
++}
++
++static inline void d_free_beancounter(struct dentry_beancounter *d_bc)
++{
++}
++
++static inline unsigned long d_charge_size(struct dentry *dentry)
++{
++ /* dentry's d_name is already set to appropriate value (see d_alloc) */
++ return inode_cachep->objuse + dentry_cache->objuse +
++ (dname_external(dentry) ?
++ kmem_obj_memusage((void *)dentry->d_name.name) : 0);
++}
++
++/*
++ * dentry mark in use operation
++ * d_lock is held
++ */
++
++static int d_inc_inuse(struct dentry *dentry)
++{
++ struct user_beancounter *ub;
++ struct dentry_beancounter *d_bc;
++
++ if (dentry != dentry->d_parent) {
++ struct dentry *parent;
++
++ /*
++ * Increment d_inuse of parent.
++ * It can't change since dentry->d_lock is held.
++ */
++ parent = dentry->d_parent;
++ if (ub_dget_testone(parent))
++ BUG();
++ }
++
++ d_bc = &dentry->dentry_bc;
++ ub = get_beancounter(get_exec_ub());
++
++ if (ub != NULL && charge_dcache(ub, d_bc->d_ubsize, UB_SOFT))
++ goto out_err;
++
++ d_bc->d_ub = ub;
++ return 0;
++
++out_err:
++ put_beancounter(ub);
++ d_bc->d_ub = NULL;
++ return -ENOMEM;
++}
++
++/*
++ * no locks
++ */
++int ub_dentry_alloc(struct dentry *dentry)
++{
++ int err;
++ struct dentry_beancounter *d_bc;
++
++ err = d_alloc_beancounter(dentry);
++ if (err < 0)
++ return err;
++
++ d_bc = &dentry->dentry_bc;
++ d_bc->d_ub = get_beancounter(get_exec_ub());
++ atomic_set(&d_bc->d_inuse, INUSE_INIT); /* see comment in ub_dcache.h */
++ d_bc->d_ubsize = d_charge_size(dentry);
++
++ err = 0;
++ if (d_bc->d_ub != NULL &&
++ charge_dcache(d_bc->d_ub, d_bc->d_ubsize, UB_HARD)) {
++ put_beancounter(d_bc->d_ub);
++ d_free_beancounter(d_bc);
++ err = -ENOMEM;
++ }
++
++ return err;
++}
++
++/*
++ * Charge / uncharge functions.
++ *
++ * We take d_lock to protect dentry_bc from concurrent access
++ * when a simultaneous __d_lookup and d_put happen on one dentry.
++ */
++
++/*
++ * no dcache_lock, d_lock and rcu_read_lock are held
++ * drops d_lock, rcu_read_lock and returns error if any
++ */
++int ub_dentry_charge(struct dentry *dentry)
++{
++ int err;
++
++ err = 0;
++ if (ub_dget_testone(dentry))
++ err = d_inc_inuse(dentry);
++
++ /*
++ * d_lock and rcu_read_lock are dropped here
++ * (see also __d_lookup)
++ */
++ spin_unlock(&dentry->d_lock);
++ rcu_read_unlock();
++
++ if (!err)
++ return 0;
++
++ /*
++	 * d_invalidate is required for real_lookup
++	 * since it tries to create a new dentry on
++	 * d_lookup failure.
++ */
++ if (!d_invalidate(dentry))
++ return err;
++
++	/* didn't succeed, force the dentry to be charged */
++ d_forced_charge(&dentry->dentry_bc);
++ return 0;
++}
++
++/*
++ * dcache_lock is held
++ * no d_locks, sequentially takes and drops d_lock from dentry upward
++ */
++void ub_dentry_uncharge(struct dentry *dentry)
++{
++ struct dentry *parent;
++
++	/* go up until the status stops changing or the root is reached */
++ while (1) {
++ /*
++ * We need d_lock here to handle
++ * the race with ub_dentry_charge
++ */
++ spin_lock(&dentry->d_lock);
++ if (!ub_dput_testzero(dentry)) {
++ spin_unlock(&dentry->d_lock);
++ break;
++ }
++
++ /* state transition 0 => -1 */
++ d_uncharge(&dentry->dentry_bc);
++ parent = dentry->d_parent;
++ spin_unlock(&dentry->d_lock);
++
++ /*
++ * dcache_lock is held (see comment in __dget_locked)
++ * so we can safely move upwards.
++ */
++ if (dentry == parent)
++ break;
++ dentry = parent;
++ }
++}
++
++/*
++ * forced version. for dget in clean cache, when error is not an option
++ *
++ * dcache_lock is held
++ * no d_locks
++ */
++void ub_dentry_charge_nofail(struct dentry *dentry)
++{
++ struct dentry *parent;
++
++	/* go up until the status stops changing or the root is reached */
++ while (1) {
++ if (!ub_dget_testone(dentry))
++ break;
++
++ /*
++ * state transition -1 => 0
++ *
++ * No need to lock dentry before atomic_inc
++ * like we do in ub_dentry_uncharge.
++ * We can't race with ub_dentry_uncharge due
++ * to dcache_lock. The only possible race with
++ * ub_dentry_charge is OK since they both
++ * do atomic_inc.
++ */
++ d_forced_charge(&dentry->dentry_bc);
++ /*
++ * dcache_lock is held (see comment in __dget_locked)
++ * so we can safely move upwards.
++ */
++ parent = dentry->d_parent;
++
++ if (dentry == parent)
++ break;
++ dentry = parent;
++ }
++}
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_mem.c linux-2.6.15-ve025stab014/kernel/ub/ub_mem.c
+--- linux-2.6.15.orig/kernel/ub/ub_mem.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_mem.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,388 @@
++/*
++ * kernel/ub/ub_mem.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/slab.h>
++#include <linux/kmem_cache.h>
++#include <linux/kmem_slab.h>
++#include <linux/highmem.h>
++#include <linux/vmalloc.h>
++#include <linux/mm.h>
++#include <linux/gfp.h>
++#include <linux/swap.h>
++#include <linux/spinlock.h>
++#include <linux/sched.h>
++#include <linux/module.h>
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
++#include <ub/ub_hash.h>
++
++/*
++ * Initialization
++ */
++
++/*
++ * Slab accounting
++ */
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++
++#define CC_HASH_SIZE 1024
++static struct ub_cache_counter *cc_hash[CC_HASH_SIZE];
++spinlock_t cc_lock;
++
++static void __free_cache_counters(struct user_beancounter *ub,
++ kmem_cache_t *cachep)
++{
++ struct ub_cache_counter *cc, **pprev, *del;
++ int i;
++ unsigned long flags;
++
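++	/* unlink matching counters onto a local list under cc_lock, then free them after the lock is dropped */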
++ del = NULL;
++ spin_lock_irqsave(&cc_lock, flags);
++ for (i = 0; i < CC_HASH_SIZE; i++) {
++ pprev = &cc_hash[i];
++ cc = cc_hash[i];
++ while (cc != NULL) {
++ if (cc->ub != ub && cc->cachep != cachep) {
++ pprev = &cc->next;
++ cc = cc->next;
++ continue;
++ }
++
++ list_del(&cc->ulist);
++ *pprev = cc->next;
++ cc->next = del;
++ del = cc;
++ cc = *pprev;
++ }
++ }
++ spin_unlock_irqrestore(&cc_lock, flags);
++
++ while (del != NULL) {
++ cc = del->next;
++ kfree(del);
++ del = cc;
++ }
++}
++
++void ub_free_counters(struct user_beancounter *ub)
++{
++ __free_cache_counters(ub, NULL);
++}
++
++void ub_kmemcache_free(kmem_cache_t *cachep)
++{
++ __free_cache_counters(NULL, cachep);
++}
++
++void __init init_cache_counters(void)
++{
++ memset(cc_hash, 0, CC_HASH_SIZE * sizeof(cc_hash[0]));
++ spin_lock_init(&cc_lock);
++}
++
++#define cc_hash_fun(ub, cachep) ( \
++ (((unsigned long)(ub) >> L1_CACHE_SHIFT) ^ \
++ ((unsigned long)(ub) >> (BITS_PER_LONG / 2)) ^ \
++ ((unsigned long)(cachep) >> L1_CACHE_SHIFT) ^ \
++ ((unsigned long)(cachep) >> (BITS_PER_LONG / 2)) \
++ ) & (CC_HASH_SIZE - 1))
++
++static int change_slab_charged(struct user_beancounter *ub, void *objp,
++ unsigned long val, int mask)
++{
++ struct ub_cache_counter *cc, *new_cnt, **pprev;
++ kmem_cache_t *cachep;
++ unsigned long flags;
++
++ cachep = GET_PAGE_CACHE(virt_to_page(objp));
++ new_cnt = NULL;
++
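++	/* look up the (ub, cachep) counter; if it is missing, allocate a new one outside cc_lock and retry */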
++again:
++ spin_lock_irqsave(&cc_lock, flags);
++ cc = cc_hash[cc_hash_fun(ub, cachep)];
++ while (cc) {
++ if (cc->ub == ub && cc->cachep == cachep)
++ goto found;
++ cc = cc->next;
++ }
++
++ if (new_cnt != NULL)
++ goto insert;
++
++ spin_unlock_irqrestore(&cc_lock, flags);
++
++ new_cnt = kmalloc(sizeof(*new_cnt), mask & ~__GFP_UBC);
++ if (new_cnt == NULL)
++ return -ENOMEM;
++
++ new_cnt->counter = 0;
++ new_cnt->ub = ub;
++ new_cnt->cachep = cachep;
++ goto again;
++
++insert:
++ pprev = &cc_hash[cc_hash_fun(ub, cachep)];
++ new_cnt->next = *pprev;
++ *pprev = new_cnt;
++ list_add(&new_cnt->ulist, &ub->ub_cclist);
++ cc = new_cnt;
++ new_cnt = NULL;
++
++found:
++ cc->counter += val;
++ spin_unlock_irqrestore(&cc_lock, flags);
++ if (new_cnt)
++ kfree(new_cnt);
++ return 0;
++}
++
++static inline int inc_slab_charged(struct user_beancounter *ub,
++ void *objp, int mask)
++{
++ return change_slab_charged(ub, objp, 1, mask);
++}
++
++static inline void dec_slab_charged(struct user_beancounter *ub, void *objp)
++{
++ if (change_slab_charged(ub, objp, -1, 0) < 0)
++ BUG();
++}
++
++#include <linux/vmalloc.h>
++
++static inline int inc_pages_charged(struct user_beancounter *ub,
++ struct page *pg, int order)
++{
++ int cpu;
++
++ cpu = get_cpu();
++ ub->ub_stat[cpu].pages_charged++;
++ put_cpu();
++ return 0;
++}
++
++static inline void dec_pages_charged(struct user_beancounter *ub,
++ struct page *pg, int order)
++{
++ int cpu;
++
++ cpu = get_cpu();
++ ub->ub_stat[cpu].pages_charged--;
++ put_cpu();
++}
++
++void inc_vmalloc_charged(struct vm_struct *vm, int flags)
++{
++ int cpu;
++ struct user_beancounter *ub;
++
++ if (!(flags & __GFP_UBC))
++ return;
++
++ ub = get_exec_ub();
++ if (ub == NULL)
++ return;
++
++ cpu = get_cpu();
++ ub->ub_stat[cpu].vmalloc_charged += vm->nr_pages;
++ put_cpu();
++}
++
++void dec_vmalloc_charged(struct vm_struct *vm)
++{
++ int cpu;
++ struct user_beancounter *ub;
++
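++	/* page_ub() of the first page was set by ub_page_charge() when the area was allocated with __GFP_UBC */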
++ ub = page_ub(vm->pages[0]);
++ if (ub == NULL)
++ return;
++
++ cpu = get_cpu();
++ ub->ub_stat[cpu].vmalloc_charged -= vm->nr_pages;
++ put_cpu();
++}
++
++#else
++#define inc_slab_charged(ub, o, m) (0)
++#define dec_slab_charged(ub, o) do { } while (0)
++#define inc_pages_charged(ub, pg, o) (0)
++#define dec_pages_charged(ub, pg, o) do { } while (0)
++#endif
++
++static inline struct user_beancounter **slab_ub_ref(void *objp)
++{
++ struct page *pg;
++ kmem_cache_t *cachep;
++ struct slab *slabp;
++ int objnr;
++
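++	/* SLAB_UBC caches keep a beancounter pointer per object in the slab management data (see slab_ubcs) */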
++ pg = virt_to_page(objp);
++ cachep = GET_PAGE_CACHE(pg);
++ BUG_ON(!(cachep->flags & SLAB_UBC));
++ slabp = GET_PAGE_SLAB(pg);
++ objnr = (objp - slabp->s_mem) / cachep->objsize;
++ return slab_ubcs(cachep, slabp) + objnr;
++}
++
++struct user_beancounter *slab_ub(void *objp)
++{
++ struct user_beancounter **ub_ref;
++
++ ub_ref = slab_ub_ref(objp);
++ return *ub_ref;
++}
++
++EXPORT_SYMBOL(slab_ub);
++
++static inline int should_charge(void *objp, int flags)
++{
++ kmem_cache_t *cachep;
++
++ cachep = GET_PAGE_CACHE(virt_to_page(objp));
++ if (!(cachep->flags & SLAB_UBC))
++ return 0;
++ if ((cachep->flags & SLAB_NO_CHARGE) && !(flags & __GFP_UBC))
++ return 0;
++ return 1;
++}
++
++#define should_uncharge(objp) should_charge(objp, __GFP_UBC)
++
++int ub_slab_charge(void *objp, int flags)
++{
++ unsigned int size;
++ struct user_beancounter *ub;
++
++ if (!should_charge(objp, flags))
++ return 0;
++
++ ub = get_beancounter(get_exec_ub());
++ if (ub == NULL)
++ return 0;
++
++ size = CHARGE_SIZE(kmem_obj_memusage(objp));
++ if (charge_beancounter(ub, UB_KMEMSIZE, size,
++ (flags & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
++ goto out_err;
++
++ if (inc_slab_charged(ub, objp, flags) < 0) {
++ uncharge_beancounter(ub, UB_KMEMSIZE, size);
++ goto out_err;
++ }
++ *slab_ub_ref(objp) = ub;
++ return 0;
++
++out_err:
++ put_beancounter(ub);
++ return -ENOMEM;
++}
++
++void ub_slab_uncharge(void *objp)
++{
++ unsigned int size;
++ struct user_beancounter **ub_ref;
++
++ if (!should_uncharge(objp))
++ return;
++
++ ub_ref = slab_ub_ref(objp);
++ if (*ub_ref == NULL)
++ return;
++
++ dec_slab_charged(*ub_ref, objp);
++ size = CHARGE_SIZE(kmem_obj_memusage(objp));
++ uncharge_beancounter(*ub_ref, UB_KMEMSIZE, size);
++ put_beancounter(*ub_ref);
++ *ub_ref = NULL;
++}
++
++/*
++ * Pages accounting
++ */
++
++inline int ub_page_charge(struct page *page, int order, int mask)
++{
++ struct user_beancounter *ub;
++
++ ub = NULL;
++ if (!(mask & __GFP_UBC))
++ goto out;
++
++ ub = get_beancounter(get_exec_ub());
++ if (ub == NULL)
++ goto out;
++
++ if (charge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order),
++ (mask & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
++ goto err;
++ if (inc_pages_charged(ub, page, order) < 0) {
++ uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order));
++ goto err;
++ }
++out:
++ BUG_ON(page_ub(page) != NULL);
++ page_ub(page) = ub;
++ return 0;
++
++err:
++ BUG_ON(page_ub(page) != NULL);
++ put_beancounter(ub);
++ return -ENOMEM;
++}
++
++inline void ub_page_uncharge(struct page *page, int order)
++{
++ struct user_beancounter *ub;
++
++ ub = page_ub(page);
++ if (ub == NULL)
++ return;
++
++ dec_pages_charged(ub, page, order);
++ BUG_ON(ub->ub_magic != UB_MAGIC);
++ uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order));
++ put_beancounter(ub);
++ page_ub(page) = NULL;
++}
++
++/*
++ * takes init_mm.page_table_lock
++ * some outer lock to protect pages from vmalloced area must be held
++ */
++struct user_beancounter *vmalloc_ub(void *obj)
++{
++ struct page *pg;
++
++ spin_lock(&init_mm.page_table_lock);
++ pg = follow_page(NULL, (unsigned long)obj, FOLL_KERN);
++ spin_unlock(&init_mm.page_table_lock);
++ if (pg == NULL)
++ return NULL;
++
++ return page_ub(pg);
++}
++
++EXPORT_SYMBOL(vmalloc_ub);
++
++struct user_beancounter *mem_ub(void *obj)
++{
++ struct user_beancounter *ub;
++
++ if ((unsigned long)obj >= VMALLOC_START &&
++ (unsigned long)obj < VMALLOC_END)
++ ub = vmalloc_ub(obj);
++ else
++ ub = slab_ub(obj);
++
++ return ub;
++}
++
++EXPORT_SYMBOL(mem_ub);
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_misc.c linux-2.6.15-ve025stab014/kernel/ub/ub_misc.c
+--- linux-2.6.15.orig/kernel/ub/ub_misc.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_misc.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,242 @@
++/*
++ * kernel/ub/ub_misc.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/tty.h>
++#include <linux/tty_driver.h>
++#include <linux/signal.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/sched.h>
++#include <linux/kmem_cache.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
++
++/*
++ * Task stuff
++ */
++
++static void init_task_sub(struct task_struct *tsk,
++ struct task_beancounter *old_bc)
++{
++ struct task_beancounter *new_bc;
++ struct user_beancounter *sub;
++
++ new_bc = &tsk->task_bc;
++ sub = old_bc->fork_sub;
++ new_bc->fork_sub = get_beancounter(sub);
++ new_bc->task_fnode = NULL;
++ new_bc->task_freserv = old_bc->task_freserv;
++ old_bc->task_freserv = NULL;
++ memset(&new_bc->task_data, 0, sizeof(new_bc->task_data));
++}
++
++int ub_task_charge(struct task_struct *parent, struct task_struct *task)
++{
++ struct task_beancounter *old_bc;
++ struct task_beancounter *new_bc;
++ struct user_beancounter *ub;
++
++ old_bc = &parent->task_bc;
++#if 0
++ if (old_bc->exec_ub == NULL) {
++ /* FIXME: this won't work if task_bc is outside task_struct */
++ init_task_sub(task, old_bc);
++ return 0;
++ }
++#endif
++ ub = old_bc->fork_sub;
++
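++	/* the new process (UB_NUMPROC) is charged to the beancounter the parent forks into */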
++ if (charge_beancounter(ub, UB_NUMPROC, 1, UB_HARD) < 0)
++ return -ENOMEM;
++
++ new_bc = &task->task_bc;
++ new_bc->task_ub = get_beancounter(ub);
++ new_bc->exec_ub = get_beancounter(ub);
++ init_task_sub(task, old_bc);
++ return 0;
++}
++
++void ub_task_uncharge(struct task_struct *task)
++{
++ struct task_beancounter *task_bc;
++
++ task_bc = &task->task_bc;
++ if (task_bc->task_ub != NULL)
++ uncharge_beancounter(task_bc->task_ub, UB_NUMPROC, 1);
++
++ put_beancounter(task_bc->exec_ub);
++ put_beancounter(task_bc->task_ub);
++ put_beancounter(task_bc->fork_sub);
++ /* can't be freed elsewhere, failures possible in the middle of fork */
++ if (task_bc->task_freserv != NULL)
++ kfree(task_bc->task_freserv);
++
++ task_bc->exec_ub = (struct user_beancounter *)0xdeadbcbc;
++}
++
++/*
++ * Files and file locks.
++ */
++
++int ub_file_charge(struct file *f)
++{
++ struct user_beancounter *ub;
++
++	/* No need to get_beancounter here: the reference was already taken in ub_slab_charge */
++ ub = slab_ub(f);
++ if (ub == NULL)
++ return 0;
++
++ return charge_beancounter(ub, UB_NUMFILE, 1, UB_HARD);
++}
++
++void ub_file_uncharge(struct file *f)
++{
++ struct user_beancounter *ub;
++
++	/* The ub reference is put later, in ub_slab_uncharge */
++ ub = slab_ub(f);
++ if (ub == NULL)
++ return;
++
++ uncharge_beancounter(ub, UB_NUMFILE, 1);
++}
++
++int ub_flock_charge(struct file_lock *fl, int hard)
++{
++ struct user_beancounter *ub;
++ int err;
++
++	/* No need to get_beancounter here: the reference was already taken in ub_slab_charge */
++ ub = slab_ub(fl);
++ if (ub == NULL)
++ return 0;
++
++ err = charge_beancounter(ub, UB_NUMFLOCK, 1, hard ? UB_HARD : UB_SOFT);
++ if (!err)
++ fl->fl_charged = 1;
++ return err;
++}
++
++void ub_flock_uncharge(struct file_lock *fl)
++{
++ struct user_beancounter *ub;
++
++	/* The ub reference is put later, in ub_slab_uncharge */
++ ub = slab_ub(fl);
++ if (ub == NULL || !fl->fl_charged)
++ return;
++
++ uncharge_beancounter(ub, UB_NUMFLOCK, 1);
++ fl->fl_charged = 0;
++}
++
++/*
++ * Signal handling
++ */
++
++static int do_ub_siginfo_charge(struct user_beancounter *ub,
++ unsigned long size)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (__charge_beancounter_locked(ub, UB_KMEMSIZE, size, UB_HARD))
++ goto out_kmem;
++
++ if (__charge_beancounter_locked(ub, UB_NUMSIGINFO, 1, UB_HARD))
++ goto out_num;
++
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return 0;
++
++out_num:
++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
++out_kmem:
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return -ENOMEM;
++}
++
++static void do_ub_siginfo_uncharge(struct user_beancounter *ub,
++ unsigned long size)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
++ __uncharge_beancounter_locked(ub, UB_NUMSIGINFO, 1);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++int ub_siginfo_charge(struct sigqueue *sq, struct user_beancounter *ub)
++{
++ unsigned long size;
++ struct user_beancounter *p, *q;
++
++ size = CHARGE_SIZE(kmem_obj_memusage(sq));
++ for (p = ub; p != NULL; p = p->parent) {
++ if (do_ub_siginfo_charge(p, size))
++ goto unroll;
++ }
++
++ sq->sig_ub = get_beancounter(ub);
++ return 0;
++
++unroll:
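++	/* undo the charges already made, from ub up to (but not including) the failed level p */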
++ for (q = ub; q != p; q = q->parent)
++ do_ub_siginfo_uncharge(q, size);
++ return -ENOMEM;
++}
++
++void ub_siginfo_uncharge(struct sigqueue *sq)
++{
++ unsigned long size;
++ struct user_beancounter *ub, *p;
++
++ p = ub = sq->sig_ub;
++ sq->sig_ub = NULL;
++ size = CHARGE_SIZE(kmem_obj_memusage(sq));
++ for (; ub != NULL; ub = ub->parent)
++ do_ub_siginfo_uncharge(ub, size);
++ put_beancounter(p);
++}
++
++/*
++ * PTYs
++ */
++
++int ub_pty_charge(struct tty_struct *tty)
++{
++ struct user_beancounter *ub;
++ int retval;
++
++ ub = slab_ub(tty);
++ retval = 0;
++ if (ub && tty->driver->subtype == PTY_TYPE_MASTER &&
++ !test_bit(TTY_CHARGED, &tty->flags)) {
++ retval = charge_beancounter(ub, UB_NUMPTY, 1, UB_HARD);
++ if (!retval)
++ set_bit(TTY_CHARGED, &tty->flags);
++ }
++ return retval;
++}
++
++void ub_pty_uncharge(struct tty_struct *tty)
++{
++ struct user_beancounter *ub;
++
++ ub = slab_ub(tty);
++ if (ub && tty->driver->subtype == PTY_TYPE_MASTER &&
++ test_bit(TTY_CHARGED, &tty->flags)) {
++ uncharge_beancounter(ub, UB_NUMPTY, 1);
++ clear_bit(TTY_CHARGED, &tty->flags);
++ }
++}
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_net.c linux-2.6.15-ve025stab014/kernel/ub/ub_net.c
+--- linux-2.6.15.orig/kernel/ub/ub_net.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_net.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,1041 @@
++/*
++ * linux/kernel/ub/ub_net.c
++ *
++ * Copyright (C) 1998-2004 Andrey V. Savochkin <saw@saw.sw.com.sg>
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * TODO:
++ * - sizeof(struct inode) charge
++ * = tcp_mem_schedule() feedback based on ub limits
++ * + measures so that one socket won't exhaust all send buffers,
++ * see bug in bugzilla
++ * = sk->socket check for NULL in snd_wakeups
++ * (tcp_write_space checks for NULL itself)
++ * + in tcp_close(), orphaned socket abortion should be based on ubc
++ * resources (same in tcp_out_of_resources)
++ * Beancounter should also have separate orphaned socket counter...
++ * + for rcv, in-order segment should be accepted
++ * if only barrier is exceeded
++ * = tcp_rmem_schedule() feedback based on ub limits
++ * - repair forward_alloc mechanism for receive buffers
++ * Its idea is that some buffer space is pre-charged so that the receive fast
++ * path doesn't need to take spinlocks and do other heavy stuff
++ * + tcp_prune_queue actions based on ub limits
++ * + window adjustments depending on available buffers for receive
++ * - window adjustments depending on available buffers for send
++ * + race around usewreserv
++ * + avoid allocating new page for each tiny-gram, see letter from ANK
++ * + rename ub_sock_lock
++ * + sk->sleep wait queue probably can be used for all wakeups, and
++ * sk->ub_wait is unnecessary
++ * + for UNIX sockets, the current algorithm will lead to
++ * UB_UNIX_MINBUF-sized messages only for non-blocking case
++ * - charge for af_packet sockets
++ * + all datagram sockets should be charged to NUMUNIXSOCK
++ * - we do not charge for skb copies and clones staying in device queues
++ * + live-lock if number of sockets is big and buffer limits are small
++ * [diff-ubc-dbllim3]
++ * - check that multiple readers/writers on the same socket won't cause fatal
++ * consequences
++ * - check allocation/charge orders
++ * + There is potential problem with callback_lock. In *snd_wakeup we take
++ * beancounter first, in sock_def_error_report - callback_lock first.
++ * then beancounter. This is not a problem if callback_lock taken
++ * readonly, but anyway...
++ * - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator
++ * General kernel problems:
++ * - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC
++ * notification won't get signals
++ * - datagram_poll looks racy
++ *
++ */
++
++#include <linux/net.h>
++#include <linux/slab.h>
++#include <linux/kmem_cache.h>
++#include <linux/gfp.h>
++#include <linux/err.h>
++#include <linux/socket.h>
++#include <linux/module.h>
++#include <linux/sched.h>
++
++#include <net/sock.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_net.h>
++#include <ub/ub_debug.h>
++
++
++/* Skb truesize definition. Bad place. Den */
++
++static inline int skb_chargesize_head(struct sk_buff *skb)
++{
++ return skb_charge_size(skb->end - skb->head +
++ sizeof(struct skb_shared_info));
++}
++
++int skb_charge_fullsize(struct sk_buff *skb)
++{
++ int chargesize;
++ struct sk_buff *skbfrag;
++
++ chargesize = skb_chargesize_head(skb) +
++ PAGE_SIZE * skb_shinfo(skb)->nr_frags;
++ if (likely(skb_shinfo(skb)->frag_list == NULL))
++ return chargesize;
++ for (skbfrag = skb_shinfo(skb)->frag_list;
++ skbfrag != NULL;
++ skbfrag = skbfrag->next) {
++ chargesize += skb_charge_fullsize(skbfrag);
++ }
++ return chargesize;
++}
++EXPORT_SYMBOL(skb_charge_fullsize);
++
++static int ub_sock_makewreserv_locked(struct sock *sk,
++ int bufid, int sockid, unsigned long size);
++
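++/*
++ * Treat orphaned TCP sockets as "too many" once their count reaches a
++ * quarter of the top-level beancounter's NUMTCPSOCK barrier.
++ */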
++int __ub_too_many_orphans(struct sock *sk, int count)
++{
++ struct user_beancounter *ub;
++
++ if (sock_has_ubc(sk)) {
++ for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent);
++ if (count >= ub->ub_parms[UB_NUMTCPSOCK].barrier >> 2)
++ return 1;
++ }
++ return 0;
++}
++
++/*
++ * Queueing
++ */
++
++static void ub_sock_snd_wakeup(struct user_beancounter *ub)
++{
++ struct list_head *p;
++ struct sock_beancounter *skbc;
++ struct sock *sk;
++ struct user_beancounter *cub;
++ unsigned long added;
++
++ while (!list_empty(&ub->ub_other_sk_list)) {
++ p = ub->ub_other_sk_list.next;
++ skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
++ sk = skbc_sock(skbc);
++ ub_debug(UBD_NET_SLEEP, "Found sock to wake up\n");
++ added = -skbc->poll_reserv;
++ if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF,
++ UB_NUMOTHERSOCK, skbc->ub_waitspc))
++ break;
++ added += skbc->poll_reserv;
++
++ /*
++ * See comments in ub_tcp_snd_wakeup.
++ * Locking note: both unix_write_space and
++ * sock_def_write_space take callback_lock themselves.
++ * We take it here just to be on the safe side and to
++ * act the same way as ub_tcp_snd_wakeup does.
++ */
++ sk->sk_write_space(sk);
++
++ list_del_init(&skbc->ub_sock_list);
++
++ if (skbc->ub != ub && added) {
++ cub = get_beancounter(skbc->ub);
++ spin_unlock(&ub->ub_lock);
++ charge_beancounter_notop(cub, UB_OTHERSOCKBUF, added);
++ put_beancounter(cub);
++ spin_lock(&ub->ub_lock);
++ }
++ }
++}
++
++static void ub_tcp_snd_wakeup(struct user_beancounter *ub)
++{
++ struct list_head *p;
++ struct sock *sk;
++ struct sock_beancounter *skbc;
++ struct socket *sock;
++ struct user_beancounter *cub;
++ unsigned long added;
++
++ while (!list_empty(&ub->ub_tcp_sk_list)) {
++ p = ub->ub_tcp_sk_list.next;
++ skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
++ sk = skbc_sock(skbc);
++
++ added = 0;
++ sock = sk->sk_socket;
++ if (sock == NULL)
++ /* sk being destroyed */
++ goto cont;
++
++ ub_debug(UBD_NET_SLEEP,
++ "Checking queue, waiting %lu, reserv %lu\n",
++ skbc->ub_waitspc, skbc->poll_reserv);
++ added = -skbc->poll_reserv;
++ if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF,
++ UB_NUMTCPSOCK, skbc->ub_waitspc))
++ break;
++ added += skbc->poll_reserv;
++
++ /*
++ * Send async notifications and wake up.
++ * Locking note: we get callback_lock here because
++ * tcp_write_space is over-optimistic about calling context
++ * (socket lock is presumed). So we get the lock here although
++ * it belongs to the callback.
++ */
++ sk->sk_write_space(sk);
++
++cont:
++ list_del_init(&skbc->ub_sock_list);
++
++ if (skbc->ub != ub && added) {
++ cub = get_beancounter(skbc->ub);
++ spin_unlock(&ub->ub_lock);
++ charge_beancounter_notop(cub, UB_TCPSNDBUF, added);
++ put_beancounter(cub);
++ spin_lock(&ub->ub_lock);
++ }
++ }
++}
++
++void ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size)
++{
++ unsigned long flags;
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long added_reserv;
++
++ if (!sock_has_ubc(sk))
++ return;
++
++ skbc = sock_bc(sk);
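++	/* the reserve itself is held on the topmost beancounter; lower levels are charged via charge_beancounter_notop() */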
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size);
++ added_reserv = -skbc->poll_reserv;
++ if (!ub_sock_makewreserv_locked(sk, res, bid2sid(res), size)) {
++ /*
++ * It looks a bit hackish, but it is compatible with both
++ * wait_for_xx_ubspace and poll.
++ * This __set_current_state is equivalent to a wakeup event
++ * right after spin_unlock_irqrestore.
++ */
++ __set_current_state(TASK_RUNNING);
++ added_reserv += skbc->poll_reserv;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ if (added_reserv)
++ charge_beancounter_notop(skbc->ub, res, added_reserv);
++ return;
++ }
++
++ ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n");
++ skbc->ub_waitspc = size;
++ if (!list_empty(&skbc->ub_sock_list)) {
++ ub_debug(UBD_NET_SOCKET,
++ "re-adding socket to beancounter %p.\n", ub);
++ goto out;
++ }
++
++ switch (res) {
++ case UB_TCPSNDBUF:
++ list_add_tail(&skbc->ub_sock_list,
++ &ub->ub_tcp_sk_list);
++ break;
++ case UB_OTHERSOCKBUF:
++ list_add_tail(&skbc->ub_sock_list,
++ &ub->ub_other_sk_list);
++ break;
++ default:
++ BUG();
++ }
++out:
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++
++/*
++ * Helpers
++ */
++
++void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
++ unsigned long size, int resource)
++{
++ if (!sock_has_ubc(sk))
++ return;
++
++ if (sock_bc(sk)->ub == NULL)
++ BUG();
++ skb_bc(skb)->ub = sock_bc(sk)->ub;
++ skb_bc(skb)->charged = size;
++ skb_bc(skb)->resource = resource;
++
++ /* Ugly. Ugly. Skb in sk writequeue can live without ref to sk */
++ if (skb->sk == NULL)
++ skb->sk = sk;
++}
++
++static inline void ub_skb_set_uncharge(struct sk_buff *skb)
++{
++ skb_bc(skb)->ub = NULL;
++ skb_bc(skb)->charged = 0;
++ skb_bc(skb)->resource = 0;
++}
++
++static inline void __uncharge_sockbuf(struct sock_beancounter *skbc,
++ struct user_beancounter *ub, int resource, unsigned long size)
++{
++ if (ub != NULL)
++ __uncharge_beancounter_locked(ub, resource, size);
++
++ if (skbc != NULL) {
++ if (skbc->ub_wcharged > size)
++ skbc->ub_wcharged -= size;
++ else
++ skbc->ub_wcharged = 0;
++ }
++}
++
++static void ub_update_rmem_thres(struct sock_beancounter *skub)
++{
++ struct user_beancounter *ub;
++
++ if (skub && skub->ub) {
++ for (ub = skub->ub; ub->parent != NULL; ub = ub->parent);
++ ub->ub_rmem_thres = ub->ub_parms[UB_TCPRCVBUF].barrier /
++ (ub->ub_parms[UB_NUMTCPSOCK].held + 1);
++ }
++}
++inline int ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask)
++{
++ memset(skb_bc(skb), 0, sizeof(struct skb_beancounter));
++ return 0;
++}
++
++inline void ub_skb_free_bc(struct sk_buff *skb)
++{
++}
++
++
++/*
++ * Charge socket number
++ */
++
++static inline int sk_alloc_beancounter(struct sock *sk)
++{
++ struct sock_beancounter *skbc;
++
++ skbc = sock_bc(sk);
++ memset(skbc, 0, sizeof(struct sock_beancounter));
++ return 0;
++}
++
++static inline void sk_free_beancounter(struct sock *sk)
++{
++}
++
++static int __sock_charge(struct sock *sk, int res)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++
++ ub = get_exec_ub();
++ if (ub == NULL)
++ return 0;
++ if (sk_alloc_beancounter(sk) < 0)
++ return -ENOMEM;
++
++ skbc = sock_bc(sk);
++ INIT_LIST_HEAD(&skbc->ub_sock_list);
++
++ if (charge_beancounter(ub, res, 1, UB_HARD) < 0)
++ goto out_limit;
++
++	/* TCP listen sock or process keeps a reference to the UB */
++ skbc->ub = get_beancounter(ub);
++ return 0;
++
++out_limit:
++ sk_free_beancounter(sk);
++ return -ENOMEM;
++}
++
++int ub_tcp_sock_charge(struct sock *sk)
++{
++ int ret;
++
++ ret = __sock_charge(sk, UB_NUMTCPSOCK);
++ ub_update_rmem_thres(sock_bc(sk));
++
++ return ret;
++}
++
++int ub_other_sock_charge(struct sock *sk)
++{
++ return __sock_charge(sk, UB_NUMOTHERSOCK);
++}
++
++EXPORT_SYMBOL(ub_other_sock_charge);
++
++int ub_sock_charge(struct sock *sk, int family, int type)
++{
++ return (IS_TCP_SOCK(family, type) ?
++ ub_tcp_sock_charge(sk) : ub_other_sock_charge(sk));
++}
++
++/*
++ * Uncharge socket number
++ */
++
++void ub_sock_uncharge(struct sock *sk)
++{
++ int is_tcp_sock;
++ unsigned long flags;
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long reserv;
++
++ if (!sock_has_ubc(sk))
++ return;
++
++ is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type);
++ skbc = sock_bc(sk);
++ ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk);
++
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (!list_empty(&skbc->ub_sock_list)) {
++ ub_debug(UBD_NET_SOCKET,
++ "ub_sock_uncharge: removing from ub(%p) queue.\n",
++ skbc);
++ list_del_init(&skbc->ub_sock_list);
++ }
++
++ reserv = skbc->poll_reserv;
++ __uncharge_beancounter_locked(ub,
++ (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
++ reserv);
++ __uncharge_beancounter_locked(ub,
++ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
++
++	/* The check sk->sk_family != PF_NETLINK is made because the skb is
++	 * queued to the kernel end of the socket while charged to the user one.
++	 * Den */
++ if (skbc->ub_wcharged > reserv &&
++ sk->sk_family != PF_NETLINK) {
++ skbc->ub_wcharged -= reserv;
++ printk(KERN_WARNING
++ "ub_sock_uncharge: wch=%lu for ub %p (%d).\n",
++ skbc->ub_wcharged, skbc->ub, skbc->ub->ub_uid);
++ } else
++ skbc->ub_wcharged = 0;
++ skbc->poll_reserv = 0;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ uncharge_beancounter_notop(skbc->ub,
++ (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
++ reserv);
++ uncharge_beancounter_notop(skbc->ub,
++ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
++
++ put_beancounter(skbc->ub);
++ sk_free_beancounter(sk);
++}
++
++/*
++ * Send - receive buffers
++ */
++
++/* Special case for netlink_dump - (un)charges precalculated size */
++int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)
++{
++ int ret;
++ unsigned long chargesize;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ chargesize = skb_charge_fullsize(skb);
++ ret = charge_beancounter(sock_bc(sk)->ub,
++ UB_DGRAMRCVBUF, chargesize, UB_HARD);
++ if (ret < 0)
++ return ret;
++ ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
++ return ret;
++}
++
++/*
++ * Poll reserv accounting
++ */
++static int ub_sock_makewreserv_locked(struct sock *sk,
++ int bufid, int sockid, unsigned long size)
++{
++ unsigned long wcharge_added;
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++
++ if (!sock_has_ubc(sk))
++ goto out;
++
++ skbc = sock_bc(sk);
++ if (skbc->poll_reserv >= size) /* no work to be done */
++ goto out;
++
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ ub->ub_parms[bufid].held += size - skbc->poll_reserv;
++
++ wcharge_added = 0;
++ /*
++ * Logic:
++ * 1) when used memory hits barrier, we set wmem_pressure;
++ * wmem_pressure is reset under barrier/2;
++ * between barrier/2 and barrier we limit per-socket buffer growth;
++ * 2) each socket is guaranteed to get (limit-barrier)/maxsockets
++	 * calculated on the basis of memory eaten after the barrier is hit
++ */
++ skbc = sock_bc(sk);
++ if (!ub_hfbarrier_hit(ub, bufid)) {
++ if (ub->ub_wmem_pressure)
++ ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 "
++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
++ sk, size, skbc->poll_reserv,
++ ub->ub_parms[bufid].held,
++ skbc->ub_wcharged, sk->sk_sndbuf);
++ ub->ub_wmem_pressure = 0;
++ }
++ if (ub_barrier_hit(ub, bufid)) {
++ if (!ub->ub_wmem_pressure)
++ ub_debug(UBD_NET_SEND, "makewres: pressure -> 1 "
++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
++ sk, size, skbc->poll_reserv,
++ ub->ub_parms[bufid].held,
++ skbc->ub_wcharged, sk->sk_sndbuf);
++ ub->ub_wmem_pressure = 1;
++ wcharge_added = size - skbc->poll_reserv;
++ skbc->ub_wcharged += wcharge_added;
++ if (skbc->ub_wcharged * ub->ub_parms[sockid].limit +
++ ub->ub_parms[bufid].barrier >
++ ub->ub_parms[bufid].limit)
++ goto unroll;
++ }
++ if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit)
++ goto unroll;
++
++ ub_adjust_maxheld(ub, bufid);
++ skbc->poll_reserv = size;
++out:
++ return 0;
++
++unroll:
++ ub_debug(UBD_NET_SEND,
++ "makewres: deny "
++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
++ sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held,
++ skbc->ub_wcharged, sk->sk_sndbuf);
++ skbc->ub_wcharged -= wcharge_added;
++ ub->ub_parms[bufid].failcnt++;
++ ub->ub_parms[bufid].held -= size - skbc->poll_reserv;
++ return -ENOMEM;
++}
++
++int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long flags;
++ unsigned long added_reserv;
++ int err;
++
++ skbc = sock_bc(sk);
++
++ /*
++	 * This function guarantees a sufficient reserve upon return
++	 * only if sk has a single user. We can check poll_reserv without
++ * serialization and avoid locking if the reserve already exists.
++ */
++ if (!sock_has_ubc(sk) || skbc->poll_reserv >= size)
++ return 0;
++
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ added_reserv = -skbc->poll_reserv;
++ err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size);
++ added_reserv += skbc->poll_reserv;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ if (added_reserv)
++ charge_beancounter_notop(skbc->ub, bufid, added_reserv);
++
++ return err;
++}
++
++int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long flags;
++ unsigned long added_reserv;
++ int err;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ skbc = sock_bc(sk);
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ added_reserv = -skbc->poll_reserv;
++ err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size);
++ added_reserv += skbc->poll_reserv;
++ if (!err)
++ skbc->poll_reserv -= size;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ if (added_reserv)
++ charge_beancounter_notop(skbc->ub, bufid, added_reserv);
++
++ return err;
++}
++
++void ub_sock_ret_wreserv(struct sock *sk, int bufid,
++ unsigned long size, unsigned long ressize)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long extra;
++ unsigned long flags;
++
++ if (!sock_has_ubc(sk))
++ return;
++
++ extra = 0;
++ skbc = sock_bc(sk);
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ skbc->poll_reserv += size;
++ if (skbc->poll_reserv > ressize) {
++ extra = skbc->poll_reserv - ressize;
++ __uncharge_beancounter_locked(ub, bufid, extra);
++
++ if (skbc->ub_wcharged > skbc->poll_reserv - ressize)
++ skbc->ub_wcharged -= skbc->poll_reserv - ressize;
++ else
++ skbc->ub_wcharged = 0;
++ skbc->poll_reserv = ressize;
++ }
++
++ ub_tcp_snd_wakeup(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ if (extra)
++ uncharge_beancounter_notop(skbc->ub, bufid, extra);
++}
++
++long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size)
++{
++ DECLARE_WAITQUEUE(wait, current);
++
++ add_wait_queue(sk->sk_sleep, &wait);
++ for (;;) {
++ if (signal_pending(current))
++ break;
++ set_current_state(TASK_INTERRUPTIBLE);
++ if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size))
++ break;
++
++ if (sk->sk_shutdown & SEND_SHUTDOWN)
++ break;
++ if (sk->sk_err)
++ break;
++ ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size);
++ timeo = schedule_timeout(timeo);
++ }
++ __set_current_state(TASK_RUNNING);
++ remove_wait_queue(sk->sk_sleep, &wait);
++ return timeo;
++}
++
++int ub_sock_makewres_other(struct sock *sk, unsigned long size)
++{
++ return ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size);
++}
++
++int ub_sock_makewres_tcp(struct sock *sk, unsigned long size)
++{
++ return ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size);
++}
++
++int ub_sock_getwres_other(struct sock *sk, unsigned long size)
++{
++ return ub_sock_get_wreserv(sk, UB_OTHERSOCKBUF, size);
++}
++
++int ub_sock_getwres_tcp(struct sock *sk, unsigned long size)
++{
++ return ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size);
++}
++
++void ub_sock_retwres_other(struct sock *sk, unsigned long size,
++ unsigned long ressize)
++{
++ ub_sock_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize);
++}
++
++void ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
++ unsigned long ressize)
++{
++ ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize);
++}
++
++void ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz)
++{
++ ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz);
++}
++
++void ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)
++{
++ ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz);
++}
++
++void ub_sock_sndqueuedel(struct sock *sk)
++{
++ struct sock_beancounter *skbc;
++ unsigned long flags;
++
++ if (!sock_has_ubc(sk))
++ return;
++ skbc = sock_bc(sk);
++
++ /* race with write_space callback of other socket */
++ spin_lock_irqsave(&skbc->ub->ub_lock, flags);
++ list_del_init(&skbc->ub_sock_list);
++ spin_unlock_irqrestore(&skbc->ub->ub_lock, flags);
++}
++
++/*
++ * UB_DGRAMRCVBUF
++ */
++
++int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)
++{
++ unsigned long chargesize;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ chargesize = skb_charge_fullsize(skb);
++ if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF,
++ chargesize, UB_HARD))
++ return -ENOMEM;
++
++ ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
++ return 0;
++}
++
++EXPORT_SYMBOL(ub_sockrcvbuf_charge);
++
++static void ub_sockrcvbuf_uncharge(struct sk_buff *skb)
++{
++ uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF,
++ skb_bc(skb)->charged);
++ ub_skb_set_uncharge(skb);
++}
++
++/*
++ * UB_TCPRCVBUF
++ */
++static int charge_tcprcvbuf(struct sock *sk, struct sk_buff *skb,
++ enum severity strict)
++{
++ int retval;
++ unsigned long flags;
++ struct user_beancounter *ub;
++ unsigned long chargesize;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ /*
++ * Memory pressure reactions:
++ * 1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND)
++ * 2) set UB_RMEM_SHRINK and tcp_clamp_window()
++ * tcp_collapse_queues() if rmem_alloc > rcvbuf
++ * 3) drop OFO, tcp_purge_ofo()
++ * 4) drop all.
++ * Currently, we do #2 and #3 at once (which means that current
++ * collapsing of OFO queue in tcp_collapse_queues() is a waste of time,
++ * for example...)
++ * On memory pressure we jump from #0 to #3, and when the pressure
++ * subsides, to #1.
++ */
++ retval = 0;
++ chargesize = skb_charge_fullsize(skb);
++
++ for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_parms[UB_TCPRCVBUF].held += chargesize;
++ if (ub->ub_parms[UB_TCPRCVBUF].held >
++ ub->ub_parms[UB_TCPRCVBUF].barrier &&
++ strict != UB_FORCE)
++ goto excess;
++ ub_adjust_maxheld(ub, UB_TCPRCVBUF);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++out:
++ if (retval == 0) {
++ charge_beancounter_notop(sock_bc(sk)->ub, UB_TCPRCVBUF,
++ chargesize);
++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
++ }
++ return retval;
++
++excess:
++ ub->ub_rmem_pressure = UB_RMEM_SHRINK;
++ if (strict == UB_HARD)
++ retval = -ENOMEM;
++ if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit)
++ retval = -ENOMEM;
++ /*
++ * We try to leave numsock*maxadvmss as a reserve for sockets not
++ * queueing any data yet (if the difference between the barrier and the
++ * limit is enough for this reserve).
++ */
++ if (ub->ub_parms[UB_TCPRCVBUF].held +
++ ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss
++ > ub->ub_parms[UB_TCPRCVBUF].limit &&
++ atomic_read(&sk->sk_rmem_alloc))
++ retval = -ENOMEM;
++ if (retval) {
++ ub->ub_parms[UB_TCPRCVBUF].held -= chargesize;
++ ub->ub_parms[UB_TCPRCVBUF].failcnt++;
++ }
++ ub_adjust_maxheld(ub, UB_TCPRCVBUF);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ goto out;
++}
++
++int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)
++{
++ return charge_tcprcvbuf(sk, skb, UB_HARD);
++}
++
++int ub_tcprcvbuf_charge_forced(struct sock *sk, struct sk_buff *skb)
++{
++ return charge_tcprcvbuf(sk, skb, UB_FORCE);
++}
++
++static void ub_tcprcvbuf_uncharge(struct sk_buff *skb)
++{
++ unsigned long flags;
++ unsigned long held, bar;
++ int prev_pres;
++ struct user_beancounter *ub;
++
++ for (ub = skb_bc(skb)->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) {
++ printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n",
++ skb_bc(skb)->charged,
++ ub, ub->ub_parms[UB_TCPRCVBUF].held);
++ /* ass-saving bung */
++ skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held;
++ }
++ ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged;
++ held = ub->ub_parms[UB_TCPRCVBUF].held;
++ bar = ub->ub_parms[UB_TCPRCVBUF].barrier;
++ prev_pres = ub->ub_rmem_pressure;
++ if (held <= bar - (bar >> 2))
++ ub->ub_rmem_pressure = UB_RMEM_EXPAND;
++ else if (held <= bar)
++ ub->ub_rmem_pressure = UB_RMEM_KEEP;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ uncharge_beancounter_notop(skb_bc(skb)->ub, UB_TCPRCVBUF,
++ skb_bc(skb)->charged);
++ ub_skb_set_uncharge(skb);
++}
++
++
++/*
++ * UB_OTHERSOCKBUF
++ */
++
++static void ub_socksndbuf_uncharge(struct sk_buff *skb)
++{
++ unsigned long flags;
++ struct user_beancounter *ub, *cub;
++ struct sock_beancounter *sk_bc;
++
++ /* resource was set. no check for ub required */
++ cub = skb_bc(skb)->ub;
++ for (ub = cub; ub->parent != NULL; ub = ub->parent);
++ skb_bc(skb)->ub = NULL;
++ if (skb->sk != NULL)
++ sk_bc = sock_bc(skb->sk);
++ else
++ sk_bc = NULL;
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_sockbuf(sk_bc, ub, UB_OTHERSOCKBUF,
++ skb_bc(skb)->charged);
++ ub_sock_snd_wakeup(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ uncharge_beancounter_notop(cub, UB_OTHERSOCKBUF, skb_bc(skb)->charged);
++ ub_skb_set_uncharge(skb);
++}
++
++static void ub_tcpsndbuf_uncharge(struct sk_buff *skb)
++{
++ unsigned long flags;
++ struct user_beancounter *ub, *cub;
++
++ /* resource can be not set, called manually */
++ cub = skb_bc(skb)->ub;
++ if (cub == NULL)
++ return;
++ for (ub = cub; ub->parent != NULL; ub = ub->parent);
++ skb_bc(skb)->ub = NULL;
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_sockbuf(sock_bc(skb->sk), ub, UB_TCPSNDBUF,
++ skb_bc(skb)->charged);
++ ub_tcp_snd_wakeup(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ uncharge_beancounter_notop(cub, UB_TCPSNDBUF, skb_bc(skb)->charged);
++ ub_skb_set_uncharge(skb);
++}
++
++void ub_skb_uncharge(struct sk_buff *skb)
++{
++ switch (skb_bc(skb)->resource) {
++ case UB_TCPSNDBUF:
++ ub_tcpsndbuf_uncharge(skb);
++ break;
++ case UB_TCPRCVBUF:
++ ub_tcprcvbuf_uncharge(skb);
++ break;
++ case UB_DGRAMRCVBUF:
++ ub_sockrcvbuf_uncharge(skb);
++ break;
++ case UB_OTHERSOCKBUF:
++ ub_socksndbuf_uncharge(skb);
++ break;
++ }
++}
++
++EXPORT_SYMBOL(ub_skb_uncharge); /* due to skb_orphan()/conntracks */
++
++/*
++ * TCP send buffers accounting. Paged part
++ */
++int ub_sock_tcp_chargepage(struct sock *sk)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long added;
++ unsigned long flags;
++ int err;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ skbc = sock_bc(sk);
++
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ /* Try to charge full page */
++ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK,
++ PAGE_SIZE);
++ if (err == 0) {
++ skbc->poll_reserv -= PAGE_SIZE;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, PAGE_SIZE);
++ return 0;
++ }
++
++	/* Try to charge enough of the page to satisfy sys_select. The possible
++	   overdraft for the rest of the page is generally better than
++	   requesting the full page in tcp_poll. This should not happen
++	   frequently. Den */
++ added = -skbc->poll_reserv;
++ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK,
++ SOCK_MIN_UBCSPACE);
++ if (err < 0) {
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return err;
++ }
++ __charge_beancounter_locked(ub, UB_TCPSNDBUF,
++ PAGE_SIZE - skbc->poll_reserv,
++ UB_FORCE);
++ added += PAGE_SIZE;
++ skbc->poll_reserv = 0;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added);
++
++ return 0;
++
++}
++
++void ub_sock_tcp_detachpage(struct sock *sk)
++{
++ struct sk_buff *skb;
++
++ if (!sock_has_ubc(sk))
++ return;
++
++	/* The page has just been detached from the socket. The last skb in the
++	   queue with a paged part holds a reference to it */
++ skb = skb_peek_tail(&sk->sk_write_queue);
++ if (skb == NULL) {
++ /* If the queue is empty - all data is sent and page is about
++ to be freed */
++ uncharge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, PAGE_SIZE);
++ return;
++ }
++	/* The last skb is a good approximation for the last skb with a paged part */
++ skb_bc(skb)->charged += PAGE_SIZE;
++}
++
++static int charge_tcpsndbuf(struct sock *sk, struct sk_buff *skb,
++ enum severity strict)
++{
++ int ret;
++ unsigned long chargesize;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ chargesize = skb_charge_fullsize(skb);
++ ret = charge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, chargesize,
++ strict);
++ if (ret < 0)
++ return ret;
++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
++ sock_bc(sk)->ub_wcharged += chargesize;
++ return ret;
++}
++
++int ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb)
++{
++ return charge_tcpsndbuf(sk, skb, UB_HARD);
++}
++
++int ub_tcpsndbuf_charge_forced(struct sock *sk, struct sk_buff *skb)
++{
++ return charge_tcpsndbuf(sk, skb, UB_FORCE);
++}
++
++/*
++ * Initialization stuff
++ */
++int __init skbc_cache_init(void)
++{
++ return 0;
++}
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_oom.c linux-2.6.15-ve025stab014/kernel/ub/ub_oom.c
+--- linux-2.6.15.orig/kernel/ub/ub_oom.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_oom.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,157 @@
++/*
++ * kernel/ub/ub_oom.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/sched.h>
++#include <linux/spinlock.h>
++#include <linux/mm.h>
++#include <linux/swap.h>
++
++#include <asm/page.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_oom.h>
++#include <ub/ub_misc.h>
++#include <ub/ub_hash.h>
++
++static DEFINE_SPINLOCK(oom_generation_lock);
++static int oom_generation = 0;
++static int oom_kill_counter = 0;
++
++void ub_oom_kill_task(struct task_struct *tsk, struct mm_struct * mm)
++{
++ struct user_beancounter *ub;
++ static struct ub_rate_info ri = { 5, 60*HZ };
++ char ub_uid[64];
++
++ ub = mm->mm_ub;
++ if (ub)
++ print_ub_uid(ub, ub_uid, sizeof(ub_uid));
++ else {
++ ub_uid[0] = '-';
++ ub_uid[1] = '1';
++ ub_uid[2] = '\0';
++ }
++
++ printk(KERN_INFO"MM to kill %p (UB=%s, VM=%lu, free=%u).\n",
++ mm, ub_uid, mm->total_vm, nr_free_pages());
++
++ if (ub_ratelimit(&ri))
++ show_mem();
++
++ WARN_ON(!spin_is_locked(&oom_generation_lock));
++ WARN_ON(!test_ti_thread_flag(tsk->thread_info, TIF_MEMDIE));
++ mm->mm_ub->ub_parms[UB_OOMGUARPAGES].failcnt++;
++ oom_kill_counter++;
++}
++
++void ub_oom_init(void)
++{
++ current->task_bc.oom_generation = oom_generation;
++}
++
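++/*
++ * Start an OOM handling cycle.  Returns 0 with oom_generation_lock held
++ * (released later by ub_oom_stop), or -EALREADY if another killer has
++ * already advanced the OOM generation.
++ */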
++int ub_oom_start(void)
++{
++ int i;
++ unsigned long flags;
++ struct user_beancounter *ub;
++
++ i = current->task_bc.oom_generation;
++ spin_lock(&oom_generation_lock);
++ /* Someone already helped us */
++ if (i != oom_generation) {
++ spin_unlock(&oom_generation_lock);
++ return -EALREADY;
++ }
++ /* OOM in progress */
++ if (oom_kill_counter) {
++ spin_unlock(&oom_generation_lock);
++ schedule_timeout_uninterruptible(5 * HZ);
++ spin_lock(&oom_generation_lock);
++ if (i != oom_generation) {
++ spin_unlock(&oom_generation_lock);
++ return -EALREADY;
++ }
++ }
++ /*
++ * Some process is stuck exiting.
++ * No choice other than to kill something else.
++ * Return with oom_generation_lock held hoping
++ * not to forget to drop it later
++ */
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ for_each_beancounter(i, ub)
++ ub->ub_oom_noproc = 0;
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return 0;
++}
++
++static inline long ub_current_overdraft(struct user_beancounter *ub)
++{
++ return ub->ub_parms[UB_OOMGUARPAGES].held +
++ ((ub->ub_parms[UB_KMEMSIZE].held
++ + ub->ub_parms[UB_TCPSNDBUF].held
++ + ub->ub_parms[UB_TCPRCVBUF].held
++ + ub->ub_parms[UB_OTHERSOCKBUF].held
++ + ub->ub_parms[UB_DGRAMRCVBUF].held)
++ >> PAGE_SHIFT) - ub->ub_parms[UB_OOMGUARPAGES].barrier;
++}
++
++/*
++ * Select a user_beancounter in which to find a task to be killed.
++ * Pick the beancounter with the biggest excess of resource usage
++ * so that a process belonging to it can be killed later, or return
++ * NULL if no beancounter has such an excess.
++ */
++
++struct user_beancounter *ub_oom_select_worst(void)
++{
++ struct user_beancounter *ub, *walkp;
++ unsigned long flags, ub_maxover;
++ int i;
++
++ ub_maxover = 0;
++ ub = NULL;
++ spin_lock_irqsave(&ub_hash_lock, flags);
++
++ for_each_beancounter(i, walkp) {
++ if (walkp->parent != NULL)
++ continue;
++ if (walkp->ub_oom_noproc)
++ continue;
++ if (ub_current_overdraft(walkp) <= ub_maxover)
++ continue;
++
++ ub = walkp;
++ }
++	if (ub) {
++ get_beancounter(ub);
++ ub->ub_oom_noproc = 1;
++ }
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return ub;
++}
++
++void ub_oom_stop(void)
++{
++ spin_unlock(&oom_generation_lock);
++}
++
++void ub_oom_task_exit(struct task_struct *tsk)
++{
++ /* In order to allow OOM to happen from now on */
++ spin_lock(&oom_generation_lock);
++ if (test_ti_thread_flag(tsk->thread_info, TIF_MEMDIE)) {
++ if (--oom_kill_counter == 0)
++ oom_generation++;
++ printk("OOM killed process %d exited, free=%u.\n",
++ tsk->pid, nr_free_pages());
++ }
++ spin_unlock(&oom_generation_lock);
++}
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_page_bc.c linux-2.6.15-ve025stab014/kernel/ub/ub_page_bc.c
+--- linux-2.6.15.orig/kernel/ub/ub_page_bc.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_page_bc.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,418 @@
++/*
++ * kernel/ub/ub_page_bc.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/spinlock.h>
++#include <linux/slab.h>
++#include <linux/mm.h>
++#include <linux/gfp.h>
++#include <linux/vmalloc.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_hash.h>
++#include <ub/ub_vmpages.h>
++#include <ub/ub_page.h>
++
++static kmem_cache_t *pb_cachep;
++static spinlock_t pb_lock = SPIN_LOCK_UNLOCKED;
++static struct page_beancounter **pb_hash_table;
++static unsigned int pb_hash_mask;
++
++/*
++ * Auxiliary stuff
++ */
++
++static inline struct page_beancounter *next_page_pb(struct page_beancounter *p)
++{
++ return list_entry(p->page_list.next, struct page_beancounter,
++ page_list);
++}
++
++static inline struct page_beancounter *prev_page_pb(struct page_beancounter *p)
++{
++ return list_entry(p->page_list.prev, struct page_beancounter,
++ page_list);
++}
++
++/*
++ * Held pages manipulation
++ */
++static inline void set_held_pages(struct user_beancounter *bc)
++{
++ /* all three depend on ub_held_pages */
++ __ub_update_physpages(bc);
++ __ub_update_oomguarpages(bc);
++ __ub_update_privvm(bc);
++}
++
++static inline void do_dec_held_pages(struct user_beancounter *ub, int value)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_held_pages -= value;
++ set_held_pages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++static void dec_held_pages(struct user_beancounter *ub, int value)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_dec_held_pages(ub, value);
++}
++
++static inline void do_inc_held_pages(struct user_beancounter *ub, int value)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_held_pages += value;
++ set_held_pages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++static void inc_held_pages(struct user_beancounter *ub, int value)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_inc_held_pages(ub, value);
++}
++
++/*
++ * Alloc - free
++ */
++
++inline int __pb_alloc(struct page_beancounter **pbc, gfp_t mask)
++{
++ *pbc = kmem_cache_alloc(pb_cachep, mask);
++ if (*pbc != NULL)
++ (*pbc)->pb_magic = PB_MAGIC;
++ return (*pbc == NULL);
++}
++
++inline void pb_free(struct page_beancounter **pb)
++{
++ if (*pb != NULL) {
++ kmem_cache_free(pb_cachep, *pb);
++ *pb = NULL;
++ }
++}
++
++void pb_free_list(struct page_beancounter **p_pb)
++{
++ struct page_beancounter *list = *p_pb, *pb;
++ while (list) {
++ pb = list;
++ list = list->next_hash;
++ pb_free(&pb);
++ }
++ *p_pb = NULL;
++}
++
++/*
++ * head -> <new objs> -> <old objs> -> ...
++ */
++static int __alloc_list(struct page_beancounter **head, int num)
++{
++ struct page_beancounter *pb;
++
++ while (num > 0) {
++ if (pb_alloc(&pb))
++ return -1;
++ pb->next_hash = *head;
++ *head = pb;
++ num--;
++ }
++
++ return num;
++}
++
++/*
++ * Ensure that the list contains at least num elements.
++ * p_pb points to an initialized list, which may be of zero length.
++ *
++ * mm->page_table_lock should be held
++ */
++int pb_alloc_list(struct page_beancounter **p_pb, int num)
++{
++ struct page_beancounter *list;
++
++ for (list = *p_pb; list != NULL && num; list = list->next_hash, num--);
++ if (!num)
++ return 0;
++
++ /*
++ * *p_pb(after) *p_pb (before)
++ * \ \
++ * <new objs> -...-> <old objs> -> ...
++ */
++ if (__alloc_list(p_pb, num) < 0)
++ goto nomem;
++ return 0;
++
++nomem:
++ pb_free_list(p_pb);
++ return -ENOMEM;
++}
++
++/*
++ * Hash routines
++ */
++
++static inline int pb_hash(struct user_beancounter *ub, struct page *page)
++{
++ return (((unsigned long)ub << 16) + ((unsigned long)ub >> 16) +
++ (page_to_pfn(page) >> 7)) & pb_hash_mask;
++}
++
++/* pb_lock should be held */
++static inline void insert_pb(struct page_beancounter *p, struct page *page,
++ struct user_beancounter *ub, int hash)
++{
++ p->page = page;
++ p->ub = get_beancounter(ub);
++ p->next_hash = pb_hash_table[hash];
++ pb_hash_table[hash] = p;
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ ub->ub_stat[smp_processor_id()].pbcs++;
++#endif
++}
++
++/*
++ * Heart
++ */
++
++int pb_alloc_all(struct page_beancounter **pbs)
++{
++ int i, need_alloc;
++ unsigned long flags;
++ struct user_beancounter *ub;
++
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ need_alloc = 0;
++ for_each_beancounter(i, ub)
++ need_alloc++;
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++
++ if (!__alloc_list(pbs, need_alloc))
++ return 0;
++
++ pb_free_list(pbs);
++ return -ENOMEM;
++}
++
++int pb_add_ref(struct page *page, struct mm_struct *mm,
++ struct page_beancounter **p_pb)
++{
++ int hash;
++ struct user_beancounter *bc;
++ struct page_beancounter *p;
++ int shift;
++ struct page_beancounter *head;
++
++ bc = mm->mm_ub;
++ if (bc == NULL)
++ return 0;
++
++ if (!PageAnon(page) && is_shmem_mapping(page->mapping))
++ return 0;
++
++ hash = pb_hash(bc, page);
++
++ spin_lock(&pb_lock);
++ for (p = pb_hash_table[hash];
++ p != NULL && (p->page != page || p->ub != bc);
++ p = p->next_hash);
++ if (p != NULL) {
++ /*
++ * This page is already associated with this beancounter,
++ * increment the usage counter.
++ */
++ PB_COUNT_INC(p->refcount);
++ spin_unlock(&pb_lock);
++ return 0;
++ }
++
++ p = *p_pb;
++ if (p == NULL) {
++ spin_unlock(&pb_lock);
++ return -1;
++ }
++
++ *p_pb = NULL;
++ insert_pb(p, page, bc, hash);
++ head = page_pbc(page);
++
++ if (head != NULL) {
++ /*
++ * Move the first element to the end of the list.
++ * List head (pb_head) is set to the next entry.
++ * Note that this code works even if head is the only element
++ * on the list (because it's cyclic).
++ */
++ BUG_ON(head->pb_magic != PB_MAGIC);
++ page_pbc(page) = next_page_pb(head);
++ PB_SHIFT_INC(head->refcount);
++ shift = PB_SHIFT_GET(head->refcount);
++ /*
++ * Update user beancounter, the share of head has been changed.
++ * Note that the shift counter is taken after increment.
++ */
++ dec_held_pages(head->ub, UB_PAGE_WEIGHT >> shift);
++ /* add the new page beancounter to the end of the list */
++ list_add_tail(&p->page_list, &page_pbc(page)->page_list);
++ } else {
++ page_pbc(page) = p;
++ shift = 0;
++ INIT_LIST_HEAD(&p->page_list);
++ }
++
++ p->refcount = PB_REFCOUNT_MAKE(shift, 1);
++ spin_unlock(&pb_lock);
++
++ /* update user beancounter for the new page beancounter */
++ inc_held_pages(bc, UB_PAGE_WEIGHT >> shift);
++ return 0;
++}
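++
++/*
++ * Illustrative walk-through of the shift-based accounting above
++ * (the concrete value of UB_PAGE_WEIGHT is assumed here, it is defined
++ * elsewhere): suppose UB_PAGE_WEIGHT == 4096 and a page is mapped by a
++ * single beancounter A, so A is charged 4096 "held" units.  When a second
++ * beancounter B calls pb_add_ref(), the old head A gets its shift bumped
++ * to 1 and is re-charged 4096 >> 1 == 2048, and B joins with the same
++ * shift, also charged 2048 -- the per-page total stays near one page
++ * weight while the cost is split between the sharers.
++ */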
++
++void pb_remove_ref(struct page *page, struct mm_struct *mm)
++{
++ int hash;
++ struct user_beancounter *bc;
++ struct page_beancounter *p, **q;
++ int shift, shiftt;
++
++ bc = mm->mm_ub;
++ if (bc == NULL)
++ return;
++
++ if (!PageAnon(page) && is_shmem_mapping(page->mapping))
++ return;
++
++ hash = pb_hash(bc, page);
++
++ spin_lock(&pb_lock);
++ BUG_ON(page_pbc(page) != NULL && page_pbc(page)->pb_magic != PB_MAGIC);
++ for (q = pb_hash_table + hash, p = *q;
++ p != NULL && (p->page != page || p->ub != bc);
++ q = &p->next_hash, p = *q);
++ if (p == NULL)
++ goto out_unlock;
++
++ PB_COUNT_DEC(p->refcount);
++ if (PB_COUNT_GET(p->refcount))
++ /*
++ * More references from the same user beancounter exist.
++ * Nothing needs to be done.
++ */
++ goto out_unlock;
++
++ /* remove from the hash list */
++ *q = p->next_hash;
++
++ shift = PB_SHIFT_GET(p->refcount);
++
++ dec_held_pages(p->ub, UB_PAGE_WEIGHT >> shift);
++
++ if (page_pbc(page) == p) {
++ if (list_empty(&p->page_list))
++ goto out_free;
++ page_pbc(page) = next_page_pb(p);
++ }
++ list_del(&p->page_list);
++ put_beancounter(p->ub);
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ p->ub->ub_stat[smp_processor_id()].pbcs--;
++#endif
++ pb_free(&p);
++
++ /* Now balance the list. Move the tail and adjust its shift counter. */
++ p = prev_page_pb(page_pbc(page));
++ shiftt = PB_SHIFT_GET(p->refcount);
++ page_pbc(page) = p;
++ PB_SHIFT_DEC(p->refcount);
++
++ inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
++
++ /*
++ * If the shift counter of the moved beancounter is different from the
++ * removed one's, repeat the procedure for one more tail beancounter
++ */
++ if (shiftt > shift) {
++ p = prev_page_pb(page_pbc(page));
++ page_pbc(page) = p;
++ PB_SHIFT_DEC(p->refcount);
++ inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
++ }
++ spin_unlock(&pb_lock);
++ return;
++
++out_free:
++ page_pbc(page) = NULL;
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ p->ub->ub_stat[smp_processor_id()].pbcs--;
++#endif
++ put_beancounter(p->ub);
++ pb_free(&p);
++out_unlock:
++ spin_unlock(&pb_lock);
++ return;
++}
++
++void pb_add_list_ref(struct page *page, struct mm_struct *mm,
++ struct page_beancounter **p_pb)
++{
++ struct page_beancounter *list, *pb;
++
++ pb = *p_pb;
++ if (pb == NULL) {
++ /* Typical case due to caller constraints */
++ if (pb_add_ref(page, mm, &pb))
++ BUG();
++ return;
++ }
++
++ list = pb->next_hash;
++ if (pb_add_ref(page, mm, &pb))
++ BUG();
++ if (pb != NULL) {
++ pb->next_hash = list;
++ list = pb;
++ }
++ *p_pb = list;
++}
++
++struct user_beancounter *pb_grab_page_ub(struct page *page)
++{
++ struct page_beancounter *pb;
++ struct user_beancounter *ub;
++
++ spin_lock(&pb_lock);
++ pb = page_pbc(page);
++ ub = (pb == NULL ? ERR_PTR(-EINVAL) :
++ get_beancounter(pb->ub));
++ spin_unlock(&pb_lock);
++ return ub;
++}
++
++void __init ub_init_pbc(void)
++{
++ unsigned long hash_size;
++
++ pb_cachep = kmem_cache_create("page_beancounter",
++ sizeof(struct page_beancounter), 0,
++ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
++ hash_size = num_physpages >> 2;
++ for (pb_hash_mask = 1;
++ (hash_size & pb_hash_mask) != hash_size;
++ pb_hash_mask = (pb_hash_mask << 1) + 1);
++ hash_size = pb_hash_mask + 1;
++ printk(KERN_INFO "Page beancounter hash is %lu entries.\n", hash_size);
++ pb_hash_table = vmalloc(hash_size * sizeof(struct page_beancounter *));
++ memset(pb_hash_table, 0, hash_size * sizeof(struct page_beancounter *));
++}
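++
++/*
++ * Sizing example for the mask loop above (hypothetical machine, the
++ * numbers are only illustrative): with num_physpages == 98304 (384 MB of
++ * 4 KB pages) hash_size starts at 24576; pb_hash_mask grows through
++ * 1, 3, 7, ... until (hash_size & pb_hash_mask) == hash_size, stopping at
++ * 0x7fff, so the final table holds pb_hash_mask + 1 == 32768 entries --
++ * for this value, hash_size rounded up to the next power of two.
++ */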
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_pages.c linux-2.6.15-ve025stab014/kernel/ub/ub_pages.c
+--- linux-2.6.15.orig/kernel/ub/ub_pages.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_pages.c 2006-01-27 14:48:07.000000000 +0300
+@@ -0,0 +1,530 @@
++/*
++ * kernel/ub/ub_pages.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/mm.h>
++#include <linux/highmem.h>
++#include <linux/virtinfo.h>
++#include <linux/module.h>
++#include <linux/shmem_fs.h>
++#include <linux/vmalloc.h>
++
++#include <asm/pgtable.h>
++#include <asm/page.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_vmpages.h>
++
++void warn_bad_rss(struct vm_area_struct *vma, unsigned long freed)
++{
++ static struct ub_rate_info ri = {
++ .burst = 10,
++ .interval = 40 * HZ,
++ };
++ struct user_beancounter *ub;
++ char ubuid[64] = "No UB";
++ unsigned long vmrss;
++
++ if (!ub_ratelimit(&ri))
++ return;
++
++ ub = vma->vm_mm->mm_ub;
++ if (ub)
++ print_ub_uid(ub, ubuid, sizeof(ubuid));
++
++ vmrss = get_vma_rss(vma) + freed;
++ printk(KERN_WARNING
++ "%s vm_rss: process pid %d comm %.20s flags %lx\n"
++ "vma %p/%p rss %lu/%lu freed %lu\n"
++ "flags %lx, ub %s\n",
++ vmrss > freed ? "Positive" : "Negative",
++ current->pid, current->comm, current->flags,
++ vma, vma->vm_mm, vmrss, vma_pages(vma), freed,
++ vma->vm_flags, ubuid);
++ dump_stack();
++}
++
++static inline unsigned long pages_in_pte_range(struct vm_area_struct *vma,
++ pmd_t *pmd, unsigned long addr, unsigned long end,
++ unsigned long *ret)
++{
++ pte_t *pte;
++ spinlock_t *ptl;
++
++ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
++ do {
++ if (!pte_none(*pte) && pte_present(*pte))
++ (*ret)++;
++ } while (pte++, addr += PAGE_SIZE, (addr != end));
++ pte_unmap_unlock(pte - 1, ptl);
++
++ return addr;
++}
++
++static inline unsigned long pages_in_pmd_range(struct vm_area_struct *vma,
++ pud_t *pud, unsigned long addr, unsigned long end,
++ unsigned long *ret)
++{
++ pmd_t *pmd;
++ unsigned long next;
++
++ pmd = pmd_offset(pud, addr);
++ do {
++ next = pmd_addr_end(addr, end);
++ if (pmd_none_or_clear_bad(pmd))
++ continue;
++ next = pages_in_pte_range(vma, pmd, addr, next, ret);
++ } while (pmd++, addr = next, (addr != end));
++
++ return addr;
++}
++
++static inline unsigned long pages_in_pud_range(struct vm_area_struct *vma,
++ pgd_t *pgd, unsigned long addr, unsigned long end,
++ unsigned long *ret)
++{
++ pud_t *pud;
++ unsigned long next;
++
++ pud = pud_offset(pgd, addr);
++ do {
++ next = pud_addr_end(addr, end);
++ if (pud_none_or_clear_bad(pud))
++ continue;
++ next = pages_in_pmd_range(vma, pud, addr, next, ret);
++ } while (pud++, addr = next, (addr != end));
++
++ return addr;
++}
++
++unsigned long pages_in_vma_range(struct vm_area_struct *vma,
++ unsigned long addr, unsigned long end)
++{
++ pgd_t *pgd;
++ unsigned long next;
++ unsigned long ret;
++
++ ret = 0;
++ BUG_ON(addr >= end);
++ pgd = pgd_offset(vma->vm_mm, addr);
++ do {
++ next = pgd_addr_end(addr, end);
++ if (pgd_none_or_clear_bad(pgd))
++ continue;
++ next = pages_in_pud_range(vma, pgd, addr, next, &ret);
++ } while (pgd++, addr = next, (addr != end));
++ return ret;
++}
++
++void fastcall __ub_update_physpages(struct user_beancounter *ub)
++{
++ ub->ub_parms[UB_PHYSPAGES].held = ub->ub_tmpfs_respages
++ + (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT);
++ ub_adjust_maxheld(ub, UB_PHYSPAGES);
++}
++
++void fastcall __ub_update_oomguarpages(struct user_beancounter *ub)
++{
++ ub->ub_parms[UB_OOMGUARPAGES].held =
++ ub->ub_parms[UB_PHYSPAGES].held + ub->ub_swap_pages;
++ ub_adjust_maxheld(ub, UB_OOMGUARPAGES);
++}
++
++void fastcall __ub_update_privvm(struct user_beancounter *ub)
++{
++ ub->ub_parms[UB_PRIVVMPAGES].held =
++ (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT)
++ + ub->ub_unused_privvmpages
++ + ub->ub_parms[UB_SHMPAGES].held;
++ ub_adjust_maxheld(ub, UB_PRIVVMPAGES);
++}
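++
++/*
++ * Worked example for the three helpers above (all numbers hypothetical;
++ * UB_PAGE_WEIGHT_SHIFT is defined elsewhere, 12 is assumed here): with
++ * ub_held_pages == 40960, ub_tmpfs_respages == 100, ub_swap_pages == 50,
++ * ub_unused_privvmpages == 30 and UB_SHMPAGES held == 20, the derived
++ * values become PHYSPAGES.held = 100 + (40960 >> 12) = 110,
++ * OOMGUARPAGES.held = 110 + 50 = 160, and
++ * PRIVVMPAGES.held = (40960 >> 12) + 30 + 20 = 60.
++ */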
++
++static inline int __charge_privvm_locked(struct user_beancounter *ub,
++ unsigned long s, enum severity strict)
++{
++ if (__charge_beancounter_locked(ub, UB_PRIVVMPAGES, s, strict) < 0)
++ return -ENOMEM;
++
++ ub->ub_unused_privvmpages += s;
++ return 0;
++}
++
++static void __unused_privvm_dec_locked(struct user_beancounter *ub,
++ long size)
++{
++ /* catch possible overflow */
++ if (ub->ub_unused_privvmpages < size) {
++ uncharge_warn(ub, UB_UNUSEDPRIVVM,
++ size, ub->ub_unused_privvmpages);
++ size = ub->ub_unused_privvmpages;
++ }
++ ub->ub_unused_privvmpages -= size;
++ __ub_update_privvm(ub);
++}
++
++void __ub_unused_privvm_dec(struct mm_struct *mm, long size)
++{
++ unsigned long flags;
++ struct user_beancounter *ub;
++
++ ub = mm->mm_ub;
++ if (ub == NULL)
++ return;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __unused_privvm_dec_locked(ub, size);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_unused_privvm_sub(struct mm_struct *mm,
++ struct vm_area_struct *vma, unsigned long count)
++{
++ if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
++ __ub_unused_privvm_dec(mm, count);
++}
++
++void ub_unused_privvm_add(struct mm_struct *mm,
++ struct vm_area_struct *vma, unsigned long size)
++{
++ unsigned long flags;
++ struct user_beancounter *ub;
++
++ ub = mm->mm_ub;
++ if (ub == NULL || !VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
++ return;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_unused_privvmpages += size;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++int ub_protected_charge(struct mm_struct *mm, unsigned long size,
++ unsigned long newflags, struct vm_area_struct *vma)
++{
++ unsigned long flags;
++ struct file *file;
++ struct user_beancounter *ub;
++
++ ub = mm->mm_ub;
++ if (ub == NULL)
++ return PRIVVM_NO_CHARGE;
++
++ flags = vma->vm_flags;
++ if (!((newflags ^ flags) & VM_WRITE))
++ return PRIVVM_NO_CHARGE;
++
++ file = vma->vm_file;
++ if (!VM_UB_PRIVATE(newflags | VM_WRITE, file))
++ return PRIVVM_NO_CHARGE;
++
++ if (flags & VM_WRITE)
++ return PRIVVM_TO_SHARED;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (__charge_privvm_locked(ub, size, UB_SOFT) < 0)
++ goto err;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return PRIVVM_TO_PRIVATE;
++
++err:
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return PRIVVM_ERROR;
++}
++
++int ub_memory_charge(struct mm_struct *mm, unsigned long size,
++ unsigned vm_flags, struct file *vm_file, int sv)
++{
++ struct user_beancounter *ub, *ubl;
++ unsigned long flags;
++
++ ub = mm->mm_ub;
++ if (ub == NULL)
++ return 0;
++
++ size >>= PAGE_SHIFT;
++ if (size > UB_MAXVALUE)
++ return -EINVAL;
++
++ BUG_ON(sv != UB_SOFT && sv != UB_HARD);
++
++ if (vm_flags & VM_LOCKED) {
++ if (charge_beancounter(ub, UB_LOCKEDPAGES, size, sv))
++ goto out_err;
++ }
++ if (VM_UB_PRIVATE(vm_flags, vm_file)) {
++ for (ubl = ub; ubl->parent != NULL; ubl = ubl->parent);
++ spin_lock_irqsave(&ubl->ub_lock, flags);
++ if (__charge_privvm_locked(ubl, size, sv))
++ goto out_private;
++ spin_unlock_irqrestore(&ubl->ub_lock, flags);
++ }
++ return 0;
++
++out_private:
++ spin_unlock_irqrestore(&ubl->ub_lock, flags);
++ if (vm_flags & VM_LOCKED)
++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
++out_err:
++ return -ENOMEM;
++}
++
++void ub_memory_uncharge(struct mm_struct *mm, unsigned long size,
++ unsigned vm_flags, struct file *vm_file)
++{
++ struct user_beancounter *ub;
++ unsigned long flags;
++
++ ub = mm->mm_ub;
++ if (ub == NULL)
++ return;
++
++ size >>= PAGE_SHIFT;
++
++ if (vm_flags & VM_LOCKED)
++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
++ if (VM_UB_PRIVATE(vm_flags, vm_file)) {
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __unused_privvm_dec_locked(ub, size);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ }
++}
++
++int ub_locked_charge(struct mm_struct *mm, unsigned long size)
++{
++ struct user_beancounter *ub;
++
++ ub = mm->mm_ub;
++ if (ub == NULL)
++ return 0;
++
++ return charge_beancounter(ub, UB_LOCKEDPAGES,
++ size >> PAGE_SHIFT, UB_HARD);
++}
++
++void ub_locked_uncharge(struct mm_struct *mm, unsigned long size)
++{
++ struct user_beancounter *ub;
++
++ ub = mm->mm_ub;
++ if (ub == NULL)
++ return;
++
++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
++}
++
++int ub_lockedshm_charge(struct shmem_inode_info *shi, unsigned long size)
++{
++ struct user_beancounter *ub;
++
++ ub = shi->shmi_ub;
++ if (ub == NULL)
++ return 0;
++
++ return charge_beancounter(ub, UB_LOCKEDPAGES,
++ size >> PAGE_SHIFT, UB_HARD);
++}
++
++void ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size)
++{
++ struct user_beancounter *ub;
++
++ ub = shi->shmi_ub;
++ if (ub == NULL)
++ return;
++
++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
++}
++
++
++static inline void do_ub_tmpfs_respages_inc(struct user_beancounter *ub)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_tmpfs_respages++;
++ __ub_update_physpages(ub);
++ __ub_update_oomguarpages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_tmpfs_respages_inc(struct shmem_inode_info *shi)
++{
++ struct user_beancounter *ub;
++
++ for (ub = shi->shmi_ub; ub != NULL; ub = ub->parent)
++ do_ub_tmpfs_respages_inc(ub);
++}
++
++static inline void do_ub_tmpfs_respages_sub(struct user_beancounter *ub,
++ unsigned long size)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ /* catch possible overflow */
++ if (ub->ub_tmpfs_respages < size) {
++ uncharge_warn(ub, UB_TMPFSPAGES,
++ size, ub->ub_tmpfs_respages);
++ size = ub->ub_tmpfs_respages;
++ }
++ ub->ub_tmpfs_respages -= size;
++	/* update the derived values that depend on tmpfs_respages */
++ __ub_update_physpages(ub);
++ __ub_update_oomguarpages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
++ unsigned long size)
++{
++ struct user_beancounter *ub;
++
++ for (ub = shi->shmi_ub; ub != NULL; ub = ub->parent)
++ do_ub_tmpfs_respages_sub(ub, size);
++}
++
++int ub_shmpages_charge(struct shmem_inode_info *shi, long size)
++{
++ int ret;
++ unsigned long flags;
++ struct user_beancounter *ub;
++
++ ub = shi->shmi_ub;
++ if (ub == NULL || size <= 0)
++ return 0;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ret = __charge_beancounter_locked(ub, UB_SHMPAGES, size, UB_HARD);
++ if (ret == 0)
++ __ub_update_privvm(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return ret;
++}
++
++void ub_shmpages_uncharge(struct shmem_inode_info *shi, long size)
++{
++ unsigned long flags;
++ struct user_beancounter *ub;
++
++ ub = shi->shmi_ub;
++ if (ub == NULL)
++ return;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_beancounter_locked(ub, UB_SHMPAGES, size);
++ __ub_update_privvm(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++#ifdef CONFIG_USER_SWAP_ACCOUNTING
++static inline void do_ub_swapentry_inc(struct user_beancounter *ub)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_swap_pages++;
++ __ub_update_oomguarpages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_swapentry_inc(struct swap_info_struct *si, pgoff_t num,
++ struct user_beancounter *ub)
++{
++ si->swap_ubs[num] = get_beancounter(ub);
++ for (; ub != NULL; ub = ub->parent)
++ do_ub_swapentry_inc(ub);
++}
++EXPORT_SYMBOL(ub_swapentry_inc);
++
++static inline void do_ub_swapentry_dec(struct user_beancounter *ub)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (ub->ub_swap_pages <= 0)
++ uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages);
++ else
++ ub->ub_swap_pages--;
++ __ub_update_oomguarpages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_swapentry_dec(struct swap_info_struct *si, pgoff_t num)
++{
++ struct user_beancounter *ub, *ubp;
++
++ ub = si->swap_ubs[num];
++ si->swap_ubs[num] = NULL;
++ for (ubp = ub; ubp != NULL; ubp = ubp->parent)
++ do_ub_swapentry_dec(ubp);
++ put_beancounter(ub);
++}
++EXPORT_SYMBOL(ub_swapentry_dec);
++
++int ub_swap_init(struct swap_info_struct *si, pgoff_t num)
++{
++ struct user_beancounter **ubs;
++
++ ubs = vmalloc(num * sizeof(struct user_beancounter *));
++ if (ubs == NULL)
++ return -ENOMEM;
++
++ memset(ubs, 0, num * sizeof(struct user_beancounter *));
++ si->swap_ubs = ubs;
++ return 0;
++}
++
++void ub_swap_fini(struct swap_info_struct *si)
++{
++ if (si->swap_ubs) {
++ vfree(si->swap_ubs);
++ si->swap_ubs = NULL;
++ }
++}
++#endif
++
++static int vmguar_enough_memory(struct vnotifier_block *self,
++ unsigned long event, void *arg, int old_ret)
++{
++ struct user_beancounter *ub;
++
++ if (event != VIRTINFO_ENOUGHMEM)
++ return old_ret;
++
++ for (ub = current->mm->mm_ub; ub->parent != NULL; ub = ub->parent);
++ if (ub->ub_parms[UB_PRIVVMPAGES].held >
++ ub->ub_parms[UB_VMGUARPAGES].barrier)
++ return old_ret;
++
++ return NOTIFY_OK;
++}
++
++static struct vnotifier_block vmguar_notifier_block = {
++ .notifier_call = vmguar_enough_memory
++};
++
++static int __init init_vmguar_notifier(void)
++{
++ virtinfo_notifier_register(VITYPE_GENERAL, &vmguar_notifier_block);
++ return 0;
++}
++
++static void __exit fini_vmguar_notifier(void)
++{
++ virtinfo_notifier_unregister(VITYPE_GENERAL, &vmguar_notifier_block);
++}
++
++module_init(init_vmguar_notifier);
++module_exit(fini_vmguar_notifier);
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_proc.c linux-2.6.15-ve025stab014/kernel/ub/ub_proc.c
+--- linux-2.6.15.orig/kernel/ub/ub_proc.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_proc.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,389 @@
++/*
++ * linux/fs/proc/proc_ub.c
++ *
++ * Copyright (C) 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg>
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * TODO:
++ *
++ * Changes:
++ */
++
++#include <linux/errno.h>
++#include <linux/sched.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/proc_fs.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_hash.h>
++#include <ub/ub_debug.h>
++#include <ub/ub_page.h>
++
++#include <asm/page.h>
++#include <asm/uaccess.h>
++
++/*
++ * we have 8 format strings depending on:
++ * 1. BITS_PER_LONG
++ * 2. CONFIG_UBC_KEEP_UNUSED
++ * 3. resource number (see out_proc_beancounter)
++ */
++
++#ifdef CONFIG_UBC_KEEP_UNUSED
++#define REF_FORMAT "%5.5s %4i: %-12s "
++#define UID_HEAD_STR "uid ref"
++#else
++#define REF_FORMAT "%10.10s: %-12s "
++#define UID_HEAD_STR "uid"
++#endif
++#define REF2_FORMAT "%10s %-12s "
++
++#if BITS_PER_LONG == 32
++#define RES_FORMAT "%10lu %10lu %10lu %10lu %10lu"
++#define HEAD_FORMAT "%10s %10s %10s %10s %10s"
++#define UB_PROC_LINE_TEXT (10+2+12+1+10+1+10+1+10+1+10+1+10)
++#else
++#define RES_FORMAT "%20lu %20lu %20lu %20lu %20lu"
++#define HEAD_FORMAT "%20s %20s %20s %20s %20s"
++#define UB_PROC_LINE_TEXT (10+2+12+1+20+1+20+1+20+1+20+1+20)
++#endif
++
++#define UB_PROC_LINE_LEN (UB_PROC_LINE_TEXT + 1)
++
++static void out_proc_version(char *buf)
++{
++ int len;
++
++ len = sprintf(buf, "Version: 2.5");
++ memset(buf + len, ' ', UB_PROC_LINE_TEXT - len);
++ buf[UB_PROC_LINE_TEXT] = '\n';
++}
++
++static void out_proc_head(char *buf)
++{
++ sprintf(buf, REF2_FORMAT HEAD_FORMAT,
++ UID_HEAD_STR, "resource", "held", "maxheld",
++ "barrier", "limit", "failcnt");
++ buf[UB_PROC_LINE_TEXT] = '\n';
++}
++
++static void out_proc_beancounter(char *buf, struct user_beancounter *ub, int r)
++{
++ if (r == 0) {
++ char tmpbuf[64];
++ print_ub_uid(ub, tmpbuf, sizeof(tmpbuf));
++ sprintf(buf, REF_FORMAT RES_FORMAT,
++ tmpbuf,
++#ifdef CONFIG_UBC_KEEP_UNUSED
++ atomic_read(&ub->ub_refcount),
++#endif
++ ub_rnames[r], ub->ub_parms[r].held,
++ ub->ub_parms[r].maxheld, ub->ub_parms[r].barrier,
++ ub->ub_parms[r].limit, ub->ub_parms[r].failcnt);
++ } else
++ sprintf(buf, REF2_FORMAT RES_FORMAT,
++ "", ub_rnames[r],
++ ub->ub_parms[r].held, ub->ub_parms[r].maxheld,
++ ub->ub_parms[r].barrier, ub->ub_parms[r].limit,
++ ub->ub_parms[r].failcnt);
++
++ buf[UB_PROC_LINE_TEXT] = '\n';
++}
++
++static int ub_accessible(struct user_beancounter *ub,
++ struct user_beancounter *exec_ub,
++ struct file *file)
++{
++ struct user_beancounter *p, *q;
++
++ for (p = exec_ub; p->parent != NULL; p = p->parent);
++ for (q = ub; q->parent != NULL; q = q->parent);
++ if (p != get_ub0() && q != p)
++ return 0;
++ if (ub->parent == NULL)
++ return 1;
++ return file->private_data == NULL ? 0 : 1;
++}
++
++static ssize_t ub_proc_read(struct file *file, char *usrbuf, size_t len,
++ loff_t *poff)
++{
++ ssize_t retval;
++ char *buf;
++ unsigned long flags;
++ int i, resource;
++ struct ub_hash_slot *slot;
++ struct user_beancounter *ub;
++ struct user_beancounter *exec_ub = get_exec_ub();
++ loff_t n, off;
++ int rem, produced, job, tocopy;
++ const int is_capable =
++ (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH));
++
++ retval = -ENOBUFS;
++ buf = (char *)__get_free_page(GFP_KERNEL);
++ if (buf == NULL)
++ goto out;
++
++ retval = 0;
++ if (!is_capable)
++ goto out_free;
++
++ off = *poff;
++ if (off < 0) /* can't happen, just in case */
++ goto inval;
++
++again:
++ i = 0;
++ slot = ub_hash;
++	n = off;	/* the amount of data to skip */
++ produced = 0;
++ if (n < (UB_PROC_LINE_LEN * 2)) {
++ if (n < UB_PROC_LINE_LEN) {
++ out_proc_version(buf);
++ produced += UB_PROC_LINE_LEN;
++ n += UB_PROC_LINE_LEN;
++ }
++ out_proc_head(buf + produced);
++ produced += UB_PROC_LINE_LEN;
++ n += UB_PROC_LINE_LEN;
++ }
++ n -= (2 * UB_PROC_LINE_LEN);
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ while (1) {
++ for (ub = slot->ubh_beans;
++ ub != NULL && n >= (UB_RESOURCES * UB_PROC_LINE_LEN);
++ ub = ub->ub_next)
++ if (is_capable && ub_accessible(ub, exec_ub, file))
++ n -= (UB_RESOURCES * UB_PROC_LINE_LEN);
++ if (ub != NULL || ++i >= UB_HASH_SIZE)
++ break;
++ ++slot;
++ }
++	rem = n;	/* the amount of data in the buffer to skip */
++ job = PAGE_SIZE - UB_PROC_LINE_LEN + 1; /* end of buffer data */
++ if (len < job - rem)
++ job = rem + len;
++ while (ub != NULL && produced < job) {
++ if (is_capable && ub_accessible(ub, exec_ub, file))
++ for (resource = 0;
++ produced < job && resource < UB_RESOURCES;
++ resource++, produced += UB_PROC_LINE_LEN)
++ {
++ out_proc_beancounter(buf + produced,
++ ub, resource);
++ }
++ if (produced >= job)
++ break;
++ /* Find the next beancounter to produce more data. */
++ ub = ub->ub_next;
++ while (ub == NULL && ++i < UB_HASH_SIZE) {
++ ++slot;
++ ub = slot->ubh_beans;
++ }
++ }
++
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ ub_debug(UBD_ALLOC, KERN_DEBUG "UB_PROC: produced %d, job %d, rem %d\n",
++ produced, job, rem);
++
++ /*
++ * Temporary buffer `buf' contains `produced' bytes.
++ * Extract no more than `len' bytes at offset `rem'.
++ */
++ if (produced <= rem)
++ goto out_free;
++ tocopy = produced - rem;
++ if (len < tocopy)
++ tocopy = len;
++ if (!tocopy)
++ goto out_free;
++ if (copy_to_user(usrbuf, buf + rem, tocopy))
++ goto fault;
++ off += tocopy; /* can't overflow */
++ *poff = off;
++ len -= tocopy;
++ retval += tocopy;
++ if (!len)
++ goto out_free;
++ usrbuf += tocopy;
++ goto again;
++
++fault:
++ retval = -EFAULT;
++out_free:
++ free_page((unsigned long)buf);
++out:
++ return retval;
++
++inval:
++ retval = -EINVAL;
++ goto out_free;
++}
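++
++/*
++ * Layout sketch for the offset arithmetic above (illustrative; the exact
++ * width depends on BITS_PER_LONG): the virtual file is a sequence of
++ * fixed-width UB_PROC_LINE_LEN records -- one "Version:" line, one header
++ * line, then UB_RESOURCES records per visible beancounter -- so a file
++ * offset maps onto a record index by plain division, which is what the
++ * "n" / "rem" / "produced" bookkeeping implements.
++ */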
++
++static int ub_proc_open(struct inode *inode, struct file *file)
++{
++ file->private_data = strcmp(file->f_dentry->d_name.name,
++ "user_beancounters") ?
++ (void *)-1 : NULL;
++ return 0;
++}
++
++static struct file_operations ub_file_operations = {
++ .read = &ub_proc_read,
++ .open = &ub_proc_open
++};
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++#include <linux/seq_file.h>
++#include <linux/kmem_cache.h>
++
++static void *ubd_start(struct seq_file *m, loff_t *pos)
++{
++ loff_t n = *pos;
++ struct user_beancounter *ub;
++ long slot;
++
++ spin_lock_irq(&ub_hash_lock);
++ for (slot = 0; slot < UB_HASH_SIZE; slot++)
++ for (ub = ub_hash[slot].ubh_beans; ub; ub = ub->ub_next) {
++ if (n == 0) {
++ m->private = (void *)slot;
++ return (void *)ub;
++ }
++ n--;
++ }
++ return NULL;
++}
++
++static void *ubd_next(struct seq_file *m, void *p, loff_t *pos)
++{
++ struct user_beancounter *ub;
++ long slot;
++
++ ub = (struct user_beancounter *)p;
++ slot = (long)m->private;
++
++ ++*pos;
++ ub = ub->ub_next;
++ while (1) {
++ for (; ub; ub = ub->ub_next) {
++ m->private = (void *)slot;
++ return (void *)ub;
++ }
++ slot++;
++ if (slot == UB_HASH_SIZE)
++ break;
++ ub = ub_hash[slot].ubh_beans;
++ }
++ return NULL;
++}
++
++static void ubd_stop(struct seq_file *m, void *p)
++{
++ spin_unlock_irq(&ub_hash_lock);
++}
++
++#define PROC_LINE_FMT "\t%-17s\t%5lu\t%5lu\n"
++
++static int ubd_show(struct seq_file *m, void *p)
++{
++ struct user_beancounter *ub;
++ struct ub_cache_counter *cc;
++ long pages, vmpages, pbc, swap, unmap;
++ int i;
++ char id[64];
++
++ ub = (struct user_beancounter *)p;
++ print_ub_uid(ub, id, sizeof(id));
++ seq_printf(m, "%s:%d\n", id, atomic_read(&ub->ub_refcount));
++
++ pages = vmpages = pbc = swap = unmap = 0;
++ for (i = 0; i < NR_CPUS; i++) {
++ pages += ub->ub_stat[i].pages_charged;
++ vmpages += ub->ub_stat[i].vmalloc_charged;
++ pbc += ub->ub_stat[i].pbcs;
++ swap += ub->ub_stat[i].swapin;
++ unmap += ub->ub_stat[i].unmap;
++ }
++ if (pages < 0)
++ pages = 0;
++ if (vmpages < 0)
++ vmpages = 0;
++ seq_printf(m, PROC_LINE_FMT, "pages", pages, PAGE_SIZE);
++ seq_printf(m, PROC_LINE_FMT, "vmalloced", vmpages, PAGE_SIZE);
++
++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_UNUSEDPRIVVM],
++ ub->ub_unused_privvmpages, PAGE_SIZE);
++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_TMPFSPAGES],
++ ub->ub_tmpfs_respages, PAGE_SIZE);
++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_SWAPPAGES],
++ ub->ub_swap_pages, PAGE_SIZE);
++ seq_printf(m, PROC_LINE_FMT, "pbcs", pbc,
++ (unsigned long)sizeof(struct page_beancounter));
++
++ seq_printf(m, PROC_LINE_FMT, "swapin", swap, 0UL);
++ seq_printf(m, PROC_LINE_FMT, "unmap", unmap, 0UL);
++ /* interrupts are disabled by locking ub_hash_lock */
++ spin_lock(&cc_lock);
++ list_for_each_entry (cc, &ub->ub_cclist, ulist) {
++ kmem_cache_t *cachep;
++
++ cachep = cc->cachep;
++ seq_printf(m, PROC_LINE_FMT,
++ cachep->name,
++ cc->counter,
++ (unsigned long)cachep->objuse);
++ }
++ spin_unlock(&cc_lock);
++ return 0;
++}
++
++static struct seq_operations kmemdebug_op = {
++ .start = ubd_start,
++ .next = ubd_next,
++ .stop = ubd_stop,
++ .show = ubd_show,
++};
++
++static int kmem_debug_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &kmemdebug_op);
++}
++
++static struct file_operations kmem_debug_ops = {
++ .open = kmem_debug_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++#endif
++
++void __init ub_init_proc(void)
++{
++ struct proc_dir_entry *entry;
++
++ entry = create_proc_entry("user_beancounters", S_IRUGO, NULL);
++ if (entry)
++ entry->proc_fops = &ub_file_operations;
++ else
++ panic("Can't create /proc/user_beancounters entry!\n");
++
++ entry = create_proc_entry("user_beancounters_sub", S_IRUGO, NULL);
++ if (entry)
++ entry->proc_fops = &ub_file_operations;
++ else
++		panic("Can't create /proc/user_beancounters_sub entry!\n");
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ entry = create_proc_entry("user_beancounters_debug", S_IRUGO, NULL);
++ if (entry)
++ entry->proc_fops = &kmem_debug_ops;
++ else
++ panic("Can't create /proc/user_beancounters_debug entry!\n");
++#endif
++}
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_stat.c linux-2.6.15-ve025stab014/kernel/ub/ub_stat.c
+--- linux-2.6.15.orig/kernel/ub/ub_stat.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_stat.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,465 @@
++/*
++ * kernel/ub/ub_stat.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/timer.h>
++#include <linux/sched.h>
++#include <linux/init.h>
++#include <linux/jiffies.h>
++#include <linux/list.h>
++#include <linux/errno.h>
++#include <linux/suspend.h>
++
++#include <asm/uaccess.h>
++#include <asm/param.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_hash.h>
++#include <ub/ub_stat.h>
++
++static spinlock_t ubs_notify_lock = SPIN_LOCK_UNLOCKED;
++static LIST_HEAD(ubs_notify_list);
++static long ubs_min_interval;
++static ubstattime_t ubs_start_time, ubs_end_time;
++static struct timer_list ubs_timer;
++
++static int ubstat_get_list(void *buf, long size)
++{
++ int retval;
++ unsigned long flags;
++ int slotnr;
++ struct ub_hash_slot *slot;
++ struct user_beancounter *ub, *last_ub;
++ long *page, *ptr, *end;
++ int len;
++
++ page = (long *)__get_free_page(GFP_KERNEL);
++ if (page == NULL)
++ return -ENOMEM;
++
++ retval = 0;
++ slotnr = 0;
++ slot = ub_hash;
++ last_ub = NULL;
++ while (1) {
++ ptr = page;
++ end = page + PAGE_SIZE / sizeof(*ptr);
++
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ if (last_ub == NULL)
++ ub = slot->ubh_beans;
++ else
++ ub = last_ub->ub_next;
++ while (1) {
++ for (; ub != NULL; ub = ub->ub_next) {
++ if (ub->parent != NULL)
++ continue;
++ *ptr++ = ub->ub_uid;
++ if (ptr == end)
++ break;
++ }
++ if (ptr == end)
++ break;
++ ++slot;
++ if (++slotnr >= UB_HASH_SIZE)
++ break;
++ ub = slot->ubh_beans;
++ }
++ if (ptr == page)
++ goto out_unlock;
++ if (ub != NULL)
++ get_beancounter(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++
++ if (last_ub != NULL)
++ put_beancounter(last_ub);
++ last_ub = ub; /* last visited beancounter in the slot */
++
++ len = min_t(long, (ptr - page) * sizeof(*ptr), size);
++ if (copy_to_user(buf, page, len)) {
++ retval = -EFAULT;
++ break;
++ }
++ retval += len;
++ if (len < PAGE_SIZE)
++ break;
++ buf += len;
++ size -= len;
++ }
++out:
++ if (last_ub != NULL)
++ put_beancounter(last_ub);
++ free_page((unsigned long)page);
++ return retval;
++
++out_unlock:
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ goto out;
++}
++
++static int ubstat_gettime(void *buf, long size)
++{
++ ubgettime_t data;
++ int retval;
++
++ spin_lock(&ubs_notify_lock);
++ data.start_time = ubs_start_time;
++ data.end_time = ubs_end_time;
++ data.cur_time = ubs_start_time + (jiffies - ubs_start_time * HZ) / HZ;
++ spin_unlock(&ubs_notify_lock);
++
++ retval = min_t(long, sizeof(data), size);
++ if (copy_to_user(buf, &data, retval))
++ retval = -EFAULT;
++ return retval;
++}
++
++static int ubstat_do_read_one(struct user_beancounter *ub, int res, void *kbuf)
++{
++ struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstatparm_t param[1];
++ } *data;
++
++ data = kbuf;
++ data->start_time = ubs_start_time;
++ data->end_time = ubs_end_time;
++
++ data->param[0].maxheld = ub->ub_store[res].maxheld;
++ data->param[0].failcnt = ub->ub_store[res].failcnt;
++
++ return sizeof(*data);
++}
++
++static int ubstat_do_read_all(struct user_beancounter *ub, void *kbuf, int size)
++{
++ int wrote;
++ struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstatparm_t param[UB_RESOURCES];
++ } *data;
++ int resource;
++
++ data = kbuf;
++ data->start_time = ubs_start_time;
++ data->end_time = ubs_end_time;
++ wrote = sizeof(data->start_time) + sizeof(data->end_time);
++
++ for (resource = 0; resource < UB_RESOURCES; resource++) {
++ if (size < wrote + sizeof(data->param[resource]))
++ break;
++ data->param[resource].maxheld = ub->ub_store[resource].maxheld;
++ data->param[resource].failcnt = ub->ub_store[resource].failcnt;
++ wrote += sizeof(data->param[resource]);
++ }
++
++ return wrote;
++}
++
++static int ubstat_do_read_full(struct user_beancounter *ub, void *kbuf,
++ int size)
++{
++ int wrote;
++ struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstatparmf_t param[UB_RESOURCES];
++ } *data;
++ int resource;
++
++ data = kbuf;
++ data->start_time = ubs_start_time;
++ data->end_time = ubs_end_time;
++ wrote = sizeof(data->start_time) + sizeof(data->end_time);
++
++ for (resource = 0; resource < UB_RESOURCES; resource++) {
++ if (size < wrote + sizeof(data->param[resource]))
++ break;
++ /* The beginning of ubstatparmf_t matches struct ubparm. */
++ memcpy(&data->param[resource], &ub->ub_store[resource],
++ sizeof(ub->ub_store[resource]));
++ data->param[resource].__unused1 = 0;
++ data->param[resource].__unused2 = 0;
++ wrote += sizeof(data->param[resource]);
++ }
++ return wrote;
++}
++
++static int ubstat_get_stat(struct user_beancounter *ub, long cmd,
++ void *buf, long size)
++{
++ void *kbuf;
++ int retval;
++
++ kbuf = (void *)__get_free_page(GFP_KERNEL);
++ if (kbuf == NULL)
++ return -ENOMEM;
++
++ spin_lock(&ubs_notify_lock);
++ switch (UBSTAT_CMD(cmd)) {
++ case UBSTAT_READ_ONE:
++ retval = -EINVAL;
++ if (UBSTAT_PARMID(cmd) >= UB_RESOURCES)
++ break;
++ retval = ubstat_do_read_one(ub,
++ UBSTAT_PARMID(cmd), kbuf);
++ break;
++ case UBSTAT_READ_ALL:
++ retval = ubstat_do_read_all(ub, kbuf, PAGE_SIZE);
++ break;
++ case UBSTAT_READ_FULL:
++ retval = ubstat_do_read_full(ub, kbuf, PAGE_SIZE);
++ break;
++ default:
++ retval = -EINVAL;
++ }
++ spin_unlock(&ubs_notify_lock);
++
++ if (retval > 0) {
++ retval = min_t(long, retval, size);
++ if (copy_to_user(buf, kbuf, retval))
++ retval = -EFAULT;
++ }
++
++ free_page((unsigned long)kbuf);
++ return retval;
++}
++
++static int ubstat_handle_notifrq(ubnotifrq_t *req)
++{
++ int retval;
++ struct ub_stat_notify *new_notify;
++ struct list_head *entry;
++ struct task_struct *tsk_to_free;
++
++	new_notify = kmalloc(sizeof(*new_notify), GFP_KERNEL);
++ if (new_notify == NULL)
++ return -ENOMEM;
++
++ tsk_to_free = NULL;
++ INIT_LIST_HEAD(&new_notify->list);
++
++ spin_lock(&ubs_notify_lock);
++ list_for_each(entry, &ubs_notify_list) {
++ struct ub_stat_notify *notify;
++
++ notify = list_entry(entry, struct ub_stat_notify, list);
++ if (notify->task == current) {
++ kfree(new_notify);
++ new_notify = notify;
++ break;
++ }
++ }
++
++ retval = -EINVAL;
++ if (req->maxinterval < 1)
++ goto out_unlock;
++ if (req->maxinterval > TIME_MAX_SEC)
++ req->maxinterval = TIME_MAX_SEC;
++ if (req->maxinterval < ubs_min_interval) {
++ unsigned long dif;
++
++ ubs_min_interval = req->maxinterval;
++ dif = (ubs_timer.expires - jiffies + HZ - 1) / HZ;
++ if (dif > req->maxinterval)
++ mod_timer(&ubs_timer,
++ ubs_timer.expires -
++ (dif - req->maxinterval) * HZ);
++ }
++
++ if (entry != &ubs_notify_list) {
++ list_del(&new_notify->list);
++ tsk_to_free = new_notify->task;
++ }
++ if (req->signum) {
++ new_notify->task = current;
++ get_task_struct(new_notify->task);
++ new_notify->signum = req->signum;
++ list_add(&new_notify->list, &ubs_notify_list);
++ } else
++ kfree(new_notify);
++ retval = 0;
++out_unlock:
++ spin_unlock(&ubs_notify_lock);
++ if (tsk_to_free != NULL)
++ put_task_struct(tsk_to_free);
++ return retval;
++}
++
++/*
++ * former sys_ubstat
++ */
++long do_ubstat(int func, unsigned long arg1, unsigned long arg2, void *buf,
++ long size)
++{
++ int retval;
++ struct user_beancounter *ub;
++
++ if (func == UBSTAT_UBPARMNUM)
++ return UB_RESOURCES;
++ if (func == UBSTAT_UBLIST)
++ return ubstat_get_list(buf, size);
++ if (!(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)))
++ return -EPERM;
++
++ if (func == UBSTAT_GETTIME) {
++ retval = ubstat_gettime(buf, size);
++ goto notify;
++ }
++
++ ub = get_exec_ub();
++ if (ub != NULL && ub->ub_uid == arg1)
++ get_beancounter(ub);
++ else /* FIXME must be if (ve_is_super) */
++ ub = get_beancounter_byuid(arg1, 0);
++
++ if (ub == NULL)
++ return -ESRCH;
++
++ retval = ubstat_get_stat(ub, func, buf, size);
++ put_beancounter(ub);
++notify:
++ /* Handle request for notification */
++ if (retval >= 0) {
++ ubnotifrq_t notifrq;
++ int err;
++
++ err = -EFAULT;
++ if (!copy_from_user(&notifrq, (void *)arg2, sizeof(notifrq)))
++ err = ubstat_handle_notifrq(&notifrq);
++ if (err)
++ retval = err;
++ }
++
++ return retval;
++}
++
++static void ubstat_save_onestat(struct user_beancounter *ub)
++{
++ int resource;
++
++ /* called with local irq disabled */
++ spin_lock(&ub->ub_lock);
++ for (resource = 0; resource < UB_RESOURCES; resource++) {
++ memcpy(&ub->ub_store[resource], &ub->ub_parms[resource],
++ sizeof(struct ubparm));
++ ub->ub_parms[resource].minheld =
++ ub->ub_parms[resource].maxheld =
++ ub->ub_parms[resource].held;
++ }
++ spin_unlock(&ub->ub_lock);
++}
++
++static void ubstat_save_statistics(void)
++{
++ unsigned long flags;
++ int i;
++ struct user_beancounter *ub;
++
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ for_each_beancounter(i, ub)
++ ubstat_save_onestat(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++}
++
++static void ubstatd_timeout(unsigned long __data)
++{
++ struct task_struct *p;
++
++ p = (struct task_struct *) __data;
++ wake_up_process(p);
++}
++
++/*
++ * Safe wrapper for send_sig. It prevents a race with release_task
++ * for sighand.
++ * Should be called under tasklist_lock.
++ */
++static void task_send_sig(struct ub_stat_notify *notify)
++{
++ if (likely(notify->task->sighand != NULL))
++ send_sig(notify->signum, notify->task, 1);
++}
++
++static inline void do_notifies(void)
++{
++ LIST_HEAD(notif_free_list);
++ struct ub_stat_notify *notify;
++ struct ub_stat_notify *tmp;
++
++ spin_lock(&ubs_notify_lock);
++ ubs_start_time = ubs_end_time;
++ /*
++ * the expression below relies on time being unsigned long and
++ * arithmetic promotion rules
++ */
++ ubs_end_time += (ubs_timer.expires - ubs_start_time * HZ) / HZ;
++ mod_timer(&ubs_timer, ubs_timer.expires + ubs_min_interval * HZ);
++ ubs_min_interval = TIME_MAX_SEC;
++ /* save statistics accumulated for the interval */
++ ubstat_save_statistics();
++ /* send signals */
++ read_lock(&tasklist_lock);
++ while (!list_empty(&ubs_notify_list)) {
++ notify = list_entry(ubs_notify_list.next,
++ struct ub_stat_notify, list);
++ task_send_sig(notify);
++ list_del(&notify->list);
++ list_add(&notify->list, &notif_free_list);
++ }
++ read_unlock(&tasklist_lock);
++ spin_unlock(&ubs_notify_lock);
++
++ list_for_each_entry_safe(notify, tmp, &notif_free_list, list) {
++ put_task_struct(notify->task);
++ kfree(notify);
++ }
++}
++
++/*
++ * Kernel thread
++ */
++static int ubstatd(void *unused)
++{
++ /* daemonize call will take care of signals */
++ daemonize("ubstatd");
++
++ ubs_timer.data = (unsigned long)current;
++ ubs_timer.function = ubstatd_timeout;
++ add_timer(&ubs_timer);
++
++ while (1) {
++ set_task_state(current, TASK_INTERRUPTIBLE);
++ if (time_after(ubs_timer.expires, jiffies)) {
++ schedule();
++ try_to_freeze();
++ continue;
++ }
++
++ __set_task_state(current, TASK_RUNNING);
++ do_notifies();
++ }
++ return 0;
++}
++
++static int __init ubstatd_init(void)
++{
++ init_timer(&ubs_timer);
++ ubs_timer.expires = TIME_MAX_JIF;
++ ubs_min_interval = TIME_MAX_SEC;
++ ubs_start_time = ubs_end_time = 0;
++
++ kernel_thread(ubstatd, NULL, 0);
++ return 0;
++}
++
++module_init(ubstatd_init);
+diff -uprN linux-2.6.15.orig/kernel/ub/ub_sys.c linux-2.6.15-ve025stab014/kernel/ub/ub_sys.c
+--- linux-2.6.15.orig/kernel/ub/ub_sys.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ub/ub_sys.c 2006-01-27 14:48:06.000000000 +0300
+@@ -0,0 +1,148 @@
++/*
++ * kernel/ub/ub_sys.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/config.h>
++#include <asm/uaccess.h>
++
++#include <ub/beancounter.h>
++
++#ifndef CONFIG_USER_RESOURCE
++asmlinkage long sys_getluid(void)
++{
++ return -ENOSYS;
++}
++
++asmlinkage long sys_setluid(uid_t uid)
++{
++ return -ENOSYS;
++}
++
++asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
++ unsigned long *limits)
++{
++ return -ENOSYS;
++}
++
++asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2,
++ void *buf, long size)
++{
++ return -ENOSYS;
++}
++#else /* CONFIG_USER_RESOURCE */
++
++/*
++ * The (rather boring) getluid syscall
++ */
++asmlinkage long sys_getluid(void)
++{
++ struct user_beancounter *ub;
++
++ ub = get_exec_ub();
++ if (ub == NULL)
++ return -EINVAL;
++
++ return ub->ub_uid;
++}
++
++/*
++ * The setluid syscall
++ */
++asmlinkage long sys_setluid(uid_t uid)
++{
++ struct user_beancounter *ub;
++ struct task_beancounter *task_bc;
++ int error;
++
++ task_bc = &current->task_bc;
++
++ /* You may not disown a setluid */
++ error = -EINVAL;
++ if (uid == (uid_t)-1)
++ goto out;
++
++ /* You may only set an ub as root */
++ error = -EPERM;
++ if (!capable(CAP_SETUID))
++ goto out;
++
++ /* Ok - set up a beancounter entry for this user */
++ error = -ENOBUFS;
++ ub = get_beancounter_byuid(uid, 1);
++ if (ub == NULL)
++ goto out;
++
++ ub_debug(UBD_ALLOC | UBD_LIMIT, "setluid, bean %p (count %d) "
++ "for %.20s pid %d\n",
++ ub, atomic_read(&ub->ub_refcount),
++ current->comm, current->pid);
++ /* install bc */
++ put_beancounter(task_bc->exec_ub);
++ task_bc->exec_ub = ub;
++ put_beancounter(task_bc->fork_sub);
++ task_bc->fork_sub = get_beancounter(ub);
++ error = 0;
++out:
++ return error;
++}
++
++/*
++ * The setublimit syscall
++ */
++asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
++ unsigned long *limits)
++{
++ int error;
++ unsigned long flags;
++ struct user_beancounter *ub;
++ unsigned long new_limits[2];
++
++ error = -EPERM;
++ if(!capable(CAP_SYS_RESOURCE))
++ goto out;
++
++ error = -EINVAL;
++ if (resource >= UB_RESOURCES)
++ goto out;
++
++ error = -EFAULT;
++ if (copy_from_user(&new_limits, limits, sizeof(new_limits)))
++ goto out;
++
++ error = -EINVAL;
++ if (new_limits[0] > UB_MAXVALUE || new_limits[1] > UB_MAXVALUE)
++ goto out;
++
++ error = -ENOENT;
++ ub = get_beancounter_byuid(uid, 0);
++ if (ub == NULL) {
++ ub_debug(UBD_LIMIT, "No login bc for uid %d\n", uid);
++ goto out;
++ }
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_parms[resource].barrier = new_limits[0];
++ ub->ub_parms[resource].limit = new_limits[1];
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ put_beancounter(ub);
++
++ error = 0;
++out:
++ return error;
++}
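++
++/*
++ * Usage sketch (illustrative only; the syscall number macro and the
++ * resource id below are placeholders, not definitions from this patch):
++ * a CAP_SYS_RESOURCE-capable process passes barrier and limit as a
++ * two-element array, e.g.
++ *
++ *	unsigned long lim[2] = { 1000, 1100 };	-- barrier, limit
++ *	syscall(__NR_setublimit, 500, SOME_RESOURCE_ID, lim);
++ */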
++
++extern long do_ubstat(int func, unsigned long arg1, unsigned long arg2,
++ void *buf, long size);
++asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2,
++ void *buf, long size)
++{
++ return do_ubstat(func, arg1, arg2, buf, size);
++}
++#endif
+diff -uprN linux-2.6.15.orig/kernel/user.c linux-2.6.15-ve025stab014/kernel/user.c
+--- linux-2.6.15.orig/kernel/user.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/user.c 2006-01-27 14:48:08.000000000 +0300
+@@ -23,7 +23,20 @@
+ #define UIDHASH_SZ (1 << UIDHASH_BITS)
+ #define UIDHASH_MASK (UIDHASH_SZ - 1)
+ #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
+-#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
++#define __uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
++
++#ifdef CONFIG_VE
++#define UIDHASH_MASK_VE (UIDHASH_SZ_VE - 1)
++#define __uidhashfn_ve(uid) (((uid >> UIDHASH_BITS_VE) ^ uid) & \
++ UIDHASH_MASK_VE)
++#define __uidhashentry_ve(uid, envid) ((envid)->uidhash_table + \
++ __uidhashfn_ve(uid))
++#define uidhashentry_ve(uid) (ve_is_super(get_exec_env()) ? \
++ __uidhashentry(uid) : \
++ __uidhashentry_ve(uid, get_exec_env()))
++#else
++#define uidhashentry_ve(uid) __uidhashentry(uid)
++#endif
+
+ static kmem_cache_t *uid_cachep;
+ static struct list_head uidhash_table[UIDHASH_SZ];
+@@ -84,7 +97,7 @@ struct user_struct *find_user(uid_t uid)
+ struct user_struct *ret;
+
+ spin_lock(&uidhash_lock);
+- ret = uid_hash_find(uid, uidhashentry(uid));
++ ret = uid_hash_find(uid, uidhashentry_ve(uid));
+ spin_unlock(&uidhash_lock);
+ return ret;
+ }
+@@ -102,7 +115,7 @@ void free_uid(struct user_struct *up)
+
+ struct user_struct * alloc_uid(uid_t uid)
+ {
+- struct list_head *hashent = uidhashentry(uid);
++ struct list_head *hashent = uidhashentry_ve(uid);
+ struct user_struct *up;
+
+ spin_lock(&uidhash_lock);
+@@ -177,14 +190,14 @@ static int __init uid_cache_init(void)
+ int n;
+
+ uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
+- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+
+ for(n = 0; n < UIDHASH_SZ; ++n)
+ INIT_LIST_HEAD(uidhash_table + n);
+
+ /* Insert the root user immediately (init already runs as root) */
+ spin_lock(&uidhash_lock);
+- uid_hash_insert(&root_user, uidhashentry(0));
++ uid_hash_insert(&root_user, __uidhashentry(0));
+ spin_unlock(&uidhash_lock);
+
+ return 0;
+diff -uprN linux-2.6.15.orig/kernel/ve.c linux-2.6.15-ve025stab014/kernel/ve.c
+--- linux-2.6.15.orig/kernel/ve.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/ve.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,177 @@
++/*
++ * linux/kernel/ve.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++/*
++ * 've.c' helper file performing VE sub-system initialization
++ */
++
++#include <linux/sched.h>
++#include <linux/delay.h>
++#include <linux/capability.h>
++#include <linux/ve.h>
++#include <linux/smp_lock.h>
++#include <linux/init.h>
++
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/slab.h>
++#include <linux/sys.h>
++#include <linux/kdev_t.h>
++#include <linux/termios.h>
++#include <linux/tty_driver.h>
++#include <linux/netdevice.h>
++#include <linux/utsname.h>
++#include <linux/proc_fs.h>
++#include <linux/kernel_stat.h>
++#include <linux/module.h>
++#include <linux/rcupdate.h>
++#include <linux/ve_proto.h>
++#include <linux/ve_owner.h>
++#include <linux/devpts_fs.h>
++
++#include <linux/nfcalls.h>
++
++unsigned long vz_rstamp = 0x37e0f59d;
++
++struct module no_module = { .state = MODULE_STATE_GOING };
++EXPORT_SYMBOL(no_module);
++
++#ifdef CONFIG_VE
++
++DCL_VE_OWNER(SKB, struct sk_buff, owner_env)
++DCL_VE_OWNER(SK, struct sock, sk_owner_env)
++DCL_VE_OWNER(TW, struct tcp_tw_bucket, tw_owner_env)
++DCL_VE_OWNER(FILP, struct file, owner_env)
++DCL_VE_OWNER(FSTYPE, struct file_system_type, owner_env)
++
++#if defined(CONFIG_VE_IPTABLES)
++INIT_KSYM_MODULE(ip_tables);
++INIT_KSYM_MODULE(iptable_filter);
++INIT_KSYM_MODULE(iptable_mangle);
++INIT_KSYM_MODULE(ipt_limit);
++INIT_KSYM_MODULE(ipt_multiport);
++INIT_KSYM_MODULE(ipt_tos);
++INIT_KSYM_MODULE(ipt_TOS);
++INIT_KSYM_MODULE(ipt_REJECT);
++INIT_KSYM_MODULE(ipt_TCPMSS);
++INIT_KSYM_MODULE(ipt_tcpmss);
++INIT_KSYM_MODULE(ipt_ttl);
++INIT_KSYM_MODULE(ipt_LOG);
++INIT_KSYM_MODULE(ipt_length);
++INIT_KSYM_MODULE(ip_conntrack);
++INIT_KSYM_MODULE(ip_conntrack_ftp);
++INIT_KSYM_MODULE(ip_conntrack_irc);
++INIT_KSYM_MODULE(ipt_conntrack);
++INIT_KSYM_MODULE(ipt_state);
++INIT_KSYM_MODULE(ipt_helper);
++INIT_KSYM_MODULE(ip_nat);
++INIT_KSYM_MODULE(iptable_nat);
++INIT_KSYM_MODULE(ip_nat_ftp);
++INIT_KSYM_MODULE(ip_nat_irc);
++
++INIT_KSYM_CALL(int, init_netfilter, (void));
++INIT_KSYM_CALL(int, init_iptables, (void));
++INIT_KSYM_CALL(int, init_iptable_filter, (void));
++INIT_KSYM_CALL(int, init_iptable_mangle, (void));
++INIT_KSYM_CALL(int, init_iptable_limit, (void));
++INIT_KSYM_CALL(int, init_iptable_multiport, (void));
++INIT_KSYM_CALL(int, init_iptable_tos, (void));
++INIT_KSYM_CALL(int, init_iptable_TOS, (void));
++INIT_KSYM_CALL(int, init_iptable_REJECT, (void));
++INIT_KSYM_CALL(int, init_iptable_TCPMSS, (void));
++INIT_KSYM_CALL(int, init_iptable_tcpmss, (void));
++INIT_KSYM_CALL(int, init_iptable_ttl, (void));
++INIT_KSYM_CALL(int, init_iptable_LOG, (void));
++INIT_KSYM_CALL(int, init_iptable_length, (void));
++INIT_KSYM_CALL(int, init_iptable_conntrack, (void));
++INIT_KSYM_CALL(int, init_iptable_ftp, (void));
++INIT_KSYM_CALL(int, init_iptable_irc, (void));
++INIT_KSYM_CALL(int, init_iptable_conntrack_match, (void));
++INIT_KSYM_CALL(int, init_iptable_state, (void));
++INIT_KSYM_CALL(int, init_iptable_helper, (void));
++INIT_KSYM_CALL(int, ip_nat_init, (void));
++INIT_KSYM_CALL(int, init_iptable_nat, (void));
++INIT_KSYM_CALL(int, init_iptable_nat_ftp, (void));
++INIT_KSYM_CALL(int, init_iptable_nat_irc, (void));
++INIT_KSYM_CALL(void, fini_iptable_nat_irc, (void));
++INIT_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
++INIT_KSYM_CALL(void, fini_iptable_nat, (void));
++INIT_KSYM_CALL(void, ip_nat_cleanup, (void));
++INIT_KSYM_CALL(void, fini_iptable_helper, (void));
++INIT_KSYM_CALL(void, fini_iptable_state, (void));
++INIT_KSYM_CALL(void, fini_iptable_conntrack_match, (void));
++INIT_KSYM_CALL(void, fini_iptable_irc, (void));
++INIT_KSYM_CALL(void, fini_iptable_ftp, (void));
++INIT_KSYM_CALL(void, fini_iptable_conntrack, (void));
++INIT_KSYM_CALL(void, fini_iptable_length, (void));
++INIT_KSYM_CALL(void, fini_iptable_LOG, (void));
++INIT_KSYM_CALL(void, fini_iptable_ttl, (void));
++INIT_KSYM_CALL(void, fini_iptable_tcpmss, (void));
++INIT_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
++INIT_KSYM_CALL(void, fini_iptable_REJECT, (void));
++INIT_KSYM_CALL(void, fini_iptable_TOS, (void));
++INIT_KSYM_CALL(void, fini_iptable_tos, (void));
++INIT_KSYM_CALL(void, fini_iptable_multiport, (void));
++INIT_KSYM_CALL(void, fini_iptable_limit, (void));
++INIT_KSYM_CALL(void, fini_iptable_filter, (void));
++INIT_KSYM_CALL(void, fini_iptable_mangle, (void));
++INIT_KSYM_CALL(void, fini_iptables, (void));
++INIT_KSYM_CALL(void, fini_netfilter, (void));
++
++INIT_KSYM_CALL(void, ipt_flush_table, (struct ipt_table *table));
++#endif
++
++#ifdef CONFIG_VE_CALLS_MODULE
++INIT_KSYM_MODULE(vzmon);
++INIT_KSYM_CALL(int, real_get_device_perms_ve,
++ (int dev_type, dev_t dev, int access_mode));
++INIT_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
++INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
++INIT_KSYM_CALL(void, real_update_load_avg_ve, (void));
++
++int get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
++{
++ return KSYMSAFECALL(int, vzmon, real_get_device_perms_ve,
++ (dev_type, dev, access_mode));
++}
++EXPORT_SYMBOL(get_device_perms_ve);
++
++void do_env_cleanup(struct ve_struct *env)
++{
++ KSYMSAFECALL_VOID(vzmon, real_do_env_cleanup, (env));
++}
++
++void do_env_free(struct ve_struct *env)
++{
++ KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env));
++}
++EXPORT_SYMBOL(do_env_free);
++
++void do_update_load_avg_ve(void)
++{
++ KSYMSAFECALL_VOID(vzmon, real_update_load_avg_ve, ());
++}
++#endif
++
++struct ve_struct ve0 = {
++ .utsname = &system_utsname,
++ .vetask_lh = LIST_HEAD_INIT(ve0.vetask_lh),
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ ._net_dev_tail = &ve0._net_dev_base,
++ .ifindex = -1,
++#endif
++#ifdef CONFIG_UNIX98_PTYS
++ .devpts_config = &devpts_config,
++#endif
++};
++
++EXPORT_SYMBOL(ve0);
++
++#endif /* CONFIG_VE */
+diff -uprN linux-2.6.15.orig/kernel/vecalls.c linux-2.6.15-ve025stab014/kernel/vecalls.c
+--- linux-2.6.15.orig/kernel/vecalls.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/vecalls.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,3206 @@
++/*
++ * linux/kernel/vecalls.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ */
++
++/*
++ * 'vecalls.c' is a file with basic VE support. It provides basic primitives
++ * along with the initialization code.
++ */
++
++#include <linux/sched.h>
++#include <linux/delay.h>
++#include <linux/capability.h>
++#include <linux/ve.h>
++#include <linux/smp_lock.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/ve_owner.h>
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++#include <linux/sys.h>
++#include <linux/fs.h>
++#include <linux/namespace.h>
++#include <linux/termios.h>
++#include <linux/tty_driver.h>
++#include <linux/netdevice.h>
++#include <linux/wait.h>
++#include <linux/inetdevice.h>
++#include <linux/utsname.h>
++#include <linux/sysctl.h>
++#include <linux/proc_fs.h>
++#include <linux/seq_file.h>
++#include <linux/kernel_stat.h>
++#include <linux/module.h>
++#include <linux/suspend.h>
++#include <linux/rcupdate.h>
++#include <linux/in.h>
++#include <linux/major.h>
++#include <linux/kdev_t.h>
++#include <linux/idr.h>
++#include <linux/inetdevice.h>
++#include <net/pkt_sched.h>
++#include <linux/divert.h>
++#include <ub/beancounter.h>
++
++#include <net/route.h>
++#include <net/ip_fib.h>
++
++#include <linux/ve_proto.h>
++#include <linux/venet.h>
++#include <linux/vzctl.h>
++#include <linux/vzcalluser.h>
++#ifdef CONFIG_FAIRSCHED
++#include <linux/fairsched.h>
++#endif
++
++#include <linux/nfcalls.h>
++
++struct ve_struct *ve_list_head = NULL;
++int nr_ve = 1; /* One VE always exists. Compatibility with vestat */
++rwlock_t ve_list_guard = RW_LOCK_UNLOCKED;
++static rwlock_t devperms_hash_guard = RW_LOCK_UNLOCKED;
++
++extern int glob_virt_pids;
++
++static int do_env_enter(struct ve_struct *ve);
++int real_env_create(envid_t veid, unsigned flags, u32 class_id,
++ struct env_create_param *data, int datalen);
++static void do_clean_devperms(envid_t veid);
++static int alloc_ve_tty_drivers(struct ve_struct* ve);
++static void free_ve_tty_drivers(struct ve_struct* ve);
++static int register_ve_tty_drivers(struct ve_struct* ve);
++static void unregister_ve_tty_drivers(struct ve_struct* ve);
++static int init_ve_tty_drivers(struct ve_struct *);
++static void fini_ve_tty_drivers(struct ve_struct *);
++static void clear_termios(struct tty_driver* driver );
++static void ve_mapped_devs_cleanup(struct ve_struct *ve);
++
++static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf);
++
++static void vecalls_exit(void);
++
++struct ve_struct *__find_ve_by_id(envid_t veid)
++{
++ struct ve_struct *ve;
++ for (ve = ve_list_head;
++ ve != NULL && ve->veid != veid;
++ ve = ve->next);
++ return ve;
++}
++
++struct ve_struct *get_ve_by_id(envid_t veid)
++{
++ struct ve_struct *ve;
++ read_lock(&ve_list_guard);
++ ve = __find_ve_by_id(veid);
++ get_ve(ve);
++ read_unlock(&ve_list_guard);
++ return ve;
++}
++
++/*
++ * real_put_ve() MUST be used instead of put_ve() inside vecalls.
++ */
++void real_do_env_free(struct ve_struct *ve);
++static inline void real_put_ve(struct ve_struct *ve)
++{
++ if (ve && atomic_dec_and_test(&ve->counter)) {
++ if (atomic_read(&ve->pcounter) > 0)
++ BUG();
++ if (ve->is_running)
++ BUG();
++ real_do_env_free(ve);
++ }
++}
++
++extern struct file_system_type devpts_fs_type;
++extern struct file_system_type sysfs_fs_type;
++extern struct file_system_type tmpfs_fs_type;
++extern struct file_system_type proc_fs_type;
++
++extern spinlock_t task_capability_lock;
++extern void ve_ipc_free(struct ve_struct * ve);
++extern void ip_fragment_cleanup(struct ve_struct *ve);
++
++static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf)
++{
++ struct ve_struct *ve;
++ struct vz_cpu_stat *vstat;
++ int retval;
++ int i, cpu;
++ unsigned long tmp;
++
++ if (!ve_is_super(get_exec_env()) && (veid != get_exec_env()->veid))
++ return -EPERM;
++ if (veid == 0)
++ return -ESRCH;
++
++ vstat = kmalloc(sizeof(*vstat), GFP_KERNEL);
++ if (!vstat)
++ return -ENOMEM;
++ memset(vstat, 0, sizeof(*vstat));
++
++ retval = -ESRCH;
++ read_lock(&ve_list_guard);
++ ve = __find_ve_by_id(veid);
++ if (ve == NULL)
++ goto out_unlock;
++ for (cpu = 0; cpu < NR_CPUS; cpu++) {
++ struct ve_cpu_stats *st;
++
++ st = VE_CPU_STATS(ve, cpu);
++ vstat->user_jif += st->user;
++ vstat->nice_jif += st->nice;
++ vstat->system_jif += st->system;
++ vstat->idle_clk += ve_sched_get_idle_time(ve, cpu);
++ }
++ vstat->uptime_clk = get_cycles() - ve->start_cycles;
++ vstat->uptime_jif = jiffies - ve->start_jiffies;
++ for (i = 0; i < 3; i++) {
++ tmp = ve->avenrun[i] + (FIXED_1/200);
++ vstat->avenrun[i].val_int = LOAD_INT(tmp);
++ vstat->avenrun[i].val_frac = LOAD_FRAC(tmp);
++ }
++ read_unlock(&ve_list_guard);
++
++ retval = 0;
++ if (copy_to_user(buf, vstat, sizeof(*vstat)))
++ retval = -EFAULT;
++out_free:
++ kfree(vstat);
++ return retval;
++
++out_unlock:
++ read_unlock(&ve_list_guard);
++ goto out_free;
++}
++
++/**********************************************************************
++ * Device permissions routines,
++ * character and block devices are handled separately
++ **********************************************************************/
++
++/* Rules applied in the following order:
++ MAJOR!=0, MINOR!=0
++ MAJOR!=0, MINOR==0
++ MAJOR==0, MINOR==0
++*/
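++/* For example, for a character device 1:3 in VE 101 the lookup in
++ * real_get_device_perms_ve() below tries (101, 1:3), then (101, 1:0),
++ * then (101, 0:0), and only then falls back to the built-in defaults. */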
++struct devperms_struct
++{
++ dev_t dev; /* device id */
++ unsigned char mask;
++ unsigned type;
++ envid_t veid;
++
++ struct devperms_struct *devhash_next;
++ struct devperms_struct **devhash_pprev;
++};
++
++static struct devperms_struct original_perms[] =
++{{
++ MKDEV(0,0), /*device*/
++ S_IROTH | S_IWOTH,
++ S_IFCHR, /*type*/
++ 0, /*veid*/
++ NULL, NULL
++},
++{
++ MKDEV(0,0), /*device*/
++ S_IXGRP | S_IROTH | S_IWOTH,
++ S_IFBLK, /*type*/
++ 0, /*veid*/
++ NULL, NULL
++}};
++
++static struct devperms_struct default_major_perms[] = {
++ {MKDEV(UNIX98_PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
++ {MKDEV(UNIX98_PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
++ {MKDEV(PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
++ {MKDEV(PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
++};
++static struct devperms_struct default_minor_perms[] = {
++ {MKDEV(MEM_MAJOR, 3), S_IROTH | S_IWOTH, S_IFCHR}, /* null */
++ {MKDEV(MEM_MAJOR, 5), S_IROTH | S_IWOTH, S_IFCHR}, /* zero */
++ {MKDEV(MEM_MAJOR, 7), S_IROTH | S_IWOTH, S_IFCHR}, /* full */
++ {MKDEV(TTYAUX_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},/* tty */
++ {MKDEV(TTYAUX_MAJOR, 2), S_IROTH | S_IWOTH, S_IFCHR},/* ptmx */
++ {MKDEV(MEM_MAJOR, 8), S_IROTH, S_IFCHR}, /* random */
++ {MKDEV(MEM_MAJOR, 9), S_IROTH, S_IFCHR}, /* urandom */
++};
++
++static struct devperms_struct default_deny_perms = {
++ MKDEV(0, 0), 0, S_IFCHR
++};
++
++static inline struct devperms_struct *find_default_devperms(int type,
++ dev_t dev)
++{
++ int i;
++
++ /* XXX all default perms are S_IFCHR */
++ if (type != S_IFCHR)
++ return &default_deny_perms;
++
++ for (i = 0;
++ i < sizeof(default_minor_perms)/sizeof(struct devperms_struct);
++ i++)
++ if (MAJOR(dev) == MAJOR(default_minor_perms[i].dev) &&
++ MINOR(dev) == MINOR(default_minor_perms[i].dev))
++ return &default_minor_perms[i];
++ for (i = 0;
++ i < sizeof(default_major_perms)/sizeof(struct devperms_struct);
++ i++)
++ if (MAJOR(dev) == MAJOR(default_major_perms[i].dev))
++ return &default_major_perms[i];
++
++ return &default_deny_perms;
++}
++
++#define DEVPERMS_HASH_SZ 512
++struct devperms_struct *devperms_hash[DEVPERMS_HASH_SZ];
++
++#define devperms_hashfn(id,dev) \
++ ( (id << 5) ^ (id >> 5) ^ (MAJOR(dev)) ^ MINOR(dev) ) & \
++ (DEVPERMS_HASH_SZ - 1)
++
++static inline void hash_devperms(struct devperms_struct *p)
++{
++ struct devperms_struct **htable =
++ &devperms_hash[devperms_hashfn(p->veid,p->dev)];
++
++ if ((p->devhash_next = *htable) != NULL)
++ (*htable)->devhash_pprev = &p->devhash_next;
++ *htable = p;
++ p->devhash_pprev = htable;
++}
++
++static inline void unhash_devperms(struct devperms_struct *p)
++{
++ if (p->devhash_next)
++ p->devhash_next->devhash_pprev = p->devhash_pprev;
++ *p->devhash_pprev = p->devhash_next;
++}
++
++static int __init init_devperms_hash(void)
++{
++ write_lock_irq(&devperms_hash_guard);
++ memset(devperms_hash, 0, sizeof(devperms_hash));
++ hash_devperms(original_perms);
++ hash_devperms(original_perms+1);
++ write_unlock_irq(&devperms_hash_guard);
++ return 0;
++}
++
++static inline void fini_devperms_hash(void)
++{
++}
++
++static inline struct devperms_struct *find_devperms(envid_t veid,
++ int type,
++ dev_t dev)
++{
++ struct devperms_struct *p, **htable =
++ &devperms_hash[devperms_hashfn(veid,dev)];
++
++ for (p = *htable; p && !(p->type==type &&
++ MAJOR(dev)==MAJOR(p->dev) &&
++ MINOR(dev)==MINOR(p->dev) &&
++ p->veid==veid);
++ p = p->devhash_next)
++ ;
++ return p;
++}
++
++
++static void do_clean_devperms(envid_t veid)
++{
++ int i;
++ struct devperms_struct* ve;
++
++ write_lock_irq(&devperms_hash_guard);
++ for (i = 0; i < DEVPERMS_HASH_SZ; i++)
++ for (ve = devperms_hash[i]; ve;) {
++ struct devperms_struct *next = ve->devhash_next;
++ if (ve->veid == veid) {
++ unhash_devperms(ve);
++ kfree(ve);
++ }
++
++ ve = next;
++ }
++ write_unlock_irq(&devperms_hash_guard);
++}
++
++/*
++ * Mode is a mask of
++ * FMODE_READ for read access (configurable by S_IROTH)
++ * FMODE_WRITE for write access (configurable by S_IWOTH)
++ * FMODE_QUOTACTL for quotactl access (configurable by S_IXGRP)
++ */
++int real_get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
++{
++ struct devperms_struct *perms;
++ struct ve_struct *ve;
++ envid_t veid;
++
++ perms = NULL;
++ ve = get_exec_env();
++ veid = ve->veid;
++
++ read_lock(&devperms_hash_guard);
++
++ perms = find_devperms(veid, dev_type|VE_USE_MINOR, dev);
++ if (perms)
++ goto end;
++
++ perms = find_devperms(veid, dev_type|VE_USE_MAJOR, MKDEV(MAJOR(dev),0));
++ if (perms)
++ goto end;
++
++ perms = find_devperms(veid, dev_type, MKDEV(0,0));
++ if (perms)
++ goto end;
++
++ perms = find_default_devperms(dev_type, dev);
++
++end:
++ read_unlock(&devperms_hash_guard);
++
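++ /* translate the FMODE_READ/FMODE_WRITE/FMODE_QUOTACTL bits of access_mode
++ * into the matching S_IROTH/S_IWOTH/S_IXGRP mask bits (see the comment above) */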
++ access_mode = "\000\004\002\006\010\014\012\016"[access_mode];
++ return perms ?
++ (((perms->mask & access_mode) == access_mode) ? 0 : -EACCES) :
++ -ENODEV;
++}
++
++int do_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
++{
++ struct devperms_struct *perms;
++
++ write_lock_irq(&devperms_hash_guard);
++ perms = find_devperms(veid, type, dev);
++ if (!perms) {
++ struct devperms_struct *perms_new;
++ write_unlock_irq(&devperms_hash_guard);
++
++ perms_new = kmalloc(sizeof(struct devperms_struct), GFP_KERNEL);
++ if (!perms_new)
++ return -ENOMEM;
++
++ write_lock_irq(&devperms_hash_guard);
++ perms = find_devperms(veid, type, dev);
++ if (perms) {
++ kfree(perms_new);
++ perms_new = perms;
++ }
++
++ switch (type & VE_USE_MASK) {
++ case 0:
++ dev = 0;
++ break;
++ case VE_USE_MAJOR:
++ dev = MKDEV(MAJOR(dev),0);
++ break;
++ }
++
++ perms_new->veid = veid;
++ perms_new->dev = dev;
++ perms_new->type = type;
++ perms_new->mask = mask & S_IALLUGO;
++ hash_devperms(perms_new);
++ } else
++ perms->mask = mask & S_IALLUGO;
++ write_unlock_irq(&devperms_hash_guard);
++ return 0;
++}
++EXPORT_SYMBOL(do_setdevperms);
++
++int real_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
++{
++ struct ve_struct *ve;
++ int err;
++
++ if (!capable(CAP_SETVEID) || veid == 0)
++ return -EPERM;
++
++ if ((ve = get_ve_by_id(veid)) == NULL)
++ return -ESRCH;
++
++ down_read(&ve->op_sem);
++ err = -ESRCH;
++ if (ve->is_running)
++ err = do_setdevperms(veid, type, dev, mask);
++ up_read(&ve->op_sem);
++ real_put_ve(ve);
++ return err;
++}
++
++void real_update_load_avg_ve(void)
++{
++ struct ve_struct *ve;
++ unsigned long nr_active;
++
++ read_lock(&ve_list_guard);
++ for (ve = ve_list_head; ve != NULL; ve = ve->next) {
++ nr_active = nr_running_ve(ve) + nr_uninterruptible_ve(ve);
++ nr_active *= FIXED_1;
++ CALC_LOAD(ve->avenrun[0], EXP_1, nr_active);
++ CALC_LOAD(ve->avenrun[1], EXP_5, nr_active);
++ CALC_LOAD(ve->avenrun[2], EXP_15, nr_active);
++ }
++ read_unlock(&ve_list_guard);
++}
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * FS-related helpers to VE start/stop
++ *
++ **********************************************************************
++ **********************************************************************/
++
++/*
++ * DEVPTS needs virtualization: each environment should see its own list of
++ * pseudo-terminals.
++ * To implement it we need to have separate devpts superblocks for each
++ * VE, and each VE should mount its own one.
++ * Thus, separate vfsmount structures are required.
++ * To minimize intrusion into vfsmount lookup code, separate file_system_type
++ * structures are created.
++ *
++ * In addition to this, a patch for the character device itself is required,
++ * as the file system itself is used only for MINOR/MAJOR lookup.
++ */
++static int register_ve_fs_type(struct ve_struct *ve,
++ struct file_system_type *template,
++ struct file_system_type **p_fs_type, struct vfsmount **p_mnt)
++{
++ struct vfsmount *mnt;
++ struct file_system_type *local_fs_type;
++ int ret;
++
++ VZTRACE("register_ve_fs_type(\"%s\")\n", template->name);
++
++ local_fs_type = kmalloc(sizeof(*local_fs_type) + sizeof(void *),
++ GFP_KERNEL);
++ if (local_fs_type == NULL)
++ return -ENOMEM;
++
++ memset(local_fs_type, 0, sizeof(*local_fs_type));
++ local_fs_type->name = template->name;
++ local_fs_type->fs_flags = template->fs_flags;
++ local_fs_type->get_sb = template->get_sb;
++ local_fs_type->kill_sb = template->kill_sb;
++ local_fs_type->owner = template->owner;
++ /*
++ * 1. we do not have a refcounter on fstype
++ * 2. fstype holds a reference to ve using get_ve()/put_ve(),
++ * so we free fstype when freeing ve and we are sure it is OK to free it
++ */
++ SET_VE_OWNER_FSTYPE(local_fs_type, ve);
++ get_filesystem(local_fs_type); /* get_ve() inside */
++
++ ret = register_filesystem(local_fs_type); /* does not get */
++ if (ret)
++ goto reg_err;
++
++ mnt = kern_mount(local_fs_type);
++ if (IS_ERR(mnt))
++ goto mnt_err;
++
++ /* Usage counters after successful execution of kern_mount:
++ * local_fs_type - +1 (get_fs_type,get_sb_single,put_filesystem)
++ * mnt - +1 == 1 (alloc_vfsmnt)
++ */
++
++ *p_fs_type = local_fs_type;
++ *p_mnt = mnt;
++ return 0;
++
++mnt_err:
++ ret = PTR_ERR(mnt);
++ unregister_filesystem(local_fs_type); /* does not put */
++
++reg_err:
++ put_filesystem(local_fs_type);
++ kfree(local_fs_type);
++ printk(KERN_DEBUG
++ "register_ve_fs_type(\"%s\") err=%d\n", template->name, ret);
++ return ret;
++}
++
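++/* Detach every mount of this per-VE filesystem type from the caller's
++ * namespace and unmount the collected trees. */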
++static void umount_ve_fs_type(struct file_system_type *local_fs_type)
++{
++ struct vfsmount *mnt;
++ struct list_head *p, *q;
++ LIST_HEAD(kill);
++ LIST_HEAD(umount_list);
++
++ down_write(&namespace_sem);
++ spin_lock(&vfsmount_lock);
++ list_for_each_safe(p, q, &current->namespace->list) {
++ mnt = list_entry(p, struct vfsmount, mnt_list);
++ if (mnt->mnt_sb->s_type != local_fs_type)
++ continue;
++ list_del(p);
++ list_add(p, &kill);
++ }
++
++ while (!list_empty(&kill)) {
++ mnt = list_entry(kill.next, struct vfsmount, mnt_list);
++ umount_tree(mnt, 1, &umount_list);
++ }
++ spin_unlock(&vfsmount_lock);
++ up_write(&namespace_sem);
++ release_mounts(&umount_list);
++}
++
++static void unregister_ve_fs_type(struct file_system_type *local_fs_type,
++ struct vfsmount *local_fs_mount)
++{
++ if (local_fs_mount == NULL ||
++ local_fs_type == NULL) {
++ if (local_fs_mount != NULL ||
++ local_fs_type != NULL)
++ BUG();
++ return;
++ }
++
++ VZTRACE("unregister_ve_fs_type(\"%s\")\n", local_fs_type->name);
++
++ unregister_filesystem(local_fs_type);
++ umount_ve_fs_type(local_fs_type);
++ kern_umount(local_fs_mount); /* alias to mntput, drop our ref */
++ put_filesystem(local_fs_type);
++}
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * Sysctl-related helpers to VE start/stop
++ *
++ **********************************************************************
++ **********************************************************************/
++
++#ifdef CONFIG_SYSCTL
++static ctl_table ve_sysctl_tables[] = {
++ /* kernel */
++ {
++ .ctl_name = CTL_KERN,
++ .procname = "kernel",
++ .mode = 0555,
++ .child = &ve_sysctl_tables[2],
++ },
++ { .ctl_name = 0 },
++ /* kernel/[vars] */
++ {
++ .ctl_name = KERN_NODENAME,
++ .procname = "hostname",
++ .maxlen = 64,
++ .mode = 0644,
++ .proc_handler = &proc_doutsstring,
++ .strategy = &sysctl_string,
++ },
++ {
++ .ctl_name = KERN_DOMAINNAME,
++ .procname = "domainname",
++ .maxlen = 64,
++ .mode = 0644,
++ .proc_handler = &proc_doutsstring,
++ .strategy = &sysctl_string,
++ },
++ {
++ .ctl_name = KERN_SHMMAX,
++ .procname = "shmmax",
++ .maxlen = sizeof(size_t),
++ .mode = 0644,
++ .proc_handler = &proc_doulongvec_minmax,
++ },
++ {
++ .ctl_name = KERN_SHMALL,
++ .procname = "shmall",
++ .maxlen = sizeof(size_t),
++ .mode = 0644,
++ .proc_handler = &proc_doulongvec_minmax,
++ },
++ {
++ .ctl_name = KERN_SHMMNI,
++ .procname = "shmmni",
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .ctl_name = KERN_MSGMAX,
++ .procname = "msgmax",
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .ctl_name = KERN_MSGMNI,
++ .procname = "msgmni",
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .ctl_name = KERN_MSGMNB,
++ .procname = "msgmnb",
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .ctl_name = KERN_SEM,
++ .procname = "sem",
++ .maxlen = 4 * sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ { .ctl_name = 0, }
++};
++
++static int register_ve_sysctltables(struct ve_struct *ve)
++{
++ struct ctl_table_header *header;
++ ctl_table *root, *table;
++
++ VZTRACE("register_ve_sysctltables\n");
++
++ root = clone_sysctl_template(ve_sysctl_tables,
++ sizeof(ve_sysctl_tables) / sizeof(ctl_table));
++ if (root == NULL)
++ goto out;
++
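++ /* root->child points at the cloned kernel/[vars] block; the indexes
++ * below must match the entry order in ve_sysctl_tables above */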
++ table = root->child;
++ table[0].data = &ve->utsname->nodename;
++ table[1].data = &ve->utsname->domainname;
++ table[2].data = &ve->_shm_ctlmax;
++ table[3].data = &ve->_shm_ctlall;
++ table[4].data = &ve->_shm_ctlmni;
++ table[5].data = &ve->_msg_ctlmax;
++ table[6].data = &ve->_msg_ctlmni;
++ table[7].data = &ve->_msg_ctlmnb;
++ table[8].data = &ve->_sem_ctls[0];
++
++ /* insert at head to override kern entries */
++ header = register_sysctl_table(root, 1);
++ if (header == NULL)
++ goto out_free;
++
++ ve->kern_header = header;
++ ve->kern_table = root;
++ return 0;
++
++out_free:
++ free_sysctl_clone(root);
++out:
++ return -ENOMEM;
++}
++
++static inline void unregister_ve_sysctltables(struct ve_struct *ve)
++{
++ unregister_sysctl_table(ve->kern_header);
++}
++
++static inline void free_ve_sysctltables(struct ve_struct *ve)
++{
++ free_sysctl_clone(ve->kern_table);
++}
++#endif
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * VE start: subsystems
++ *
++ **********************************************************************
++ **********************************************************************/
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#include <net/ip.h>
++#include <net/tcp.h>
++#include <net/udp.h>
++#include <net/icmp.h>
++
++static int init_ve_utsname(struct ve_struct *ve)
++{
++ ve->utsname = kmalloc(sizeof(*ve->utsname), GFP_KERNEL);
++ if (ve->utsname == NULL)
++ return -ENOMEM;
++
++ down_read(&uts_sem); /* protect the source */
++ memcpy(ve->utsname, &system_utsname, sizeof(*ve->utsname));
++ up_read(&uts_sem);
++
++ return 0;
++}
++
++static void free_ve_utsname(struct ve_struct *ve)
++{
++ kfree(ve->utsname);
++ ve->utsname = NULL;
++}
++
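++/* Allocation and release of the per-VE SNMP MIB counters share one unwind
++ * chain: on init a failed allocation jumps to the label that frees
++ * everything allocated so far, while fini (fini != 0) enters the same
++ * chain from the top and frees all of it. */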
++static int init_fini_ve_mibs(struct ve_struct *ve, int fini)
++{
++ if (fini)
++ goto fini;
++ if (!(ve->_net_statistics[0] = alloc_percpu(struct linux_mib)))
++ goto out1;
++ if (!(ve->_net_statistics[1] = alloc_percpu(struct linux_mib)))
++ goto out2;
++ if (!(ve->_ip_statistics[0] = alloc_percpu(struct ipstats_mib)))
++ goto out3;
++ if (!(ve->_ip_statistics[1] = alloc_percpu(struct ipstats_mib)))
++ goto out4;
++ if (!(ve->_icmp_statistics[0] = alloc_percpu(struct icmp_mib)))
++ goto out5;
++ if (!(ve->_icmp_statistics[1] = alloc_percpu(struct icmp_mib)))
++ goto out6;
++ if (!(ve->_tcp_statistics[0] = alloc_percpu(struct tcp_mib)))
++ goto out7;
++ if (!(ve->_tcp_statistics[1] = alloc_percpu(struct tcp_mib)))
++ goto out8;
++ if (!(ve->_udp_statistics[0] = alloc_percpu(struct udp_mib)))
++ goto out9;
++ if (!(ve->_udp_statistics[1] = alloc_percpu(struct udp_mib)))
++ goto out10;
++ return 0;
++fini:
++ free_percpu(ve->_udp_statistics[1]);
++out10:
++ free_percpu(ve->_udp_statistics[0]);
++out9:
++ free_percpu(ve->_tcp_statistics[1]);
++out8:
++ free_percpu(ve->_tcp_statistics[0]);
++out7:
++ free_percpu(ve->_icmp_statistics[1]);
++out6:
++ free_percpu(ve->_icmp_statistics[0]);
++out5:
++ free_percpu(ve->_ip_statistics[1]);
++out4:
++ free_percpu(ve->_ip_statistics[0]);
++out3:
++ free_percpu(ve->_net_statistics[1]);
++out2:
++ free_percpu(ve->_net_statistics[0]);
++out1:
++ return -ENOMEM;
++}
++
++static inline int init_ve_mibs(struct ve_struct *ve)
++{
++ return init_fini_ve_mibs(ve, 0);
++}
++
++static inline void fini_ve_mibs(struct ve_struct *ve)
++{
++ (void)init_fini_ve_mibs(ve, 1);
++}
++
++extern struct net_device templ_loopback_dev;
++static void veloop_setup(struct net_device *dev)
++{
++ int padded;
++ padded = dev->padded;
++ memcpy(dev, &templ_loopback_dev, sizeof(struct net_device));
++ dev->padded = padded;
++}
++
++static int init_ve_netdev(void)
++{
++ struct ve_struct *ve;
++ struct net_device_stats *stats;
++ int err;
++
++ ve = get_exec_env();
++ INIT_HLIST_HEAD(&ve->_net_dev_head);
++ ve->_net_dev_base = NULL;
++ ve->_net_dev_tail = &ve->_net_dev_base;
++
++ ve->_loopback_dev = alloc_netdev(0, templ_loopback_dev.name,
++ veloop_setup);
++ if (ve->_loopback_dev == NULL)
++ return -ENOMEM;
++ if (loopback_dev.get_stats != NULL) {
++ stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
++ if (stats != NULL) {
++ memset(stats, 0, sizeof(struct net_device_stats));
++ ve->_loopback_dev->priv = stats;
++ ve->_loopback_dev->get_stats = loopback_dev.get_stats;
++ ve->_loopback_dev->destructor = loopback_dev.destructor;
++ }
++ }
++ err = register_netdev(ve->_loopback_dev);
++ if (err) {
++ if (ve->_loopback_dev->priv != NULL)
++ kfree(ve->_loopback_dev->priv);
++ free_netdev(ve->_loopback_dev);
++ }
++ return err;
++}
++
++static void fini_ve_netdev(void)
++{
++ struct ve_struct *ve;
++ struct net_device *dev;
++
++ ve = get_exec_env();
++ while (1) {
++ rtnl_lock();
++ /*
++ * loopback is special: it can be referenced in FIBs,
++ * so it must be freed last. Doing so is
++ * sufficient to guarantee the absence of such references.
++ */
++ if (dev_base == ve->_loopback_dev)
++ dev = dev_base->next;
++ else
++ dev = dev_base;
++ if (dev == NULL)
++ break;
++ unregister_netdevice(dev);
++ rtnl_unlock();
++ free_netdev(dev);
++ }
++ unregister_netdevice(ve->_loopback_dev);
++ rtnl_unlock();
++ free_netdev(ve->_loopback_dev);
++ ve->_loopback_dev = NULL;
++}
++#else
++#define init_ve_mibs(ve) (0)
++#define fini_ve_mibs(ve) do { } while (0)
++#define init_ve_netdev() (0)
++#define fini_ve_netdev() do { } while (0)
++#endif
++
++static int prepare_proc_root(struct ve_struct *ve)
++{
++ struct proc_dir_entry *de;
++
++ de = kmalloc(sizeof(struct proc_dir_entry) + 6, GFP_KERNEL);
++ if (de == NULL)
++ return -ENOMEM;
++ memset(de, 0, sizeof(struct proc_dir_entry));
++ memcpy(de + 1, "/proc", 6);
++ de->name = (char *)(de + 1);
++ de->namelen = 5;
++ de->mode = S_IFDIR | S_IRUGO | S_IXUGO;
++ de->nlink = 2;
++ atomic_set(&de->count, 1);
++
++ ve->proc_root = de;
++ return 0;
++}
++
++#ifdef CONFIG_PROC_FS
++static int init_ve_proc(struct ve_struct *ve)
++{
++ int err;
++ struct proc_dir_entry *de;
++
++ err = prepare_proc_root(ve);
++ if (err)
++ goto out_root;
++
++ err = register_ve_fs_type(ve, &proc_fs_type,
++ &ve->proc_fstype, &ve->proc_mnt);
++ if (err)
++ goto out_reg;
++
++ /* create necessary /proc subdirs in VE local proc tree */
++ err = -ENOMEM;
++ de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
++ if (!de)
++ goto out_vz;
++
++#ifdef CONFIG_VE_IPTABLES
++ proc_net = proc_mkdir("net", NULL);
++ if (!proc_net)
++ goto out_net;
++#endif
++
++ return 0;
++
++#ifdef CONFIG_VE_IPTABLES
++out_net:
++ remove_proc_entry("vz", NULL);
++#endif
++out_vz:
++ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
++ ve->proc_mnt = NULL;
++out_reg:
++ /* proc_fstype and proc_root are freed in real_put_ve -> free_ve_proc */
++ ;
++out_root:
++ return err;
++}
++
++static void fini_ve_proc(struct ve_struct *ve)
++{
++#ifdef CONFIG_VE_IPTABLES
++ remove_proc_entry("net", NULL);
++ proc_net = NULL;
++#endif
++ remove_proc_entry("vz", NULL);
++ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
++ ve->proc_mnt = NULL;
++}
++
++static void free_ve_proc(struct ve_struct *ve)
++{
++ /* the proc filesystem frees proc_dir_entries only on remove_proc_entry(),
++ so we check that everything was removed and nothing was lost */
++ if (ve->proc_root && ve->proc_root->subdir) {
++ struct proc_dir_entry *p = ve->proc_root;
++ printk(KERN_WARNING "VPS: %d: proc entry /proc", ve->veid);
++ while ((p = p->subdir) != NULL)
++ printk("/%s", p->name);
++ printk(" is not removed!\n");
++ }
++
++ kfree(ve->proc_root);
++ kfree(ve->proc_fstype);
++
++ ve->proc_fstype = NULL;
++ ve->proc_root = NULL;
++}
++#else
++#define init_ve_proc(ve) (0)
++#define fini_ve_proc(ve) do { } while (0)
++#define free_ve_proc(ve) do { } while (0)
++#endif
++
++#ifdef CONFIG_SYSCTL
++static int init_ve_sysctl(struct ve_struct *ve)
++{
++ int err;
++
++#ifdef CONFIG_PROC_FS
++ err = -ENOMEM;
++ ve->proc_sys_root = proc_mkdir("sys", 0);
++ if (ve->proc_sys_root == NULL)
++ goto out_proc;
++#endif
++ INIT_LIST_HEAD(&ve->sysctl_lh);
++ err = register_ve_sysctltables(ve);
++ if (err)
++ goto out_reg;
++
++ err = devinet_sysctl_init(ve);
++ if (err)
++ goto out_dev;
++
++ return 0;
++
++out_dev:
++ unregister_ve_sysctltables(ve);
++ free_ve_sysctltables(ve);
++out_reg:
++#ifdef CONFIG_PROC_FS
++ remove_proc_entry("sys", NULL);
++out_proc:
++#endif
++ return err;
++}
++
++static void fini_ve_sysctl(struct ve_struct *ve)
++{
++ devinet_sysctl_fini(ve);
++ unregister_ve_sysctltables(ve);
++ remove_proc_entry("sys", NULL);
++}
++
++static void free_ve_sysctl(struct ve_struct *ve)
++{
++ devinet_sysctl_free(ve);
++ free_ve_sysctltables(ve);
++}
++#else
++#define init_ve_sysctl(ve) (0)
++#define fini_ve_sysctl(ve) do { } while (0)
++#define free_ve_sysctl(ve) do { } while (0)
++#endif
++
++#ifdef CONFIG_UNIX98_PTYS
++#include <linux/devpts_fs.h>
++
++static int init_ve_devpts(struct ve_struct *ve)
++{
++ int err;
++
++ err = -ENOMEM;
++ ve->devpts_config = kmalloc(sizeof(struct devpts_config), GFP_KERNEL);
++ if (ve->devpts_config == NULL)
++ goto out;
++ memset(ve->devpts_config, 0, sizeof(struct devpts_config));
++ ve->devpts_config->mode = 0600;
++ err = register_ve_fs_type(ve, &devpts_fs_type,
++ &ve->devpts_fstype, &ve->devpts_mnt);
++ if (err) {
++ kfree(ve->devpts_config);
++ ve->devpts_config = NULL;
++ }
++out:
++ return err;
++}
++
++static void fini_ve_devpts(struct ve_struct *ve)
++{
++ unregister_ve_fs_type(ve->devpts_fstype, ve->devpts_mnt);
++ /* devpts_fstype is freed in real_put_ve -> free_ve_filesystems */
++ ve->devpts_mnt = NULL;
++ kfree(ve->devpts_config);
++ ve->devpts_config = NULL;
++}
++#else
++#define init_ve_devpts(ve) (0)
++#define fini_ve_devpts(ve) do { } while (0)
++#endif
++
++static int init_ve_shmem(struct ve_struct *ve)
++{
++ return register_ve_fs_type(ve,
++ &tmpfs_fs_type,
++ &ve->shmem_fstype,
++ &ve->shmem_mnt);
++}
++
++static void fini_ve_shmem(struct ve_struct *ve)
++{
++ unregister_ve_fs_type(ve->shmem_fstype, ve->shmem_mnt);
++ /* shmem_fstype is freed in real_put_ve -> free_ve_filesystems */
++ ve->shmem_mnt = NULL;
++}
++
++static int init_ve_sysfs(struct ve_struct *ve)
++{
++ struct subsystem *subsys;
++ struct class *nc;
++ int err;
++ extern struct subsystem class_obj_subsys;
++ extern struct subsystem class_subsys;
++ extern struct class net_class;
++
++#ifdef CONFIG_VE_SYSFS
++ err = register_ve_fs_type(ve,
++ &sysfs_fs_type,
++ &ve->sysfs_fstype,
++ &ve->sysfs_mnt);
++ if (err != 0)
++ goto out_fs_type;
++#endif
++ err = -ENOMEM;
++ subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
++ if (subsys == NULL)
++ goto out_class_obj;
++ /* ick, this is ugly, the things we go through to keep from showing up
++ * in sysfs... */
++ memset(subsys, 0, sizeof(*subsys));
++ memcpy(&subsys->kset.kobj.name, &class_obj_subsys.kset.kobj.name,
++ sizeof(subsys->kset.kobj.name));
++ subsys->kset.ktype = class_obj_subsys.kset.ktype;
++ subsys->kset.hotplug_ops = class_obj_subsys.kset.hotplug_ops;
++ subsystem_init(subsys);
++ if (!subsys->kset.subsys)
++ subsys->kset.subsys = subsys;
++ ve->class_obj_subsys = subsys;
++
++ err = -ENOMEM;
++ subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
++ if (subsys == NULL)
++ goto out_class_subsys;
++ /* ick, this is ugly, the things we go through to keep from showing up
++ * in sysfs... */
++ memset(subsys, 0, sizeof(*subsys));
++ memcpy(&subsys->kset.kobj.name, &class_subsys.kset.kobj.name,
++ sizeof(subsys->kset.kobj.name));
++ subsys->kset.ktype = class_subsys.kset.ktype;
++ subsys->kset.hotplug_ops = class_subsys.kset.hotplug_ops;
++ ve->class_subsys = subsys;
++ err = subsystem_register(subsys);
++ if (err != 0)
++ goto out_register;
++
++ err = -ENOMEM;
++ nc = kmalloc(sizeof(*nc), GFP_KERNEL);
++ if (nc == NULL)
++ goto out_nc;
++ memset(nc, 0, sizeof(*nc));
++ nc->name = net_class.name;
++ nc->release = net_class.release;
++ nc->hotplug = net_class.hotplug;
++ err = class_register(nc);
++ if (err != 0)
++ goto out_class_register;
++ ve->net_class = nc;
++
++ return err;
++
++out_class_register:
++ kfree(nc);
++out_nc:
++ subsystem_unregister(subsys);
++out_register:
++ kfree(ve->class_subsys);
++out_class_subsys:
++ kfree(ve->class_obj_subsys);
++out_class_obj:
++#ifdef CONFIG_VE_SYSFS
++ unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
++ /* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
++out_fs_type:
++#endif
++ ve->class_subsys = NULL;
++ ve->class_obj_subsys = NULL;
++ return err;
++}
++
++static void fini_ve_sysfs(struct ve_struct *ve)
++{
++ class_unregister(ve->net_class);
++ subsystem_unregister(ve->class_subsys);
++
++ kfree(ve->net_class);
++ kfree(ve->class_subsys);
++ kfree(ve->class_obj_subsys);
++
++ ve->net_class = NULL;
++ ve->class_subsys = NULL;
++ ve->class_obj_subsys = NULL;
++#ifdef CONFIG_VE_SYSFS
++ unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
++ ve->sysfs_mnt = NULL;
++ /* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
++#endif
++}
++
++static void free_ve_filesystems(struct ve_struct *ve)
++{
++#ifdef CONFIG_VE_SYSFS
++ kfree(ve->sysfs_fstype);
++ ve->sysfs_fstype = NULL;
++#endif
++ kfree(ve->shmem_fstype);
++ ve->shmem_fstype = NULL;
++
++ kfree(ve->devpts_fstype);
++ ve->devpts_fstype = NULL;
++
++ free_ve_proc(ve);
++}
++
++static int init_printk(struct ve_struct *ve)
++{
++ struct ve_prep_printk {
++ wait_queue_head_t log_wait;
++ unsigned long log_start;
++ unsigned long log_end;
++ unsigned long logged_chars;
++ } *tmp;
++
++ tmp = kmalloc(sizeof(struct ve_prep_printk), GFP_KERNEL);
++ if (!tmp)
++ return -ENOMEM;
++ memset(tmp, 0, sizeof(struct ve_prep_printk));
++ init_waitqueue_head(&tmp->log_wait);
++ ve->_log_wait = &tmp->log_wait;
++ ve->_log_start = &tmp->log_start;
++ ve->_log_end = &tmp->log_end;
++ ve->_logged_chars = &tmp->logged_chars;
++ /* ve->log_buf will be initialized later by ve_log_init() */
++ return 0;
++}
++
++static void fini_printk(struct ve_struct *ve)
++{
++ /*
++ * there is no spinlock protection here because nobody can use
++ * log_buf at the moment this code is called.
++ */
++ kfree(ve->log_buf);
++ kfree(ve->_log_wait);
++}
++
++static void fini_venet(struct ve_struct *ve)
++{
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ tcp_v4_kill_ve_sockets(ve);
++#endif
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ ve_mapped_devs_cleanup(ve);
++#endif
++}
++
++static int init_ve_sched(struct ve_struct *ve)
++{
++#ifdef CONFIG_FAIRSCHED
++ int err;
++
++ /*
++ * We refuse to switch to an already existing node since nodes
++ * keep a pointer to their ve_struct...
++ */
++ err = sys_fairsched_mknod(0, 1, ve->veid);
++ if (err < 0) {
++ printk(KERN_WARNING "Can't create fairsched node %d\n",
++ ve->veid);
++ return err;
++ }
++ err = sys_fairsched_mvpr(current->pid, ve->veid);
++ if (err) {
++ printk(KERN_WARNING "Can't switch to fairsched node %d\n",
++ ve->veid);
++ if (sys_fairsched_rmnod(ve->veid))
++ printk(KERN_ERR "Can't clean fairsched node %d\n",
++ ve->veid);
++ return err;
++ }
++#endif
++ ve_sched_attach(ve);
++ return 0;
++}
++
++static void fini_ve_sched(struct ve_struct *ve)
++{
++#ifdef CONFIG_FAIRSCHED
++ if (task_vsched_id(current) == ve->veid)
++ if (sys_fairsched_mvpr(current->pid, fairsched_init_node.id))
++ printk(KERN_WARNING "Can't leave fairsched node %d\n",
++ ve->veid);
++ if (sys_fairsched_rmnod(ve->veid))
++ printk(KERN_ERR "Can't remove fairsched node %d\n",
++ ve->veid);
++#endif
++}
++
++static int init_ve_struct(struct ve_struct *ve, envid_t veid,
++ u32 class_id, struct task_struct *init_tsk)
++{
++ int n;
++
++ memset(ve, 0, sizeof(struct ve_struct));
++ (void)get_ve(ve);
++ ve->veid = veid;
++ ve->class_id = class_id;
++ ve->init_entry = init_tsk;
++ INIT_LIST_HEAD(&ve->vetask_lh);
++ init_rwsem(&ve->op_sem);
++ ve->ifindex = -1;
++
++ for(n = 0; n < UIDHASH_SZ_VE; ++n)
++ INIT_LIST_HEAD(&ve->uidhash_table[n]);
++
++ do_posix_clock_monotonic_gettime(&ve->start_timespec);
++ ve->start_jiffies = jiffies;
++ ve->start_cycles = get_cycles();
++ ve->virt_pids = glob_virt_pids;
++
++ return 0;
++}
++
++static void set_ve_root(struct ve_struct *ve, struct task_struct *tsk)
++{
++ read_lock(&tsk->fs->lock);
++ ve->fs_rootmnt = tsk->fs->rootmnt;
++ ve->fs_root = tsk->fs->root;
++ read_unlock(&tsk->fs->lock);
++ mark_tree_virtual(ve->fs_rootmnt, ve->fs_root);
++}
++
++static void set_ve_caps(struct ve_struct *ve, struct task_struct *tsk)
++{
++ /* required for real_setdevperms from register_ve_<fs> above */
++ memcpy(&ve->cap_default, &tsk->cap_effective, sizeof(kernel_cap_t));
++ cap_lower(ve->cap_default, CAP_SETVEID);
++}
++
++static int ve_list_add(struct ve_struct *ve)
++{
++ write_lock_irq(&ve_list_guard);
++ if (__find_ve_by_id(ve->veid) != NULL)
++ goto err_exists;
++
++ ve->prev = NULL;
++ ve->next = ve_list_head;
++ if (ve_list_head)
++ ve_list_head->prev = ve;
++ ve_list_head = ve;
++ nr_ve++;
++ write_unlock_irq(&ve_list_guard);
++ return 0;
++
++err_exists:
++ write_unlock_irq(&ve_list_guard);
++ return -EEXIST;
++}
++
++static void ve_list_del(struct ve_struct *ve)
++{
++ write_lock_irq(&ve_list_guard);
++ if (ve->prev)
++ ve->prev->next = ve->next;
++ else
++ ve_list_head = ve->next;
++ if (ve->next)
++ ve->next->prev = ve->prev;
++ nr_ve--;
++ write_unlock_irq(&ve_list_guard);
++}
++
++static void set_task_ve_caps(struct task_struct *tsk, struct ve_struct *ve)
++{
++ spin_lock(&task_capability_lock);
++ cap_mask(tsk->cap_effective, ve->cap_default);
++ cap_mask(tsk->cap_inheritable, ve->cap_default);
++ cap_mask(tsk->cap_permitted, ve->cap_default);
++ spin_unlock(&task_capability_lock);
++}
++
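++/* Move a task from VE `old' to VE `new': disable ptrace dumping from the
++ * host, restrict its capabilities to the VE default set, switch its
++ * owner/exec environments and per-VE task links, and update both VEs'
++ * process and reference counters. */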
++static void move_task(struct task_struct *tsk, struct ve_struct *new,
++ struct ve_struct *old)
++{
++ /* this prohibits ptracing of a task entered into a VPS from the host system */
++ tsk->mm->vps_dumpable = 0;
++ /* setup capabilities before enter */
++ set_task_ve_caps(tsk, new);
++
++ write_lock_irq(&tasklist_lock);
++ VE_TASK_INFO(tsk)->owner_env = new;
++ VE_TASK_INFO(tsk)->exec_env = new;
++ REMOVE_VE_LINKS(tsk);
++ SET_VE_LINKS(tsk);
++
++ atomic_dec(&old->pcounter);
++ atomic_inc(&new->pcounter);
++ real_put_ve(old);
++ get_ve(new);
++ write_unlock_irq(&tasklist_lock);
++}
++
++#if (defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)) && \
++ defined(CONFIG_NETFILTER) && defined(CONFIG_VE_IPTABLES)
++#define init_ve_netfilter() init_netfilter()
++#define fini_ve_netfilter() fini_netfilter()
++#else
++#define init_ve_netfilter() (0)
++#define fini_ve_netfilter() do { } while (0)
++#endif
++
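++/*
++ * KSYMIPTINIT initializes one iptables submodule for a VE: it runs only if
++ * the module is requested in `mask' and its prerequisites (full_mask without
++ * the module's own _MOD bit) are already set up in the VE; on success the
++ * module's bit is added to ve->_iptables_modules.  KSYMIPTFINI calls the
++ * cleanup function only for modules that were actually initialized.
++ */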
++#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args) \
++({ \
++ int ret = 0; \
++ if (VE_IPT_CMP(mask, full_mask) && \
++ VE_IPT_CMP((ve)->_iptables_modules, \
++ full_mask & ~(full_mask##_MOD))) { \
++ ret = KSYMERRCALL(1, mod, name, args); \
++ if (ret == 0) \
++ (ve)->_iptables_modules |= \
++ full_mask##_MOD; \
++ if (ret == 1) \
++ ret = 0; \
++ } \
++ ret; \
++})
++
++#define KSYMIPTFINI(mask, full_mask, mod, name, args) \
++({ \
++ if (VE_IPT_CMP(mask, full_mask##_MOD)) \
++ KSYMSAFECALL_VOID(mod, name, args); \
++})
++
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask,
++ int init_or_cleanup)
++{
++ int err;
++
++ err = 0;
++ if (!init_or_cleanup)
++ goto cleanup;
++
++ /* init part */
++#if defined(CONFIG_IP_NF_IPTABLES) || \
++ defined(CONFIG_IP_NF_IPTABLES_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
++ ip_tables, init_iptables, ());
++ if (err < 0)
++ goto err_iptables;
++#endif
++#if defined(CONFIG_IP_NF_CONNTRACK) || \
++ defined(CONFIG_IP_NF_CONNTRACK_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK,
++ ip_conntrack, init_iptable_conntrack, ());
++ if (err < 0)
++ goto err_iptable_conntrack;
++#endif
++#if defined(CONFIG_IP_NF_FTP) || \
++ defined(CONFIG_IP_NF_FTP_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_FTP,
++ ip_conntrack_ftp, init_iptable_ftp, ());
++ if (err < 0)
++ goto err_iptable_ftp;
++#endif
++#if defined(CONFIG_IP_NF_IRC) || \
++ defined(CONFIG_IP_NF_IRC_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_IRC,
++ ip_conntrack_irc, init_iptable_irc, ());
++ if (err < 0)
++ goto err_iptable_irc;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_CONNTRACK) || \
++ defined(CONFIG_IP_NF_MATCH_CONNTRACK_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_CONNTRACK,
++ ipt_conntrack, init_iptable_conntrack_match, ());
++ if (err < 0)
++ goto err_iptable_conntrack_match;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_STATE) || \
++ defined(CONFIG_IP_NF_MATCH_STATE_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_STATE,
++ ipt_state, init_iptable_state, ());
++ if (err < 0)
++ goto err_iptable_state;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_HELPER) || \
++ defined(CONFIG_IP_NF_MATCH_HELPER_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_HELPER,
++ ipt_helper, init_iptable_helper, ());
++ if (err < 0)
++ goto err_iptable_helper;
++#endif
++#if defined(CONFIG_IP_NF_NAT) || \
++ defined(CONFIG_IP_NF_NAT_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
++ ip_nat, ip_nat_init, ());
++ if (err < 0)
++ goto err_iptable_nat;
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
++ iptable_nat, init_iptable_nat, ());
++ if (err < 0)
++ goto err_iptable_nat2;
++#endif
++#if defined(CONFIG_IP_NF_NAT_FTP) || \
++ defined(CONFIG_IP_NF_NAT_FTP_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_FTP,
++ ip_nat_ftp, init_iptable_nat_ftp, ());
++ if (err < 0)
++ goto err_iptable_nat_ftp;
++#endif
++#if defined(CONFIG_IP_NF_NAT_IRC) || \
++ defined(CONFIG_IP_NF_NAT_IRC_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_IRC,
++ ip_nat_irc, init_iptable_nat_irc, ());
++ if (err < 0)
++ goto err_iptable_nat_irc;
++#endif
++#if defined(CONFIG_IP_NF_FILTER) || \
++ defined(CONFIG_IP_NF_FILTER_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER,
++ iptable_filter, init_iptable_filter, ());
++ if (err < 0)
++ goto err_iptable_filter;
++#endif
++#if defined(CONFIG_IP_NF_MANGLE) || \
++ defined(CONFIG_IP_NF_MANGLE_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE,
++ iptable_mangle, init_iptable_mangle, ());
++ if (err < 0)
++ goto err_iptable_mangle;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_LIMIT) || \
++ defined(CONFIG_IP_NF_MATCH_LIMIT_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LIMIT,
++ ipt_limit, init_iptable_limit, ());
++ if (err < 0)
++ goto err_iptable_limit;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
++ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_MULTIPORT,
++ ipt_multiport, init_iptable_multiport, ());
++ if (err < 0)
++ goto err_iptable_multiport;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TOS) || \
++ defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TOS,
++ ipt_tos, init_iptable_tos, ());
++ if (err < 0)
++ goto err_iptable_tos;
++#endif
++#if defined(CONFIG_IP_NF_TARGET_TOS) || \
++ defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TOS,
++ ipt_TOS, init_iptable_TOS, ());
++ if (err < 0)
++ goto err_iptable_TOS;
++#endif
++#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
++ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REJECT,
++ ipt_REJECT, init_iptable_REJECT, ());
++ if (err < 0)
++ goto err_iptable_REJECT;
++#endif
++#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
++ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TCPMSS,
++ ipt_TCPMSS, init_iptable_TCPMSS, ());
++ if (err < 0)
++ goto err_iptable_TCPMSS;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TCPMSS) || \
++ defined(CONFIG_IP_NF_MATCH_TCPMSS_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TCPMSS,
++ ipt_tcpmss, init_iptable_tcpmss, ());
++ if (err < 0)
++ goto err_iptable_tcpmss;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TTL) || \
++ defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TTL,
++ ipt_ttl, init_iptable_ttl, ());
++ if (err < 0)
++ goto err_iptable_ttl;
++#endif
++#if defined(CONFIG_IP_NF_TARGET_LOG) || \
++ defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_LOG,
++ ipt_LOG, init_iptable_LOG, ());
++ if (err < 0)
++ goto err_iptable_LOG;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_LENGTH) || \
++ defined(CONFIG_IP_NF_MATCH_LENGTH_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LENGTH,
++ ipt_length, init_iptable_length, ());
++ if (err < 0)
++ goto err_iptable_length;
++#endif
++ return 0;
++
++/* ------------------------------------------------------------------------- */
++
++cleanup:
++#if defined(CONFIG_IP_NF_MATCH_LENGTH) || \
++ defined(CONFIG_IP_NF_MATCH_LENGTH_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LENGTH,
++ ipt_length, fini_iptable_length, ());
++err_iptable_length:
++#endif
++#if defined(CONFIG_IP_NF_TARGET_LOG) || \
++ defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_LOG,
++ ipt_LOG, fini_iptable_LOG, ());
++err_iptable_LOG:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TTL) || \
++ defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TTL,
++ ipt_ttl, fini_iptable_ttl, ());
++err_iptable_ttl:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TCPMSS) || \
++ defined(CONFIG_IP_NF_MATCH_TCPMSS_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TCPMSS,
++ ipt_tcpmss, fini_iptable_tcpmss, ());
++err_iptable_tcpmss:
++#endif
++#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
++ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TCPMSS,
++ ipt_TCPMSS, fini_iptable_TCPMSS, ());
++err_iptable_TCPMSS:
++#endif
++#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
++ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REJECT,
++ ipt_REJECT, fini_iptable_REJECT, ());
++err_iptable_REJECT:
++#endif
++#if defined(CONFIG_IP_NF_TARGET_TOS) || \
++ defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TOS,
++ ipt_TOS, fini_iptable_TOS, ());
++err_iptable_TOS:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TOS) || \
++ defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TOS,
++ ipt_tos, fini_iptable_tos, ());
++err_iptable_tos:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
++ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_MULTIPORT,
++ ipt_multiport, fini_iptable_multiport, ());
++err_iptable_multiport:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_LIMIT) || \
++ defined(CONFIG_IP_NF_MATCH_LIMIT_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LIMIT,
++ ipt_limit, fini_iptable_limit, ());
++err_iptable_limit:
++#endif
++#if defined(CONFIG_IP_NF_MANGLE) || \
++ defined(CONFIG_IP_NF_MANGLE_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE,
++ iptable_mangle, fini_iptable_mangle, ());
++err_iptable_mangle:
++#endif
++#if defined(CONFIG_IP_NF_FILTER) || \
++ defined(CONFIG_IP_NF_FILTER_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER,
++ iptable_filter, fini_iptable_filter, ());
++err_iptable_filter:
++#endif
++#if defined(CONFIG_IP_NF_NAT_IRC) || \
++ defined(CONFIG_IP_NF_NAT_IRC_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_IRC,
++ ip_nat_irc, fini_iptable_nat_irc, ());
++err_iptable_nat_irc:
++#endif
++#if defined(CONFIG_IP_NF_NAT_FTP) || \
++ defined(CONFIG_IP_NF_NAT_FTP_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_FTP,
++ ip_nat_ftp, fini_iptable_nat_ftp, ());
++err_iptable_nat_ftp:
++#endif
++#if defined(CONFIG_IP_NF_NAT) || \
++ defined(CONFIG_IP_NF_NAT_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
++ iptable_nat, fini_iptable_nat, ());
++err_iptable_nat2:
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
++ ip_nat, ip_nat_cleanup, ());
++err_iptable_nat:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_HELPER) || \
++ defined(CONFIG_IP_NF_MATCH_HELPER_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_HELPER,
++ ipt_helper, fini_iptable_helper, ());
++err_iptable_helper:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_STATE) || \
++ defined(CONFIG_IP_NF_MATCH_STATE_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_STATE,
++ ipt_state, fini_iptable_state, ());
++err_iptable_state:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_CONNTRACK) || \
++ defined(CONFIG_IP_NF_MATCH_CONNTRACK_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_CONNTRACK,
++ ipt_conntrack, fini_iptable_conntrack_match, ());
++err_iptable_conntrack_match:
++#endif
++#if defined(CONFIG_IP_NF_IRC) || \
++ defined(CONFIG_IP_NF_IRC_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_IRC,
++ ip_conntrack_irc, fini_iptable_irc, ());
++err_iptable_irc:
++#endif
++#if defined(CONFIG_IP_NF_FTP) || \
++ defined(CONFIG_IP_NF_FTP_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_FTP,
++ ip_conntrack_ftp, fini_iptable_ftp, ());
++err_iptable_ftp:
++#endif
++#if defined(CONFIG_IP_NF_CONNTRACK) || \
++ defined(CONFIG_IP_NF_CONNTRACK_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK,
++ ip_conntrack, fini_iptable_conntrack, ());
++err_iptable_conntrack:
++#endif
++#if defined(CONFIG_IP_NF_IPTABLES) || \
++ defined(CONFIG_IP_NF_IPTABLES_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
++ ip_tables, fini_iptables, ());
++err_iptables:
++#endif
++ ve->_iptables_modules = 0;
++
++ return err;
++}
++#else
++#define do_ve_iptables(ve, initmask, init) (0)
++#endif
++
++static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask)
++{
++ return do_ve_iptables(ve, init_mask, 1);
++}
++
++static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask)
++{
++ (void)do_ve_iptables(ve, init_mask, 0);
++}
++
++static void flush_ve_iptables(struct ve_struct *ve)
++{
++ /*
++ * flush all rule tables first,
++ * this helps us to avoid refs to freed objs
++ */
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ip_tables,
++ ipt_flush_table, (ve->_ipt_mangle_table));
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ip_tables,
++ ipt_flush_table, (ve->_ve_ipt_filter_pf));
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ip_tables,
++ ipt_flush_table, (ve->_ip_conntrack->_ip_nat_table));
++}
++
++static struct list_head ve_hooks[VE_MAX_HOOKS];
++static DECLARE_RWSEM(ve_hook_sem);
++
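++/* VE hooks are kept sorted by ascending priority; ve_hook_iterate() runs
++ * them in that order and, on failure, undoes the already-run hooks in
++ * reverse order. */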
++int ve_hook_register(struct ve_hook *vh)
++{
++ struct list_head *lh;
++ struct ve_hook *tmp;
++
++ down_write(&ve_hook_sem);
++ list_for_each(lh, &ve_hooks[vh->hooknum]) {
++ tmp = list_entry(lh, struct ve_hook, list);
++ if (vh->priority < tmp->priority)
++ break;
++ }
++ list_add_tail(&vh->list, lh);
++ up_write(&ve_hook_sem);
++ return 0;
++}
++EXPORT_SYMBOL(ve_hook_register);
++
++void ve_hook_unregister(struct ve_hook *vh)
++{
++ down_write(&ve_hook_sem);
++ list_del(&vh->list);
++ up_write(&ve_hook_sem);
++}
++EXPORT_SYMBOL(ve_hook_unregister);
++
++static int ve_hook_iterate(unsigned int hooknum, void *data)
++{
++ struct ve_hook *vh;
++ int err;
++
++ err = 0;
++ down_read(&ve_hook_sem);
++ list_for_each_entry(vh, &ve_hooks[hooknum], list) {
++ if (!try_module_get(vh->owner))
++ continue;
++ err = vh->hook(hooknum, data);
++ module_put(vh->owner);
++ if (err)
++ break;
++ }
++
++ if (err) {
++ list_for_each_entry_continue_reverse(vh,
++ &ve_hooks[hooknum], list) {
++ if (!try_module_get(vh->owner))
++ continue;
++ if (vh->undo)
++ vh->undo(hooknum, data);
++ module_put(vh->owner);
++ }
++ }
++ up_read(&ve_hook_sem);
++ return err;
++}
++
++static void ve_hook_iterate_cleanup(unsigned int hooknum, void *data)
++{
++ struct ve_hook *vh;
++
++ down_read(&ve_hook_sem);
++ list_for_each_entry_reverse(vh, &ve_hooks[hooknum], list) {
++ if (!try_module_get(vh->owner))
++ continue;
++ (void)vh->hook(hooknum, data);
++ module_put(vh->owner);
++ }
++ up_read(&ve_hook_sem);
++}
++
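++/*
++ * do_env_create builds a new VE around the calling task: it allocates the
++ * ve_struct, links it into the global VE list, switches the task to the
++ * VE scheduler node and then brings up the per-VE subsystems (utsname,
++ * MIBs, proc, sysctl, sysfs, routing, netdev, ttys, shmem, devpts, IPC,
++ * netfilter/iptables) before running the VE_HOOK_INIT hooks and moving
++ * the task into the new VE.  Every step has a matching unwind label in
++ * the error path below.
++ */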
++static int do_env_create(envid_t veid, u32 class_id,
++ struct env_create_param *data, int datalen)
++{
++ struct task_struct *tsk;
++ struct ve_struct *old;
++ struct ve_struct *old_exec;
++ struct ve_struct *ve;
++ struct ve_hook_init_data vhd;
++ __u64 init_mask;
++ int err;
++
++ tsk = current;
++ old = VE_TASK_INFO(tsk)->owner_env;
++
++ if (!thread_group_leader(tsk))
++ return -EINVAL;
++
++ if (tsk->signal->tty) {
++ printk("ERR: VE init has controlling terminal\n");
++ return -EINVAL;
++ }
++ if (tsk->signal->pgrp != tsk->pid || tsk->signal->session != tsk->pid) {
++ int may_setsid;
++ read_lock(&tasklist_lock);
++ may_setsid = (find_pid(PIDTYPE_PGID, tsk->pid) == NULL);
++ read_unlock(&tasklist_lock);
++ if (!may_setsid) {
++ printk("ERR: VE init is process group leader\n");
++ return -EINVAL;
++ }
++ }
++
++
++ VZTRACE("%s: veid=%d classid=%d pid=%d\n",
++ __FUNCTION__, veid, class_id, current->pid);
++
++ err = -ENOMEM;
++ ve = kmalloc(sizeof(struct ve_struct), GFP_KERNEL);
++ if (ve == NULL)
++ goto err_struct;
++
++ init_ve_struct(ve, veid, class_id, tsk);
++ __module_get(THIS_MODULE);
++ down_write(&ve->op_sem);
++ if ((err = ve_list_add(ve)) < 0)
++ goto err_exist;
++
++ /* this should be done before context switching */
++ if ((err = init_printk(ve)) < 0)
++ goto err_log_wait;
++
++ old_exec = set_exec_env(ve);
++
++ if ((err = init_ve_sched(ve)) < 0)
++ goto err_sched;
++
++ /* move user to VE */
++ if ((err = set_user(0, 0)) < 0)
++ goto err_set_user;
++
++ set_ve_root(ve, tsk);
++
++ if ((err = init_ve_utsname(ve)))
++ goto err_utsname;
++
++ if ((err = init_ve_mibs(ve)))
++ goto err_mibs;
++
++ if ((err = init_ve_proc(ve)))
++ goto err_proc;
++
++ if ((err = init_ve_sysctl(ve)))
++ goto err_sysctl;
++
++ if ((err = init_ve_sysfs(ve)))
++ goto err_sysfs;
++
++ if ((err = init_ve_route(ve)) < 0)
++ goto err_route;
++
++ if ((err = init_ve_netdev()))
++ goto err_dev;
++
++ if ((err = init_ve_tty_drivers(ve)) < 0)
++ goto err_tty;
++
++ if ((err = init_ve_shmem(ve)))
++ goto err_shmem;
++
++ if ((err = init_ve_devpts(ve)))
++ goto err_devpts;
++
++ /* init SYSV IPC variables */
++ if ((err = init_ve_ipc(ve)) < 0)
++ goto err_ipc;
++
++ set_ve_caps(ve, tsk);
++
++ if (alloc_vpid(tsk->pid, 1) < 0) {
++ err = -EBUSY;
++ goto err_vpid;
++ }
++ set_virt_pid(tsk, 1);
++ set_virt_tgid(tsk, 1);
++
++ set_special_pids(tsk->pid, tsk->pid);
++ current->signal->tty_old_pgrp = 0;
++ set_virt_pgid(tsk, 1);
++ set_virt_sid(tsk, 1);
++
++ /* It is safe to initialize netfilter here as routing initialization and
++ interface setup will be done below. This means that NO skb can be
++ passed inside. Den */
++ /* iptables VE initialization for non-ve0;
++ ve0 init is in module_init */
++ if ((err = init_ve_netfilter()) < 0)
++ goto err_netfilter;
++
++ init_mask = (data)? *((__u64 *)data): VE_IP_DEFAULT;
++ if ((err = init_ve_iptables(ve, init_mask)) < 0)
++ goto err_iptables;
++
++ vhd.env = ve;
++ vhd.class_id = class_id;
++ vhd.data = data;
++ vhd.datalen = datalen;
++ if ((err = ve_hook_iterate(VE_HOOK_INIT, (void *)&vhd)) < 0)
++ goto err_ve_hook;
++
++ move_task(tsk, ve, old);
++
++ ve->is_running = 1;
++ up_write(&ve->op_sem);
++
++ printk(KERN_INFO "VPS: %d: started\n", veid);
++ return veid;
++
++err_ve_hook:
++ fini_venet(ve);
++ fini_ve_iptables(ve, init_mask);
++err_iptables:
++ fini_ve_netfilter();
++err_netfilter:
++ ;
++err_vpid:
++ fini_ve_ipc(ve);
++err_ipc:
++ fini_ve_devpts(ve);
++err_devpts:
++ fini_ve_shmem(ve);
++err_shmem:
++ fini_ve_tty_drivers(ve);
++err_tty:
++ fini_ve_netdev();
++err_dev:
++ fini_ve_route(ve);
++err_route:
++ fini_ve_sysfs(ve);
++err_sysfs:
++ fini_ve_sysctl(ve);
++err_sysctl:
++ fini_ve_proc(ve);
++err_proc:
++ do_clean_devperms(ve->veid); /* register procfs adds devperms */
++ fini_ve_mibs(ve);
++err_mibs:
++ /* free_ve_utsname() is called inside real_put_ve() */ ;
++err_utsname:
++ /* It is safe to restore current->envid here because
++ * ve_fairsched_detach does not use current->envid. */
++ /* Actually, the fairsched code uses current->envid only in
++ * sys_fairsched_mknod. That is correct if sys_fairsched_mknod is called
++ * from userspace. If sys_fairsched_mknod is called from
++ * ve_fairsched_attach, then node->envid and node->parent_node->envid
++ * are explicitly set to valid values after the call. */
++ /* FIXME */
++ VE_TASK_INFO(tsk)->owner_env = old;
++ VE_TASK_INFO(tsk)->exec_env = old_exec;
++ /* move user back */
++ if (set_user(0, 0) < 0)
++ printk(KERN_WARNING"Can't restore UID\n");
++
++err_set_user:
++ fini_ve_sched(ve);
++err_sched:
++ (void)set_exec_env(old_exec);
++
++ /* we can jump here having incorrect envid */
++ VE_TASK_INFO(tsk)->owner_env = old;
++ fini_printk(ve);
++err_log_wait:
++ ve_list_del(ve);
++ up_write(&ve->op_sem);
++
++ real_put_ve(ve);
++err_struct:
++ printk(KERN_INFO "VPS: %d: failed to start with err=%d\n", veid, err);
++ return err;
++
++err_exist:
++ kfree(ve);
++ goto err_struct;
++}
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * VE start/stop callbacks
++ *
++ **********************************************************************
++ **********************************************************************/
++
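++/*
++ * real_env_create dispatches on `flags': with no flags it just reports the
++ * caller's VE id; VE_TEST checks whether a VE exists; VE_CREATE creates a
++ * new VE, or degrades to VE_ENTER if the VE already exists (unless
++ * VE_EXCLUSIVE is set, which then fails with -EACCES); VE_ENTER moves the
++ * calling task into an already running VE.
++ */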
++int real_env_create(envid_t veid, unsigned flags, u32 class_id,
++ struct env_create_param *data, int datalen)
++{
++ int status;
++ struct ve_struct *ve;
++
++ if (!flags) {
++ status = get_exec_env()->veid;
++ goto out;
++ }
++
++ status = -EPERM;
++ if (!capable(CAP_SETVEID))
++ goto out;
++
++ status = -EINVAL;
++ if ((flags & VE_TEST) && (flags & (VE_ENTER|VE_CREATE)))
++ goto out;
++
++ status = -EINVAL;
++ ve = get_ve_by_id(veid);
++ if (ve) {
++ if (flags & VE_TEST) {
++ status = 0;
++ goto out_put;
++ }
++ if (flags & VE_EXCLUSIVE) {
++ status = -EACCES;
++ goto out_put;
++ }
++ if (flags & VE_CREATE) {
++ flags &= ~VE_CREATE;
++ flags |= VE_ENTER;
++ }
++ } else {
++ if (flags & (VE_TEST|VE_ENTER)) {
++ status = -ESRCH;
++ goto out;
++ }
++ }
++
++ if (flags & VE_CREATE) {
++ status = do_env_create(veid, class_id, data, datalen);
++ goto out;
++ } else if (flags & VE_ENTER)
++ status = do_env_enter(ve);
++
++ /* else: returning EINVAL */
++
++out_put:
++ real_put_ve(ve);
++out:
++ return status;
++}
++
++static int do_env_enter(struct ve_struct *ve)
++{
++ struct task_struct *tsk = current;
++ int err;
++
++ VZTRACE("%s: veid=%d\n", __FUNCTION__, ve->veid);
++
++ err = -EBUSY;
++ down_read(&ve->op_sem);
++ if (!ve->is_running)
++ goto out_up;
++
++#ifdef CONFIG_FAIRSCHED
++ err = sys_fairsched_mvpr(current->pid, ve->veid);
++ if (err)
++ goto out_up;
++#endif
++
++ ve_sched_attach(ve);
++ move_task(current, ve, VE_TASK_INFO(tsk)->owner_env);
++ err = VE_TASK_INFO(tsk)->owner_env->veid;
++
++out_up:
++ up_read(&ve->op_sem);
++ return err;
++}
++
++static void env_cleanup(struct ve_struct *ve)
++{
++ struct ve_struct *old_ve;
++
++ VZTRACE("real_do_env_cleanup\n");
++
++ down_read(&ve->op_sem);
++ old_ve = set_exec_env(ve);
++
++ ve_hook_iterate_cleanup(VE_HOOK_FINI, (void *)ve);
++
++ fini_venet(ve);
++
++ /* no new packets in flight beyond this point */
++ synchronize_net();
++ /* skb hold dst_entry, and in turn lies in the ip fragment queue */
++ ip_fragment_cleanup(ve);
++
++ fini_ve_netdev();
++ fini_ve_route(ve);
++
++ /* kill iptables */
++ /* No skb belonging to the VE can exist at this point, as unregister_netdev
++ is an operation that waits until ALL skbs are gone */
++ flush_ve_iptables(ve);
++ fini_ve_iptables(ve, ve->_iptables_modules);
++ fini_ve_netfilter();
++
++ ve_ipc_cleanup();
++
++ fini_ve_sched(ve);
++ do_clean_devperms(ve->veid);
++
++ fini_ve_devpts(ve);
++ fini_ve_shmem(ve);
++ fini_ve_sysfs(ve);
++ unregister_ve_tty_drivers(ve);
++ fini_ve_sysctl(ve);
++ fini_ve_proc(ve);
++
++ fini_ve_mibs(ve);
++
++ (void)set_exec_env(old_ve);
++ fini_printk(ve); /* no printk can happen in ve context anymore */
++
++ ve_list_del(ve);
++ up_read(&ve->op_sem);
++
++ real_put_ve(ve);
++}
++
++static struct list_head ve_cleanup_list;
++static spinlock_t ve_cleanup_lock;
++
++static DECLARE_COMPLETION(vzmond_complete);
++static struct task_struct *vzmond_thread;
++static volatile int stop_vzmond;
++
++void real_do_env_cleanup(struct ve_struct *ve)
++{
++ spin_lock(&ve_cleanup_lock);
++ list_add_tail(&ve->cleanup_list, &ve_cleanup_list);
++ spin_unlock(&ve_cleanup_lock);
++ wake_up_process(vzmond_thread);
++}
++
++static void do_pending_env_cleanups(void)
++{
++ struct ve_struct *ve;
++
++ spin_lock(&ve_cleanup_lock);
++ while (1) {
++ if (list_empty(&ve_cleanup_list) || need_resched())
++ break;
++ ve = list_entry(ve_cleanup_list.next, struct ve_struct,
++ cleanup_list);
++ list_del(&ve->cleanup_list);
++ spin_unlock(&ve_cleanup_lock);
++ env_cleanup(ve);
++ spin_lock(&ve_cleanup_lock);
++ }
++ spin_unlock(&ve_cleanup_lock);
++}
++
++static int have_pending_cleanups(void)
++{
++ return !list_empty(&ve_cleanup_list);
++}
++
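++/* vzmond is a kernel thread that sleeps until real_do_env_cleanup() queues
++ * a stopped VE on ve_cleanup_list; the actual teardown then runs
++ * asynchronously in this thread's context via env_cleanup(). */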
++static int vzmond(void *arg)
++{
++ daemonize("vzmond");
++ vzmond_thread = current;
++ set_current_state(TASK_INTERRUPTIBLE);
++
++ while (!stop_vzmond) {
++ schedule();
++ try_to_freeze();
++ if (signal_pending(current))
++ flush_signals(current);
++
++ do_pending_env_cleanups();
++ set_current_state(TASK_INTERRUPTIBLE);
++ if (have_pending_cleanups())
++ __set_current_state(TASK_RUNNING);
++ }
++
++ __set_task_state(current, TASK_RUNNING);
++ complete_and_exit(&vzmond_complete, 0);
++}
++
++static int __init init_vzmond(void)
++{
++ INIT_LIST_HEAD(&ve_cleanup_list);
++ spin_lock_init(&ve_cleanup_lock);
++ stop_vzmond = 0;
++ return kernel_thread(vzmond, NULL, 0);
++}
++
++static void fini_vzmond(void)
++{
++ stop_vzmond = 1;
++ wake_up_process(vzmond_thread);
++ wait_for_completion(&vzmond_complete);
++ WARN_ON(!list_empty(&ve_cleanup_list));
++}
++
++void real_do_env_free(struct ve_struct *ve)
++{
++ VZTRACE("real_do_env_free\n");
++
++ ve_ipc_free(ve); /* free SYSV IPC resources */
++ free_ve_tty_drivers(ve);
++ free_ve_utsname(ve);
++ free_ve_sysctl(ve); /* free per ve sysctl data */
++ free_ve_filesystems(ve);
++ printk(KERN_INFO "VPS: %d: stopped\n", VEID(ve));
++ kfree(ve);
++
++ module_put(THIS_MODULE);
++}
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * VE TTY handling
++ *
++ **********************************************************************
++ **********************************************************************/
++
++DCL_VE_OWNER(TTYDRV, struct tty_driver, owner_env)
++
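++/*
++ * Clone a host tty driver for a VE.  Unless the driver uses devpts-managed
++ * memory, allocate private ttys/termios/termios_locked arrays for the copy.
++ */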
++static struct tty_driver *alloc_ve_tty_driver(struct tty_driver *base,
++ struct ve_struct *ve)
++{
++ size_t size;
++ struct tty_driver *driver;
++
++ driver = kmalloc(sizeof(struct tty_driver), GFP_KERNEL);
++ if (!driver)
++ goto out;
++
++ memcpy(driver, base, sizeof(struct tty_driver));
++
++ driver->driver_state = NULL;
++
++ size = base->num * 3 * sizeof(void *);
++ if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) {
++ void **p;
++ p = kmalloc(size, GFP_KERNEL);
++ if (!p)
++ goto out_free;
++ memset(p, 0, size);
++ driver->ttys = (struct tty_struct **)p;
++ driver->termios = (struct termios **)(p + driver->num);
++ driver->termios_locked = (struct termios **)(p + driver->num * 2);
++ } else {
++ driver->ttys = NULL;
++ driver->termios = NULL;
++ driver->termios_locked = NULL;
++ }
++
++ SET_VE_OWNER_TTYDRV(driver, ve);
++ driver->flags |= TTY_DRIVER_INSTALLED;
++
++ return driver;
++
++out_free:
++ kfree(driver);
++out:
++ return NULL;
++}
++
++static void free_ve_tty_driver(struct tty_driver *driver)
++{
++ if (!driver)
++ return;
++
++ clear_termios(driver);
++ kfree(driver->ttys);
++ kfree(driver);
++}
++
++static int alloc_ve_tty_drivers(struct ve_struct* ve)
++{
++#ifdef CONFIG_LEGACY_PTYS
++ /* Traditional BSD devices */
++ ve->pty_driver = alloc_ve_tty_driver(pty_driver, ve);
++ if (!ve->pty_driver)
++ goto out_mem;
++
++ ve->pty_slave_driver = alloc_ve_tty_driver(pty_slave_driver, ve);
++ if (!ve->pty_slave_driver)
++ goto out_mem;
++
++ ve->pty_driver->other = ve->pty_slave_driver;
++ ve->pty_slave_driver->other = ve->pty_driver;
++#endif
++
++#ifdef CONFIG_UNIX98_PTYS
++ ve->ptm_driver = alloc_ve_tty_driver(ptm_driver, ve);
++ if (!ve->ptm_driver)
++ goto out_mem;
++
++ ve->pts_driver = alloc_ve_tty_driver(pts_driver, ve);
++ if (!ve->pts_driver)
++ goto out_mem;
++
++ ve->ptm_driver->other = ve->pts_driver;
++ ve->pts_driver->other = ve->ptm_driver;
++
++ ve->allocated_ptys = kmalloc(sizeof(*ve->allocated_ptys), GFP_KERNEL);
++ if (!ve->allocated_ptys)
++ goto out_mem;
++ idr_init(ve->allocated_ptys);
++#endif
++ return 0;
++
++out_mem:
++ free_ve_tty_drivers(ve);
++ return -ENOMEM;
++}
++
++static void free_ve_tty_drivers(struct ve_struct* ve)
++{
++#ifdef CONFIG_LEGACY_PTYS
++ free_ve_tty_driver(ve->pty_driver);
++ free_ve_tty_driver(ve->pty_slave_driver);
++ ve->pty_driver = ve->pty_slave_driver = NULL;
++#endif
++#ifdef CONFIG_UNIX98_PTYS
++ free_ve_tty_driver(ve->ptm_driver);
++ free_ve_tty_driver(ve->pts_driver);
++ kfree(ve->allocated_ptys);
++ ve->ptm_driver = ve->pts_driver = NULL;
++ ve->allocated_ptys = NULL;
++#endif
++}
++
++static inline void __register_tty_driver(struct tty_driver *driver)
++{
++ list_add(&driver->tty_drivers, &tty_drivers);
++}
++
++static inline void __unregister_tty_driver(struct tty_driver *driver)
++{
++ if (!driver)
++ return;
++ list_del(&driver->tty_drivers);
++}
++
++static int register_ve_tty_drivers(struct ve_struct* ve)
++{
++ write_lock_irq(&tty_driver_guard);
++#ifdef CONFIG_UNIX98_PTYS
++ __register_tty_driver(ve->ptm_driver);
++ __register_tty_driver(ve->pts_driver);
++#endif
++#ifdef CONFIG_LEGACY_PTYS
++ __register_tty_driver(ve->pty_driver);
++ __register_tty_driver(ve->pty_slave_driver);
++#endif
++ write_unlock_irq(&tty_driver_guard);
++
++ return 0;
++}
++
++static void unregister_ve_tty_drivers(struct ve_struct* ve)
++{
++ VZTRACE("unregister_ve_tty_drivers\n");
++
++ write_lock_irq(&tty_driver_guard);
++ __unregister_tty_driver(ve->pty_driver);
++ __unregister_tty_driver(ve->pty_slave_driver);
++#ifdef CONFIG_UNIX98_PTYS
++ __unregister_tty_driver(ve->ptm_driver);
++ __unregister_tty_driver(ve->pts_driver);
++#endif
++ write_unlock_irq(&tty_driver_guard);
++}
++
++static int init_ve_tty_drivers(struct ve_struct *ve)
++{
++ int err;
++
++ if ((err = alloc_ve_tty_drivers(ve)))
++ goto err_ttyalloc;
++ if ((err = register_ve_tty_drivers(ve)))
++ goto err_ttyreg;
++ return 0;
++
++err_ttyreg:
++ free_ve_tty_drivers(ve);
++err_ttyalloc:
++ return err;
++}
++
++static void fini_ve_tty_drivers(struct ve_struct *ve)
++{
++ unregister_ve_tty_drivers(ve);
++ free_ve_tty_drivers(ve);
++}
++
++/*
++ * Free the termios and termios_locked structures because
++ * we don't want to get memory leaks when modular tty
++ * drivers are removed from the kernel.
++ */
++static void clear_termios(struct tty_driver *driver)
++{
++ int i;
++ struct termios *tp;
++
++ if (driver->termios == NULL)
++ return;
++ for (i = 0; i < driver->num; i++) {
++ tp = driver->termios[i];
++ if (tp) {
++ driver->termios[i] = NULL;
++ kfree(tp);
++ }
++ tp = driver->termios_locked[i];
++ if (tp) {
++ driver->termios_locked[i] = NULL;
++ kfree(tp);
++ }
++ }
++}
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * Pieces of VE network
++ *
++ **********************************************************************
++ **********************************************************************/
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#include <asm/uaccess.h>
++#include <net/sock.h>
++#include <linux/netlink.h>
++#include <linux/rtnetlink.h>
++#include <net/route.h>
++#include <net/ip_fib.h>
++#endif
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static void ve_del_ip_addrs(struct net_device *dev)
++{
++ struct in_device *in_dev;
++
++ in_dev = in_dev_get(dev);
++ if (in_dev == NULL)
++ return;
++
++ while (in_dev->ifa_list != NULL) {
++ inet_del_ifa(in_dev, &in_dev->ifa_list, 1);
++ }
++ in_dev_put(in_dev);
++}
++
++static int ve_netdev_cleanup(struct net_device *dev, int to_ve)
++{
++ int err;
++
++ err = 0;
++ ve_del_ip_addrs(dev);
++ if ((dev->flags & IFF_UP) != 0)
++ err = dev_close(dev);
++ synchronize_net();
++ dev_shutdown(dev);
++ dev_mc_discard(dev);
++ free_divert_blk(dev);
++ synchronize_net();
++
++ if (to_ve)
++ dev->orig_mtu = dev->mtu;
++ else {
++ int rc = dev_set_mtu(dev, dev->orig_mtu);
++ if (err == 0)
++ err = rc;
++ }
++
++ return err;
++}
++
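++/*
++ * Move a network device between VEs: unlink it from the source VE's device
++ * list and name/index hashes, append it to the destination VE and charge it
++ * to the new exec beancounter.
++ */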
++static void __ve_dev_move(struct net_device *dev, struct ve_struct *ve_src,
++ struct ve_struct *ve_dst, struct user_beancounter *exec_ub)
++{
++ struct net_device **dp, *d;
++ struct user_beancounter *ub;
++
++ for (d = ve_src->_net_dev_base, dp = NULL; d != NULL;
++ dp = &d->next, d = d->next) {
++ if (d == dev) {
++ hlist_del(&dev->name_hlist);
++ hlist_del(&dev->index_hlist);
++ if (ve_src->_net_dev_tail == &dev->next)
++ ve_src->_net_dev_tail = dp;
++ if (dp)
++ *dp = dev->next;
++ dev->next = NULL;
++ break;
++ }
++ }
++ *ve_dst->_net_dev_tail = dev;
++ ve_dst->_net_dev_tail = &dev->next;
++ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ve_dst));
++ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ve_dst));
++ dev->owner_env = ve_dst;
++
++ ub = netdev_bc(dev)->exec_ub;
++ netdev_bc(dev)->exec_ub = get_beancounter(exec_ub);
++ put_beancounter(ub);
++}
++
++static int ve_dev_add(envid_t veid, char *dev_name)
++{
++ int err;
++ struct net_device *dev;
++ struct ve_struct *ve;
++ struct hlist_node *p;
++
++ dev = NULL;
++ err = -ESRCH;
++
++ ve = get_ve_by_id(veid);
++ if (ve == NULL)
++ goto out;
++
++ rtnl_lock();
++
++ read_lock(&dev_base_lock);
++ hlist_for_each(p, dev_name_hash(dev_name, get_ve0())) {
++ struct net_device *d = hlist_entry(p, struct net_device,
++ name_hlist);
++ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
++ dev = d;
++ break;
++ }
++ }
++ read_unlock(&dev_base_lock);
++ if (dev == NULL)
++ goto out_unlock;
++
++ err = -EPERM;
++ if (!ve_is_dev_movable(dev))
++ goto out_unlock;
++
++ err = -EINVAL;
++ if (dev->flags & (IFF_SLAVE|IFF_MASTER))
++ goto out_unlock;
++
++ ve_netdev_cleanup(dev, 1);
++
++ write_lock_bh(&dev_base_lock);
++ __ve_dev_move(dev, get_ve0(), ve, get_exec_ub());
++ write_unlock_bh(&dev_base_lock);
++
++ err = 0;
++
++out_unlock:
++ rtnl_unlock();
++ real_put_ve(ve);
++
++ if (dev == NULL)
++ printk(KERN_WARNING "Device %s not found\n", dev_name);
++
++out:
++ return err;
++}
++
++static int ve_dev_del(envid_t veid, char *dev_name)
++{
++ int err;
++ struct net_device *dev;
++ struct ve_struct *ve, *old_exec;
++ struct hlist_node *p;
++
++ dev = NULL;
++ err = -ESRCH;
++
++ ve = get_ve_by_id(veid);
++ if (ve == NULL)
++ goto out;
++
++ rtnl_lock();
++
++ read_lock(&dev_base_lock);
++ hlist_for_each(p, dev_name_hash(dev_name, ve)) {
++ struct net_device *d = hlist_entry(p, struct net_device,
++ name_hlist);
++ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
++ dev = d;
++ break;
++ }
++ }
++ read_unlock(&dev_base_lock);
++ if (dev == NULL)
++ goto out_unlock;
++
++ err = -EPERM;
++ if (!ve_is_dev_movable(dev))
++ goto out_unlock;
++
++ old_exec = set_exec_env(ve);
++ ve_netdev_cleanup(dev, 0);
++ (void)set_exec_env(old_exec);
++
++ write_lock_bh(&dev_base_lock);
++ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
++ write_unlock_bh(&dev_base_lock);
++
++ err = 0;
++
++out_unlock:
++ rtnl_unlock();
++ real_put_ve(ve);
++
++ if (dev == NULL)
++ printk(KERN_WARNING "Device %s not found\n", dev_name);
++
++out:
++ return err;
++}
++
++int real_ve_dev_map(envid_t veid, int op, char *dev_name)
++{
++ int err;
++ err = -EPERM;
++ if (!capable(CAP_SETVEID))
++ goto out;
++ switch (op)
++ {
++ case VE_NETDEV_ADD:
++ err = ve_dev_add(veid, dev_name);
++ break;
++ case VE_NETDEV_DEL:
++ err = ve_dev_del(veid, dev_name);
++ break;
++ default:
++ err = -EINVAL;
++ break;
++ }
++out:
++ return err;
++}
++
++static void ve_mapped_devs_cleanup(struct ve_struct *ve)
++{
++ struct net_device *dev;
++
++ rtnl_lock();
++ write_lock_bh(&dev_base_lock);
++restart:
++ for (dev = ve->_net_dev_base; dev != NULL; dev = dev->next)
++ {
++ if ((dev->features & NETIF_F_VENET) ||
++ (dev == ve->_loopback_dev)) /* Skip loopback dev */
++ continue;
++ write_unlock_bh(&dev_base_lock);
++ ve_netdev_cleanup(dev, 0);
++ write_lock_bh(&dev_base_lock);
++ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
++ goto restart;
++ }
++ write_unlock_bh(&dev_base_lock);
++ rtnl_unlock();
++}
++#endif
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * VE information via /proc
++ *
++ **********************************************************************
++ **********************************************************************/
++#ifdef CONFIG_PROC_FS
++static int devperms_seq_show(struct seq_file *m, void *v)
++{
++ struct devperms_struct *dp;
++ char dev_s[32], type_c;
++ unsigned use, type;
++ dev_t dev;
++
++ dp = (struct devperms_struct *)v;
++ if (dp == (struct devperms_struct *)1L) {
++ seq_printf(m, "Version: 2.7\n");
++ return 0;
++ }
++
++ use = dp->type & VE_USE_MASK;
++ type = dp->type & S_IFMT;
++ dev = dp->dev;
++
++ if ((use | VE_USE_MINOR) == use)
++ snprintf(dev_s, sizeof(dev_s), "%d:%d", MAJOR(dev), MINOR(dev));
++ else if ((use | VE_USE_MAJOR) == use)
++ snprintf(dev_s, sizeof(dev_s), "%d:*", MAJOR(dp->dev));
++ else
++ snprintf(dev_s, sizeof(dev_s), "*:*");
++
++ if (type == S_IFCHR)
++ type_c = 'c';
++ else if (type == S_IFBLK)
++ type_c = 'b';
++ else
++ type_c = '?';
++
++ seq_printf(m, "%10u %c %03o %s\n", dp->veid, type_c, dp->mask, dev_s);
++ return 0;
++}
++
++static void *devperms_seq_start(struct seq_file *m, loff_t *pos)
++{
++ loff_t cpos;
++ long slot;
++ struct devperms_struct *dp;
++
++ cpos = *pos;
++ read_lock(&devperms_hash_guard);
++ if (cpos-- == 0)
++ return (void *)1L;
++
++ for (slot = 0; slot < DEVPERMS_HASH_SZ; slot++)
++ for (dp = devperms_hash[slot]; dp; dp = dp->devhash_next)
++ if (cpos-- == 0) {
++ m->private = (void *)slot;
++ return dp;
++ }
++ return NULL;
++}
++
++static void *devperms_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ long slot;
++ struct devperms_struct *dp;
++
++ dp = (struct devperms_struct *)v;
++
++ if (dp == (struct devperms_struct *)1L)
++ slot = 0;
++ else if (dp->devhash_next == NULL)
++ slot = (long)m->private + 1;
++ else {
++ (*pos)++;
++ return dp->devhash_next;
++ }
++
++ for (; slot < DEVPERMS_HASH_SZ; slot++)
++ if (devperms_hash[slot]) {
++ (*pos)++;
++ m->private = (void *)slot;
++ return devperms_hash[slot];
++ }
++ return NULL;
++}
++
++static void devperms_seq_stop(struct seq_file *m, void *v)
++{
++ read_unlock(&devperms_hash_guard);
++}
++
++static struct seq_operations devperms_seq_op = {
++ .start = devperms_seq_start,
++ .next = devperms_seq_next,
++ .stop = devperms_seq_stop,
++ .show = devperms_seq_show,
++};
++
++static int devperms_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &devperms_seq_op);
++}
++
++static struct file_operations proc_devperms_ops = {
++ .open = devperms_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
++#if BITS_PER_LONG == 32
++#define VESTAT_LINE_WIDTH (6 * 11 + 6 * 21)
++#define VESTAT_LINE_FMT "%10u %10lu %10lu %10lu %10lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %10lu\n"
++#define VESTAT_HEAD_FMT "%10s %10s %10s %10s %10s %20s %20s %20s %20s %20s %20s %10s\n"
++#else
++#define VESTAT_LINE_WIDTH (12 * 21)
++#define VESTAT_LINE_FMT "%20u %20lu %20lu %20lu %20lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20lu\n"
++#define VESTAT_HEAD_FMT "%20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s\n"
++#endif
++
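++/* Print one line of per-VE CPU and latency statistics for /proc/vz/vestat. */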
++static int vestat_seq_show(struct seq_file *m, void *v)
++{
++ struct ve_struct *ve = (struct ve_struct *)v;
++ struct ve_struct *curve;
++ int cpu;
++ unsigned long user_ve, nice_ve, system_ve, uptime;
++ cycles_t uptime_cycles, idle_time, strv_time, used;
++
++ curve = get_exec_env();
++ if (ve == ve_list_head ||
++ (!ve_is_super(curve) && ve == curve)) {
++ /* print header */
++ seq_printf(m, "%-*s\n",
++ VESTAT_LINE_WIDTH - 1,
++ "Version: 2.2");
++ seq_printf(m, VESTAT_HEAD_FMT, "VEID",
++ "user", "nice", "system",
++ "uptime", "idle",
++ "strv", "uptime", "used",
++ "maxlat", "totlat", "numsched");
++ }
++
++ if (ve == get_ve0())
++ return 0;
++
++ user_ve = nice_ve = system_ve = 0;
++ idle_time = strv_time = used = 0;
++
++ for (cpu = 0; cpu < NR_CPUS; cpu++) {
++ struct ve_cpu_stats *st;
++
++ st = VE_CPU_STATS(ve, cpu);
++ user_ve += st->user;
++ nice_ve += st->nice;
++ system_ve += st->system;
++ used += VE_CPU_STATS(ve, cpu)->used_time;
++ idle_time += ve_sched_get_idle_time(ve, cpu);
++ }
++ uptime_cycles = get_cycles() - ve->start_cycles;
++ uptime = jiffies - ve->start_jiffies;
++
++ seq_printf(m, VESTAT_LINE_FMT, ve->veid,
++ user_ve, nice_ve, system_ve,
++ uptime, idle_time,
++ strv_time, uptime_cycles, used,
++ ve->sched_lat_ve.last.maxlat,
++ ve->sched_lat_ve.last.totlat,
++ ve->sched_lat_ve.last.count);
++ return 0;
++}
++
++static void *ve_seq_start(struct seq_file *m, loff_t *pos)
++{
++ struct ve_struct *ve, *curve;
++ loff_t l;
++
++ curve = get_exec_env();
++ read_lock(&ve_list_guard);
++ if (!ve_is_super(curve)) {
++ if (*pos != 0)
++ return NULL;
++ return curve;
++ }
++ for (ve = ve_list_head, l = *pos;
++ ve != NULL && l > 0;
++ ve = ve->next, l--);
++ return ve;
++}
++
++static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ struct ve_struct *ve = (struct ve_struct *)v;
++
++ if (!ve_is_super(get_exec_env()))
++ return NULL;
++ (*pos)++;
++ return ve->next;
++}
++
++static void ve_seq_stop(struct seq_file *m, void *v)
++{
++ read_unlock(&ve_list_guard);
++}
++
++static struct seq_operations vestat_seq_op = {
++ start: ve_seq_start,
++ next: ve_seq_next,
++ stop: ve_seq_stop,
++ show: vestat_seq_show
++};
++
++static int vestat_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &vestat_seq_op);
++}
++
++static struct file_operations proc_vestat_operations = {
++ open: vestat_open,
++ read: seq_read,
++ llseek: seq_lseek,
++ release: seq_release
++};
++
++static int __init init_vecalls_proc(void)
++{
++ struct proc_dir_entry *de;
++
++ de = create_proc_glob_entry("vz/vestat",
++ S_IFREG|S_IRUSR, NULL);
++ if (de == NULL) {
++		/* create the "vz" subdirectory if it does not exist */
++ (void) create_proc_glob_entry("vz",
++ S_IFDIR|S_IRUGO|S_IXUGO, NULL);
++ de = create_proc_glob_entry("vz/vestat",
++ S_IFREG|S_IRUSR, NULL);
++ }
++ if (de)
++ de->proc_fops = &proc_vestat_operations;
++ else
++ printk(KERN_WARNING
++ "VZMON: can't make vestat proc entry\n");
++
++ de = create_proc_entry("vz/devperms", S_IFREG | S_IRUSR, NULL);
++ if (de)
++ de->proc_fops = &proc_devperms_ops;
++ else
++ printk(KERN_WARNING
++ "VZMON: can't make devperms proc entry\n");
++ return 0;
++}
++
++static void fini_vecalls_proc(void)
++{
++ remove_proc_entry("vz/devperms", NULL);
++ remove_proc_entry("vz/vestat", NULL);
++}
++#else
++#define init_vecalls_proc() (0)
++#define fini_vecalls_proc() do { } while (0)
++#endif /* CONFIG_PROC_FS */
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * User ctl
++ *
++ **********************************************************************
++ **********************************************************************/
++
++int vzcalls_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
++static struct vzioctlinfo vzcalls = {
++ type: VZCTLTYPE,
++ func: vzcalls_ioctl,
++ owner: THIS_MODULE,
++};
++
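++/*
++ * Dispatch VZCTL ioctls: set device permissions, map or unmap network
++ * devices, create or enter environments, and query per-VE CPU statistics.
++ */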
++int vzcalls_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ int err;
++
++ err = -ENOTTY;
++ switch(cmd) {
++ case VZCTL_MARK_ENV_TO_DOWN: {
++ /* Compatibility issue */
++ err = 0;
++ }
++ break;
++ case VZCTL_SETDEVPERMS: {
++ /* Device type was mistakenly declared as dev_t
++ * in the old user-kernel interface.
++		 * That's wrong; dev_t is a kernel-internal type.
++ * I use `unsigned' not having anything better in mind.
++ * 2001/08/11 SAW */
++ struct vzctl_setdevperms s;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++ err = real_setdevperms(s.veid, s.type,
++ new_decode_dev(s.dev), s.mask);
++ }
++ break;
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ case VZCTL_VE_NETDEV: {
++ struct vzctl_ve_netdev d;
++ char *s;
++ err = -EFAULT;
++ if (copy_from_user(&d, (void *)arg, sizeof(d)))
++ break;
++ err = -ENOMEM;
++ s = kmalloc(IFNAMSIZ+1, GFP_KERNEL);
++ if (s == NULL)
++ break;
++ err = -EFAULT;
++ if (strncpy_from_user(s, d.dev_name, IFNAMSIZ) > 0) {
++ s[IFNAMSIZ] = 0;
++ err = real_ve_dev_map(d.veid, d.op, s);
++ }
++ kfree(s);
++ }
++ break;
++#endif
++ case VZCTL_ENV_CREATE: {
++ struct vzctl_env_create s;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++ err = real_env_create(s.veid, s.flags, s.class_id,
++ NULL, 0);
++ }
++ break;
++ case VZCTL_ENV_CREATE_DATA: {
++ struct vzctl_env_create_data s;
++ struct env_create_param *data;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++ err=-EINVAL;
++ if (s.datalen < VZCTL_ENV_CREATE_DATA_MINLEN ||
++ s.datalen > VZCTL_ENV_CREATE_DATA_MAXLEN ||
++ s.data == 0)
++ break;
++ err = -ENOMEM;
++ data = kmalloc(s.datalen, GFP_KERNEL);
++ if (!data)
++ break;
++ err = -EFAULT;
++ if (copy_from_user(data, (void *)s.data, s.datalen))
++ goto free_data;
++ err = real_env_create(s.veid, s.flags, s.class_id,
++ data, s.datalen);
++free_data:
++ kfree(data);
++ }
++ break;
++ case VZCTL_GET_CPU_STAT: {
++ struct vzctl_cpustatctl s;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++ err = ve_get_cpu_stat(s.veid, s.cpustat);
++ }
++ break;
++ }
++ return err;
++}
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * Init/exit stuff
++ *
++ **********************************************************************
++ **********************************************************************/
++
++#ifdef CONFIG_VE_CALLS_MODULE
++static int __init init_vecalls_symbols(void)
++{
++ KSYMRESOLVE(real_get_device_perms_ve);
++ KSYMRESOLVE(real_do_env_cleanup);
++ KSYMRESOLVE(real_do_env_free);
++ KSYMRESOLVE(real_update_load_avg_ve);
++ KSYMMODRESOLVE(vzmon);
++ return 0;
++}
++
++static void fini_vecalls_symbols(void)
++{
++ KSYMMODUNRESOLVE(vzmon);
++ KSYMUNRESOLVE(real_get_device_perms_ve);
++ KSYMUNRESOLVE(real_do_env_cleanup);
++ KSYMUNRESOLVE(real_do_env_free);
++ KSYMUNRESOLVE(real_update_load_avg_ve);
++}
++#else
++#define init_vecalls_symbols() (0)
++#define fini_vecalls_symbols() do { } while (0)
++#endif
++
++static inline __init int init_vecalls_ioctls(void)
++{
++ vzioctl_register(&vzcalls);
++ return 0;
++}
++
++static inline void fini_vecalls_ioctls(void)
++{
++ vzioctl_unregister(&vzcalls);
++}
++
++static int __init vecalls_init(void)
++{
++ int err;
++ int i;
++
++ ve_list_head = get_ve0();
++
++ err = init_vzmond();
++ if (err < 0)
++ goto out_vzmond;
++
++ err = init_devperms_hash();
++ if (err < 0)
++ goto out_perms;
++
++ err = init_vecalls_symbols();
++ if (err < 0)
++ goto out_sym;
++
++ err = init_vecalls_proc();
++ if (err < 0)
++ goto out_proc;
++
++ err = init_vecalls_ioctls();
++ if (err < 0)
++ goto out_ioctls;
++
++ for (i = 0; i < VE_MAX_HOOKS; i++)
++ INIT_LIST_HEAD(&ve_hooks[i]);
++
++ return 0;
++
++out_ioctls:
++ fini_vecalls_proc();
++out_proc:
++ fini_vecalls_symbols();
++out_sym:
++ fini_devperms_hash();
++out_perms:
++ fini_vzmond();
++out_vzmond:
++ return err;
++}
++
++static void vecalls_exit(void)
++{
++ fini_vecalls_ioctls();
++ fini_vecalls_proc();
++ fini_vecalls_symbols();
++ fini_devperms_hash();
++ fini_vzmond();
++}
++
++EXPORT_SYMBOL(get_ve_by_id);
++EXPORT_SYMBOL(__find_ve_by_id);
++EXPORT_SYMBOL(ve_list_guard);
++EXPORT_SYMBOL(ve_list_head);
++EXPORT_SYMBOL(nr_ve);
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo Control");
++MODULE_LICENSE("GPL v2");
++
++module_init(vecalls_init)
++module_exit(vecalls_exit)
+diff -uprN linux-2.6.15.orig/kernel/veowner.c linux-2.6.15-ve025stab014/kernel/veowner.c
+--- linux-2.6.15.orig/kernel/veowner.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/veowner.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,297 @@
++/*
++ * kernel/veowner.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/sched.h>
++#include <linux/ve.h>
++#include <linux/ve_owner.h>
++#include <linux/ve_proto.h>
++#include <linux/ipc.h>
++#include <linux/fs.h>
++#include <linux/proc_fs.h>
++#include <linux/file.h>
++#include <linux/mm.h>
++#include <linux/delay.h>
++#include <linux/vmalloc.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/list.h>
++#include <asm/system.h>
++#include <asm/io.h>
++
++#include <net/tcp.h>
++
++void prepare_ve0_process(struct task_struct *tsk)
++{
++ set_virt_pid(tsk, tsk->pid);
++ set_virt_tgid(tsk, tsk->tgid);
++ if (tsk->signal) {
++ set_virt_pgid(tsk, tsk->signal->pgrp);
++ set_virt_sid(tsk, tsk->signal->session);
++ }
++ VE_TASK_INFO(tsk)->exec_env = get_ve0();
++ VE_TASK_INFO(tsk)->owner_env = get_ve0();
++ VE_TASK_INFO(tsk)->sleep_time = 0;
++ VE_TASK_INFO(tsk)->wakeup_stamp = 0;
++ VE_TASK_INFO(tsk)->sched_time = 0;
++ seqcount_init(&VE_TASK_INFO(tsk)->wakeup_lock);
++
++ if (tsk->pid) {
++ SET_VE_LINKS(tsk);
++ atomic_inc(&get_ve0()->pcounter);
++ }
++}
++
++void prepare_ve0_loopback(void)
++{
++ get_ve0()->_loopback_dev = &loopback_dev;
++}
++
++/*
++ * ------------------------------------------------------------------------
++ * proc entries
++ * ------------------------------------------------------------------------
++ */
++
++static void proc_move(struct proc_dir_entry *ddir,
++ struct proc_dir_entry *sdir,
++ const char *name)
++{
++ struct proc_dir_entry **p, *q;
++ int len;
++
++ len = strlen(name);
++ for (p = &sdir->subdir, q = *p; q != NULL; p = &q->next, q = *p)
++ if (proc_match(len, name, q))
++ break;
++ if (q == NULL)
++ return;
++ *p = q->next;
++ q->parent = ddir;
++ q->next = ddir->subdir;
++ ddir->subdir = q;
++}
++static void prepare_proc_misc(void)
++{
++ static char *table[] = {
++ "loadavg",
++ "uptime",
++ "meminfo",
++ "version",
++ "stat",
++ "filesystems",
++ "locks",
++ "swaps",
++ "mounts",
++ "net",
++ "cpuinfo",
++ "sysvipc",
++ "sys",
++ "fs",
++ "vz",
++ "user_beancounters",
++ "cmdline",
++ "vmstat",
++ NULL,
++ };
++ char **p;
++
++ for (p = table; *p != NULL; p++)
++ proc_move(&proc_root, ve0.proc_root, *p);
++}
++int prepare_proc(void)
++{
++ struct ve_struct *envid;
++ struct proc_dir_entry *de;
++ struct proc_dir_entry *ve_root;
++
++ envid = set_exec_env(&ve0);
++ ve_root = ve0.proc_root->subdir;
++ /* move the whole tree to be visible in VE0 only */
++ ve0.proc_root->subdir = proc_root.subdir;
++ for (de = ve0.proc_root->subdir; de->next != NULL; de = de->next)
++ de->parent = ve0.proc_root;
++ de->parent = ve0.proc_root;
++ de->next = ve_root;
++
++	/* move some specific entries back into the global scope */
++ proc_root.subdir = NULL;
++ prepare_proc_misc();
++ proc_net = proc_mkdir("net", ve0.proc_root);
++ proc_net_stat = proc_mkdir("stat", proc_net);
++ proc_mkdir("vz", 0);
++#ifdef CONFIG_SYSVIPC
++ proc_mkdir("sysvipc", 0);
++#endif
++ proc_root_fs = proc_mkdir("fs", 0);
++ /* XXX proc_tty_init(); */
++
++ /* XXX process inodes */
++
++ (void)set_exec_env(envid);
++
++ (void)create_proc_glob_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
++ return 0;
++}
++
++static struct proc_dir_entry ve0_proc_root = {
++ .name = "/proc",
++ .namelen = 5,
++ .mode = S_IFDIR | S_IRUGO | S_IXUGO,
++ .nlink = 2
++};
++
++void prepare_ve0_proc_root(void)
++{
++ ve0.proc_root = &ve0_proc_root;
++}
++
++/*
++ * ------------------------------------------------------------------------
++ * Virtualized sysctl
++ * ------------------------------------------------------------------------
++ */
++
++static int semmin[4] = { 1, 1, 1, 1 };
++static int semmax[4] = { 8000, INT_MAX, 1000, IPCMNI };
++static ctl_table kern_table[] = {
++ {KERN_NODENAME, "hostname", system_utsname.nodename, 64,
++ 0644, NULL, &proc_doutsstring, &sysctl_string},
++ {KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
++ 0644, NULL, &proc_doutsstring, &sysctl_string},
++#ifdef CONFIG_SYSVIPC
++#define get_ve0_field(fname) &ve0._##fname
++ {KERN_SHMMAX, "shmmax", get_ve0_field(shm_ctlmax), sizeof (size_t),
++ 0644, NULL, &proc_doulongvec_minmax },
++ {KERN_SHMALL, "shmall", get_ve0_field(shm_ctlall), sizeof (size_t),
++ 0644, NULL, &proc_doulongvec_minmax },
++ {KERN_SHMMNI, "shmmni", get_ve0_field(shm_ctlmni), sizeof (int),
++ 0644, NULL, &proc_dointvec_minmax, NULL,
++ NULL, &semmin[0], &semmax[3] },
++ {KERN_MSGMAX, "msgmax", get_ve0_field(msg_ctlmax), sizeof (int),
++ 0644, NULL, &proc_dointvec },
++ {KERN_MSGMNI, "msgmni", get_ve0_field(msg_ctlmni), sizeof (int),
++ 0644, NULL, &proc_dointvec_minmax, NULL,
++ NULL, &semmin[0], &semmax[3] },
++ {KERN_MSGMNB, "msgmnb", get_ve0_field(msg_ctlmnb), sizeof (int),
++ 0644, NULL, &proc_dointvec },
++ {KERN_SEM, "sem", get_ve0_field(sem_ctls), 4*sizeof (int),
++ 0644, NULL, &proc_dointvec },
++#endif
++ {0}
++};
++static ctl_table root_table[] = {
++ {CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
++ {0}
++};
++extern int ip_rt_src_check;
++extern int ve_area_access_check;
++static ctl_table vz_ipv4_route_table[] = {
++ {
++ ctl_name: NET_IPV4_ROUTE_SRC_CHECK,
++ procname: "src_check",
++ data: &ip_rt_src_check,
++ maxlen: sizeof(int),
++ mode: 0644,
++ proc_handler: &proc_dointvec,
++ },
++ { 0 }
++};
++static ctl_table vz_ipv4_table[] = {
++ {NET_IPV4_ROUTE, "route", NULL, 0, 0555, vz_ipv4_route_table},
++ { 0 }
++};
++static ctl_table vz_net_table[] = {
++ {NET_IPV4, "ipv4", NULL, 0, 0555, vz_ipv4_table},
++ { 0 }
++};
++static ctl_table vz_fs_table[] = {
++ {
++ ctl_name: 226,
++ procname: "ve-area-access-check",
++ data: &ve_area_access_check,
++ maxlen: sizeof(int),
++ mode: 0644,
++ proc_handler: &proc_dointvec,
++ },
++ { 0 }
++};
++static ctl_table root_table2[] = {
++ {CTL_NET, "net", NULL, 0, 0555, vz_net_table},
++ {CTL_FS, "fs", NULL, 0, 0555, vz_fs_table},
++ { 0 }
++};
++int prepare_sysctl(void)
++{
++ struct ve_struct *envid;
++
++ envid = set_exec_env(&ve0);
++ ve0.kern_header = register_sysctl_table(root_table, 1);
++ register_sysctl_table(root_table2, 0);
++ (void)set_exec_env(envid);
++ return 0;
++}
++
++void prepare_ve0_sysctl(void)
++{
++ INIT_LIST_HEAD(&ve0.sysctl_lh);
++#ifdef CONFIG_SYSCTL
++ ve0.proc_sys_root = proc_mkdir("sys", 0);
++#endif
++}
++
++/*
++ * ------------------------------------------------------------------------
++ * XXX init_ve_system
++ * ------------------------------------------------------------------------
++ */
++
++void init_ve_system(void)
++{
++ struct task_struct *init_entry, *p, *tsk;
++ struct ve_struct *ptr;
++ unsigned long flags;
++ int i;
++
++ ptr = get_ve0();
++ (void)get_ve(ptr);
++ atomic_set(&ptr->pcounter, 1);
++
++ /* Don't forget about idle tasks */
++ write_lock_irqsave(&tasklist_lock, flags);
++ for (i = 0; i < NR_CPUS; i++) {
++ tsk = idle_task(i);
++ if (tsk == NULL)
++ continue;
++
++ prepare_ve0_process(tsk);
++ }
++ do_each_thread_all(p, tsk) {
++ prepare_ve0_process(tsk);
++ } while_each_thread_all(p, tsk);
++ write_unlock_irqrestore(&tasklist_lock, flags);
++
++ init_entry = child_reaper;
++ ptr->init_entry = init_entry;
++ /* XXX: why? */
++ cap_set_full(ptr->cap_default);
++
++ ptr->_ipv4_devconf = &ipv4_devconf;
++ ptr->_ipv4_devconf_dflt = &ipv4_devconf_dflt;
++
++ read_lock(&init_entry->fs->lock);
++ ptr->fs_rootmnt = init_entry->fs->rootmnt;
++ ptr->fs_root = init_entry->fs->root;
++ read_unlock(&init_entry->fs->lock);
++
++ /* common prepares */
++ prepare_proc();
++ prepare_sysctl();
++ prepare_ipc();
++}
+diff -uprN linux-2.6.15.orig/kernel/vzcompat.c linux-2.6.15-ve025stab014/kernel/vzcompat.c
+--- linux-2.6.15.orig/kernel/vzcompat.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/vzcompat.c 2006-01-27 14:48:09.000000000 +0300
+@@ -0,0 +1,54 @@
++#include <linux/config.h>
++#include <linux/fs.h>
++#include <linux/proc_fs.h>
++
++
++static ssize_t fake_fairsced_read(struct file *f, char __user *buf,
++ size_t len, loff_t *pos)
++{
++ return 0;
++}
++
++static struct file_operations fake_ops = {
++ .read = fake_fairsced_read,
++};
++
++
++/*
++ * Module init/exit.
++ */
++
++static int __init ovzcompat_init(void)
++{
++ int err;
++ struct proc_dir_entry *de;
++
++ err = -ENOMEM;
++
++ de = create_proc_glob_entry("fairsched", S_IRUGO, NULL);
++ if (!de)
++ goto err_proc;
++ else
++ de->proc_fops = &fake_ops;
++
++ de = create_proc_glob_entry("fairsched2", S_IRUGO, NULL);
++ if (!de)
++ goto err_proc2;
++ else
++ de->proc_fops = &fake_ops;
++ return 0;
++
++err_proc2:
++ remove_proc_entry("fairsched", NULL);
++err_proc:
++ return err;
++}
++
++void __exit ovzcompat_exit(void)
++{
++ remove_proc_entry("fairsched2", NULL);
++ remove_proc_entry("fairsched", NULL);
++}
++
++module_init(ovzcompat_init)
++module_exit(ovzcompat_exit)
+diff -uprN linux-2.6.15.orig/kernel/vzdev.c linux-2.6.15-ve025stab014/kernel/vzdev.c
+--- linux-2.6.15.orig/kernel/vzdev.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/vzdev.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,97 @@
++/*
++ * kernel/vzdev.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/vzctl.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++#include <linux/vzcalluser.h>
++#include <asm/uaccess.h>
++#include <asm/pgalloc.h>
++
++#define VZCTL_MAJOR 126
++#define VZCTL_NAME "vzctl"
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo Interface");
++MODULE_LICENSE("GPL v2");
++
++static LIST_HEAD(ioctls);
++static spinlock_t ioctl_lock = SPIN_LOCK_UNLOCKED;
++
++int vzctl_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ int err;
++ struct list_head *p;
++ struct vzioctlinfo *inf;
++
++ err = -ENOTTY;
++ spin_lock(&ioctl_lock);
++ list_for_each(p, &ioctls) {
++ inf = list_entry(p, struct vzioctlinfo, list);
++ if (inf->type != _IOC_TYPE(cmd))
++ continue;
++
++ err = try_module_get(inf->owner) ? 0 : -EBUSY;
++ spin_unlock(&ioctl_lock);
++ if (!err) {
++ err = (*inf->func)(ino, file, cmd, arg);
++ module_put(inf->owner);
++ }
++ return err;
++ }
++ spin_unlock(&ioctl_lock);
++ return err;
++}
++
++void vzioctl_register(struct vzioctlinfo *inf)
++{
++ spin_lock(&ioctl_lock);
++ list_add(&inf->list, &ioctls);
++ spin_unlock(&ioctl_lock);
++}
++
++void vzioctl_unregister(struct vzioctlinfo *inf)
++{
++ spin_lock(&ioctl_lock);
++ list_del_init(&inf->list);
++ spin_unlock(&ioctl_lock);
++}
++
++EXPORT_SYMBOL(vzioctl_register);
++EXPORT_SYMBOL(vzioctl_unregister);
++
++/*
++ * Init/exit stuff.
++ */
++static struct file_operations vzctl_fops = {
++ .owner = THIS_MODULE,
++ .ioctl = vzctl_ioctl,
++};
++
++static void __exit vzctl_exit(void)
++{
++ unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
++}
++
++static int __init vzctl_init(void)
++{
++ int ret;
++
++ ret = register_chrdev(VZCTL_MAJOR, VZCTL_NAME, &vzctl_fops);
++ return ret;
++}
++
++module_init(vzctl_init)
++module_exit(vzctl_exit);
+diff -uprN linux-2.6.15.orig/kernel/vzwdog.c linux-2.6.15-ve025stab014/kernel/vzwdog.c
+--- linux-2.6.15.orig/kernel/vzwdog.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.15-ve025stab014/kernel/vzwdog.c 2006-01-27 14:48:08.000000000 +0300
+@@ -0,0 +1,278 @@
++/*
++ * kernel/vzwdog.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/ctype.h>
++#include <linux/kobject.h>
++#include <linux/genhd.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/kernel.h>
++#include <linux/kernel_stat.h>
++#include <linux/smp_lock.h>
++#include <linux/errno.h>
++#include <linux/suspend.h>
++#include <linux/ve.h>
++#include <linux/vzstat.h>
++
++/* Stuff regarding the kernel thread that polls VE validity */
++static int sleep_timeout = 60;
++static pid_t wdog_thread_pid;
++static int wdog_thread_continue = 1;
++static DECLARE_COMPLETION(license_thread_exited);
++
++extern void show_mem(void);
++extern struct ve_struct *ve_list_head;
++
++#if 0
++static char page[PAGE_SIZE];
++
++static void parse_irq_list(int len)
++{
++ int i, k, skip;
++ for (i = 0; i < len; ) {
++ k = i;
++ while (i < len && page[i] != '\n' && page[i] != ':')
++ i++;
++ skip = 0;
++ if (i < len && page[i] != '\n') {
++ i++; /* skip ':' */
++ while (i < len && (page[i] == ' ' || page[i] == '0'))
++ i++;
++ skip = (i < len && (page[i] < '0' || page[i] > '9'));
++ while (i < len && page[i] != '\n')
++ i++;
++ }
++ if (!skip)
++ printk("\n%.*s", i - k, page + k);
++ if (i < len)
++ i++; /* skip '\n' */
++ }
++}
++#endif
++
++static void show_irq_list(void)
++{
++#if 0
++ i = KSYMSAFECALL(int, get_irq_list, (page));
++ parse_irq_list(i); /* Safe, zero was returned if unassigned */
++#endif
++}
++
++static void show_alloc_latency(void)
++{
++ static const char *alloc_descr[KSTAT_ALLOCSTAT_NR] = {
++ "A0",
++ "L0",
++ "H0",
++ "L1",
++ "H1"
++ };
++ int i;
++
++ printk("lat: ");
++ for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
++ struct kstat_lat_struct *p;
++ cycles_t maxlat, avg0, avg1, avg2;
++
++ p = &kstat_glob.alloc_lat[i];
++ spin_lock_irq(&kstat_glb_lock);
++ maxlat = p->last.maxlat;
++ avg0 = p->avg[0];
++ avg1 = p->avg[1];
++ avg2 = p->avg[2];
++ spin_unlock_irq(&kstat_glb_lock);
++
++ printk("%s %Lu (%Lu %Lu %Lu)",
++ alloc_descr[i],
++ maxlat,
++ avg0,
++ avg1,
++ avg2);
++ }
++ printk("\n");
++}
++
++static void show_schedule_latency(void)
++{
++ struct kstat_lat_pcpu_struct *p;
++ cycles_t maxlat, totlat, avg0, avg1, avg2;
++ unsigned long count;
++
++ p = &kstat_glob.sched_lat;
++ spin_lock_irq(&kstat_glb_lock);
++ maxlat = p->last.maxlat;
++ totlat = p->last.totlat;
++ count = p->last.count;
++ avg0 = p->avg[0];
++ avg1 = p->avg[1];
++ avg2 = p->avg[2];
++ spin_unlock_irq(&kstat_glb_lock);
++
++ printk("sched lat: %Lu/%Lu/%lu (%Lu %Lu %Lu)\n",
++ maxlat,
++ totlat,
++ count,
++ avg0,
++ avg1,
++ avg2);
++}
++
++static void show_header(void)
++{
++ struct timeval tv;
++
++ do_gettimeofday(&tv);
++ printk("*** VZWDOG 1.14: time %lu.%06lu uptime %Lu CPU %d ***\n",
++ tv.tv_sec, tv.tv_usec,
++ get_jiffies_64(), smp_processor_id());
++#ifdef CONFIG_FAIRSCHED
++ printk("*** cycles_per_jiffy %lu jiffies_per_second %u ***\n",
++ cycles_per_jiffy, HZ);
++#else
++ printk("*** jiffies_per_second %u ***\n", HZ);
++#endif
++}
++
++static void show_pgdatinfo(void)
++{
++ pg_data_t *pgdat;
++
++ printk("pgdat:");
++ for_each_pgdat(pgdat) {
++ printk(" %d: %lu,%lu,%lu,%p",
++ pgdat->node_id,
++ pgdat->node_start_pfn,
++ pgdat->node_present_pages,
++ pgdat->node_spanned_pages,
++ pgdat->node_mem_map);
++ }
++ printk("\n");
++}
++
++static void show_diskio(void)
++{
++ struct gendisk *gd;
++ char buf[BDEVNAME_SIZE];
++
++ printk("disk_io: ");
++
++ down_read(&block_subsys.rwsem);
++ list_for_each_entry(gd, &block_subsys.kset.list, kobj.entry) {
++ char *name;
++ name = disk_name(gd, 0, buf);
++ if ((strlen(name) > 4) && (strncmp(name, "loop", 4) == 0) &&
++ isdigit(name[4]))
++ continue;
++ if ((strlen(name) > 3) && (strncmp(name, "ram", 3) == 0) &&
++ isdigit(name[3]))
++ continue;
++ printk("(%u,%u) %s r(%u %u %u) w(%u %u %u)\n",
++ gd->major, gd->first_minor,
++ name,
++ disk_stat_read(gd, ios[READ]),
++ disk_stat_read(gd, sectors[READ]),
++ disk_stat_read(gd, merges[READ]),
++ disk_stat_read(gd, ios[WRITE]),
++ disk_stat_read(gd, sectors[WRITE]),
++ disk_stat_read(gd, merges[WRITE]));
++ }
++ up_read(&block_subsys.rwsem);
++
++ printk("\n");
++}
++
++static void show_nrprocs(void)
++{
++ unsigned long _nr_running, _nr_sleeping,
++ _nr_unint, _nr_zombie, _nr_dead, _nr_stopped;
++
++ _nr_running = nr_running();
++ _nr_unint = nr_uninterruptible();
++ _nr_sleeping = nr_sleeping();
++ _nr_zombie = nr_zombie;
++ _nr_dead = nr_dead;
++ _nr_stopped = nr_stopped();
++
++ printk("VEnum: %d, proc R %lu, S %lu, D %lu, "
++ "Z %lu, X %lu, T %lu (tot %d)\n",
++ nr_ve, _nr_running, _nr_sleeping, _nr_unint,
++ _nr_zombie, _nr_dead, _nr_stopped, nr_threads);
++}
++
++static void wdog_print(void)
++{
++ show_header();
++ show_irq_list();
++ show_pgdatinfo();
++ show_mem();
++ show_diskio();
++ show_schedule_latency();
++ show_alloc_latency();
++ show_nrprocs();
++}
++
++static int wdog_loop(void* data)
++{
++ struct task_struct *tsk = current;
++ DECLARE_WAIT_QUEUE_HEAD(thread_wait_queue);
++
++ /*
++ * This thread doesn't need any user-level access,
++ * so get rid of all our resources
++ */
++ daemonize("wdogd");
++
++ spin_lock_irq(&tsk->sighand->siglock);
++ sigfillset(&tsk->blocked);
++ sigdelset(&tsk->blocked, SIGHUP);
++ recalc_sigpending();
++ spin_unlock_irq(&tsk->sighand->siglock);
++
++ while (wdog_thread_continue) {
++ wdog_print();
++ interruptible_sleep_on_timeout(&thread_wait_queue,
++ sleep_timeout*HZ);
++ try_to_freeze();
++ /* clear all signals */
++ if (signal_pending(tsk))
++ flush_signals(tsk);
++ }
++
++ complete_and_exit(&license_thread_exited, 0);
++}
++
++static int __init wdog_init(void)
++{
++ wdog_thread_pid = kernel_thread(wdog_loop, NULL, 0);
++ if (wdog_thread_pid < 0)
++ return wdog_thread_pid;
++
++ return 0;
++}
++
++static void __exit wdog_exit(void)
++{
++ wdog_thread_continue = 0;
++ if (wdog_thread_pid > 0) {
++ kill_proc(wdog_thread_pid, SIGHUP, 1);
++ wait_for_completion(&license_thread_exited);
++ }
++}
++
++MODULE_PARM(sleep_timeout, "i");
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo WDOG");
++MODULE_LICENSE("GPL v2");
++
++module_init(wdog_init)
++module_exit(wdog_exit)
+diff -uprN linux-2.6.15.orig/mm/filemap_xip.c linux-2.6.15-ve025stab014/mm/filemap_xip.c
+--- linux-2.6.15.orig/mm/filemap_xip.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/filemap_xip.c 2006-01-27 14:48:06.000000000 +0300
+@@ -190,7 +190,10 @@ __xip_unmap (struct address_space * mapp
+ flush_cache_page(vma, address, pte_pfn(*pte));
+ pteval = ptep_clear_flush(vma, address, pte);
+ page_remove_rmap(page);
++ pb_remove_ref(page, mm);
++ ub_unused_privvm_inc(mm, vma);
+ dec_mm_counter(mm, file_rss);
++ dec_vma_rss(vma);
+ BUG_ON(pte_dirty(pteval));
+ pte_unmap_unlock(pte, ptl);
+ page_cache_release(page);
+diff -uprN linux-2.6.15.orig/mm/fremap.c linux-2.6.15-ve025stab014/mm/fremap.c
+--- linux-2.6.15.orig/mm/fremap.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/fremap.c 2006-01-27 14:48:06.000000000 +0300
+@@ -20,6 +20,8 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++
+ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+ {
+@@ -34,6 +36,7 @@ static int zap_pte(struct mm_struct *mm,
+ if (pte_dirty(pte))
+ set_page_dirty(page);
+ page_remove_rmap(page);
++ pb_remove_ref(page, mm);
+ page_cache_release(page);
+ }
+ } else {
+@@ -57,6 +60,10 @@ int install_page(struct mm_struct *mm, s
+ pte_t *pte;
+ pte_t pte_val;
+ spinlock_t *ptl;
++ struct page_beancounter *pbc;
++
++ if (unlikely(pb_alloc(&pbc)))
++ goto out_nopb;
+
+ pte = get_locked_pte(mm, addr, &ptl);
+ if (!pte)
+@@ -75,11 +82,15 @@ int install_page(struct mm_struct *mm, s
+ if (page_mapcount(page) > INT_MAX/2)
+ goto unlock;
+
+- if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
++ if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) {
++ ub_unused_privvm_dec(mm, vma);
+ inc_mm_counter(mm, file_rss);
++ inc_vma_rss(vma);
++ }
+
+ flush_icache_page(vma, page);
+ set_pte_at(mm, addr, pte, mk_pte(page, prot));
++ pb_add_ref(page, mm, &pbc);
+ page_add_file_rmap(page);
+ pte_val = *pte;
+ update_mmu_cache(vma, addr, pte_val);
+@@ -87,6 +98,8 @@ int install_page(struct mm_struct *mm, s
+ unlock:
+ pte_unmap_unlock(pte, ptl);
+ out:
++ pb_free(&pbc);
++out_nopb:
+ return err;
+ }
+ EXPORT_SYMBOL(install_page);
+@@ -109,7 +122,9 @@ int install_file_pte(struct mm_struct *m
+
+ if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
+ update_hiwater_rss(mm);
++ ub_unused_privvm_inc(mm, vma);
+ dec_mm_counter(mm, file_rss);
++ dec_vma_rss(vma);
+ }
+
+ set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
+diff -uprN linux-2.6.15.orig/mm/memory.c linux-2.6.15-ve025stab014/mm/memory.c
+--- linux-2.6.15.orig/mm/memory.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/memory.c 2006-01-27 14:48:08.000000000 +0300
+@@ -58,6 +58,8 @@
+ #include <linux/swapops.h>
+ #include <linux/elf.h>
+
++#include <ub/ub_vmpages.h>
++
+ #ifndef CONFIG_NEED_MULTIPLE_NODES
+ /* use the per-pgdat data instead for discontigmem - mbligh */
+ unsigned long max_mapnr;
+@@ -418,7 +420,7 @@ struct page *vm_normal_page(struct vm_ar
+ static inline void
+ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+- unsigned long addr, int *rss)
++ unsigned long addr, int *rss, struct page_beancounter **pbc)
+ {
+ unsigned long vm_flags = vma->vm_flags;
+ pte_t pte = *src_pte;
+@@ -461,6 +463,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
+ if (page) {
+ get_page(page);
+ page_dup_rmap(page);
++ pb_add_list_ref(page, dst_mm, pbc);
+ rss[!!PageAnon(page)]++;
+ }
+
+@@ -468,20 +471,35 @@ out_set_pte:
+ set_pte_at(dst_mm, addr, dst_pte, pte);
+ }
+
++#define pte_ptrs(a) (PTRS_PER_PTE - ((a >> PAGE_SHIFT)&(PTRS_PER_PTE - 1)))
++#ifdef CONFIG_USER_RESOURCE
++#define same_ub(mm1, mm2) ((mm1)->mm_ub == (mm2)->mm_ub)
++#else
++#define same_ub(mm1, mm2) (1)
++#endif
++
+ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+- pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
++ pmd_t *dst_pmd, pmd_t *src_pmd,
++ struct vm_area_struct *dst_vma,
++ struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+ {
+ pte_t *src_pte, *dst_pte;
+ spinlock_t *src_ptl, *dst_ptl;
+ int progress = 0;
+- int rss[2];
++ int rss[2], rss_tot;
++ struct page_beancounter *pbc;
+
++ pbc = NULL;
+ again:
++ if (!same_ub(src_mm, dst_mm) &&
++ pb_alloc_list(&pbc, pte_ptrs(addr)))
++ goto nomem;
+ rss[1] = rss[0] = 0;
+ dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
+ if (!dst_pte)
+- return -ENOMEM;
++ goto nomem_pb;
++
+ src_pte = pte_offset_map_nested(src_pmd, addr);
+ src_ptl = pte_lockptr(src_mm, src_pmd);
+ spin_lock(src_ptl);
+@@ -502,22 +520,35 @@ again:
+ progress++;
+ continue;
+ }
+- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
++ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
++ vma, addr, rss, &pbc);
+ progress += 8;
+ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+
+ spin_unlock(src_ptl);
+ pte_unmap_nested(src_pte - 1);
++ rss_tot = rss[0] + rss[1];
++ add_vma_rss(dst_vma, rss_tot);
++ ub_unused_privvm_sub(dst_mm, dst_vma, rss_tot);
+ add_mm_rss(dst_mm, rss[0], rss[1]);
+ pte_unmap_unlock(dst_pte - 1, dst_ptl);
+ cond_resched();
+ if (addr != end)
+ goto again;
++
++ pb_free_list(&pbc);
+ return 0;
++
++nomem_pb:
++ pb_free(&pbc);
++nomem:
++ return -ENOMEM;
+ }
+
+ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+- pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
++ pud_t *dst_pud, pud_t *src_pud,
++ struct vm_area_struct *dst_vma,
++ struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+ {
+ pmd_t *src_pmd, *dst_pmd;
+@@ -532,14 +563,16 @@ static inline int copy_pmd_range(struct
+ if (pmd_none_or_clear_bad(src_pmd))
+ continue;
+ if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+- vma, addr, next))
++ dst_vma, vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+ return 0;
+ }
+
+ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+- pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
++ pgd_t *dst_pgd, pgd_t *src_pgd,
++ struct vm_area_struct *dst_vma,
++ struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+ {
+ pud_t *src_pud, *dst_pud;
+@@ -554,14 +587,14 @@ static inline int copy_pud_range(struct
+ if (pud_none_or_clear_bad(src_pud))
+ continue;
+ if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
+- vma, addr, next))
++ dst_vma, vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pud++, src_pud++, addr = next, addr != end);
+ return 0;
+ }
+
+ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+- struct vm_area_struct *vma)
++ struct vm_area_struct *dst_vma, struct vm_area_struct *vma)
+ {
+ pgd_t *src_pgd, *dst_pgd;
+ unsigned long next;
+@@ -589,7 +622,7 @@ int copy_page_range(struct mm_struct *ds
+ if (pgd_none_or_clear_bad(src_pgd))
+ continue;
+ if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+- vma, addr, next))
++ dst_vma, vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+ return 0;
+@@ -605,6 +638,7 @@ static unsigned long zap_pte_range(struc
+ spinlock_t *ptl;
+ int file_rss = 0;
+ int anon_rss = 0;
++ int rss;
+
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ do {
+@@ -657,6 +691,7 @@ static unsigned long zap_pte_range(struc
+ file_rss--;
+ }
+ page_remove_rmap(page);
++ pb_remove_ref(page, mm);
+ tlb_remove_page(tlb, page);
+ continue;
+ }
+@@ -671,6 +706,9 @@ static unsigned long zap_pte_range(struc
+ pte_clear_full(mm, addr, pte, tlb->fullmm);
+ } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+
++ rss = -(file_rss + anon_rss);
++ ub_unused_privvm_add(mm, vma, rss);
++ sub_vma_rss(vma, rss);
+ add_mm_rss(mm, file_rss, anon_rss);
+ pte_unmap_unlock(pte - 1, ptl);
+
+@@ -883,8 +921,9 @@ struct page *follow_page(struct vm_area_
+ pte_t *ptep, pte;
+ spinlock_t *ptl;
+ struct page *page;
+- struct mm_struct *mm = vma->vm_mm;
++ struct mm_struct *mm;
+
++ mm = (flags & FOLL_KERN ? &init_mm : vma->vm_mm);
+ page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
+ if (!IS_ERR(page)) {
+ BUG_ON(flags & FOLL_GET);
+@@ -892,7 +931,10 @@ struct page *follow_page(struct vm_area_
+ }
+
+ page = NULL;
+- pgd = pgd_offset(mm, address);
++ if (flags & FOLL_KERN)
++ pgd = pgd_offset_k(address);
++ else
++ pgd = pgd_offset(mm, address);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+ goto no_page_table;
+
+@@ -910,6 +952,9 @@ struct page *follow_page(struct vm_area_
+ goto out;
+ }
+
++ if (flags & FOLL_KERN)
++ goto kern;
++
+ ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+ if (!ptep)
+ goto out;
+@@ -919,6 +964,7 @@ struct page *follow_page(struct vm_area_
+ goto unlock;
+ if ((flags & FOLL_WRITE) && !pte_write(pte))
+ goto unlock;
++
+ page = vm_normal_page(vma, address, pte);
+ if (unlikely(!page))
+ goto unlock;
+@@ -936,6 +982,15 @@ unlock:
+ out:
+ return page;
+
++kern:
++ ptep = pte_offset_map(pmd, address);
++ BUG_ON(!ptep);
++ pte = *ptep;
++ BUG_ON(!pte_present(pte));
++ page = pte_page(pte);
++ pte_unmap(ptep);
++ return page;
++
+ no_page_table:
+ /*
+ * When core dumping an enormous anonymous area that nobody
+@@ -1076,12 +1131,14 @@ int get_user_pages(struct task_struct *t
+ }
+ EXPORT_SYMBOL(get_user_pages);
+
+-static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
++static int zeromap_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+ {
+ pte_t *pte;
+ spinlock_t *ptl;
++ struct mm_struct *mm;
+
++ mm = vma->vm_mm;
+ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+@@ -1091,6 +1148,7 @@ static int zeromap_pte_range(struct mm_s
+ page_cache_get(page);
+ page_add_file_rmap(page);
+ inc_mm_counter(mm, file_rss);
++ inc_vma_rss(vma);
+ BUG_ON(!pte_none(*pte));
+ set_pte_at(mm, addr, pte, zero_pte);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+@@ -1098,35 +1156,35 @@ static int zeromap_pte_range(struct mm_s
+ return 0;
+ }
+
+-static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
++static inline int zeromap_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+ {
+ pmd_t *pmd;
+ unsigned long next;
+
+- pmd = pmd_alloc(mm, pud, addr);
++ pmd = pmd_alloc(vma->vm_mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
+ do {
+ next = pmd_addr_end(addr, end);
+- if (zeromap_pte_range(mm, pmd, addr, next, prot))
++ if (zeromap_pte_range(vma, pmd, addr, next, prot))
+ return -ENOMEM;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+ }
+
+-static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
++static inline int zeromap_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+ {
+ pud_t *pud;
+ unsigned long next;
+
+- pud = pud_alloc(mm, pgd, addr);
++ pud = pud_alloc(vma->vm_mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+- if (zeromap_pmd_range(mm, pud, addr, next, prot))
++ if (zeromap_pmd_range(vma, pud, addr, next, prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+@@ -1138,15 +1196,14 @@ int zeromap_page_range(struct vm_area_st
+ pgd_t *pgd;
+ unsigned long next;
+ unsigned long end = addr + size;
+- struct mm_struct *mm = vma->vm_mm;
+ int err;
+
+ BUG_ON(addr >= end);
+- pgd = pgd_offset(mm, addr);
++ pgd = pgd_offset(vma->vm_mm, addr);
+ flush_cache_range(vma, addr, end);
+ do {
+ next = pgd_addr_end(addr, end);
+- err = zeromap_pud_range(mm, pgd, addr, next, prot);
++ err = zeromap_pud_range(vma, pgd, addr, next, prot);
+ if (err)
+ break;
+ } while (pgd++, addr = next, addr != end);
+@@ -1172,11 +1229,14 @@ pte_t * fastcall get_locked_pte(struct m
+ * old drivers should use this, and they needed to mark their
+ * pages reserved for the old functions anyway.
+ */
+-static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
++static int insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot)
+ {
+ int retval;
+ pte_t *pte;
+- spinlock_t *ptl;
++ spinlock_t *ptl;
++ struct mm_struct *mm;
++
++ mm = vma->vm_mm;
+
+ retval = -EINVAL;
+ if (PageAnon(page))
+@@ -1193,6 +1253,7 @@ static int insert_page(struct mm_struct
+ /* Ok, finally just insert the thing.. */
+ get_page(page);
+ inc_mm_counter(mm, file_rss);
++ inc_vma_rss(vma);
+ page_add_file_rmap(page);
+ set_pte_at(mm, addr, pte, mk_pte(page, prot));
+
+@@ -1229,7 +1290,7 @@ int vm_insert_page(struct vm_area_struct
+ if (!page_count(page))
+ return -EINVAL;
+ vma->vm_flags |= VM_INSERTPAGE;
+- return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
++ return insert_page(vma, addr, page, vma->vm_page_prot);
+ }
+ EXPORT_SYMBOL(vm_insert_page);
+
+@@ -1438,6 +1499,7 @@ static int do_wp_page(struct mm_struct *
+ struct page *old_page, *new_page;
+ pte_t entry;
+ int ret = VM_FAULT_MINOR;
++ struct page_beancounter *pbc;
+
+ old_page = vm_normal_page(vma, address, orig_pte);
+ if (!old_page)
+@@ -1465,6 +1527,9 @@ static int do_wp_page(struct mm_struct *
+ gotten:
+ pte_unmap_unlock(page_table, ptl);
+
++ if (unlikely(pb_alloc(&pbc)))
++ goto oom_nopb;
++
+ if (unlikely(anon_vma_prepare(vma)))
+ goto oom;
+ if (old_page == ZERO_PAGE(address)) {
+@@ -1485,12 +1550,16 @@ gotten:
+ if (likely(pte_same(*page_table, orig_pte))) {
+ if (old_page) {
+ page_remove_rmap(old_page);
++ pb_remove_ref(old_page, mm);
+ if (!PageAnon(old_page)) {
+ dec_mm_counter(mm, file_rss);
+ inc_mm_counter(mm, anon_rss);
+ }
+- } else
++ } else {
++ ub_unused_privvm_dec(mm, vma);
+ inc_mm_counter(mm, anon_rss);
++ inc_vma_rss(vma);
++ }
+ flush_cache_page(vma, address, pte_pfn(orig_pte));
+ entry = mk_pte(new_page, vma->vm_page_prot);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+@@ -1499,6 +1568,7 @@ gotten:
+ lazy_mmu_prot_update(entry);
+ lru_cache_add_active(new_page);
+ page_add_anon_rmap(new_page, vma, address);
++ pb_add_ref(new_page, mm, &pbc);
+
+ /* Free the old page.. */
+ new_page = old_page;
+@@ -1508,10 +1578,13 @@ gotten:
+ page_cache_release(new_page);
+ if (old_page)
+ page_cache_release(old_page);
++ pb_free(&pbc);
+ unlock:
+ pte_unmap_unlock(page_table, ptl);
+ return ret;
+ oom:
++ pb_free(&pbc);
++oom_nopb:
+ if (old_page)
+ page_cache_release(old_page);
+ return VM_FAULT_OOM;
+@@ -1843,10 +1916,16 @@ static int do_swap_page(struct mm_struct
+ swp_entry_t entry;
+ pte_t pte;
+ int ret = VM_FAULT_MINOR;
++ struct page_beancounter *pbc;
++ cycles_t start;
+
+ if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+- goto out;
++ goto out_nostat;
+
++ if (unlikely(pb_alloc(&pbc)))
++ return VM_FAULT_OOM;
++
++ start = get_cycles();
+ entry = pte_to_swp_entry(orig_pte);
+ page = lookup_swap_cache(entry);
+ if (!page) {
+@@ -1887,6 +1966,8 @@ static int do_swap_page(struct mm_struct
+ /* The page isn't present yet, go ahead with the fault. */
+
+ inc_mm_counter(mm, anon_rss);
++ inc_vma_rss(vma);
++ ub_swapin_inc(mm);
+ pte = mk_pte(page, vma->vm_page_prot);
+ if (write_access && can_share_swap_page(page)) {
+ pte = maybe_mkwrite(pte_mkdirty(pte), vma);
+@@ -1896,6 +1977,8 @@ static int do_swap_page(struct mm_struct
+ flush_icache_page(vma, page);
+ set_pte_at(mm, address, page_table, pte);
+ page_add_anon_rmap(page, vma, address);
++ pb_add_ref(page, mm, &pbc);
++ ub_unused_privvm_dec(mm, vma);
+
+ swap_free(entry);
+ if (vm_swap_full())
+@@ -1906,7 +1989,7 @@ static int do_swap_page(struct mm_struct
+ if (do_wp_page(mm, vma, address,
+ page_table, pmd, ptl, pte) == VM_FAULT_OOM)
+ ret = VM_FAULT_OOM;
+- goto out;
++ goto out_wp;
+ }
+
+ /* No need to invalidate - it was non-present before */
+@@ -1914,10 +1997,16 @@ static int do_swap_page(struct mm_struct
+ lazy_mmu_prot_update(pte);
+ unlock:
+ pte_unmap_unlock(page_table, ptl);
+-out:
++out_wp:
++ pb_free(&pbc);
++ spin_lock_irq(&kstat_glb_lock);
++ KSTAT_LAT_ADD(&kstat_glob.swap_in, get_cycles() - start);
++ spin_unlock_irq(&kstat_glb_lock);
++out_nostat:
+ return ret;
+ out_nomap:
+ pte_unmap_unlock(page_table, ptl);
++ pb_free(&pbc);
+ unlock_page(page);
+ page_cache_release(page);
+ return ret;
+@@ -1935,11 +2024,15 @@ static int do_anonymous_page(struct mm_s
+ struct page *page;
+ spinlock_t *ptl;
+ pte_t entry;
++ struct page_beancounter *pbc;
+
+ if (write_access) {
+ /* Allocate our own private page. */
+ pte_unmap(page_table);
+
++ if (unlikely(pb_alloc(&pbc)))
++ goto oom_nopb;
++
+ if (unlikely(anon_vma_prepare(vma)))
+ goto oom;
+ page = alloc_zeroed_user_highpage(vma, address);
+@@ -1957,6 +2050,9 @@ static int do_anonymous_page(struct mm_s
+ SetPageReferenced(page);
+ page_add_anon_rmap(page, vma, address);
+ } else {
++ if (unlikely(__pb_alloc(&pbc, GFP_ATOMIC)))
++ goto oom_nopb_locked;
++
+ /* Map the ZERO_PAGE - vm_page_prot is readonly */
+ page = ZERO_PAGE(address);
+ page_cache_get(page);
+@@ -1970,18 +2066,28 @@ static int do_anonymous_page(struct mm_s
+ page_add_file_rmap(page);
+ }
+
++ inc_vma_rss(vma);
++ pb_add_ref(page, mm, &pbc);
++ ub_unused_privvm_dec(mm, vma);
+ set_pte_at(mm, address, page_table, entry);
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
+ unlock:
++ pb_free(&pbc);
+ pte_unmap_unlock(page_table, ptl);
+ return VM_FAULT_MINOR;
+ release:
+ page_cache_release(page);
+ goto unlock;
+ oom:
++ pb_free(&pbc);
++oom_nopb:
++ return VM_FAULT_OOM;
++
++oom_nopb_locked:
++ pte_unmap_unlock(page_table, ptl);
+ return VM_FAULT_OOM;
+ }
+
+@@ -2009,6 +2115,7 @@ static int do_no_page(struct mm_struct *
+ unsigned int sequence = 0;
+ int ret = VM_FAULT_MINOR;
+ int anon = 0;
++ struct page_beancounter *pbc;
+
+ pte_unmap(page_table);
+ BUG_ON(vma->vm_flags & VM_PFNMAP);
+@@ -2018,6 +2125,9 @@ static int do_no_page(struct mm_struct *
+ sequence = mapping->truncate_count;
+ smp_rmb(); /* serializes i_size against truncate_count */
+ }
++
++ if (unlikely(pb_alloc(&pbc)))
++ goto oom_nopb;
+ retry:
+ new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
+ /*
+@@ -2030,9 +2140,9 @@ retry:
+
+ /* no page was available -- either SIGBUS or OOM */
+ if (new_page == NOPAGE_SIGBUS)
+- return VM_FAULT_SIGBUS;
++ goto bus_nopg;
+ if (new_page == NOPAGE_OOM)
+- return VM_FAULT_OOM;
++ goto oom_nopg;
+
+ /*
+ * Should we do an early C-O-W break?
+@@ -2091,6 +2201,9 @@ retry:
+ inc_mm_counter(mm, file_rss);
+ page_add_file_rmap(new_page);
+ }
++ inc_vma_rss(vma);
++ pb_add_ref(new_page, mm, &pbc);
++ ub_unused_privvm_dec(mm, vma);
+ } else {
+ /* One of our sibling threads was faster, back out. */
+ page_cache_release(new_page);
+@@ -2102,10 +2215,18 @@ retry:
+ lazy_mmu_prot_update(entry);
+ unlock:
+ pte_unmap_unlock(page_table, ptl);
++ pb_free(&pbc);
+ return ret;
+ oom:
+ page_cache_release(new_page);
++oom_nopg:
++ pb_free(&pbc);
++oom_nopb:
+ return VM_FAULT_OOM;
++
++bus_nopg:
++ pb_free(&pbc);
++ return VM_FAULT_SIGBUS;
+ }
+
+ /*
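Across the memory.c hunks above, one shape repeats in every fault handler: the per-page accounting record (pb_alloc) is allocated before the page-table lock is taken, attached only on the success path (pb_add_ref), and released on every exit (pb_free), which is why each handler grows extra labels such as oom_nopb. Below is a minimal, self-contained sketch of that allocate-early/unwind-in-reverse flow; the names (track_alloc, track_attach, track_free) are illustrative and are not the OpenVZ UBC interface.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical per-page tracking record (stand-in for struct page_beancounter). */
struct track_rec { int owner; };

/* Allocate the record up front, before any locks would be taken. */
static int track_alloc(struct track_rec **t)
{
    *t = malloc(sizeof(**t));
    return *t ? 0 : -1;
}

/* Success path: hand the record over and clear the caller's pointer, so the
 * common cleanup below becomes a no-op, mirroring pb_add_ref()/pb_free(). */
static void track_attach(struct track_rec **t, int owner)
{
    (*t)->owner = owner;
    *t = NULL;
}

static void track_free(struct track_rec **t)
{
    free(*t);           /* free(NULL) is safe, so this works on every path */
    *t = NULL;
}

/* Fault-handler shaped flow: allocate early, unwind in reverse order. */
static int handle_fault(int owner, int fail_prepare, int fail_map)
{
    struct track_rec *t;

    if (track_alloc(&t))
        goto oom_nopb;          /* nothing allocated yet, nothing to undo */
    if (fail_prepare)           /* stands in for anon_vma_prepare() failing */
        goto oom;
    if (fail_map)               /* stands in for the page allocation failing */
        goto oom;

    track_attach(&t, owner);    /* only reached on success */
    track_free(&t);             /* no-op here; frees leftovers on other paths */
    return 0;

oom:
    track_free(&t);
oom_nopb:
    return -1;
}

int main(void)
{
    printf("ok=%d, prepare-fail=%d\n", handle_fault(1, 0, 0), handle_fault(1, 1, 0));
    return 0;
}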
+diff -uprN linux-2.6.15.orig/mm/mempool.c linux-2.6.15-ve025stab014/mm/mempool.c
+--- linux-2.6.15.orig/mm/mempool.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/mempool.c 2006-01-27 14:48:06.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <linux/mempool.h>
+ #include <linux/blkdev.h>
+ #include <linux/writeback.h>
++#include <linux/kmem_cache.h>
+
+ static void add_element(mempool_t *pool, void *element)
+ {
+@@ -78,6 +79,8 @@ mempool_t *mempool_create_node(int min_n
+ init_waitqueue_head(&pool->wait);
+ pool->alloc = alloc_fn;
+ pool->free = free_fn;
++ if (alloc_fn == mempool_alloc_slab)
++ kmem_mark_nocharge((kmem_cache_t *)pool_data);
+
+ /*
+ * First pre-allocate the guaranteed number of buffers.
+@@ -119,6 +122,7 @@ int mempool_resize(mempool_t *pool, int
+ unsigned long flags;
+
+ BUG_ON(new_min_nr <= 0);
++ gfp_mask &= ~__GFP_UBC;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ if (new_min_nr <= pool->min_nr) {
+@@ -212,6 +216,7 @@ void * mempool_alloc(mempool_t *pool, gf
+ gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
+ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
+ gfp_mask |= __GFP_NOWARN; /* failures are OK */
++ gfp_mask &= ~__GFP_UBC;
+
+ gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
+
+diff -uprN linux-2.6.15.orig/mm/mlock.c linux-2.6.15-ve025stab014/mm/mlock.c
+--- linux-2.6.15.orig/mm/mlock.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/mlock.c 2006-01-27 14:48:06.000000000 +0300
+@@ -10,6 +10,7 @@
+ #include <linux/mempolicy.h>
+ #include <linux/syscalls.h>
+
++#include <ub/ub_vmpages.h>
+
+ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
+ unsigned long start, unsigned long end, unsigned int newflags)
+@@ -24,6 +25,12 @@ static int mlock_fixup(struct vm_area_st
+ goto out;
+ }
+
++ if (newflags & VM_LOCKED) {
++ ret = ub_locked_charge(mm, end - start);
++ if (ret < 0)
++ goto out;
++ }
++
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
+ vma->vm_file, pgoff, vma_policy(vma));
+@@ -62,7 +69,8 @@ success:
+ pages = -pages;
+ if (!(newflags & VM_IO))
+ ret = make_pages_present(start, end);
+- }
++ } else
++ ub_locked_uncharge(mm, end - start);
+
+ vma->vm_mm->locked_vm -= pages;
+ out:
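mlock_fixup() above picks up the charge-before-commit discipline: the locked-pages resource is charged before the VMA is actually changed, and the charge is returned on every failure path. A standalone sketch of that pattern against a simple limited counter follows; the counter, its limit and the function names are illustrative, not the ub_locked_charge()/ub_locked_uncharge() API.

#include <stdio.h>

/* Illustrative limited counter, standing in for a beancounter resource. */
struct limited_counter {
    unsigned long held;
    unsigned long limit;
};

static int counter_charge(struct limited_counter *c, unsigned long pages)
{
    if (c->held + pages > c->limit)
        return -1;              /* refuse: would exceed the limit */
    c->held += pages;
    return 0;
}

static void counter_uncharge(struct limited_counter *c, unsigned long pages)
{
    c->held -= pages;
}

/* mlock_fixup()-shaped flow: charge first, roll back if the real work fails. */
static int lock_range(struct limited_counter *c, unsigned long pages, int work_fails)
{
    if (counter_charge(c, pages))
        return -1;
    if (work_fails) {           /* e.g. the vma split/merge failing */
        counter_uncharge(c, pages);
        return -1;
    }
    return 0;                   /* the charge stays until the range is unlocked */
}

int main(void)
{
    struct limited_counter c = { 0, 100 };

    printf("first:  %d (held=%lu)\n", lock_range(&c, 60, 0), c.held);
    printf("second: %d (held=%lu)\n", lock_range(&c, 60, 0), c.held);
    return 0;
}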
+diff -uprN linux-2.6.15.orig/mm/mmap.c linux-2.6.15-ve025stab014/mm/mmap.c
+--- linux-2.6.15.orig/mm/mmap.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/mmap.c 2006-01-27 14:48:06.000000000 +0300
+@@ -24,14 +24,18 @@
+ #include <linux/mount.h>
+ #include <linux/mempolicy.h>
+ #include <linux/rmap.h>
++#include <linux/virtinfo.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlb.h>
+
++#include <ub/ub_vmpages.h>
++
+ static void unmap_region(struct mm_struct *mm,
+ struct vm_area_struct *vma, struct vm_area_struct *prev,
+ unsigned long start, unsigned long end);
++static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft);
+
+ /*
+ * WARNING: the debugging will use recursive algorithms so never enable this
+@@ -86,6 +90,16 @@ int __vm_enough_memory(long pages, int c
+
+ vm_acct_memory(pages);
+
++ switch (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_ENOUGHMEM,
++ (void *)pages)
++ & (NOTIFY_OK | NOTIFY_FAIL)) {
++ case NOTIFY_OK:
++ return 0;
++ case NOTIFY_FAIL:
++ vm_unacct_memory(pages);
++ return -ENOMEM;
++ }
++
+ /*
+ * Sometimes we want to use more memory than we have
+ */
+@@ -200,11 +214,16 @@ static struct vm_area_struct *remove_vma
+ struct vm_area_struct *next = vma->vm_next;
+
+ might_sleep();
++
++ ub_memory_uncharge(vma->vm_mm, vma->vm_end - vma->vm_start,
++ vma->vm_flags, vma->vm_file);
+ if (vma->vm_ops && vma->vm_ops->close)
+ vma->vm_ops->close(vma);
+ if (vma->vm_file)
+ fput(vma->vm_file);
+ mpol_free(vma_policy(vma));
++ if (get_vma_rss(vma))
++ warn_bad_rss(vma, 0);
+ kmem_cache_free(vm_area_cachep, vma);
+ return next;
+ }
+@@ -241,7 +260,7 @@ asmlinkage unsigned long sys_brk(unsigne
+ goto out;
+
+ /* Ok, looks good - let it rip. */
+- if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
++ if (__do_brk(oldbrk, newbrk-oldbrk, UB_HARD) != oldbrk)
+ goto out;
+ set_brk:
+ mm->brk = brk;
+@@ -725,7 +744,7 @@ struct vm_area_struct *vma_merge(struct
+ else
+ next = mm->mmap;
+ area = next;
+- if (next && next->vm_end == end) /* cases 6, 7, 8 */
++ if (next && next->vm_end == end) /* cases 6, 7, 8 */
+ next = next->vm_next;
+
+ /*
+@@ -745,11 +764,22 @@ struct vm_area_struct *vma_merge(struct
+ is_mergeable_anon_vma(prev->anon_vma,
+ next->anon_vma)) {
+ /* cases 1, 6 */
++ add_vma_rss(prev, get_vma_rss(next));
++ if (area != next) /* case 6 */
++ add_vma_rss(prev, get_vma_rss(area));
+ vma_adjust(prev, prev->vm_start,
+ next->vm_end, prev->vm_pgoff, NULL);
+- } else /* cases 2, 5, 7 */
++ } else { /* cases 2, 5, 7 */
++ if (next && addr == next->vm_start) { /* case 5 */
++ unsigned long rss;
++ rss = pages_in_vma_range(next, addr, end);
++ sub_vma_rss(next, rss);
++ add_vma_rss(prev, rss);
++ } else if (area != next) /* case 7 */
++ add_vma_rss(prev, get_vma_rss(area));
+ vma_adjust(prev, prev->vm_start,
+ end, prev->vm_pgoff, NULL);
++ }
+ return prev;
+ }
+
+@@ -760,12 +790,19 @@ struct vm_area_struct *vma_merge(struct
+ mpol_equal(policy, vma_policy(next)) &&
+ can_vma_merge_before(next, vm_flags,
+ anon_vma, file, pgoff+pglen)) {
+- if (prev && addr < prev->vm_end) /* case 4 */
++ if (prev && addr < prev->vm_end) { /* case 4 */
++ unsigned long rss;
++ rss = pages_in_vma_range(prev, addr, end);
++ sub_vma_rss(prev, rss);
++ add_vma_rss(next, rss);
+ vma_adjust(prev, prev->vm_start,
+ addr, prev->vm_pgoff, NULL);
+- else /* cases 3, 8 */
++ } else { /* cases 3, 8 */
++ if (area != next) /* case 8 */
++ add_vma_rss(area, get_vma_rss(next));
+ vma_adjust(area, addr, next->vm_end,
+ next->vm_pgoff - pglen, NULL);
++ }
+ return area;
+ }
+
+@@ -1032,6 +1069,10 @@ munmap_back:
+ }
+ }
+
++ if (ub_memory_charge(mm, len, vm_flags, file,
++ (flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD)))
++ goto charge_error;
++
+ /*
+ * Can we just expand an old private anonymous mapping?
+ * The VM_SHARED test is necessary because shmem_zero_setup
+@@ -1047,7 +1088,8 @@ munmap_back:
+ * specific mapper. the address has already been validated, but
+ * not unmapped, but the maps are removed from the list.
+ */
+- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
++ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
++ (flags & MAP_EXECPRIO ? __GFP_SOFT_UBC : 0));
+ if (!vma) {
+ error = -ENOMEM;
+ goto unacct_error;
+@@ -1141,6 +1183,8 @@ unmap_and_free_vma:
+ free_vma:
+ kmem_cache_free(vm_area_cachep, vma);
+ unacct_error:
++ ub_memory_uncharge(mm, len, vm_flags, file);
++charge_error:
+ if (charged)
+ vm_unacct_memory(charged);
+ return error;
+@@ -1470,12 +1514,16 @@ static int acct_stack_growth(struct vm_a
+ return -ENOMEM;
+ }
+
++ if (ub_memory_charge(mm, grow << PAGE_SHIFT, vma->vm_flags,
++ vma->vm_file, UB_SOFT))
++ goto fail_charge;
++
+ /*
+ * Overcommit.. This must be the final test, as it will
+ * update security statistics.
+ */
+ if (security_vm_enough_memory(grow))
+- return -ENOMEM;
++ goto fail_sec;
+
+ /* Ok, everything looks good - let it rip */
+ mm->total_vm += grow;
+@@ -1483,6 +1531,11 @@ static int acct_stack_growth(struct vm_a
+ mm->locked_vm += grow;
+ vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
+ return 0;
++
++fail_sec:
++ ub_memory_uncharge(mm, grow << PAGE_SHIFT, vma->vm_flags, vma->vm_file);
++fail_charge:
++ return -ENOMEM;
+ }
+
+ #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
+@@ -1743,6 +1796,10 @@ int split_vma(struct mm_struct * mm, str
+ else
+ vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+
++ /* protected with mmap sem */
++ set_vma_rss(vma, pages_in_vma(vma));
++ set_vma_rss(new, pages_in_vma(new));
++
+ return 0;
+ }
+
+@@ -1838,7 +1895,7 @@ static inline void verify_mm_writelocked
+ * anonymous maps. eventually we may be able to do some
+ * brk-specific accounting here.
+ */
+-unsigned long do_brk(unsigned long addr, unsigned long len)
++static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft)
+ {
+ struct mm_struct * mm = current->mm;
+ struct vm_area_struct * vma, * prev;
+@@ -1890,11 +1947,14 @@ unsigned long do_brk(unsigned long addr,
+ if (mm->map_count > sysctl_max_map_count)
+ return -ENOMEM;
+
+- if (security_vm_enough_memory(len >> PAGE_SHIFT))
+- return -ENOMEM;
+-
+ flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
++ if (ub_memory_charge(mm, len, flags, NULL, soft))
++ goto fail_charge;
++
++ if (security_vm_enough_memory(len >> PAGE_SHIFT))
++ goto fail_sec;
++
+ /* Can we just expand an old private anonymous mapping? */
+ if (vma_merge(mm, prev, addr, addr + len, flags,
+ NULL, NULL, pgoff, NULL))
+@@ -1903,11 +1963,11 @@ unsigned long do_brk(unsigned long addr,
+ /*
+ * create a vma struct for an anonymous mapping
+ */
+- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+- if (!vma) {
+- vm_unacct_memory(len >> PAGE_SHIFT);
+- return -ENOMEM;
+- }
++ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
++ (soft == UB_SOFT ? __GFP_SOFT_UBC : 0));
++ if (!vma)
++ goto fail_alloc;
++
+ memset(vma, 0, sizeof(*vma));
+
+ vma->vm_mm = mm;
+@@ -1924,8 +1984,19 @@ out:
+ make_pages_present(addr, addr + len);
+ }
+ return addr;
++
++fail_alloc:
++ vm_unacct_memory(len >> PAGE_SHIFT);
++fail_sec:
++ ub_memory_uncharge(mm, len, flags, NULL);
++fail_charge:
++ return -ENOMEM;
+ }
+
++unsigned long do_brk(unsigned long addr, unsigned long len)
++{
++ return __do_brk(addr, len, UB_SOFT);
++}
+ EXPORT_SYMBOL(do_brk);
+
+ /* Release all mmaps. */
+@@ -2035,6 +2106,7 @@ struct vm_area_struct *copy_vma(struct v
+ new_vma->vm_start = addr;
+ new_vma->vm_end = addr + len;
+ new_vma->vm_pgoff = pgoff;
++ set_vma_rss(new_vma, 0);
+ if (new_vma->vm_file)
+ get_file(new_vma->vm_file);
+ if (new_vma->vm_ops && new_vma->vm_ops->open)
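The vma_merge() and split_vma() changes above keep the new per-VMA RSS counter consistent while ranges are merged: when a whole neighbouring VMA is absorbed its entire counter moves to the survivor, and when only a boundary shifts (cases 4 and 5) just the pages inside the moved sub-range migrate between counters. The toy model below illustrates that bookkeeping; pages_in_range() is an illustrative stand-in for the patch's pages_in_vma_range() and simply assumes resident pages are spread evenly.

#include <stdio.h>

/* Toy VMA: an address range (page-granular) plus its resident-page counter. */
struct toy_vma {
    unsigned long start, end;
    unsigned long rss;
};

/* Stand-in for pages_in_vma_range(): assume even spread, enough for the demo. */
static unsigned long pages_in_range(struct toy_vma *v, unsigned long start,
                                    unsigned long end)
{
    unsigned long span = v->end - v->start;
    return span ? v->rss * (end - start) / span : 0;
}

/* Case 1/6-style merge: prev absorbs next entirely. */
static void absorb(struct toy_vma *prev, struct toy_vma *next)
{
    prev->rss += next->rss;
    prev->end = next->end;
    next->rss = 0;
}

/* Case 5-style merge: only [addr, end) moves from next into prev. */
static void steal_head(struct toy_vma *prev, struct toy_vma *next,
                       unsigned long addr, unsigned long end)
{
    unsigned long moved = pages_in_range(next, addr, end);

    next->rss -= moved;
    prev->rss += moved;
    prev->end = end;
    next->start = end;
}

int main(void)
{
    struct toy_vma a = { 0, 10, 10 }, b = { 10, 20, 8 };

    steal_head(&a, &b, 10, 15);
    printf("after partial merge: a.rss=%lu b.rss=%lu\n", a.rss, b.rss);
    absorb(&a, &b);
    printf("after full merge:    a.rss=%lu b.rss=%lu\n", a.rss, b.rss);
    return 0;
}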
+diff -uprN linux-2.6.15.orig/mm/mprotect.c linux-2.6.15-ve025stab014/mm/mprotect.c
+--- linux-2.6.15.orig/mm/mprotect.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/mprotect.c 2006-01-27 14:48:06.000000000 +0300
+@@ -25,6 +25,8 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++
+ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, pgprot_t newprot)
+ {
+@@ -109,12 +111,20 @@ mprotect_fixup(struct vm_area_struct *vm
+ pgprot_t newprot;
+ pgoff_t pgoff;
+ int error;
++ unsigned long ch_size;
++ int ch_dir;
+
+ if (newflags == oldflags) {
+ *pprev = vma;
+ return 0;
+ }
+
++ error = -ENOMEM;
++ ch_size = nrpages - pages_in_vma_range(vma, start, end);
++ ch_dir = ub_protected_charge(mm, ch_size, newflags, vma);
++ if (ch_dir == PRIVVM_ERROR)
++ goto fail_ch;
++
+ /*
+ * If we make a private mapping writable we increase our commit;
+ * but (without finer accounting) cannot reduce our commit if we
+@@ -127,7 +137,7 @@ mprotect_fixup(struct vm_area_struct *vm
+ if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
+ charged = nrpages;
+ if (security_vm_enough_memory(charged))
+- return -ENOMEM;
++ goto fail_sec;
+ newflags |= VM_ACCOUNT;
+ }
+ }
+@@ -169,10 +179,16 @@ success:
+ change_protection(vma, start, end, newprot);
+ vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
+ vm_stat_account(mm, newflags, vma->vm_file, nrpages);
++ if (ch_dir == PRIVVM_TO_SHARED)
++ __ub_unused_privvm_dec(mm, ch_size);
+ return 0;
+
+ fail:
+ vm_unacct_memory(charged);
++fail_sec:
++ if (ch_dir == PRIVVM_TO_PRIVATE)
++ __ub_unused_privvm_dec(mm, ch_size);
++fail_ch:
+ return error;
+ }
+
+diff -uprN linux-2.6.15.orig/mm/mremap.c linux-2.6.15-ve025stab014/mm/mremap.c
+--- linux-2.6.15.orig/mm/mremap.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/mremap.c 2006-01-27 14:48:06.000000000 +0300
+@@ -22,6 +22,8 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++
+ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
+ {
+ pgd_t *pgd;
+@@ -105,6 +107,8 @@ static void move_ptes(struct vm_area_str
+ pte = ptep_clear_flush(vma, old_addr, old_pte);
+ /* ZERO_PAGE can be dependant on virtual addr */
+ pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
++ dec_vma_rss(vma);
++ inc_vma_rss(new_vma);
+ set_pte_at(mm, new_addr, new_pte, pte);
+ }
+
+@@ -165,17 +169,21 @@ static unsigned long move_vma(struct vm_
+ unsigned long hiwater_vm;
+ int split = 0;
+
++ if (ub_memory_charge(mm, new_len, vm_flags,
++ vma->vm_file, UB_HARD))
++ goto err;
++
+ /*
+ * We'd prefer to avoid failure later on in do_munmap:
+ * which may split one vma into three before unmapping.
+ */
+ if (mm->map_count >= sysctl_max_map_count - 3)
+- return -ENOMEM;
++ goto err_nomem;
+
+ new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
+ new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
+ if (!new_vma)
+- return -ENOMEM;
++ goto err_nomem;
+
+ moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
+ if (moved_len < old_len) {
+@@ -234,7 +242,13 @@ static unsigned long move_vma(struct vm_
+ new_addr + new_len);
+ }
+
+- return new_addr;
++ if (new_addr != -ENOMEM)
++ return new_addr;
++
++err_nomem:
++ ub_memory_uncharge(mm, new_len, vm_flags, vma->vm_file);
++err:
++ return -ENOMEM;
+ }
+
+ /*
+@@ -360,6 +374,11 @@ unsigned long do_mremap(unsigned long ad
+ if (max_addr - addr >= new_len) {
+ int pages = (new_len - old_len) >> PAGE_SHIFT;
+
++ ret = -ENOMEM;
++ if (ub_memory_charge(mm, new_len, vma->vm_flags,
++ vma->vm_file, UB_HARD))
++ goto out;
++
+ vma_adjust(vma, vma->vm_start,
+ addr + new_len, vma->vm_pgoff, NULL);
+
+diff -uprN linux-2.6.15.orig/mm/oom_kill.c linux-2.6.15-ve025stab014/mm/oom_kill.c
+--- linux-2.6.15.orig/mm/oom_kill.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/oom_kill.c 2006-01-27 14:48:08.000000000 +0300
+@@ -22,6 +22,9 @@
+ #include <linux/jiffies.h>
+ #include <linux/cpuset.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_oom.h>
++
+ /* #define DEBUG */
+
+ /**
+@@ -48,6 +51,9 @@ unsigned long badness(struct task_struct
+ unsigned long points, cpu_time, run_time, s;
+ struct list_head *tsk;
+
++ if (p->flags & PF_SWAPOFF)
++ return ULONG_MAX;
++
+ if (!p->mm)
+ return 0;
+
+@@ -117,14 +123,19 @@ unsigned long badness(struct task_struct
+ * Adjust the score by oomkilladj.
+ */
+ if (p->oomkilladj) {
+- if (p->oomkilladj > 0)
+- points <<= p->oomkilladj;
+- else
++ if (p->oomkilladj > 0) {
++ unsigned long long points_long;
++ points_long =
++ (unsigned long long)points << p->oomkilladj;
++ points = ULONG_MAX;
++ if (points_long < ULONG_MAX)
++ points = points_long;
++ } else
+ points >>= -(p->oomkilladj);
+ }
+
+ #ifdef DEBUG
+- printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n",
++ printk(KERN_DEBUG "OOMkill: task %d (%s) got %lu points\n",
+ p->pid, p->comm, points);
+ #endif
+ return points;
+@@ -141,39 +152,60 @@ static struct task_struct * select_bad_p
+ unsigned long maxpoints = 0;
+ struct task_struct *g, *p;
+ struct task_struct *chosen = NULL;
++ int chosen_oomadj = OOM_DISABLE;
+ struct timespec uptime;
++ struct user_beancounter *ub;
+
++retry:
++ ub = ub_oom_select_worst();
+ do_posix_clock_monotonic_gettime(&uptime);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ unsigned long points;
+ int releasing;
+
+ /* skip the init task with pid == 1 */
+ if (p->pid == 1)
+ continue;
+- if (p->oomkilladj == OOM_DISABLE)
+- continue;
+ /* If p's nodes don't overlap ours, it won't help to kill p. */
+ if (!cpuset_excl_nodes_overlap(p))
+ continue;
++ if (!ub_oom_task_match(p, ub))
++ continue;
+
+- /*
+- * This is in the process of releasing memory so for wait it
+- * to finish before killing some other task by mistake.
+- */
+ releasing = test_tsk_thread_flag(p, TIF_MEMDIE) ||
+ p->flags & PF_EXITING;
+- if (releasing && !(p->flags & PF_DEAD))
+- return ERR_PTR(-1UL);
+- if (p->flags & PF_SWAPOFF)
+- return p;
++ /* Skip this process; killing it will not help */
++ if (releasing)
++ continue;
+
+ points = badness(p, uptime.tv_sec);
++ if (p->oomkilladj == OOM_DISABLE &&
++ chosen_oomadj != OOM_DISABLE)
++ continue;
++ if (points == ULONG_MAX) {
++ /* There is no better choice. Let's kill */
++ chosen = p;
++ goto done;
++ }
+ if (points > maxpoints || !chosen) {
+ chosen = p;
++ chosen_oomadj = p->oomkilladj;
+ maxpoints = points;
+ }
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
++
++ /* Found nothing?!?! Either we hang forever, or we panic. */
++ if (!chosen) {
++ if (ub_oom_panic(ub)) {
++ read_unlock(&tasklist_lock);
++ panic("Out of memory and no killable processes...\n");
++ }
++
++ put_beancounter(ub);
++ goto retry;
++ }
++done:
++ put_beancounter(ub);
+ return chosen;
+ }
+
+@@ -182,7 +214,7 @@ static struct task_struct * select_bad_p
+ * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
+ * we select a process with CAP_SYS_RAW_IO set).
+ */
+-static void __oom_kill_task(task_t *p)
++static void __oom_kill_task(task_t *p, struct mm_struct *mm)
+ {
+ if (p->pid == 1) {
+ WARN_ON(1);
+@@ -208,7 +240,7 @@ static void __oom_kill_task(task_t *p)
+ */
+ p->time_slice = HZ;
+ set_tsk_thread_flag(p, TIF_MEMDIE);
+-
++ ub_oom_kill_task(p, mm);
+ force_sig(SIGKILL, p);
+ }
+
+@@ -224,15 +256,15 @@ static struct mm_struct *oom_kill_task(t
+ return NULL;
+ }
+
+- __oom_kill_task(p);
++ __oom_kill_task(p, mm);
+ /*
+ * kill all processes that share the ->mm (i.e. all threads),
+ * but are in a different thread group
+ */
+- do_each_thread(g, q)
++ do_each_thread_all(g, q) {
+ if (q->mm == mm && q->tgid != p->tgid)
+- __oom_kill_task(q);
+- while_each_thread(g, q);
++ __oom_kill_task(q, mm);
++ } while_each_thread_all(g, q);
+
+ return mm;
+ }
+@@ -268,6 +300,9 @@ void out_of_memory(gfp_t gfp_mask, int o
+ struct mm_struct *mm = NULL;
+ task_t * p;
+
++ if (ub_oom_start())
++ return;
++
+ if (printk_ratelimit()) {
+ printk("oom-killer: gfp_mask=0x%x, order=%d\n",
+ gfp_mask, order);
+@@ -277,28 +312,23 @@ void out_of_memory(gfp_t gfp_mask, int o
+ read_lock(&tasklist_lock);
+ retry:
+ p = select_bad_process();
+-
+ if (PTR_ERR(p) == -1UL)
+ goto out;
+
+- /* Found nothing?!?! Either we hang forever, or we panic. */
+- if (!p) {
+- read_unlock(&tasklist_lock);
+- panic("Out of memory and no killable processes...\n");
+- }
+-
+ mm = oom_kill_process(p);
+ if (!mm)
+ goto retry;
+
+ out:
+ read_unlock(&tasklist_lock);
++ ub_oom_stop();
+ if (mm)
+ mmput(mm);
+
+ /*
+ * Give "p" a good chance of killing itself before we
+- * retry to allocate memory.
++ * retry to allocate memory or exit in case of suicide.
+ */
+- schedule_timeout_interruptible(1);
++ if (!test_thread_flag(TIF_MEMDIE))
++ schedule_timeout_interruptible(1);
+ }
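The badness() hunk above replaces the overflow-prone `points <<= p->oomkilladj` with a 64-bit shift that saturates at ULONG_MAX, so a large positive oomkilladj can no longer wrap a high score around to a low one. The standalone rendering below shows the same saturating adjustment with userspace types.

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

/* Saturating version of "points <<= adj" for positive adj, as in the patch:
 * do the shift in 64 bits and clamp instead of letting it wrap around. */
static unsigned long adjust_points(unsigned long points, int adj)
{
    if (adj > 0) {
        uint64_t wide = (uint64_t)points << adj;
        return wide > ULONG_MAX ? ULONG_MAX : (unsigned long)wide;
    }
    return points >> -adj;
}

int main(void)
{
    /* On 32-bit longs, 0x80000000UL << 4 would wrap to 0 without the clamp. */
    printf("%lu\n", adjust_points(0x80000000UL, 4));
    printf("%lu\n", adjust_points(1000UL, -3));
    return 0;
}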
+diff -uprN linux-2.6.15.orig/mm/page_alloc.c linux-2.6.15-ve025stab014/mm/page_alloc.c
+--- linux-2.6.15.orig/mm/page_alloc.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/page_alloc.c 2006-01-27 14:48:08.000000000 +0300
+@@ -40,6 +40,8 @@
+ #include <asm/tlbflush.h>
+ #include "internal.h"
+
++#include <ub/ub_mem.h>
++
+ /*
+ * MCD - HACK: Find somewhere to initialize this EARLY, or make this
+ * initializer cleaner
+@@ -49,6 +51,7 @@ EXPORT_SYMBOL(node_online_map);
+ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
+ EXPORT_SYMBOL(node_possible_map);
+ struct pglist_data *pgdat_list __read_mostly;
++EXPORT_SYMBOL(pgdat_list);
+ unsigned long totalram_pages __read_mostly;
+ unsigned long totalhigh_pages __read_mostly;
+ long nr_swap_pages;
+@@ -415,6 +418,7 @@ void __free_pages_ok(struct page *page,
+ list_add(&page->lru, &list);
+ mod_page_state(pgfree, 1 << order);
+ kernel_map_pages(page, 1<<order, 0);
++ ub_page_uncharge(page, order);
+ free_pages_bulk(page_zone(page), 1, &list, order);
+ }
+
+@@ -699,6 +703,7 @@ static void fastcall free_hot_cold_page(
+ pcp->count++;
+ if (pcp->count >= pcp->high)
+ pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
++ ub_page_uncharge(page, 0);
+ local_irq_restore(flags);
+ put_cpu();
+ }
+@@ -854,6 +859,26 @@ get_page_from_freelist(gfp_t gfp_mask, u
+ return page;
+ }
+
++static void __alloc_collect_stats(unsigned int gfp_mask,
++ unsigned int order, struct page *page, cycles_t time)
++{
++ int ind;
++ unsigned long flags;
++
++ time = get_cycles() - time;
++ if (!(gfp_mask & __GFP_WAIT))
++ ind = 0;
++ else if (!(gfp_mask & __GFP_HIGHMEM))
++ ind = (order > 0 ? 2 : 1);
++ else
++ ind = (order > 0 ? 4 : 3);
++ spin_lock_irqsave(&kstat_glb_lock, flags);
++ KSTAT_LAT_ADD(&kstat_glob.alloc_lat[ind], time);
++ if (!page)
++ kstat_glob.alloc_fails[ind]++;
++ spin_unlock_irqrestore(&kstat_glb_lock, flags);
++}
++
+ /*
+ * This is the 'heart' of the zoned buddy allocator.
+ */
+@@ -869,6 +894,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned i
+ int do_retry;
+ int alloc_flags;
+ int did_some_progress;
++ cycles_t start;
+
+ might_sleep_if(wait);
+
+@@ -880,6 +906,7 @@ restart:
+ return NULL;
+ }
+
++ start = get_cycles();
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+ if (page)
+@@ -997,6 +1024,7 @@ rebalance:
+ }
+
+ nopage:
++ __alloc_collect_stats(gfp_mask, order, page, start);
+ if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
+ printk(KERN_WARNING "%s: page allocation failure."
+ " order:%d, mode:0x%x\n",
+@@ -1004,7 +1032,13 @@ nopage:
+ dump_stack();
+ show_mem();
+ }
++ return NULL;
++
+ got_pg:
++ if (ub_page_charge(page, order, gfp_mask)) {
++ __free_pages(page, order);
++ page = NULL;
++ }
+ return page;
+ }
+
+@@ -2304,7 +2338,10 @@ static void *vmstat_start(struct seq_fil
+ m->private = ps;
+ if (!ps)
+ return ERR_PTR(-ENOMEM);
+- get_full_page_state(ps);
++ if (ve_is_super(get_exec_env()))
++ get_full_page_state(ps);
++ else
++ memset(ps, 0, sizeof(*ps));
+ ps->pgpgin /= 2; /* sectors -> kbytes */
+ ps->pgpgout /= 2;
+ return (unsigned long *)ps + *pos;
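__alloc_collect_stats() above files every allocation into one of five latency buckets: atomic allocations first, then lowmem versus highmem, each split by single-page versus higher order. The snippet below reproduces only that bucket-index computation; the flag bits are illustrative placeholders, not the kernel's __GFP_WAIT/__GFP_HIGHMEM values.

#include <stdio.h>

/* Illustrative flag bits; the real gfp.h constants differ. */
#define GFP_WAIT_BIT  0x01    /* allocation may sleep */
#define GFP_HIGH_BIT  0x02    /* highmem is acceptable */

/* Same binning as __alloc_collect_stats():
 * 0: atomic, 1: lowmem order-0, 2: lowmem order>0,
 * 3: highmem order-0, 4: highmem order>0 */
static int latency_bucket(unsigned int gfp_mask, unsigned int order)
{
    if (!(gfp_mask & GFP_WAIT_BIT))
        return 0;
    if (!(gfp_mask & GFP_HIGH_BIT))
        return order > 0 ? 2 : 1;
    return order > 0 ? 4 : 3;
}

int main(void)
{
    printf("atomic order-2  -> bucket %d\n", latency_bucket(0, 2));
    printf("lowmem order-0  -> bucket %d\n", latency_bucket(GFP_WAIT_BIT, 0));
    printf("highmem order-3 -> bucket %d\n",
           latency_bucket(GFP_WAIT_BIT | GFP_HIGH_BIT, 3));
    return 0;
}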
+diff -uprN linux-2.6.15.orig/mm/rmap.c linux-2.6.15-ve025stab014/mm/rmap.c
+--- linux-2.6.15.orig/mm/rmap.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/rmap.c 2006-01-27 14:48:06.000000000 +0300
+@@ -55,6 +55,8 @@
+
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++
+ //#define RMAP_DEBUG /* can be enabled only for debugging */
+
+ kmem_cache_t *anon_vma_cachep;
+@@ -179,7 +181,8 @@ static void anon_vma_ctor(void *data, km
+ void __init anon_vma_init(void)
+ {
+ anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
+- 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL);
++ 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_UBC,
++ anon_vma_ctor, NULL);
+ }
+
+ /*
+@@ -562,7 +565,11 @@ static int try_to_unmap_one(struct page
+ } else
+ dec_mm_counter(mm, file_rss);
+
++ dec_vma_rss(vma);
+ page_remove_rmap(page);
++ ub_unused_privvm_inc(mm, vma);
++ ub_unmap_inc(mm);
++ pb_remove_ref(page, mm);
+ page_cache_release(page);
+
+ out_unmap:
+@@ -653,8 +660,12 @@ static void try_to_unmap_cluster(unsigne
+ set_page_dirty(page);
+
+ page_remove_rmap(page);
++ ub_unmap_inc(mm);
++ pb_remove_ref(page, mm);
++ ub_unused_privvm_inc(mm, vma);
+ page_cache_release(page);
+ dec_mm_counter(mm, file_rss);
++ dec_vma_rss(vma);
+ (*mapcount)--;
+ }
+ pte_unmap_unlock(pte - 1, ptl);
+diff -uprN linux-2.6.15.orig/mm/shmem.c linux-2.6.15-ve025stab014/mm/shmem.c
+--- linux-2.6.15.orig/mm/shmem.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/shmem.c 2006-01-27 14:48:08.000000000 +0300
+@@ -49,6 +49,8 @@
+ #include <asm/div64.h>
+ #include <asm/pgtable.h>
+
++#include <ub/ub_vmpages.h>
++
+ /* This magic number is used in glibc for posix shared memory */
+ #define TMPFS_MAGIC 0x01021994
+
+@@ -210,7 +212,7 @@ static void shmem_free_blocks(struct ino
+ *
+ * It has to be called with the spinlock held.
+ */
+-static void shmem_recalc_inode(struct inode *inode)
++static void shmem_recalc_inode(struct inode *inode, long swp_freed)
+ {
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ long freed;
+@@ -220,6 +222,8 @@ static void shmem_recalc_inode(struct in
+ info->alloced -= freed;
+ shmem_unacct_blocks(info->flags, freed);
+ shmem_free_blocks(inode, freed);
++ if (freed > swp_freed)
++ ub_tmpfs_respages_sub(info, freed - swp_freed);
+ }
+ }
+
+@@ -325,6 +329,11 @@ static void shmem_swp_set(struct shmem_i
+ struct page *page = kmap_atomic_to_page(entry);
+ set_page_private(page, page_private(page) + incdec);
+ }
++
++ if (incdec == 1)
++ ub_tmpfs_respages_dec(info);
++ else
++ ub_tmpfs_respages_inc(info);
+ }
+
+ /*
+@@ -346,6 +355,9 @@ static swp_entry_t *shmem_swp_alloc(stru
+ ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+ return ERR_PTR(-EINVAL);
+
++ if (ub_shmpages_charge(info, index - info->next_index + 1))
++ return ERR_PTR(-ENOSPC);
++
+ while (!(entry = shmem_swp_entry(info, index, &page))) {
+ if (sgp == SGP_READ)
+ return shmem_swp_map(ZERO_PAGE(0));
+@@ -366,7 +378,8 @@ static swp_entry_t *shmem_swp_alloc(stru
+ }
+
+ spin_unlock(&info->lock);
+- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
++ page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) |
++ __GFP_ZERO | __GFP_UBC);
+ if (page)
+ set_page_private(page, 0);
+ spin_lock(&info->lock);
+@@ -482,6 +495,7 @@ static void shmem_truncate(struct inode
+ return;
+
+ spin_lock(&info->lock);
++ ub_shmpages_uncharge(info, info->next_index - idx);
+ info->flags |= SHMEM_TRUNCATE;
+ limit = info->next_index;
+ info->next_index = idx;
+@@ -602,7 +616,7 @@ done2:
+ info->swapped -= nr_swaps_freed;
+ if (nr_pages_to_free)
+ shmem_free_blocks(inode, nr_pages_to_free);
+- shmem_recalc_inode(inode);
++ shmem_recalc_inode(inode, nr_swaps_freed);
+ spin_unlock(&info->lock);
+
+ /*
+@@ -680,6 +694,7 @@ static void shmem_delete_inode(struct in
+ sbinfo->free_inodes++;
+ spin_unlock(&sbinfo->stat_lock);
+ }
++ shmi_ub_put(info);
+ clear_inode(inode);
+ }
+
+@@ -801,6 +816,12 @@ int shmem_unuse(swp_entry_t entry, struc
+ return found;
+ }
+
++#ifdef CONFIG_USER_RESOURCE
++#define shm_get_swap_page(info) (get_swap_page((info)->shmi_ub))
++#else
++#define shm_get_swap_page(info) (get_swap_page(NULL))
++#endif
++
+ /*
+ * Move the page from the page cache to the swap cache.
+ */
+@@ -821,12 +842,12 @@ static int shmem_writepage(struct page *
+ info = SHMEM_I(inode);
+ if (info->flags & VM_LOCKED)
+ goto redirty;
+- swap = get_swap_page();
++ swap = shm_get_swap_page(info);
+ if (!swap.val)
+ goto redirty;
+
+ spin_lock(&info->lock);
+- shmem_recalc_inode(inode);
++ shmem_recalc_inode(inode, 0);
+ if (index >= info->next_index) {
+ BUG_ON(!(info->flags & SHMEM_TRUNCATE));
+ goto unlock;
+@@ -964,7 +985,7 @@ repeat:
+ goto failed;
+
+ spin_lock(&info->lock);
+- shmem_recalc_inode(inode);
++ shmem_recalc_inode(inode, 0);
+ entry = shmem_swp_alloc(info, idx, sgp);
+ if (IS_ERR(entry)) {
+ spin_unlock(&info->lock);
+@@ -1132,6 +1153,7 @@ repeat:
+ spin_unlock(&info->lock);
+ flush_dcache_page(filepage);
+ SetPageUptodate(filepage);
++ ub_tmpfs_respages_inc(info);
+ }
+ done:
+ if (*pagep != filepage) {
+@@ -1233,28 +1255,6 @@ shmem_get_policy(struct vm_area_struct *
+ }
+ #endif
+
+-int shmem_lock(struct file *file, int lock, struct user_struct *user)
+-{
+- struct inode *inode = file->f_dentry->d_inode;
+- struct shmem_inode_info *info = SHMEM_I(inode);
+- int retval = -ENOMEM;
+-
+- spin_lock(&info->lock);
+- if (lock && !(info->flags & VM_LOCKED)) {
+- if (!user_shm_lock(inode->i_size, user))
+- goto out_nomem;
+- info->flags |= VM_LOCKED;
+- }
+- if (!lock && (info->flags & VM_LOCKED) && user) {
+- user_shm_unlock(inode->i_size, user);
+- info->flags &= ~VM_LOCKED;
+- }
+- retval = 0;
+-out_nomem:
+- spin_unlock(&info->lock);
+- return retval;
+-}
+-
+ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
+ {
+ file_accessed(file);
+@@ -1291,6 +1291,7 @@ shmem_get_inode(struct super_block *sb,
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ info = SHMEM_I(inode);
+ memset(info, 0, (char *)inode - (char *)info);
++ shmi_ub_set(info, get_exec_ub());
+ spin_lock_init(&info->lock);
+ INIT_LIST_HEAD(&info->swaplist);
+
+@@ -2120,6 +2121,10 @@ static struct vm_operations_struct shmem
+ #endif
+ };
+
++int is_shmem_mapping(struct address_space *map)
++{
++ return (map != NULL && map->a_ops == &shmem_aops);
++}
+
+ static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+@@ -2133,7 +2138,13 @@ static struct file_system_type tmpfs_fs_
+ .get_sb = shmem_get_sb,
+ .kill_sb = kill_litter_super,
+ };
++EXPORT_SYMBOL(tmpfs_fs_type);
++
++#ifdef CONFIG_VE
++#define shm_mnt (get_exec_env()->shmem_mnt)
++#else
+ static struct vfsmount *shm_mnt;
++#endif
+
+ static int __init init_tmpfs(void)
+ {
+@@ -2170,6 +2181,36 @@ out3:
+ }
+ module_init(init_tmpfs)
+
++static inline int shm_charge_ahead(struct inode *inode)
++{
++#ifdef CONFIG_USER_RESOURCE
++ struct shmem_inode_info *info = SHMEM_I(inode);
++ unsigned long idx;
++ swp_entry_t *entry;
++
++ if (!inode->i_size)
++ return 0;
++ idx = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
++ /*
++ * Just touch info to allocate space for entry and
++ * make all UBC checks
++ */
++ spin_lock(&info->lock);
++ entry = shmem_swp_alloc(info, idx, SGP_CACHE);
++ if (IS_ERR(entry))
++ goto err;
++ shmem_swp_unmap(entry);
++ spin_unlock(&info->lock);
++ return 0;
++
++err:
++ spin_unlock(&info->lock);
++ return PTR_ERR(entry);
++#else
++ return 0;
++#endif
++}
++
+ /*
+ * shmem_file_setup - get an unlinked file living in tmpfs
+ *
+@@ -2217,6 +2258,10 @@ struct file *shmem_file_setup(char *name
+ d_instantiate(dentry, inode);
+ inode->i_size = size;
+ inode->i_nlink = 0; /* It is unlinked */
++ error = shm_charge_ahead(inode);
++ if (error)
++ goto close_file;
++
+ file->f_vfsmnt = mntget(shm_mnt);
+ file->f_dentry = dentry;
+ file->f_mapping = inode->i_mapping;
+@@ -2249,6 +2294,8 @@ int shmem_zero_setup(struct vm_area_stru
+
+ if (vma->vm_file)
+ fput(vma->vm_file);
++ else if (vma->vm_flags & VM_WRITE)
++ __ub_unused_privvm_dec(vma->vm_mm, size >> PAGE_SHIFT);
+ vma->vm_file = file;
+ vma->vm_ops = &shmem_vm_ops;
+ return 0;
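shm_charge_ahead() above precharges a freshly created tmpfs file by touching the slot of its last page, whose index it derives as (i_size - 1) >> PAGE_CACHE_SHIFT. The small check below works through that arithmetic assuming 4 KiB pages: a 1-byte and a 4096-byte file both end on index 0, while 4097 bytes already needs index 1.

#include <stdio.h>

#define PAGE_SHIFT_ILLUSTRATIVE 12    /* assume 4 KiB pages for the example */

/* Index of the last page backing a file of the given size (size > 0). */
static unsigned long last_page_index(unsigned long long i_size)
{
    return (unsigned long)((i_size - 1) >> PAGE_SHIFT_ILLUSTRATIVE);
}

int main(void)
{
    printf("size 1    -> last index %lu\n", last_page_index(1));
    printf("size 4096 -> last index %lu\n", last_page_index(4096));
    printf("size 4097 -> last index %lu\n", last_page_index(4097));
    return 0;
}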
+diff -uprN linux-2.6.15.orig/mm/slab.c linux-2.6.15-ve025stab014/mm/slab.c
+--- linux-2.6.15.orig/mm/slab.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/slab.c 2006-01-27 14:48:08.000000000 +0300
+@@ -103,33 +103,19 @@
+ #include <linux/rcupdate.h>
+ #include <linux/string.h>
+ #include <linux/nodemask.h>
++#include <linux/kmem_slab.h>
++#include <linux/kmem_cache.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+ #include <asm/page.h>
+
+-/*
+- * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
+- * SLAB_RED_ZONE & SLAB_POISON.
+- * 0 for faster, smaller code (especially in the critical paths).
+- *
+- * STATS - 1 to collect stats for /proc/slabinfo.
+- * 0 for faster, smaller code (especially in the critical paths).
+- *
+- * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
+- */
+-
+-#ifdef CONFIG_DEBUG_SLAB
+-#define DEBUG 1
+-#define STATS 1
+-#define FORCED_DEBUG 1
+-#else
+-#define DEBUG 0
+-#define STATS 0
+-#define FORCED_DEBUG 0
+-#endif
++#include <ub/ub_mem.h>
+
++#define DEBUG SLAB_DEBUG
++#define STATS SLAB_STATS
++#define FORCED_DEBUG SLAB_FORCED_DEBUG
+
+ /* Shouldn't this be in a header file somewhere? */
+ #define BYTES_PER_WORD sizeof(void *)
+@@ -172,140 +158,27 @@
+ SLAB_NO_REAP | SLAB_CACHE_DMA | \
+ SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
+ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
+- SLAB_DESTROY_BY_RCU)
++ SLAB_DESTROY_BY_RCU | SLAB_UBC | SLAB_NO_CHARGE)
+ #else
+ # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
+ SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
+ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
+- SLAB_DESTROY_BY_RCU)
++ SLAB_DESTROY_BY_RCU | SLAB_UBC | SLAB_NO_CHARGE)
+ #endif
+
+-/*
+- * kmem_bufctl_t:
+- *
+- * Bufctl's are used for linking objs within a slab
+- * linked offsets.
+- *
+- * This implementation relies on "struct page" for locating the cache &
+- * slab an object belongs to.
+- * This allows the bufctl structure to be small (one int), but limits
+- * the number of objects a slab (not a cache) can contain when off-slab
+- * bufctls are used. The limit is the size of the largest general cache
+- * that does not use off-slab slabs.
+- * For 32bit archs with 4 kB pages, is this 56.
+- * This is not serious, as it is only for large objects, when it is unwise
+- * to have too many per slab.
+- * Note: This limit can be raised by introducing a general cache whose size
+- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
+- */
+-
+-typedef unsigned int kmem_bufctl_t;
+-#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
+-#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
+-#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
+-
+ /* Max number of objs-per-slab for caches which use off-slab slabs.
+ * Needed to avoid a possible looping condition in cache_grow().
+ */
+ static unsigned long offslab_limit;
+
+ /*
+- * struct slab
+- *
+- * Manages the objs in a slab. Placed either at the beginning of mem allocated
+- * for a slab, or allocated from an general cache.
+- * Slabs are chained into three list: fully used, partial, fully free slabs.
+- */
+-struct slab {
+- struct list_head list;
+- unsigned long colouroff;
+- void *s_mem; /* including colour offset */
+- unsigned int inuse; /* num of objs active in slab */
+- kmem_bufctl_t free;
+- unsigned short nodeid;
+-};
+-
+-/*
+- * struct slab_rcu
+- *
+- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
+- * arrange for kmem_freepages to be called via RCU. This is useful if
+- * we need to approach a kernel structure obliquely, from its address
+- * obtained without the usual locking. We can lock the structure to
+- * stabilize it and check it's still at the given address, only if we
+- * can be sure that the memory has not been meanwhile reused for some
+- * other kind of object (which our subsystem's lock might corrupt).
+- *
+- * rcu_read_lock before reading the address, then rcu_read_unlock after
+- * taking the spinlock within the structure expected at that address.
+- *
+- * We assume struct slab_rcu can overlay struct slab when destroying.
+- */
+-struct slab_rcu {
+- struct rcu_head head;
+- kmem_cache_t *cachep;
+- void *addr;
+-};
+-
+-/*
+- * struct array_cache
+- *
+- * Purpose:
+- * - LIFO ordering, to hand out cache-warm objects from _alloc
+- * - reduce the number of linked list operations
+- * - reduce spinlock operations
+- *
+- * The limit is stored in the per-cpu structure to reduce the data cache
+- * footprint.
+- *
+- */
+-struct array_cache {
+- unsigned int avail;
+- unsigned int limit;
+- unsigned int batchcount;
+- unsigned int touched;
+- spinlock_t lock;
+- void *entry[0]; /*
+- * Must have this definition in here for the proper
+- * alignment of array_cache. Also simplifies accessing
+- * the entries.
+- * [0] is for gcc 2.95. It should really be [].
+- */
+-};
+-
+-/* bootstrap: The caches do not work without cpuarrays anymore,
+- * but the cpuarrays are allocated from the generic caches...
+- */
+-#define BOOT_CPUCACHE_ENTRIES 1
+-struct arraycache_init {
+- struct array_cache cache;
+- void * entries[BOOT_CPUCACHE_ENTRIES];
+-};
+-
+-/*
+- * The slab lists for all objects.
+- */
+-struct kmem_list3 {
+- struct list_head slabs_partial; /* partial list first, better asm code */
+- struct list_head slabs_full;
+- struct list_head slabs_free;
+- unsigned long free_objects;
+- unsigned long next_reap;
+- int free_touched;
+- unsigned int free_limit;
+- spinlock_t list_lock;
+- struct array_cache *shared; /* shared per node */
+- struct array_cache **alien; /* on other nodes */
+-};
+-
+-/*
+ * Need this for bootstrapping a per node allocator.
+ */
+ #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
+ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
+ #define CACHE_CACHE 0
+-#define SIZE_AC 1
+-#define SIZE_L3 (1 + MAX_NUMNODES)
++#define SIZE_AC 1
++#define SIZE_L3 (1 + MAX_NUMNODES)
+
+ /*
+ * This function must be completely optimized away if
+@@ -362,74 +235,6 @@ static inline void kmem_list3_init(struc
+ MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
+ } while (0)
+
+-/*
+- * kmem_cache_t
+- *
+- * manages a cache.
+- */
+-
+-struct kmem_cache {
+-/* 1) per-cpu data, touched during every alloc/free */
+- struct array_cache *array[NR_CPUS];
+- unsigned int batchcount;
+- unsigned int limit;
+- unsigned int shared;
+- unsigned int objsize;
+-/* 2) touched by every alloc & free from the backend */
+- struct kmem_list3 *nodelists[MAX_NUMNODES];
+- unsigned int flags; /* constant flags */
+- unsigned int num; /* # of objs per slab */
+- spinlock_t spinlock;
+-
+-/* 3) cache_grow/shrink */
+- /* order of pgs per slab (2^n) */
+- unsigned int gfporder;
+-
+- /* force GFP flags, e.g. GFP_DMA */
+- gfp_t gfpflags;
+-
+- size_t colour; /* cache colouring range */
+- unsigned int colour_off; /* colour offset */
+- unsigned int colour_next; /* cache colouring */
+- kmem_cache_t *slabp_cache;
+- unsigned int slab_size;
+- unsigned int dflags; /* dynamic flags */
+-
+- /* constructor func */
+- void (*ctor)(void *, kmem_cache_t *, unsigned long);
+-
+- /* de-constructor func */
+- void (*dtor)(void *, kmem_cache_t *, unsigned long);
+-
+-/* 4) cache creation/removal */
+- const char *name;
+- struct list_head next;
+-
+-/* 5) statistics */
+-#if STATS
+- unsigned long num_active;
+- unsigned long num_allocations;
+- unsigned long high_mark;
+- unsigned long grown;
+- unsigned long reaped;
+- unsigned long errors;
+- unsigned long max_freeable;
+- unsigned long node_allocs;
+- unsigned long node_frees;
+- atomic_t allochit;
+- atomic_t allocmiss;
+- atomic_t freehit;
+- atomic_t freemiss;
+-#endif
+-#if DEBUG
+- int dbghead;
+- int reallen;
+-#endif
+-};
+-
+-#define CFLGS_OFF_SLAB (0x80000000UL)
+-#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
+-
+ #define BATCHREFILL_LIMIT 16
+ /* Optimization question: fewer reaps means less
+ * probability for unnessary cpucache drain/refill cycles.
+@@ -565,30 +370,6 @@ static void **dbg_userword(kmem_cache_t
+ #define BREAK_GFP_ORDER_LO 0
+ static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+
+-/* Functions for storing/retrieving the cachep and or slab from the
+- * global 'mem_map'. These are used to find the slab an obj belongs to.
+- * With kfree(), these are used to find the cache which an obj belongs to.
+- */
+-static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
+-{
+- page->lru.next = (struct list_head *)cache;
+-}
+-
+-static inline struct kmem_cache *page_get_cache(struct page *page)
+-{
+- return (struct kmem_cache *)page->lru.next;
+-}
+-
+-static inline void page_set_slab(struct page *page, struct slab *slab)
+-{
+- page->lru.prev = (struct list_head *)slab;
+-}
+-
+-static inline struct slab *page_get_slab(struct page *page)
+-{
+- return (struct slab *)page->lru.prev;
+-}
+-
+ /* These are the default caches for kmalloc. Custom caches can have other sizes. */
+ struct cache_sizes malloc_sizes[] = {
+ #define CACHE(x) { .cs_size = (x) },
+@@ -701,15 +482,25 @@ static void cache_estimate(unsigned long
+ {
+ int i;
+ size_t wastage = PAGE_SIZE<<gfporder;
+- size_t extra = 0;
+- size_t base = 0;
++ size_t extra;
++ size_t base;
++ size_t ub_align, ub_extra;
+
+ if (!(flags & CFLGS_OFF_SLAB)) {
+ base = sizeof(struct slab);
+ extra = sizeof(kmem_bufctl_t);
++ ub_align = UB_ALIGN(flags);
++ ub_extra = UB_EXTRA(flags);
++ } else {
++ base = 0;
++ extra = 0;
++ ub_align = UB_ALIGN(0);
++ ub_extra = UB_EXTRA(0);
+ }
++
+ i = 0;
+- while (i*size + ALIGN(base+i*extra, align) <= wastage)
++ while (i * size + ALIGN(ALIGN(base + i * extra, ub_align) +
++ i * ub_extra, align) <= wastage)
+ i++;
+ if (i > 0)
+ i--;
+@@ -718,8 +509,8 @@ static void cache_estimate(unsigned long
+ i = SLAB_LIMIT;
+
+ *num = i;
+- wastage -= i*size;
+- wastage -= ALIGN(base+i*extra, align);
++ wastage -= i * size + ALIGN(ALIGN(base + i * extra, ub_align)
++ + i * ub_extra, align);
+ *left_over = wastage;
+ }
+
+@@ -1075,13 +866,15 @@ void __init kmem_cache_init(void)
+
+ sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
+ sizes[INDEX_AC].cs_size, ARCH_KMALLOC_MINALIGN,
+- (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
++ (ARCH_KMALLOC_FLAGS | SLAB_PANIC |
++ SLAB_UBC | SLAB_NO_CHARGE), NULL, NULL);
+
+ if (INDEX_AC != INDEX_L3)
+ sizes[INDEX_L3].cs_cachep =
+ kmem_cache_create(names[INDEX_L3].name,
+ sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN,
+- (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
++ (ARCH_KMALLOC_FLAGS | SLAB_PANIC |
++ SLAB_UBC | SLAB_NO_CHARGE), NULL, NULL);
+
+ while (sizes->cs_size != ULONG_MAX) {
+ /*
+@@ -1094,13 +887,12 @@ void __init kmem_cache_init(void)
+ if(!sizes->cs_cachep)
+ sizes->cs_cachep = kmem_cache_create(names->name,
+ sizes->cs_size, ARCH_KMALLOC_MINALIGN,
+- (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
++ (ARCH_KMALLOC_FLAGS | SLAB_PANIC |
++ SLAB_UBC | SLAB_NO_CHARGE), NULL, NULL);
+
+ /* Inc off-slab bufctl limit until the ceiling is hit. */
+- if (!(OFF_SLAB(sizes->cs_cachep))) {
+- offslab_limit = sizes->cs_size-sizeof(struct slab);
+- offslab_limit /= sizeof(kmem_bufctl_t);
+- }
++ if (!(OFF_SLAB(sizes->cs_cachep)))
++ offslab_limit = sizes->cs_size;
+
+ sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
+ sizes->cs_size, ARCH_KMALLOC_MINALIGN,
+@@ -1511,7 +1303,7 @@ kmem_cache_create (const char *name, siz
+ unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
+ void (*dtor)(void*, kmem_cache_t *, unsigned long))
+ {
+- size_t left_over, slab_size, ralign;
++ size_t left_over, slab_size, ralign, ub_align, ub_extra;
+ kmem_cache_t *cachep = NULL;
+ struct list_head *p;
+
+@@ -1672,6 +1464,8 @@ kmem_cache_create (const char *name, siz
+ flags |= CFLGS_OFF_SLAB;
+
+ size = ALIGN(size, align);
++ ub_align = UB_ALIGN(flags);
++ ub_extra = UB_EXTRA(flags);
+
+ if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) {
+ /*
+@@ -1692,6 +1486,8 @@ kmem_cache_create (const char *name, siz
+ */
+ do {
+ unsigned int break_flag = 0;
++ unsigned long off_slab_size;
++
+ cal_wastage:
+ cache_estimate(cachep->gfporder, size, align, flags,
+ &left_over, &cachep->num);
+@@ -1701,12 +1497,17 @@ cal_wastage:
+ break;
+ if (!cachep->num)
+ goto next;
+- if (flags & CFLGS_OFF_SLAB &&
+- cachep->num > offslab_limit) {
++ if (flags & CFLGS_OFF_SLAB) {
++ off_slab_size = sizeof(struct slab) +
++ cachep->num * sizeof(kmem_bufctl_t);
++ off_slab_size = ALIGN(off_slab_size, ub_align) +
++ cachep->num * ub_extra;
+ /* This num of objs will cause problems. */
+- cachep->gfporder--;
+- break_flag++;
+- goto cal_wastage;
++ if (off_slab_size > offslab_limit) {
++ cachep->gfporder--;
++ break_flag++;
++ goto cal_wastage;
++ }
+ }
+
+ /*
+@@ -1729,8 +1530,9 @@ next:
+ cachep = NULL;
+ goto oops;
+ }
+- slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t)
+- + sizeof(struct slab), align);
++ slab_size = ALIGN(ALIGN(cachep->num * sizeof(kmem_bufctl_t) +
++ sizeof(struct slab), ub_align) +
++ cachep->num * ub_extra, align);
+
+ /*
+ * If the slab has been placed off-slab, and we have enough space then
+@@ -1743,7 +1545,9 @@ next:
+
+ if (flags & CFLGS_OFF_SLAB) {
+ /* really off slab. No need for manual alignment */
+- slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(struct slab);
++ slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) +
++ sizeof(struct slab), ub_align) +
++ cachep->num * ub_extra;
+ }
+
+ cachep->colour_off = cache_line_size();
+@@ -1825,6 +1629,7 @@ next:
+ /* cache setup completed, link it into the list */
+ list_add(&cachep->next, &cache_chain);
+ unlock_cpu_hotplug();
++ set_cache_objuse(cachep);
+ oops:
+ if (!cachep && (flags & SLAB_PANIC))
+ panic("kmem_cache_create(): failed to create slab `%s'\n",
+@@ -2064,7 +1869,8 @@ static struct slab* alloc_slabmgmt(kmem_
+
+ if (OFF_SLAB(cachep)) {
+ /* Slab management obj is off-slab. */
+- slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
++ slabp = kmem_cache_alloc(cachep->slabp_cache,
++ local_flags & (~__GFP_UBC));
+ if (!slabp)
+ return NULL;
+ } else {
+@@ -2074,15 +1880,11 @@ static struct slab* alloc_slabmgmt(kmem_
+ slabp->inuse = 0;
+ slabp->colouroff = colour_off;
+ slabp->s_mem = objp+colour_off;
++ init_slab_ubps(cachep, slabp);
+
+ return slabp;
+ }
+
+-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+-{
+- return (kmem_bufctl_t *)(slabp+1);
+-}
+-
+ static void cache_init_objs(kmem_cache_t *cachep,
+ struct slab *slabp, unsigned long ctor_flags)
+ {
+@@ -2213,7 +2015,7 @@ static int cache_grow(kmem_cache_t *cach
+ /* Get mem for the objs.
+ * Attempt to allocate a physical page from 'nodeid',
+ */
+- if (!(objp = kmem_getpages(cachep, flags, nodeid)))
++ if (!(objp = kmem_getpages(cachep, flags & (~__GFP_UBC), nodeid)))
+ goto failed;
+
+ /* Get slab management. */
+@@ -2552,6 +2354,11 @@ static inline void *__cache_alloc(kmem_c
+ objp = cache_alloc_debugcheck_after(cachep, flags, objp,
+ __builtin_return_address(0));
+ prefetchw(objp);
++
++ if (objp && ub_slab_charge(objp, flags)) {
++ kmem_cache_free(cachep, objp);
++ objp = NULL;
++ }
+ return objp;
+ }
+
+@@ -2749,6 +2556,8 @@ static inline void __cache_free(kmem_cac
+ check_irq_off();
+ objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
+
++ ub_slab_uncharge(objp);
++
+ /* Make sure we are not freeing a object from another
+ * node to the array cache on this cpu.
+ */
+@@ -2884,6 +2693,10 @@ void *kmem_cache_alloc_node(kmem_cache_t
+ local_irq_restore(save_flags);
+ ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, __builtin_return_address(0));
+
++ if (ptr && ub_slab_charge(ptr, flags)) {
++ kmem_cache_free(cachep, ptr);
++ ptr = NULL;
++ }
+ return ptr;
+ }
+ EXPORT_SYMBOL(kmem_cache_alloc_node);
+@@ -3295,6 +3108,7 @@ static void cache_reap(void *unused)
+ return;
+ }
+
++ {KSTAT_PERF_ENTER(cache_reap)
+ list_for_each(walk, &cache_chain) {
+ kmem_cache_t *searchp;
+ struct list_head* p;
+@@ -3359,6 +3173,7 @@ next:
+ check_irq_on();
+ up(&cache_chain_sem);
+ drain_remote_pages();
++ KSTAT_PERF_LEAVE(cache_reap)}
+ /* Setup the next iteration */
+ schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
+ }
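The reworked cache_estimate() answers how many objects fit into a slab of 2^gfporder pages once every on-slab object also needs a bufctl entry plus per-object accounting space, each block with its own alignment: it simply grows a candidate count until the total spills over the slab. The standalone version below reproduces that search; ALIGN_UP matches the kernel's ALIGN(), while the sizes and alignments fed to it in main() are made up for the example.

#include <stdio.h>
#include <stddef.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((size_t)(a) - 1))

/* How many objects of `size` fit in `slab_bytes`, when each object also costs
 * `extra` bytes of management data (the block aligned to mgmt_align) plus
 * `ub_extra` bytes of accounting data, and objects start at `obj_align`. */
static unsigned int objs_per_slab(size_t slab_bytes, size_t size, size_t base,
                                  size_t extra, size_t ub_extra,
                                  size_t mgmt_align, size_t obj_align)
{
    unsigned int i = 0;

    while (i * size + ALIGN_UP(ALIGN_UP(base + i * extra, mgmt_align) +
                               i * ub_extra, obj_align) <= slab_bytes)
        i++;
    return i ? i - 1 : 0;
}

int main(void)
{
    /* 4 KiB slab, 128-byte objects, 40-byte slab header, 4-byte bufctl,
     * 8 bytes of per-object accounting; alignments are illustrative. */
    printf("objects per slab: %u\n",
           objs_per_slab(4096, 128, 40, 4, 8, 8, 64));
    return 0;
}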
+diff -uprN linux-2.6.15.orig/mm/swap.c linux-2.6.15-ve025stab014/mm/swap.c
+--- linux-2.6.15.orig/mm/swap.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/swap.c 2006-01-27 14:48:05.000000000 +0300
+@@ -349,7 +349,9 @@ void pagevec_strip(struct pagevec *pvec)
+ struct page *page = pvec->pages[i];
+
+ if (PagePrivate(page) && !TestSetPageLocked(page)) {
+- try_to_release_page(page, 0);
++ /* need to recheck after lock */
++ if (page_has_buffers(page))
++ try_to_release_page(page, 0);
+ unlock_page(page);
+ }
+ }
+diff -uprN linux-2.6.15.orig/mm/swap_state.c linux-2.6.15-ve025stab014/mm/swap_state.c
+--- linux-2.6.15.orig/mm/swap_state.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/swap_state.c 2006-01-27 14:48:08.000000000 +0300
+@@ -17,6 +17,8 @@
+
+ #include <asm/pgtable.h>
+
++#include <ub/ub_vmpages.h>
++
+ /*
+ * swapper_space is a fiction, retained to simplify the path through
+ * vmscan's shrink_list, to make sync_page look nicer, and to allow
+@@ -51,6 +53,7 @@ static struct {
+ unsigned long noent_race;
+ unsigned long exist_race;
+ } swap_cache_info;
++EXPORT_SYMBOL(swap_cache_info);
+
+ void show_swap_cache_info(void)
+ {
+@@ -149,7 +152,14 @@ int add_to_swap(struct page * page)
+ BUG();
+
+ for (;;) {
+- entry = get_swap_page();
++ struct user_beancounter *ub;
++
++ ub = pb_grab_page_ub(page);
++ if (IS_ERR(ub))
++ return 0;
++
++ entry = get_swap_page(ub);
++ put_beancounter(ub);
+ if (!entry.val)
+ return 0;
+
+diff -uprN linux-2.6.15.orig/mm/swapfile.c linux-2.6.15-ve025stab014/mm/swapfile.c
+--- linux-2.6.15.orig/mm/swapfile.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/swapfile.c 2006-01-27 14:48:08.000000000 +0300
+@@ -31,6 +31,8 @@
+ #include <asm/tlbflush.h>
+ #include <linux/swapops.h>
+
++#include <ub/ub_vmpages.h>
++
+ DEFINE_SPINLOCK(swap_lock);
+ unsigned int nr_swapfiles;
+ long total_swap_pages;
+@@ -170,7 +172,7 @@ no_page:
+ return 0;
+ }
+
+-swp_entry_t get_swap_page(void)
++swp_entry_t get_swap_page(struct user_beancounter *ub)
+ {
+ struct swap_info_struct *si;
+ pgoff_t offset;
+@@ -200,6 +202,7 @@ swp_entry_t get_swap_page(void)
+ offset = scan_swap_map(si);
+ if (offset) {
+ spin_unlock(&swap_lock);
++ ub_swapentry_inc(si, offset, ub);
+ return swp_entry(type, offset);
+ }
+ next = swap_list.next;
+@@ -255,6 +258,7 @@ static int swap_entry_free(struct swap_i
+ count--;
+ p->swap_map[offset] = count;
+ if (!count) {
++ ub_swapentry_dec(p, offset);
+ if (offset < p->lowest_bit)
+ p->lowest_bit = offset;
+ if (offset > p->highest_bit)
+@@ -401,11 +405,18 @@ void free_swap_and_cache(swp_entry_t ent
+ * force COW, vm_page_prot omits write permission from any private vma.
+ */
+ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+- unsigned long addr, swp_entry_t entry, struct page *page)
++ unsigned long addr, swp_entry_t entry, struct page *page,
++ struct page_beancounter **pb)
+ {
+- inc_mm_counter(vma->vm_mm, anon_rss);
++ struct mm_struct *mm;
++
++ mm = vma->vm_mm;
++ inc_mm_counter(mm, anon_rss);
++ inc_vma_rss(vma);
++ ub_unused_privvm_dec(mm, vma);
++ pb_add_list_ref(page, mm, pb);
+ get_page(page);
+- set_pte_at(vma->vm_mm, addr, pte,
++ set_pte_at(mm, addr, pte,
+ pte_mkold(mk_pte(page, vma->vm_page_prot)));
+ page_add_anon_rmap(page, vma, addr);
+ swap_free(entry);
+@@ -418,7 +429,8 @@ static void unuse_pte(struct vm_area_str
+
+ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+- swp_entry_t entry, struct page *page)
++ swp_entry_t entry, struct page *page,
++ struct page_beancounter **pb)
+ {
+ pte_t swp_pte = swp_entry_to_pte(entry);
+ pte_t *pte;
+@@ -432,7 +444,7 @@ static int unuse_pte_range(struct vm_are
+ * Test inline before going to call unuse_pte.
+ */
+ if (unlikely(pte_same(*pte, swp_pte))) {
+- unuse_pte(vma, pte++, addr, entry, page);
++ unuse_pte(vma, pte++, addr, entry, page, pb);
+ found = 1;
+ break;
+ }
+@@ -443,7 +455,8 @@ static int unuse_pte_range(struct vm_are
+
+ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr, unsigned long end,
+- swp_entry_t entry, struct page *page)
++ swp_entry_t entry, struct page *page,
++ struct page_beancounter **pb)
+ {
+ pmd_t *pmd;
+ unsigned long next;
+@@ -453,7 +466,7 @@ static inline int unuse_pmd_range(struct
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+- if (unuse_pte_range(vma, pmd, addr, next, entry, page))
++ if (unuse_pte_range(vma, pmd, addr, next, entry, page, pb))
+ return 1;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+@@ -461,7 +474,8 @@ static inline int unuse_pmd_range(struct
+
+ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+- swp_entry_t entry, struct page *page)
++ swp_entry_t entry, struct page *page,
++ struct page_beancounter **pb)
+ {
+ pud_t *pud;
+ unsigned long next;
+@@ -471,14 +485,15 @@ static inline int unuse_pud_range(struct
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+- if (unuse_pmd_range(vma, pud, addr, next, entry, page))
++ if (unuse_pmd_range(vma, pud, addr, next, entry, page, pb))
+ return 1;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+ }
+
+ static int unuse_vma(struct vm_area_struct *vma,
+- swp_entry_t entry, struct page *page)
++ swp_entry_t entry, struct page *page,
++ struct page_beancounter **pb)
+ {
+ pgd_t *pgd;
+ unsigned long addr, end, next;
+@@ -499,14 +514,15 @@ static int unuse_vma(struct vm_area_stru
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+- if (unuse_pud_range(vma, pgd, addr, next, entry, page))
++ if (unuse_pud_range(vma, pgd, addr, next, entry, page, pb))
+ return 1;
+ } while (pgd++, addr = next, addr != end);
+ return 0;
+ }
+
+ static int unuse_mm(struct mm_struct *mm,
+- swp_entry_t entry, struct page *page)
++ swp_entry_t entry, struct page *page,
++ struct page_beancounter **pb)
+ {
+ struct vm_area_struct *vma;
+
+@@ -521,7 +537,7 @@ static int unuse_mm(struct mm_struct *mm
+ lock_page(page);
+ }
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+- if (vma->anon_vma && unuse_vma(vma, entry, page))
++ if (vma->anon_vma && unuse_vma(vma, entry, page, pb))
+ break;
+ }
+ up_read(&mm->mmap_sem);
+@@ -587,6 +603,7 @@ static int try_to_unuse(unsigned int typ
+ int retval = 0;
+ int reset_overflow = 0;
+ int shmem;
++ struct page_beancounter *pb;
+
+ /*
+ * When searching mms for an entry, a good strategy is to
+@@ -638,6 +655,13 @@ static int try_to_unuse(unsigned int typ
+ break;
+ }
+
++ pb = NULL;
++ if (pb_alloc_all(&pb)) {
++ page_cache_release(page);
++ retval = -ENOMEM;
++ break;
++ }
++
+ /*
+ * Don't hold on to start_mm if it looks like exiting.
+ */
+@@ -671,7 +695,7 @@ static int try_to_unuse(unsigned int typ
+ if (start_mm == &init_mm)
+ shmem = shmem_unuse(entry, page);
+ else
+- retval = unuse_mm(start_mm, entry, page);
++ retval = unuse_mm(start_mm, entry, page, &pb);
+ }
+ if (*swap_map > 1) {
+ int set_start_mm = (*swap_map >= swcount);
+@@ -703,7 +727,7 @@ static int try_to_unuse(unsigned int typ
+ set_start_mm = 1;
+ shmem = shmem_unuse(entry, page);
+ } else
+- retval = unuse_mm(mm, entry, page);
++ retval = unuse_mm(mm, entry, page, &pb);
+ if (set_start_mm && *swap_map < swcount) {
+ mmput(new_start_mm);
+ atomic_inc(&mm->mm_users);
+@@ -717,6 +741,8 @@ static int try_to_unuse(unsigned int typ
+ mmput(start_mm);
+ start_mm = new_start_mm;
+ }
++
++ pb_free_list(&pb);
+ if (retval) {
+ unlock_page(page);
+ page_cache_release(page);
+@@ -1062,6 +1088,10 @@ asmlinkage long sys_swapoff(const char _
+ int i, type, prev;
+ int err;
+
++ /* The VE admin check is just to be on the safe side: the admin may affect
++ * swaps only if he has access to the special device file, i.e. if he has
++ * been granted access to the block device or if the swap file is in an
++ * area visible to him. */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+@@ -1161,6 +1191,7 @@ asmlinkage long sys_swapoff(const char _
+ spin_unlock(&swap_lock);
+ up(&swapon_sem);
+ vfree(swap_map);
++ ub_swap_fini(p);
+ inode = mapping->host;
+ if (S_ISBLK(inode->i_mode)) {
+ struct block_device *bdev = I_BDEV(inode);
+@@ -1519,6 +1550,11 @@ asmlinkage long sys_swapon(const char __
+ goto bad_swap;
+ }
+
++ if (ub_swap_init(p, maxpages)) {
++ error = -ENOMEM;
++ goto bad_swap;
++ }
++
+ down(&swapon_sem);
+ spin_lock(&swap_lock);
+ p->flags = SWP_ACTIVE;
+diff -uprN linux-2.6.15.orig/mm/vmalloc.c linux-2.6.15-ve025stab014/mm/vmalloc.c
+--- linux-2.6.15.orig/mm/vmalloc.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/vmalloc.c 2006-01-27 14:48:08.000000000 +0300
+@@ -20,6 +20,8 @@
+ #include <asm/uaccess.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_debug.h>
++
+
+ DEFINE_RWLOCK(vmlist_lock);
+ struct vm_struct *vmlist;
+@@ -256,6 +258,66 @@ struct vm_struct *get_vm_area_node(unsig
+ return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node);
+ }
+
++struct vm_struct * get_vm_area_best(unsigned long size, unsigned long flags)
++{
++ unsigned long addr, best_addr, delta, best_delta;
++ struct vm_struct **p, **best_p, *tmp, *area;
++
++ area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
++ if (!area)
++ return NULL;
++
++ size += PAGE_SIZE; /* one-page gap at the end */
++ addr = VMALLOC_START;
++ best_addr = 0UL;
++ best_p = NULL;
++ best_delta = PAGE_ALIGN(VMALLOC_END) - VMALLOC_START;
++
++ write_lock(&vmlist_lock);
++ for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
++ if ((size + addr) < addr)
++ break;
++ delta = (unsigned long) tmp->addr - (size + addr);
++ if (delta < best_delta) {
++ best_delta = delta;
++ best_addr = addr;
++ best_p = p;
++ }
++ addr = tmp->size + (unsigned long) tmp->addr;
++ if (addr > VMALLOC_END-size)
++ break;
++ }
++
++ if (!tmp) {
++ /* check free area after list end */
++ delta = (unsigned long) PAGE_ALIGN(VMALLOC_END) - (size + addr);
++ if (delta < best_delta) {
++ best_delta = delta;
++ best_addr = addr;
++ best_p = p;
++ }
++ }
++ if (best_addr) {
++ area->flags = flags;
++ /* allocate at the end of this area */
++ area->addr = (void *)(best_addr + best_delta);
++ area->size = size;
++ area->next = *best_p;
++ area->pages = NULL;
++ area->nr_pages = 0;
++ area->phys_addr = 0;
++ *best_p = area;
++ /* check like in __vunmap */
++ WARN_ON((PAGE_SIZE - 1) & (unsigned long)area->addr);
++ } else {
++ kfree(area);
++ area = NULL;
++ }
++ write_unlock(&vmlist_lock);
++
++ return area;
++}
++
+ /* Caller must hold vmlist_lock */
+ struct vm_struct *__remove_vm_area(void *addr)
+ {
+@@ -296,7 +358,7 @@ struct vm_struct *remove_vm_area(void *a
+ return v;
+ }
+
+-void __vunmap(void *addr, int deallocate_pages)
++void __vunmap(void *addr, int deallocate_pages, int uncharge)
+ {
+ struct vm_struct *area;
+
+@@ -320,6 +382,8 @@ void __vunmap(void *addr, int deallocate
+ if (deallocate_pages) {
+ int i;
+
++ if (uncharge)
++ dec_vmalloc_charged(area);
+ for (i = 0; i < area->nr_pages; i++) {
+ if (unlikely(!area->pages[i]))
+ BUG();
+@@ -350,7 +414,7 @@ void __vunmap(void *addr, int deallocate
+ void vfree(void *addr)
+ {
+ BUG_ON(in_interrupt());
+- __vunmap(addr, 1);
++ __vunmap(addr, 1, 1);
+ }
+ EXPORT_SYMBOL(vfree);
+
+@@ -367,7 +431,7 @@ EXPORT_SYMBOL(vfree);
+ void vunmap(void *addr)
+ {
+ BUG_ON(in_interrupt());
+- __vunmap(addr, 0);
++ __vunmap(addr, 0, 0);
+ }
+ EXPORT_SYMBOL(vunmap);
+
+@@ -439,10 +503,12 @@ void *__vmalloc_area_node(struct vm_stru
+
+ if (map_vm_area(area, prot, &pages))
+ goto fail;
++
++ inc_vmalloc_charged(area, gfp_mask);
+ return area->addr;
+
+ fail:
+- vfree(area->addr);
++ __vunmap(area->addr, 1, 0);
+ return NULL;
+ }
+
+@@ -486,6 +552,21 @@ void *__vmalloc(unsigned long size, gfp_
+ }
+ EXPORT_SYMBOL(__vmalloc);
+
++static void *____vmalloc(unsigned long size, gfp_t mask, pgprot_t prot)
++{
++ struct vm_struct *area;
++
++ size = PAGE_ALIGN(size);
++ if (!size || (size >> PAGE_SHIFT) > num_physpages)
++ return NULL;
++
++ area = get_vm_area_best(size, VM_ALLOC);
++ if (!area)
++ return NULL;
++
++ return __vmalloc_area_node(area, mask, prot, -1);
++}
++
+ /**
+ * vmalloc - allocate virtually contiguous memory
+ *
+@@ -503,6 +584,20 @@ void *vmalloc(unsigned long size)
+ }
+ EXPORT_SYMBOL(vmalloc);
+
++void *vmalloc_best(unsigned long size)
++{
++ return ____vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
++}
++
++EXPORT_SYMBOL(vmalloc_best);
++
++void *ub_vmalloc_best(unsigned long size)
++{
++ return ____vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL);
++}
++
++EXPORT_SYMBOL(ub_vmalloc_best);
++
+ /**
+ * vmalloc_node - allocate memory on a specific node
+ *
+@@ -631,3 +726,37 @@ finished:
+ read_unlock(&vmlist_lock);
+ return buf - buf_start;
+ }
++
++void vprintstat(void)
++{
++ struct vm_struct *p, *last_p = NULL;
++ unsigned long addr, size, free_size, max_free_size;
++ int num;
++
++ addr = VMALLOC_START;
++ size = max_free_size = 0;
++ num = 0;
++
++ read_lock(&vmlist_lock);
++ for (p = vmlist; p; p = p->next) {
++ free_size = (unsigned long)p->addr - addr;
++ if (free_size > max_free_size)
++ max_free_size = free_size;
++ addr = (unsigned long)p->addr + p->size;
++ size += p->size;
++ ++num;
++ last_p = p;
++ }
++ if (last_p) {
++ free_size = VMALLOC_END -
++ ((unsigned long)last_p->addr + last_p->size);
++ if (free_size > max_free_size)
++ max_free_size = free_size;
++ }
++ read_unlock(&vmlist_lock);
++
++ printk("VMALLOC Used: %luKB Total: %luKB Entries: %d\n"
++ " Max_Free: %luKB Start: %lx End: %lx\n",
++ size/1024, (VMALLOC_END - VMALLOC_START)/1024, num,
++ max_free_size/1024, VMALLOC_START, VMALLOC_END);
++}
+diff -uprN linux-2.6.15.orig/mm/vmscan.c linux-2.6.15-ve025stab014/mm/vmscan.c
+--- linux-2.6.15.orig/mm/vmscan.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/mm/vmscan.c 2006-01-27 14:48:08.000000000 +0300
+@@ -39,6 +39,8 @@
+
+ #include <linux/swapops.h>
+
++#include <ub/ub_oom.h>
++
+ /* possible outcome of pageout() */
+ typedef enum {
+ /* failed to write page out, page is locked */
+@@ -82,6 +84,9 @@ struct scan_control {
+ * In this context, it doesn't matter that we scan the
+ * whole list at once. */
+ int swap_cluster_max;
++#ifdef CONFIG_USER_RESOURCE
++ struct oom_freeing_stat oom_stat;
++#endif
+ };
+
+ /*
+@@ -186,12 +191,14 @@ EXPORT_SYMBOL(remove_shrinker);
+ *
+ * Returns the number of slab objects which we shrunk.
+ */
+-static int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
++static int shrink_slab(struct scan_control *sc, gfp_t gfp_mask,
+ unsigned long lru_pages)
+ {
++ unsigned long scanned;
+ struct shrinker *shrinker;
+ int ret = 0;
+
++ scanned = sc->nr_scanned;
+ if (scanned == 0)
+ scanned = SWAP_CLUSTER_MAX;
+
+@@ -237,7 +244,7 @@ static int shrink_slab(unsigned long sca
+ ret += nr_before - shrink_ret;
+ mod_page_state(slabs_scanned, this_scan);
+ total_scan -= this_scan;
+-
++ ub_oom_inc(sc, slab, shrink_ret);
+ cond_resched();
+ }
+
+@@ -434,6 +441,7 @@ static int shrink_list(struct list_head
+ goto keep_locked;
+ if (!add_to_swap(page))
+ goto activate_locked;
++ ub_oom_inc(sc, swap, 1);
+ }
+ #endif /* CONFIG_SWAP */
+
+@@ -471,6 +479,7 @@ static int shrink_list(struct list_head
+ case PAGE_ACTIVATE:
+ goto activate_locked;
+ case PAGE_SUCCESS:
++ ub_oom_inc(sc, write, 1);
+ if (PageWriteback(page) || PageDirty(page))
+ goto keep;
+ /*
+@@ -658,6 +667,7 @@ static void shrink_cache(struct zone *zo
+ else
+ mod_page_state_zone(zone, pgscan_direct, nr_scan);
+ nr_freed = shrink_list(&page_list, sc);
++ ub_oom_inc(sc, free, nr_freed);
+ if (current_is_kswapd())
+ mod_page_state(kswapd_steal, nr_freed);
+ mod_page_state_zone(zone, pgsteal, nr_freed);
+@@ -722,6 +732,7 @@ refill_inactive_zone(struct zone *zone,
+ long distress;
+ long swap_tendency;
+
++ KSTAT_PERF_ENTER(refill_inact)
+ lru_add_drain();
+ spin_lock_irq(&zone->lru_lock);
+ pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
+@@ -830,6 +841,7 @@ refill_inactive_zone(struct zone *zone,
+
+ mod_page_state_zone(zone, pgrefill, pgscanned);
+ mod_page_state(pgdeactivate, pgdeactivate);
++ KSTAT_PERF_LEAVE(refill_inact);
+ }
+
+ /*
+@@ -950,10 +962,14 @@ int try_to_free_pages(struct zone **zone
+ unsigned long lru_pages = 0;
+ int i;
+
++ KSTAT_PERF_ENTER(ttfp);
++
++ memset(&sc, 0, sizeof(sc));
+ sc.gfp_mask = gfp_mask;
+ sc.may_writepage = 0;
+ sc.may_swap = 1;
+
++ ub_oom_init();
+ inc_page_state(allocstall);
+
+ for (i = 0; zones[i] != NULL; i++) {
+@@ -975,7 +991,7 @@ int try_to_free_pages(struct zone **zone
+ if (!priority)
+ disable_swap_token();
+ shrink_caches(zones, &sc);
+- shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
++ shrink_slab(&sc, gfp_mask, lru_pages);
+ if (reclaim_state) {
+ sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+ reclaim_state->reclaimed_slab = 0;
+@@ -1012,7 +1028,8 @@ out:
+
+ zone->prev_priority = zone->temp_priority;
+ }
+- return ret;
++ KSTAT_PERF_LEAVE(ttfp);
++ return ret | ub_oom_did_progress(&sc);
+ }
+
+ /*
+@@ -1143,7 +1160,7 @@ scan:
+ shrink_zone(zone, &sc);
+ atomic_dec(&zone->reclaim_in_progress);
+ reclaim_state->reclaimed_slab = 0;
+- nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
++ nr_slab = shrink_slab(&sc, GFP_KERNEL,
+ lru_pages);
+ sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+ total_reclaimed += sc.nr_reclaimed;
+@@ -1346,7 +1363,8 @@ static int __init kswapd_init(void)
+ swap_setup();
+ for_each_pgdat(pgdat)
+ pgdat->kswapd
+- = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
++ = find_task_by_pid_all(kernel_thread(kswapd,
++ pgdat, CLONE_KERNEL));
+ total_memory = nr_free_pagecache_pages();
+ hotcpu_notifier(cpu_callback, 0);
+ return 0;
+diff -uprN linux-2.6.15.orig/net/core/datagram.c linux-2.6.15-ve025stab014/net/core/datagram.c
+--- linux-2.6.15.orig/net/core/datagram.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/datagram.c 2006-01-27 14:48:06.000000000 +0300
+@@ -55,6 +55,8 @@
+ #include <net/sock.h>
+ #include <net/tcp_states.h>
+
++#include <ub/ub_net.h>
++
+ /*
+ * Is a socket 'connection oriented' ?
+ */
+@@ -432,6 +434,7 @@ unsigned int datagram_poll(struct file *
+ {
+ struct sock *sk = sock->sk;
+ unsigned int mask;
++ int no_ubc_space;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ mask = 0;
+@@ -439,8 +442,14 @@ unsigned int datagram_poll(struct file *
+ /* exceptional events? */
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
+- if (sk->sk_shutdown == SHUTDOWN_MASK)
++ if (sk->sk_shutdown == SHUTDOWN_MASK) {
++ no_ubc_space = 0;
+ mask |= POLLHUP;
++ } else {
++ no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
++ if (no_ubc_space)
++ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
++ }
+
+ /* readable? */
+ if (!skb_queue_empty(&sk->sk_receive_queue) ||
+@@ -457,7 +466,7 @@ unsigned int datagram_poll(struct file *
+ }
+
+ /* writable? */
+- if (sock_writeable(sk))
++ if (!no_ubc_space && sock_writeable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ else
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+diff -uprN linux-2.6.15.orig/net/core/dev.c linux-2.6.15-ve025stab014/net/core/dev.c
+--- linux-2.6.15.orig/net/core/dev.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/dev.c 2006-01-27 14:48:08.000000000 +0300
+@@ -114,6 +114,10 @@
+ #include <net/iw_handler.h>
+ #endif /* CONFIG_NET_RADIO */
+ #include <asm/current.h>
++#include <ub/beancounter.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
+
+ /*
+ * The list of packet types we will receive (as opposed to discard)
+@@ -166,25 +170,40 @@ static struct list_head ptype_all; /* T
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
++#ifdef CONFIG_VE
++#define dev_tail (get_exec_env()->_net_dev_tail)
++#else
+ struct net_device *dev_base;
+ static struct net_device **dev_tail = &dev_base;
++EXPORT_SYMBOL(dev_base);
++#endif
+ DEFINE_RWLOCK(dev_base_lock);
+
+-EXPORT_SYMBOL(dev_base);
+ EXPORT_SYMBOL(dev_base_lock);
+
++#ifdef CONFIG_VE
++#define MAX_UNMOVABLE_NETDEVICES (8*4096)
++static uint8_t unmovable_ifindex_list[MAX_UNMOVABLE_NETDEVICES/8];
++static LIST_HEAD(dev_global_list);
++#endif
++
+ #define NETDEV_HASHBITS 8
+ static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
+ static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
+
+-static inline struct hlist_head *dev_name_hash(const char *name)
++struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env)
+ {
+- unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
++ unsigned hash;
++ if (!ve_is_super(env))
++ return visible_dev_head(env);
++ hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+ return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
+ }
+
+-static inline struct hlist_head *dev_index_hash(int ifindex)
++struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env)
+ {
++ if (!ve_is_super(env))
++ return visible_dev_index_head(env);
+ return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
+ }
+
+@@ -468,7 +487,7 @@ struct net_device *__dev_get_by_name(con
+ {
+ struct hlist_node *p;
+
+- hlist_for_each(p, dev_name_hash(name)) {
++ hlist_for_each(p, dev_name_hash(name, get_exec_env())) {
+ struct net_device *dev
+ = hlist_entry(p, struct net_device, name_hlist);
+ if (!strncmp(dev->name, name, IFNAMSIZ))
+@@ -501,6 +520,32 @@ struct net_device *dev_get_by_name(const
+ }
+
+ /**
++ * __dev_global_get_by_name - find a device by its name in dev_global_list
++ * @name: name to find
++ *
++ * Find an interface by name. Must be called under the RTNL semaphore.
++ * If the name is found, a pointer to the device
++ * is returned. If the name is not found then %NULL is returned. The
++ * reference counters are not incremented so the caller must be
++ * careful with locks.
++ */
++
++#ifdef CONFIG_VE
++struct net_device *__dev_global_get_by_name(const char *name)
++{
++ struct net_device *dev;
++ /* It's called relatively rarely */
++ list_for_each_entry(dev, &dev_global_list, dev_global_list_entry) {
++ if (strncmp(dev->name, name, IFNAMSIZ) == 0)
++ return dev;
++ }
++ return NULL;
++}
++#else /* CONFIG_VE */
++#define __dev_global_get_by_name(name) __dev_get_by_name(name)
++#endif /* CONFIG_VE */
++
++/**
+ * __dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+@@ -515,7 +560,7 @@ struct net_device *__dev_get_by_index(in
+ {
+ struct hlist_node *p;
+
+- hlist_for_each(p, dev_index_hash(ifindex)) {
++ hlist_for_each(p, dev_index_hash(ifindex, get_exec_env())) {
+ struct net_device *dev
+ = hlist_entry(p, struct net_device, index_hlist);
+ if (dev->ifindex == ifindex)
+@@ -634,6 +679,23 @@ static int dev_valid_name(const char *na
+ || strchr(name, '/'));
+ }
+
++static inline void __dev_check_name(const char *dev_name, const char *name,
++ long *inuse, const int max_netdevices)
++{
++ int i = 0;
++ char buf[IFNAMSIZ];
++
++ if (!sscanf(dev_name, name, &i))
++ return;
++ if (i < 0 || i >= max_netdevices)
++ return;
++
++ /* avoid cases where sscanf is not exact inverse of printf */
++ snprintf(buf, sizeof(buf), name, i);
++ if (!strncmp(buf, dev_name, IFNAMSIZ))
++ set_bit(i, inuse);
++}
++
+ /**
+ * dev_alloc_name - allocate a name for a device
+ * @dev: device
+@@ -670,16 +732,20 @@ int dev_alloc_name(struct net_device *de
+ if (!inuse)
+ return -ENOMEM;
+
+- for (d = dev_base; d; d = d->next) {
+- if (!sscanf(d->name, name, &i))
+- continue;
+- if (i < 0 || i >= max_netdevices)
+- continue;
+-
+- /* avoid cases where sscanf is not exact inverse of printf */
+- snprintf(buf, sizeof(buf), name, i);
+- if (!strncmp(buf, d->name, IFNAMSIZ))
+- set_bit(i, inuse);
++#ifdef CONFIG_VE
++ if (ve_is_super(get_exec_env())) {
++ list_for_each_entry(d, &dev_global_list,
++ dev_global_list_entry) {
++ __dev_check_name(d->name, name, inuse,
++ max_netdevices);
++ }
++ } else
++#endif
++ {
++ for (d = dev_base; d; d = d->next) {
++ __dev_check_name(d->name, name, inuse,
++ max_netdevices);
++ }
+ }
+
+ i = find_first_zero_bit(inuse, max_netdevices);
+@@ -687,7 +753,11 @@ int dev_alloc_name(struct net_device *de
+ }
+
+ snprintf(buf, sizeof(buf), name, i);
+- if (!__dev_get_by_name(buf)) {
++ if (ve_is_super(get_exec_env()))
++ d = __dev_global_get_by_name(buf);
++ else
++ d = __dev_get_by_name(buf);
++ if (d == NULL) {
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return i;
+ }
+@@ -720,13 +790,14 @@ int dev_change_name(struct net_device *d
+ if (!dev_valid_name(newname))
+ return -EINVAL;
+
++ /* Renaming devices inside a VE is prohibited by the CAP_NET_ADMIN check */
+ if (strchr(newname, '%')) {
+ err = dev_alloc_name(dev, newname);
+ if (err < 0)
+ return err;
+ strcpy(newname, dev->name);
+ }
+- else if (__dev_get_by_name(newname))
++ else if (__dev_global_get_by_name(newname))
+ return -EEXIST;
+ else
+ strlcpy(dev->name, newname, IFNAMSIZ);
+@@ -734,7 +805,8 @@ int dev_change_name(struct net_device *d
+ err = class_device_rename(&dev->class_dev, dev->name);
+ if (!err) {
+ hlist_del(&dev->name_hlist);
+- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
++ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name,
++ get_exec_env()));
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ }
+
+@@ -1296,6 +1368,25 @@ int dev_queue_xmit(struct sk_buff *skb)
+ skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
+ #endif
+ if (q->enqueue) {
++ struct user_beancounter *ub;
++
++ ub = netdev_bc(dev)->exec_ub;
++ /* the skb CAN be already charged if it transmitted via
++ * something like bonding device */
++ if (ub && (skb_bc(skb)->resource == 0)) {
++ unsigned long chargesize;
++ chargesize = skb_charge_fullsize(skb);
++ if (charge_beancounter(ub, UB_OTHERSOCKBUF,
++ chargesize, UB_SOFT)) {
++ rcu_read_unlock();
++ rc = -ENOMEM;
++ goto out_kfree_skb;
++ }
++ skb_bc(skb)->ub = ub;
++ skb_bc(skb)->charged = chargesize;
++ skb_bc(skb)->resource = UB_OTHERSOCKBUF;
++ }
++
+ /* Grab device queue */
+ spin_lock(&dev->queue_lock);
+
+@@ -1582,6 +1673,7 @@ int netif_receive_skb(struct sk_buff *sk
+ struct net_device *orig_dev;
+ int ret = NET_RX_DROP;
+ unsigned short type;
++ struct ve_struct *old_env;
+
+ /* if we've gotten here through NAPI, check netpoll */
+ if (skb->dev->poll && netpoll_rx(skb))
+@@ -1600,6 +1692,17 @@ int netif_receive_skb(struct sk_buff *sk
+ skb->h.raw = skb->nh.raw = skb->data;
+ skb->mac_len = skb->nh.raw - skb->mac.raw;
+
++#ifdef CONFIG_VE
++ /*
++ * The skb might have been allocated in a different VE context than the one its device works in.
++ * So, set the correct owner_env.
++ */
++ skb->owner_env = skb->dev->owner_env;
++ BUG_ON(skb->owner_env == NULL);
++#endif
++
++ old_env = set_exec_env(VE_OWNER_SKB(skb));
++
+ pt_prev = NULL;
+
+ rcu_read_lock();
+@@ -1665,6 +1768,7 @@ ncls:
+
+ out:
+ rcu_read_unlock();
++ (void)set_exec_env(old_env);
+ return ret;
+ }
+
+@@ -2040,7 +2144,7 @@ static int __init dev_proc_init(void)
+ {
+ int rc = -ENOMEM;
+
+- if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
++ if (!proc_glob_fops_create("net/dev", S_IRUGO, &dev_seq_fops))
+ goto out;
+ if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
+ goto out_dev;
+@@ -2052,7 +2156,7 @@ out:
+ out_softnet:
+ proc_net_remove("softnet_stat");
+ out_dev:
+- proc_net_remove("dev");
++ remove_proc_glob_entry("net/dev", NULL);
+ goto out;
+ }
+ #else
+@@ -2117,6 +2221,9 @@ void dev_set_promiscuity(struct net_devi
+ dev->flags &= ~IFF_PROMISC;
+ else
+ dev->flags |= IFF_PROMISC;
++ /* Promiscuous mode on these devices does not mean anything */
++ if (dev->flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
++ return;
+ if (dev->flags != old_flags) {
+ dev_mc_upload(dev);
+ printk(KERN_INFO "device %s %s promiscuous mode\n",
+@@ -2531,9 +2638,28 @@ int dev_ioctl(unsigned int cmd, void __u
+ * - require strict serialization.
+ * - do not return a value
+ */
++ case SIOCSIFMTU:
++ if (!capable(CAP_NET_ADMIN) &&
++ !capable(CAP_VE_NET_ADMIN))
++ return -EPERM;
++ dev_load(ifr.ifr_name);
++ rtnl_lock();
++ if (!ve_is_super(get_exec_env())) {
++ struct net_device *dev;
++ ret = -ENODEV;
++ if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
++ goto out_set_mtu_unlock;
++ ret = -EPERM;
++ if (ifr.ifr_mtu > dev->orig_mtu)
++ goto out_set_mtu_unlock;
++ }
++ ret = dev_ifsioc(&ifr, cmd);
++out_set_mtu_unlock:
++ rtnl_unlock();
++ return ret;
++
+ case SIOCSIFFLAGS:
+ case SIOCSIFMETRIC:
+- case SIOCSIFMTU:
+ case SIOCSIFMAP:
+ case SIOCSIFHWADDR:
+ case SIOCSIFSLAVE:
+@@ -2614,20 +2740,73 @@ int dev_ioctl(unsigned int cmd, void __u
+ * dev_new_index - allocate an ifindex
+ *
+ * Returns a suitable unique value for a new device interface
+- * number. The caller must hold the rtnl semaphore or the
++ * number. The caller must hold the rtnl semaphore or the
+ * dev_base_lock to be sure it remains unique.
++ *
++ * Note: dev->name must be valid on entry
+ */
+-static int dev_new_index(void)
++static int dev_ve_new_index(void)
+ {
+- static int ifindex;
++#ifdef CONFIG_VE
++ int *ifindex = &get_exec_env()->ifindex;
++ int delta = 2;
++#else
++ static int s_ifindex;
++ int *ifindex = &s_ifindex;
++ int delta = 1;
++#endif
+ for (;;) {
+- if (++ifindex <= 0)
+- ifindex = 1;
+- if (!__dev_get_by_index(ifindex))
+- return ifindex;
++ *ifindex += delta;
++ if (*ifindex <= 0)
++ *ifindex = 1;
++ if (!__dev_get_by_index(*ifindex))
++ return *ifindex;
+ }
+ }
+
++#ifdef CONFIG_VE
++static int dev_glb_new_index(void)
++{
++ int i;
++
++ i = find_first_zero_bit((long*)unmovable_ifindex_list,
++ MAX_UNMOVABLE_NETDEVICES);
++
++ if (i == MAX_UNMOVABLE_NETDEVICES)
++ return -EMFILE;
++
++ __set_bit(i, (long*)unmovable_ifindex_list);
++ return (i + 1) * 2;
++}
++#endif
++
++static void dev_glb_free_index(struct net_device *dev)
++{
++#ifdef CONFIG_VE
++ int bit;
++
++ bit = dev->ifindex / 2 - 1;
++ BUG_ON(bit >= MAX_UNMOVABLE_NETDEVICES);
++ __clear_bit(bit, (long*)unmovable_ifindex_list);
++#endif
++}
++
++static int dev_new_index(struct net_device *dev)
++{
++#ifdef CONFIG_VE
++ if (ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
++ return dev_glb_new_index();
++#endif
++
++ return dev_ve_new_index();
++}
++
++static void dev_free_index(struct net_device *dev)
++{
++ if ((dev->ifindex % 2) == 0)
++ dev_glb_free_index(dev);
++}
++
+ static int dev_boot_phase = 1;
+
+ /* Delayed registration/unregisteration */
+@@ -2670,6 +2849,10 @@ int register_netdevice(struct net_device
+ /* When net_device's are persistent, this will be fatal. */
+ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
+
++ ret = -EPERM;
++ if (!ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
++ goto out;
++
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+ dev->xmit_lock_owner = -1;
+@@ -2689,27 +2872,32 @@ int register_netdevice(struct net_device
+ if (ret) {
+ if (ret > 0)
+ ret = -EIO;
+- goto out_err;
++ goto out_free_div;
+ }
+ }
+
+ if (!dev_valid_name(dev->name)) {
+ ret = -EINVAL;
+- goto out_err;
++ goto out_free_div;
++ }
++
++ dev->ifindex = dev_new_index(dev);
++ if (dev->ifindex < 0) {
++ ret = dev->ifindex;
++ goto out_free_div;
+ }
+
+- dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+
+ /* Check for existence of name */
+- head = dev_name_hash(dev->name);
++ head = dev_name_hash(dev->name, get_exec_env());
+ hlist_for_each(p, head) {
+ struct net_device *d
+ = hlist_entry(p, struct net_device, name_hlist);
+ if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
+ ret = -EEXIST;
+- goto out_err;
++ goto out_free_ind;
+ }
+ }
+
+@@ -2761,12 +2949,21 @@ int register_netdevice(struct net_device
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+ dev->next = NULL;
++ dev->owner_env = get_exec_env();
++ dev->orig_mtu = dev->mtu;
++ netdev_bc(dev)->owner_ub = get_beancounter(get_exec_ub());
++ netdev_bc(dev)->exec_ub = get_beancounter(get_exec_ub());
+ dev_init_scheduler(dev);
++#ifdef CONFIG_VE
++ if (ve_is_super(get_exec_env()))
++ list_add_tail(&dev->dev_global_list_entry, &dev_global_list);
++#endif
+ write_lock_bh(&dev_base_lock);
+ *dev_tail = dev;
+ dev_tail = &dev->next;
+ hlist_add_head(&dev->name_hlist, head);
+- hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
++ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex,
++ get_exec_env()));
+ dev_hold(dev);
+ dev->reg_state = NETREG_REGISTERING;
+ write_unlock_bh(&dev_base_lock);
+@@ -2780,7 +2977,9 @@ int register_netdevice(struct net_device
+
+ out:
+ return ret;
+-out_err:
++out_free_ind:
++ dev_free_index(dev);
++out_free_div:
+ free_divert_blk(dev);
+ goto out;
+ }
+@@ -2826,6 +3025,10 @@ int register_netdev(struct net_device *d
+ err = register_netdevice(dev);
+ out:
+ rtnl_unlock();
++ if (err == 0 && dev->reg_state != NETREG_REGISTERED) {
++ unregister_netdev(dev);
++ err = -ENOMEM;
++ }
+ return err;
+ }
+ EXPORT_SYMBOL(register_netdev);
+@@ -2908,6 +3111,7 @@ void netdev_run_todo(void)
+ {
+ struct list_head list = LIST_HEAD_INIT(list);
+ int err;
++ struct ve_struct *current_env;
+
+
+ /* Need to guard against multiple cpu's getting out of order. */
+@@ -2926,22 +3130,30 @@ void netdev_run_todo(void)
+ list_splice_init(&net_todo_list, &list);
+ spin_unlock(&net_todo_list_lock);
+
++ current_env = get_exec_env();
+ while (!list_empty(&list)) {
+ struct net_device *dev
+ = list_entry(list.next, struct net_device, todo_list);
+ list_del(&dev->todo_list);
+
++ (void)set_exec_env(dev->owner_env);
+ switch(dev->reg_state) {
+ case NETREG_REGISTERING:
+ err = netdev_register_sysfs(dev);
+- if (err)
++ if (err) {
+ printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
+ dev->name, err);
++ dev->reg_state = NETREG_REGISTER_ERR;
++ break;
++ }
+ dev->reg_state = NETREG_REGISTERED;
+ break;
+
+ case NETREG_UNREGISTERING:
+ netdev_unregister_sysfs(dev);
++ /* fall through */
++
++ case NETREG_REGISTER_ERR:
+ dev->reg_state = NETREG_UNREGISTERED;
+
+ netdev_wait_allrefs(dev);
+@@ -2952,6 +3164,10 @@ void netdev_run_todo(void)
+ BUG_TRAP(!dev->ip6_ptr);
+ BUG_TRAP(!dev->dn_ptr);
+
++ put_beancounter(netdev_bc(dev)->exec_ub);
++ put_beancounter(netdev_bc(dev)->owner_ub);
++ netdev_bc(dev)->exec_ub = NULL;
++ netdev_bc(dev)->owner_ub = NULL;
+
+ /* It must be the very last action,
+ * after this 'dev' may point to freed up memory.
+@@ -2966,6 +3182,7 @@ void netdev_run_todo(void)
+ break;
+ }
+ }
++ (void)set_exec_env(current_env);
+
+ out:
+ up(&net_todo_run_mutex);
+@@ -2991,7 +3208,7 @@ struct net_device *alloc_netdev(int size
+ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+ alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
+
+- p = kmalloc(alloc_size, GFP_KERNEL);
++ p = ub_kmalloc(alloc_size, GFP_KERNEL);
+ if (!p) {
+ printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+ return NULL;
+@@ -3071,7 +3288,8 @@ int unregister_netdevice(struct net_devi
+ return -ENODEV;
+ }
+
+- BUG_ON(dev->reg_state != NETREG_REGISTERED);
++ BUG_ON(dev->reg_state != NETREG_REGISTERED &&
++ dev->reg_state != NETREG_REGISTER_ERR);
+
+ /* If device is running, close it first. */
+ if (dev->flags & IFF_UP)
+@@ -3087,6 +3305,10 @@ int unregister_netdevice(struct net_devi
+ dev_tail = dp;
+ *dp = d->next;
+ write_unlock_bh(&dev_base_lock);
++#ifdef CONFIG_VE
++ if (ve_is_super(get_exec_env()))
++ list_del(&dev->dev_global_list_entry);
++#endif
+ break;
+ }
+ }
+@@ -3096,7 +3318,8 @@ int unregister_netdevice(struct net_devi
+ return -ENODEV;
+ }
+
+- dev->reg_state = NETREG_UNREGISTERING;
++ if (dev->reg_state != NETREG_REGISTER_ERR)
++ dev->reg_state = NETREG_UNREGISTERING;
+
+ synchronize_net();
+
+@@ -3120,6 +3343,8 @@ int unregister_netdevice(struct net_devi
+ /* Notifier chain MUST detach us from master device. */
+ BUG_TRAP(!dev->master);
+
++ dev_free_index(dev);
++
+ free_divert_blk(dev);
+
+ /* Finish processing unregister after unlock */
+@@ -3277,6 +3502,8 @@ EXPORT_SYMBOL(dev_get_by_flags);
+ EXPORT_SYMBOL(dev_get_by_index);
+ EXPORT_SYMBOL(dev_get_by_name);
+ EXPORT_SYMBOL(dev_ioctl);
++EXPORT_SYMBOL(dev_name_hash);
++EXPORT_SYMBOL(dev_index_hash);
+ EXPORT_SYMBOL(dev_open);
+ EXPORT_SYMBOL(dev_queue_xmit);
+ EXPORT_SYMBOL(dev_remove_pack);
+diff -uprN linux-2.6.15.orig/net/core/dev_mcast.c linux-2.6.15-ve025stab014/net/core/dev_mcast.c
+--- linux-2.6.15.orig/net/core/dev_mcast.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/dev_mcast.c 2006-01-27 14:48:08.000000000 +0300
+@@ -290,9 +290,10 @@ static struct file_operations dev_mc_seq
+
+ void __init dev_mcast_init(void)
+ {
+- proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
++ proc_glob_fops_create("net/dev_mcast", 0, &dev_mc_seq_fops);
+ }
+
+ EXPORT_SYMBOL(dev_mc_add);
+ EXPORT_SYMBOL(dev_mc_delete);
+ EXPORT_SYMBOL(dev_mc_upload);
++EXPORT_SYMBOL(dev_mc_discard);
+diff -uprN linux-2.6.15.orig/net/core/dst.c linux-2.6.15-ve025stab014/net/core/dst.c
+--- linux-2.6.15.orig/net/core/dst.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/dst.c 2006-01-27 14:48:08.000000000 +0300
+@@ -241,13 +241,13 @@ static inline void dst_ifdown(struct dst
+ dst->input = dst_discard_in;
+ dst->output = dst_discard_out;
+ } else {
+- dst->dev = &loopback_dev;
+- dev_hold(&loopback_dev);
++ dst->dev = &visible_loopback_dev;
++ dev_hold(&visible_loopback_dev);
+ dev_put(dev);
+ if (dst->neighbour && dst->neighbour->dev == dev) {
+- dst->neighbour->dev = &loopback_dev;
++ dst->neighbour->dev = &visible_loopback_dev;
+ dev_put(dev);
+- dev_hold(&loopback_dev);
++ dev_hold(&visible_loopback_dev);
+ }
+ }
+ }
+@@ -260,11 +260,14 @@ static int dst_dev_event(struct notifier
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ case NETDEV_DOWN:
+- spin_lock_bh(&dst_lock);
++ local_bh_disable();
++ dst_run_gc(0);
++ spin_lock(&dst_lock);
+ for (dst = dst_garbage_list; dst; dst = dst->next) {
+ dst_ifdown(dst, dev, event != NETDEV_DOWN);
+ }
+- spin_unlock_bh(&dst_lock);
++ spin_unlock(&dst_lock);
++ local_bh_enable();
+ break;
+ }
+ return NOTIFY_DONE;
+diff -uprN linux-2.6.15.orig/net/core/filter.c linux-2.6.15-ve025stab014/net/core/filter.c
+--- linux-2.6.15.orig/net/core/filter.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/filter.c 2006-01-27 14:48:06.000000000 +0300
+@@ -363,7 +363,7 @@ int sk_attach_filter(struct sock_fprog *
+ if (fprog->filter == NULL)
+ return -EINVAL;
+
+- fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
++ fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL_UBC);
+ if (!fp)
+ return -ENOMEM;
+ if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+diff -uprN linux-2.6.15.orig/net/core/neighbour.c linux-2.6.15-ve025stab014/net/core/neighbour.c
+--- linux-2.6.15.orig/net/core/neighbour.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/neighbour.c 2006-01-27 14:48:08.000000000 +0300
+@@ -727,6 +727,11 @@ static void neigh_timer_handler(unsigned
+ struct neighbour *neigh = (struct neighbour *)arg;
+ unsigned state;
+ int notify = 0;
++ struct ve_struct *env;
++ struct user_beancounter *ub;
++
++ env = set_exec_env(neigh->dev->owner_env);
++ ub = set_exec_ub(netdev_bc(neigh->dev)->exec_ub);
+
+ write_lock(&neigh->lock);
+
+@@ -824,6 +829,8 @@ out:
+ neigh_app_notify(neigh);
+ #endif
+ neigh_release(neigh);
++ (void)set_exec_ub(ub);
++ (void)set_exec_env(env);
+ }
+
+ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+@@ -1213,6 +1220,12 @@ static void neigh_proxy_process(unsigned
+ skb = skb->next;
+ if (tdif <= 0) {
+ struct net_device *dev = back->dev;
++ struct ve_struct *env;
++ struct user_beancounter *ub;
++
++ env = set_exec_env(dev->owner_env);
++ ub = set_exec_ub(netdev_bc(dev)->exec_ub);
++
+ __skb_unlink(back, &tbl->proxy_queue);
+ if (tbl->proxy_redo && netif_running(dev))
+ tbl->proxy_redo(back);
+@@ -1220,6 +1233,9 @@ static void neigh_proxy_process(unsigned
+ kfree_skb(back);
+
+ dev_put(dev);
++
++ (void)set_exec_ub(ub);
++ (void)set_exec_env(env);
+ } else if (!sched_next || tdif < sched_next)
+ sched_next = tdif;
+ }
+@@ -1424,6 +1440,9 @@ int neigh_delete(struct sk_buff *skb, st
+ struct net_device *dev = NULL;
+ int err = -ENODEV;
+
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
++
+ if (ndm->ndm_ifindex &&
+ (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ goto out;
+@@ -1475,6 +1494,9 @@ int neigh_add(struct sk_buff *skb, struc
+ struct net_device *dev = NULL;
+ int err = -ENODEV;
+
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
++
+ if (ndm->ndm_ifindex &&
+ (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ goto out;
+@@ -1936,6 +1958,9 @@ int neigh_dump_info(struct sk_buff *skb,
+ struct neigh_table *tbl;
+ int t, family, s_t;
+
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
++
+ read_lock(&neigh_tbl_lock);
+ family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+ s_t = cb->args[0];
+@@ -2530,11 +2555,17 @@ int neigh_sysctl_register(struct net_dev
+ int p_id, int pdev_id, char *p_name,
+ proc_handler *handler, ctl_handler *strategy)
+ {
+- struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
++ struct neigh_sysctl_table *t;
+ const char *dev_name_source = NULL;
+ char *dev_name = NULL;
+ int err = 0;
+
++ /* This function is called from VExx only from devinet_init,
++ and it does not matter what is returned */
++ if (!ve_is_super(get_exec_env()))
++ return 0;
++
++ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return -ENOBUFS;
+ memcpy(t, &neigh_sysctl_template, sizeof(*t));
+@@ -2625,6 +2656,8 @@ int neigh_sysctl_register(struct net_dev
+
+ void neigh_sysctl_unregister(struct neigh_parms *p)
+ {
++ if (!ve_is_super(get_exec_env()))
++ return;
+ if (p->sysctl_table) {
+ struct neigh_sysctl_table *t = p->sysctl_table;
+ p->sysctl_table = NULL;
+diff -uprN linux-2.6.15.orig/net/core/net-sysfs.c linux-2.6.15-ve025stab014/net/core/net-sysfs.c
+--- linux-2.6.15.orig/net/core/net-sysfs.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/net-sysfs.c 2006-01-27 14:48:08.000000000 +0300
+@@ -399,7 +399,8 @@ static void netdev_release(struct class_
+ struct net_device *dev
+ = container_of(cd, struct net_device, class_dev);
+
+- BUG_ON(dev->reg_state != NETREG_RELEASED);
++ BUG_ON(dev->reg_state != NETREG_RELEASED &&
++ dev->reg_state != NETREG_REGISTERING);
+
+ kfree((char *)dev - dev->padded);
+ }
+@@ -411,6 +412,13 @@ static struct class net_class = {
+ .hotplug = netdev_hotplug,
+ #endif
+ };
++EXPORT_SYMBOL(net_class);
++
++#ifndef CONFIG_VE
++#define visible_net_class net_class
++#else
++#define visible_net_class (*get_exec_env()->net_class)
++#endif
+
+ void netdev_unregister_sysfs(struct net_device * net)
+ {
+@@ -435,7 +443,7 @@ int netdev_register_sysfs(struct net_dev
+ struct class_device_attribute *attr;
+ int ret;
+
+- class_dev->class = &net_class;
++ class_dev->class = &visible_net_class;
+ class_dev->class_data = net;
+
+ strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE);
+@@ -468,12 +476,21 @@ out_cleanup:
+ out_unreg:
+ printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n",
+ net->name, ret);
+- class_device_unregister(class_dev);
++ /* put is called in free_netdev() */
++ class_device_del(class_dev);
+ out:
+ return ret;
+ }
+
++void prepare_sysfs_netdev(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->net_class = &net_class;
++#endif
++}
++
+ int netdev_sysfs_init(void)
+ {
++ prepare_sysfs_netdev();
+ return class_register(&net_class);
+ }
+diff -uprN linux-2.6.15.orig/net/core/rtnetlink.c linux-2.6.15-ve025stab014/net/core/rtnetlink.c
+--- linux-2.6.15.orig/net/core/rtnetlink.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/rtnetlink.c 2006-01-27 14:48:08.000000000 +0300
+@@ -434,6 +434,8 @@ static int rtnetlink_dump_all(struct sk_
+ if (rtnetlink_links[idx] == NULL ||
+ rtnetlink_links[idx][type].dumpit == NULL)
+ continue;
++ if (vz_security_proto_check(idx, 0, 0))
++ continue;
+ if (idx > s_idx)
+ memset(&cb->args[0], 0, sizeof(cb->args));
+ if (rtnetlink_links[idx][type].dumpit(skb, cb))
+@@ -501,7 +503,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
+ return 0;
+
+ family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
+- if (family >= NPROTO) {
++ if (family >= NPROTO || vz_security_proto_check(family, 0, 0)) {
+ *errp = -EAFNOSUPPORT;
+ return -1;
+ }
+diff -uprN linux-2.6.15.orig/net/core/scm.c linux-2.6.15-ve025stab014/net/core/scm.c
+--- linux-2.6.15.orig/net/core/scm.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/scm.c 2006-01-27 14:48:08.000000000 +0300
+@@ -33,6 +33,7 @@
+ #include <net/compat.h>
+ #include <net/scm.h>
+
++#include <ub/ub_mem.h>
+
+ /*
+ * Only allow a user to send credentials, that they could set with
+@@ -41,7 +42,9 @@
+
+ static __inline__ int scm_check_creds(struct ucred *creds)
+ {
+- if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
++ if ((creds->pid == virt_tgid(current) ||
++ creds->pid == current->tgid ||
++ capable(CAP_VE_SYS_ADMIN)) &&
+ ((creds->uid == current->uid || creds->uid == current->euid ||
+ creds->uid == current->suid) || capable(CAP_SETUID)) &&
+ ((creds->gid == current->gid || creds->gid == current->egid ||
+@@ -68,7 +71,7 @@ static int scm_fp_copy(struct cmsghdr *c
+
+ if (!fpl)
+ {
+- fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
++ fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+ if (!fpl)
+ return -ENOMEM;
+ *fplp = fpl;
+@@ -274,7 +277,7 @@ struct scm_fp_list *scm_fp_dup(struct sc
+ if (!fpl)
+ return NULL;
+
+- new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
++ new_fpl = ub_kmalloc(sizeof(*fpl), GFP_KERNEL);
+ if (new_fpl) {
+ for (i=fpl->count-1; i>=0; i--)
+ get_file(fpl->fp[i]);
+diff -uprN linux-2.6.15.orig/net/core/skbuff.c linux-2.6.15-ve025stab014/net/core/skbuff.c
+--- linux-2.6.15.orig/net/core/skbuff.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/skbuff.c 2006-01-27 14:48:08.000000000 +0300
+@@ -48,6 +48,7 @@
+ #include <linux/in.h>
+ #include <linux/inet.h>
+ #include <linux/slab.h>
++#include <linux/kmem_cache.h>
+ #include <linux/netdevice.h>
+ #ifdef CONFIG_NET_CLS_ACT
+ #include <net/pkt_sched.h>
+@@ -68,6 +69,8 @@
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+
++#include <ub/ub_net.h>
++
+ static kmem_cache_t *skbuff_head_cache __read_mostly;
+ static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+
+@@ -149,6 +152,9 @@ struct sk_buff *__alloc_skb(unsigned int
+ if (!skb)
+ goto out;
+
++ if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
++ goto nobc;
++
+ /* Get the DATA. Size must match skb_add_mtu(). */
+ size = SKB_DATA_ALIGN(size);
+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+@@ -162,6 +168,7 @@ struct sk_buff *__alloc_skb(unsigned int
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
++ SET_VE_OWNER_SKB(skb, get_exec_env());
+ if (fclone) {
+ struct sk_buff *child = skb + 1;
+ atomic_t *fclone_ref = (atomic_t *) (child + 1);
+@@ -181,6 +188,8 @@ struct sk_buff *__alloc_skb(unsigned int
+ out:
+ return skb;
+ nodata:
++ ub_skb_free_bc(skb);
++nobc:
+ kmem_cache_free(skbuff_head_cache, skb);
+ skb = NULL;
+ goto out;
+@@ -213,6 +222,9 @@ struct sk_buff *alloc_skb_from_cache(kme
+ if (!skb)
+ goto out;
+
++ if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
++ goto nobc;
++
+ /* Get the DATA. */
+ size = SKB_DATA_ALIGN(size);
+ data = kmem_cache_alloc(cp, gfp_mask);
+@@ -226,6 +238,7 @@ struct sk_buff *alloc_skb_from_cache(kme
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
++ SET_VE_OWNER_SKB(skb, get_exec_env());
+
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+@@ -235,6 +248,8 @@ struct sk_buff *alloc_skb_from_cache(kme
+ out:
+ return skb;
+ nodata:
++ ub_skb_free_bc(skb);
++nobc:
+ kmem_cache_free(skbuff_head_cache, skb);
+ skb = NULL;
+ goto out;
+@@ -289,6 +304,7 @@ void kfree_skbmem(struct sk_buff *skb)
+ atomic_t *fclone_ref;
+
+ skb_release_data(skb);
++ ub_skb_free_bc(skb);
+ switch (skb->fclone) {
+ case SKB_FCLONE_UNAVAILABLE:
+ kmem_cache_free(skbuff_head_cache, skb);
+@@ -330,6 +346,7 @@ void __kfree_skb(struct sk_buff *skb)
+ #ifdef CONFIG_XFRM
+ secpath_put(skb->sp);
+ #endif
++ ub_skb_uncharge(skb);
+ if (skb->destructor) {
+ WARN_ON(in_irq());
+ skb->destructor(skb);
+@@ -385,6 +402,11 @@ struct sk_buff *skb_clone(struct sk_buff
+ n->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
+
++ if (ub_skb_alloc_bc(n, gfp_mask)) {
++ kmem_cache_free(skbuff_head_cache, n);
++ return NULL;
++ }
++
+ #define C(x) n->x = skb->x
+
+ n->next = n->prev = NULL;
+@@ -411,6 +433,7 @@ struct sk_buff *skb_clone(struct sk_buff
+ C(ip_summed);
+ C(priority);
+ C(protocol);
++ SET_VE_OWNER_SKB(n, VE_OWNER_SKB(skb));
+ n->destructor = NULL;
+ #ifdef CONFIG_NETFILTER
+ C(nfmark);
+diff -uprN linux-2.6.15.orig/net/core/sock.c linux-2.6.15-ve025stab014/net/core/sock.c
+--- linux-2.6.15.orig/net/core/sock.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/sock.c 2006-01-27 14:48:08.000000000 +0300
+@@ -107,6 +107,7 @@
+ #include <linux/net.h>
+ #include <linux/mm.h>
+ #include <linux/slab.h>
++#include <linux/kmem_cache.h>
+ #include <linux/interrupt.h>
+ #include <linux/poll.h>
+ #include <linux/tcp.h>
+@@ -123,6 +124,9 @@
+ #include <net/xfrm.h>
+ #include <linux/ipsec.h>
+
++#include <ub/ub_net.h>
++#include <ub/beancounter.h>
++
+ #include <linux/filter.h>
+
+ #ifdef CONFIG_INET
+@@ -171,7 +175,7 @@ static void sock_warn_obsolete_bsdism(co
+ static char warncomm[TASK_COMM_LEN];
+ if (strcmp(warncomm, current->comm) && warned < 5) {
+ strcpy(warncomm, current->comm);
+- printk(KERN_WARNING "process `%s' is using obsolete "
++ ve_printk(VE_LOG, KERN_WARNING "process `%s' is using obsolete "
+ "%s SO_BSDCOMPAT\n", warncomm, name);
+ warned++;
+ }
+@@ -658,6 +662,7 @@ struct sock *sk_alloc(int family, gfp_t
+ */
+ sk->sk_prot = sk->sk_prot_creator = prot;
+ sock_lock_init(sk);
++ SET_VE_OWNER_SK(sk, get_exec_env());
+ }
+
+ if (security_sk_alloc(sk, family, priority))
+@@ -697,6 +702,7 @@ void sk_free(struct sock *sk)
+ __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
+
+ security_sk_free(sk);
++ ub_sock_uncharge(sk);
+ if (sk->sk_prot_creator->slab != NULL)
+ kmem_cache_free(sk->sk_prot_creator->slab, sk);
+ else
+@@ -713,6 +719,11 @@ struct sock *sk_clone(const struct sock
+
+ memcpy(newsk, sk, sk->sk_prot->obj_size);
+
++ if (ub_sock_charge(newsk, sk->sk_family, sk->sk_type) < 0) {
++ sk_free(newsk);
++ return NULL;
++ }
++
+ /* SANITY */
+ sk_node_init(&newsk->sk_node);
+ sock_lock_init(newsk);
+@@ -933,14 +944,12 @@ static long sock_wait_for_wmem(struct so
+ /*
+ * Generic send/receive buffer handlers
+ */
+-
+-static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
+- unsigned long header_len,
+- unsigned long data_len,
+- int noblock, int *errcode)
++struct sk_buff *sock_alloc_send_skb2(struct sock *sk, unsigned long size,
++ unsigned long size2, int noblock,
++ int *errcode)
+ {
+ struct sk_buff *skb;
+- gfp_t gfp_mask;
++ unsigned int gfp_mask;
+ long timeo;
+ int err;
+
+@@ -958,46 +967,35 @@ static struct sk_buff *sock_alloc_send_p
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
+ goto failure;
+
+- if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+- skb = alloc_skb(header_len, sk->sk_allocation);
+- if (skb) {
+- int npages;
+- int i;
+-
+- /* No pages, we're done... */
+- if (!data_len)
+- break;
+-
+- npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+- skb->truesize += data_len;
+- skb_shinfo(skb)->nr_frags = npages;
+- for (i = 0; i < npages; i++) {
+- struct page *page;
+- skb_frag_t *frag;
+-
+- page = alloc_pages(sk->sk_allocation, 0);
+- if (!page) {
+- err = -ENOBUFS;
+- skb_shinfo(skb)->nr_frags = i;
+- kfree_skb(skb);
+- goto failure;
+- }
+-
+- frag = &skb_shinfo(skb)->frags[i];
+- frag->page = page;
+- frag->page_offset = 0;
+- frag->size = (data_len >= PAGE_SIZE ?
+- PAGE_SIZE :
+- data_len);
+- data_len -= PAGE_SIZE;
+- }
++ if (ub_sock_getwres_other(sk, skb_charge_size(size))) {
++ if (size2 < size) {
++ size = size2;
++ continue;
++ }
++ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
++ err = -EAGAIN;
++ if (!timeo)
++ goto failure;
++ if (signal_pending(current))
++ goto interrupted;
++ timeo = ub_sock_wait_for_space(sk, timeo,
++ skb_charge_size(size));
++ continue;
++ }
+
++ if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
++ skb = alloc_skb(size, sk->sk_allocation);
++ if (skb)
+ /* Full success... */
+ break;
+- }
++ ub_sock_retwres_other(sk, skb_charge_size(size),
++ SOCK_MIN_UBCSPACE_CH);
+ err = -ENOBUFS;
+ goto failure;
+ }
++ ub_sock_retwres_other(sk,
++ skb_charge_size(size),
++ SOCK_MIN_UBCSPACE_CH);
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ err = -EAGAIN;
+@@ -1008,6 +1006,7 @@ static struct sk_buff *sock_alloc_send_p
+ timeo = sock_wait_for_wmem(sk, timeo);
+ }
+
++ ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF);
+ skb_set_owner_w(skb, sk);
+ return skb;
+
+@@ -1021,7 +1020,7 @@ failure:
+ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
+ int noblock, int *errcode)
+ {
+- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
++ return sock_alloc_send_skb2(sk, size, size, noblock, errcode);
+ }
+
+ static void __lock_sock(struct sock *sk)
+@@ -1461,7 +1460,8 @@ int proto_register(struct proto *prot, i
+
+ if (alloc_slab) {
+ prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
+- SLAB_HWCACHE_ALIGN, NULL, NULL);
++ SLAB_HWCACHE_ALIGN | SLAB_UBC,
++ NULL, NULL);
+
+ if (prot->slab == NULL) {
+ printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
+@@ -1477,9 +1477,11 @@ int proto_register(struct proto *prot, i
+ goto out_free_sock_slab;
+
+ sprintf(request_sock_slab_name, mask, prot->name);
+- prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
+- prot->rsk_prot->obj_size, 0,
+- SLAB_HWCACHE_ALIGN, NULL, NULL);
++ prot->rsk_prot->slab =
++ kmem_cache_create(request_sock_slab_name,
++ prot->rsk_prot->obj_size, 0,
++ SLAB_HWCACHE_ALIGN | SLAB_UBC,
++ NULL, NULL);
+
+ if (prot->rsk_prot->slab == NULL) {
+ printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
+@@ -1497,10 +1499,11 @@ int proto_register(struct proto *prot, i
+ goto out_free_request_sock_slab;
+
+ sprintf(timewait_sock_slab_name, mask, prot->name);
+- prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
+- prot->twsk_obj_size,
+- 0, SLAB_HWCACHE_ALIGN,
+- NULL, NULL);
++ prot->twsk_slab =
++ kmem_cache_create(timewait_sock_slab_name,
++ prot->twsk_obj_size, 0,
++ SLAB_HWCACHE_ALIGN | SLAB_UBC,
++ NULL, NULL);
+ if (prot->twsk_slab == NULL)
+ goto out_free_timewait_sock_slab_name;
+ }
+diff -uprN linux-2.6.15.orig/net/core/stream.c linux-2.6.15-ve025stab014/net/core/stream.c
+--- linux-2.6.15.orig/net/core/stream.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/core/stream.c 2006-01-27 14:48:06.000000000 +0300
+@@ -109,8 +109,9 @@ EXPORT_SYMBOL(sk_stream_wait_close);
+ * sk_stream_wait_memory - Wait for more memory for a socket
+ * @sk: socket to wait for memory
+ * @timeo_p: for how long
++ * @amount: amount of memory to wait for (in UB space!)
+ */
+-int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
++int sk_stream_wait_memory(struct sock *sk, long *timeo_p, unsigned long amount)
+ {
+ int err = 0;
+ long vm_wait = 0;
+@@ -132,14 +133,19 @@ int sk_stream_wait_memory(struct sock *s
+ if (signal_pending(current))
+ goto do_interrupted;
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+- if (sk_stream_memory_free(sk) && !vm_wait)
+- break;
++ if (amount == 0) {
++ if (sk_stream_memory_free(sk) && !vm_wait)
++ break;
++ } else
++ ub_sock_sndqueueadd_tcp(sk, amount);
+
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk->sk_write_pending++;
+ sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) &&
+ vm_wait);
+ sk->sk_write_pending--;
++ if (amount > 0)
++ ub_sock_sndqueuedel(sk);
+
+ if (vm_wait) {
+ vm_wait -= current_timeo;
+diff -uprN linux-2.6.15.orig/net/dccp/ipv4.c linux-2.6.15-ve025stab014/net/dccp/ipv4.c
+--- linux-2.6.15.orig/net/dccp/ipv4.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/dccp/ipv4.c 2006-01-27 14:48:08.000000000 +0300
+@@ -60,11 +60,16 @@ static int __dccp_v4_check_established(s
+ const int dif = sk->sk_bound_dev_if;
+ INET_ADDR_COOKIE(acookie, saddr, daddr)
+ const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+- struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash);
++ unsigned int hash;
++ struct inet_ehash_bucket *head;
+ const struct sock *sk2;
+ const struct hlist_node *node;
+ struct inet_timewait_sock *tw;
++ struct ve_struct *env;
++
++ env = VE_OWNER_SK(sk);
++ hash = inet_ehashfn(daddr, lport, saddr, inet->dport, VEID(env));
++ head = inet_ehash_bucket(&dccp_hashinfo, hash);
+
+ prefetch(head->chain.first);
+ write_lock(&head->lock);
+@@ -73,14 +78,16 @@ static int __dccp_v4_check_established(s
+ sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
+ tw = inet_twsk(sk2);
+
+- if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
++ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr,
++ ports, dif, env))
+ goto not_unique;
+ }
+ tw = NULL;
+
+ /* And established part... */
+ sk_for_each(sk2, node, &head->chain) {
+- if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
++ if (INET_MATCH(sk2, hash, acookie, saddr, daddr,
++ ports, dif, env))
+ goto not_unique;
+ }
+
+@@ -133,7 +140,8 @@ static int dccp_v4_hash_connect(struct s
+ local_bh_disable();
+ do {
+ head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
+- dccp_hashinfo.bhash_size)];
++ dccp_hashinfo.bhash_size,
++ 0)];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+@@ -154,7 +162,7 @@ static int dccp_v4_hash_connect(struct s
+ }
+
+ tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
+- head, rover);
++ head, rover, get_exec_env());
+ if (tb == NULL) {
+ spin_unlock(&head->lock);
+ break;
+@@ -191,7 +199,7 @@ ok:
+ }
+
+ head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
+- dccp_hashinfo.bhash_size)];
++ dccp_hashinfo.bhash_size, 0)];
+ tb = inet_csk(sk)->icsk_bind_hash;
+ spin_lock_bh(&head->lock);
+ if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
+diff -uprN linux-2.6.15.orig/net/ipv4/af_inet.c linux-2.6.15-ve025stab014/net/ipv4/af_inet.c
+--- linux-2.6.15.orig/net/ipv4/af_inet.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/af_inet.c 2006-01-27 14:48:08.000000000 +0300
+@@ -112,6 +112,7 @@
+ #ifdef CONFIG_IP_MROUTE
+ #include <linux/mroute.h>
+ #endif
++#include <ub/ub_net.h>
+
+ DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
+
+@@ -296,6 +297,13 @@ lookup_protocol:
+ if (sk == NULL)
+ goto out;
+
++ err = -ENOBUFS;
++ if (ub_sock_charge(sk, PF_INET, sock->type))
++ goto out_sk_free;
++ /* If the charge was successful, sock_init_data() MUST be called to
++ * set sk->sk_type; otherwise sk will be uncharged to the wrong resource
++ */
++
+ err = 0;
+ sk->sk_no_check = answer_no_check;
+ if (INET_PROTOSW_REUSE & answer_flags)
+@@ -352,6 +360,9 @@ out:
+ out_rcu_unlock:
+ rcu_read_unlock();
+ goto out;
++out_sk_free:
++ sk_free(sk);
++ return err;
+ }
+
+
+@@ -366,6 +377,9 @@ int inet_release(struct socket *sock)
+
+ if (sk) {
+ long timeout;
++ struct ve_struct *saved_env;
++
++ saved_env = set_exec_env(VE_OWNER_SK(sk));
+
+ /* Applications forget to leave groups before exiting */
+ ip_mc_drop_socket(sk);
+@@ -383,6 +397,8 @@ int inet_release(struct socket *sock)
+ timeout = sk->sk_lingertime;
+ sock->sk = NULL;
+ sk->sk_prot->close(sk, timeout);
++
++ (void)set_exec_env(saved_env);
+ }
+ return 0;
+ }
+@@ -1104,20 +1120,20 @@ static struct net_protocol icmp_protocol
+
+ static int __init init_ipv4_mibs(void)
+ {
+- net_statistics[0] = alloc_percpu(struct linux_mib);
+- net_statistics[1] = alloc_percpu(struct linux_mib);
+- ip_statistics[0] = alloc_percpu(struct ipstats_mib);
+- ip_statistics[1] = alloc_percpu(struct ipstats_mib);
+- icmp_statistics[0] = alloc_percpu(struct icmp_mib);
+- icmp_statistics[1] = alloc_percpu(struct icmp_mib);
+- tcp_statistics[0] = alloc_percpu(struct tcp_mib);
+- tcp_statistics[1] = alloc_percpu(struct tcp_mib);
+- udp_statistics[0] = alloc_percpu(struct udp_mib);
+- udp_statistics[1] = alloc_percpu(struct udp_mib);
++ ve_net_statistics[0] = alloc_percpu(struct linux_mib);
++ ve_net_statistics[1] = alloc_percpu(struct linux_mib);
++ ve_ip_statistics[0] = alloc_percpu(struct ipstats_mib);
++ ve_ip_statistics[1] = alloc_percpu(struct ipstats_mib);
++ ve_icmp_statistics[0] = alloc_percpu(struct icmp_mib);
++ ve_icmp_statistics[1] = alloc_percpu(struct icmp_mib);
++ ve_tcp_statistics[0] = alloc_percpu(struct tcp_mib);
++ ve_tcp_statistics[1] = alloc_percpu(struct tcp_mib);
++ ve_udp_statistics[0] = alloc_percpu(struct udp_mib);
++ ve_udp_statistics[1] = alloc_percpu(struct udp_mib);
+ if (!
+- (net_statistics[0] && net_statistics[1] && ip_statistics[0]
+- && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
+- && udp_statistics[0] && udp_statistics[1]))
++ (ve_net_statistics[0] && ve_net_statistics[1] && ve_ip_statistics[0]
++ && ve_ip_statistics[1] && ve_tcp_statistics[0] && ve_tcp_statistics[1]
++ && ve_udp_statistics[0] && ve_udp_statistics[1]))
+ return -ENOMEM;
+
+ (void) tcp_mib_init();
+diff -uprN linux-2.6.15.orig/net/ipv4/arp.c linux-2.6.15-ve025stab014/net/ipv4/arp.c
+--- linux-2.6.15.orig/net/ipv4/arp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/arp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -986,7 +986,7 @@ static int arp_req_set(struct arpreq *r,
+ return 0;
+ }
+ if (dev == NULL) {
+- ipv4_devconf.proxy_arp = 1;
++ ve_ipv4_devconf.proxy_arp = 1;
+ return 0;
+ }
+ if (__in_dev_get_rtnl(dev)) {
+@@ -1092,7 +1092,7 @@ static int arp_req_delete(struct arpreq
+ return pneigh_delete(&arp_tbl, &ip, dev);
+ if (mask == 0) {
+ if (dev == NULL) {
+- ipv4_devconf.proxy_arp = 0;
++ ve_ipv4_devconf.proxy_arp = 0;
+ return 0;
+ }
+ if (__in_dev_get_rtnl(dev)) {
+@@ -1143,6 +1143,8 @@ int arp_ioctl(unsigned int cmd, void __u
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ case SIOCGARP:
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
+ err = copy_from_user(&r, arg, sizeof(struct arpreq));
+ if (err)
+ return -EFAULT;
+@@ -1370,8 +1372,12 @@ static int arp_seq_open(struct inode *in
+ {
+ struct seq_file *seq;
+ int rc = -ENOMEM;
+- struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+-
++ struct neigh_seq_state *s;
++
++ if (!ve_is_super(get_exec_env()))
++ return -EPERM;
++
++ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ goto out;
+
+@@ -1399,7 +1405,7 @@ static struct file_operations arp_seq_fo
+
+ static int __init arp_proc_init(void)
+ {
+- if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops))
++ if (!proc_glob_fops_create("net/arp", S_IRUGO, &arp_seq_fops))
+ return -ENOMEM;
+ return 0;
+ }
+diff -uprN linux-2.6.15.orig/net/ipv4/devinet.c linux-2.6.15-ve025stab014/net/ipv4/devinet.c
+--- linux-2.6.15.orig/net/ipv4/devinet.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/devinet.c 2006-01-27 14:48:08.000000000 +0300
+@@ -69,7 +69,7 @@ struct ipv4_devconf ipv4_devconf = {
+ .shared_media = 1,
+ };
+
+-static struct ipv4_devconf ipv4_devconf_dflt = {
++struct ipv4_devconf ipv4_devconf_dflt = {
+ .accept_redirects = 1,
+ .send_redirects = 1,
+ .secure_redirects = 1,
+@@ -77,10 +77,16 @@ static struct ipv4_devconf ipv4_devconf_
+ .accept_source_route = 1,
+ };
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_ipv4_devconf_dflt (*(get_exec_env()->_ipv4_devconf_dflt))
++#else
++#define ve_ipv4_devconf_dflt ipv4_devconf_dflt
++#endif
++
+ static void rtmsg_ifa(int event, struct in_ifaddr *);
+
+ static struct notifier_block *inetaddr_chain;
+-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
++void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ int destroy);
+ #ifdef CONFIG_SYSCTL
+ static void devinet_sysctl_register(struct in_device *in_dev,
+@@ -230,7 +236,7 @@ int inet_addr_onlink(struct in_device *i
+ return 0;
+ }
+
+-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
++void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ int destroy)
+ {
+ struct in_ifaddr *promote = NULL;
+@@ -576,7 +582,7 @@ int devinet_ioctl(unsigned int cmd, void
+
+ case SIOCSIFFLAGS:
+ ret = -EACCES;
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ goto out;
+ break;
+ case SIOCSIFADDR: /* Set interface address (and family) */
+@@ -584,7 +590,7 @@ int devinet_ioctl(unsigned int cmd, void
+ case SIOCSIFDSTADDR: /* Set the destination address */
+ case SIOCSIFNETMASK: /* Set the netmask for the interface */
+ ret = -EACCES;
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ goto out;
+ ret = -EINVAL;
+ if (sin->sin_family != AF_INET)
+@@ -1001,7 +1007,7 @@ static int inetdev_event(struct notifier
+ case NETDEV_UP:
+ if (dev->mtu < 68)
+ break;
+- if (dev == &loopback_dev) {
++ if (dev == &visible_loopback_dev) {
+ struct in_ifaddr *ifa;
+ if ((ifa = inet_alloc_ifa()) != NULL) {
+ ifa->ifa_local =
+@@ -1161,10 +1167,10 @@ static struct rtnetlink_link inet_rtnetl
+ void inet_forward_change(void)
+ {
+ struct net_device *dev;
+- int on = ipv4_devconf.forwarding;
++ int on = ve_ipv4_devconf.forwarding;
+
+- ipv4_devconf.accept_redirects = !on;
+- ipv4_devconf_dflt.forwarding = on;
++ ve_ipv4_devconf.accept_redirects = !on;
++ ve_ipv4_devconf_dflt.forwarding = on;
+
+ read_lock(&dev_base_lock);
+ for (dev = dev_base; dev; dev = dev->next) {
+@@ -1189,9 +1195,9 @@ static int devinet_sysctl_forward(ctl_ta
+ int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+ if (write && *valp != val) {
+- if (valp == &ipv4_devconf.forwarding)
++ if (valp == &ve_ipv4_devconf.forwarding)
+ inet_forward_change();
+- else if (valp != &ipv4_devconf_dflt.forwarding)
++ else if (valp != &ve_ipv4_devconf_dflt.forwarding)
+ rt_cache_flush(0);
+ }
+
+@@ -1462,30 +1468,22 @@ static struct devinet_sysctl_table {
+ },
+ };
+
+-static void devinet_sysctl_register(struct in_device *in_dev,
+- struct ipv4_devconf *p)
++static struct devinet_sysctl_table *__devinet_sysctl_register(char *dev_name,
++ int ifindex, struct ipv4_devconf *p)
+ {
+ int i;
+- struct net_device *dev = in_dev ? in_dev->dev : NULL;
+- struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+- char *dev_name = NULL;
++ struct devinet_sysctl_table *t;
+
++ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+- return;
++ goto out;
++
+ memcpy(t, &devinet_sysctl, sizeof(*t));
+ for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
+ t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
+ t->devinet_vars[i].de = NULL;
+ }
+
+- if (dev) {
+- dev_name = dev->name;
+- t->devinet_dev[0].ctl_name = dev->ifindex;
+- } else {
+- dev_name = "default";
+- t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+- }
+-
+ /*
+ * Make a copy of dev_name, because '.procname' is regarded as const
+ * by sysctl and we wouldn't want anyone to change it under our feet
+@@ -1493,8 +1491,9 @@ static void devinet_sysctl_register(stru
+ */
+ dev_name = kstrdup(dev_name, GFP_KERNEL);
+ if (!dev_name)
+- goto free;
++ goto out_free_table;
+
++ t->devinet_dev[0].ctl_name = ifindex;
+ t->devinet_dev[0].procname = dev_name;
+ t->devinet_dev[0].child = t->devinet_vars;
+ t->devinet_dev[0].de = NULL;
+@@ -1507,17 +1506,38 @@ static void devinet_sysctl_register(stru
+
+ t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
+ if (!t->sysctl_header)
+- goto free_procname;
++ goto out_free_procname;
+
+- p->sysctl = t;
+- return;
++ return t;
+
+ /* error path */
+- free_procname:
++out_free_procname:
+ kfree(dev_name);
+- free:
++out_free_table:
+ kfree(t);
+- return;
++out:
++ printk(KERN_DEBUG "Can't register net/ipv4/conf sysctls.\n");
++ return NULL;
++}
++
++static void devinet_sysctl_register(struct in_device *in_dev,
++ struct ipv4_devconf *p)
++{
++ struct net_device *dev;
++ char *dev_name;
++ int ifindex;
++
++ dev = in_dev ? in_dev->dev : NULL;
++
++ if (dev) {
++ dev_name = dev->name;
++ ifindex = dev->ifindex;
++ } else {
++ dev_name = "default";
++ ifindex = NET_PROTO_CONF_DEFAULT;
++ }
++
++ p->sysctl = __devinet_sysctl_register(dev_name, ifindex, p);
+ }
+
+ static void devinet_sysctl_unregister(struct ipv4_devconf *p)
+@@ -1530,7 +1550,170 @@ static void devinet_sysctl_unregister(st
+ kfree(t);
+ }
+ }
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static ctl_table net_sysctl_tables[] = {
++ /* 0: net */
++ {
++ .ctl_name = CTL_NET,
++ .procname = "net",
++ .mode = 0555,
++ .child = &net_sysctl_tables[2],
++ },
++ { .ctl_name = 0, },
++ /* 2: net/ipv4 */
++ {
++ .ctl_name = NET_IPV4,
++ .procname = "ipv4",
++ .mode = 0555,
++ .child = &net_sysctl_tables[4],
++ },
++ { .ctl_name = 0, },
++ /* 4, 5: net/ipv4/[vars] */
++ {
++ .ctl_name = NET_IPV4_FORWARD,
++ .procname = "ip_forward",
++ .data = &ipv4_devconf.forwarding,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &ipv4_sysctl_forward,
++ .strategy = &ipv4_sysctl_forward_strategy,
++ },
++ {
++ .ctl_name = NET_IPV4_ROUTE,
++ .procname = "route",
++ .maxlen = 0,
++ .mode = 0555,
++ .child = &net_sysctl_tables[7],
++ },
++ { .ctl_name = 0 },
++ /* 7: net/ipv4/route/flush */
++ {
++ .ctl_name = NET_IPV4_ROUTE_FLUSH,
++ .procname = "flush",
++ .data = NULL, /* setuped below */
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &ipv4_sysctl_rtcache_flush,
++ .strategy = &ipv4_sysctl_rtcache_flush_strategy,
++ },
++ { .ctl_name = 0 },
++};
++
++static int ip_forward_sysctl_register(struct ve_struct *ve,
++ struct ipv4_devconf *p)
++{
++ struct ctl_table_header *hdr;
++ ctl_table *root;
++
++ root = clone_sysctl_template(net_sysctl_tables,
++ sizeof(net_sysctl_tables) / sizeof(ctl_table));
++ if (root == NULL)
++ goto out;
++
++ root[4].data = &p->forwarding;
++ root[7].data = &ipv4_flush_delay;
++
++ hdr = register_sysctl_table(root, 1);
++ if (hdr == NULL)
++ goto out_free;
++
++ ve->forward_header = hdr;
++ ve->forward_table = root;
++ return 0;
++
++out_free:
++ free_sysctl_clone(root);
++out:
++ return -ENOMEM;
++}
++
++static inline void ip_forward_sysctl_unregister(struct ve_struct *ve)
++{
++ unregister_sysctl_table(ve->forward_header);
++ ve->forward_header = NULL;
++}
++
++static inline void ip_forward_sysctl_free(struct ve_struct *ve)
++{
++ free_sysctl_clone(ve->forward_table);
++ ve->forward_table = NULL;
++}
++#endif
++#endif
++
++int devinet_sysctl_init(struct ve_struct *ve)
++{
++ int err = 0;
++#ifdef CONFIG_SYSCTL
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ struct ipv4_devconf *conf, *conf_def;
++
++ err = -ENOMEM;
++
++ conf = kmalloc(sizeof(*conf), GFP_KERNEL);
++ if (!conf)
++ goto err1;
++
++ memcpy(conf, &ipv4_devconf, sizeof(*conf));
++ conf->sysctl = __devinet_sysctl_register("all",
++ NET_PROTO_CONF_ALL, conf);
++ if (!conf->sysctl)
++ goto err2;
++
++ conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL);
++ if (!conf_def)
++ goto err3;
++
++ memcpy(conf_def, &ipv4_devconf_dflt, sizeof(*conf_def));
++ conf_def->sysctl = __devinet_sysctl_register("default",
++ NET_PROTO_CONF_DEFAULT, conf_def);
++ if (!conf_def->sysctl)
++ goto err4;
++
++ err = ip_forward_sysctl_register(ve, conf);
++ if (err)
++ goto err5;
++
++ ve->_ipv4_devconf = conf;
++ ve->_ipv4_devconf_dflt = conf_def;
++ return 0;
++
++err5:
++ devinet_sysctl_unregister(conf_def);
++err4:
++ kfree(conf_def);
++err3:
++ devinet_sysctl_unregister(conf);
++err2:
++ kfree(conf);
++err1:
+ #endif
++#endif
++ return err;
++}
++
++void devinet_sysctl_fini(struct ve_struct *ve)
++{
++#ifdef CONFIG_SYSCTL
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ ip_forward_sysctl_unregister(ve);
++ devinet_sysctl_unregister(ve->_ipv4_devconf);
++ devinet_sysctl_unregister(ve->_ipv4_devconf_dflt);
++#endif
++#endif
++}
++
++void devinet_sysctl_free(struct ve_struct *ve)
++{
++#ifdef CONFIG_SYSCTL
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ ip_forward_sysctl_free(ve);
++ kfree(ve->_ipv4_devconf);
++ kfree(ve->_ipv4_devconf_dflt);
++#endif
++#endif
++}
+
+ void __init devinet_init(void)
+ {
+@@ -1540,13 +1723,18 @@ void __init devinet_init(void)
+ #ifdef CONFIG_SYSCTL
+ devinet_sysctl.sysctl_header =
+ register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
+- devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
++ __devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
++ &ipv4_devconf_dflt);
+ #endif
+ }
+
+ EXPORT_SYMBOL(devinet_ioctl);
+ EXPORT_SYMBOL(in_dev_finish_destroy);
+ EXPORT_SYMBOL(inet_select_addr);
++EXPORT_SYMBOL(inet_del_ifa);
+ EXPORT_SYMBOL(inetdev_by_index);
++EXPORT_SYMBOL(devinet_sysctl_init);
++EXPORT_SYMBOL(devinet_sysctl_fini);
++EXPORT_SYMBOL(devinet_sysctl_free);
+ EXPORT_SYMBOL(register_inetaddr_notifier);
+ EXPORT_SYMBOL(unregister_inetaddr_notifier);
+diff -uprN linux-2.6.15.orig/net/ipv4/fib_frontend.c linux-2.6.15-ve025stab014/net/ipv4/fib_frontend.c
+--- linux-2.6.15.orig/net/ipv4/fib_frontend.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/fib_frontend.c 2006-01-27 14:48:08.000000000 +0300
+@@ -51,14 +51,46 @@
+
+ #define RT_TABLE_MIN RT_TABLE_MAIN
+
++#undef ip_fib_local_table
++#undef ip_fib_main_table
+ struct fib_table *ip_fib_local_table;
+ struct fib_table *ip_fib_main_table;
++void prepare_fib_tables(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->_local_table = ip_fib_local_table;
++ ip_fib_local_table = (struct fib_table *)0x12345678;
++ get_ve0()->_main_table = ip_fib_main_table;
++ ip_fib_main_table = (struct fib_table *)0x12345678;
++#endif
++}
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ip_fib_local_table get_exec_env()->_local_table
++#define ip_fib_main_table get_exec_env()->_main_table
++#endif
+
+ #else
+
+ #define RT_TABLE_MIN 1
+
++#undef fib_tables
+ struct fib_table *fib_tables[RT_TABLE_MAX+1];
++void prepare_fib_tables(void)
++{
++#ifdef CONFIG_VE
++ int i;
++
++ BUG_ON(sizeof(fib_tables) !=
++ sizeof(((struct ve_struct *)0)->_fib_tables));
++ memcpy(get_ve0()->_fib_tables, fib_tables, sizeof(fib_tables));
++ for (i = 0; i <= RT_TABLE_MAX; i++)
++ fib_tables[i] = (void *)0x12366678;
++#endif
++}
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define fib_tables get_exec_env()->_fib_tables
++#endif
+
+ struct fib_table *__fib_new_table(int id)
+ {
+@@ -248,7 +280,7 @@ int ip_rt_ioctl(unsigned int cmd, void _
+ switch (cmd) {
+ case SIOCADDRT: /* Add a route */
+ case SIOCDELRT: /* Delete a route */
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ return -EPERM;
+ if (copy_from_user(&r, arg, sizeof(struct rtentry)))
+ return -EFAULT;
+@@ -651,6 +683,7 @@ static struct notifier_block fib_netdev_
+
+ void __init ip_fib_init(void)
+ {
++ prepare_fib_tables();
+ #ifndef CONFIG_IP_MULTIPLE_TABLES
+ ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+ ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
+diff -uprN linux-2.6.15.orig/net/ipv4/fib_hash.c linux-2.6.15-ve025stab014/net/ipv4/fib_hash.c
+--- linux-2.6.15.orig/net/ipv4/fib_hash.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/fib_hash.c 2006-01-27 14:48:08.000000000 +0300
+@@ -35,6 +35,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
+ #include <linux/init.h>
++#include <linux/ve.h>
+
+ #include <net/ip.h>
+ #include <net/protocol.h>
+@@ -72,11 +73,6 @@ struct fn_zone {
+ * can be cheaper than memory lookup, so that FZ_* macros are used.
+ */
+
+-struct fn_hash {
+- struct fn_zone *fn_zones[33];
+- struct fn_zone *fn_zone_list;
+-};
+-
+ static inline u32 fn_hash(u32 key, struct fn_zone *fz)
+ {
+ u32 h = ntohl(key)>>(32 - fz->fz_order);
+@@ -622,7 +618,7 @@ fn_hash_delete(struct fib_table *tb, str
+ return -ESRCH;
+ }
+
+-static int fn_flush_list(struct fn_zone *fz, int idx)
++static int fn_flush_list(struct fn_zone *fz, int idx, int destroy)
+ {
+ struct hlist_head *head = &fz->fz_hash[idx];
+ struct hlist_node *node, *n;
+@@ -637,7 +633,9 @@ static int fn_flush_list(struct fn_zone
+ list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+
+- if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
++ if (fi == NULL)
++ continue;
++ if (destroy || (fi->fib_flags&RTNH_F_DEAD)) {
+ write_lock_bh(&fib_hash_lock);
+ list_del(&fa->fa_list);
+ if (list_empty(&f->fn_alias)) {
+@@ -659,7 +657,7 @@ static int fn_flush_list(struct fn_zone
+ return found;
+ }
+
+-static int fn_hash_flush(struct fib_table *tb)
++static int __fn_hash_flush(struct fib_table *tb, int destroy)
+ {
+ struct fn_hash *table = (struct fn_hash *) tb->tb_data;
+ struct fn_zone *fz;
+@@ -669,11 +667,84 @@ static int fn_hash_flush(struct fib_tabl
+ int i;
+
+ for (i = fz->fz_divisor - 1; i >= 0; i--)
+- found += fn_flush_list(fz, i);
++ found += fn_flush_list(fz, i, destroy);
+ }
+ return found;
+ }
+
++static int fn_hash_flush(struct fib_table *tb)
++{
++ return __fn_hash_flush(tb, 0);
++}
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++void fib_hash_destroy(struct fib_table *tb)
++{
++ __fn_hash_flush(tb, 1);
++ kfree(tb);
++}
++
++/*
++ * Initialization of virtualized networking subsystem.
++ */
++int init_ve_route(struct ve_struct *ve)
++{
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++ if (fib_rules_create())
++ return -ENOMEM;
++ ve->_fib_tables[RT_TABLE_LOCAL] = fib_hash_init(RT_TABLE_LOCAL);
++ if (!ve->_fib_tables[RT_TABLE_LOCAL])
++ goto out_destroy;
++ ve->_fib_tables[RT_TABLE_MAIN] = fib_hash_init(RT_TABLE_MAIN);
++ if (!ve->_fib_tables[RT_TABLE_MAIN])
++ goto out_destroy_local;
++
++ return 0;
++
++out_destroy_local:
++ fib_hash_destroy(ve->_fib_tables[RT_TABLE_LOCAL]);
++out_destroy:
++ fib_rules_destroy();
++ ve->_local_rule = NULL;
++ return -ENOMEM;
++#else
++ ve->_local_table = fib_hash_init(RT_TABLE_LOCAL);
++ if (!ve->_local_table)
++ return -ENOMEM;
++ ve->_main_table = fib_hash_init(RT_TABLE_MAIN);
++ if (!ve->_main_table) {
++ fib_hash_destroy(ve->_local_table);
++ return -ENOMEM;
++ }
++ return 0;
++#endif
++}
++
++void fini_ve_route(struct ve_struct *ve)
++{
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++ int i;
++ for (i=0; i<RT_TABLE_MAX+1; i++)
++ {
++ if (!ve->_fib_tables[i])
++ continue;
++ fib_hash_destroy(ve->_fib_tables[i]);
++ }
++ fib_rules_destroy();
++ ve->_local_rule = NULL;
++#else
++ fib_hash_destroy(ve->_local_table);
++ fib_hash_destroy(ve->_main_table);
++#endif
++ fib_hash_free(ve->_fib_info_hash, ve->_fib_hash_size);
++ fib_hash_free(ve->_fib_info_laddrhash, ve->_fib_hash_size);
++ ve->_fib_info_hash = ve->_fib_info_laddrhash = NULL;
++}
++
++EXPORT_SYMBOL(init_ve_route);
++EXPORT_SYMBOL(fini_ve_route);
++#endif
++
+
+ static inline int
+ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
+@@ -765,7 +836,7 @@ static int fn_hash_dump(struct fib_table
+ return skb->len;
+ }
+
+-#ifdef CONFIG_IP_MULTIPLE_TABLES
++#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+ struct fib_table * fib_hash_init(int id)
+ #else
+ struct fib_table * __init fib_hash_init(int id)
+@@ -1075,13 +1146,13 @@ static struct file_operations fib_seq_fo
+
+ int __init fib_proc_init(void)
+ {
+- if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
++ if (!proc_glob_fops_create("net/route", S_IRUGO, &fib_seq_fops))
+ return -ENOMEM;
+ return 0;
+ }
+
+ void __init fib_proc_exit(void)
+ {
+- proc_net_remove("route");
++ remove_proc_glob_entry("net/route", NULL);
+ }
+ #endif /* CONFIG_PROC_FS */
+diff -uprN linux-2.6.15.orig/net/ipv4/fib_lookup.h linux-2.6.15-ve025stab014/net/ipv4/fib_lookup.h
+--- linux-2.6.15.orig/net/ipv4/fib_lookup.h 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/fib_lookup.h 2006-01-27 14:48:08.000000000 +0300
+@@ -41,5 +41,6 @@ extern struct fib_alias *fib_find_alias(
+ extern int fib_detect_death(struct fib_info *fi, int order,
+ struct fib_info **last_resort,
+ int *last_idx, int *dflt);
++void fib_hash_free(struct hlist_head *hash, int bytes);
+
+ #endif /* _FIB_LOOKUP_H */
+diff -uprN linux-2.6.15.orig/net/ipv4/fib_rules.c linux-2.6.15-ve025stab014/net/ipv4/fib_rules.c
+--- linux-2.6.15.orig/net/ipv4/fib_rules.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/fib_rules.c 2006-01-27 14:48:08.000000000 +0300
+@@ -38,6 +38,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
++#include <linux/rtnetlink.h>
+ #include <linux/init.h>
+
+ #include <net/ip.h>
+@@ -98,9 +99,87 @@ static struct fib_rule local_rule = {
+ .r_action = RTN_UNICAST,
+ };
+
+-static struct fib_rule *fib_rules = &local_rule;
+ static DEFINE_RWLOCK(fib_rules_lock);
+
++void __init prepare_fib_rules(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->_local_rule = &local_rule;
++ get_ve0()->_fib_rules = &local_rule;
++#endif
++}
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define local_rule (*(get_exec_env()->_local_rule))
++#define fib_rules (get_exec_env()->_fib_rules)
++#else
++static struct fib_rule *fib_rules = &local_rule;
++#endif
++
++#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
++int fib_rules_create()
++{
++ struct fib_rule *default_rule, *main_rule, *loc_rule;
++
++ default_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL);
++ if (default_rule == NULL)
++ goto out_def;
++ memset(default_rule, 0, sizeof(struct fib_rule));
++ atomic_set(&default_rule->r_clntref, 1);
++ default_rule->r_preference = 0x7FFF;
++ default_rule->r_table = RT_TABLE_DEFAULT;
++ default_rule->r_action = RTN_UNICAST;
++
++ main_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL);
++ if (main_rule == NULL)
++ goto out_main;
++ memset(main_rule, 0, sizeof(struct fib_rule));
++ atomic_set(&main_rule->r_clntref, 1);
++ main_rule->r_preference = 0x7FFE;
++ main_rule->r_table = RT_TABLE_MAIN;
++ main_rule->r_action = RTN_UNICAST;
++ main_rule->r_next = default_rule;
++
++ loc_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL);
++ if (loc_rule == NULL)
++ goto out_loc;
++ memset(loc_rule, 0, sizeof(struct fib_rule));
++ atomic_set(&loc_rule->r_clntref, 1);
++ loc_rule->r_preference = 0;
++ loc_rule->r_table = RT_TABLE_LOCAL;
++ loc_rule->r_action = RTN_UNICAST;
++ loc_rule->r_next = main_rule;
++
++ get_exec_env()->_local_rule = loc_rule;
++ get_exec_env()->_fib_rules = loc_rule;
++
++ return 0;
++
++out_loc:
++ kfree(main_rule);
++out_main:
++ kfree(default_rule);
++out_def:
++ return -1;
++}
++
++void fib_rules_destroy()
++{
++ struct fib_rule *r;
++
++ rtnl_lock();
++ write_lock_bh(&fib_rules_lock);
++ while(fib_rules != NULL) {
++ r = fib_rules;
++ fib_rules = fib_rules->r_next;
++ r->r_dead = 1;
++ fib_rule_put(r);
++ }
++ write_unlock_bh(&fib_rules_lock);
++ rtnl_unlock();
++}
++#endif
++
+ int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
+ struct rtattr **rta = arg;
+@@ -434,5 +513,6 @@ int inet_dump_rules(struct sk_buff *skb,
+
+ void __init fib_rules_init(void)
+ {
++ prepare_fib_rules();
+ register_netdevice_notifier(&fib_rules_notifier);
+ }
+diff -uprN linux-2.6.15.orig/net/ipv4/fib_semantics.c linux-2.6.15-ve025stab014/net/ipv4/fib_semantics.c
+--- linux-2.6.15.orig/net/ipv4/fib_semantics.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/fib_semantics.c 2006-01-27 14:48:08.000000000 +0300
+@@ -32,6 +32,7 @@
+ #include <linux/netdevice.h>
+ #include <linux/if_arp.h>
+ #include <linux/proc_fs.h>
++#include <linux/ve.h>
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
+ #include <linux/init.h>
+@@ -54,6 +55,24 @@ static struct hlist_head *fib_info_laddr
+ static unsigned int fib_hash_size;
+ static unsigned int fib_info_cnt;
+
++void prepare_fib_info(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->_fib_info_hash = fib_info_hash;
++ get_ve0()->_fib_info_laddrhash = fib_info_laddrhash;
++ get_ve0()->_fib_hash_size = fib_hash_size;
++ get_ve0()->_fib_info_cnt = fib_info_cnt;
++#endif
++}
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define fib_info_hash (get_exec_env()->_fib_info_hash)
++#define fib_info_laddrhash (get_exec_env()->_fib_info_laddrhash)
++#define fib_hash_size (get_exec_env()->_fib_hash_size)
++#define fib_info_cnt (get_exec_env()->_fib_info_cnt)
++#endif
++
++
+ #define DEVINDEX_HASHBITS 8
+ #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
+ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
+@@ -233,13 +252,15 @@ static struct fib_info *fib_find_info(co
+ return NULL;
+ }
+
+-static inline unsigned int fib_devindex_hashfn(unsigned int val)
++static inline unsigned int fib_devindex_hashfn(unsigned int val,
++ envid_t veid)
+ {
+ unsigned int mask = DEVINDEX_HASHSIZE - 1;
+
+ return (val ^
+ (val >> DEVINDEX_HASHBITS) ^
+- (val >> (DEVINDEX_HASHBITS * 2))) & mask;
++ (val >> (DEVINDEX_HASHBITS * 2)) ^
++ (veid ^ (veid >> 16))) & mask;
+ }
+
+ /* Check, that the gateway is already configured.
+@@ -255,7 +276,7 @@ int ip_fib_check_default(u32 gw, struct
+
+ read_lock(&fib_info_lock);
+
+- hash = fib_devindex_hashfn(dev->ifindex);
++ hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
+ head = &fib_info_devhash[hash];
+ hlist_for_each_entry(nh, node, head, nh_hash) {
+ if (nh->nh_dev == dev &&
+@@ -578,7 +599,7 @@ static struct hlist_head *fib_hash_alloc
+ __get_free_pages(GFP_KERNEL, get_order(bytes));
+ }
+
+-static void fib_hash_free(struct hlist_head *hash, int bytes)
++void fib_hash_free(struct hlist_head *hash, int bytes)
+ {
+ if (!hash)
+ return;
+@@ -835,7 +856,8 @@ link_it:
+
+ if (!nh->nh_dev)
+ continue;
+- hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
++ hash = fib_devindex_hashfn(nh->nh_dev->ifindex,
++ VEID(nh->nh_dev->owner_env));
+ head = &fib_info_devhash[hash];
+ hlist_add_head(&nh->nh_hash, head);
+ } endfor_nexthops(fi)
+@@ -1182,7 +1204,8 @@ int fib_sync_down(u32 local, struct net_
+
+ if (dev) {
+ struct fib_info *prev_fi = NULL;
+- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
++ unsigned int hash = fib_devindex_hashfn(dev->ifindex,
++ VEID(dev->owner_env));
+ struct hlist_head *head = &fib_info_devhash[hash];
+ struct hlist_node *node;
+ struct fib_nh *nh;
+@@ -1247,7 +1270,7 @@ int fib_sync_up(struct net_device *dev)
+ return 0;
+
+ prev_fi = NULL;
+- hash = fib_devindex_hashfn(dev->ifindex);
++ hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
+ head = &fib_info_devhash[hash];
+ ret = 0;
+
+diff -uprN linux-2.6.15.orig/net/ipv4/igmp.c linux-2.6.15-ve025stab014/net/ipv4/igmp.c
+--- linux-2.6.15.orig/net/ipv4/igmp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/igmp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -2328,7 +2328,8 @@ static inline struct ip_sf_list *igmp_mc
+ struct ip_mc_list *im = NULL;
+ struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
+
+- for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
++ for (state->dev = dev_base,
++ state->idev = NULL, state->im = NULL;
+ state->dev;
+ state->dev = state->dev->next) {
+ struct in_device *idev;
+diff -uprN linux-2.6.15.orig/net/ipv4/inet_connection_sock.c linux-2.6.15-ve025stab014/net/ipv4/inet_connection_sock.c
+--- linux-2.6.15.orig/net/ipv4/inet_connection_sock.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/inet_connection_sock.c 2006-01-27 14:48:08.000000000 +0300
+@@ -25,6 +25,9 @@
+ #include <net/tcp_states.h>
+ #include <net/xfrm.h>
+
++#include <ub/ub_net.h>
++#include <ub/ub_orphan.h>
++
+ #ifdef INET_CSK_DEBUG
+ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
+ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
+@@ -47,6 +50,7 @@ static inline int inet_csk_bind_conflict
+ sk_for_each_bound(sk2, node, &tb->owners) {
+ if (sk != sk2 &&
+ !inet_v6_ipv6only(sk2) &&
++ !ve_accessible_strict(VE_OWNER_SK(sk), VE_OWNER_SK(sk2)) &&
+ (!sk->sk_bound_dev_if ||
+ !sk2->sk_bound_dev_if ||
+ sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+@@ -72,7 +76,9 @@ int inet_csk_get_port(struct inet_hashin
+ struct hlist_node *node;
+ struct inet_bind_bucket *tb;
+ int ret;
++ struct ve_struct *env;
+
++ env = VE_OWNER_SK(sk);
+ local_bh_disable();
+ if (!snum) {
+ int low = sysctl_local_port_range[0];
+@@ -81,11 +87,15 @@ int inet_csk_get_port(struct inet_hashin
+ int rover = net_random() % (high - low) + low;
+
+ do {
+- head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
++ head = &hashinfo->bhash[inet_bhashfn(rover,
++ hashinfo->bhash_size, VEID(env))];
+ spin_lock(&head->lock);
+- inet_bind_bucket_for_each(tb, node, &head->chain)
++ inet_bind_bucket_for_each(tb, node, &head->chain) {
++ if (!ve_accessible_strict(VE_OWNER_TB(tb),env))
++ continue;
+ if (tb->port == rover)
+ goto next;
++ }
+ break;
+ next:
+ spin_unlock(&head->lock);
+@@ -108,11 +118,15 @@ int inet_csk_get_port(struct inet_hashin
+ */
+ snum = rover;
+ } else {
+- head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
++ head = &hashinfo->bhash[inet_bhashfn(snum,
++ hashinfo->bhash_size, VEID(env))];
+ spin_lock(&head->lock);
+- inet_bind_bucket_for_each(tb, node, &head->chain)
++ inet_bind_bucket_for_each(tb, node, &head->chain) {
++ if (!ve_accessible_strict(VE_OWNER_TB(tb), env))
++ continue;
+ if (tb->port == snum)
+ goto tb_found;
++ }
+ }
+ tb = NULL;
+ goto tb_not_found;
+@@ -131,7 +145,7 @@ tb_found:
+ }
+ tb_not_found:
+ ret = 1;
+- if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
++ if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum, env)) == NULL)
+ goto fail_unlock;
+ if (hlist_empty(&tb->owners)) {
+ if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
+@@ -536,7 +550,7 @@ void inet_csk_destroy_sock(struct sock *
+
+ sk_refcnt_debug_release(sk);
+
+- atomic_dec(sk->sk_prot->orphan_count);
++ ub_dec_orphan_count(sk);
+ sock_put(sk);
+ }
+
+@@ -616,7 +630,7 @@ void inet_csk_listen_stop(struct sock *s
+
+ sock_orphan(child);
+
+- atomic_inc(sk->sk_prot->orphan_count);
++ ub_inc_orphan_count(sk);
+
+ inet_csk_destroy_sock(child);
+
+diff -uprN linux-2.6.15.orig/net/ipv4/inet_diag.c linux-2.6.15-ve025stab014/net/ipv4/inet_diag.c
+--- linux-2.6.15.orig/net/ipv4/inet_diag.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/inet_diag.c 2006-01-27 14:48:08.000000000 +0300
+@@ -595,7 +595,9 @@ static int inet_diag_dump(struct sk_buff
+ struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+ const struct inet_diag_handler *handler;
+ struct inet_hashinfo *hashinfo;
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ handler = inet_diag_table[cb->nlh->nlmsg_type];
+ BUG_ON(handler == NULL);
+ hashinfo = handler->idiag_hashinfo;
+@@ -616,6 +618,8 @@ static int inet_diag_dump(struct sk_buff
+ sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
+ struct inet_sock *inet = inet_sk(sk);
+
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (num < s_num) {
+ num++;
+ continue;
+@@ -677,6 +681,8 @@ skip_listen_ht:
+ sk_for_each(sk, node, &head->chain) {
+ struct inet_sock *inet = inet_sk(sk);
+
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (num < s_num)
+ goto next_normal;
+ if (!(r->idiag_states & (1 << sk->sk_state)))
+@@ -699,6 +705,8 @@ next_normal:
+ &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
+ struct inet_sock *inet = inet_sk(sk);
+
++ if (!ve_accessible_veid(inet_twsk(sk)->tw_owner_env, VEID(ve)))
++ continue;
+ if (num < s_num)
+ goto next_dying;
+ if (r->id.idiag_sport != inet->sport &&
+diff -uprN linux-2.6.15.orig/net/ipv4/inet_hashtables.c linux-2.6.15-ve025stab014/net/ipv4/inet_hashtables.c
+--- linux-2.6.15.orig/net/ipv4/inet_hashtables.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/inet_hashtables.c 2006-01-27 14:48:08.000000000 +0300
+@@ -28,7 +28,8 @@
+ */
+ struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
+ struct inet_bind_hashbucket *head,
+- const unsigned short snum)
++ const unsigned short snum,
++ struct ve_struct *ve)
+ {
+ struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC);
+
+@@ -36,6 +37,7 @@ struct inet_bind_bucket *inet_bind_bucke
+ tb->port = snum;
+ tb->fastreuse = 0;
+ INIT_HLIST_HEAD(&tb->owners);
++ SET_VE_OWNER_TB(tb, ve);
+ hlist_add_head(&tb->node, &head->chain);
+ }
+ return tb;
+@@ -69,10 +71,13 @@ EXPORT_SYMBOL(inet_bind_hash);
+ */
+ static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
+ {
+- const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
+- struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
++ int bhash;
++ struct inet_bind_hashbucket *head;
+ struct inet_bind_bucket *tb;
+
++ bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size,
++ VEID(VE_OWNER_SK(sk)));
++ head = &hashinfo->bhash[bhash];
+ spin_lock(&head->lock);
+ tb = inet_csk(sk)->icsk_bind_hash;
+ __sk_del_bind_node(sk);
+@@ -128,7 +133,8 @@ EXPORT_SYMBOL(inet_listen_wlock);
+ * wildcarded during the search since they can never be otherwise.
+ */
+ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
+- const unsigned short hnum, const int dif)
++ const unsigned short hnum, const int dif,
++ struct ve_struct *env)
+ {
+ struct sock *result = NULL, *sk;
+ const struct hlist_node *node;
+@@ -137,6 +143,8 @@ struct sock *__inet_lookup_listener(cons
+ sk_for_each(sk, node, head) {
+ const struct inet_sock *inet = inet_sk(sk);
+
++ if (!ve_accessible_strict(VE_OWNER_SK(sk), env))
++ continue;
+ if (inet->num == hnum && !ipv6_only_sock(sk)) {
+ const __u32 rcv_saddr = inet->rcv_saddr;
+ int score = sk->sk_family == PF_INET ? 1 : 0;
+diff -uprN linux-2.6.15.orig/net/ipv4/inet_timewait_sock.c linux-2.6.15-ve025stab014/net/ipv4/inet_timewait_sock.c
+--- linux-2.6.15.orig/net/ipv4/inet_timewait_sock.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/inet_timewait_sock.c 2006-01-27 14:48:08.000000000 +0300
+@@ -32,7 +32,8 @@ void __inet_twsk_kill(struct inet_timewa
+ write_unlock(&ehead->lock);
+
+ /* Disassociate with bind bucket. */
+- bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
++ bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num,
++ hashinfo->bhash_size, tw->tw_owner_env)];
+ spin_lock(&bhead->lock);
+ tb = tw->tw_tb;
+ __hlist_del(&tw->tw_bind_node);
+@@ -66,7 +67,8 @@ void __inet_twsk_hashdance(struct inet_t
+ Note, that any socket with inet->num != 0 MUST be bound in
+ binding cache, even if it is closed.
+ */
+- bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
++ bhead = &hashinfo->bhash[inet_bhashfn(inet->num,
++ hashinfo->bhash_size, tw->tw_owner_env)];
+ spin_lock(&bhead->lock);
+ tw->tw_tb = icsk->icsk_bind_hash;
+ BUG_TRAP(icsk->icsk_bind_hash);
+@@ -90,8 +92,13 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance)
+
+ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
+ {
+- struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
+- SLAB_ATOMIC);
++ struct user_beancounter *ub;
++ struct inet_timewait_sock *tw;
++
++ ub = set_exec_ub(sock_bc(sk)->ub);
++ tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, SLAB_ATOMIC);
++ (void)set_exec_ub(ub);
++
+ if (tw != NULL) {
+ const struct inet_sock *inet = inet_sk(sk);
+
+diff -uprN linux-2.6.15.orig/net/ipv4/ip_forward.c linux-2.6.15-ve025stab014/net/ipv4/ip_forward.c
+--- linux-2.6.15.orig/net/ipv4/ip_forward.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/ip_forward.c 2006-01-27 14:48:08.000000000 +0300
+@@ -87,6 +87,24 @@ int ip_forward(struct sk_buff *skb)
+ if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+ goto sr_failed;
+
++	/*
++	 * We try to optimize forwarding of VE packets:
++	 * do not decrement the TTL (and thus avoid skb_cow)
++	 * when forwarding outgoing packets from a VE.
++	 * Incoming packets still get their TTL decremented,
++	 * since such an skb is not cloned and needs no
++	 * actual cow. So there is at least one place in the
++	 * packet path with a mandatory TTL decrement, which
++	 * is sufficient to prevent routing loops.
++	 */
++ iph = skb->nh.iph;
++ if (
++#ifdef CONFIG_IP_ROUTE_NAT
++ (rt->rt_flags & RTCF_NAT) == 0 && /* no NAT mangling expected */
++#endif /* and */
++ (skb->dev->features & NETIF_F_VENET)) /* src is VENET device */
++ goto no_ttl_decr;
++
+ /* We are about to mangle packet. Copy it! */
+ if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
+ goto drop;
+@@ -95,6 +113,8 @@ int ip_forward(struct sk_buff *skb)
+ /* Decrease ttl after skb cow done */
+ ip_decrease_ttl(iph);
+
++no_ttl_decr:
++
+ /*
+ * We now generate an ICMP HOST REDIRECT giving the route
+ * we calculated.
+diff -uprN linux-2.6.15.orig/net/ipv4/ip_fragment.c linux-2.6.15-ve025stab014/net/ipv4/ip_fragment.c
+--- linux-2.6.15.orig/net/ipv4/ip_fragment.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/ip_fragment.c 2006-01-27 14:48:08.000000000 +0300
+@@ -42,6 +42,7 @@
+ #include <linux/udp.h>
+ #include <linux/inet.h>
+ #include <linux/netfilter_ipv4.h>
++#include <linux/ve_owner.h>
+
+ /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
+ * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
+@@ -91,8 +92,12 @@ struct ipq {
+ struct timer_list timer; /* when will this queue expire? */
+ int iif;
+ struct timeval stamp;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(IPQ, struct ipq, owner_env)
++DCL_VE_OWNER(IPQ, struct ipq, owner_env)
++
+ /* Hash table. */
+
+ #define IPQ_HASHSZ 64
+@@ -176,7 +181,8 @@ static __inline__ void frag_free_queue(s
+
+ static __inline__ struct ipq *frag_alloc_queue(void)
+ {
+- struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
++ struct ipq *qp = kmalloc(sizeof(struct ipq) + sizeof(void *),
++ GFP_ATOMIC);
+
+ if(!qp)
+ return NULL;
+@@ -269,6 +275,9 @@ static void ip_evictor(void)
+ static void ip_expire(unsigned long arg)
+ {
+ struct ipq *qp = (struct ipq *) arg;
++ struct ve_struct *envid;
++
++ envid = set_exec_env(VE_OWNER_IPQ(qp));
+
+ spin_lock(&qp->lock);
+
+@@ -291,6 +300,8 @@ static void ip_expire(unsigned long arg)
+ out:
+ spin_unlock(&qp->lock);
+ ipq_put(qp, NULL);
++
++ (void)set_exec_env(envid);
+ }
+
+ /* Creation primitives. */
+@@ -312,7 +323,8 @@ static struct ipq *ip_frag_intern(unsign
+ qp->saddr == qp_in->saddr &&
+ qp->daddr == qp_in->daddr &&
+ qp->protocol == qp_in->protocol &&
+- qp->user == qp_in->user) {
++ qp->user == qp_in->user &&
++ qp->owner_env == get_exec_env()) {
+ atomic_inc(&qp->refcnt);
+ write_unlock(&ipfrag_lock);
+ qp_in->last_in |= COMPLETE;
+@@ -361,6 +373,8 @@ static struct ipq *ip_frag_create(unsign
+ spin_lock_init(&qp->lock);
+ atomic_set(&qp->refcnt, 1);
+
++ SET_VE_OWNER_IPQ(qp, get_exec_env());
++
+ return ip_frag_intern(hash, qp);
+
+ out_nomem:
+@@ -387,7 +401,8 @@ static inline struct ipq *ip_find(struct
+ qp->saddr == saddr &&
+ qp->daddr == daddr &&
+ qp->protocol == protocol &&
+- qp->user == user) {
++ qp->user == user &&
++ qp->owner_env == get_exec_env()) {
+ atomic_inc(&qp->refcnt);
+ read_unlock(&ipfrag_lock);
+ return qp;
+@@ -653,6 +668,9 @@ struct sk_buff *ip_defrag(struct sk_buff
+ qp->meat == qp->len)
+ ret = ip_frag_reasm(qp, dev);
+
++ if (ret)
++ SET_VE_OWNER_SKB(ret, VE_OWNER_SKB(skb));
++
+ spin_unlock(&qp->lock);
+ ipq_put(qp, NULL);
+ return ret;
+@@ -663,6 +681,51 @@ struct sk_buff *ip_defrag(struct sk_buff
+ return NULL;
+ }
+
++#ifdef CONFIG_VE
++/* XXX */
++void ip_fragment_cleanup(struct ve_struct *envid)
++{
++ int i, progress;
++
++	/* All operations on fragment queues are performed from NET_RX/TX
++	 * soft interrupts or from timer context. --Den */
++ local_bh_disable();
++ do {
++ progress = 0;
++ for (i = 0; i < IPQ_HASHSZ; i++) {
++ struct ipq *qp;
++ struct hlist_node *p, *n;
++
++ if (hlist_empty(&ipq_hash[i]))
++ continue;
++inner_restart:
++ read_lock(&ipfrag_lock);
++ hlist_for_each_entry_safe(qp, p, n,
++ &ipq_hash[i], list) {
++ if (!ve_accessible_strict(
++ VE_OWNER_IPQ(qp),
++ envid))
++ continue;
++ atomic_inc(&qp->refcnt);
++ read_unlock(&ipfrag_lock);
++
++ spin_lock(&qp->lock);
++ if (!(qp->last_in&COMPLETE))
++ ipq_kill(qp);
++ spin_unlock(&qp->lock);
++
++ ipq_put(qp, NULL);
++ progress = 1;
++ goto inner_restart;
++ }
++ read_unlock(&ipfrag_lock);
++ }
++ } while(progress);
++ local_bh_enable();
++}
++EXPORT_SYMBOL(ip_fragment_cleanup);
++#endif
++
+ void ipfrag_init(void)
+ {
+ ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+diff -uprN linux-2.6.15.orig/net/ipv4/ipmr.c linux-2.6.15-ve025stab014/net/ipv4/ipmr.c
+--- linux-2.6.15.orig/net/ipv4/ipmr.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/ipmr.c 2006-01-27 14:48:08.000000000 +0300
+@@ -834,7 +834,7 @@ static void mrtsock_destruct(struct sock
+ {
+ rtnl_lock();
+ if (sk == mroute_socket) {
+- ipv4_devconf.mc_forwarding--;
++ ve_ipv4_devconf.mc_forwarding--;
+
+ write_lock_bh(&mrt_lock);
+ mroute_socket=NULL;
+@@ -885,7 +885,7 @@ int ip_mroute_setsockopt(struct sock *sk
+ mroute_socket=sk;
+ write_unlock_bh(&mrt_lock);
+
+- ipv4_devconf.mc_forwarding++;
++ ve_ipv4_devconf.mc_forwarding++;
+ }
+ rtnl_unlock();
+ return ret;
+diff -uprN linux-2.6.15.orig/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.15-ve025stab014/net/ipv4/ipvs/ip_vs_conn.c
+--- linux-2.6.15.orig/net/ipv4/ipvs/ip_vs_conn.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/ipvs/ip_vs_conn.c 2006-01-27 14:48:06.000000000 +0300
+@@ -896,7 +896,8 @@ int ip_vs_conn_init(void)
+ /* Allocate ip_vs_conn slab cache */
+ ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+ sizeof(struct ip_vs_conn), 0,
+- SLAB_HWCACHE_ALIGN, NULL, NULL);
++ SLAB_HWCACHE_ALIGN | SLAB_UBC,
++ NULL, NULL);
+ if (!ip_vs_conn_cachep) {
+ vfree(ip_vs_conn_tab);
+ return -ENOMEM;
+diff -uprN linux-2.6.15.orig/net/ipv4/ipvs/ip_vs_core.c linux-2.6.15-ve025stab014/net/ipv4/ipvs/ip_vs_core.c
+--- linux-2.6.15.orig/net/ipv4/ipvs/ip_vs_core.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/ipvs/ip_vs_core.c 2006-01-27 14:48:08.000000000 +0300
+@@ -955,6 +955,10 @@ ip_vs_in(unsigned int hooknum, struct sk
+ * Big tappo: only PACKET_HOST (neither loopback nor mcasts)
+ * ... don't know why 1st test DOES NOT include 2nd (?)
+ */
++ /*
++ * VZ: the question above is right.
++ * The second test is superfluous.
++ */
+ if (unlikely(skb->pkt_type != PACKET_HOST
+ || skb->dev == &loopback_dev || skb->sk)) {
+ IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_core.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_core.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_core.c 2006-01-27 14:48:08.000000000 +0300
+@@ -49,6 +49,7 @@
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_core.h>
+ #include <linux/netfilter_ipv4/listhelp.h>
++#include <ub/ub_mem.h>
+
+ #define IP_CONNTRACK_VERSION "2.4"
+
+@@ -60,22 +61,39 @@
+
+ DEFINE_RWLOCK(ip_conntrack_lock);
+
+-/* ip_conntrack_standalone needs this */
+-atomic_t ip_conntrack_count = ATOMIC_INIT(0);
++#ifdef CONFIG_VE_IPTABLES
++#define ve_ip_conntrack_helpers \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_helpers)
++#define ve_ip_conntrack_max \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_max)
++#define ve_ip_conntrack_count \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_count)
++#else
+
+ void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
+ LIST_HEAD(ip_conntrack_expect_list);
+ struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+ static LIST_HEAD(helpers);
++struct list_head *ip_conntrack_hash;
++#define ve_ip_conntrack_count ip_conntrack_count
++#define ve_ip_conntrack_helpers helpers
++#define ve_ip_conntrack_max ip_conntrack_max
++#define ve_ip_conntrack_count ip_conntrack_count
++#endif
++
++/* ip_conntrack_standalone needs this */
++atomic_t ip_conntrack_count = ATOMIC_INIT(0);
++
+ unsigned int ip_conntrack_htable_size = 0;
+ int ip_conntrack_max;
+-struct list_head *ip_conntrack_hash;
+ static kmem_cache_t *ip_conntrack_cachep __read_mostly;
+ static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
+ struct ip_conntrack ip_conntrack_untracked;
+ unsigned int ip_ct_log_invalid;
+ static LIST_HEAD(unconfirmed);
++#ifndef CONFIG_VE
+ static int ip_conntrack_vmalloc;
++#endif
+
+ static unsigned int ip_conntrack_next_id = 1;
+ static unsigned int ip_conntrack_expect_next_id = 1;
+@@ -226,7 +244,7 @@ __ip_conntrack_expect_find(const struct
+ {
+ struct ip_conntrack_expect *i;
+
+- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
++ list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
+ if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+ atomic_inc(&i->use);
+ return i;
+@@ -255,7 +273,7 @@ find_expectation(const struct ip_conntra
+ {
+ struct ip_conntrack_expect *i;
+
+- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
++ list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
+ /* If master is not in hash table yet (ie. packet hasn't left
+ this machine yet), how can other end know about expected?
+ Hence these are not the droids you are looking for (if
+@@ -284,7 +302,7 @@ void ip_ct_remove_expectations(struct ip
+ if (ct->expecting == 0)
+ return;
+
+- list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
++ list_for_each_entry_safe(i, tmp, &ve_ip_conntrack_expect_list, list) {
+ if (i->master == ct && del_timer(&i->timeout)) {
+ ip_ct_unlink_expect(i);
+ ip_conntrack_expect_put(i);
+@@ -302,8 +320,10 @@ clean_from_lists(struct ip_conntrack *ct
+
+ ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+- LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+- LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
++ LIST_DELETE(&ve_ip_conntrack_hash[ho],
++ &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
++ LIST_DELETE(&ve_ip_conntrack_hash[hr],
++ &ct->tuplehash[IP_CT_DIR_REPLY]);
+
+ /* Destroy all pending expectations */
+ ip_ct_remove_expectations(ct);
+@@ -329,8 +349,8 @@ destroy_conntrack(struct nf_conntrack *n
+ if (proto && proto->destroy)
+ proto->destroy(ct);
+
+- if (ip_conntrack_destroyed)
+- ip_conntrack_destroyed(ct);
++ if (ve_ip_conntrack_destroyed)
++ ve_ip_conntrack_destroyed(ct);
+
+ write_lock_bh(&ip_conntrack_lock);
+ /* Expectations will have been removed in clean_from_lists,
+@@ -358,7 +378,11 @@ destroy_conntrack(struct nf_conntrack *n
+ static void death_by_timeout(unsigned long ul_conntrack)
+ {
+ struct ip_conntrack *ct = (void *)ul_conntrack;
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_struct *old;
+
++ old = set_exec_env(VE_OWNER_CT(ct));
++#endif
+ write_lock_bh(&ip_conntrack_lock);
+ /* Inside lock so preempt is disabled on module removal path.
+ * Otherwise we can get spurious warnings. */
+@@ -366,6 +390,9 @@ static void death_by_timeout(unsigned lo
+ clean_from_lists(ct);
+ write_unlock_bh(&ip_conntrack_lock);
+ ip_conntrack_put(ct);
++#ifdef CONFIG_VE_IPTABLES
++ (void)set_exec_env(old);
++#endif
+ }
+
+ static inline int
+@@ -386,7 +413,7 @@ __ip_conntrack_find(const struct ip_conn
+ unsigned int hash = hash_conntrack(tuple);
+
+ ASSERT_READ_LOCK(&ip_conntrack_lock);
+- list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
++ list_for_each_entry(h, &ve_ip_conntrack_hash[hash], list) {
+ if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+ CONNTRACK_STAT_INC(found);
+ return h;
+@@ -418,9 +445,9 @@ static void __ip_conntrack_hash_insert(s
+ unsigned int repl_hash)
+ {
+ ct->id = ++ip_conntrack_next_id;
+- list_prepend(&ip_conntrack_hash[hash],
++ list_prepend(&ve_ip_conntrack_hash[hash],
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+- list_prepend(&ip_conntrack_hash[repl_hash],
++ list_prepend(&ve_ip_conntrack_hash[repl_hash],
+ &ct->tuplehash[IP_CT_DIR_REPLY].list);
+ }
+
+@@ -471,11 +498,11 @@ __ip_conntrack_confirm(struct sk_buff **
+ /* See if there's one in the list already, including reverse:
+ NAT could have grabbed it without realizing, since we're
+ not in the hash. If there is, we lost race. */
+- if (!LIST_FIND(&ip_conntrack_hash[hash],
++ if (!LIST_FIND(&ve_ip_conntrack_hash[hash],
+ conntrack_tuple_cmp,
+ struct ip_conntrack_tuple_hash *,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
+- && !LIST_FIND(&ip_conntrack_hash[repl_hash],
++ && !LIST_FIND(&ve_ip_conntrack_hash[repl_hash],
+ conntrack_tuple_cmp,
+ struct ip_conntrack_tuple_hash *,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
+@@ -569,7 +596,7 @@ static inline int helper_cmp(const struc
+ static struct ip_conntrack_helper *
+ __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
+ {
+- return LIST_FIND(&helpers, helper_cmp,
++ return LIST_FIND(&ve_ip_conntrack_helpers, helper_cmp,
+ struct ip_conntrack_helper *,
+ tuple);
+ }
+@@ -605,7 +632,7 @@ void ip_conntrack_helper_put(struct ip_c
+ struct ip_conntrack_protocol *
+ __ip_conntrack_proto_find(u_int8_t protocol)
+ {
+- return ip_ct_protos[protocol];
++ return ve_ip_ct_protos[protocol];
+ }
+
+ /* this is guaranteed to always return a valid protocol helper, since
+@@ -632,29 +659,32 @@ void ip_conntrack_proto_put(struct ip_co
+ }
+
+ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
+- struct ip_conntrack_tuple *repl)
++ struct ip_conntrack_tuple *repl, struct user_beancounter *ub)
+ {
+ struct ip_conntrack *conntrack;
++ struct user_beancounter *old_ub;
+
+ if (!ip_conntrack_hash_rnd_initted) {
+ get_random_bytes(&ip_conntrack_hash_rnd, 4);
+ ip_conntrack_hash_rnd_initted = 1;
+ }
+
+- if (ip_conntrack_max
+- && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
++ if (ve_ip_conntrack_max
++ && atomic_read(&ve_ip_conntrack_count) >= ve_ip_conntrack_max) {
+ unsigned int hash = hash_conntrack(orig);
+ /* Try dropping from this hash chain. */
+- if (!early_drop(&ip_conntrack_hash[hash])) {
++ if (!early_drop(&ve_ip_conntrack_hash[hash])) {
+ if (net_ratelimit())
+- printk(KERN_WARNING
+- "ip_conntrack: table full, dropping"
+- " packet.\n");
++ ve_printk(VE_LOG_BOTH, KERN_WARNING
++ "ip_conntrack: VPS %d: table full, dropping"
++ " packet.\n", VEID(get_exec_env()));
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
++ old_ub = set_exec_ub(ub);
+ conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
++ (void)set_exec_ub(old_ub);
+ if (!conntrack) {
+ DEBUGP("Can't allocate conntrack.\n");
+ return ERR_PTR(-ENOMEM);
+@@ -669,8 +699,11 @@ struct ip_conntrack *ip_conntrack_alloc(
+ init_timer(&conntrack->timeout);
+ conntrack->timeout.data = (unsigned long)conntrack;
+ conntrack->timeout.function = death_by_timeout;
++#ifdef CONFIG_VE_IPTABLES
++ SET_VE_OWNER_CT(conntrack, get_exec_env());
++#endif
+
+- atomic_inc(&ip_conntrack_count);
++ atomic_inc(&ve_ip_conntrack_count);
+
+ return conntrack;
+ }
+@@ -678,7 +711,7 @@ struct ip_conntrack *ip_conntrack_alloc(
+ void
+ ip_conntrack_free(struct ip_conntrack *conntrack)
+ {
+- atomic_dec(&ip_conntrack_count);
++ atomic_dec(&ve_ip_conntrack_count);
+ kmem_cache_free(ip_conntrack_cachep, conntrack);
+ }
+
+@@ -692,13 +725,22 @@ init_conntrack(struct ip_conntrack_tuple
+ struct ip_conntrack *conntrack;
+ struct ip_conntrack_tuple repl_tuple;
+ struct ip_conntrack_expect *exp;
++ struct user_beancounter *ub;
+
+ if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
+ DEBUGP("Can't invert tuple.\n");
+ return NULL;
+ }
+
+- conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
++#ifdef CONFIG_USER_RESOURCE
++ if (skb->dev != NULL) /* received skb */
++ ub = netdev_bc(skb->dev)->exec_ub;
++ else if (skb->sk != NULL) /* sent skb */
++ ub = sock_bc(skb->sk)->ub;
++ else
++#endif
++ ub = NULL;
++ conntrack = ip_conntrack_alloc(tuple, &repl_tuple, ub);
+ if (conntrack == NULL || IS_ERR(conntrack))
+ return (struct ip_conntrack_tuple_hash *)conntrack;
+
+@@ -925,7 +967,7 @@ void ip_conntrack_unexpect_related(struc
+
+ write_lock_bh(&ip_conntrack_lock);
+ /* choose the the oldest expectation to evict */
+- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
++ list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
+ if (expect_matches(i, exp) && del_timer(&i->timeout)) {
+ ip_ct_unlink_expect(i);
+ write_unlock_bh(&ip_conntrack_lock);
+@@ -963,7 +1005,7 @@ static void ip_conntrack_expect_insert(s
+ {
+ atomic_inc(&exp->use);
+ exp->master->expecting++;
+- list_add(&exp->list, &ip_conntrack_expect_list);
++ list_add(&exp->list, &ve_ip_conntrack_expect_list);
+
+ init_timer(&exp->timeout);
+ exp->timeout.data = (unsigned long)exp;
+@@ -981,7 +1023,7 @@ static void evict_oldest_expect(struct i
+ {
+ struct ip_conntrack_expect *i;
+
+- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
++ list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
+ if (i->master == master) {
+ if (del_timer(&i->timeout)) {
+ ip_ct_unlink_expect(i);
+@@ -1012,7 +1054,7 @@ int ip_conntrack_expect_related(struct i
+ DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
+
+ write_lock_bh(&ip_conntrack_lock);
+- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
++ list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
+ if (expect_matches(i, expect)) {
+ /* Refresh timer: if it's dying, ignore.. */
+ if (refresh_timer(i)) {
+@@ -1060,18 +1102,48 @@ int ip_conntrack_helper_register(struct
+ {
+ BUG_ON(me->timeout == 0);
+ write_lock_bh(&ip_conntrack_lock);
+- list_prepend(&helpers, me);
++ list_prepend(&ve_ip_conntrack_helpers, me);
+ write_unlock_bh(&ip_conntrack_lock);
+
+ return 0;
+ }
+
++int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *me)
++{
++ int ret;
++ struct module *mod = me->me;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct ip_conntrack_helper *tmp;
++ __module_get(mod);
++ ret = -ENOMEM;
++ tmp = kmalloc(sizeof(struct ip_conntrack_helper), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, me, sizeof(struct ip_conntrack_helper));
++ me = tmp;
++ }
++
++ ret = ip_conntrack_helper_register(me);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env())){
++ kfree(me);
++nomem:
++ module_put(mod);
++ }
++ return ret;
++}
++
+ struct ip_conntrack_helper *
+ __ip_conntrack_helper_find_byname(const char *name)
+ {
+ struct ip_conntrack_helper *h;
+
+- list_for_each_entry(h, &helpers, list) {
++ list_for_each_entry(h, &ve_ip_conntrack_helpers, list) {
+ if (!strcmp(h->name, name))
+ return h;
+ }
+@@ -1096,10 +1168,10 @@ void ip_conntrack_helper_unregister(stru
+
+ /* Need write lock here, to delete helper. */
+ write_lock_bh(&ip_conntrack_lock);
+- LIST_DELETE(&helpers, me);
++ LIST_DELETE(&ve_ip_conntrack_helpers, me);
+
+ /* Get rid of expectations */
+- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
++ list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list, list) {
+ if (exp->master->helper == me && del_timer(&exp->timeout)) {
+ ip_ct_unlink_expect(exp);
+ ip_conntrack_expect_put(exp);
+@@ -1108,7 +1180,7 @@ void ip_conntrack_helper_unregister(stru
+ /* Get rid of expecteds, set helpers to NULL. */
+ LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
+ for (i = 0; i < ip_conntrack_htable_size; i++)
+- LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
++ LIST_FIND_W(&ve_ip_conntrack_hash[i], unhelp,
+ struct ip_conntrack_tuple_hash *, me);
+ write_unlock_bh(&ip_conntrack_lock);
+
+@@ -1116,6 +1188,25 @@ void ip_conntrack_helper_unregister(stru
+ synchronize_net();
+ }
+
++void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
++{
++
++ if (!ve_is_super(get_exec_env())) {
++ read_lock_bh(&ip_conntrack_lock);
++ me = list_named_find(&ve_ip_conntrack_helpers, me->name);
++ read_unlock_bh(&ip_conntrack_lock);
++ if (!me)
++ return;
++ }
++
++ ip_conntrack_helper_unregister(me);
++
++ if (!ve_is_super(get_exec_env())) {
++ module_put(me->me);
++ kfree(me);
++ }
++}
++
+ /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
+ void __ip_ct_refresh_acct(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+@@ -1246,7 +1337,7 @@ get_next_corpse(int (*iter)(struct ip_co
+
+ write_lock_bh(&ip_conntrack_lock);
+ for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
+- h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
++ h = LIST_FIND_W(&ve_ip_conntrack_hash[*bucket], do_iter,
+ struct ip_conntrack_tuple_hash *, iter, data);
+ if (h)
+ break;
+@@ -1359,12 +1450,17 @@ static void free_conntrack_hash(struct l
+ get_order(sizeof(struct list_head) * size));
+ }
+
++static void ip_conntrack_cache_free(void)
++{
++ kmem_cache_destroy(ip_conntrack_expect_cachep);
++ kmem_cache_destroy(ip_conntrack_cachep);
++ nf_unregister_sockopt(&so_getorigdst);
++}
++
+ /* Mishearing the voices in his head, our hero wonders how he's
+ supposed to kill the mall. */
+ void ip_conntrack_cleanup(void)
+ {
+- ip_ct_attach = NULL;
+-
+ /* This makes sure all current packets have passed through
+ netfilter framework. Roll on, two-stage module
+ delete... */
+@@ -1373,19 +1469,29 @@ void ip_conntrack_cleanup(void)
+ ip_ct_event_cache_flush();
+ i_see_dead_people:
+ ip_conntrack_flush();
+- if (atomic_read(&ip_conntrack_count) != 0) {
++ if (atomic_read(&ve_ip_conntrack_count) != 0) {
+ schedule();
+ goto i_see_dead_people;
+ }
+- /* wait until all references to ip_conntrack_untracked are dropped */
+- while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
+- schedule();
+-
+- kmem_cache_destroy(ip_conntrack_cachep);
+- kmem_cache_destroy(ip_conntrack_expect_cachep);
+- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
++ if (ve_is_super(get_exec_env())) {
++ /* wait until all references to ip_conntrack_untracked are
++ * dropped */
++ while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
++ schedule();
++ ip_ct_attach = NULL;
++ ip_conntrack_cache_free();
++ }
++ free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
+ ip_conntrack_htable_size);
+- nf_unregister_sockopt(&so_getorigdst);
++ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
++ INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
++ atomic_set(&ve_ip_conntrack_count, 0);
++ ve_ip_conntrack_max = 0;
++#ifdef CONFIG_VE_IPTABLES
++ kfree(ve_ip_ct_protos);
++ ve_ip_ct_protos = NULL;
++ kfree(get_exec_env()->_ip_conntrack);
++#endif
+ }
+
+ static struct list_head *alloc_hashtable(int size, int *vmalloced)
+@@ -1394,13 +1500,13 @@ static struct list_head *alloc_hashtable
+ unsigned int i;
+
+ *vmalloced = 0;
+- hash = (void*)__get_free_pages(GFP_KERNEL,
++ hash = (void*)__get_free_pages(GFP_KERNEL_UBC,
+ get_order(sizeof(struct list_head)
+ * size));
+ if (!hash) {
+ *vmalloced = 1;
+ printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
+- hash = vmalloc(sizeof(struct list_head) * size);
++ hash = ub_vmalloc(sizeof(struct list_head) * size);
+ }
+
+ if (hash)
+@@ -1436,8 +1542,8 @@ static int set_hashsize(const char *val,
+
+ write_lock_bh(&ip_conntrack_lock);
+ for (i = 0; i < ip_conntrack_htable_size; i++) {
+- while (!list_empty(&ip_conntrack_hash[i])) {
+- h = list_entry(ip_conntrack_hash[i].next,
++ while (!list_empty(&ve_ip_conntrack_hash[i])) {
++ h = list_entry(ve_ip_conntrack_hash[i].next,
+ struct ip_conntrack_tuple_hash, list);
+ list_del(&h->list);
+ bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+@@ -1445,12 +1551,12 @@ static int set_hashsize(const char *val,
+ }
+ }
+ old_size = ip_conntrack_htable_size;
+- old_vmalloced = ip_conntrack_vmalloc;
+- old_hash = ip_conntrack_hash;
++ old_vmalloced = ve_ip_conntrack_vmalloc;
++ old_hash = ve_ip_conntrack_hash;
+
+ ip_conntrack_htable_size = hashsize;
+- ip_conntrack_vmalloc = vmalloced;
+- ip_conntrack_hash = hash;
++ ve_ip_conntrack_vmalloc = vmalloced;
++ ve_ip_conntrack_hash = hash;
+ ip_conntrack_hash_rnd = rnd;
+ write_unlock_bh(&ip_conntrack_lock);
+
+@@ -1461,9 +1567,8 @@ static int set_hashsize(const char *val,
+ module_param_call(hashsize, set_hashsize, param_get_uint,
+ &ip_conntrack_htable_size, 0600);
+
+-int __init ip_conntrack_init(void)
++static int ip_conntrack_cache_create(void)
+ {
+- unsigned int i;
+ int ret;
+
+ /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
+@@ -1477,70 +1582,124 @@ int __init ip_conntrack_init(void)
+ if (ip_conntrack_htable_size < 16)
+ ip_conntrack_htable_size = 16;
+ }
+- ip_conntrack_max = 8 * ip_conntrack_htable_size;
++ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
+
+ printk("ip_conntrack version %s (%u buckets, %d max)"
+ " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
+- ip_conntrack_htable_size, ip_conntrack_max,
++ ip_conntrack_htable_size, ve_ip_conntrack_max,
+ sizeof(struct ip_conntrack));
+
+ ret = nf_register_sockopt(&so_getorigdst);
+ if (ret != 0) {
+ printk(KERN_ERR "Unable to register netfilter socket option\n");
+- return ret;
+- }
+-
+- ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
+- &ip_conntrack_vmalloc);
+- if (!ip_conntrack_hash) {
+- printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
+- goto err_unreg_sockopt;
++ goto out_sockopt;
+ }
+
++ ret = -ENOMEM;
+ ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
+ sizeof(struct ip_conntrack), 0,
+- 0, NULL, NULL);
++ SLAB_UBC, NULL, NULL);
+ if (!ip_conntrack_cachep) {
+ printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
+- goto err_free_hash;
++ goto err_unreg_sockopt;
+ }
+
+ ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
+ sizeof(struct ip_conntrack_expect),
+- 0, 0, NULL, NULL);
++ 0, SLAB_UBC, NULL, NULL);
+ if (!ip_conntrack_expect_cachep) {
+ printk(KERN_ERR "Unable to create ip_expect slab cache\n");
+ goto err_free_conntrack_slab;
+ }
++ return 0;
++
++err_free_conntrack_slab:
++ kmem_cache_destroy(ip_conntrack_cachep);
++err_unreg_sockopt:
++ nf_unregister_sockopt(&so_getorigdst);
++out_sockopt:
++ return ret;
++}
++
++int ip_conntrack_init(void)
++{
++ struct ve_struct *env;
++ unsigned int i;
++ int ret;
++
++ env = get_exec_env();
++#ifdef CONFIG_VE_IPTABLES
++ ret = -ENOMEM;
++ env->_ip_conntrack =
++ kmalloc(sizeof(struct ve_ip_conntrack), GFP_KERNEL);
++ if (!env->_ip_conntrack)
++ goto out;
++ memset(env->_ip_conntrack, 0, sizeof(struct ve_ip_conntrack));
++ if (ve_is_super(env)) {
++ ret = ip_conntrack_cache_create();
++ if (ret)
++ goto cache_fail;
++ } else
++ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
++#else /* CONFIG_VE_IPTABLES */
++ ret = ip_conntrack_cache_create();
++ if (ret)
++ goto out;
++#endif
++
++ ret = -ENOMEM;
++ ve_ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
++ &ve_ip_conntrack_vmalloc);
++ if (!ve_ip_conntrack_hash) {
++ printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
++ goto err_free_cache;
++ }
++
++#ifdef CONFIG_VE_IPTABLES
++ ve_ip_ct_protos = (struct ip_conntrack_protocol **)
++ ub_kmalloc(sizeof(void *)*MAX_IP_CT_PROTO, GFP_KERNEL);
++ if (!ve_ip_ct_protos)
++ goto err_free_hash;
++#endif
+
+ /* Don't NEED lock here, but good form anyway. */
+ write_lock_bh(&ip_conntrack_lock);
+ for (i = 0; i < MAX_IP_CT_PROTO; i++)
+- ip_ct_protos[i] = &ip_conntrack_generic_protocol;
++ ve_ip_ct_protos[i] = &ip_conntrack_generic_protocol;
+ /* Sew in builtin protocols. */
+- ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
+- ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
+- ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
++ ve_ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
++ ve_ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
++ ve_ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
+ write_unlock_bh(&ip_conntrack_lock);
+
+- /* For use by ipt_REJECT */
+- ip_ct_attach = ip_conntrack_attach;
++ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
++ INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
+
+- /* Set up fake conntrack:
+- - to never be deleted, not in any hashes */
+- atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
+- /* - and look it like as a confirmed connection */
+- set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
++ if (ve_is_super(env)) {
++ /* For use by ipt_REJECT */
++ ip_ct_attach = ip_conntrack_attach;
++
++ /* Set up fake conntrack:
++ - to never be deleted, not in any hashes */
++ atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
++ /* - and look it like as a confirmed connection */
++ set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
++ }
+
+- return ret;
++ return 0;
+
+-err_free_conntrack_slab:
+- kmem_cache_destroy(ip_conntrack_cachep);
++#ifdef CONFIG_VE_IPTABLES
+ err_free_hash:
+- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
++#endif
++ free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
+ ip_conntrack_htable_size);
+-err_unreg_sockopt:
+- nf_unregister_sockopt(&so_getorigdst);
+-
+- return -ENOMEM;
++err_free_cache:
++ if (ve_is_super(env))
++ ip_conntrack_cache_free();
++#ifdef CONFIG_VE_IPTABLES
++cache_fail:
++ kfree(env->_ip_conntrack);
++#endif
++out:
++ return ret;
+ }
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_ftp.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_ftp.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_ftp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_ftp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/ctype.h>
+ #include <net/checksum.h>
+ #include <net/tcp.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+@@ -28,14 +29,6 @@ MODULE_DESCRIPTION("ftp connection track
+ static char *ftp_buffer;
+ static DEFINE_SPINLOCK(ip_ftp_lock);
+
+-#define MAX_PORTS 8
+-static unsigned short ports[MAX_PORTS];
+-static int ports_c;
+-module_param_array(ports, ushort, &ports_c, 0400);
+-
+-static int loose;
+-module_param(loose, int, 0600);
+-
+ unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ enum ip_ct_ftp_type type,
+@@ -45,6 +38,14 @@ unsigned int (*ip_nat_ftp_hook)(struct s
+ u32 *seq);
+ EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
+
++#define MAX_PORTS 8
++static unsigned short ports[MAX_PORTS];
++static int ports_c;
++module_param_array(ports, ushort, &ports_c, 0400);
++
++static int loose;
++module_param(loose, int, 0600);
++
+ #if 0
+ #define DEBUGP printk
+ #else
+@@ -425,8 +426,8 @@ static int help(struct sk_buff **pskb,
+
+ /* Now, NAT might want to mangle the packet, and register the
+ * (possibly changed) expectation itself. */
+- if (ip_nat_ftp_hook)
+- ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
++ if (ve_ip_nat_ftp_hook)
++ ret = ve_ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
+ matchoff, matchlen, exp, &seq);
+ else {
+ /* Can't expect this? Best to drop packet now. */
+@@ -452,16 +453,39 @@ out_update_nl:
+ static struct ip_conntrack_helper ftp[MAX_PORTS];
+ static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
+
+-/* Not __exit: called from init() */
+-static void fini(void)
++void fini_iptable_ftp(void)
+ {
+ int i;
+ for (i = 0; i < ports_c; i++) {
+ DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
+ ports[i]);
+- ip_conntrack_helper_unregister(&ftp[i]);
++ virt_ip_conntrack_helper_unregister(&ftp[i]);
+ }
++}
+
++int init_iptable_ftp(void)
++{
++ int i, ret;
++
++ for (i = 0; i < ports_c; i++) {
++ DEBUGP("ip_ct_ftp: registering helper for port %d\n",
++ ports[i]);
++ ret = virt_ip_conntrack_helper_register(&ftp[i]);
++ if (ret) {
++ fini_iptable_ftp();
++ return ret;
++ }
++ }
++ return 0;
++}
++
++/* Not __exit: called from init() */
++static void fini(void)
++{
++ KSYMMODUNRESOLVE(ip_conntrack_ftp);
++ KSYMUNRESOLVE(init_iptable_ftp);
++ KSYMUNRESOLVE(fini_iptable_ftp);
++ fini_iptable_ftp();
+ kfree(ftp_buffer);
+ }
+
+@@ -496,13 +520,17 @@ static int __init init(void)
+
+ DEBUGP("ip_ct_ftp: registering helper for port %d\n",
+ ports[i]);
+- ret = ip_conntrack_helper_register(&ftp[i]);
++ ret = virt_ip_conntrack_helper_register(&ftp[i]);
+
+ if (ret) {
+ fini();
+ return ret;
+ }
+ }
++
++ KSYMRESOLVE(init_iptable_ftp);
++ KSYMRESOLVE(fini_iptable_ftp);
++ KSYMMODRESOLVE(ip_conntrack_ftp);
+ return 0;
+ }
+
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_irc.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_irc.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_irc.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_irc.c 2006-01-27 14:48:08.000000000 +0300
+@@ -28,6 +28,7 @@
+ #include <linux/ip.h>
+ #include <net/checksum.h>
+ #include <net/tcp.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
+@@ -35,7 +36,7 @@
+
+ #define MAX_PORTS 8
+ static unsigned short ports[MAX_PORTS];
+-static int ports_c;
++static int ports_c = 0;
+ static int max_dcc_channels = 8;
+ static unsigned int dcc_timeout = 300;
+ /* This is slow, but it's simple. --RR */
+@@ -244,6 +245,33 @@ static char irc_names[MAX_PORTS][sizeof(
+
+ static void fini(void);
+
++void fini_iptable_irc(void)
++{
++ int i;
++ for (i = 0; i < ports_c; i++) {
++ DEBUGP("unregistering port %d\n",
++ ports[i]);
++ virt_ip_conntrack_helper_unregister(&irc_helpers[i]);
++ }
++}
++
++int init_iptable_irc(void)
++{
++ int i, ret;
++
++ for (i = 0; i < ports_c; i++) {
++ DEBUGP("port #%d: %d\n", i, ports[i]);
++ ret = virt_ip_conntrack_helper_register(&irc_helpers[i]);
++ if (ret) {
++ printk("ip_conntrack_irc: ERROR registering port %d\n",
++ ports[i]);
++ fini_iptable_irc();
++ return -EBUSY;
++ }
++ }
++ return 0;
++}
++
+ static int __init init(void)
+ {
+ int i, ret;
+@@ -287,7 +315,7 @@ static int __init init(void)
+
+ DEBUGP("port #%d: %d\n", i, ports[i]);
+
+- ret = ip_conntrack_helper_register(hlpr);
++ ret = virt_ip_conntrack_helper_register(hlpr);
+
+ if (ret) {
+ printk("ip_conntrack_irc: ERROR registering port %d\n",
+@@ -296,6 +324,10 @@ static int __init init(void)
+ return -EBUSY;
+ }
+ }
++
++ KSYMRESOLVE(init_iptable_irc);
++ KSYMRESOLVE(fini_iptable_irc);
++ KSYMMODRESOLVE(ip_conntrack_irc);
+ return 0;
+ }
+
+@@ -303,12 +335,10 @@ static int __init init(void)
+ * it is needed by the init function */
+ static void fini(void)
+ {
+- int i;
+- for (i = 0; i < ports_c; i++) {
+- DEBUGP("unregistering port %d\n",
+- ports[i]);
+- ip_conntrack_helper_unregister(&irc_helpers[i]);
+- }
++ KSYMMODUNRESOLVE(ip_conntrack_irc);
++ KSYMUNRESOLVE(init_iptable_irc);
++ KSYMUNRESOLVE(fini_iptable_irc);
++ fini_iptable_irc();
+ kfree(irc_buffer);
+ }
+
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_netlink.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_netlink.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_netlink.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_netlink.c 2006-01-27 14:48:08.000000000 +0300
+@@ -29,6 +29,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/interrupt.h>
+ #include <linux/notifier.h>
++#include <net/sock.h>
+
+ #include <linux/netfilter.h>
+ #include <linux/netfilter_ipv4/ip_conntrack.h>
+@@ -39,6 +40,8 @@
+
+ #include <linux/netfilter/nfnetlink.h>
+ #include <linux/netfilter/nfnetlink_conntrack.h>
++#include <ub/beancounter.h>
++#include <ub/ub_sk.h>
+
+ MODULE_LICENSE("GPL");
+
+@@ -409,7 +412,7 @@ ctnetlink_dump_table(struct sk_buff *skb
+
+ read_lock_bh(&ip_conntrack_lock);
+ for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
+- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
++ list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
+ h = (struct ip_conntrack_tuple_hash *) i;
+ if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ continue;
+@@ -446,7 +449,7 @@ ctnetlink_dump_table_w(struct sk_buff *s
+
+ write_lock_bh(&ip_conntrack_lock);
+ for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
+- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
++ list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
+ h = (struct ip_conntrack_tuple_hash *) i;
+ if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ continue;
+@@ -1009,14 +1012,15 @@ ctnetlink_change_conntrack(struct ip_con
+ static int
+ ctnetlink_create_conntrack(struct nfattr *cda[],
+ struct ip_conntrack_tuple *otuple,
+- struct ip_conntrack_tuple *rtuple)
++ struct ip_conntrack_tuple *rtuple,
++ struct user_beancounter *ub)
+ {
+ struct ip_conntrack *ct;
+ int err = -EINVAL;
+
+ DEBUGP("entered %s\n", __FUNCTION__);
+
+- ct = ip_conntrack_alloc(otuple, rtuple);
++ ct = ip_conntrack_alloc(otuple, rtuple, ub);
+ if (ct == NULL || IS_ERR(ct))
+ return -ENOMEM;
+
+@@ -1093,8 +1097,16 @@ ctnetlink_new_conntrack(struct sock *ctn
+ write_unlock_bh(&ip_conntrack_lock);
+ DEBUGP("no such conntrack, create new\n");
+ err = -ENOENT;
+- if (nlh->nlmsg_flags & NLM_F_CREATE)
+- err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
++ if (nlh->nlmsg_flags & NLM_F_CREATE) {
++#ifdef CONFIG_USER_RESOURCE
++ if (skb->sk)
++ err = ctnetlink_create_conntrack(cda, &otuple,
++ &rtuple, sock_bc(skb->sk)->ub);
++ else
++#endif
++ err = ctnetlink_create_conntrack(cda,
++ &otuple, &rtuple, NULL);
++ }
+ return err;
+ }
+ /* implicit 'else' */
+@@ -1257,7 +1269,7 @@ ctnetlink_exp_dump_table(struct sk_buff
+ DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id);
+
+ read_lock_bh(&ip_conntrack_lock);
+- list_for_each_prev(i, &ip_conntrack_expect_list) {
++ list_for_each_prev(i, &ve_ip_conntrack_expect_list) {
+ exp = (struct ip_conntrack_expect *) i;
+ if (exp->id <= *id)
+ continue;
+@@ -1403,7 +1415,7 @@ ctnetlink_del_expect(struct sock *ctnl,
+ write_unlock_bh(&ip_conntrack_lock);
+ return -EINVAL;
+ }
+- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
++ list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
+ list) {
+ if (exp->master->helper == h
+ && del_timer(&exp->timeout)) {
+@@ -1415,7 +1427,7 @@ ctnetlink_del_expect(struct sock *ctnl,
+ } else {
+ /* This basically means we have to flush everything*/
+ write_lock_bh(&ip_conntrack_lock);
+- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
++ list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
+ list) {
+ if (del_timer(&exp->timeout)) {
+ ip_ct_unlink_expect(exp);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_proto_generic.c 2006-01-27 14:48:08.000000000 +0300
+@@ -52,7 +52,7 @@ static int packet(struct ip_conntrack *c
+ const struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo)
+ {
+- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
++ ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_generic_timeout);
+ return NF_ACCEPT;
+ }
+
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_proto_icmp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -104,7 +104,7 @@ static int icmp_packet(struct ip_conntra
+ } else {
+ atomic_inc(&ct->proto.icmp.count);
+ ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+- ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
++ ip_ct_refresh_acct(ct, ctinfo, skb, ve_ip_ct_icmp_timeout);
+ }
+
+ return NF_ACCEPT;
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -99,7 +99,7 @@ unsigned long ip_ct_tcp_timeout_close =
+ to ~13-30min depending on RTO. */
+ unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS;
+
+-static const unsigned long * tcp_timeouts[]
++const unsigned long * tcp_timeouts[]
+ = { NULL, /* TCP_CONNTRACK_NONE */
+ &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
+ &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
+@@ -763,7 +763,7 @@ static int tcp_in_window(struct ip_ct_tc
+ : "SEQ is under the lower bound (already ACKed data retransmitted)"
+ : "SEQ is over the upper bound (over the window of the receiver)");
+
+- res = ip_ct_tcp_be_liberal;
++ res = ve_ip_ct_tcp_be_liberal;
+ }
+
+ DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
+@@ -1034,9 +1034,11 @@ static int tcp_packet(struct ip_conntrac
+ && (new_state == TCP_CONNTRACK_FIN_WAIT
+ || new_state == TCP_CONNTRACK_CLOSE))
+ conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
+- timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
+- && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
+- ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
++ timeout = conntrack->proto.tcp.retrans >= ve_ip_ct_tcp_max_retrans &&
++ ve_ip_ct_tcp_timeouts[new_state] >
++ ve_ip_ct_tcp_timeout_max_retrans
++ ? ve_ip_ct_tcp_timeout_max_retrans :
++ ve_ip_ct_tcp_timeouts[new_state];
+ write_unlock_bh(&tcp_lock);
+
+ ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+@@ -1111,7 +1113,7 @@ static int tcp_new(struct ip_conntrack *
+ conntrack->proto.tcp.seen[1].flags = 0;
+ conntrack->proto.tcp.seen[0].loose =
+ conntrack->proto.tcp.seen[1].loose = 0;
+- } else if (ip_ct_tcp_loose == 0) {
++ } else if (ve_ip_ct_tcp_loose == 0) {
+ /* Don't try to pick up connections. */
+ return 0;
+ } else {
+@@ -1135,7 +1137,7 @@ static int tcp_new(struct ip_conntrack *
+ conntrack->proto.tcp.seen[0].flags =
+ conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
+ conntrack->proto.tcp.seen[0].loose =
+- conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
++ conntrack->proto.tcp.seen[1].loose = ve_ip_ct_tcp_loose;
+ }
+
+ conntrack->proto.tcp.seen[1].td_end = 0;
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -71,12 +71,12 @@ static int udp_packet(struct ip_conntrac
+ stream. Extend timeout. */
+ if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+ ip_ct_refresh_acct(conntrack, ctinfo, skb,
+- ip_ct_udp_timeout_stream);
++ ve_ip_ct_udp_timeout_stream);
+ /* Also, more likely to be important, and not a probe */
+ if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
+ ip_conntrack_event_cache(IPCT_STATUS, skb);
+ } else
+- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
++ ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_udp_timeout);
+
+ return NF_ACCEPT;
+ }
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_standalone.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_conntrack_standalone.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_conntrack_standalone.c 2006-01-27 14:48:08.000000000 +0300
+@@ -27,6 +27,7 @@
+ #endif
+ #include <net/checksum.h>
+ #include <net/ip.h>
++#include <linux/nfcalls.h>
+
+ #define ASSERT_READ_LOCK(x)
+ #define ASSERT_WRITE_LOCK(x)
+@@ -45,7 +46,17 @@
+
+ MODULE_LICENSE("GPL");
+
++int ip_conntrack_disable_ve0 = 0;
++module_param(ip_conntrack_disable_ve0, int, 0440);
++
+ extern atomic_t ip_conntrack_count;
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ip_conntrack_count \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_count)
++#else
++#define ve_ip_conntrack_count ip_conntrack_count
++#endif
+ DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+
+ static int kill_proto(struct ip_conntrack *i, void *data)
+@@ -88,8 +99,8 @@ static struct list_head *ct_get_first(st
+ for (st->bucket = 0;
+ st->bucket < ip_conntrack_htable_size;
+ st->bucket++) {
+- if (!list_empty(&ip_conntrack_hash[st->bucket]))
+- return ip_conntrack_hash[st->bucket].next;
++ if (!list_empty(&ve_ip_conntrack_hash[st->bucket]))
++ return ve_ip_conntrack_hash[st->bucket].next;
+ }
+ return NULL;
+ }
+@@ -99,10 +110,10 @@ static struct list_head *ct_get_next(str
+ struct ct_iter_state *st = seq->private;
+
+ head = head->next;
+- while (head == &ip_conntrack_hash[st->bucket]) {
++ while (head == &ve_ip_conntrack_hash[st->bucket]) {
+ if (++st->bucket >= ip_conntrack_htable_size)
+ return NULL;
+- head = ip_conntrack_hash[st->bucket].next;
++ head = ve_ip_conntrack_hash[st->bucket].next;
+ }
+ return head;
+ }
+@@ -233,7 +244,7 @@ static struct file_operations ct_file_op
+ /* expects */
+ static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+ {
+- struct list_head *e = &ip_conntrack_expect_list;
++ struct list_head *e = &ve_ip_conntrack_expect_list;
+ loff_t i;
+
+ /* strange seq_file api calls stop even if we fail,
+@@ -245,7 +256,7 @@ static void *exp_seq_start(struct seq_fi
+
+ for (i = 0; i <= *pos; i++) {
+ e = e->next;
+- if (e == &ip_conntrack_expect_list)
++ if (e == &ve_ip_conntrack_expect_list)
+ return NULL;
+ }
+ return e;
+@@ -258,7 +269,7 @@ static void *exp_seq_next(struct seq_fil
+ ++*pos;
+ e = e->next;
+
+- if (e == &ip_conntrack_expect_list)
++ if (e == &ve_ip_conntrack_expect_list)
+ return NULL;
+
+ return e;
+@@ -343,7 +354,7 @@ static void ct_cpu_seq_stop(struct seq_f
+
+ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+ {
+- unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
++ unsigned int nr_conntracks = atomic_read(&ve_ip_conntrack_count);
+ struct ip_conntrack_stat *st = v;
+
+ if (v == SEQ_START_TOKEN) {
+@@ -564,6 +575,28 @@ static struct nf_hook_ops ip_conntrack_l
+
+ /* From ip_conntrack_core.c */
+ extern int ip_conntrack_max;
++#ifdef CONFIG_VE_IPTABLES
++#define ve_ip_conntrack_max \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_max)
++#define ve_ip_ct_sysctl_header \
++ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_header)
++#define ve_ip_ct_net_table \
++ (get_exec_env()->_ip_conntrack->_ip_ct_net_table)
++#define ve_ip_ct_ipv4_table \
++ (get_exec_env()->_ip_conntrack->_ip_ct_ipv4_table)
++#define ve_ip_ct_netfilter_table \
++ (get_exec_env()->_ip_conntrack->_ip_ct_netfilter_table)
++#define ve_ip_ct_sysctl_table \
++ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_table)
++#else
++#define ve_ip_conntrack_max ip_conntrack_max
++static struct ctl_table_header *ip_ct_sysctl_header;
++#define ve_ip_ct_sysctl_header ip_ct_sysctl_header
++#define ve_ip_ct_net_table ip_ct_net_table
++#define ve_ip_ct_ipv4_table ip_ct_ipv4_table
++#define ve_ip_ct_netfilter_table ip_ct_netfilter_table
++#define ve_ip_ct_sysctl_table ip_ct_sysctl_table
++#endif
+ extern unsigned int ip_conntrack_htable_size;
+
+ /* From ip_conntrack_proto_tcp.c */
+@@ -594,8 +627,6 @@ extern unsigned long ip_ct_generic_timeo
+ static int log_invalid_proto_min = 0;
+ static int log_invalid_proto_max = 255;
+
+-static struct ctl_table_header *ip_ct_sysctl_header;
+-
+ static ctl_table ip_ct_sysctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
+@@ -804,6 +835,112 @@ static ctl_table ip_ct_net_table[] = {
+ };
+
+ EXPORT_SYMBOL(ip_ct_log_invalid);
++
++#ifdef CONFIG_VE
++static void ip_conntrack_sysctl_cleanup(void)
++{
++ if (!ve_is_super(get_exec_env())) {
++ kfree(ve_ip_ct_net_table);
++ kfree(ve_ip_ct_ipv4_table);
++ kfree(ve_ip_ct_netfilter_table);
++ kfree(ve_ip_ct_sysctl_table);
++ }
++ ve_ip_ct_net_table = NULL;
++ ve_ip_ct_ipv4_table = NULL;
++ ve_ip_ct_netfilter_table = NULL;
++ ve_ip_ct_sysctl_table = NULL;
++}
++
++#define ALLOC_ENVCTL(field,k,label) \
++ if ( !(field = kmalloc(k*sizeof(ctl_table), GFP_KERNEL)) ) \
++ goto label;
++static int ip_conntrack_sysctl_init(void)
++{
++ int i, ret = 0;
++
++ ret = -ENOMEM;
++ if (ve_is_super(get_exec_env())) {
++ ve_ip_ct_net_table = ip_ct_net_table;
++ ve_ip_ct_ipv4_table = ip_ct_ipv4_table;
++ ve_ip_ct_netfilter_table = ip_ct_netfilter_table;
++ ve_ip_ct_sysctl_table = ip_ct_sysctl_table;
++ } else {
++ /* allocate structures in ve_struct */
++ ALLOC_ENVCTL(ve_ip_ct_net_table, 2, out);
++ ALLOC_ENVCTL(ve_ip_ct_ipv4_table, 2, nomem_1);
++ ALLOC_ENVCTL(ve_ip_ct_netfilter_table, 3, nomem_2);
++ ALLOC_ENVCTL(ve_ip_ct_sysctl_table, 15, nomem_3);
++
++ memcpy(ve_ip_ct_net_table, ip_ct_net_table,
++ 2*sizeof(ctl_table));
++ memcpy(ve_ip_ct_ipv4_table, ip_ct_ipv4_table,
++ 2*sizeof(ctl_table));
++ memcpy(ve_ip_ct_netfilter_table, ip_ct_netfilter_table,
++ 3*sizeof(ctl_table));
++ memcpy(ve_ip_ct_sysctl_table, ip_ct_sysctl_table,
++ 21*sizeof(ctl_table));
++
++ ve_ip_ct_net_table[0].child = ve_ip_ct_ipv4_table;
++ ve_ip_ct_ipv4_table[0].child = ve_ip_ct_netfilter_table;
++ ve_ip_ct_netfilter_table[0].child = ve_ip_ct_sysctl_table;
++ }
++ ve_ip_ct_sysctl_table[0].data = &ve_ip_conntrack_max;
++ ve_ip_ct_netfilter_table[1].data = &ve_ip_conntrack_max;
++ ve_ip_ct_sysctl_table[1].data = &ve_ip_conntrack_count;
++ /* skip ve_ip_ct_sysctl_table[2].data as it is read-only and common
++ * for all environments */
++ ve_ip_ct_tcp_timeouts[1] = ip_ct_tcp_timeout_syn_sent;
++ ve_ip_ct_sysctl_table[3].data = &ve_ip_ct_tcp_timeouts[1];
++ ve_ip_ct_tcp_timeouts[2] = ip_ct_tcp_timeout_syn_recv;
++ ve_ip_ct_sysctl_table[4].data = &ve_ip_ct_tcp_timeouts[2];
++ ve_ip_ct_tcp_timeouts[3] = ip_ct_tcp_timeout_established;
++ ve_ip_ct_sysctl_table[5].data = &ve_ip_ct_tcp_timeouts[3];
++ ve_ip_ct_tcp_timeouts[4] = ip_ct_tcp_timeout_fin_wait;
++ ve_ip_ct_sysctl_table[6].data = &ve_ip_ct_tcp_timeouts[4];
++ ve_ip_ct_tcp_timeouts[5] = ip_ct_tcp_timeout_close_wait;
++ ve_ip_ct_sysctl_table[7].data = &ve_ip_ct_tcp_timeouts[5];
++ ve_ip_ct_tcp_timeouts[6] = ip_ct_tcp_timeout_last_ack;
++ ve_ip_ct_sysctl_table[8].data = &ve_ip_ct_tcp_timeouts[6];
++ ve_ip_ct_tcp_timeouts[7] = ip_ct_tcp_timeout_time_wait;
++ ve_ip_ct_sysctl_table[9].data = &ve_ip_ct_tcp_timeouts[7];
++ ve_ip_ct_tcp_timeouts[8] = ip_ct_tcp_timeout_close;
++ ve_ip_ct_sysctl_table[10].data = &ve_ip_ct_tcp_timeouts[8];
++ ve_ip_ct_udp_timeout = ip_ct_udp_timeout;
++ ve_ip_ct_sysctl_table[11].data = &ve_ip_ct_udp_timeout;
++ ve_ip_ct_udp_timeout_stream = ip_ct_udp_timeout_stream;
++ ve_ip_ct_sysctl_table[12].data = &ve_ip_ct_udp_timeout_stream;
++ ve_ip_ct_icmp_timeout = ip_ct_icmp_timeout;
++ ve_ip_ct_sysctl_table[13].data = &ve_ip_ct_icmp_timeout;
++ ve_ip_ct_generic_timeout = ip_ct_generic_timeout;
++ ve_ip_ct_sysctl_table[14].data = &ve_ip_ct_generic_timeout;
++ ve_ip_ct_log_invalid = ip_ct_log_invalid;
++ ve_ip_ct_sysctl_table[15].data = &ve_ip_ct_log_invalid;
++ ve_ip_ct_tcp_timeout_max_retrans = ip_ct_tcp_timeout_max_retrans;
++ ve_ip_ct_sysctl_table[16].data = &ve_ip_ct_tcp_timeout_max_retrans;
++ ve_ip_ct_tcp_loose = ip_ct_tcp_loose;
++ ve_ip_ct_sysctl_table[17].data = &ve_ip_ct_tcp_loose;
++ ve_ip_ct_tcp_be_liberal = ip_ct_tcp_be_liberal;
++ ve_ip_ct_sysctl_table[18].data = &ve_ip_ct_tcp_be_liberal;
++ ve_ip_ct_tcp_max_retrans = ip_ct_tcp_max_retrans;
++ ve_ip_ct_sysctl_table[19].data = &ve_ip_ct_tcp_max_retrans;
++ for (i = 0; i < 20; i++)
++ ve_ip_ct_sysctl_table[i].owner_env = get_exec_env();
++ ve_ip_ct_netfilter_table[1].owner_env = get_exec_env();
++ return 0;
++
++nomem_3:
++ kfree(ve_ip_ct_netfilter_table);
++ ve_ip_ct_netfilter_table = NULL;
++nomem_2:
++ kfree(ve_ip_ct_ipv4_table);
++ ve_ip_ct_ipv4_table = NULL;
++nomem_1:
++ kfree(ve_ip_ct_net_table);
++ ve_ip_ct_net_table = NULL;
++out:
++ return ret;
++}
++#endif /*CONFIG_VE*/
+ #endif /* CONFIG_SYSCTL */
+
+ static int init_or_cleanup(int init)
+@@ -815,9 +952,16 @@ static int init_or_cleanup(int init)
+
+ if (!init) goto cleanup;
+
++ ret = -ENOENT;
++ if (!ve_is_super(get_exec_env()))
++ __module_get(THIS_MODULE);
++
+ ret = ip_conntrack_init();
+ if (ret < 0)
+- goto cleanup_nothing;
++ goto cleanup_unget;
++
++ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
++ return 0;
+
+ #ifdef CONFIG_PROC_FS
+ ret = -ENOMEM;
+@@ -827,98 +971,115 @@ static int init_or_cleanup(int init)
+ proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
+ &exp_file_ops);
+ if (!proc_exp) goto cleanup_proc;
++ proc_exp->proc_fops = &exp_file_ops;
+
+- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
+- if (!proc_stat)
+- goto cleanup_proc_exp;
++ if (ve_is_super(get_exec_env())) {
++ proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
++ if (!proc_stat)
++ goto cleanup_proc_exp;
+
+- proc_stat->proc_fops = &ct_cpu_seq_fops;
+- proc_stat->owner = THIS_MODULE;
++ proc_stat->proc_fops = &ct_cpu_seq_fops;
++ proc_stat->owner = THIS_MODULE;
++ }
+ #endif
+
+- ret = nf_register_hook(&ip_conntrack_defrag_ops);
++ ret = virt_nf_register_hook(&ip_conntrack_defrag_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register pre-routing defrag hook.\n");
+ goto cleanup_proc_stat;
+ }
+- ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
++ ret = virt_nf_register_hook(&ip_conntrack_defrag_local_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local_out defrag hook.\n");
+ goto cleanup_defragops;
+ }
+- ret = nf_register_hook(&ip_conntrack_in_ops);
++ ret = virt_nf_register_hook(&ip_conntrack_in_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register pre-routing hook.\n");
+ goto cleanup_defraglocalops;
+ }
+- ret = nf_register_hook(&ip_conntrack_local_out_ops);
++ ret = virt_nf_register_hook(&ip_conntrack_local_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local out hook.\n");
+ goto cleanup_inops;
+ }
+- ret = nf_register_hook(&ip_conntrack_helper_in_ops);
++ ret = virt_nf_register_hook(&ip_conntrack_helper_in_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local in helper hook.\n");
+ goto cleanup_inandlocalops;
+ }
+- ret = nf_register_hook(&ip_conntrack_helper_out_ops);
++ ret = virt_nf_register_hook(&ip_conntrack_helper_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register postrouting helper hook.\n");
+ goto cleanup_helperinops;
+ }
+- ret = nf_register_hook(&ip_conntrack_out_ops);
++ ret = virt_nf_register_hook(&ip_conntrack_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register post-routing hook.\n");
+ goto cleanup_helperoutops;
+ }
+- ret = nf_register_hook(&ip_conntrack_local_in_ops);
++ ret = virt_nf_register_hook(&ip_conntrack_local_in_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local in hook.\n");
+ goto cleanup_inoutandlocalops;
+ }
+ #ifdef CONFIG_SYSCTL
+- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
+- if (ip_ct_sysctl_header == NULL) {
++#ifdef CONFIG_VE
++ ret = ip_conntrack_sysctl_init();
++ if (ret < 0)
++ goto cleanup_sysctl;
++#endif
++ ret = -ENOMEM;
++ ve_ip_ct_sysctl_header = register_sysctl_table(ve_ip_ct_net_table, 0);
++ if (ve_ip_ct_sysctl_header == NULL) {
+ printk("ip_conntrack: can't register to sysctl.\n");
+- ret = -ENOMEM;
+- goto cleanup_localinops;
++ goto cleanup_sysctl2;
+ }
+ #endif
+
+- return ret;
++ return 0;
+
+ cleanup:
++ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
++ goto cleanup_init;
+ synchronize_net();
+ #ifdef CONFIG_SYSCTL
+- unregister_sysctl_table(ip_ct_sysctl_header);
+- cleanup_localinops:
++ unregister_sysctl_table(ve_ip_ct_sysctl_header);
++ cleanup_sysctl2:
++#ifdef CONFIG_VE
++ ip_conntrack_sysctl_cleanup();
++ cleanup_sysctl:
+ #endif
+- nf_unregister_hook(&ip_conntrack_local_in_ops);
++#endif
++ virt_nf_unregister_hook(&ip_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
+- nf_unregister_hook(&ip_conntrack_out_ops);
++ virt_nf_unregister_hook(&ip_conntrack_out_ops);
+ cleanup_helperoutops:
+- nf_unregister_hook(&ip_conntrack_helper_out_ops);
++ virt_nf_unregister_hook(&ip_conntrack_helper_out_ops);
+ cleanup_helperinops:
+- nf_unregister_hook(&ip_conntrack_helper_in_ops);
++ virt_nf_unregister_hook(&ip_conntrack_helper_in_ops);
+ cleanup_inandlocalops:
+- nf_unregister_hook(&ip_conntrack_local_out_ops);
++ virt_nf_unregister_hook(&ip_conntrack_local_out_ops);
+ cleanup_inops:
+- nf_unregister_hook(&ip_conntrack_in_ops);
++ virt_nf_unregister_hook(&ip_conntrack_in_ops);
+ cleanup_defraglocalops:
+- nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
++ virt_nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
+ cleanup_defragops:
+- nf_unregister_hook(&ip_conntrack_defrag_ops);
++ virt_nf_unregister_hook(&ip_conntrack_defrag_ops);
+ cleanup_proc_stat:
+ #ifdef CONFIG_PROC_FS
+- remove_proc_entry("ip_conntrack", proc_net_stat);
++ if (ve_is_super(get_exec_env()))
++ remove_proc_entry("ip_conntrack", proc_net_stat);
+ cleanup_proc_exp:
+ proc_net_remove("ip_conntrack_expect");
+ cleanup_proc:
+ proc_net_remove("ip_conntrack");
+- cleanup_init:
+ #endif /* CONFIG_PROC_FS */
++ cleanup_init:
+ ip_conntrack_cleanup();
+- cleanup_nothing:
++ cleanup_unget:
++ if (!ve_is_super(get_exec_env()))
++ module_put(THIS_MODULE);
+ return ret;
+ }
+
+@@ -929,11 +1090,11 @@ int ip_conntrack_protocol_register(struc
+ int ret = 0;
+
+ write_lock_bh(&ip_conntrack_lock);
+- if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
++ if (ve_ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
+ ret = -EBUSY;
+ goto out;
+ }
+- ip_ct_protos[proto->proto] = proto;
++ ve_ip_ct_protos[proto->proto] = proto;
+ out:
+ write_unlock_bh(&ip_conntrack_lock);
+ return ret;
+@@ -942,7 +1103,7 @@ int ip_conntrack_protocol_register(struc
+ void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
+ {
+ write_lock_bh(&ip_conntrack_lock);
+- ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
++ ve_ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
+ write_unlock_bh(&ip_conntrack_lock);
+
+ /* Somebody could be still looking at the proto in bh. */
+@@ -952,17 +1113,39 @@ void ip_conntrack_protocol_unregister(st
+ ip_ct_iterate_cleanup(kill_proto, &proto->proto);
+ }
+
+-static int __init init(void)
++int init_iptable_conntrack(void)
+ {
+ return init_or_cleanup(1);
+ }
+
+-static void __exit fini(void)
++void fini_iptable_conntrack(void)
+ {
+ init_or_cleanup(0);
+ }
+
+-module_init(init);
++static int __init init(void)
++{
++ int err;
++
++ err = init_iptable_conntrack();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_conntrack);
++ KSYMRESOLVE(fini_iptable_conntrack);
++ KSYMMODRESOLVE(ip_conntrack);
++ return 0;
++}
++
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(ip_conntrack);
++ KSYMUNRESOLVE(init_iptable_conntrack);
++ KSYMUNRESOLVE(fini_iptable_conntrack);
++ fini_iptable_conntrack();
++}
++
++subsys_initcall(init);
+ module_exit(fini);
+
+ /* Some modules need us, but don't depend directly on any symbol.
+@@ -979,15 +1162,20 @@ EXPORT_SYMBOL_GPL(ip_conntrack_unregiste
+ EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
+ EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
+ #endif
++EXPORT_SYMBOL(ip_conntrack_disable_ve0);
+ EXPORT_SYMBOL(ip_conntrack_protocol_register);
+ EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
+ EXPORT_SYMBOL(ip_ct_get_tuple);
+ EXPORT_SYMBOL(invert_tuplepr);
+ EXPORT_SYMBOL(ip_conntrack_alter_reply);
++#ifndef CONFIG_VE_IPTABLES
+ EXPORT_SYMBOL(ip_conntrack_destroyed);
++#endif
+ EXPORT_SYMBOL(need_ip_conntrack);
+ EXPORT_SYMBOL(ip_conntrack_helper_register);
+ EXPORT_SYMBOL(ip_conntrack_helper_unregister);
++EXPORT_SYMBOL(virt_ip_conntrack_helper_register);
++EXPORT_SYMBOL(virt_ip_conntrack_helper_unregister);
+ EXPORT_SYMBOL(ip_ct_iterate_cleanup);
+ EXPORT_SYMBOL(__ip_ct_refresh_acct);
+
+@@ -997,14 +1185,18 @@ EXPORT_SYMBOL_GPL(__ip_conntrack_expect_
+ EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
+ EXPORT_SYMBOL(ip_conntrack_expect_related);
+ EXPORT_SYMBOL(ip_conntrack_unexpect_related);
++#ifndef CONFIG_VE_IPTABLES
+ EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
++#endif
+ EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
+
+ EXPORT_SYMBOL(ip_conntrack_tuple_taken);
+ EXPORT_SYMBOL(ip_ct_gather_frags);
+ EXPORT_SYMBOL(ip_conntrack_htable_size);
+ EXPORT_SYMBOL(ip_conntrack_lock);
++#ifndef CONFIG_VE_IPTABLES
+ EXPORT_SYMBOL(ip_conntrack_hash);
++#endif
+ EXPORT_SYMBOL(ip_conntrack_untracked);
+ EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
+ #ifdef CONFIG_IP_NF_NAT_NEEDED
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_core.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_core.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_core.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_core.c 2006-01-27 14:48:08.000000000 +0300
+@@ -21,6 +21,8 @@
+ #include <linux/icmp.h>
+ #include <linux/udp.h>
+ #include <linux/jhash.h>
++#include <linux/nfcalls.h>
++#include <ub/ub_mem.h>
+
+ #define ASSERT_READ_LOCK(x)
+ #define ASSERT_WRITE_LOCK(x)
+@@ -46,15 +48,24 @@ DEFINE_RWLOCK(ip_nat_lock);
+ /* Calculated at init based on memory size */
+ static unsigned int ip_nat_htable_size;
+
+-static struct list_head *bysource;
+-
+ #define MAX_IP_NAT_PROTO 256
++
++#ifdef CONFIG_VE_IPTABLES
++#define ve_ip_nat_bysource \
++ (get_exec_env()->_ip_conntrack->_ip_nat_bysource)
++#define ve_ip_nat_protos \
++ (get_exec_env()->_ip_conntrack->_ip_nat_protos)
++#else
++static struct list_head *bysource;
++#define ve_ip_nat_bysource bysource
+ static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
++#define ve_ip_nat_protos ip_nat_protos
++#endif
+
+ static inline struct ip_nat_protocol *
+ __ip_nat_proto_find(u_int8_t protonum)
+ {
+- return ip_nat_protos[protonum];
++ return ve_ip_nat_protos[protonum];
+ }
+
+ struct ip_nat_protocol *
+@@ -177,7 +188,7 @@ find_appropriate_src(const struct ip_con
+ struct ip_conntrack *ct;
+
+ read_lock_bh(&ip_nat_lock);
+- list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
++ list_for_each_entry(ct, &ve_ip_nat_bysource[h], nat.info.bysource) {
+ if (same_src(ct, tuple)) {
+ /* Copy source part from reply tuple. */
+ invert_tuplepr(result,
+@@ -337,7 +348,7 @@ ip_nat_setup_info(struct ip_conntrack *c
+ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple);
+ write_lock_bh(&ip_nat_lock);
+- list_add(&info->bysource, &bysource[srchash]);
++ list_add(&info->bysource, &ve_ip_nat_bysource[srchash]);
+ write_unlock_bh(&ip_nat_lock);
+ }
+
+@@ -521,11 +532,11 @@ int ip_nat_protocol_register(struct ip_n
+ int ret = 0;
+
+ write_lock_bh(&ip_nat_lock);
+- if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
++ if (ve_ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
+ ret = -EBUSY;
+ goto out;
+ }
+- ip_nat_protos[proto->protonum] = proto;
++ ve_ip_nat_protos[proto->protonum] = proto;
+ out:
+ write_unlock_bh(&ip_nat_lock);
+ return ret;
+@@ -536,7 +547,7 @@ EXPORT_SYMBOL(ip_nat_protocol_register);
+ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
+ {
+ write_lock_bh(&ip_nat_lock);
+- ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
++ ve_ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
+ write_unlock_bh(&ip_nat_lock);
+
+ /* Someone could be still looking at the proto in a bh. */
+@@ -589,38 +600,55 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_
+ EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
+ #endif
+
+-static int __init ip_nat_init(void)
++static int ip_nat_init(void)
+ {
+ size_t i;
++ int ret;
+
+- /* Leave them the same for the moment. */
+- ip_nat_htable_size = ip_conntrack_htable_size;
++ if (ve_is_super(get_exec_env()))
++ ip_nat_htable_size = ip_conntrack_htable_size;
+
+ /* One vmalloc for both hash tables */
+- bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
+- if (!bysource)
+- return -ENOMEM;
++ ret = -ENOMEM;
++ ve_ip_nat_bysource =
++ ub_vmalloc(sizeof(struct list_head)*ip_nat_htable_size*2);
++ if (!ve_ip_nat_bysource)
++ goto nomem;
++
++#ifdef CONFIG_VE_IPTABLES
++ ve_ip_nat_protos =
++ ub_kmalloc(sizeof(void *)*MAX_IP_NAT_PROTO, GFP_KERNEL);
++ if (!ve_ip_nat_protos)
++ goto nomem2;
++#endif
+
+ /* Sew in builtin protocols. */
+ write_lock_bh(&ip_nat_lock);
+ for (i = 0; i < MAX_IP_NAT_PROTO; i++)
+- ip_nat_protos[i] = &ip_nat_unknown_protocol;
+- ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
+- ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
+- ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
++ ve_ip_nat_protos[i] = &ip_nat_unknown_protocol;
++ ve_ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
++ ve_ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
++ ve_ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
+ write_unlock_bh(&ip_nat_lock);
+
+ for (i = 0; i < ip_nat_htable_size; i++) {
+- INIT_LIST_HEAD(&bysource[i]);
++ INIT_LIST_HEAD(&ve_ip_nat_bysource[i]);
+ }
+
+ /* FIXME: Man, this is a hack. <SIGH> */
+ IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
+- ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
++ ve_ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
+
+- /* Initialize fake conntrack so that NAT will skip it */
+- ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
++ if (ve_is_super(get_exec_env()))
++ /* Initialize fake conntrack so that NAT will skip it */
++ ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+ return 0;
++#ifdef CONFIG_VE_IPTABLES
++nomem2:
++#endif
++ vfree(ve_ip_nat_bysource);
++nomem:
++ return ret;
+ }
+
+ /* Clear NAT section of all conntracks, in case we're loaded again. */
+@@ -631,14 +659,41 @@ static int clean_nat(struct ip_conntrack
+ return 0;
+ }
+
+-static void __exit ip_nat_cleanup(void)
++static void ip_nat_cleanup(void)
+ {
+ ip_ct_iterate_cleanup(&clean_nat, NULL);
+- ip_conntrack_destroyed = NULL;
+- vfree(bysource);
++ ve_ip_conntrack_destroyed = NULL;
++ vfree(ve_ip_nat_bysource);
++ ve_ip_nat_bysource = NULL;
++#ifdef CONFIG_VE_IPTABLES
++ kfree(ve_ip_nat_protos);
++ ve_ip_nat_protos = NULL;
++#endif
++}
++
++static int __init init(void)
++{
++ int err;
++
++ err = ip_nat_init();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(ip_nat_init);
++ KSYMRESOLVE(ip_nat_cleanup);
++ KSYMMODRESOLVE(ip_nat);
++ return 0;
++}
++
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(ip_nat);
++ KSYMUNRESOLVE(ip_nat_cleanup);
++ KSYMUNRESOLVE(ip_nat_init);
++ ip_nat_cleanup();
+ }
+
+ MODULE_LICENSE("GPL");
+
+-module_init(ip_nat_init);
+-module_exit(ip_nat_cleanup);
++fs_initcall(init);
++module_exit(fini);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_ftp.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_ftp.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_ftp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_ftp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -19,6 +19,7 @@
+ #include <linux/netfilter_ipv4/ip_nat_rule.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
++#include <linux/nfcalls.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+@@ -154,18 +155,43 @@ static unsigned int ip_nat_ftp(struct sk
+ return NF_ACCEPT;
+ }
+
+-static void __exit fini(void)
++#ifdef CONFIG_VE_IPTABLES
++#undef ve_ip_nat_ftp_hook
++#define ve_ip_nat_ftp_hook \
++ (get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook)
++#endif
++int init_iptable_nat_ftp(void)
+ {
+- ip_nat_ftp_hook = NULL;
++ BUG_ON(ve_ip_nat_ftp_hook);
++#ifdef CONFIG_VE_IPTABLES
++ ve_ip_nat_ftp_hook = (ip_nat_helper_func)ip_nat_ftp;
++#else
++ ve_ip_nat_ftp_hook = ip_nat_ftp;
++#endif
++ return 0;
++}
++
++void fini_iptable_nat_ftp(void)
++{
++ ve_ip_nat_ftp_hook = NULL;
+ /* Make sure noone calls it, meanwhile. */
+ synchronize_net();
+ }
+
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(ip_nat_ftp);
++ KSYMUNRESOLVE(init_iptable_nat_ftp);
++ KSYMUNRESOLVE(fini_iptable_nat_ftp);
++ fini_iptable_nat_ftp();
++}
++
+ static int __init init(void)
+ {
+- BUG_ON(ip_nat_ftp_hook);
+- ip_nat_ftp_hook = ip_nat_ftp;
+- return 0;
++ KSYMRESOLVE(init_iptable_nat_ftp);
++ KSYMRESOLVE(fini_iptable_nat_ftp);
++ KSYMMODRESOLVE(ip_nat_ftp);
++ return init_iptable_nat_ftp();
+ }
+
+ /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_irc.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_irc.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_irc.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_irc.c 2006-01-27 14:48:08.000000000 +0300
+@@ -23,6 +23,7 @@
+ #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+ #include <linux/moduleparam.h>
++#include <linux/nfcalls.h>
+
+ #if 0
+ #define DEBUGP printk
+@@ -96,18 +97,44 @@ static unsigned int help(struct sk_buff
+ return ret;
+ }
+
+-static void __exit fini(void)
++#ifdef CONFIG_VE_IPTABLES
++#undef ve_ip_nat_irc_hook
++#define ve_ip_nat_irc_hook \
++ (get_exec_env()->_ip_conntrack->_ip_nat_irc_hook)
++#endif
++
++int init_iptable_nat_irc(void)
++{
++ BUG_ON(ve_ip_nat_irc_hook);
++#ifdef CONFIG_VE_IPTABLES
++ ve_ip_nat_irc_hook = (ip_nat_helper_func)help;
++#else
++ ve_ip_nat_irc_hook = help;
++#endif
++ return 0;
++}
++
++void fini_iptable_nat_irc(void)
+ {
+- ip_nat_irc_hook = NULL;
++ ve_ip_nat_irc_hook = NULL;
+ /* Make sure noone calls it, meanwhile. */
+ synchronize_net();
+ }
+
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(ip_nat_irc);
++ KSYMUNRESOLVE(init_iptable_nat_irc);
++ KSYMUNRESOLVE(fini_iptable_nat_irc);
++ fini_iptable_nat_irc();
++}
++
+ static int __init init(void)
+ {
+- BUG_ON(ip_nat_irc_hook);
+- ip_nat_irc_hook = help;
+- return 0;
++ KSYMRESOLVE(init_iptable_nat_irc);
++ KSYMRESOLVE(fini_iptable_nat_irc);
++ KSYMMODRESOLVE(ip_nat_irc);
++ return init_iptable_nat_irc();
+ }
+
+ /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_rule.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_rule.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_rule.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_rule.c 2006-01-27 14:48:08.000000000 +0300
+@@ -34,6 +34,13 @@
+ #define DEBUGP(format, args...)
+ #endif
+
++#ifdef CONFIG_VE_IPTABLES
++#define ve_ip_nat_table \
++ (get_exec_env()->_ip_conntrack->_ip_nat_table)
++#else
++#define ve_ip_nat_table &nat_table
++#endif
++
+ #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
+
+ static struct
+@@ -41,7 +48,7 @@ static struct
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+-} nat_initial_table __initdata
++} nat_initial_table
+ = { { "nat", NAT_VALID_HOOKS, 4,
+ sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ { [NF_IP_PRE_ROUTING] = 0,
+@@ -285,7 +292,7 @@ int ip_nat_rule_find(struct sk_buff **ps
+ {
+ int ret;
+
+- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
++ ret = ipt_do_table(pskb, hooknum, in, out, ve_ip_nat_table, NULL);
+
+ if (ret == NF_ACCEPT) {
+ if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
+@@ -307,34 +314,46 @@ static struct ipt_target ipt_dnat_reg =
+ .checkentry = ipt_dnat_checkentry,
+ };
+
+-int __init ip_nat_rule_init(void)
++int ip_nat_rule_init(void)
+ {
+ int ret;
++ struct ipt_table *tmp_table;
+
+- ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
+- if (ret != 0)
+- return ret;
+- ret = ipt_register_target(&ipt_snat_reg);
++ tmp_table = virt_ipt_register_table(&nat_table,
++ &nat_initial_table.repl);
++ if (IS_ERR(tmp_table))
++ return PTR_ERR(tmp_table);
++#ifdef CONFIG_VE_IPTABLES
++ ve_ip_nat_table = tmp_table;
++#endif
++
++ ret = virt_ipt_register_target(&ipt_snat_reg);
+ if (ret != 0)
+ goto unregister_table;
+
+- ret = ipt_register_target(&ipt_dnat_reg);
++ ret = virt_ipt_register_target(&ipt_dnat_reg);
+ if (ret != 0)
+ goto unregister_snat;
+
+ return ret;
+
+ unregister_snat:
+- ipt_unregister_target(&ipt_snat_reg);
++ virt_ipt_unregister_target(&ipt_snat_reg);
+ unregister_table:
+- ipt_unregister_table(&nat_table);
++ virt_ipt_unregister_table(ve_ip_nat_table);
++#ifdef CONFIG_VE_IPTABLES
++ ve_ip_nat_table = NULL;
++#endif
+
+ return ret;
+ }
+
+ void ip_nat_rule_cleanup(void)
+ {
+- ipt_unregister_target(&ipt_dnat_reg);
+- ipt_unregister_target(&ipt_snat_reg);
+- ipt_unregister_table(&nat_table);
++ virt_ipt_unregister_target(&ipt_dnat_reg);
++ virt_ipt_unregister_target(&ipt_snat_reg);
++ virt_ipt_unregister_table(ve_ip_nat_table);
++#ifdef CONFIG_VE_IPTABLES
++ ve_ip_nat_table = NULL;
++#endif
+ }
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_standalone.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_standalone.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_nat_standalone.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_nat_standalone.c 2006-01-27 14:48:08.000000000 +0300
+@@ -30,6 +30,7 @@
+ #include <net/ip.h>
+ #include <net/checksum.h>
+ #include <linux/spinlock.h>
++#include <linux/nfcalls.h>
+
+ #define ASSERT_READ_LOCK(x)
+ #define ASSERT_WRITE_LOCK(x)
+@@ -320,37 +321,43 @@ static int init_or_cleanup(int init)
+
+ if (!init) goto cleanup;
+
++ if (!ve_is_super(get_exec_env()))
++ __module_get(THIS_MODULE);
++
+ ret = ip_nat_rule_init();
+ if (ret < 0) {
+ printk("ip_nat_init: can't setup rules.\n");
+- goto cleanup_nothing;
++ goto cleanup_modput;
+ }
+- ret = nf_register_hook(&ip_nat_in_ops);
++ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
++ return 0;
++
++ ret = virt_nf_register_hook(&ip_nat_in_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register in hook.\n");
+ goto cleanup_rule_init;
+ }
+- ret = nf_register_hook(&ip_nat_out_ops);
++ ret = virt_nf_register_hook(&ip_nat_out_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register out hook.\n");
+ goto cleanup_inops;
+ }
+- ret = nf_register_hook(&ip_nat_adjust_in_ops);
++ ret = virt_nf_register_hook(&ip_nat_adjust_in_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register adjust in hook.\n");
+ goto cleanup_outops;
+ }
+- ret = nf_register_hook(&ip_nat_adjust_out_ops);
++ ret = virt_nf_register_hook(&ip_nat_adjust_out_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register adjust out hook.\n");
+ goto cleanup_adjustin_ops;
+ }
+- ret = nf_register_hook(&ip_nat_local_out_ops);
++ ret = virt_nf_register_hook(&ip_nat_local_out_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register local out hook.\n");
+ goto cleanup_adjustout_ops;;
+ }
+- ret = nf_register_hook(&ip_nat_local_in_ops);
++ ret = virt_nf_register_hook(&ip_nat_local_in_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register local in hook.\n");
+ goto cleanup_localoutops;
+@@ -358,34 +365,60 @@ static int init_or_cleanup(int init)
+ return ret;
+
+ cleanup:
+- nf_unregister_hook(&ip_nat_local_in_ops);
++ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
++ goto cleanup_rule_init;
++ virt_nf_unregister_hook(&ip_nat_local_in_ops);
+ cleanup_localoutops:
+- nf_unregister_hook(&ip_nat_local_out_ops);
++ virt_nf_unregister_hook(&ip_nat_local_out_ops);
+ cleanup_adjustout_ops:
+- nf_unregister_hook(&ip_nat_adjust_out_ops);
++ virt_nf_unregister_hook(&ip_nat_adjust_out_ops);
+ cleanup_adjustin_ops:
+- nf_unregister_hook(&ip_nat_adjust_in_ops);
++ virt_nf_unregister_hook(&ip_nat_adjust_in_ops);
+ cleanup_outops:
+- nf_unregister_hook(&ip_nat_out_ops);
++ virt_nf_unregister_hook(&ip_nat_out_ops);
+ cleanup_inops:
+- nf_unregister_hook(&ip_nat_in_ops);
++ virt_nf_unregister_hook(&ip_nat_in_ops);
+ cleanup_rule_init:
+ ip_nat_rule_cleanup();
+- cleanup_nothing:
++ cleanup_modput:
++ if (!ve_is_super(get_exec_env()))
++ module_put(THIS_MODULE);
+ return ret;
+ }
+
+-static int __init init(void)
++int init_iptable_nat(void)
+ {
+ return init_or_cleanup(1);
+ }
+
+-static void __exit fini(void)
++void fini_iptable_nat(void)
+ {
+ init_or_cleanup(0);
+ }
+
+-module_init(init);
++static int __init init(void)
++{
++ int err;
++
++ err = init_iptable_nat();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_nat);
++ KSYMRESOLVE(fini_iptable_nat);
++ KSYMMODRESOLVE(iptable_nat);
++ return 0;
++}
++
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(iptable_nat);
++ KSYMUNRESOLVE(init_iptable_nat);
++ KSYMUNRESOLVE(fini_iptable_nat);
++ fini_iptable_nat();
++}
++
++fs_initcall(init);
+ module_exit(fini);
+
+ MODULE_LICENSE("GPL");
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_queue.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_queue.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_queue.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_queue.c 2006-01-27 14:48:08.000000000 +0300
+@@ -542,8 +542,17 @@ ipq_rcv_sk(struct sock *sk, int len)
+ down(&ipqnl_sem);
+
+ for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
++#ifdef CONFIG_VE
++ struct ve_struct *env;
++#endif
+ skb = skb_dequeue(&sk->sk_receive_queue);
++#ifdef CONFIG_VE
++ env = set_exec_env(VE_OWNER_SKB(skb));
+ ipq_rcv_skb(skb);
++ (void)set_exec_env(env);
++#else
++ ipq_rcv_skb(skb);
++#endif
+ kfree_skb(skb);
+ }
+
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ip_tables.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_tables.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ip_tables.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ip_tables.c 2006-01-27 14:48:08.000000000 +0300
+@@ -28,9 +28,13 @@
+ #include <linux/proc_fs.h>
+ #include <linux/err.h>
+ #include <linux/cpumask.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ip_tables.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
++
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+ MODULE_DESCRIPTION("IPv4 packet filter");
+@@ -111,6 +115,52 @@ struct ipt_table_info
+ static LIST_HEAD(ipt_target);
+ static LIST_HEAD(ipt_match);
+ static LIST_HEAD(ipt_tables);
++
++#ifdef CONFIG_VE_IPTABLES
++/* include ve.h and define get_exec_env */
++#include <linux/sched.h>
++
++int init_iptables(void);
++
++#define ve_ipt_target (*(get_exec_env()->_ipt_target))
++#define ve_ipt_match (*(get_exec_env()->_ipt_match))
++#define ve_ipt_tables (*(get_exec_env()->_ipt_tables))
++#define ve_ipt_standard_target (*(get_exec_env()->_ipt_standard_target))
++#define ve_ipt_error_target (*(get_exec_env()->_ipt_error_target))
++#define ve_tcp_matchstruct (*(get_exec_env()->_tcp_matchstruct))
++#define ve_udp_matchstruct (*(get_exec_env()->_udp_matchstruct))
++#define ve_icmp_matchstruct (*(get_exec_env()->_icmp_matchstruct))
++
++
++#ifdef CONFIG_USER_RESOURCE
++#define UB_NUMIPTENT 23
++static int charge_iptables(struct user_beancounter *ub, unsigned long size)
++{
++ if (ub == NULL)
++ return 0;
++ return charge_beancounter(ub, UB_NUMIPTENT, size, 1);
++}
++static void uncharge_iptables(struct user_beancounter *ub, unsigned long size)
++{
++ if (ub == NULL)
++ return;
++ uncharge_beancounter(ub, UB_NUMIPTENT, size);
++}
++#endif /* CONFIG_USER_RESOURCE */
++
++#else /* CONFIG_VE_IPTABLES */
++
++#define ve_ipt_target ipt_target
++#define ve_ipt_match ipt_match
++#define ve_ipt_tables ipt_tables
++#define ve_ipt_standard_target ipt_standard_target
++#define ve_ipt_error_target ipt_error_target
++#define ve_tcp_matchstruct tcp_matchstruct
++#define ve_udp_matchstruct udp_matchstruct
++#define ve_icmp_matchstruct icmp_matchstruct
++
++#endif /* CONFIG_VE_IPTABLES */
++
+ #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+
+ #ifdef CONFIG_SMP
+@@ -422,7 +472,7 @@ static inline struct ipt_table *find_tab
+ if (down_interruptible(&ipt_mutex) != 0)
+ return ERR_PTR(-EINTR);
+
+- list_for_each_entry(t, &ipt_tables, list)
++ list_for_each_entry(t, &ve_ipt_tables, list)
+ if (strcmp(t->name, name) == 0 && try_module_get(t->me))
+ return t;
+ up(&ipt_mutex);
+@@ -462,7 +512,7 @@ static inline struct ipt_target *find_ta
+ if (down_interruptible(&ipt_mutex) != 0)
+ return ERR_PTR(-EINTR);
+
+- list_for_each_entry(t, &ipt_target, list) {
++ list_for_each_entry(t, &ve_ipt_target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision == revision) {
+ if (try_module_get(t->me)) {
+@@ -493,7 +543,7 @@ static int match_revfn(const char *name,
+ struct ipt_match *m;
+ int have_rev = 0;
+
+- list_for_each_entry(m, &ipt_match, list) {
++ list_for_each_entry(m, &ve_ipt_match, list) {
+ if (strcmp(m->name, name) == 0) {
+ if (m->revision > *bestp)
+ *bestp = m->revision;
+@@ -509,7 +559,7 @@ static int target_revfn(const char *name
+ struct ipt_target *t;
+ int have_rev = 0;
+
+- list_for_each_entry(t, &ipt_target, list) {
++ list_for_each_entry(t, &ve_ipt_target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision > *bestp)
+ *bestp = t->revision;
+@@ -585,7 +635,7 @@ mark_source_chains(struct ipt_table_info
+ = (void *)ipt_get_target(e);
+
+ if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
+- printk("iptables: loop hook %u pos %u %08X.\n",
++ ve_printk(VE_LOG, "iptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+@@ -763,7 +813,7 @@ check_entry(struct ipt_entry *e, const c
+ }
+ t->u.kernel.target = target;
+
+- if (t->u.kernel.target == &ipt_standard_target) {
++ if (t->u.kernel.target == &ve_ipt_standard_target) {
+ if (!standard_check(t, size)) {
+ ret = -EINVAL;
+ goto cleanup_matches;
+@@ -933,6 +983,69 @@ translate_table(const char *name,
+ return ret;
+ }
+
++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
++static int charge_replace_table(struct ipt_table_info *oldinfo,
++ struct ipt_table_info *newinfo)
++{
++ struct user_beancounter *old_ub, *new_ub;
++ int old_number, new_number;
++
++ old_ub = vmalloc_ub(oldinfo);
++ new_ub = vmalloc_ub(newinfo);
++ old_number = oldinfo->number;
++ new_number = newinfo->number;
++
++ /* XXX: I don't understand the code below and am not sure that it does
++ * something reasonable. 2002/04/26 SAW */
++ if (old_ub == new_ub) {
++ int charge;
++ /* charge only differences in entries */
++ charge = new_number - old_number;
++ if (charge > 0) {
++ if (charge_iptables(old_ub, charge))
++ return -1;
++ } else
++ uncharge_iptables(old_ub, -charge);
++ } else {
++		/* different contexts; charge the new one and uncharge the old */
++ if (charge_iptables(new_ub, new_number))
++ return -1;
++ uncharge_iptables(old_ub, old_number);
++ }
++ return 0;
++}
++#endif
++
++static int setup_table(struct ipt_table *table, struct ipt_table_info *info)
++{
++#ifdef CONFIG_NETFILTER_DEBUG
++ {
++ struct ipt_entry *table_base;
++ unsigned int i;
++
++ for (i = 0; i < NR_CPUS; i++) {
++ table_base =
++ (void *)info->entries
++ + TABLE_OFFSET(info, i);
++
++ table_base->comefrom = 0xdead57ac;
++ }
++ }
++#endif
++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
++ {
++ struct user_beancounter *ub;
++
++ ub = vmalloc_ub(info);
++ if (charge_iptables(ub, info->number))
++ return -ENOMEM;
++ }
++#endif
++ table->private = info;
++ info->initial_entries = 0;
++ return 0;
++}
++
+ static struct ipt_table_info *
+ replace_table(struct ipt_table *table,
+ unsigned int num_counters,
+@@ -967,6 +1080,16 @@ replace_table(struct ipt_table *table,
+ return NULL;
+ }
+ oldinfo = table->private;
++
++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
++ if (charge_replace_table(oldinfo, newinfo)) {
++ oldinfo = NULL;
++ write_unlock_bh(&table->lock);
++ *error = -ENOMEM;
++ return NULL;
++ }
++#endif
++
+ table->private = newinfo;
+ newinfo->initial_entries = oldinfo->initial_entries;
+ write_unlock_bh(&table->lock);
+@@ -1017,7 +1140,7 @@ copy_entries_to_user(unsigned int total_
+ (other than comefrom, which userspace doesn't care
+ about). */
+ countersize = sizeof(struct ipt_counters) * table->private->number;
+- counters = vmalloc(countersize);
++ counters = ub_vmalloc_best(countersize);
+
+ if (counters == NULL)
+ return -ENOMEM;
+@@ -1130,7 +1253,7 @@ do_replace(void __user *user, unsigned i
+ if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+ return -ENOMEM;
+
+- newinfo = vmalloc(sizeof(struct ipt_table_info)
++ newinfo = ub_vmalloc_best(sizeof(struct ipt_table_info)
+ + SMP_ALIGN(tmp.size) *
+ (highest_possible_processor_id()+1));
+ if (!newinfo)
+@@ -1142,7 +1265,7 @@ do_replace(void __user *user, unsigned i
+ goto free_newinfo;
+ }
+
+- counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
++ counters = ub_vmalloc_best(tmp.num_counters * sizeof(struct ipt_counters));
+ if (!counters) {
+ ret = -ENOMEM;
+ goto free_newinfo;
+@@ -1246,7 +1369,7 @@ do_add_counters(void __user *user, unsig
+ if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
+ return -EINVAL;
+
+- paddc = vmalloc(len);
++ paddc = ub_vmalloc_best(len);
+ if (!paddc)
+ return -ENOMEM;
+
+@@ -1288,7 +1411,7 @@ do_ipt_set_ctl(struct sock *sk, int cmd,
+ {
+ int ret;
+
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+@@ -1313,7 +1436,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
+ {
+ int ret;
+
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+@@ -1418,19 +1541,69 @@ ipt_register_target(struct ipt_target *t
+ ret = down_interruptible(&ipt_mutex);
+ if (ret != 0)
+ return ret;
+- list_add(&target->list, &ipt_target);
++ list_add(&target->list, &ve_ipt_target);
+ up(&ipt_mutex);
+ return ret;
+ }
+
++int
++virt_ipt_register_target(struct ipt_target *target)
++{
++ int ret;
++ struct module *mod = target->me;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct ipt_target *tmp;
++ __module_get(mod);
++ ret = -ENOMEM;
++ tmp = ub_kmalloc(sizeof(struct ipt_target), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, target, sizeof(struct ipt_target));
++ target = tmp;
++ }
++
++ ret = ipt_register_target(target);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env())) {
++ kfree(target);
++nomem:
++ module_put(mod);
++ }
++ return ret;
++}
++
+ void
+ ipt_unregister_target(struct ipt_target *target)
+ {
+ down(&ipt_mutex);
+- LIST_DELETE(&ipt_target, target);
++ LIST_DELETE(&ve_ipt_target, target);
+ up(&ipt_mutex);
+ }
+
++void
++virt_ipt_unregister_target(struct ipt_target *target)
++{
++ if (!ve_is_super(get_exec_env())) {
++ down(&ipt_mutex);
++ target = list_named_find(&ve_ipt_target, target->name);
++ up(&ipt_mutex);
++ if (!target)
++ return;
++ }
++
++ ipt_unregister_target(target);
++
++ if (!ve_is_super(get_exec_env())) {
++ module_put(target->me);
++ kfree(target);
++ }
++}
++
+ int
+ ipt_register_match(struct ipt_match *match)
+ {
+@@ -1440,17 +1613,81 @@ ipt_register_match(struct ipt_match *mat
+ if (ret != 0)
+ return ret;
+
+- list_add(&match->list, &ipt_match);
++ list_add(&match->list, &ve_ipt_match);
+ up(&ipt_mutex);
+
+ return ret;
+ }
+
++int
++virt_ipt_register_match(struct ipt_match *match)
++{
++ int ret;
++ struct module *mod = match->me;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct ipt_match *tmp;
++ __module_get(mod);
++ ret = -ENOMEM;
++ tmp = ub_kmalloc(sizeof(struct ipt_match), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, match, sizeof(struct ipt_match));
++ match = tmp;
++ }
++
++ ret = ipt_register_match(match);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env())) {
++ kfree(match);
++nomem:
++ module_put(mod);
++ }
++ return ret;
++}
++
+ void
+ ipt_unregister_match(struct ipt_match *match)
+ {
+ down(&ipt_mutex);
+- LIST_DELETE(&ipt_match, match);
++ LIST_DELETE(&ve_ipt_match, match);
++ up(&ipt_mutex);
++}
++
++void
++virt_ipt_unregister_match(struct ipt_match *match)
++{
++ if (!ve_is_super(get_exec_env())) {
++ down(&ipt_mutex);
++ match = list_named_find(&ve_ipt_match, match->name);
++ up(&ipt_mutex);
++ if (!match)
++ return;
++ }
++
++ ipt_unregister_match(match);
++
++ if (!ve_is_super(get_exec_env())) {
++ module_put(match->me);
++ kfree(match);
++ }
++}
++
++void ipt_flush_table(struct ipt_table *table)
++{
++ if (table == NULL)
++ return;
++
++ down(&ipt_mutex);
++ IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
++ cleanup_entry, NULL);
++ if (table->private->number > table->private->initial_entries)
++ module_put(table->me);
++ table->private->size = 0;
+ up(&ipt_mutex);
+ }
+
+@@ -1458,14 +1695,13 @@ int ipt_register_table(struct ipt_table
+ {
+ int ret;
+ struct ipt_table_info *newinfo;
+- static struct ipt_table_info bootstrap
+- = { 0, 0, 0, { 0 }, { 0 }, { } };
+
+- newinfo = vmalloc(sizeof(struct ipt_table_info)
++ ret = -ENOMEM;
++ newinfo = ub_vmalloc_best(sizeof(struct ipt_table_info)
+ + SMP_ALIGN(repl->size) *
+ (highest_possible_processor_id()+1));
+ if (!newinfo)
+- return -ENOMEM;
++ goto out;
+
+ memcpy(newinfo->entries, repl->entries, repl->size);
+
+@@ -1474,27 +1710,21 @@ int ipt_register_table(struct ipt_table
+ repl->num_entries,
+ repl->hook_entry,
+ repl->underflow);
+- if (ret != 0) {
+- vfree(newinfo);
+- return ret;
+- }
++ if (ret != 0)
++ goto out_free;
+
+ ret = down_interruptible(&ipt_mutex);
+- if (ret != 0) {
+- vfree(newinfo);
+- return ret;
+- }
++ if (ret != 0)
++ goto out_free;
+
+ /* Don't autoload: we'd eat our tail... */
+- if (list_named_find(&ipt_tables, table->name)) {
+- ret = -EEXIST;
+- goto free_unlock;
+- }
+-
+- /* Simplifies replace_table code. */
+- table->private = &bootstrap;
+- if (!replace_table(table, 0, newinfo, &ret))
+- goto free_unlock;
++ ret = -EEXIST;
++ if (list_named_find(&ve_ipt_tables, table->name))
++ goto out_free_unlock;
++
++ ret = setup_table(table, newinfo);
++ if (ret)
++ goto out_free_unlock;
+
+ duprintf("table->private->number = %u\n",
+ table->private->number);
+@@ -1503,29 +1733,77 @@ int ipt_register_table(struct ipt_table
+ table->private->initial_entries = table->private->number;
+
+ rwlock_init(&table->lock);
+- list_prepend(&ipt_tables, table);
++ list_prepend(&ve_ipt_tables, table);
++ up(&ipt_mutex);
++ return 0;
+
+- unlock:
++out_free_unlock:
+ up(&ipt_mutex);
++out_free:
++ vfree(newinfo);
++out:
+ return ret;
++}
+
+- free_unlock:
+- vfree(newinfo);
+- goto unlock;
++struct ipt_table * virt_ipt_register_table(struct ipt_table *table,
++ const struct ipt_replace *repl)
++{
++ int ret;
++ struct module *mod = table->me;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct ipt_table *tmp;
++ __module_get(mod);
++ ret = -ENOMEM;
++ tmp = ub_kmalloc(sizeof(struct ipt_table), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, table, sizeof(struct ipt_table));
++ table = tmp;
++ }
++
++ ret = ipt_register_table(table, repl);
++ if (ret)
++ goto out;
++
++ return table;
++out:
++ if (!ve_is_super(get_exec_env())) {
++ kfree(table);
++nomem:
++ module_put(mod);
++ }
++ return ERR_PTR(ret);
+ }
+
+ void ipt_unregister_table(struct ipt_table *table)
+ {
+ down(&ipt_mutex);
+- LIST_DELETE(&ipt_tables, table);
++ LIST_DELETE(&ve_ipt_tables, table);
+ up(&ipt_mutex);
+
++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
++ uncharge_iptables(vmalloc_ub(table->private),
++ table->private->number);
++#endif
++
+ /* Decrease module usage counts and free resources */
+ IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+ cleanup_entry, NULL);
+ vfree(table->private);
+ }
+
++void
++virt_ipt_unregister_table(struct ipt_table *table)
++{
++ ipt_unregister_table(table);
++
++ if (!ve_is_super(get_exec_env())) {
++ module_put(table->me);
++ kfree(table);
++ }
++}
++
+ /* Returns 1 if the port is matched by the range, 0 otherwise */
+ static inline int
+ port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
+@@ -1835,7 +2113,7 @@ static inline int print_target(const str
+ off_t start_offset, char *buffer, int length,
+ off_t *pos, unsigned int *count)
+ {
+- if (t == &ipt_standard_target || t == &ipt_error_target)
++ if (t == &ve_ipt_standard_target || t == &ve_ipt_error_target)
+ return 0;
+ return print_name((char *)t, start_offset, buffer, length, pos, count);
+ }
+@@ -1845,10 +2123,16 @@ static int ipt_get_tables(char *buffer,
+ off_t pos = 0;
+ unsigned int count = 0;
+
++#ifdef CONFIG_VE_IPTABLES
++	/* if iptables is not initialized for the current VE, bail out */
++ if (&ve_ipt_standard_target == NULL)
++ return 0;
++#endif
++
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+- LIST_FIND(&ipt_tables, print_name, void *,
++ LIST_FIND(&ve_ipt_tables, print_name, void *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+@@ -1866,7 +2150,7 @@ static int ipt_get_targets(char *buffer,
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+- LIST_FIND(&ipt_target, print_target, struct ipt_target *,
++ LIST_FIND(&ve_ipt_target, print_target, struct ipt_target *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+@@ -1883,7 +2167,7 @@ static int ipt_get_matches(char *buffer,
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+- LIST_FIND(&ipt_match, print_name, void *,
++ LIST_FIND(&ve_ipt_match, print_name, void *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+@@ -1899,6 +2183,36 @@ static const struct { char *name; get_in
+ { NULL, NULL} };
+ #endif /*CONFIG_PROC_FS*/
+
++static int init_proc_entries(void)
++{
++#ifdef CONFIG_PROC_FS
++ struct proc_dir_entry *proc;
++ int i;
++
++ for (i = 0; ipt_proc_entry[i].name; i++) {
++ proc = proc_net_create(ipt_proc_entry[i].name, 0,
++ ipt_proc_entry[i].get_info);
++ if (!proc) {
++ while (--i >= 0)
++ proc_net_remove(ipt_proc_entry[i].name);
++ return -ENOMEM;
++ }
++ proc->owner = THIS_MODULE;
++ }
++#endif
++ return 0;
++}
++
++static void fini_proc_entries(void)
++{
++#ifdef CONFIG_PROC_FS
++ int i;
++ for (i = 0; ipt_proc_entry[i].name; i++)
++ proc_net_remove(ipt_proc_entry[i].name);
++#endif
++}
++
++void fini_iptables(void);
+ static int __init init(void)
+ {
+ int ret;
+@@ -1919,49 +2233,169 @@ static int __init init(void)
+ return ret;
+ }
+
+-#ifdef CONFIG_PROC_FS
+- {
+- struct proc_dir_entry *proc;
+- int i;
+-
+- for (i = 0; ipt_proc_entry[i].name; i++) {
+- proc = proc_net_create(ipt_proc_entry[i].name, 0,
+- ipt_proc_entry[i].get_info);
+- if (!proc) {
+- while (--i >= 0)
+- proc_net_remove(ipt_proc_entry[i].name);
+- nf_unregister_sockopt(&ipt_sockopts);
+- return -ENOMEM;
+- }
+- proc->owner = THIS_MODULE;
++ ret = init_proc_entries();
++ if (ret) {
++ nf_unregister_sockopt(&ipt_sockopts);
++ return ret;
+ }
++
++ printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
++
++#if defined(CONFIG_VE_IPTABLES)
++ /* init ve0 */
++ ret = init_iptables();
++ if (ret) {
++ fini_proc_entries();
++ nf_unregister_sockopt(&ipt_sockopts);
++ return ret;
+ }
++
++ KSYMRESOLVE(init_iptables);
++ KSYMRESOLVE(fini_iptables);
++ KSYMRESOLVE(ipt_flush_table);
++ KSYMMODRESOLVE(ip_tables);
+ #endif
++ return ret;
++}
++
++#ifdef CONFIG_VE_IPTABLES
++/* alloc helper */
++#define ALLOC_ENVF(field,label) \
++ if ( !(envid->field = kmalloc(sizeof(*(envid->field)), GFP_KERNEL)) ) \
++ goto label;
++int init_iptables(void)
++{
++ struct ve_struct *envid;
++
++ envid = get_exec_env();
++
++ if (ve_is_super(envid)) {
++ envid->_ipt_target = &ipt_target;
++ envid->_ipt_match = &ipt_match;
++ envid->_ipt_tables = &ipt_tables;
++
++ envid->_ipt_standard_target = &ipt_standard_target;
++ envid->_ipt_error_target = &ipt_error_target;
++ envid->_tcp_matchstruct = &tcp_matchstruct;
++ envid->_udp_matchstruct = &udp_matchstruct;
++ envid->_icmp_matchstruct = &icmp_matchstruct;
++ } else {
++ /* allocate structures in ve_struct */
++ ALLOC_ENVF(_ipt_target,nomem0);
++ ALLOC_ENVF(_ipt_match,nomem1);
++ ALLOC_ENVF(_ipt_tables,nomem2);
++ ALLOC_ENVF(_ipt_standard_target,nomem3);
++ ALLOC_ENVF(_ipt_error_target,nomem4);
++ ALLOC_ENVF(_tcp_matchstruct,nomem5);
++ ALLOC_ENVF(_udp_matchstruct,nomem6);
++ ALLOC_ENVF(_icmp_matchstruct,nomem7);
++
++ /* FIXME: charge ubc */
++ INIT_LIST_HEAD(envid->_ipt_target);
++ INIT_LIST_HEAD(envid->_ipt_match);
++ INIT_LIST_HEAD(envid->_ipt_tables);
++
++ memcpy(envid->_ipt_standard_target, &ipt_standard_target,
++ sizeof(ipt_standard_target));
++ memcpy(envid->_ipt_error_target, &ipt_error_target,
++ sizeof(ipt_error_target));
++ memcpy(envid->_tcp_matchstruct, &tcp_matchstruct,
++ sizeof(tcp_matchstruct));
++ memcpy(envid->_udp_matchstruct, &udp_matchstruct,
++ sizeof(udp_matchstruct));
++ memcpy(envid->_icmp_matchstruct, &icmp_matchstruct,
++ sizeof(icmp_matchstruct));
++
++ down(&ipt_mutex);
++ list_append(envid->_ipt_target, envid->_ipt_standard_target);
++ list_append(envid->_ipt_target, envid->_ipt_error_target);
++ list_append(envid->_ipt_match, envid->_tcp_matchstruct);
++ list_append(envid->_ipt_match, envid->_udp_matchstruct);
++ list_append(envid->_ipt_match, envid->_icmp_matchstruct);
++ up(&ipt_mutex);
++
++ if (init_proc_entries())
++ goto nomem8;
++ }
+
+- printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
+ return 0;
++
++nomem8:
++ kfree(envid->_icmp_matchstruct); envid->_icmp_matchstruct = NULL;
++nomem7:
++ kfree(envid->_udp_matchstruct); envid->_udp_matchstruct = NULL;
++nomem6:
++ kfree(envid->_tcp_matchstruct); envid->_tcp_matchstruct = NULL;
++nomem5:
++ kfree(envid->_ipt_error_target); envid->_ipt_error_target = NULL;
++nomem4:
++ kfree(envid->_ipt_standard_target); envid->_ipt_standard_target = NULL;
++nomem3:
++ kfree(envid->_ipt_tables); envid->_ipt_tables = NULL;
++nomem2:
++ kfree(envid->_ipt_match); envid->_ipt_match = NULL;
++nomem1:
++ kfree(envid->_ipt_target); envid->_ipt_target = NULL;
++nomem0:
++ return -ENOMEM;
++}
++
++void fini_iptables(void)
++{
++ /* some cleanup */
++ struct ve_struct *envid = get_exec_env();
++
++ if (envid->_ipt_tables != NULL && !ve_is_super(envid)) {
++ kfree(envid->_ipt_tables);
++ kfree(envid->_ipt_target);
++ kfree(envid->_ipt_match);
++ kfree(envid->_ipt_standard_target);
++ kfree(envid->_ipt_error_target);
++ kfree(envid->_tcp_matchstruct);
++ kfree(envid->_udp_matchstruct);
++ kfree(envid->_icmp_matchstruct);
++ fini_proc_entries();
++ }
++
++ envid->_ipt_tables = NULL;
++ envid->_ipt_target = NULL;
++ envid->_ipt_match = NULL;
++ envid->_ipt_standard_target = NULL;
++ envid->_ipt_error_target = NULL;
++ envid->_tcp_matchstruct = NULL;
++ envid->_udp_matchstruct = NULL;
++ envid->_icmp_matchstruct = NULL;
+ }
++#endif
+
+ static void __exit fini(void)
+ {
++ KSYMMODUNRESOLVE(ip_tables);
++ KSYMUNRESOLVE(init_iptables);
++ KSYMUNRESOLVE(fini_iptables);
++ KSYMUNRESOLVE(ipt_flush_table);
+ nf_unregister_sockopt(&ipt_sockopts);
+-#ifdef CONFIG_PROC_FS
+- {
+- int i;
+- for (i = 0; ipt_proc_entry[i].name; i++)
+- proc_net_remove(ipt_proc_entry[i].name);
+- }
++ fini_proc_entries();
++#ifdef CONFIG_VE_IPTABLES
++ fini_iptables();
+ #endif
+ }
+
++EXPORT_SYMBOL(ipt_flush_table);
+ EXPORT_SYMBOL(ipt_register_table);
+ EXPORT_SYMBOL(ipt_unregister_table);
++EXPORT_SYMBOL(virt_ipt_register_table);
++EXPORT_SYMBOL(virt_ipt_unregister_table);
+ EXPORT_SYMBOL(ipt_register_match);
+ EXPORT_SYMBOL(ipt_unregister_match);
+ EXPORT_SYMBOL(ipt_do_table);
++EXPORT_SYMBOL(virt_ipt_register_match);
++EXPORT_SYMBOL(virt_ipt_unregister_match);
+ EXPORT_SYMBOL(ipt_register_target);
+ EXPORT_SYMBOL(ipt_unregister_target);
++EXPORT_SYMBOL(virt_ipt_register_target);
++EXPORT_SYMBOL(virt_ipt_unregister_target);
+ EXPORT_SYMBOL(ipt_find_target);
+
+-module_init(init);
++subsys_initcall(init);
+ module_exit(fini);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_CLASSIFY.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_CLASSIFY.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_CLASSIFY.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_CLASSIFY.c 2006-01-27 14:48:08.000000000 +0300
+@@ -46,7 +46,8 @@ checkentry(const char *tablename,
+ unsigned int hook_mask)
+ {
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
+- printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
++ ve_printk(VE_LOG, KERN_ERR
++ "CLASSIFY: invalid size (%u != %Zu).\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
+ return 0;
+@@ -54,13 +55,14 @@ checkentry(const char *tablename,
+
+ if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
+ (1 << NF_IP_POST_ROUTING))) {
+- printk(KERN_ERR "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
++ ve_printk(VE_LOG, KERN_ERR
++ "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
+ "and POST_ROUTING.\n");
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+- printk(KERN_ERR "CLASSIFY: can only be called from "
++ ve_printk(VE_LOG, KERN_ERR "CLASSIFY: can only be called from "
+ "\"mangle\" table, not \"%s\".\n",
+ tablename);
+ return 0;
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_LOG.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_LOG.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_LOG.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_LOG.c 2006-01-27 14:48:08.000000000 +0300
+@@ -18,6 +18,7 @@
+ #include <net/udp.h>
+ #include <net/tcp.h>
+ #include <net/route.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -51,32 +52,32 @@ static void dump_packet(const struct nf_
+
+ ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+ if (ih == NULL) {
+- printk("TRUNCATED");
++ ve_printk(VE_LOG, "TRUNCATED");
+ return;
+ }
+
+ /* Important fields:
+ * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
+ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
+- printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
++ ve_printk(VE_LOG, "SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
+ NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
+
+ /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+- printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
++ ve_printk(VE_LOG, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+ ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+ ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
+
+ /* Max length: 6 "CE DF MF " */
+ if (ntohs(ih->frag_off) & IP_CE)
+- printk("CE ");
++ ve_printk(VE_LOG, "CE ");
+ if (ntohs(ih->frag_off) & IP_DF)
+- printk("DF ");
++ ve_printk(VE_LOG, "DF ");
+ if (ntohs(ih->frag_off) & IP_MF)
+- printk("MF ");
++ ve_printk(VE_LOG, "MF ");
+
+ /* Max length: 11 "FRAG:65535 " */
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+- printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
++ ve_printk(VE_LOG, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+
+ if ((logflags & IPT_LOG_IPOPT)
+ && ih->ihl * 4 > sizeof(struct iphdr)) {
+@@ -87,15 +88,15 @@ static void dump_packet(const struct nf_
+ op = skb_header_pointer(skb, iphoff+sizeof(_iph),
+ optsize, _opt);
+ if (op == NULL) {
+- printk("TRUNCATED");
++ ve_printk(VE_LOG, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+- printk("OPT (");
++ ve_printk(VE_LOG, "OPT (");
+ for (i = 0; i < optsize; i++)
+- printk("%02X", op[i]);
+- printk(") ");
++ ve_printk(VE_LOG, "%02X", op[i]);
++ ve_printk(VE_LOG, ") ");
+ }
+
+ switch (ih->protocol) {
+@@ -103,7 +104,7 @@ static void dump_packet(const struct nf_
+ struct tcphdr _tcph, *th;
+
+ /* Max length: 10 "PROTO=TCP " */
+- printk("PROTO=TCP ");
++ ve_printk(VE_LOG, "PROTO=TCP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+@@ -112,41 +113,41 @@ static void dump_packet(const struct nf_
+ th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+- printk("SPT=%u DPT=%u ",
++ ve_printk(VE_LOG, "SPT=%u DPT=%u ",
+ ntohs(th->source), ntohs(th->dest));
+ /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+ if (logflags & IPT_LOG_TCPSEQ)
+- printk("SEQ=%u ACK=%u ",
++ ve_printk(VE_LOG, "SEQ=%u ACK=%u ",
+ ntohl(th->seq), ntohl(th->ack_seq));
+ /* Max length: 13 "WINDOW=65535 " */
+- printk("WINDOW=%u ", ntohs(th->window));
++ ve_printk(VE_LOG, "WINDOW=%u ", ntohs(th->window));
+ /* Max length: 9 "RES=0x3F " */
+- printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
++ ve_printk(VE_LOG, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+ /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+ if (th->cwr)
+- printk("CWR ");
++ ve_printk(VE_LOG, "CWR ");
+ if (th->ece)
+- printk("ECE ");
++ ve_printk(VE_LOG, "ECE ");
+ if (th->urg)
+- printk("URG ");
++ ve_printk(VE_LOG, "URG ");
+ if (th->ack)
+- printk("ACK ");
++ ve_printk(VE_LOG, "ACK ");
+ if (th->psh)
+- printk("PSH ");
++ ve_printk(VE_LOG, "PSH ");
+ if (th->rst)
+- printk("RST ");
++ ve_printk(VE_LOG, "RST ");
+ if (th->syn)
+- printk("SYN ");
++ ve_printk(VE_LOG, "SYN ");
+ if (th->fin)
+- printk("FIN ");
++ ve_printk(VE_LOG, "FIN ");
+ /* Max length: 11 "URGP=65535 " */
+- printk("URGP=%u ", ntohs(th->urg_ptr));
++ ve_printk(VE_LOG, "URGP=%u ", ntohs(th->urg_ptr));
+
+ if ((logflags & IPT_LOG_TCPOPT)
+ && th->doff * 4 > sizeof(struct tcphdr)) {
+@@ -159,15 +160,15 @@ static void dump_packet(const struct nf_
+ iphoff+ih->ihl*4+sizeof(_tcph),
+ optsize, _opt);
+ if (op == NULL) {
+- printk("TRUNCATED");
++ ve_printk(VE_LOG, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+- printk("OPT (");
++ ve_printk(VE_LOG, "OPT (");
+ for (i = 0; i < optsize; i++)
+- printk("%02X", op[i]);
+- printk(") ");
++ ve_printk(VE_LOG, "%02X", op[i]);
++ ve_printk(VE_LOG, ") ");
+ }
+ break;
+ }
+@@ -175,7 +176,7 @@ static void dump_packet(const struct nf_
+ struct udphdr _udph, *uh;
+
+ /* Max length: 10 "PROTO=UDP " */
+- printk("PROTO=UDP ");
++ ve_printk(VE_LOG, "PROTO=UDP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+@@ -184,13 +185,13 @@ static void dump_packet(const struct nf_
+ uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+- printk("SPT=%u DPT=%u LEN=%u ",
++ ve_printk(VE_LOG, "SPT=%u DPT=%u LEN=%u ",
+ ntohs(uh->source), ntohs(uh->dest),
+ ntohs(uh->len));
+ break;
+@@ -216,7 +217,7 @@ static void dump_packet(const struct nf_
+ [ICMP_ADDRESSREPLY] = 12 };
+
+ /* Max length: 11 "PROTO=ICMP " */
+- printk("PROTO=ICMP ");
++ ve_printk(VE_LOG, "PROTO=ICMP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+@@ -225,19 +226,19 @@ static void dump_packet(const struct nf_
+ ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_icmph), &_icmph);
+ if (ich == NULL) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+- printk("TYPE=%u CODE=%u ", ich->type, ich->code);
++ ve_printk(VE_LOG, "TYPE=%u CODE=%u ", ich->type, ich->code);
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (ich->type <= NR_ICMP_TYPES
+ && required_len[ich->type]
+ && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+@@ -246,19 +247,19 @@ static void dump_packet(const struct nf_
+ case ICMP_ECHOREPLY:
+ case ICMP_ECHO:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+- printk("ID=%u SEQ=%u ",
++ ve_printk(VE_LOG, "ID=%u SEQ=%u ",
+ ntohs(ich->un.echo.id),
+ ntohs(ich->un.echo.sequence));
+ break;
+
+ case ICMP_PARAMETERPROB:
+ /* Max length: 14 "PARAMETER=255 " */
+- printk("PARAMETER=%u ",
++ ve_printk(VE_LOG, "PARAMETER=%u ",
+ ntohl(ich->un.gateway) >> 24);
+ break;
+ case ICMP_REDIRECT:
+ /* Max length: 24 "GATEWAY=255.255.255.255 " */
+- printk("GATEWAY=%u.%u.%u.%u ",
++ ve_printk(VE_LOG, "GATEWAY=%u.%u.%u.%u ",
+ NIPQUAD(ich->un.gateway));
+ /* Fall through */
+ case ICMP_DEST_UNREACH:
+@@ -266,16 +267,16 @@ static void dump_packet(const struct nf_
+ case ICMP_TIME_EXCEEDED:
+ /* Max length: 3+maxlen */
+ if (!iphoff) { /* Only recurse once. */
+- printk("[");
++ ve_printk(VE_LOG, "[");
+ dump_packet(info, skb,
+ iphoff + ih->ihl*4+sizeof(_icmph));
+- printk("] ");
++ ve_printk(VE_LOG, "] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ich->type == ICMP_DEST_UNREACH
+ && ich->code == ICMP_FRAG_NEEDED)
+- printk("MTU=%u ", ntohs(ich->un.frag.mtu));
++ ve_printk(VE_LOG, "MTU=%u ", ntohs(ich->un.frag.mtu));
+ }
+ break;
+ }
+@@ -287,26 +288,26 @@ static void dump_packet(const struct nf_
+ break;
+
+ /* Max length: 9 "PROTO=AH " */
+- printk("PROTO=AH ");
++ ve_printk(VE_LOG, "PROTO=AH ");
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_ahdr), &_ahdr);
+ if (ah == NULL) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+- printk("SPI=0x%x ", ntohl(ah->spi));
++ ve_printk(VE_LOG, "SPI=0x%x ", ntohl(ah->spi));
+ break;
+ }
+ case IPPROTO_ESP: {
+ struct ip_esp_hdr _esph, *eh;
+
+ /* Max length: 10 "PROTO=ESP " */
+- printk("PROTO=ESP ");
++ ve_printk(VE_LOG, "PROTO=ESP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+@@ -315,25 +316,25 @@ static void dump_packet(const struct nf_
+ eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_esph), &_esph);
+ if (eh == NULL) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+- printk("SPI=0x%x ", ntohl(eh->spi));
++ ve_printk(VE_LOG, "SPI=0x%x ", ntohl(eh->spi));
+ break;
+ }
+ /* Max length: 10 "PROTO 255 " */
+ default:
+- printk("PROTO=%u ", ih->protocol);
++ ve_printk(VE_LOG, "PROTO=%u ", ih->protocol);
+ }
+
+ /* Max length: 15 "UID=4294967295 " */
+ if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
+ read_lock_bh(&skb->sk->sk_callback_lock);
+ if (skb->sk->sk_socket && skb->sk->sk_socket->file)
+- printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
++ ve_printk(VE_LOG, "UID=%u ", skb->sk->sk_socket->file->f_uid);
+ read_unlock_bh(&skb->sk->sk_callback_lock);
+ }
+
+@@ -374,7 +375,7 @@ ipt_log_packet(unsigned int pf,
+ loginfo = &default_loginfo;
+
+ spin_lock_bh(&log_lock);
+- printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
++ ve_printk(VE_LOG, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
+ prefix,
+ in ? in->name : "",
+ out ? out->name : "");
+@@ -384,29 +385,29 @@ ipt_log_packet(unsigned int pf,
+ struct net_device *physoutdev = skb->nf_bridge->physoutdev;
+
+ if (physindev && in != physindev)
+- printk("PHYSIN=%s ", physindev->name);
++ ve_printk(VE_LOG, "PHYSIN=%s ", physindev->name);
+ if (physoutdev && out != physoutdev)
+- printk("PHYSOUT=%s ", physoutdev->name);
++ ve_printk(VE_LOG, "PHYSOUT=%s ", physoutdev->name);
+ }
+ #endif
+
+ if (in && !out) {
+ /* MAC logging for input chain only. */
+- printk("MAC=");
++ ve_printk(VE_LOG, "MAC=");
+ if (skb->dev && skb->dev->hard_header_len
+ && skb->mac.raw != (void*)skb->nh.iph) {
+ int i;
+ unsigned char *p = skb->mac.raw;
+ for (i = 0; i < skb->dev->hard_header_len; i++,p++)
+- printk("%02x%c", *p,
++ ve_printk(VE_LOG, "%02x%c", *p,
+ i==skb->dev->hard_header_len - 1
+ ? ' ':':');
+ } else
+- printk(" ");
++ ve_printk(VE_LOG, " ");
+ }
+
+ dump_packet(loginfo, skb, 0);
+- printk("\n");
++ ve_printk(VE_LOG, "\n");
+ spin_unlock_bh(&log_lock);
+ }
+
+@@ -471,24 +472,44 @@ static struct nf_logger ipt_log_logger =
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_LOG(void)
++{
++ return virt_ipt_register_target(&ipt_log_reg);
++}
++
++void fini_iptable_LOG(void)
++{
++ virt_ipt_unregister_target(&ipt_log_reg);
++}
++
+ static int __init init(void)
+ {
+- if (ipt_register_target(&ipt_log_reg))
+- return -EINVAL;
++ int err;
++
++ err = init_iptable_LOG();
++ if (err < 0)
++ return err;
+ if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
+- printk(KERN_WARNING "ipt_LOG: not logging via system console "
++ ve_printk(VE_LOG, KERN_WARNING "ipt_LOG: not logging via system console "
+ "since somebody else already registered for PF_INET\n");
+ /* we cannot make module load fail here, since otherwise
+ * iptables userspace would abort */
+ }
+
++
++ KSYMRESOLVE(init_iptable_LOG);
++ KSYMRESOLVE(fini_iptable_LOG);
++ KSYMMODRESOLVE(ipt_LOG);
+ return 0;
+ }
+
+ static void __exit fini(void)
+ {
++ KSYMMODUNRESOLVE(ipt_LOG);
++ KSYMUNRESOLVE(init_iptable_LOG);
++ KSYMUNRESOLVE(fini_iptable_LOG);
+ nf_log_unregister_logger(&ipt_log_logger);
+- ipt_unregister_target(&ipt_log_reg);
++ fini_iptable_LOG();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_MARK.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_MARK.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_MARK.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_MARK.c 2006-01-27 14:48:08.000000000 +0300
+@@ -77,14 +77,15 @@ checkentry_v0(const char *tablename,
+ struct ipt_mark_target_info *markinfo = targinfo;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
+- printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
++ ve_printk(VE_LOG, KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_mark_target_info)));
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+- printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
++ ve_printk(VE_LOG, KERN_WARNING "MARK: can only be called from "
++ "\"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_MASQUERADE.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_MASQUERADE.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_MASQUERADE.c 2006-01-27 14:48:08.000000000 +0300
+@@ -118,6 +118,7 @@ masquerade_target(struct sk_buff **pskb,
+ return ip_nat_setup_info(ct, &newrange, hooknum);
+ }
+
++#if 0
+ static inline int
+ device_cmp(struct ip_conntrack *i, void *ifindex)
+ {
+@@ -173,6 +174,7 @@ static struct notifier_block masq_dev_no
+ static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+ };
++#endif
+
+ static struct ipt_target masquerade = {
+ .name = "MASQUERADE",
+@@ -187,12 +189,16 @@ static int __init init(void)
+
+ ret = ipt_register_target(&masquerade);
+
++#if 0
+/* These notifiers are unnecessary and may
+ lead to an oops in virtual environments */
+ if (ret == 0) {
+ /* Register for device down reports */
+ register_netdevice_notifier(&masq_dev_notifier);
+ /* Register IP address change reports */
+ register_inetaddr_notifier(&masq_inet_notifier);
+ }
++#endif
+
+ return ret;
+ }
+@@ -200,8 +206,8 @@ static int __init init(void)
+ static void __exit fini(void)
+ {
+ ipt_unregister_target(&masquerade);
+- unregister_netdevice_notifier(&masq_dev_notifier);
+- unregister_inetaddr_notifier(&masq_inet_notifier);
++/* unregister_netdevice_notifier(&masq_dev_notifier);
++ unregister_inetaddr_notifier(&masq_inet_notifier); */
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_REJECT.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_REJECT.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_REJECT.c 2006-01-27 14:48:08.000000000 +0300
+@@ -22,6 +22,7 @@
+ #include <net/ip.h>
+ #include <net/tcp.h>
+ #include <net/route.h>
++#include <linux/nfcalls.h>
+ #include <net/dst.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_REJECT.h>
+@@ -307,7 +308,7 @@ static int check(const char *tablename,
+ }
+
+ if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
+- printk("REJECT: ECHOREPLY no longer supported.\n");
++ ve_printk(VE_LOG, "REJECT: ECHOREPLY no longer supported.\n");
+ return 0;
+ } else if (rejinfo->with == IPT_TCP_RESET) {
+ /* Must specify that it's a TCP packet */
+@@ -328,14 +329,36 @@ static struct ipt_target ipt_reject_reg
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_REJECT(void)
++{
++ return virt_ipt_register_target(&ipt_reject_reg);
++}
++
++void fini_iptable_REJECT(void)
++{
++ virt_ipt_unregister_target(&ipt_reject_reg);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_target(&ipt_reject_reg);
++ int err;
++
++ err = init_iptable_REJECT();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_REJECT);
++ KSYMRESOLVE(fini_iptable_REJECT);
++ KSYMMODRESOLVE(ipt_REJECT);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_target(&ipt_reject_reg);
++ KSYMMODUNRESOLVE(ipt_REJECT);
++ KSYMUNRESOLVE(init_iptable_REJECT);
++ KSYMUNRESOLVE(fini_iptable_REJECT);
++ fini_iptable_REJECT();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_TCPMSS.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_TCPMSS.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_TCPMSS.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_TCPMSS.c 2006-01-27 14:48:08.000000000 +0300
+@@ -13,6 +13,7 @@
+
+ #include <linux/ip.h>
+ #include <net/tcp.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_TCPMSS.h>
+@@ -228,7 +229,8 @@ ipt_tcpmss_checkentry(const char *tablen
+ ((hook_mask & ~((1 << NF_IP_FORWARD)
+ | (1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING))) != 0)) {
+- printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
++ ve_printk(VE_LOG, "TCPMSS: path-MTU clamping only supported in "
++ "FORWARD, OUTPUT and POSTROUTING hooks\n");
+ return 0;
+ }
+
+@@ -237,7 +239,7 @@ ipt_tcpmss_checkentry(const char *tablen
+ && IPT_MATCH_ITERATE(e, find_syn_match))
+ return 1;
+
+- printk("TCPMSS: Only works on TCP SYN packets\n");
++ ve_printk(VE_LOG, "TCPMSS: Only works on TCP SYN packets\n");
+ return 0;
+ }
+
+@@ -248,14 +250,36 @@ static struct ipt_target ipt_tcpmss_reg
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_TCPMSS(void)
++{
++ return virt_ipt_register_target(&ipt_tcpmss_reg);
++}
++
++void fini_iptable_TCPMSS(void)
++{
++ virt_ipt_unregister_target(&ipt_tcpmss_reg);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_target(&ipt_tcpmss_reg);
++ int err;
++
++ err = init_iptable_TCPMSS();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_TCPMSS);
++ KSYMRESOLVE(fini_iptable_TCPMSS);
++ KSYMMODRESOLVE(ipt_TCPMSS);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_target(&ipt_tcpmss_reg);
++ KSYMMODUNRESOLVE(ipt_TCPMSS);
++ KSYMUNRESOLVE(init_iptable_TCPMSS);
++ KSYMUNRESOLVE(fini_iptable_TCPMSS);
++ fini_iptable_TCPMSS();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_TOS.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_TOS.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_TOS.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_TOS.c 2006-01-27 14:48:08.000000000 +0300
+@@ -15,6 +15,7 @@
+
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_TOS.h>
++#include <linux/nfcalls.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+@@ -60,14 +61,15 @@ checkentry(const char *tablename,
+ const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) {
+- printk(KERN_WARNING "TOS: targinfosize %u != %Zu\n",
++ ve_printk(VE_LOG, KERN_WARNING "TOS: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_tos_target_info)));
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+- printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
++ ve_printk(VE_LOG, KERN_WARNING "TOS: can only be called from "
++ "\"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+@@ -76,7 +78,7 @@ checkentry(const char *tablename,
+ && tos != IPTOS_RELIABILITY
+ && tos != IPTOS_MINCOST
+ && tos != IPTOS_NORMALSVC) {
+- printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
++ ve_printk(VE_LOG, KERN_WARNING "TOS: bad tos value %#x\n", tos);
+ return 0;
+ }
+
+@@ -90,14 +92,36 @@ static struct ipt_target ipt_tos_reg = {
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_TOS(void)
++{
++ return virt_ipt_register_target(&ipt_tos_reg);
++}
++
++void fini_iptable_TOS(void)
++{
++ virt_ipt_unregister_target(&ipt_tos_reg);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_target(&ipt_tos_reg);
++ int err;
++
++ err = init_iptable_TOS();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_TOS);
++ KSYMRESOLVE(fini_iptable_TOS);
++ KSYMMODRESOLVE(ipt_TOS);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_target(&ipt_tos_reg);
++ KSYMMODUNRESOLVE(ipt_TOS);
++ KSYMUNRESOLVE(init_iptable_TOS);
++ KSYMUNRESOLVE(fini_iptable_TOS);
++ fini_iptable_TOS();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_conntrack.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_conntrack.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_conntrack.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_conntrack.c 2006-01-27 14:48:08.000000000 +0300
+@@ -20,6 +20,7 @@
+
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_conntrack.h>
++#include <linux/nfcalls.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+@@ -217,15 +218,37 @@ static struct ipt_match conntrack_match
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_conntrack_match(void)
++{
++ return virt_ipt_register_match(&conntrack_match);
++}
++
++void fini_iptable_conntrack_match(void)
++{
++ virt_ipt_unregister_match(&conntrack_match);
++}
++
+ static int __init init(void)
+ {
++ int err;
++
+ need_ip_conntrack();
+- return ipt_register_match(&conntrack_match);
++ err = init_iptable_conntrack_match();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_conntrack_match);
++ KSYMRESOLVE(fini_iptable_conntrack_match);
++ KSYMMODRESOLVE(ipt_conntrack);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&conntrack_match);
++ KSYMMODUNRESOLVE(ipt_conntrack);
++ KSYMUNRESOLVE(init_iptable_conntrack_match);
++ KSYMUNRESOLVE(fini_iptable_conntrack_match);
++ fini_iptable_conntrack_match();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_helper.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_helper.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_helper.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_helper.c 2006-01-27 14:48:08.000000000 +0300
+@@ -24,6 +24,7 @@
+ #endif
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_helper.h>
++#include <linux/nfcalls.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
+@@ -151,15 +152,37 @@ static struct ipt_match helper_match = {
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_helper(void)
++{
++ return virt_ipt_register_match(&helper_match);
++}
++
++void fini_iptable_helper(void)
++{
++ virt_ipt_unregister_match(&helper_match);
++}
++
+ static int __init init(void)
+ {
++ int err;
++
+ need_ip_conntrack();
+- return ipt_register_match(&helper_match);
++ err = init_iptable_helper();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_helper);
++ KSYMRESOLVE(fini_iptable_helper);
++ KSYMMODRESOLVE(ipt_helper);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&helper_match);
++ KSYMMODUNRESOLVE(ipt_helper);
++ KSYMUNRESOLVE(init_iptable_helper);
++ KSYMUNRESOLVE(fini_iptable_helper);
++ fini_iptable_helper();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_length.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_length.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_length.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_length.c 2006-01-27 14:48:08.000000000 +0300
+@@ -8,6 +8,7 @@
+
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_length.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -50,14 +51,36 @@ static struct ipt_match length_match = {
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_length(void)
++{
++ return virt_ipt_register_match(&length_match);
++}
++
++void fini_iptable_length(void)
++{
++ virt_ipt_unregister_match(&length_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&length_match);
++ int err;
++
++ err = init_iptable_length();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_length);
++ KSYMRESOLVE(fini_iptable_length);
++ KSYMMODRESOLVE(ipt_length);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&length_match);
++ KSYMMODUNRESOLVE(ipt_length);
++ KSYMUNRESOLVE(init_iptable_length);
++ KSYMUNRESOLVE(fini_iptable_length);
++ fini_iptable_length();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_limit.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_limit.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_limit.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_limit.c 2006-01-27 14:48:08.000000000 +0300
+@@ -17,6 +17,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/spinlock.h>
+ #include <linux/interrupt.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_limit.h>
+@@ -25,6 +26,13 @@ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
+ MODULE_DESCRIPTION("iptables rate limit match");
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ipt_limit_reg (*(get_exec_env()->_ipt_limit_reg))
++#else
++#define ve_ipt_limit_reg ipt_limit_reg
++#endif
++
+ /* The algorithm used is the Simple Token Bucket Filter (TBF)
+ * see net/sched/sch_tbf.c in the linux source tree
+ */
+@@ -116,7 +124,7 @@ ipt_limit_checkentry(const char *tablena
+ /* Check for overflow. */
+ if (r->burst == 0
+ || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
+- printk("Overflow in ipt_limit, try lower: %u/%u\n",
++ ve_printk(VE_LOG, "Overflow in ipt_limit, try lower: %u/%u\n",
+ r->avg, r->burst);
+ return 0;
+ }
+@@ -141,16 +149,36 @@ static struct ipt_match ipt_limit_reg =
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_limit(void)
++{
++ return virt_ipt_register_match(&ipt_limit_reg);
++}
++
++void fini_iptable_limit(void)
++{
++ virt_ipt_unregister_match(&ipt_limit_reg);
++}
++
+ static int __init init(void)
+ {
+- if (ipt_register_match(&ipt_limit_reg))
+- return -EINVAL;
++ int err;
++
++ err = init_iptable_limit();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_limit);
++ KSYMRESOLVE(fini_iptable_limit);
++ KSYMMODRESOLVE(ipt_limit);
+ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&ipt_limit_reg);
++ KSYMMODUNRESOLVE(ipt_limit);
++ KSYMUNRESOLVE(init_iptable_limit);
++ KSYMUNRESOLVE(fini_iptable_limit);
++ fini_iptable_limit();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_mac.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_mac.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_mac.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_mac.c 2006-01-27 14:48:08.000000000 +0300
+@@ -48,7 +48,8 @@ ipt_mac_checkentry(const char *tablename
+ if (hook_mask
+ & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
+ | (1 << NF_IP_FORWARD))) {
+- printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
++ ve_printk(VE_LOG, "ipt_mac: only valid for PRE_ROUTING, "
++ "LOCAL_IN or FORWARD.\n");
+ return 0;
+ }
+
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_multiport.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_multiport.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_multiport.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_multiport.c 2006-01-27 14:48:08.000000000 +0300
+@@ -13,6 +13,7 @@
+ #include <linux/types.h>
+ #include <linux/udp.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_multiport.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -21,6 +22,13 @@ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+ MODULE_DESCRIPTION("iptables multiple port match module");
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_multiport_match (*(get_exec_env()->_multiport_match))
++#else
++#define ve_multiport_match multiport_match
++#endif
++
+ #if 0
+ #define duprintf(format, args...) printk(format , ## args)
+ #else
+@@ -188,24 +196,45 @@ static struct ipt_match multiport_match_
+ .me = THIS_MODULE,
+ };
+
+-static int __init init(void)
++int init_iptable_multiport(void)
+ {
+ int err;
+
+- err = ipt_register_match(&multiport_match);
++ err = virt_ipt_register_match(&multiport_match);
+ if (!err) {
+- err = ipt_register_match(&multiport_match_v1);
++ err = virt_ipt_register_match(&multiport_match_v1);
+ if (err)
+- ipt_unregister_match(&multiport_match);
++ virt_ipt_unregister_match(&multiport_match);
+ }
+-
+ return err;
+ }
+
++void fini_iptable_multiport(void)
++{
++ virt_ipt_unregister_match(&multiport_match);
++ virt_ipt_unregister_match(&multiport_match_v1);
++}
++
++static int __init init(void)
++{
++ int err;
++
++ err = init_iptable_multiport();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_multiport);
++ KSYMRESOLVE(fini_iptable_multiport);
++ KSYMMODRESOLVE(ipt_multiport);
++ return 0;
++}
++
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&multiport_match);
+- ipt_unregister_match(&multiport_match_v1);
++ KSYMMODUNRESOLVE(ipt_multiport);
++ KSYMUNRESOLVE(init_iptable_multiport);
++ KSYMUNRESOLVE(fini_iptable_multiport);
++ fini_iptable_multiport();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_state.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_state.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_state.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_state.c 2006-01-27 14:48:08.000000000 +0300
+@@ -10,6 +10,7 @@
+
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+ #include <net/netfilter/nf_conntrack_compat.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_state.h>
+@@ -59,15 +60,37 @@ static struct ipt_match state_match = {
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_state(void)
++{
++ return virt_ipt_register_match(&state_match);
++}
++
++void fini_iptable_state(void)
++{
++ virt_ipt_unregister_match(&state_match);
++}
++
+ static int __init init(void)
+ {
++ int err;
++
+ need_ip_conntrack();
+- return ipt_register_match(&state_match);
++ err = init_iptable_state();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_state);
++ KSYMRESOLVE(fini_iptable_state);
++ KSYMMODRESOLVE(ipt_state);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&state_match);
++ KSYMMODUNRESOLVE(ipt_state);
++ KSYMUNRESOLVE(init_iptable_state);
++ KSYMUNRESOLVE(fini_iptable_state);
++ fini_iptable_state();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_tcpmss.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_tcpmss.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_tcpmss.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_tcpmss.c 2006-01-27 14:48:08.000000000 +0300
+@@ -10,6 +10,7 @@
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
+ #include <net/tcp.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_tcpmss.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -99,7 +100,7 @@ checkentry(const char *tablename,
+
+ /* Must specify -p tcp */
+ if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) {
+- printk("tcpmss: Only works on TCP packets\n");
++ ve_printk(VE_LOG, "tcpmss: Only works on TCP packets\n");
+ return 0;
+ }
+
+@@ -113,14 +114,36 @@ static struct ipt_match tcpmss_match = {
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_tcpmss(void)
++{
++ return virt_ipt_register_match(&tcpmss_match);
++}
++
++void fini_iptable_tcpmss(void)
++{
++ virt_ipt_unregister_match(&tcpmss_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&tcpmss_match);
++ int err;
++
++ err = init_iptable_tcpmss();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_tcpmss);
++ KSYMRESOLVE(fini_iptable_tcpmss);
++ KSYMMODRESOLVE(ipt_tcpmss);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&tcpmss_match);
++ KSYMMODUNRESOLVE(ipt_tcpmss);
++ KSYMUNRESOLVE(init_iptable_tcpmss);
++ KSYMUNRESOLVE(fini_iptable_tcpmss);
++ fini_iptable_tcpmss();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_tos.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_tos.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_tos.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_tos.c 2006-01-27 14:48:08.000000000 +0300
+@@ -10,6 +10,7 @@
+
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_tos.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -17,6 +18,13 @@
+ MODULE_LICENSE("GPL");
+ MODULE_DESCRIPTION("iptables TOS match module");
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_tos_match (*(get_exec_env()->_tos_match))
++#else
++#define ve_tos_match tos_match
++#endif
++
+ static int
+ match(const struct sk_buff *skb,
+ const struct net_device *in,
+@@ -50,14 +58,36 @@ static struct ipt_match tos_match = {
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_tos(void)
++{
++ return virt_ipt_register_match(&tos_match);
++}
++
++void fini_iptable_tos(void)
++{
++ virt_ipt_unregister_match(&tos_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&tos_match);
++ int err;
++
++ err = init_iptable_tos();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_tos);
++ KSYMRESOLVE(fini_iptable_tos);
++ KSYMMODRESOLVE(ipt_tos);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&tos_match);
++ KSYMMODUNRESOLVE(ipt_tos);
++ KSYMUNRESOLVE(init_iptable_tos);
++ KSYMUNRESOLVE(fini_iptable_tos);
++ fini_iptable_tos();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/ipt_ttl.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_ttl.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/ipt_ttl.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/ipt_ttl.c 2006-01-27 14:48:08.000000000 +0300
+@@ -11,6 +11,7 @@
+
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_ttl.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -64,15 +65,36 @@ static struct ipt_match ttl_match = {
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_ttl(void)
++{
++ return virt_ipt_register_match(&ttl_match);
++}
++
++void fini_iptable_ttl(void)
++{
++ virt_ipt_unregister_match(&ttl_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&ttl_match);
++ int err;
++
++ err = init_iptable_ttl();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_ttl);
++ KSYMRESOLVE(fini_iptable_ttl);
++ KSYMMODRESOLVE(ipt_ttl);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&ttl_match);
+-
++ KSYMMODUNRESOLVE(ipt_ttl);
++ KSYMUNRESOLVE(init_iptable_ttl);
++ KSYMUNRESOLVE(fini_iptable_ttl);
++ fini_iptable_ttl();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/iptable_filter.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/iptable_filter.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/iptable_filter.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/iptable_filter.c 2006-01-27 14:48:08.000000000 +0300
+@@ -12,12 +12,20 @@
+
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
++#include <linux/nfcalls.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+ MODULE_DESCRIPTION("iptables filter table");
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_packet_filter (get_exec_env()->_ve_ipt_filter_pf)
++#else
++#define ve_packet_filter &packet_filter
++#endif
++
+ #define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
+
+ static struct
+@@ -25,7 +33,7 @@ static struct
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+-} initial_table __initdata
++} initial_table
+ = { { "filter", FILTER_VALID_HOOKS, 4,
+ sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ { [NF_IP_LOCAL_IN] = 0,
+@@ -89,7 +97,7 @@ ipt_hook(unsigned int hook,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
+- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
++ return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
+ }
+
+ static unsigned int
+@@ -107,7 +115,7 @@ ipt_local_out_hook(unsigned int hook,
+ return NF_ACCEPT;
+ }
+
+- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
++ return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
+ }
+
+ static struct nf_hook_ops ipt_ops[] = {
+@@ -138,56 +146,89 @@ static struct nf_hook_ops ipt_ops[] = {
+ static int forward = NF_ACCEPT;
+ module_param(forward, bool, 0000);
+
+-static int __init init(void)
++int init_iptable_filter(void)
+ {
+ int ret;
+-
+- if (forward < 0 || forward > NF_MAX_VERDICT) {
+- printk("iptables forward must be 0 or 1\n");
+- return -EINVAL;
+- }
+-
+- /* Entry 1 is the FORWARD hook */
+- initial_table.entries[1].target.verdict = -forward - 1;
++ struct ipt_table *tmp_filter;
+
+ /* Register table */
+- ret = ipt_register_table(&packet_filter, &initial_table.repl);
+- if (ret < 0)
+- return ret;
++ tmp_filter = virt_ipt_register_table(&packet_filter,
++ &initial_table.repl);
++ if (IS_ERR(tmp_filter))
++ return PTR_ERR(tmp_filter);
++#ifdef CONFIG_VE_IPTABLES
++ ve_packet_filter = tmp_filter;
++#endif
+
+ /* Register hooks */
+- ret = nf_register_hook(&ipt_ops[0]);
++ ret = virt_nf_register_hook(&ipt_ops[0]);
+ if (ret < 0)
+ goto cleanup_table;
+
+- ret = nf_register_hook(&ipt_ops[1]);
++ ret = virt_nf_register_hook(&ipt_ops[1]);
+ if (ret < 0)
+ goto cleanup_hook0;
+
+- ret = nf_register_hook(&ipt_ops[2]);
++ ret = virt_nf_register_hook(&ipt_ops[2]);
+ if (ret < 0)
+ goto cleanup_hook1;
+
+ return ret;
+
+ cleanup_hook1:
+- nf_unregister_hook(&ipt_ops[1]);
++ virt_nf_unregister_hook(&ipt_ops[1]);
+ cleanup_hook0:
+- nf_unregister_hook(&ipt_ops[0]);
++ virt_nf_unregister_hook(&ipt_ops[0]);
+ cleanup_table:
+- ipt_unregister_table(&packet_filter);
++ virt_ipt_unregister_table(ve_packet_filter);
++#ifdef CONFIG_VE_IPTABLES
++ ve_packet_filter = NULL;
++#endif
+
+ return ret;
+ }
+
+-static void __exit fini(void)
++void fini_iptable_filter(void)
+ {
+ unsigned int i;
+
+ for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
+- nf_unregister_hook(&ipt_ops[i]);
++ virt_nf_unregister_hook(&ipt_ops[i]);
+
+- ipt_unregister_table(&packet_filter);
++ virt_ipt_unregister_table(ve_packet_filter);
++#ifdef CONFIG_VE_IPTABLES
++ ve_packet_filter = NULL;
++#endif
++}
++
++static int __init init(void)
++{
++ int err;
++
++ if (forward < 0 || forward > NF_MAX_VERDICT) {
++ printk("iptables forward must be 0 or 1\n");
++ return -EINVAL;
++ }
++
++ /* Entry 1 is the FORWARD hook */
++ initial_table.entries[1].target.verdict = -forward - 1;
++
++ err = init_iptable_filter();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_filter);
++ KSYMRESOLVE(fini_iptable_filter);
++ KSYMMODRESOLVE(iptable_filter);
++ return 0;
++}
++
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(iptable_filter);
++ KSYMUNRESOLVE(init_iptable_filter);
++ KSYMUNRESOLVE(fini_iptable_filter);
++ fini_iptable_filter();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/netfilter/iptable_mangle.c linux-2.6.15-ve025stab014/net/ipv4/netfilter/iptable_mangle.c
+--- linux-2.6.15.orig/net/ipv4/netfilter/iptable_mangle.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/netfilter/iptable_mangle.c 2006-01-27 14:48:08.000000000 +0300
+@@ -17,6 +17,7 @@
+ #include <linux/skbuff.h>
+ #include <net/sock.h>
+ #include <net/route.h>
++#include <linux/nfcalls.h>
+ #include <linux/ip.h>
+
+ MODULE_LICENSE("GPL");
+@@ -35,7 +36,7 @@ static struct
+ struct ipt_replace repl;
+ struct ipt_standard entries[5];
+ struct ipt_error term;
+-} initial_table __initdata
++} initial_table
+ = { { "mangle", MANGLE_VALID_HOOKS, 6,
+ sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
+ { [NF_IP_PRE_ROUTING] = 0,
+@@ -111,6 +112,13 @@ static struct ipt_table packet_mangler =
+ .me = THIS_MODULE,
+ };
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_packet_mangler (get_exec_env()->_ipt_mangle_table)
++#else
++#define ve_packet_mangler &packet_mangler
++#endif
++
+ /* The work comes in here from netfilter.c. */
+ static unsigned int
+ ipt_route_hook(unsigned int hook,
+@@ -119,7 +127,7 @@ ipt_route_hook(unsigned int hook,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
+- return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
++ return ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
+ }
+
+ static unsigned int
+@@ -148,7 +156,8 @@ ipt_local_hook(unsigned int hook,
+ daddr = (*pskb)->nh.iph->daddr;
+ tos = (*pskb)->nh.iph->tos;
+
+- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
++ ret = ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
++
+ /* Reroute for ANY change. */
+ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
+ && ((*pskb)->nh.iph->saddr != saddr
+@@ -200,60 +209,103 @@ static struct nf_hook_ops ipt_ops[] = {
+ },
+ };
+
+-static int __init init(void)
++static int mangle_init(struct nf_hook_ops ipt_ops[])
+ {
+ int ret;
++ struct ipt_table *tmp_mangler;
+
+ /* Register table */
+- ret = ipt_register_table(&packet_mangler, &initial_table.repl);
+- if (ret < 0)
+- return ret;
++ tmp_mangler = virt_ipt_register_table(&packet_mangler,
++ &initial_table.repl);
++ if (IS_ERR(tmp_mangler))
++ return PTR_ERR(tmp_mangler);
++#ifdef CONFIG_VE_IPTABLES
++ ve_packet_mangler = tmp_mangler;
++#endif
+
+ /* Register hooks */
+- ret = nf_register_hook(&ipt_ops[0]);
++ ret = virt_nf_register_hook(&ipt_ops[0]);
+ if (ret < 0)
+ goto cleanup_table;
+
+- ret = nf_register_hook(&ipt_ops[1]);
++ ret = virt_nf_register_hook(&ipt_ops[1]);
+ if (ret < 0)
+ goto cleanup_hook0;
+
+- ret = nf_register_hook(&ipt_ops[2]);
++ ret = virt_nf_register_hook(&ipt_ops[2]);
+ if (ret < 0)
+ goto cleanup_hook1;
+
+- ret = nf_register_hook(&ipt_ops[3]);
++ ret = virt_nf_register_hook(&ipt_ops[3]);
+ if (ret < 0)
+ goto cleanup_hook2;
+
+- ret = nf_register_hook(&ipt_ops[4]);
++ ret = virt_nf_register_hook(&ipt_ops[4]);
+ if (ret < 0)
+ goto cleanup_hook3;
+
+ return ret;
+
+ cleanup_hook3:
+- nf_unregister_hook(&ipt_ops[3]);
++ virt_nf_unregister_hook(&ipt_ops[3]);
+ cleanup_hook2:
+- nf_unregister_hook(&ipt_ops[2]);
++ virt_nf_unregister_hook(&ipt_ops[2]);
+ cleanup_hook1:
+- nf_unregister_hook(&ipt_ops[1]);
++ virt_nf_unregister_hook(&ipt_ops[1]);
+ cleanup_hook0:
+- nf_unregister_hook(&ipt_ops[0]);
++ virt_nf_unregister_hook(&ipt_ops[0]);
+ cleanup_table:
+- ipt_unregister_table(&packet_mangler);
++ virt_ipt_unregister_table(ve_packet_mangler);
++#ifdef CONFIG_VE_IPTABLES
++ ve_packet_mangler = NULL;
++#endif
+
+ return ret;
+ }
+
+-static void __exit fini(void)
++static void mangle_fini(struct nf_hook_ops ipt_ops[])
+ {
+ unsigned int i;
+
+- for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
+- nf_unregister_hook(&ipt_ops[i]);
++ for (i = 0; i < 5; i++)
++ virt_nf_unregister_hook(&ipt_ops[i]);
++
++ virt_ipt_unregister_table(ve_packet_mangler);
++#ifdef CONFIG_VE_IPTABLES
++ ve_packet_mangler = NULL;
++#endif
++}
++
++static int init_iptable_mangle(void)
++{
++ return mangle_init(ipt_ops);
++}
++
++static void fini_iptable_mangle(void)
++{
++ mangle_fini(ipt_ops);
++}
++
++static int __init init(void)
++{
++ int err;
++
++ err = init_iptable_mangle();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_mangle);
++ KSYMRESOLVE(fini_iptable_mangle);
++ KSYMMODRESOLVE(iptable_mangle);
++ return 0;
++}
+
+- ipt_unregister_table(&packet_mangler);
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(iptable_mangle);
++ KSYMUNRESOLVE(init_iptable_mangle);
++ KSYMUNRESOLVE(fini_iptable_mangle);
++ fini_iptable_mangle();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.15.orig/net/ipv4/proc.c linux-2.6.15-ve025stab014/net/ipv4/proc.c
+--- linux-2.6.15.orig/net/ipv4/proc.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/proc.c 2006-01-27 14:48:08.000000000 +0300
+@@ -257,11 +257,12 @@ static int snmp_seq_show(struct seq_file
+ seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
+
+ seq_printf(seq, "\nIp: %d %d",
+- ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
++ ve_ipv4_devconf.forwarding ? 1 : 2,
++ sysctl_ip_default_ttl);
+
+ for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+- fold_field((void **) ip_statistics,
++ fold_field((void **) ve_ip_statistics,
+ snmp4_ipstats_list[i].entry));
+
+ seq_puts(seq, "\nIcmp:");
+@@ -271,7 +272,7 @@ static int snmp_seq_show(struct seq_file
+ seq_puts(seq, "\nIcmp:");
+ for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+- fold_field((void **) icmp_statistics,
++ fold_field((void **) ve_icmp_statistics,
+ snmp4_icmp_list[i].entry));
+
+ seq_puts(seq, "\nTcp:");
+@@ -283,11 +284,11 @@ static int snmp_seq_show(struct seq_file
+ /* MaxConn field is signed, RFC 2012 */
+ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
+ seq_printf(seq, " %ld",
+- fold_field((void **) tcp_statistics,
++ fold_field((void **) ve_tcp_statistics,
+ snmp4_tcp_list[i].entry));
+ else
+ seq_printf(seq, " %lu",
+- fold_field((void **) tcp_statistics,
++ fold_field((void **) ve_tcp_statistics,
+ snmp4_tcp_list[i].entry));
+ }
+
+@@ -298,7 +299,7 @@ static int snmp_seq_show(struct seq_file
+ seq_puts(seq, "\nUdp:");
+ for (i = 0; snmp4_udp_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+- fold_field((void **) udp_statistics,
++ fold_field((void **) ve_udp_statistics,
+ snmp4_udp_list[i].entry));
+
+ seq_putc(seq, '\n');
+@@ -332,7 +333,7 @@ static int netstat_seq_show(struct seq_f
+ seq_puts(seq, "\nTcpExt:");
+ for (i = 0; snmp4_net_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+- fold_field((void **) net_statistics,
++ fold_field((void **) ve_net_statistics,
+ snmp4_net_list[i].entry));
+
+ seq_putc(seq, '\n');
+@@ -356,10 +357,10 @@ int __init ip_misc_proc_init(void)
+ {
+ int rc = 0;
+
+- if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops))
++ if (!proc_glob_fops_create("net/netstat", S_IRUGO, &netstat_seq_fops))
+ goto out_netstat;
+
+- if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops))
++ if (!proc_glob_fops_create("net/snmp", S_IRUGO, &snmp_seq_fops))
+ goto out_snmp;
+
+ if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops))
+@@ -367,9 +368,9 @@ int __init ip_misc_proc_init(void)
+ out:
+ return rc;
+ out_sockstat:
+- proc_net_remove("snmp");
++ remove_proc_glob_entry("net/snmp", NULL);
+ out_snmp:
+- proc_net_remove("netstat");
++ remove_proc_glob_entry("net/netstat", NULL);
+ out_netstat:
+ rc = -ENOMEM;
+ goto out;
+diff -uprN linux-2.6.15.orig/net/ipv4/raw.c linux-2.6.15-ve025stab014/net/ipv4/raw.c
+--- linux-2.6.15.orig/net/ipv4/raw.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/raw.c 2006-01-27 14:48:08.000000000 +0300
+@@ -114,7 +114,8 @@ struct sock *__raw_v4_lookup(struct sock
+ if (inet->num == num &&
+ !(inet->daddr && inet->daddr != raddr) &&
+ !(inet->rcv_saddr && inet->rcv_saddr != laddr) &&
+- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
++ !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) &&
++ ve_accessible_strict(VE_OWNER_SK(sk), get_exec_env()))
+ goto found; /* gotcha */
+ }
+ sk = NULL;
+@@ -752,8 +753,12 @@ static struct sock *raw_get_first(struct
+ struct hlist_node *node;
+
+ sk_for_each(sk, node, &raw_v4_htable[state->bucket])
+- if (sk->sk_family == PF_INET)
++ if (sk->sk_family == PF_INET) {
++ if (!ve_accessible(VE_OWNER_SK(sk),
++ get_exec_env()))
++ continue;
+ goto found;
++ }
+ }
+ sk = NULL;
+ found:
+@@ -767,8 +772,14 @@ static struct sock *raw_get_next(struct
+ do {
+ sk = sk_next(sk);
+ try_again:
+- ;
+- } while (sk && sk->sk_family != PF_INET);
++ if (!sk)
++ break;
++ if (sk->sk_family != PF_INET)
++ continue;
++ if (ve_accessible(VE_OWNER_SK(sk),
++ get_exec_env()))
++ break;
++ } while (1);
+
+ if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
+ sk = sk_head(&raw_v4_htable[state->bucket]);
+@@ -885,13 +896,13 @@ static struct file_operations raw_seq_fo
+
+ int __init raw_proc_init(void)
+ {
+- if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops))
++ if (!proc_glob_fops_create("net/raw", S_IRUGO, &raw_seq_fops))
+ return -ENOMEM;
+ return 0;
+ }
+
+ void __init raw_proc_exit(void)
+ {
+- proc_net_remove("raw");
++ remove_proc_glob_entry("net/raw", NULL);
+ }
+ #endif /* CONFIG_PROC_FS */
+diff -uprN linux-2.6.15.orig/net/ipv4/route.c linux-2.6.15-ve025stab014/net/ipv4/route.c
+--- linux-2.6.15.orig/net/ipv4/route.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/route.c 2006-01-27 14:48:08.000000000 +0300
+@@ -114,6 +114,8 @@
+
+ #define RT_GC_TIMEOUT (300*HZ)
+
++int ip_rt_src_check = 1;
++
+ static int ip_rt_min_delay = 2 * HZ;
+ static int ip_rt_max_delay = 10 * HZ;
+ static int ip_rt_max_size;
+@@ -253,11 +255,28 @@ static unsigned int rt_hash_code(u32 dad
+ & rt_hash_mask);
+ }
+
++void prepare_rt_cache(void)
++{
++#ifdef CONFIG_VE
++ struct rtable *r;
++ int i;
++
++ for (i = rt_hash_mask; i >= 0; i--) {
++ spin_lock_bh(rt_hash_lock_addr(i));
++ for (r = rt_hash_table[i].chain; r; r = r->u.rt_next) {
++ r->fl.owner_env = get_ve0();
++ }
++ spin_unlock_bh(rt_hash_lock_addr(i));
++ }
++#endif
++}
++
+ #ifdef CONFIG_PROC_FS
+ struct rt_cache_iter_state {
+ int bucket;
+ };
+
++static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r);
+ static struct rtable *rt_cache_get_first(struct seq_file *seq)
+ {
+ struct rtable *r = NULL;
+@@ -270,6 +289,8 @@ static struct rtable *rt_cache_get_first
+ break;
+ rcu_read_unlock_bh();
+ }
++ if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
++ r = rt_cache_get_next(seq, r);
+ return r;
+ }
+
+@@ -277,14 +298,19 @@ static struct rtable *rt_cache_get_next(
+ {
+ struct rt_cache_iter_state *st = rcu_dereference(seq->private);
+
+- r = r->u.rt_next;
++start:
++ do {
++ r = r->u.rt_next;
++ } while (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()));
+ while (!r) {
+ rcu_read_unlock_bh();
+ if (--st->bucket < 0)
+- break;
++ goto out;
+ rcu_read_lock_bh();
+ r = rt_hash_table[st->bucket].chain;
+ }
++ goto start;
++out:
+ return r;
+ }
+
+@@ -556,7 +582,8 @@ static inline int compare_keys(struct fl
+ {
+ return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
+ fl1->oif == fl2->oif &&
+- fl1->iif == fl2->iif;
++ fl1->iif == fl2->iif &&
++ ve_accessible_strict(fl1->owner_env, fl2->owner_env);
+ }
+
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+@@ -670,26 +697,105 @@ static void rt_check_expire(unsigned lon
+ mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
+ }
+
++typedef unsigned long rt_flush_gen_t;
++
++#ifdef CONFIG_VE
++
++static rt_flush_gen_t rt_flush_gen;
++
++/* called under rt_flush_lock */
++static void set_rt_flush_required(struct ve_struct *env)
++{
++ /*
++ * If the global generation rt_flush_gen is equal to G, then
++ * the pass considering entries labelled by G is yet to come.
++ */
++ env->rt_flush_required = rt_flush_gen;
++}
++
++static spinlock_t rt_flush_lock;
++static rt_flush_gen_t reset_rt_flush_required(void)
++{
++ rt_flush_gen_t g;
++
++ spin_lock_bh(&rt_flush_lock);
++ g = rt_flush_gen++;
++ spin_unlock_bh(&rt_flush_lock);
++ return g;
++}
++
++static int check_rt_flush_required(struct ve_struct *env, rt_flush_gen_t gen)
++{
++ /* can be checked without the lock */
++ return env->rt_flush_required >= gen;
++}
++
++#else
++
++static void set_rt_flush_required(struct ve_struct *env)
++{
++}
++
++static rt_flush_gen_t reset_rt_flush_required(void)
++{
++ return 0;
++}
++
++#endif
++
+ /* This can run from both BH and non-BH contexts, the latter
+ * in the case of a forced flush event.
+ */
+ static void rt_run_flush(unsigned long dummy)
+ {
+ int i;
+- struct rtable *rth, *next;
++ struct rtable * rth, * next;
++ struct rtable * tail;
++ rt_flush_gen_t gen;
+
+ rt_deadline = 0;
+
+ get_random_bytes(&rt_hash_rnd, 4);
+
++ gen = reset_rt_flush_required();
++
+ for (i = rt_hash_mask; i >= 0; i--) {
++#ifdef CONFIG_VE
++ struct rtable ** prev, * p;
++
++ spin_lock_bh(rt_hash_lock_addr(i));
++ rth = rt_hash_table[i].chain;
++
++ /* defer releasing the head of the list after spin_unlock */
++ for (tail = rth; tail; tail = tail->u.rt_next)
++ if (!check_rt_flush_required(tail->fl.owner_env, gen))
++ break;
++ if (rth != tail)
++ rt_hash_table[i].chain = tail;
++
++ /* call rt_free on entries after the tail requiring flush */
++ prev = &rt_hash_table[i].chain;
++ for (p = *prev; p; p = next) {
++ next = p->u.rt_next;
++ if (!check_rt_flush_required(p->fl.owner_env, gen)) {
++ prev = &p->u.rt_next;
++ } else {
++ *prev = next;
++ rt_free(p);
++ }
++ }
++
++#else
+ spin_lock_bh(rt_hash_lock_addr(i));
+ rth = rt_hash_table[i].chain;
+ if (rth)
+ rt_hash_table[i].chain = NULL;
++ tail = NULL;
++
++#endif
+ spin_unlock_bh(rt_hash_lock_addr(i));
+
+- for (; rth; rth = next) {
++ for (; rth != tail; rth = next) {
+ next = rth->u.rt_next;
+ rt_free(rth);
+ }
+@@ -728,6 +834,8 @@ void rt_cache_flush(int delay)
+ delay = tmo;
+ }
+
++ set_rt_flush_required(get_exec_env());
++
+ if (delay <= 0) {
+ spin_unlock_bh(&rt_flush_lock);
+ rt_run_flush(0);
+@@ -743,9 +851,30 @@ void rt_cache_flush(int delay)
+
+ static void rt_secret_rebuild(unsigned long dummy)
+ {
++ int i;
++ struct rtable *rth, *next;
+ unsigned long now = jiffies;
+
+- rt_cache_flush(0);
++ spin_lock_bh(&rt_flush_lock);
++ del_timer(&rt_flush_timer);
++ spin_unlock_bh(&rt_flush_lock);
++
++ rt_deadline = 0;
++ get_random_bytes(&rt_hash_rnd, 4);
++
++ for (i = rt_hash_mask; i >= 0; i--) {
++ spin_lock_bh(rt_hash_lock_addr(i));
++ rth = rt_hash_table[i].chain;
++ if (rth)
++ rt_hash_table[i].chain = NULL;
++ spin_unlock_bh(rt_hash_lock_addr(i));
++
++ for (; rth; rth = next) {
++ next = rth->u.rt_next;
++ rt_free(rth);
++ }
++ }
++
+ mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
+ }
+
+@@ -1118,7 +1247,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
+ struct rtable *rth, **rthp;
+ u32 skeys[2] = { saddr, 0 };
+ int ikeys[2] = { dev->ifindex, 0 };
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ tos &= IPTOS_RT_MASK;
+
+ if (!in_dev)
+@@ -1154,6 +1285,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
+ rth->fl.fl4_src != skeys[i] ||
+ rth->fl.fl4_tos != tos ||
+ rth->fl.oif != ikeys[k] ||
++#ifdef CONFIG_VE
++ !ve_accessible_strict(rth->fl.owner_env,
++ ve) ||
++#endif
+ rth->fl.iif != 0) {
+ rthp = &rth->u.rt_next;
+ continue;
+@@ -1192,6 +1327,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
+ rt->u.dst.neighbour = NULL;
+ rt->u.dst.hh = NULL;
+ rt->u.dst.xfrm = NULL;
++#ifdef CONFIG_VE
++ rt->fl.owner_env = ve;
++#endif
+
+ rt->rt_flags |= RTCF_REDIRECTED;
+
+@@ -1631,6 +1769,9 @@ static int ip_route_input_mc(struct sk_b
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark= skb->nfmark;
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env = get_exec_env();
++#endif
+ rth->fl.fl4_src = saddr;
+ rth->rt_src = saddr;
+ #ifdef CONFIG_NET_CLS_ROUTE
+@@ -1638,7 +1779,7 @@ static int ip_route_input_mc(struct sk_b
+ #endif
+ rth->rt_iif =
+ rth->fl.iif = dev->ifindex;
+- rth->u.dst.dev = &loopback_dev;
++ rth->u.dst.dev = &visible_loopback_dev;
+ dev_hold(rth->u.dst.dev);
+ rth->idev = in_dev_get(rth->u.dst.dev);
+ rth->fl.oif = 0;
+@@ -1776,6 +1917,9 @@ static inline int __mkroute_input(struct
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark= skb->nfmark;
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env = get_exec_env();
++#endif
+ rth->fl.fl4_src = saddr;
+ rth->rt_src = saddr;
+ rth->rt_gateway = daddr;
+@@ -1959,7 +2103,7 @@ static int ip_route_input_slow(struct sk
+ if (res.type == RTN_LOCAL) {
+ int result;
+ result = fib_validate_source(saddr, daddr, tos,
+- loopback_dev.ifindex,
++ visible_loopback_dev.ifindex,
+ dev, &spec_dst, &itag);
+ if (result < 0)
+ goto martian_source;
+@@ -2021,6 +2165,9 @@ local_input:
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark= skb->nfmark;
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env = get_exec_env();
++#endif
+ rth->fl.fl4_src = saddr;
+ rth->rt_src = saddr;
+ #ifdef CONFIG_NET_CLS_ROUTE
+@@ -2028,7 +2175,7 @@ local_input:
+ #endif
+ rth->rt_iif =
+ rth->fl.iif = dev->ifindex;
+- rth->u.dst.dev = &loopback_dev;
++ rth->u.dst.dev = &visible_loopback_dev;
+ dev_hold(rth->u.dst.dev);
+ rth->idev = in_dev_get(rth->u.dst.dev);
+ rth->rt_gateway = daddr;
+@@ -2100,6 +2247,9 @@ int ip_route_input(struct sk_buff *skb,
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark == skb->nfmark &&
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env == get_exec_env() &&
++#endif
+ rth->fl.fl4_tos == tos) {
+ rth->u.dst.lastuse = jiffies;
+ dst_hold(&rth->u.dst);
+@@ -2226,6 +2376,9 @@ static inline int __mkroute_output(struc
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env = get_exec_env();
++#endif
+ rth->rt_dst = fl->fl4_dst;
+ rth->rt_src = fl->fl4_src;
+ rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
+@@ -2378,7 +2531,7 @@ static int ip_route_output_slow(struct r
+ .fwmark = oldflp->fl4_fwmark
+ #endif
+ } },
+- .iif = loopback_dev.ifindex,
++ .iif = visible_loopback_dev.ifindex,
+ .oif = oldflp->oif };
+ struct fib_result res;
+ unsigned flags = 0;
+@@ -2399,10 +2552,13 @@ static int ip_route_output_slow(struct r
+ ZERONET(oldflp->fl4_src))
+ goto out;
+
+- /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+- dev_out = ip_dev_find(oldflp->fl4_src);
+- if (dev_out == NULL)
+- goto out;
++ if (ip_rt_src_check) {
++ /* It is equivalent to
++ inet_addr_type(saddr) == RTN_LOCAL */
++ dev_out = ip_dev_find(oldflp->fl4_src);
++ if (dev_out == NULL)
++ goto out;
++ }
+
+ /* I removed check for oif == dev_out->oif here.
+ It was wrong for two reasons:
+@@ -2429,6 +2585,12 @@ static int ip_route_output_slow(struct r
+ Luckily, this hack is good workaround.
+ */
+
++ if (dev_out == NULL) {
++ dev_out = ip_dev_find(oldflp->fl4_src);
++ if (dev_out == NULL)
++ goto out;
++ }
++
+ fl.oif = dev_out->ifindex;
+ goto make_route;
+ }
+@@ -2472,9 +2634,9 @@ static int ip_route_output_slow(struct r
+ fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
+ if (dev_out)
+ dev_put(dev_out);
+- dev_out = &loopback_dev;
++ dev_out = &visible_loopback_dev;
+ dev_hold(dev_out);
+- fl.oif = loopback_dev.ifindex;
++ fl.oif = visible_loopback_dev.ifindex;
+ res.type = RTN_LOCAL;
+ flags |= RTCF_LOCAL;
+ goto make_route;
+@@ -2519,7 +2681,7 @@ static int ip_route_output_slow(struct r
+ fl.fl4_src = fl.fl4_dst;
+ if (dev_out)
+ dev_put(dev_out);
+- dev_out = &loopback_dev;
++ dev_out = &visible_loopback_dev;
+ dev_hold(dev_out);
+ fl.oif = dev_out->ifindex;
+ if (res.fi)
+@@ -2575,6 +2737,7 @@ int __ip_route_output_key(struct rtable
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark == flp->fl4_fwmark &&
+ #endif
++ ve_accessible_strict(rth->fl.owner_env, get_exec_env()) &&
+ !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+ (IPTOS_RT_MASK | RTO_ONLINK))) {
+
+@@ -2705,7 +2868,7 @@ static int rt_fill_info(struct sk_buff *
+ u32 dst = rt->rt_dst;
+
+ if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
+- ipv4_devconf.mc_forwarding) {
++ ve_ipv4_devconf.mc_forwarding) {
+ int err = ipmr_get_route(skb, r, nowait);
+ if (err <= 0) {
+ if (!nowait) {
+@@ -2853,22 +3016,22 @@ void ip_rt_multicast_event(struct in_dev
+ }
+
+ #ifdef CONFIG_SYSCTL
+-static int flush_delay;
++int ipv4_flush_delay;
+
+-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
++int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+ struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+ {
+ if (write) {
+ proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+- rt_cache_flush(flush_delay);
++ rt_cache_flush(ipv4_flush_delay);
+ return 0;
+ }
+
+ return -EINVAL;
+ }
+
+-static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
++int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+ int __user *name,
+ int nlen,
+ void __user *oldval,
+@@ -2890,7 +3053,7 @@ ctl_table ipv4_route_table[] = {
+ {
+ .ctl_name = NET_IPV4_ROUTE_FLUSH,
+ .procname = "flush",
+- .data = &flush_delay,
++ .data = &ipv4_flush_delay,
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = &ipv4_sysctl_rtcache_flush,
+@@ -3188,16 +3351,17 @@ int __init ip_rt_init(void)
+ #ifdef CONFIG_PROC_FS
+ {
+ struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
+- if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
+- !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
+- proc_net_stat))) {
++ if (!proc_glob_fops_create("net/rt_cache",
++ S_IRUGO, &rt_cache_seq_fops) ||
++ !(rtstat_pde = create_proc_glob_entry("net/stat/rt_cache",
++ S_IRUGO, NULL))) {
+ free_percpu(rt_cache_stat);
+ return -ENOMEM;
+ }
+ rtstat_pde->proc_fops = &rt_cpu_seq_fops;
+ }
+ #ifdef CONFIG_NET_CLS_ROUTE
+- create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
++ create_proc_read_entry("net/rt_acct", 0, NULL, ip_rt_acct_read, NULL);
+ #endif
+ #endif
+ #ifdef CONFIG_XFRM
+diff -uprN linux-2.6.15.orig/net/ipv4/sysctl_net_ipv4.c linux-2.6.15-ve025stab014/net/ipv4/sysctl_net_ipv4.c
+--- linux-2.6.15.orig/net/ipv4/sysctl_net_ipv4.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/sysctl_net_ipv4.c 2006-01-27 14:48:08.000000000 +0300
+@@ -31,22 +31,21 @@ struct ipv4_config ipv4_config;
+
+ #ifdef CONFIG_SYSCTL
+
+-static
+ int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+- int val = ipv4_devconf.forwarding;
++ int val = ve_ipv4_devconf.forwarding;
+ int ret;
+
+ ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+- if (write && ipv4_devconf.forwarding != val)
++ if (write && ve_ipv4_devconf.forwarding != val)
+ inet_forward_change();
+
+ return ret;
+ }
+
+-static int ipv4_sysctl_forward_strategy(ctl_table *table,
++int ipv4_sysctl_forward_strategy(ctl_table *table,
+ int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen,
+diff -uprN linux-2.6.15.orig/net/ipv4/tcp.c linux-2.6.15-ve025stab014/net/ipv4/tcp.c
+--- linux-2.6.15.orig/net/ipv4/tcp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/tcp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -248,6 +248,7 @@
+ */
+
+ #include <linux/config.h>
++#include <linux/kmem_cache.h>
+ #include <linux/module.h>
+ #include <linux/types.h>
+ #include <linux/fcntl.h>
+@@ -263,6 +264,9 @@
+ #include <net/xfrm.h>
+ #include <net/ip.h>
+
++#include <ub/ub_orphan.h>
++#include <ub/ub_net.h>
++#include <ub/ub_tcp.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/ioctls.h>
+@@ -321,6 +325,7 @@ unsigned int tcp_poll(struct file *file,
+ unsigned int mask;
+ struct sock *sk = sock->sk;
+ struct tcp_sock *tp = tcp_sk(sk);
++ int check_send_space;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ if (sk->sk_state == TCP_LISTEN)
+@@ -335,6 +340,21 @@ unsigned int tcp_poll(struct file *file,
+ if (sk->sk_err)
+ mask = POLLERR;
+
++ check_send_space = 1;
++#ifdef CONFIG_USER_RESOURCE
++ if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) {
++ unsigned long size;
++ size = MAX_TCP_HEADER + tp->mss_cache;
++ if (size > SOCK_MIN_UBCSPACE)
++ size = SOCK_MIN_UBCSPACE;
++ size = skb_charge_size(size);
++ if (ub_sock_makewres_tcp(sk, size)) {
++ check_send_space = 0;
++ ub_sock_sndqueueadd_tcp(sk, size);
++ }
++ }
++#endif
++
+ /*
+ * POLLHUP is certainly not done right. But poll() doesn't
+ * have a notion of HUP in just one direction, and for a
+@@ -378,7 +398,7 @@ unsigned int tcp_poll(struct file *file,
+ sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
+ mask |= POLLIN | POLLRDNORM;
+
+- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
++ if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else { /* send SIGIO later */
+@@ -528,16 +548,23 @@ static ssize_t do_tcp_sendpages(struct s
+ int copy, i, can_coalesce;
+ int offset = poffset % PAGE_SIZE;
+ int size = min_t(size_t, psize, PAGE_SIZE - offset);
++ unsigned long chargesize = 0;
+
+ if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
+ new_segment:
++ chargesize = 0;
+ if (!sk_stream_memory_free(sk))
+ goto wait_for_sndbuf;
+
++ chargesize = skb_charge_size(MAX_TCP_HEADER +
++ tp->mss_cache);
++ if (ub_sock_getwres_tcp(sk, chargesize) < 0)
++ goto wait_for_ubspace;
+ skb = sk_stream_alloc_pskb(sk, 0, 0,
+ sk->sk_allocation);
+ if (!skb)
+ goto wait_for_memory;
++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
+
+ skb_entail(sk, tp, skb);
+ copy = size_goal;
+@@ -593,10 +620,14 @@ new_segment:
+ wait_for_sndbuf:
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ wait_for_memory:
++ ub_sock_retwres_tcp(sk, chargesize,
++ skb_charge_size(MAX_TCP_HEADER + tp->mss_cache));
++ chargesize = 0;
++wait_for_ubspace:
+ if (copied)
+ tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+
+- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
++ if ((err = sk_stream_wait_memory(sk, &timeo, chargesize)) != 0)
+ goto do_error;
+
+ mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+@@ -699,6 +730,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru
+ while (--iovlen >= 0) {
+ int seglen = iov->iov_len;
+ unsigned char __user *from = iov->iov_base;
++ unsigned long chargesize = 0;
+
+ iov++;
+
+@@ -709,18 +741,26 @@ int tcp_sendmsg(struct kiocb *iocb, stru
+
+ if (!sk->sk_send_head ||
+ (copy = size_goal - skb->len) <= 0) {
++ unsigned long size;
+
+ new_segment:
+ /* Allocate new segment. If the interface is SG,
+ * allocate skb fitting to single page.
+ */
++ chargesize = 0;
+ if (!sk_stream_memory_free(sk))
+ goto wait_for_sndbuf;
+-
+- skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
+- 0, sk->sk_allocation);
++ size = select_size(sk, tp);
++ chargesize = skb_charge_size(MAX_TCP_HEADER +
++ size);
++ if (ub_sock_getwres_tcp(sk, chargesize) < 0)
++ goto wait_for_ubspace;
++ skb = sk_stream_alloc_pskb(sk, size, 0,
++ sk->sk_allocation);
+ if (!skb)
+ goto wait_for_memory;
++ ub_skb_set_charge(skb, sk, chargesize,
++ UB_TCPSNDBUF);
+
+ /*
+ * Check whether we can use HW checksum.
+@@ -768,6 +808,7 @@ new_segment:
+ } else if (page) {
+ if (off == PAGE_SIZE) {
+ put_page(page);
++ ub_sock_tcp_detachpage(sk);
+ TCP_PAGE(sk) = page = NULL;
+ off = 0;
+ }
+@@ -781,6 +822,9 @@ new_segment:
+ goto wait_for_memory;
+
+ if (!page) {
++ chargesize = PAGE_SIZE;
++ if (ub_sock_tcp_chargepage(sk) < 0)
++ goto wait_for_ubspace;
+ /* Allocate new cache page. */
+ if (!(page = sk_stream_alloc_page(sk)))
+ goto wait_for_memory;
+@@ -812,7 +856,8 @@ new_segment:
+ } else if (off + copy < PAGE_SIZE) {
+ get_page(page);
+ TCP_PAGE(sk) = page;
+- }
++ } else
++ ub_sock_tcp_detachpage(sk);
+ }
+
+ TCP_OFF(sk) = off + copy;
+@@ -843,10 +888,15 @@ new_segment:
+ wait_for_sndbuf:
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ wait_for_memory:
++ ub_sock_retwres_tcp(sk, chargesize,
++ skb_charge_size(MAX_TCP_HEADER+tp->mss_cache));
++ chargesize = 0;
++wait_for_ubspace:
+ if (copied)
+ tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+
+- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
++ if ((err = sk_stream_wait_memory(sk, &timeo,
++ chargesize)) != 0)
+ goto do_error;
+
+ mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+@@ -944,7 +994,18 @@ static void cleanup_rbuf(struct sock *sk
+ #if TCP_DEBUG
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+- BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
++ if (!(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq))) {
++ printk("KERNEL: assertion: skb==NULL || "
++ "before(tp->copied_seq, skb->end_seq)\n");
++ printk("VE%u pid %d comm %.16s\n",
++ (get_exec_env() ? VEID(get_exec_env()) : 0),
++ current->pid, current->comm);
++ printk("copied=%d, copied_seq=%d, rcv_nxt=%d\n", copied,
++ tp->copied_seq, tp->rcv_nxt);
++ printk("skb->len=%d, skb->seq=%d, skb->end_seq=%d\n",
++ skb->len, TCP_SKB_CB(skb)->seq,
++ TCP_SKB_CB(skb)->end_seq);
++ }
+ #endif
+
+ if (inet_csk_ack_scheduled(sk)) {
+@@ -1168,7 +1229,22 @@ int tcp_recvmsg(struct kiocb *iocb, stru
+ goto found_ok_skb;
+ if (skb->h.th->fin)
+ goto found_fin_ok;
+- BUG_TRAP(flags & MSG_PEEK);
++ if (!(flags & MSG_PEEK)) {
++ printk("KERNEL: assertion: flags&MSG_PEEK\n");
++ printk("VE%u pid %d comm %.16s\n",
++ (get_exec_env() ?
++ VEID(get_exec_env()) : 0),
++ current->pid, current->comm);
++ printk("flags=0x%x, len=%d, copied_seq=%d, "
++ "rcv_nxt=%d\n", flags, len,
++ tp->copied_seq, tp->rcv_nxt);
++ printk("skb->len=%d, *seq=%d, skb->seq=%d, "
++ "skb->end_seq=%d, offset=%d\n",
++ skb->len, *seq,
++ TCP_SKB_CB(skb)->seq,
++ TCP_SKB_CB(skb)->end_seq,
++ offset);
++ }
+ skb = skb->next;
+ } while (skb != (struct sk_buff *)&sk->sk_receive_queue);
+
+@@ -1231,8 +1307,18 @@ int tcp_recvmsg(struct kiocb *iocb, stru
+
+ tp->ucopy.len = len;
+
+- BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
+- (flags & (MSG_PEEK | MSG_TRUNC)));
++ if (!(tp->copied_seq == tp->rcv_nxt ||
++ (flags&(MSG_PEEK|MSG_TRUNC)))) {
++ printk("KERNEL: assertion: tp->copied_seq == "
++ "tp->rcv_nxt || ...\n");
++ printk("VE%u pid %d comm %.16s\n",
++ (get_exec_env() ?
++ VEID(get_exec_env()) : 0),
++ current->pid, current->comm);
++ printk("flags=0x%x, len=%d, copied_seq=%d, "
++ "rcv_nxt=%d\n", flags, len,
++ tp->copied_seq, tp->rcv_nxt);
++ }
+
+ /* Ugly... If prequeue is not empty, we have to
+ * process it before releasing socket, otherwise
+@@ -1583,7 +1669,7 @@ adjudge_to_death:
+ if (tmo > TCP_TIMEWAIT_LEN) {
+ inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
+ } else {
+- atomic_inc(sk->sk_prot->orphan_count);
++ ub_inc_orphan_count(sk);
+ tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
+ goto out;
+ }
+@@ -1591,9 +1677,7 @@ adjudge_to_death:
+ }
+ if (sk->sk_state != TCP_CLOSE) {
+ sk_stream_mem_reclaim(sk);
+- if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans ||
+- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
++ if (ub_too_many_orphans(sk, ub_get_orphan_count(sk))) {
+ if (net_ratelimit())
+ printk(KERN_INFO "TCP: too many of orphaned "
+ "sockets\n");
+@@ -1602,7 +1686,7 @@ adjudge_to_death:
+ NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+ }
+ }
+- atomic_inc(sk->sk_prot->orphan_count);
++ ub_inc_orphan_count(sk);
+
+ if (sk->sk_state == TCP_CLOSE)
+ inet_csk_destroy_sock(sk);
+@@ -2051,7 +2135,7 @@ void __init tcp_init(void)
+ tcp_hashinfo.bind_bucket_cachep =
+ kmem_cache_create("tcp_bind_bucket",
+ sizeof(struct inet_bind_bucket), 0,
+- SLAB_HWCACHE_ALIGN, NULL, NULL);
++ SLAB_HWCACHE_ALIGN | SLAB_UBC, NULL, NULL);
+ if (!tcp_hashinfo.bind_bucket_cachep)
+ panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
+
+diff -uprN linux-2.6.15.orig/net/ipv4/tcp_input.c linux-2.6.15-ve025stab014/net/ipv4/tcp_input.c
+--- linux-2.6.15.orig/net/ipv4/tcp_input.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/tcp_input.c 2006-01-27 14:48:06.000000000 +0300
+@@ -72,6 +72,8 @@
+ #include <linux/ipsec.h>
+ #include <asm/unaligned.h>
+
++#include <ub/ub_tcp.h>
++
+ int sysctl_tcp_timestamps = 1;
+ int sysctl_tcp_window_scaling = 1;
+ int sysctl_tcp_sack = 1;
+@@ -252,7 +254,7 @@ static inline void tcp_grow_window(struc
+ /* Check #1 */
+ if (tp->rcv_ssthresh < tp->window_clamp &&
+ (int)tp->rcv_ssthresh < tcp_space(sk) &&
+- !tcp_memory_pressure) {
++ ub_tcp_rmem_allows_expand(sk)) {
+ int incr;
+
+ /* Check #2. Increase window, if skb with such overhead
+@@ -321,6 +323,8 @@ static void tcp_init_buffer_space(struct
+
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
+ tp->snd_cwnd_stamp = tcp_time_stamp;
++
++ ub_tcp_update_maxadvmss(sk);
+ }
+
+ /* 5. Recalculate window clamp after socket hit its memory bounds. */
+@@ -332,7 +336,7 @@ static void tcp_clamp_window(struct sock
+
+ if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+- !tcp_memory_pressure &&
++ !ub_tcp_memory_pressure(sk) &&
+ atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
+ sysctl_tcp_rmem[2]);
+@@ -3077,7 +3081,7 @@ queue_and_out:
+ !sk_stream_rmem_schedule(sk, skb))) {
+ if (tcp_prune_queue(sk) < 0 ||
+ !sk_stream_rmem_schedule(sk, skb))
+- goto drop;
++ goto drop_part;
+ }
+ sk_stream_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+@@ -3121,6 +3125,12 @@ out_of_window:
+ drop:
+ __kfree_skb(skb);
+ return;
++
++drop_part:
++ if (after(tp->copied_seq, tp->rcv_nxt))
++ tp->rcv_nxt = tp->copied_seq;
++ __kfree_skb(skb);
++ return;
+ }
+
+ /* Out of window. F.e. zero window probe. */
+@@ -3292,6 +3302,10 @@ tcp_collapse(struct sock *sk, struct sk_
+ nskb = alloc_skb(copy+header, GFP_ATOMIC);
+ if (!nskb)
+ return;
++ if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) {
++ kfree_skb(nskb);
++ return;
++ }
+ skb_reserve(nskb, header);
+ memcpy(nskb->head, skb->head, header);
+ nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
+@@ -3388,7 +3402,7 @@ static int tcp_prune_queue(struct sock *
+
+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ tcp_clamp_window(sk, tp);
+- else if (tcp_memory_pressure)
++ else if (ub_tcp_memory_pressure(sk))
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+
+ tcp_collapse_ofo_queue(sk);
+@@ -3464,7 +3478,7 @@ static inline int tcp_should_expand_sndb
+ return 0;
+
+ /* If we are under global TCP memory pressure, do not expand. */
+- if (tcp_memory_pressure)
++ if (ub_tcp_memory_pressure(sk))
+ return 0;
+
+ /* If we are under soft global TCP memory pressure, do not expand. */
+@@ -3858,6 +3872,10 @@ int tcp_rcv_established(struct sock *sk,
+
+ if ((int)skb->truesize > sk->sk_forward_alloc)
+ goto step5;
++ /* This is OK not to try to free memory here.
++ * Do this below on slow path. Den */
++ if (ub_tcprcvbuf_charge(sk, skb) < 0)
++ goto step5;
+
+ NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
+
+diff -uprN linux-2.6.15.orig/net/ipv4/tcp_ipv4.c linux-2.6.15-ve025stab014/net/ipv4/tcp_ipv4.c
+--- linux-2.6.15.orig/net/ipv4/tcp_ipv4.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/tcp_ipv4.c 2006-01-27 14:48:08.000000000 +0300
+@@ -71,6 +71,8 @@
+ #include <net/inet_common.h>
+ #include <net/xfrm.h>
+
++#include <ub/ub_tcp.h>
++
+ #include <linux/inet.h>
+ #include <linux/ipv6.h>
+ #include <linux/stddef.h>
+@@ -128,12 +130,16 @@ static int __tcp_v4_check_established(st
+ int dif = sk->sk_bound_dev_if;
+ INET_ADDR_COOKIE(acookie, saddr, daddr)
+ const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+- struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
++ unsigned int hash;
++ struct inet_ehash_bucket *head;
+ struct sock *sk2;
+ const struct hlist_node *node;
+ struct inet_timewait_sock *tw;
++ struct ve_struct *env;
+
++ env = VE_OWNER_SK(sk);
++ hash = inet_ehashfn(daddr, lport, saddr, inet->dport, VEID(env));
++ head = inet_ehash_bucket(&tcp_hashinfo, hash);
+ prefetch(head->chain.first);
+ write_lock(&head->lock);
+
+@@ -141,7 +147,8 @@ static int __tcp_v4_check_established(st
+ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
+ tw = inet_twsk(sk2);
+
+- if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
++ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr,
++ ports, dif, env)) {
+ const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+@@ -178,7 +185,8 @@ static int __tcp_v4_check_established(st
+
+ /* And established part... */
+ sk_for_each(sk2, node, &head->chain) {
+- if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
++ if (INET_MATCH(sk2, hash, acookie, saddr, daddr,
++ ports, dif, env))
+ goto not_unique;
+ }
+
+@@ -228,7 +236,9 @@ static inline int tcp_v4_hash_connect(st
+ struct inet_bind_hashbucket *head;
+ struct inet_bind_bucket *tb;
+ int ret;
++ struct ve_struct *env;
+
++ env = VE_OWNER_SK(sk);
+ if (!snum) {
+ int low = sysctl_local_port_range[0];
+ int high = sysctl_local_port_range[1];
+@@ -243,7 +253,9 @@ static inline int tcp_v4_hash_connect(st
+ local_bh_disable();
+ for (i = 1; i <= range; i++) {
+ port = low + (i + offset) % range;
+- head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
++ head = &tcp_hashinfo.bhash[inet_bhashfn(port,
++ tcp_hashinfo.bhash_size,
++ VEID(env))];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+@@ -251,6 +263,8 @@ static inline int tcp_v4_hash_connect(st
+ * unique enough.
+ */
+ inet_bind_bucket_for_each(tb, node, &head->chain) {
++ if (!ve_accessible_strict(VE_OWNER_TB(tb),env))
++ continue;
+ if (tb->port == port) {
+ BUG_TRAP(!hlist_empty(&tb->owners));
+ if (tb->fastreuse >= 0)
+@@ -263,7 +277,7 @@ static inline int tcp_v4_hash_connect(st
+ }
+ }
+
+- tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
++ tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port, env);
+ if (!tb) {
+ spin_unlock(&head->lock);
+ break;
+@@ -298,7 +312,8 @@ ok:
+ goto out;
+ }
+
+- head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
++ head = &tcp_hashinfo.bhash[inet_bhashfn(snum,
++ tcp_hashinfo.bhash_size, VEID(env))];
+ tb = inet_csk(sk)->icsk_bind_hash;
+ spin_lock_bh(&head->lock);
+ if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+@@ -1137,12 +1152,15 @@ static int tcp_v4_checksum_init(struct s
+ */
+ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
+ {
++ struct user_beancounter *ub;
++
++ ub = set_exec_ub(sock_bc(sk)->ub);
+ if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+ TCP_CHECK_TIMER(sk);
+ if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+ goto reset;
+ TCP_CHECK_TIMER(sk);
+- return 0;
++ goto restore_context;
+ }
+
+ if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
+@@ -1156,7 +1174,7 @@ int tcp_v4_do_rcv(struct sock *sk, struc
+ if (nsk != sk) {
+ if (tcp_child_process(sk, nsk, skb))
+ goto reset;
+- return 0;
++ goto restore_context;
+ }
+ }
+
+@@ -1164,6 +1182,9 @@ int tcp_v4_do_rcv(struct sock *sk, struc
+ if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+ goto reset;
+ TCP_CHECK_TIMER(sk);
++
++restore_context:
++ (void)set_exec_ub(ub);
+ return 0;
+
+ reset:
+@@ -1175,7 +1196,7 @@ discard:
+ * might be destroyed here. This current version compiles correctly,
+ * but you have been warned.
+ */
+- return 0;
++ goto restore_context;
+
+ csum_err:
+ TCP_INC_STATS_BH(TCP_MIB_INERRS);
+@@ -1468,6 +1489,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
+ * If sendmsg cached page exists, toss it.
+ */
+ if (sk->sk_sndmsg_page) {
++ /* queue is empty, uncharge */
++ ub_sock_tcp_detachpage(sk);
+ __free_page(sk->sk_sndmsg_page);
+ sk->sk_sndmsg_page = NULL;
+ }
+@@ -1488,10 +1511,18 @@ static inline struct inet_timewait_sock
+ list_entry(head->first, struct inet_timewait_sock, tw_node);
+ }
+
+-static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
++static inline struct inet_timewait_sock *
++ tw_next(struct inet_timewait_sock *tw, envid_t veid)
+ {
+- return tw->tw_node.next ?
+- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
++ while (1) {
++ if (tw->tw_node.next == NULL)
++ return NULL;
++ tw = hlist_entry(tw->tw_node.next, typeof(*tw), tw_node);
++ if (!ve_accessible_veid(tw->tw_owner_env, veid))
++ continue;
++ return tw;
++ }
++ return NULL; /* make compiler happy */
+ }
+
+ static void *listening_get_next(struct seq_file *seq, void *cur)
+@@ -1500,7 +1531,9 @@ static void *listening_get_next(struct s
+ struct hlist_node *node;
+ struct sock *sk = cur;
+ struct tcp_iter_state* st = seq->private;
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ if (!sk) {
+ st->bucket = 0;
+ sk = sk_head(&tcp_hashinfo.listening_hash[0]);
+@@ -1540,6 +1573,8 @@ get_req:
+ }
+ get_sk:
+ sk_for_each_from(sk, node) {
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (sk->sk_family == st->family) {
+ cur = sk;
+ goto out;
+@@ -1580,7 +1615,9 @@ static void *established_get_first(struc
+ {
+ struct tcp_iter_state* st = seq->private;
+ void *rc = NULL;
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
+ struct sock *sk;
+ struct hlist_node *node;
+@@ -1591,6 +1628,8 @@ static void *established_get_first(struc
+
+ read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
+ sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (sk->sk_family != st->family) {
+ continue;
+ }
+@@ -1600,6 +1639,8 @@ static void *established_get_first(struc
+ st->state = TCP_SEQ_STATE_TIME_WAIT;
+ inet_twsk_for_each(tw, node,
+ &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
++ if (!ve_accessible_veid(tw->tw_owner_env, VEID(ve)))
++ continue;
+ if (tw->tw_family != st->family) {
+ continue;
+ }
+@@ -1619,16 +1660,17 @@ static void *established_get_next(struct
+ struct inet_timewait_sock *tw;
+ struct hlist_node *node;
+ struct tcp_iter_state* st = seq->private;
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ ++st->num;
+
+ if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
+ tw = cur;
+- tw = tw_next(tw);
++ tw = tw_next(tw, VEID(ve));
+ get_tw:
+- while (tw && tw->tw_family != st->family) {
+- tw = tw_next(tw);
+- }
++ while (tw && tw->tw_family != st->family)
++ tw = tw_next(tw, VEID(ve));
+ if (tw) {
+ cur = tw;
+ goto out;
+@@ -1650,6 +1692,8 @@ get_tw:
+ sk = sk_next(sk);
+
+ sk_for_each_from(sk, node) {
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (sk->sk_family == st->family)
+ goto found;
+ }
+@@ -1801,7 +1845,12 @@ int tcp_proc_register(struct tcp_seq_afi
+ afinfo->seq_fops->llseek = seq_lseek;
+ afinfo->seq_fops->release = seq_release_private;
+
+- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
++ if (*(afinfo->name) == 'n')
++ p = proc_glob_fops_create(afinfo->name, S_IRUGO,
++ afinfo->seq_fops);
++ else
++ p = proc_net_fops_create(afinfo->name, S_IRUGO,
++ afinfo->seq_fops);
+ if (p)
+ p->data = afinfo;
+ else
+@@ -1813,7 +1862,10 @@ void tcp_proc_unregister(struct tcp_seq_
+ {
+ if (!afinfo)
+ return;
+- proc_net_remove(afinfo->name);
++ if (*(afinfo->name) == 'n')
++ remove_proc_glob_entry(afinfo->name, NULL);
++ else
++ proc_net_remove(afinfo->name);
+ memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ }
+
+@@ -1943,7 +1995,7 @@ out:
+ static struct file_operations tcp4_seq_fops;
+ static struct tcp_seq_afinfo tcp4_seq_afinfo = {
+ .owner = THIS_MODULE,
+- .name = "tcp",
++ .name = "net/tcp",
+ .family = AF_INET,
+ .seq_show = tcp4_seq_show,
+ .seq_fops = &tcp4_seq_fops,
+@@ -2010,8 +2062,87 @@ void __init tcp_v4_init(struct net_proto
+ tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
+ }
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static void tcp_kill_ve_onesk(struct sock *sk)
++{
++ struct tcp_sock *tp = tcp_sk(sk);
++
++ /* Check the assumed state of the socket. */
++ if (!sock_flag(sk, SOCK_DEAD)) {
++ static int printed;
++invalid:
++ if (!printed)
++ printk(KERN_DEBUG "Killing sk: dead %d, state %d, "
++ "wrseq %u unseq %u, wrqu %d.\n",
++ sock_flag(sk, SOCK_DEAD), sk->sk_state,
++ tp->write_seq, tp->snd_una,
++ !skb_queue_empty(&sk->sk_write_queue));
++ printed = 1;
++ return;
++ }
++
++ tcp_send_active_reset(sk, GFP_ATOMIC);
++ switch (sk->sk_state) {
++ case TCP_FIN_WAIT1:
++ case TCP_CLOSING:
++ /* In these 2 states the peer may want us to retransmit
++ * some data and/or FIN. Entering "resetting mode"
++ * instead.
++ */
++ tcp_time_wait(sk, TCP_CLOSE, 0);
++ break;
++ case TCP_FIN_WAIT2:
++ /* By some reason the socket may stay in this state
++ * without turning into a TW bucket. Fix it.
++ */
++ tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
++ break;
++ case TCP_LAST_ACK:
++ /* Just jump into CLOSED state. */
++ tcp_done(sk);
++ break;
++ default:
++ /* The socket must be already close()d. */
++ goto invalid;
++ }
++}
++
++void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
++{
++ struct inet_ehash_bucket *head;
++ int i;
++
++ /* alive */
++ local_bh_disable();
++ head = tcp_hashinfo.ehash;
++ for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
++ struct sock *sk;
++ struct hlist_node *node;
++more_work:
++ write_lock(&head[i].lock);
++ sk_for_each(sk, node, &head[i].chain) {
++ if (ve_accessible_strict(VE_OWNER_SK(sk), envid)) {
++ sock_hold(sk);
++ write_unlock(&head[i].lock);
++
++ bh_lock_sock(sk);
++ /* sk might have disappeared from the hash before
++ * we got the lock */
++ if (sk->sk_state != TCP_CLOSE)
++ tcp_kill_ve_onesk(sk);
++ bh_unlock_sock(sk);
++ sock_put(sk);
++ goto more_work;
++ }
++ }
++ write_unlock(&head[i].lock);
++ }
++ local_bh_enable();
++}
++EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
++#endif
++
+ EXPORT_SYMBOL(ipv4_specific);
+-EXPORT_SYMBOL(inet_bind_bucket_create);
+ EXPORT_SYMBOL(tcp_hashinfo);
+ EXPORT_SYMBOL(tcp_prot);
+ EXPORT_SYMBOL(tcp_unhash);
+diff -uprN linux-2.6.15.orig/net/ipv4/tcp_minisocks.c linux-2.6.15-ve025stab014/net/ipv4/tcp_minisocks.c
+--- linux-2.6.15.orig/net/ipv4/tcp_minisocks.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/tcp_minisocks.c 2006-01-27 14:48:08.000000000 +0300
+@@ -29,6 +29,8 @@
+ #include <net/inet_common.h>
+ #include <net/xfrm.h>
+
++#include <ub/ub_net.h>
++
+ #ifdef CONFIG_SYSCTL
+ #define SYNC_INIT 0 /* let the user enable it */
+ #else
+@@ -305,6 +307,8 @@ void tcp_time_wait(struct sock *sk, int
+ tw->tw_ipv6only = np->ipv6only;
+ }
+ #endif
++ tw->tw_owner_env = VEID(VE_OWNER_SK(sk));
++
+ /* Linkage updates. */
+ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
+
+@@ -353,6 +357,8 @@ struct sock *tcp_create_openreq_child(st
+ struct tcp_sock *newtp;
+
+ /* Now setup tcp_sock */
++ SET_VE_OWNER_SK(newsk, VE_OWNER_SK(sk));
++
+ newtp = tcp_sk(newsk);
+ newtp->pred_flags = 0;
+ newtp->rcv_nxt = treq->rcv_isn + 1;
+diff -uprN linux-2.6.15.orig/net/ipv4/tcp_output.c linux-2.6.15-ve025stab014/net/ipv4/tcp_output.c
+--- linux-2.6.15.orig/net/ipv4/tcp_output.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/tcp_output.c 2006-01-27 14:48:06.000000000 +0300
+@@ -42,6 +42,9 @@
+ #include <linux/module.h>
+ #include <linux/smp_lock.h>
+
++#include <ub/ub_net.h>
++#include <ub/ub_tcp.h>
++
+ /* People can turn this off for buggy TCP's found in printers etc. */
+ int sysctl_tcp_retrans_collapse = 1;
+
+@@ -459,15 +462,23 @@ int tcp_fragment(struct sock *sk, struct
+ if (nsize < 0)
+ nsize = 0;
+
+- if (skb_cloned(skb) &&
+- skb_is_nonlinear(skb) &&
+- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+- return -ENOMEM;
++ if (skb_cloned(skb) && skb_is_nonlinear(skb)) {
++ unsigned long chargesize;
++ chargesize = skb_bc(skb)->charged;
++ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
++ return -ENOMEM;
++ ub_sock_retwres_tcp(sk, chargesize, chargesize);
++ ub_tcpsndbuf_charge_forced(sk, skb);
++ }
+
+ /* Get a new skb... force flag on. */
+ buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+ if (buff == NULL)
+ return -ENOMEM; /* We'll just try again later. */
++ if (ub_tcpsndbuf_charge(sk, buff) < 0) {
++ kfree_skb(buff);
++ return -ENOMEM;
++ }
+ sk_charge_skb(sk, buff);
+
+ /* Correct the sequence numbers. */
+@@ -1207,7 +1218,7 @@ u32 __tcp_select_window(struct sock *sk)
+ if (free_space < full_space/2) {
+ icsk->icsk_ack.quick = 0;
+
+- if (tcp_memory_pressure)
++ if (ub_tcp_shrink_rcvbuf(sk))
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
+
+ if (free_space < mss)
+@@ -1634,6 +1645,7 @@ void tcp_send_fin(struct sock *sk)
+ break;
+ yield();
+ }
++ ub_tcpsndbuf_charge_forced(sk, skb);
+
+ /* Reserve space for headers and prepare control bits. */
+ skb_reserve(skb, MAX_TCP_HEADER);
+@@ -1703,6 +1715,10 @@ int tcp_send_synack(struct sock *sk)
+ struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+ if (nskb == NULL)
+ return -ENOMEM;
++ if (ub_tcpsndbuf_charge(sk, skb) < 0) {
++ kfree_skb(nskb);
++ return -ENOMEM;
++ }
+ __skb_unlink(skb, &sk->sk_write_queue);
+ skb_header_release(nskb);
+ __skb_queue_head(&sk->sk_write_queue, nskb);
+@@ -1854,6 +1870,10 @@ int tcp_connect(struct sock *sk)
+ buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
+ if (unlikely(buff == NULL))
+ return -ENOBUFS;
++ if (ub_tcpsndbuf_charge(sk, buff) < 0) {
++ kfree_skb(buff);
++ return -ENOBUFS;
++ }
+
+ /* Reserve space for headers. */
+ skb_reserve(buff, MAX_TCP_HEADER);
+diff -uprN linux-2.6.15.orig/net/ipv4/tcp_timer.c linux-2.6.15-ve025stab014/net/ipv4/tcp_timer.c
+--- linux-2.6.15.orig/net/ipv4/tcp_timer.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/tcp_timer.c 2006-01-27 14:48:08.000000000 +0300
+@@ -22,6 +22,8 @@
+
+ #include <linux/module.h>
+ #include <net/tcp.h>
++#include <ub/ub_orphan.h>
++#include <ub/ub_tcp.h>
+
+ int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
+ int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
+@@ -67,7 +69,7 @@ static void tcp_write_err(struct sock *s
+ static int tcp_out_of_resources(struct sock *sk, int do_reset)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+- int orphans = atomic_read(&tcp_orphan_count);
++ int orphans = ub_get_orphan_count(sk);
+
+ /* If peer does not open window for long time, or did not transmit
+ * anything for long time, penalize it. */
+@@ -78,9 +80,7 @@ static int tcp_out_of_resources(struct s
+ if (sk->sk_err_soft)
+ orphans <<= 1;
+
+- if (orphans >= sysctl_tcp_max_orphans ||
+- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
++ if (ub_too_many_orphans(sk, orphans)) {
+ if (net_ratelimit())
+ printk(KERN_INFO "Out of socket memory\n");
+
+@@ -173,9 +173,12 @@ static int tcp_write_timeout(struct sock
+ static void tcp_delack_timer(unsigned long data)
+ {
+ struct sock *sk = (struct sock*)data;
++ struct ve_struct *env;
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
++ env = set_exec_env(VE_OWNER_SK(sk));
++
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ /* Try again later. */
+@@ -224,11 +227,12 @@ static void tcp_delack_timer(unsigned lo
+ TCP_CHECK_TIMER(sk);
+
+ out:
+- if (tcp_memory_pressure)
++ if (ub_tcp_memory_pressure(sk))
+ sk_stream_mem_reclaim(sk);
+ out_unlock:
+ bh_unlock_sock(sk);
+ sock_put(sk);
++ (void)set_exec_env(env);
+ }
+
+ static void tcp_probe_timer(struct sock *sk)
+@@ -283,8 +287,11 @@ static void tcp_probe_timer(struct sock
+ static void tcp_retransmit_timer(struct sock *sk)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
++ struct ve_struct *env;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
++ env = set_exec_env(VE_OWNER_SK(sk));
++
+ if (!tp->packets_out)
+ goto out;
+
+@@ -381,15 +388,19 @@ out_reset_timer:
+ if (icsk->icsk_retransmits > sysctl_tcp_retries1)
+ __sk_dst_reset(sk);
+
+-out:;
++out:
++ (void)set_exec_env(env);
+ }
+
+ static void tcp_write_timer(unsigned long data)
+ {
+ struct sock *sk = (struct sock*)data;
++ struct ve_struct *env;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ int event;
+
++ env = set_exec_env(VE_OWNER_SK(sk));
++
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ /* Try again later */
+@@ -423,6 +434,7 @@ out:
+ out_unlock:
+ bh_unlock_sock(sk);
+ sock_put(sk);
++ (void)set_exec_env(env);
+ }
+
+ /*
+@@ -450,10 +462,13 @@ void tcp_set_keepalive(struct sock *sk,
+ static void tcp_keepalive_timer (unsigned long data)
+ {
+ struct sock *sk = (struct sock *) data;
++ struct ve_struct *env;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ __u32 elapsed;
+
++ env = set_exec_env(VE_OWNER_SK(sk));
++
+ /* Only process if socket is not in use. */
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+@@ -525,4 +540,5 @@ death:
+ out:
+ bh_unlock_sock(sk);
+ sock_put(sk);
++ (void)set_exec_env(env);
+ }
+diff -uprN linux-2.6.15.orig/net/ipv4/udp.c linux-2.6.15-ve025stab014/net/ipv4/udp.c
+--- linux-2.6.15.orig/net/ipv4/udp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv4/udp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -126,7 +126,9 @@ static int udp_v4_get_port(struct sock *
+ struct hlist_node *node;
+ struct sock *sk2;
+ struct inet_sock *inet = inet_sk(sk);
++ struct ve_struct *env;
+
++ env = VE_OWNER_SK(sk);
+ write_lock_bh(&udp_hash_lock);
+ if (snum == 0) {
+ int best_size_so_far, best, result, i;
+@@ -140,7 +142,7 @@ static int udp_v4_get_port(struct sock *
+ struct hlist_head *list;
+ int size;
+
+- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
++ list = &udp_hash[udp_hashfn(result, VEID(env))];
+ if (hlist_empty(list)) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0] +
+@@ -162,7 +164,7 @@ static int udp_v4_get_port(struct sock *
+ result = sysctl_local_port_range[0]
+ + ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+- if (!udp_lport_inuse(result))
++ if (!udp_lport_inuse(result, env))
+ break;
+ }
+ if (i >= (1 << 16) / UDP_HTABLE_SIZE)
+@@ -171,11 +173,12 @@ gotit:
+ udp_port_rover = snum = result;
+ } else {
+ sk_for_each(sk2, node,
+- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
++ &udp_hash[udp_hashfn(snum, VEID(env))]) {
+ struct inet_sock *inet2 = inet_sk(sk2);
+
+ if (inet2->num == snum &&
+ sk2 != sk &&
++ ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
+ !ipv6_only_sock(sk2) &&
+ (!sk2->sk_bound_dev_if ||
+ !sk->sk_bound_dev_if ||
+@@ -189,7 +192,7 @@ gotit:
+ }
+ inet->num = snum;
+ if (sk_unhashed(sk)) {
+- struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
++ struct hlist_head *h = &udp_hash[udp_hashfn(snum, VEID(env))];
+
+ sk_add_node(sk, h);
+ sock_prot_inc_use(sk->sk_prot);
+@@ -227,11 +230,15 @@ static struct sock *udp_v4_lookup_longwa
+ struct hlist_node *node;
+ unsigned short hnum = ntohs(dport);
+ int badness = -1;
++ struct ve_struct *env;
+
+- sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
++ env = get_exec_env();
++ sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
+ struct inet_sock *inet = inet_sk(sk);
+
+- if (inet->num == hnum && !ipv6_only_sock(sk)) {
++ if (inet->num == hnum &&
++ ve_accessible_strict(VE_OWNER_SK(sk), env) &&
++ !ipv6_only_sock(sk)) {
+ int score = (sk->sk_family == PF_INET ? 1 : 0);
+ if (inet->rcv_saddr) {
+ if (inet->rcv_saddr != daddr)
+@@ -1060,7 +1067,8 @@ static int udp_v4_mcast_deliver(struct s
+ int dif;
+
+ read_lock(&udp_hash_lock);
+- sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
++ sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
++ VEID(VE_OWNER_SKB(skb)))]);
+ dif = skb->dev->ifindex;
+ sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
+ if (sk) {
+@@ -1379,10 +1387,14 @@ static struct sock *udp_get_first(struct
+ {
+ struct sock *sk;
+ struct udp_iter_state *state = seq->private;
++ struct ve_struct *env;
+
++ env = get_exec_env();
+ for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
+ struct hlist_node *node;
+ sk_for_each(sk, node, &udp_hash[state->bucket]) {
++ if (!ve_accessible(VE_OWNER_SK(sk), env))
++ continue;
+ if (sk->sk_family == state->family)
+ goto found;
+ }
+@@ -1399,8 +1411,13 @@ static struct sock *udp_get_next(struct
+ do {
+ sk = sk_next(sk);
+ try_again:
+- ;
+- } while (sk && sk->sk_family != state->family);
++ if (!sk)
++ break;
++ if (sk->sk_family != state->family)
++ continue;
++ if (ve_accessible(VE_OWNER_SK(sk), get_exec_env()))
++ break;
++ } while (1);
+
+ if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
+ sk = sk_head(&udp_hash[state->bucket]);
+@@ -1486,7 +1503,12 @@ int udp_proc_register(struct udp_seq_afi
+ afinfo->seq_fops->llseek = seq_lseek;
+ afinfo->seq_fops->release = seq_release_private;
+
+- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
++ if (*(afinfo->name) == 'n')
++ p = proc_glob_fops_create(afinfo->name, S_IRUGO,
++ afinfo->seq_fops);
++ else
++ p = proc_net_fops_create(afinfo->name, S_IRUGO,
++ afinfo->seq_fops);
+ if (p)
+ p->data = afinfo;
+ else
+@@ -1498,7 +1520,10 @@ void udp_proc_unregister(struct udp_seq_
+ {
+ if (!afinfo)
+ return;
+- proc_net_remove(afinfo->name);
++ if (*(afinfo->name) == 'n')
++ remove_proc_glob_entry(afinfo->name, NULL);
++ else
++ proc_net_remove(afinfo->name);
+ memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ }
+
+@@ -1541,7 +1566,7 @@ static int udp4_seq_show(struct seq_file
+ static struct file_operations udp4_seq_fops;
+ static struct udp_seq_afinfo udp4_seq_afinfo = {
+ .owner = THIS_MODULE,
+- .name = "udp",
++ .name = "net/udp",
+ .family = AF_INET,
+ .seq_show = udp4_seq_show,
+ .seq_fops = &udp4_seq_fops,
+diff -uprN linux-2.6.15.orig/net/ipv6/addrconf.c linux-2.6.15-ve025stab014/net/ipv6/addrconf.c
+--- linux-2.6.15.orig/net/ipv6/addrconf.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv6/addrconf.c 2006-01-27 14:48:08.000000000 +0300
+@@ -2152,6 +2152,10 @@ static int addrconf_notify(struct notifi
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ int run_pending = 0;
+
++ /* not virtualized yet */
++ if (!ve_is_super(get_exec_env()))
++ return NOTIFY_OK;
++
+ switch(event) {
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+diff -uprN linux-2.6.15.orig/net/ipv6/inet6_hashtables.c linux-2.6.15-ve025stab014/net/ipv6/inet6_hashtables.c
+--- linux-2.6.15.orig/net/ipv6/inet6_hashtables.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv6/inet6_hashtables.c 2006-01-27 14:48:08.000000000 +0300
+@@ -31,7 +31,7 @@ struct sock *inet6_lookup_listener(struc
+ int score, hiscore = 0;
+
+ read_lock(&hashinfo->lhash_lock);
+- sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
++ sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum, 0)]) {
+ if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+
+diff -uprN linux-2.6.15.orig/net/ipv6/tcp_ipv6.c linux-2.6.15-ve025stab014/net/ipv6/tcp_ipv6.c
+--- linux-2.6.15.orig/net/ipv6/tcp_ipv6.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv6/tcp_ipv6.c 2006-01-27 14:48:08.000000000 +0300
+@@ -117,7 +117,7 @@ static int tcp_v6_get_port(struct sock *
+ int rover = net_random() % (high - low) + low;
+
+ do {
+- head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
++ head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size, 0)];
+ spin_lock(&head->lock);
+ inet_bind_bucket_for_each(tb, node, &head->chain)
+ if (tb->port == rover)
+@@ -142,7 +142,7 @@ static int tcp_v6_get_port(struct sock *
+ /* OK, here is the one we will use. */
+ snum = rover;
+ } else {
+- head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
++ head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size, 0)];
+ spin_lock(&head->lock);
+ inet_bind_bucket_for_each(tb, node, &head->chain)
+ if (tb->port == snum)
+@@ -164,7 +164,7 @@ tb_found:
+ tb_not_found:
+ ret = 1;
+ if (tb == NULL) {
+- tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
++ tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum, NULL);
+ if (tb == NULL)
+ goto fail_unlock;
+ }
+@@ -419,7 +419,7 @@ static int tcp_v6_hash_connect(struct so
+ local_bh_disable();
+ for (i = 1; i <= range; i++) {
+ port = low + (i + offset) % range;
+- head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
++ head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size, 0)];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+@@ -439,7 +439,7 @@ static int tcp_v6_hash_connect(struct so
+ }
+ }
+
+- tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
++ tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port, NULL);
+ if (!tb) {
+ spin_unlock(&head->lock);
+ break;
+@@ -474,7 +474,7 @@ ok:
+ goto out;
+ }
+
+- head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
++ head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size, 0)];
+ tb = inet_csk(sk)->icsk_bind_hash;
+ spin_lock_bh(&head->lock);
+
+diff -uprN linux-2.6.15.orig/net/ipv6/udp.c linux-2.6.15-ve025stab014/net/ipv6/udp.c
+--- linux-2.6.15.orig/net/ipv6/udp.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/ipv6/udp.c 2006-01-27 14:48:08.000000000 +0300
+@@ -68,7 +68,9 @@ static int udp_v6_get_port(struct sock *
+ {
+ struct sock *sk2;
+ struct hlist_node *node;
++ struct ve_struct *env;
+
++ env = VE_OWNER_SK(sk);
+ write_lock_bh(&udp_hash_lock);
+ if (snum == 0) {
+ int best_size_so_far, best, result, i;
+@@ -82,7 +84,7 @@ static int udp_v6_get_port(struct sock *
+ int size;
+ struct hlist_head *list;
+
+- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
++ list = &udp_hash[udp_hashfn(result, VEID(env))];
+ if (hlist_empty(list)) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0] +
+@@ -104,7 +106,7 @@ static int udp_v6_get_port(struct sock *
+ result = sysctl_local_port_range[0]
+ + ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+- if (!udp_lport_inuse(result))
++ if (!udp_lport_inuse(result, env))
+ break;
+ }
+ if (i >= (1 << 16) / UDP_HTABLE_SIZE)
+@@ -113,9 +115,10 @@ gotit:
+ udp_port_rover = snum = result;
+ } else {
+ sk_for_each(sk2, node,
+- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
++ &udp_hash[udp_hashfn(snum, VEID(env))]) {
+ if (inet_sk(sk2)->num == snum &&
+ sk2 != sk &&
++ ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
+ (!sk2->sk_bound_dev_if ||
+ !sk->sk_bound_dev_if ||
+ sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+@@ -127,7 +130,7 @@ gotit:
+
+ inet_sk(sk)->num = snum;
+ if (sk_unhashed(sk)) {
+- sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
++ sk_add_node(sk, &udp_hash[udp_hashfn(snum, VEID(env))]);
+ sock_prot_inc_use(sk->sk_prot);
+ }
+ write_unlock_bh(&udp_hash_lock);
+diff -uprN linux-2.6.15.orig/net/netfilter/core.c linux-2.6.15-ve025stab014/net/netfilter/core.c
+--- linux-2.6.15.orig/net/netfilter/core.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/netfilter/core.c 2006-01-27 14:48:08.000000000 +0300
+@@ -32,16 +32,24 @@
+ * of skbuffs queued for userspace, and not deregister a hook unless
+ * this is zero, but that sucks. Now, we simply check when the
+ * packets come back: if the hook is gone, the packet is discarded. */
++static DEFINE_SPINLOCK(nf_hook_lock);
++
+ struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+ EXPORT_SYMBOL(nf_hooks);
+-static DEFINE_SPINLOCK(nf_hook_lock);
++#ifdef CONFIG_VE_IPTABLES
++#define ve_nf_hooks \
++ ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
++#else
++#define ve_nf_hooks nf_hooks
++#endif
++
+
+ int nf_register_hook(struct nf_hook_ops *reg)
+ {
+ struct list_head *i;
+
+ spin_lock_bh(&nf_hook_lock);
+- list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
++ list_for_each(i, &ve_nf_hooks[reg->pf][reg->hooknum]) {
+ if (reg->priority < ((struct nf_hook_ops *)i)->priority)
+ break;
+ }
+@@ -53,6 +61,33 @@ int nf_register_hook(struct nf_hook_ops
+ }
+ EXPORT_SYMBOL(nf_register_hook);
+
++int virt_nf_register_hook(struct nf_hook_ops *reg)
++{
++ int ret = 0;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct nf_hook_ops *tmp;
++ ret = -ENOMEM;
++ tmp = kmalloc(sizeof(struct nf_hook_ops), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, reg, sizeof(struct nf_hook_ops));
++ reg = tmp;
++ }
++
++ ret = nf_register_hook(reg);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env()))
++ kfree(reg);
++nomem:
++ return ret;
++}
++EXPORT_SYMBOL(virt_nf_register_hook);
++
+ void nf_unregister_hook(struct nf_hook_ops *reg)
+ {
+ spin_lock_bh(&nf_hook_lock);
+@@ -63,6 +98,29 @@ void nf_unregister_hook(struct nf_hook_o
+ }
+ EXPORT_SYMBOL(nf_unregister_hook);
+
++int virt_nf_unregister_hook(struct nf_hook_ops *reg)
++{
++ struct nf_hook_ops *i;
++
++ spin_lock_bh(&nf_hook_lock);
++ list_for_each_entry(i, &ve_nf_hooks[reg->pf][reg->hooknum], list) {
++ if (reg->hook == i->hook) {
++ reg = i;
++ break;
++ }
++ }
++ spin_unlock_bh(&nf_hook_lock);
++ if (reg != i)
++ return -ENOENT;
++
++ nf_unregister_hook(reg);
++
++ if (!ve_is_super(get_exec_env()))
++ kfree(reg);
++ return 0;
++}
++EXPORT_SYMBOL(virt_nf_unregister_hook);
++
+ unsigned int nf_iterate(struct list_head *head,
+ struct sk_buff **skb,
+ int hook,
+@@ -120,9 +178,9 @@ int nf_hook_slow(int pf, unsigned int ho
+ /* We may already have this, but read-locks nest anyway */
+ rcu_read_lock();
+
+- elem = &nf_hooks[pf][hook];
++ elem = &ve_nf_hooks[pf][hook];
+ next_hook:
+- verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
++ verdict = nf_iterate(&ve_nf_hooks[pf][hook], pskb, hook, indev,
+ outdev, &elem, okfn, hook_thresh);
+ if (verdict == NF_ACCEPT || verdict == NF_STOP) {
+ ret = 1;
+@@ -195,13 +253,54 @@ struct proc_dir_entry *proc_net_netfilte
+ EXPORT_SYMBOL(proc_net_netfilter);
+ #endif
+
+-void __init netfilter_init(void)
++void init_nf_hooks(struct list_head (*nh)[NF_MAX_HOOKS])
+ {
+ int i, h;
+ for (i = 0; i < NPROTO; i++) {
+ for (h = 0; h < NF_MAX_HOOKS; h++)
+- INIT_LIST_HEAD(&nf_hooks[i][h]);
++			INIT_LIST_HEAD(&nh[i][h]);
+ }
++}
++
++int init_netfilter(void)
++{
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_struct *envid;
++
++ envid = get_exec_env();
++ envid->_nf_hooks = kmalloc(sizeof(nf_hooks), GFP_KERNEL);
++ if (envid->_nf_hooks == NULL)
++ return -ENOMEM;
++
++ /* FIXME: charge ubc */
++
++ init_nf_hooks(envid->_nf_hooks);
++ return 0;
++#else
++ init_nf_hooks(nf_hooks);
++ return 0;
++#endif
++}
++EXPORT_SYMBOL(init_netfilter);
++
++#ifdef CONFIG_VE_IPTABLES
++void fini_netfilter(void)
++{
++ struct ve_struct *envid;
++
++ envid = get_exec_env();
++ if (envid->_nf_hooks != NULL)
++ kfree(envid->_nf_hooks);
++ envid->_nf_hooks = NULL;
++
++ /* FIXME: uncharge ubc */
++}
++EXPORT_SYMBOL(fini_netfilter);
++#endif
++
++void __init netfilter_init(void)
++{
++ init_netfilter();
+
+ #ifdef CONFIG_PROC_FS
+ proc_net_netfilter = proc_mkdir("netfilter", proc_net);
+@@ -214,3 +313,4 @@ void __init netfilter_init(void)
+ if (netfilter_log_init() < 0)
+ panic("cannot initialize nf_log");
+ }
++
+diff -uprN linux-2.6.15.orig/net/netfilter/nf_queue.c linux-2.6.15-ve025stab014/net/netfilter/nf_queue.c
+--- linux-2.6.15.orig/net/netfilter/nf_queue.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/netfilter/nf_queue.c 2006-01-27 14:48:08.000000000 +0300
+@@ -207,12 +207,12 @@ void nf_reinject(struct sk_buff *skb, st
+ /* Drop reference to owner of hook which queued us. */
+ module_put(info->elem->owner);
+
+- list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
++ list_for_each_rcu(i, &ve_nf_hooks[info->pf][info->hook]) {
+ if (i == elem)
+ break;
+ }
+
+- if (elem == &nf_hooks[info->pf][info->hook]) {
++ if (elem == &ve_nf_hooks[info->pf][info->hook]) {
+ /* The module which sent it to userspace is gone. */
+ NFDEBUG("%s: module disappeared, dropping packet.\n",
+ __FUNCTION__);
+@@ -227,7 +227,7 @@ void nf_reinject(struct sk_buff *skb, st
+
+ if (verdict == NF_ACCEPT) {
+ next_hook:
+- verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
++ verdict = nf_iterate(&ve_nf_hooks[info->pf][info->hook],
+ &skb, info->hook,
+ info->indev, info->outdev, &elem,
+ info->okfn, INT_MIN);
+diff -uprN linux-2.6.15.orig/net/netfilter/nf_sockopt.c linux-2.6.15-ve025stab014/net/netfilter/nf_sockopt.c
+--- linux-2.6.15.orig/net/netfilter/nf_sockopt.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/netfilter/nf_sockopt.c 2006-01-27 14:48:08.000000000 +0300
+@@ -80,6 +80,12 @@ static int nf_sockopt(struct sock *sk, i
+ struct nf_sockopt_ops *ops;
+ int ret;
+
++#ifdef CONFIG_VE_IPTABLES
++ if (!get_exec_env()->_nf_hooks ||
++ !get_exec_env()->_ipt_standard_target)
++ return -ENOPROTOOPT;
++#endif
++
+ if (down_interruptible(&nf_sockopt_mutex) != 0)
+ return -EINTR;
+
+diff -uprN linux-2.6.15.orig/net/netlink/af_netlink.c linux-2.6.15-ve025stab014/net/netlink/af_netlink.c
+--- linux-2.6.15.orig/net/netlink/af_netlink.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/netlink/af_netlink.c 2006-01-27 14:48:08.000000000 +0300
+@@ -60,6 +60,9 @@
+ #include <net/scm.h>
+ #include <net/netlink.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_net.h>
++
+ #define Nprintk(a...)
+ #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
+
+@@ -208,7 +211,10 @@ static __inline__ struct sock *netlink_l
+ read_lock(&nl_table_lock);
+ head = nl_pid_hashfn(hash, pid);
+ sk_for_each(sk, node, head) {
+- if (nlk_sk(sk)->pid == pid) {
++		/* VEs should find sockets created by the kernel */
++ if ((nlk_sk(sk)->pid == pid) &&
++ (!pid || ve_accessible_strict(VE_OWNER_SK(sk),
++ get_exec_env()))){
+ sock_hold(sk);
+ goto found;
+ }
+@@ -308,7 +314,9 @@ static int netlink_insert(struct sock *s
+ head = nl_pid_hashfn(hash, pid);
+ len = 0;
+ sk_for_each(osk, node, head) {
+- if (nlk_sk(osk)->pid == pid)
++		if ((nlk_sk(osk)->pid == pid) &&
++			ve_accessible_strict(VE_OWNER_SK(osk),
++ get_exec_env()))
+ break;
+ len++;
+ }
+@@ -361,6 +369,8 @@ static int __netlink_create(struct socke
+ sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
+ if (!sk)
+ return -ENOMEM;
++ if (ub_other_sock_charge(sk))
++ goto out_free;
+
+ sock_init_data(sock, sk);
+
+@@ -371,6 +381,10 @@ static int __netlink_create(struct socke
+ sk->sk_destruct = netlink_sock_destruct;
+ sk->sk_protocol = protocol;
+ return 0;
++
++out_free:
++ sk_free(sk);
++ return -ENOMEM;
+ }
+
+ static int netlink_create(struct socket *sock, int protocol)
+@@ -402,7 +416,7 @@ static int netlink_create(struct socket
+ groups = nl_table[protocol].groups;
+ netlink_unlock_table();
+
+- if ((err = __netlink_create(sock, protocol) < 0))
++ if ((err = __netlink_create(sock, protocol)) < 0)
+ goto out_module;
+
+ nlk = nlk_sk(sock->sk);
+@@ -476,7 +490,7 @@ static int netlink_autobind(struct socke
+ struct hlist_head *head;
+ struct sock *osk;
+ struct hlist_node *node;
+- s32 pid = current->tgid;
++ s32 pid = virt_pid(current);
+ int err;
+ static s32 rover = -4097;
+
+@@ -485,7 +499,9 @@ retry:
+ netlink_table_grab();
+ head = nl_pid_hashfn(hash, pid);
+ sk_for_each(osk, node, head) {
+- if (nlk_sk(osk)->pid == pid) {
++ if ((nlk_sk(osk)->pid == pid) &&
++ ve_accessible_strict(VE_OWNER_SK(osk),
++ get_exec_env())) {
+ /* Bind collision, search negative pid values. */
+ pid = rover--;
+ if (rover > -4097)
+@@ -510,7 +526,7 @@ retry:
+ static inline int netlink_capable(struct socket *sock, unsigned int flag)
+ {
+ return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
+- capable(CAP_NET_ADMIN);
++ capable(CAP_VE_NET_ADMIN);
+ }
+
+ static void
+@@ -701,7 +717,8 @@ struct sock *netlink_getsockbyfilp(struc
+ * 0: continue
+ * 1: repeat lookup - reference dropped while waiting for socket memory.
+ */
+-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo)
++int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
++ long timeo, struct sock *ssk)
+ {
+ struct netlink_sock *nlk;
+
+@@ -711,7 +728,7 @@ int netlink_attachskb(struct sock *sk, s
+ test_bit(0, &nlk->state)) {
+ DECLARE_WAITQUEUE(wait, current);
+ if (!timeo) {
+- if (!nlk->pid)
++ if (!ssk || nlk_sk(ssk)->pid == 0)
+ netlink_overrun(sk);
+ sock_put(sk);
+ kfree_skb(skb);
+@@ -796,7 +813,7 @@ retry:
+ kfree_skb(skb);
+ return PTR_ERR(sk);
+ }
+- err = netlink_attachskb(sk, skb, nonblock, timeo);
++ err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
+ if (err == 1)
+ goto retry;
+ if (err)
+@@ -843,6 +860,9 @@ static inline int do_one_broadcast(struc
+ !test_bit(p->group - 1, nlk->groups))
+ goto out;
+
++ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
++ goto out;
++
+ if (p->failure) {
+ netlink_overrun(sk);
+ goto out;
+@@ -940,6 +960,9 @@ static inline int do_one_set_err(struct
+ !test_bit(p->group - 1, nlk->groups))
+ goto out;
+
++ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
++ goto out;
++
+ sk->sk_err = p->code;
+ sk->sk_error_report(sk);
+ out:
+@@ -1074,12 +1097,17 @@ static int netlink_sendmsg(struct kiocb
+ struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
+ struct sock *sk = sock->sk;
+ struct netlink_sock *nlk = nlk_sk(sk);
+- struct sockaddr_nl *addr=msg->msg_name;
++ struct sockaddr_nl *addr = msg->msg_name;
+ u32 dst_pid;
+- u32 dst_group;
+ struct sk_buff *skb;
+ int err;
+ struct scm_cookie scm;
++ struct sock *dstsk;
++ long timeo;
++ int no_ubc, no_buf;
++ unsigned long chargesize;
++
++ DECLARE_WAITQUEUE(wait, current);
+
+ if (msg->msg_flags&MSG_OOB)
+ return -EOPNOTSUPP;
+@@ -1090,17 +1118,16 @@ static int netlink_sendmsg(struct kiocb
+ if (err < 0)
+ return err;
+
++ /* Broadcasts from user to kernel are disabled. This is OK
++ * according to ANK */
+ if (msg->msg_namelen) {
+ if (addr->nl_family != AF_NETLINK)
+ return -EINVAL;
+ dst_pid = addr->nl_pid;
+- dst_group = ffs(addr->nl_groups);
+- if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
++ if (addr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
+ return -EPERM;
+- } else {
++ } else
+ dst_pid = nlk->dst_pid;
+- dst_group = nlk->dst_group;
+- }
+
+ if (!nlk->pid) {
+ err = netlink_autobind(sock);
+@@ -1113,12 +1140,12 @@ static int netlink_sendmsg(struct kiocb
+ goto out;
+ err = -ENOBUFS;
+ skb = alloc_skb(len, GFP_KERNEL);
+- if (skb==NULL)
++ if (skb == NULL)
+ goto out;
+
+ NETLINK_CB(skb).pid = nlk->pid;
+ NETLINK_CB(skb).dst_pid = dst_pid;
+- NETLINK_CB(skb).dst_group = dst_group;
++ NETLINK_CB(skb).dst_group = 0;
+ NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
+ memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+
+@@ -1129,25 +1156,88 @@ static int netlink_sendmsg(struct kiocb
+ */
+
+ err = -EFAULT;
+- if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
+- kfree_skb(skb);
+- goto out;
+- }
++ if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
++ goto out_free;
+
+ err = security_netlink_send(sk, skb);
+- if (err) {
+- kfree_skb(skb);
+- goto out;
++ if (err)
++ goto out_free;
++
++ timeo = sock_sndtimeo(sk, msg->msg_flags&MSG_DONTWAIT);
++retry:
++ dstsk = netlink_getsockbypid(sk, dst_pid);
++ if (IS_ERR(dstsk)) {
++ err = PTR_ERR(dstsk);
++ goto out_free;
+ }
+
+- if (dst_group) {
+- atomic_inc(&skb->users);
+- netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
++ nlk = nlk_sk(dstsk);
++#ifdef NL_EMULATE_DEV
++ if (nlk->handler) {
++ skb_orphan(skb);
++ err = nlk->handler(protocol, skb);
++ goto out_put;
+ }
+- err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
++#endif
+
++ /* BTW, it could be done once, before the retry loop */
++ chargesize = skb_charge_fullsize(skb);
++ no_ubc = ub_sock_getwres_other(sk, chargesize);
++ no_buf = atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
++ test_bit(0, &nlk->state);
++ if (no_ubc || no_buf) {
++ wait_queue_head_t *sleep;
++
++ if (!no_ubc)
++ ub_sock_retwres_other(sk, chargesize,
++ SOCK_MIN_UBCSPACE_CH);
++ err = -EAGAIN;
++ if (timeo == 0) {
++ kfree_skb(skb);
++ goto out_put;
++ }
++
++ /* wake up comes to different queues */
++ sleep = no_ubc ? sk->sk_sleep : &nlk->wait;
++ __set_current_state(TASK_INTERRUPTIBLE);
++ add_wait_queue(sleep, &wait);
++
++		/* this check can't be moved up because ub_sock_snd_queue_add()
++		 * may change the task state to TASK_RUNNING */
++ if (no_ubc)
++ ub_sock_sndqueueadd_other(sk, chargesize);
++
++ if ((atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
++ test_bit(0, &nlk->state) || no_ubc) &&
++ !sock_flag(dstsk, SOCK_DEAD))
++ timeo = schedule_timeout(timeo);
++
++ __set_current_state(TASK_RUNNING);
++ remove_wait_queue(sleep, &wait);
++ if (no_ubc)
++ ub_sock_sndqueuedel(sk);
++ sock_put(dstsk);
++
++ if (!signal_pending(current))
++ goto retry;
++ err = sock_intr_errno(timeo);
++ goto out_free;
++ }
++
++ skb_orphan(skb);
++ skb_set_owner_r(skb, dstsk);
++ ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
++ skb_queue_tail(&dstsk->sk_receive_queue, skb);
++ dstsk->sk_data_ready(dstsk, len);
++ err = len;
++out_put:
++ sock_put(dstsk);
+ out:
+ return err;
++
++out_free:
++ kfree_skb(skb);
++ return err;
+ }
+
+ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
+@@ -1300,6 +1390,10 @@ static int netlink_dump(struct sock *sk)
+ skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
++ if (ub_nlrcvbuf_charge(skb, sk) < 0) {
++ kfree_skb(skb);
++ return -EACCES;
++ }
+
+ spin_lock(&nlk->cb_lock);
+
+@@ -1422,7 +1516,7 @@ static int netlink_rcv_skb(struct sk_buf
+ while (skb->len >= nlmsg_total_size(0)) {
+ nlh = (struct nlmsghdr *) skb->data;
+
+- if (skb->len < nlh->nlmsg_len)
++ if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
+ return 0;
+
+ total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
+@@ -1468,8 +1562,15 @@ void netlink_run_queue(struct sock *sk,
+ *qlen = skb_queue_len(&sk->sk_receive_queue);
+
+ for (; *qlen; (*qlen)--) {
++ int ret;
++ struct ve_struct *old_env;
+ skb = skb_dequeue(&sk->sk_receive_queue);
+- if (netlink_rcv_skb(skb, cb)) {
++
++ old_env = set_exec_env(VE_OWNER_SKB(skb));
++ ret = netlink_rcv_skb(skb, cb);
++ (void)set_exec_env(old_env);
++
++ if (ret) {
+ if (skb->len)
+ skb_queue_head(&sk->sk_receive_queue, skb);
+ else {
+@@ -1737,6 +1838,7 @@ enomem:
+
+ sock_register(&netlink_family_ops);
+ #ifdef CONFIG_PROC_FS
++	/* FIXME: virtualize before giving VEs access */
+ proc_net_fops_create("netlink", 0, &netlink_seq_fops);
+ #endif
+ /* The netlink device handler may be needed early. */
+diff -uprN linux-2.6.15.orig/net/packet/af_packet.c linux-2.6.15-ve025stab014/net/packet/af_packet.c
+--- linux-2.6.15.orig/net/packet/af_packet.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/packet/af_packet.c 2006-01-27 14:48:08.000000000 +0300
+@@ -78,6 +78,8 @@
+ #include <linux/module.h>
+ #include <linux/init.h>
+
++#include <ub/ub_net.h>
++
+ #ifdef CONFIG_INET
+ #include <net/inet_common.h>
+ #endif
+@@ -279,7 +281,8 @@ static int packet_rcv_spkt(struct sk_buf
+ * so that this procedure is noop.
+ */
+
+- if (skb->pkt_type == PACKET_LOOPBACK)
++ if (skb->pkt_type == PACKET_LOOPBACK ||
++ !ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
+ goto out;
+
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+@@ -471,6 +474,9 @@ static int packet_rcv(struct sk_buff *sk
+ sk = pt->af_packet_priv;
+ po = pkt_sk(sk);
+
++ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
++ goto drop;
++
+ skb->dev = dev;
+
+ if (dev->hard_header) {
+@@ -530,6 +536,9 @@ static int packet_rcv(struct sk_buff *sk
+ if (pskb_trim(skb, snaplen))
+ goto drop_n_acct;
+
++ if (ub_sockrcvbuf_charge(sk, skb))
++ goto drop_n_acct;
++
+ skb_set_owner_r(skb, sk);
+ skb->dev = NULL;
+ dst_release(skb->dst);
+@@ -580,6 +589,9 @@ static int tpacket_rcv(struct sk_buff *s
+ sk = pt->af_packet_priv;
+ po = pkt_sk(sk);
+
++ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
++ goto drop;
++
+ if (dev->hard_header) {
+ if (sk->sk_type != SOCK_DGRAM)
+ skb_push(skb, skb->data - skb->mac.raw);
+@@ -629,6 +641,12 @@ static int tpacket_rcv(struct sk_buff *s
+ if (snaplen > skb->len-skb->data_len)
+ snaplen = skb->len-skb->data_len;
+
++ if (copy_skb &&
++ ub_sockrcvbuf_charge(sk, copy_skb)) {
++ spin_lock(&sk->sk_receive_queue.lock);
++ goto ring_is_full;
++ }
++
+ spin_lock(&sk->sk_receive_queue.lock);
+ h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
+
+@@ -1009,6 +1027,8 @@ static int packet_create(struct socket *
+ sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
+ if (sk == NULL)
+ goto out;
++ if (ub_other_sock_charge(sk))
++ goto out_free;
+
+ sock->ops = &packet_ops;
+ #ifdef CONFIG_SOCK_PACKET
+@@ -1047,6 +1067,9 @@ static int packet_create(struct socket *
+ sk_add_node(sk, &packet_sklist);
+ write_unlock_bh(&packet_sklist_lock);
+ return(0);
++
++out_free:
++ sk_free(sk);
+ out:
+ return err;
+ }
+@@ -1429,11 +1452,16 @@ static int packet_notifier(struct notifi
+ struct sock *sk;
+ struct hlist_node *node;
+ struct net_device *dev = (struct net_device*)data;
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ read_lock(&packet_sklist_lock);
+ sk_for_each(sk, node, &packet_sklist) {
+ struct packet_sock *po = pkt_sk(sk);
+
++ if (!ve_accessible_strict(VE_OWNER_SK(sk), ve))
++ continue;
++
+ switch (msg) {
+ case NETDEV_UNREGISTER:
+ #ifdef CONFIG_PACKET_MULTICAST
+@@ -1844,6 +1872,8 @@ static inline struct sock *packet_seq_id
+ struct hlist_node *node;
+
+ sk_for_each(s, node, &packet_sklist) {
++ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
++ continue;
+ if (!off--)
+ return s;
+ }
+@@ -1859,9 +1889,13 @@ static void *packet_seq_start(struct seq
+ static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ ++*pos;
+- return (v == SEQ_START_TOKEN)
+- ? sk_head(&packet_sklist)
+- : sk_next((struct sock*)v) ;
++ do {
++ v = (v == SEQ_START_TOKEN)
++ ? sk_head(&packet_sklist)
++ : sk_next((struct sock*)v);
++ } while (v != NULL &&
++ !ve_accessible(VE_OWNER_SK((struct sock*)v), get_exec_env()));
++ return v;
+ }
+
+ static void packet_seq_stop(struct seq_file *seq, void *v)
+@@ -1917,7 +1951,7 @@ static struct file_operations packet_seq
+
+ static void __exit packet_exit(void)
+ {
+- proc_net_remove("packet");
++ remove_proc_glob_entry("net/packet", NULL);
+ unregister_netdevice_notifier(&packet_netdev_notifier);
+ sock_unregister(PF_PACKET);
+ proto_unregister(&packet_proto);
+@@ -1932,7 +1966,7 @@ static int __init packet_init(void)
+
+ sock_register(&packet_family_ops);
+ register_netdevice_notifier(&packet_netdev_notifier);
+- proc_net_fops_create("packet", 0, &packet_seq_fops);
++ proc_glob_fops_create("net/packet", 0, &packet_seq_fops);
+ out:
+ return rc;
+ }
+diff -uprN linux-2.6.15.orig/net/sched/sch_generic.c linux-2.6.15-ve025stab014/net/sched/sch_generic.c
+--- linux-2.6.15.orig/net/sched/sch_generic.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/sched/sch_generic.c 2006-01-27 14:48:08.000000000 +0300
+@@ -97,6 +97,7 @@ int qdisc_restart(struct net_device *dev
+
+ /* Dequeue packet */
+ if ((skb = q->dequeue(q)) != NULL) {
++ struct ve_struct *envid;
+ unsigned nolock = (dev->features & NETIF_F_LLTX);
+ /*
+ * When the driver has LLTX set it does its own locking
+@@ -107,6 +108,7 @@ int qdisc_restart(struct net_device *dev
+ * of lock congestion it should return -1 and the packet
+ * will be requeued.
+ */
++ envid = set_exec_env(VE_OWNER_SKB(skb));
+ if (!nolock) {
+ if (!spin_trylock(&dev->xmit_lock)) {
+ collision:
+@@ -121,6 +123,7 @@ int qdisc_restart(struct net_device *dev
+ kfree_skb(skb);
+ if (net_ratelimit())
+ printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
++ (void)set_exec_env(envid);
+ return -1;
+ }
+ __get_cpu_var(netdev_rx_stat).cpu_collision++;
+@@ -146,6 +149,7 @@ int qdisc_restart(struct net_device *dev
+ spin_unlock(&dev->xmit_lock);
+ }
+ spin_lock(&dev->queue_lock);
++ (void)set_exec_env(envid);
+ return -1;
+ }
+ if (ret == NETDEV_TX_LOCKED && nolock) {
+@@ -177,6 +181,7 @@ int qdisc_restart(struct net_device *dev
+ requeue:
+ q->ops->requeue(skb, q);
+ netif_schedule(dev);
++ (void)set_exec_env(envid);
+ return 1;
+ }
+ BUG_ON((int) q->q.qlen < 0);
+@@ -625,3 +630,4 @@ EXPORT_SYMBOL(qdisc_reset);
+ EXPORT_SYMBOL(qdisc_restart);
+ EXPORT_SYMBOL(qdisc_lock_tree);
+ EXPORT_SYMBOL(qdisc_unlock_tree);
++EXPORT_SYMBOL(dev_shutdown);
+diff -uprN linux-2.6.15.orig/net/sched/sch_teql.c linux-2.6.15-ve025stab014/net/sched/sch_teql.c
+--- linux-2.6.15.orig/net/sched/sch_teql.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/sched/sch_teql.c 2006-01-27 14:48:08.000000000 +0300
+@@ -188,6 +188,9 @@ static int teql_qdisc_init(struct Qdisc
+ struct teql_master *m = (struct teql_master*)sch->ops;
+ struct teql_sched_data *q = qdisc_priv(sch);
+
++ if (!capable(CAP_NET_ADMIN))
++ return -EPERM;
++
+ if (dev->hard_header_len > m->dev->hard_header_len)
+ return -EINVAL;
+
+diff -uprN linux-2.6.15.orig/net/socket.c linux-2.6.15-ve025stab014/net/socket.c
+--- linux-2.6.15.orig/net/socket.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/socket.c 2006-01-27 14:48:08.000000000 +0300
+@@ -84,6 +84,7 @@
+ #include <linux/compat.h>
+ #include <linux/kmod.h>
+ #include <linux/audit.h>
++#include <linux/in.h>
+
+ #ifdef CONFIG_NET_RADIO
+ #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
+@@ -1072,6 +1073,37 @@ int sock_wake_async(struct socket *sock,
+ return 0;
+ }
+
++int vz_security_proto_check(int family, int type, int protocol)
++{
++#ifdef CONFIG_VE
++ if (ve_is_super(get_exec_env()))
++ return 0;
++
++ switch (family) {
++ case PF_UNSPEC:
++ case PF_PACKET:
++ case PF_NETLINK:
++ case PF_UNIX:
++ break;
++ case PF_INET:
++ switch (protocol) {
++ case IPPROTO_IP:
++ case IPPROTO_ICMP:
++ case IPPROTO_TCP:
++ case IPPROTO_UDP:
++ case IPPROTO_RAW:
++ break;
++ default:
++ return -EAFNOSUPPORT;
++ }
++ break;
++ default:
++ return -EAFNOSUPPORT;
++ }
++#endif
++ return 0;
++}
++
+ static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
+ {
+ int err;
+@@ -1099,6 +1131,11 @@ static int __sock_create(int family, int
+ family = PF_PACKET;
+ }
+
++ /* VZ compatibility layer */
++ err = vz_security_proto_check(family, type, protocol);
++ if (err < 0)
++ return err;
++
+ err = security_socket_create(family, type, protocol, kern);
+ if (err)
+ return err;
+diff -uprN linux-2.6.15.orig/net/sunrpc/clnt.c linux-2.6.15-ve025stab014/net/sunrpc/clnt.c
+--- linux-2.6.15.orig/net/sunrpc/clnt.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/sunrpc/clnt.c 2006-01-27 14:48:08.000000000 +0300
+@@ -168,10 +168,10 @@ rpc_new_client(struct rpc_xprt *xprt, ch
+ }
+
+ /* save the nodename */
+- clnt->cl_nodelen = strlen(system_utsname.nodename);
++ clnt->cl_nodelen = strlen(ve_utsname.nodename);
+ if (clnt->cl_nodelen > UNX_MAXNODENAME)
+ clnt->cl_nodelen = UNX_MAXNODENAME;
+- memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
++ memcpy(clnt->cl_nodename, ve_utsname.nodename, clnt->cl_nodelen);
+ return clnt;
+
+ out_no_auth:
+diff -uprN linux-2.6.15.orig/net/sunrpc/sched.c linux-2.6.15-ve025stab014/net/sunrpc/sched.c
+--- linux-2.6.15.orig/net/sunrpc/sched.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/sunrpc/sched.c 2006-01-27 14:48:08.000000000 +0300
+@@ -592,7 +592,9 @@ static int rpc_wait_bit_interruptible(vo
+ static int __rpc_execute(struct rpc_task *task)
+ {
+ int status = 0;
++ struct ve_struct *env;
+
++ env = set_exec_env(get_ve0());
+ dprintk("RPC: %4d rpc_execute flgs %x\n",
+ task->tk_pid, task->tk_flags);
+
+@@ -681,6 +683,7 @@ static int __rpc_execute(struct rpc_task
+
+ /* Release all resources associated with the task */
+ rpc_release_task(task);
++ (void)set_exec_env(env);
+ return status;
+ }
+
+diff -uprN linux-2.6.15.orig/net/sunrpc/svcsock.c linux-2.6.15-ve025stab014/net/sunrpc/svcsock.c
+--- linux-2.6.15.orig/net/sunrpc/svcsock.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/sunrpc/svcsock.c 2006-01-27 14:48:08.000000000 +0300
+@@ -361,6 +361,9 @@ svc_sendto(struct svc_rqst *rqstp, struc
+ size_t base = xdr->page_base;
+ unsigned int pglen = xdr->page_len;
+ unsigned int flags = MSG_MORE;
++ struct ve_struct *old_env;
++
++ old_env = set_exec_env(get_ve0());
+
+ slen = xdr->len;
+
+@@ -425,6 +428,8 @@ out:
+ rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
+ rqstp->rq_addr.sin_addr.s_addr);
+
++ (void)set_exec_env(old_env);
++
+ return len;
+ }
+
+@@ -437,9 +442,12 @@ svc_recv_available(struct svc_sock *svsk
+ mm_segment_t oldfs;
+ struct socket *sock = svsk->sk_sock;
+ int avail, err;
++ struct ve_struct *old_env;
+
+ oldfs = get_fs(); set_fs(KERNEL_DS);
++ old_env = set_exec_env(get_ve0());
+ err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
++ (void)set_exec_env(old_env);
+ set_fs(oldfs);
+
+ return (err >= 0)? avail : err;
+@@ -454,6 +462,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
+ struct msghdr msg;
+ struct socket *sock;
+ int len, alen;
++ struct ve_struct *old_env;
+
+ rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
+ sock = rqstp->rq_sock->sk_sock;
+@@ -465,7 +474,9 @@ svc_recvfrom(struct svc_rqst *rqstp, str
+
+ msg.msg_flags = MSG_DONTWAIT;
+
++ old_env = set_exec_env(get_ve0());
+ len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
++	(void)set_exec_env(old_env);
+
+ /* sock_recvmsg doesn't fill in the name/namelen, so we must..
+ * possibly we should cache this in the svc_sock structure
+@@ -761,17 +772,19 @@ svc_tcp_accept(struct svc_sock *svsk)
+ struct proto_ops *ops;
+ struct svc_sock *newsvsk;
+ int err, slen;
++ struct ve_struct *old_env;
+
+ dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
+ if (!sock)
+ return;
+
++ old_env = set_exec_env(get_ve0());
+ err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
+ if (err) {
+ if (err == -ENOMEM)
+ printk(KERN_WARNING "%s: no more sockets!\n",
+ serv->sv_name);
+- return;
++ goto restore;
+ }
+
+ dprintk("svc: tcp_accept %p allocated\n", newsock);
+@@ -865,6 +878,8 @@ svc_tcp_accept(struct svc_sock *svsk)
+
+ }
+
++ (void)set_exec_env(old_env);
++
+ if (serv->sv_stats)
+ serv->sv_stats->nettcpconn++;
+
+@@ -872,6 +887,8 @@ svc_tcp_accept(struct svc_sock *svsk)
+
+ failed:
+ sock_release(newsock);
++restore:
++ (void)set_exec_env(old_env);
+ return;
+ }
+
+@@ -1388,6 +1405,7 @@ svc_create_socket(struct svc_serv *serv,
+ struct socket *sock;
+ int error;
+ int type;
++ struct ve_struct *old_env;
+
+ dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
+ serv->sv_program->pg_name, protocol,
+@@ -1401,8 +1419,10 @@ svc_create_socket(struct svc_serv *serv,
+ }
+ type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+
++ old_env = set_exec_env(get_ve0());
++
+ if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
+- return error;
++ goto restore;
+
+ if (sin != NULL) {
+ if (type == SOCK_STREAM)
+@@ -1418,12 +1438,16 @@ svc_create_socket(struct svc_serv *serv,
+ goto bummer;
+ }
+
+- if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
++ if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) {
++ (void)set_exec_env(old_env);
+ return 0;
++ }
+
+ bummer:
+ dprintk("svc: svc_create_socket error = %d\n", -error);
+ sock_release(sock);
++restore:
++ (void)set_exec_env(old_env);
+ return error;
+ }
+
+diff -uprN linux-2.6.15.orig/net/unix/af_unix.c linux-2.6.15-ve025stab014/net/unix/af_unix.c
+--- linux-2.6.15.orig/net/unix/af_unix.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/net/unix/af_unix.c 2006-01-27 14:48:08.000000000 +0300
+@@ -118,6 +118,9 @@
+ #include <net/checksum.h>
+ #include <linux/security.h>
+
++#include <ub/ub_net.h>
++#include <ub/beancounter.h>
++
+ int sysctl_unix_max_dgram_qlen = 10;
+
+ struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
+@@ -235,6 +238,8 @@ static struct sock *__unix_find_socket_b
+ sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
+ struct unix_sock *u = unix_sk(s);
+
++ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
++ continue;
+ if (u->addr->len == len &&
+ !memcmp(u->addr->name, sunname, len))
+ goto found;
+@@ -439,7 +444,7 @@ static int unix_listen(struct socket *so
+ sk->sk_max_ack_backlog = backlog;
+ sk->sk_state = TCP_LISTEN;
+ /* set credentials so connect can copy them */
+- sk->sk_peercred.pid = current->tgid;
++ sk->sk_peercred.pid = virt_tgid(current);
+ sk->sk_peercred.uid = current->euid;
+ sk->sk_peercred.gid = current->egid;
+ err = 0;
+@@ -553,6 +558,8 @@ static struct sock * unix_create1(struct
+ sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
+ if (!sk)
+ goto out;
++ if (ub_other_sock_charge(sk))
++ goto out_sk_free;
+
+ atomic_inc(&unix_nr_socks);
+
+@@ -571,6 +578,9 @@ static struct sock * unix_create1(struct
+ unix_insert_socket(unix_sockets_unbound, sk);
+ out:
+ return sk;
++out_sk_free:
++ sk_free(sk);
++ return NULL;
+ }
+
+ static int unix_create(struct socket *sock, int protocol)
+@@ -932,6 +942,7 @@ static int unix_stream_connect(struct so
+ int st;
+ int err;
+ long timeo;
++ unsigned long chargesize;
+
+ err = unix_mkname(sunaddr, addr_len, &hash);
+ if (err < 0)
+@@ -960,6 +971,10 @@ static int unix_stream_connect(struct so
+ skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
+ if (skb == NULL)
+ goto out;
++ chargesize = skb_charge_fullsize(skb);
++ if (ub_sock_getwres_other(newsk, chargesize) < 0)
++ goto out;
++ ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF);
+
+ restart:
+ /* Find listening sock. */
+@@ -1043,7 +1058,7 @@ restart:
+ unix_peer(newsk) = sk;
+ newsk->sk_state = TCP_ESTABLISHED;
+ newsk->sk_type = sk->sk_type;
+- newsk->sk_peercred.pid = current->tgid;
++ newsk->sk_peercred.pid = virt_tgid(current);
+ newsk->sk_peercred.uid = current->euid;
+ newsk->sk_peercred.gid = current->egid;
+ newu = unix_sk(newsk);
+@@ -1105,7 +1120,7 @@ static int unix_socketpair(struct socket
+ sock_hold(skb);
+ unix_peer(ska)=skb;
+ unix_peer(skb)=ska;
+- ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
++ ska->sk_peercred.pid = skb->sk_peercred.pid = virt_tgid(current);
+ ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
+ ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
+
+@@ -1431,6 +1446,16 @@ static int unix_stream_sendmsg(struct ki
+
+ size=len-sent;
+
++ if (msg->msg_flags & MSG_DONTWAIT)
++ ub_sock_makewres_other(sk, skb_charge_size(size));
++ if (sock_bc(sk) != NULL &&
++ sock_bc(sk)->poll_reserv >=
++ SOCK_MIN_UBCSPACE &&
++ skb_charge_size(size) >
++ sock_bc(sk)->poll_reserv)
++ size = skb_charge_datalen(sock_bc(sk)->poll_reserv);
++
++
+ /* Keep two messages in the pipe so it schedules better */
+ if (size > sk->sk_sndbuf / 2 - 64)
+ size = sk->sk_sndbuf / 2 - 64;
+@@ -1442,7 +1467,8 @@ static int unix_stream_sendmsg(struct ki
+ * Grab a buffer
+ */
+
+- skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
++ skb = sock_alloc_send_skb2(sk, size, SOCK_MIN_UBCSPACE,
++ msg->msg_flags&MSG_DONTWAIT, &err);
+
+ if (skb==NULL)
+ goto out_err;
+@@ -1870,6 +1896,7 @@ static unsigned int unix_poll(struct fil
+ {
+ struct sock *sk = sock->sk;
+ unsigned int mask;
++ int no_ub_res;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ mask = 0;
+@@ -1880,6 +1907,10 @@ static unsigned int unix_poll(struct fil
+ if (sk->sk_shutdown == SHUTDOWN_MASK)
+ mask |= POLLHUP;
+
++ no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
++ if (no_ub_res)
++ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
++
+ /* readable? */
+ if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN))
+@@ -1893,7 +1924,7 @@ static unsigned int unix_poll(struct fil
+ * we set writable also when the other side has shut down the
+ * connection. This prevents stuck sockets.
+ */
+- if (unix_writable(sk))
++ if (!no_ub_res && unix_writable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+ return mask;
+@@ -2045,7 +2076,7 @@ static int __init af_unix_init(void)
+
+ sock_register(&unix_family_ops);
+ #ifdef CONFIG_PROC_FS
+- proc_net_fops_create("unix", 0, &unix_seq_fops);
++ proc_glob_fops_create("net/unix", 0, &unix_seq_fops);
+ #endif
+ unix_sysctl_register();
+ out:
+@@ -2056,7 +2087,7 @@ static void __exit af_unix_exit(void)
+ {
+ sock_unregister(PF_UNIX);
+ unix_sysctl_unregister();
+- proc_net_remove("unix");
++ remove_proc_glob_entry("net/unix", NULL);
+ proto_unregister(&unix_proto);
+ }
+
+diff -uprN linux-2.6.15.orig/security/commoncap.c linux-2.6.15-ve025stab014/security/commoncap.c
+--- linux-2.6.15.orig/security/commoncap.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/security/commoncap.c 2006-01-27 14:48:08.000000000 +0300
+@@ -34,7 +34,7 @@ EXPORT_SYMBOL(cap_netlink_send);
+
+ int cap_netlink_recv(struct sk_buff *skb)
+ {
+- if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
++ if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_VE_NET_ADMIN))
+ return -EPERM;
+ return 0;
+ }
+@@ -311,7 +311,7 @@ void cap_task_reparent_to_init (struct t
+
+ int cap_syslog (int type)
+ {
+- if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
++ if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ return 0;
+ }
+diff -uprN linux-2.6.15.orig/security/selinux/hooks.c linux-2.6.15-ve025stab014/security/selinux/hooks.c
+--- linux-2.6.15.orig/security/selinux/hooks.c 2006-01-03 06:21:10.000000000 +0300
++++ linux-2.6.15-ve025stab014/security/selinux/hooks.c 2006-01-27 14:48:08.000000000 +0300
+@@ -4199,12 +4199,12 @@ static int selinux_setprocattr(struct ta
+ struct task_struct *g, *t;
+ struct mm_struct *mm = p->mm;
+ read_lock(&tasklist_lock);
+- do_each_thread(g, t)
++ do_each_thread_ve(g, t)
+ if (t->mm == mm && t != p) {
+ read_unlock(&tasklist_lock);
+ return -EPERM;
+ }
+- while_each_thread(g, t);
++ while_each_thread_ve(g, t);
+ read_unlock(&tasklist_lock);
+ }
+