diff options
author | Christian Heim <phreak@gentoo.org> | 2006-07-15 14:47:37 +0000 |
---|---|---|
committer | Christian Heim <phreak@gentoo.org> | 2006-07-15 14:47:37 +0000 |
commit | e4c83cd472e7986c2fce3dbd0c12b9edce2299ce (patch) | |
tree | 8740eab35358cab40fb55f26fb412c40a78c7ced /openvz-sources | |
parent | Adding the missing patch to 026.015-r1 (diff) | |
download | misc-e4c83cd472e7986c2fce3dbd0c12b9edce2299ce.tar.gz misc-e4c83cd472e7986c2fce3dbd0c12b9edce2299ce.tar.bz2 misc-e4c83cd472e7986c2fce3dbd0c12b9edce2299ce.zip |
Fixing #140444 / CVE-2006-3626
svn path=/; revision=404
Diffstat (limited to 'openvz-sources')
4 files changed, 91221 insertions, 0 deletions
diff --git a/openvz-sources/026.015-r2/0001_linux-2.6.0-nonintconfig.patch b/openvz-sources/026.015-r2/0001_linux-2.6.0-nonintconfig.patch new file mode 100644 index 0000000..a7fe97d --- /dev/null +++ b/openvz-sources/026.015-r2/0001_linux-2.6.0-nonintconfig.patch @@ -0,0 +1,99 @@ +--- ./scripts/kconfig/Makefile.nonint 2006-01-03 06:21:10.000000000 +0300 ++++ ./scripts/kconfig/Makefile 2006-01-16 16:59:19.000000000 +0300 +@@ -42,6 +42,10 @@ update-po-config: $(obj)/kxgettext + $(Q)rm -f arch/um/Kconfig_arch + $(Q)rm -f scripts/kconfig/linux_*.pot scripts/kconfig/config.pot + ++nonint_oldconfig: scripts/kconfig/conf ++ ./scripts/kconfig/conf -b arch/$(ARCH)/Kconfig ++ ++ + .PHONY: randconfig allyesconfig allnoconfig allmodconfig defconfig + + randconfig: $(obj)/conf +--- ./scripts/kconfig/conf.c.nonint 2006-01-03 06:21:10.000000000 +0300 ++++ ./scripts/kconfig/conf.c 2006-01-16 16:10:30.000000000 +0300 +@@ -20,6 +20,7 @@ enum { + ask_all, + ask_new, + ask_silent, ++ dont_ask, + set_default, + set_yes, + set_mod, +@@ -36,6 +37,8 @@ static struct menu *rootEntry; + + static char nohelp_text[] = N_("Sorry, no help available for this option yet.\n"); + ++static int return_value = 0; ++ + static void strip(char *str) + { + char *p = str; +@@ -102,6 +105,12 @@ static void conf_askvalue(struct symbol + fflush(stdout); + fgets(line, 128, stdin); + return; ++ case dont_ask: ++ if (!sym_has_value(sym)) { ++ fprintf(stderr,"CONFIG_%s\n",sym->name); ++ return_value++; ++ } ++ return; + case set_default: + printf("%s\n", def); + return; +@@ -346,6 +355,10 @@ static int conf_choice(struct menu *menu + printf("?"); + printf("]: "); + switch (input_mode) { ++ case dont_ask: ++ cnt = def; ++ printf("%d\n", cnt); ++ break; + case ask_new: + case ask_silent: + if (!is_new) { +@@ -482,7 +495,10 @@ static void check_conf(struct menu *menu + if (!conf_cnt++) + printf(_("*\n* Restart config...\n*\n")); + rootEntry = menu_get_parent_menu(menu); +- conf(rootEntry); ++ if (input_mode == 
dont_ask) ++ fprintf(stderr,"CONFIG_%s\n",sym->name); ++ else ++ conf(rootEntry); + } + } + +@@ -501,6 +517,9 @@ int main(int ac, char **av) + case 'o': + input_mode = ask_new; + break; ++ case 'b': ++ input_mode = dont_ask; ++ break; + case 's': + input_mode = ask_silent; + valid_stdin = isatty(0) && isatty(1) && isatty(2); +@@ -565,6 +584,7 @@ int main(int ac, char **av) + } + case ask_all: + case ask_new: ++ case dont_ask: + conf_read(NULL); + break; + case set_no: +@@ -603,10 +623,10 @@ int main(int ac, char **av) + do { + conf_cnt = 0; + check_conf(&rootmenu); +- } while (conf_cnt); ++ } while ((conf_cnt) && (input_mode != dont_ask)); + if (conf_write(NULL)) { + fprintf(stderr, _("\n*** Error during writing of the kernel configuration.\n\n")); + return 1; + } +- return 0; ++ return return_value; + } diff --git a/openvz-sources/026.015-r2/0100_patch-026test015-core.patch b/openvz-sources/026.015-r2/0100_patch-026test015-core.patch new file mode 100644 index 0000000..94452f7 --- /dev/null +++ b/openvz-sources/026.015-r2/0100_patch-026test015-core.patch @@ -0,0 +1,91083 @@ +diff -upr linux-2.6.16.orig/COPYING.SWsoft linux-2.6.16-026test015/COPYING.SWsoft +--- linux-2.6.16.orig/COPYING.SWsoft 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/COPYING.SWsoft 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,350 @@ ++ ++Nothing in this license should be construed as a grant by SWsoft of any rights ++beyond the rights specified in the GNU General Public License, and nothing in ++this license should be construed as a waiver by SWsoft of its patent, copyright ++and/or trademark rights, beyond the waiver required by the GNU General Public ++License. This license is expressly inapplicable to any product that is not ++within the scope of the GNU General Public License ++ ++---------------------------------------- ++ ++ GNU GENERAL PUBLIC LICENSE ++ Version 2, June 1991 ++ ++ Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
++ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ Everyone is permitted to copy and distribute verbatim copies ++ of this license document, but changing it is not allowed. ++ ++ Preamble ++ ++ The licenses for most software are designed to take away your ++freedom to share and change it. By contrast, the GNU General Public ++License is intended to guarantee your freedom to share and change free ++software--to make sure the software is free for all its users. This ++General Public License applies to most of the Free Software ++Foundation's software and to any other program whose authors commit to ++using it. (Some other Free Software Foundation software is covered by ++the GNU Library General Public License instead.) You can apply it to ++your programs, too. ++ ++ When we speak of free software, we are referring to freedom, not ++price. Our General Public Licenses are designed to make sure that you ++have the freedom to distribute copies of free software (and charge for ++this service if you wish), that you receive source code or can get it ++if you want it, that you can change the software or use pieces of it ++in new free programs; and that you know you can do these things. ++ ++ To protect your rights, we need to make restrictions that forbid ++anyone to deny you these rights or to ask you to surrender the rights. ++These restrictions translate to certain responsibilities for you if you ++distribute copies of the software, or if you modify it. ++ ++ For example, if you distribute copies of such a program, whether ++gratis or for a fee, you must give the recipients all the rights that ++you have. You must make sure that they, too, receive or can get the ++source code. And you must show them these terms so they know their ++rights. ++ ++ We protect your rights with two steps: (1) copyright the software, and ++(2) offer you this license which gives you legal permission to copy, ++distribute and/or modify the software. 
++ ++ Also, for each author's protection and ours, we want to make certain ++that everyone understands that there is no warranty for this free ++software. If the software is modified by someone else and passed on, we ++want its recipients to know that what they have is not the original, so ++that any problems introduced by others will not reflect on the original ++authors' reputations. ++ ++ Finally, any free program is threatened constantly by software ++patents. We wish to avoid the danger that redistributors of a free ++program will individually obtain patent licenses, in effect making the ++program proprietary. To prevent this, we have made it clear that any ++patent must be licensed for everyone's free use or not licensed at all. ++ ++ The precise terms and conditions for copying, distribution and ++modification follow. ++ ++ GNU GENERAL PUBLIC LICENSE ++ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION ++ ++ 0. This License applies to any program or other work which contains ++a notice placed by the copyright holder saying it may be distributed ++under the terms of this General Public License. The "Program", below, ++refers to any such program or work, and a "work based on the Program" ++means either the Program or any derivative work under copyright law: ++that is to say, a work containing the Program or a portion of it, ++either verbatim or with modifications and/or translated into another ++language. (Hereinafter, translation is included without limitation in ++the term "modification".) Each licensee is addressed as "you". ++ ++Activities other than copying, distribution and modification are not ++covered by this License; they are outside its scope. The act of ++running the Program is not restricted, and the output from the Program ++is covered only if its contents constitute a work based on the ++Program (independent of having been made by running the Program). ++Whether that is true depends on what the Program does. ++ ++ 1. 
You may copy and distribute verbatim copies of the Program's ++source code as you receive it, in any medium, provided that you ++conspicuously and appropriately publish on each copy an appropriate ++copyright notice and disclaimer of warranty; keep intact all the ++notices that refer to this License and to the absence of any warranty; ++and give any other recipients of the Program a copy of this License ++along with the Program. ++ ++You may charge a fee for the physical act of transferring a copy, and ++you may at your option offer warranty protection in exchange for a fee. ++ ++ 2. You may modify your copy or copies of the Program or any portion ++of it, thus forming a work based on the Program, and copy and ++distribute such modifications or work under the terms of Section 1 ++above, provided that you also meet all of these conditions: ++ ++ a) You must cause the modified files to carry prominent notices ++ stating that you changed the files and the date of any change. ++ ++ b) You must cause any work that you distribute or publish, that in ++ whole or in part contains or is derived from the Program or any ++ part thereof, to be licensed as a whole at no charge to all third ++ parties under the terms of this License. ++ ++ c) If the modified program normally reads commands interactively ++ when run, you must cause it, when started running for such ++ interactive use in the most ordinary way, to print or display an ++ announcement including an appropriate copyright notice and a ++ notice that there is no warranty (or else, saying that you provide ++ a warranty) and that users may redistribute the program under ++ these conditions, and telling the user how to view a copy of this ++ License. (Exception: if the Program itself is interactive but ++ does not normally print such an announcement, your work based on ++ the Program is not required to print an announcement.) ++ ++These requirements apply to the modified work as a whole. 
If ++identifiable sections of that work are not derived from the Program, ++and can be reasonably considered independent and separate works in ++themselves, then this License, and its terms, do not apply to those ++sections when you distribute them as separate works. But when you ++distribute the same sections as part of a whole which is a work based ++on the Program, the distribution of the whole must be on the terms of ++this License, whose permissions for other licensees extend to the ++entire whole, and thus to each and every part regardless of who wrote it. ++ ++Thus, it is not the intent of this section to claim rights or contest ++your rights to work written entirely by you; rather, the intent is to ++exercise the right to control the distribution of derivative or ++collective works based on the Program. ++ ++In addition, mere aggregation of another work not based on the Program ++with the Program (or with a work based on the Program) on a volume of ++a storage or distribution medium does not bring the other work under ++the scope of this License. ++ ++ 3. 
You may copy and distribute the Program (or a work based on it, ++under Section 2) in object code or executable form under the terms of ++Sections 1 and 2 above provided that you also do one of the following: ++ ++ a) Accompany it with the complete corresponding machine-readable ++ source code, which must be distributed under the terms of Sections ++ 1 and 2 above on a medium customarily used for software interchange; or, ++ ++ b) Accompany it with a written offer, valid for at least three ++ years, to give any third party, for a charge no more than your ++ cost of physically performing source distribution, a complete ++ machine-readable copy of the corresponding source code, to be ++ distributed under the terms of Sections 1 and 2 above on a medium ++ customarily used for software interchange; or, ++ ++ c) Accompany it with the information you received as to the offer ++ to distribute corresponding source code. (This alternative is ++ allowed only for noncommercial distribution and only if you ++ received the program in object code or executable form with such ++ an offer, in accord with Subsection b above.) ++ ++The source code for a work means the preferred form of the work for ++making modifications to it. For an executable work, complete source ++code means all the source code for all modules it contains, plus any ++associated interface definition files, plus the scripts used to ++control compilation and installation of the executable. However, as a ++special exception, the source code distributed need not include ++anything that is normally distributed (in either source or binary ++form) with the major components (compiler, kernel, and so on) of the ++operating system on which the executable runs, unless that component ++itself accompanies the executable. 
++ ++If distribution of executable or object code is made by offering ++access to copy from a designated place, then offering equivalent ++access to copy the source code from the same place counts as ++distribution of the source code, even though third parties are not ++compelled to copy the source along with the object code. ++ ++ 4. You may not copy, modify, sublicense, or distribute the Program ++except as expressly provided under this License. Any attempt ++otherwise to copy, modify, sublicense or distribute the Program is ++void, and will automatically terminate your rights under this License. ++However, parties who have received copies, or rights, from you under ++this License will not have their licenses terminated so long as such ++parties remain in full compliance. ++ ++ 5. You are not required to accept this License, since you have not ++signed it. However, nothing else grants you permission to modify or ++distribute the Program or its derivative works. These actions are ++prohibited by law if you do not accept this License. Therefore, by ++modifying or distributing the Program (or any work based on the ++Program), you indicate your acceptance of this License to do so, and ++all its terms and conditions for copying, distributing or modifying ++the Program or works based on it. ++ ++ 6. Each time you redistribute the Program (or any work based on the ++Program), the recipient automatically receives a license from the ++original licensor to copy, distribute or modify the Program subject to ++these terms and conditions. You may not impose any further ++restrictions on the recipients' exercise of the rights granted herein. ++You are not responsible for enforcing compliance by third parties to ++this License. ++ ++ 7. 
If, as a consequence of a court judgment or allegation of patent ++infringement or for any other reason (not limited to patent issues), ++conditions are imposed on you (whether by court order, agreement or ++otherwise) that contradict the conditions of this License, they do not ++excuse you from the conditions of this License. If you cannot ++distribute so as to satisfy simultaneously your obligations under this ++License and any other pertinent obligations, then as a consequence you ++may not distribute the Program at all. For example, if a patent ++license would not permit royalty-free redistribution of the Program by ++all those who receive copies directly or indirectly through you, then ++the only way you could satisfy both it and this License would be to ++refrain entirely from distribution of the Program. ++ ++If any portion of this section is held invalid or unenforceable under ++any particular circumstance, the balance of the section is intended to ++apply and the section as a whole is intended to apply in other ++circumstances. ++ ++It is not the purpose of this section to induce you to infringe any ++patents or other property right claims or to contest validity of any ++such claims; this section has the sole purpose of protecting the ++integrity of the free software distribution system, which is ++implemented by public license practices. Many people have made ++generous contributions to the wide range of software distributed ++through that system in reliance on consistent application of that ++system; it is up to the author/donor to decide if he or she is willing ++to distribute software through any other system and a licensee cannot ++impose that choice. ++ ++This section is intended to make thoroughly clear what is believed to ++be a consequence of the rest of this License. ++ ++ 8. 
If the distribution and/or use of the Program is restricted in ++certain countries either by patents or by copyrighted interfaces, the ++original copyright holder who places the Program under this License ++may add an explicit geographical distribution limitation excluding ++those countries, so that distribution is permitted only in or among ++countries not thus excluded. In such case, this License incorporates ++the limitation as if written in the body of this License. ++ ++ 9. The Free Software Foundation may publish revised and/or new versions ++of the General Public License from time to time. Such new versions will ++be similar in spirit to the present version, but may differ in detail to ++address new problems or concerns. ++ ++Each version is given a distinguishing version number. If the Program ++specifies a version number of this License which applies to it and "any ++later version", you have the option of following the terms and conditions ++either of that version or of any later version published by the Free ++Software Foundation. If the Program does not specify a version number of ++this License, you may choose any version ever published by the Free Software ++Foundation. ++ ++ 10. If you wish to incorporate parts of the Program into other free ++programs whose distribution conditions are different, write to the author ++to ask for permission. For software which is copyrighted by the Free ++Software Foundation, write to the Free Software Foundation; we sometimes ++make exceptions for this. Our decision will be guided by the two goals ++of preserving the free status of all derivatives of our free software and ++of promoting the sharing and reuse of software generally. ++ ++ NO WARRANTY ++ ++ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY ++FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN ++OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES ++PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED ++OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS ++TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE ++PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, ++REPAIR OR CORRECTION. ++ ++ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING ++WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR ++REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, ++INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING ++OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED ++TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY ++YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER ++PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE ++POSSIBILITY OF SUCH DAMAGES. ++ ++ END OF TERMS AND CONDITIONS ++ ++ How to Apply These Terms to Your New Programs ++ ++ If you develop a new program, and you want it to be of the greatest ++possible use to the public, the best way to achieve this is to make it ++free software which everyone can redistribute and change under these terms. ++ ++ To do so, attach the following notices to the program. It is safest ++to attach them to the start of each source file to most effectively ++convey the exclusion of warranty; and each file should have at least ++the "copyright" line and a pointer to where the full notice is found. 
++ ++ <one line to give the program's name and a brief idea of what it does.> ++ Copyright (C) <year> <name of author> ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ ++ ++Also add information on how to contact you by electronic and paper mail. ++ ++If the program is interactive, make it output a short notice like this ++when it starts in an interactive mode: ++ ++ Gnomovision version 69, Copyright (C) year name of author ++ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. ++ This is free software, and you are welcome to redistribute it ++ under certain conditions; type `show c' for details. ++ ++The hypothetical commands `show w' and `show c' should show the appropriate ++parts of the General Public License. Of course, the commands you use may ++be called something other than `show w' and `show c'; they could even be ++mouse-clicks or menu items--whatever suits your program. ++ ++You should also get your employer (if you work as a programmer) or your ++school, if any, to sign a "copyright disclaimer" for the program, if ++necessary. Here is a sample; alter the names: ++ ++ Yoyodyne, Inc., hereby disclaims all copyright interest in the program ++ `Gnomovision' (which makes passes at compilers) written by James Hacker. 
++ ++ <signature of Ty Coon>, 1 April 1989 ++ Ty Coon, President of Vice ++ ++This General Public License does not permit incorporating your program into ++proprietary programs. If your program is a subroutine library, you may ++consider it more useful to permit linking proprietary applications with the ++library. If this is what you want to do, use the GNU Library General ++Public License instead of this License. +diff -upr linux-2.6.16.orig/Documentation/dvb/get_dvb_firmware linux-2.6.16-026test015/Documentation/dvb/get_dvb_firmware +--- linux-2.6.16.orig/Documentation/dvb/get_dvb_firmware 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/Documentation/dvb/get_dvb_firmware 2006-07-04 14:41:36.000000000 +0400 +@@ -240,9 +240,9 @@ sub dibusb { + } + + sub nxt2002 { +- my $sourcefile = "Broadband4PC_4_2_11.zip"; ++ my $sourcefile = "Technisat_DVB-PC_4_4_COMPACT.zip"; + my $url = "http://www.bbti.us/download/windows/$sourcefile"; +- my $hash = "c6d2ea47a8f456d887ada0cfb718ff2a"; ++ my $hash = "476befae8c7c1bb9648954060b1eec1f"; + my $outfile = "dvb-fe-nxt2002.fw"; + my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); + +@@ -250,8 +250,8 @@ sub nxt2002 { + + wgetfile($sourcefile, $url); + unzip($sourcefile, $tmpdir); +- verify("$tmpdir/SkyNETU.sys", $hash); +- extract("$tmpdir/SkyNETU.sys", 375832, 5908, $outfile); ++ verify("$tmpdir/SkyNET.sys", $hash); ++ extract("$tmpdir/SkyNET.sys", 331624, 5908, $outfile); + + $outfile; + } +diff -upr linux-2.6.16.orig/Documentation/vsched.txt linux-2.6.16-026test015/Documentation/vsched.txt +--- linux-2.6.16.orig/Documentation/vsched.txt 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/Documentation/vsched.txt 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,83 @@ ++Copyright (C) 2005 SWsoft. All rights reserved. ++Licensing governed by "linux/COPYING.SWsoft" file. 
++ ++Hierarchical CPU schedulers ++~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Hierarchical CPU scheduler is a stack of CPU schedulers which allows ++to organize different policies of scheduling in the system and/or between ++groups of processes. ++ ++Virtuozzo uses a hierarchical Fair CPU scheduler organized as a 2-stage ++CPU scheduler, where the scheduling decisions are made in 2 steps: ++1. On the first step Fair CPU scheduler selects a group of processes ++ which should get some CPU time. ++2. Then standard Linux scheduler chooses a process inside the group. ++Such scheduler efficiently allows to isolate one group of processes ++from another and still allows a group to use more than 1 CPU on SMP systems. ++ ++This document describes a new middle layer of Virtuozzo hierarchical CPU ++scheduler which makes decisions after Fair scheduler, but before Linux ++scheduler and which is called VCPU scheduler. ++ ++ ++Where VCPU scheduler comes from? ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Existing hierarchical CPU scheduler uses isolated algorithms on each stage ++of decision making, i.e. every scheduler makes its decisions without ++taking into account the details of other schedulers. This can lead to a number ++of problems described below. ++ ++On SMP systems there are possible situations when the first CPU scheduler ++in the hierarchy (e.g. Fair scheduler) wants to schedule some group of ++processes on the physical CPU, but the underlying process scheduler ++(e.g. Linux O(1) CPU scheduler) is unable to schedule any processes ++on this physical CPU. Usually this happens due to the fact that Linux ++kernel scheduler uses per-physical CPU runqueues. ++ ++Another problem is that Linux scheduler also knows nothing about ++Fair scheduler and can't balance efficiently without taking into account ++statistics about process groups from Fair scheduler. 
Without such ++statistics Linux scheduler can concentrate all processes on one physical ++CPU, thus making CPU consuming highly inefficient. ++ ++VCPU scheduler solves these problems by adding a new layer between ++Fair schedule and Linux scheduler. ++ ++VCPU scheduler ++~~~~~~~~~~~~~~ ++ ++VCPU scheduler is a CPU scheduler which splits notion of ++physical and virtual CPUs (VCPU and PCPU). This means that tasks are ++running on virtual CPU runqueues, while VCPUs are running on PCPUs. ++ ++The Virtuozzo hierarchical fair scheduler becomes 3 stage CPU scheduler: ++1. First, Fair CPU scheduler select a group of processes. ++2. Then VCPU scheduler select a virtual CPU to run (this is actually ++ a runqueue). ++3. Standard Linux scheduler chooses a process from the runqueue. ++ ++For example on the picture below PCPU0 executes tasks from ++VCPU1 runqueue and PCPU1 is idle: ++ ++ virtual | physical | virtual ++ idle CPUs | CPUs | CPUS ++--------------------|------------------------|-------------------------- ++ | | ----------------- ++ | | | virtual sched X | ++ | | | ----------- | ++ | | | | VCPU0 | | ++ | | | ----------- | ++ ------------ | ----------- | ----------- | ++| idle VCPU0 | | | PCPU0 | <---> | | VCPU1 | | ++ ------------ | ----------- | ----------- | ++ | | ----------------- ++ | | ++ | | ----------------- ++ | | | virtual sched Y | ++ ------------ ----------- | | ----------- | ++| idle VCPU1 | <---> | PCPU1 | | | | VCPU0 | | ++ ------------ ----------- | | ----------- | ++ | | ----------------- ++ | | +diff -upr linux-2.6.16.orig/Makefile linux-2.6.16-026test015/Makefile +--- linux-2.6.16.orig/Makefile 2006-07-04 14:41:39.000000000 +0400 ++++ linux-2.6.16-026test015/Makefile 2006-07-04 14:41:39.000000000 +0400 +@@ -1,7 +1,7 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 16 +-EXTRAVERSION = ++EXTRAVERSION = -026test015 + NAME=Sliding Snow Leopard + + # *DOCUMENTATION* +diff -upr linux-2.6.16.orig/arch/alpha/kernel/setup.c 
linux-2.6.16-026test015/arch/alpha/kernel/setup.c +--- linux-2.6.16.orig/arch/alpha/kernel/setup.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/alpha/kernel/setup.c 2006-07-04 14:41:36.000000000 +0400 +@@ -24,6 +24,7 @@ + #include <linux/config.h> /* CONFIG_ALPHA_LCA etc */ + #include <linux/mc146818rtc.h> + #include <linux/console.h> ++#include <linux/cpu.h> + #include <linux/errno.h> + #include <linux/init.h> + #include <linux/string.h> +@@ -477,6 +478,22 @@ page_is_ram(unsigned long pfn) + #undef PFN_PHYS + #undef PFN_MAX + ++static int __init ++register_cpus(void) ++{ ++ int i; ++ ++ for_each_possible_cpu(i) { ++ struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL); ++ if (!p) ++ return -ENOMEM; ++ register_cpu(p, i, NULL); ++ } ++ return 0; ++} ++ ++arch_initcall(register_cpus); ++ + void __init + setup_arch(char **cmdline_p) + { +diff -upr linux-2.6.16.orig/arch/alpha/kernel/smp.c linux-2.6.16-026test015/arch/alpha/kernel/smp.c +--- linux-2.6.16.orig/arch/alpha/kernel/smp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/alpha/kernel/smp.c 2006-07-04 14:41:36.000000000 +0400 +@@ -439,7 +439,7 @@ setup_smp(void) + if ((cpu->flags & 0x1cc) == 0x1cc) { + smp_num_probed++; + /* Assume here that "whami" == index */ +- cpu_set(i, cpu_possible_map); ++ cpu_set(i, cpu_present_mask); + cpu->pal_revision = boot_cpu_palrev; + } + +@@ -450,9 +450,8 @@ setup_smp(void) + } + } else { + smp_num_probed = 1; +- cpu_set(boot_cpuid, cpu_possible_map); ++ cpu_set(boot_cpuid, cpu_present_mask); + } +- cpu_present_mask = cpumask_of_cpu(boot_cpuid); + + printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n", + smp_num_probed, cpu_possible_map.bits[0]); +@@ -488,9 +487,8 @@ void __devinit + smp_prepare_boot_cpu(void) + { + /* +- * Mark the boot cpu (current cpu) as both present and online ++ * Mark the boot cpu (current cpu) as online + */ +- cpu_set(smp_processor_id(), cpu_present_mask); + cpu_set(smp_processor_id(), 
cpu_online_map); + } + +diff -upr linux-2.6.16.orig/arch/alpha/lib/strncpy.S linux-2.6.16-026test015/arch/alpha/lib/strncpy.S +--- linux-2.6.16.orig/arch/alpha/lib/strncpy.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/alpha/lib/strncpy.S 2006-07-04 14:41:36.000000000 +0400 +@@ -43,8 +43,8 @@ strncpy: + + .align 4 + $multiword: +- subq $24, 1, $2 # clear the final bits in the prev word +- or $2, $24, $2 ++ subq $27, 1, $2 # clear the final bits in the prev word ++ or $2, $27, $2 + zapnot $1, $2, $1 + subq $18, 1, $18 + +@@ -70,8 +70,8 @@ $multiword: + bne $18, 0b + + 1: ldq_u $1, 0($16) # clear the leading bits in the final word +- subq $27, 1, $2 +- or $2, $27, $2 ++ subq $24, 1, $2 ++ or $2, $24, $2 + + zap $1, $2, $1 + stq_u $1, 0($16) +diff -upr linux-2.6.16.orig/arch/arm/kernel/smp.c linux-2.6.16-026test015/arch/arm/kernel/smp.c +--- linux-2.6.16.orig/arch/arm/kernel/smp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/arm/kernel/smp.c 2006-07-04 14:41:38.000000000 +0400 +@@ -197,7 +197,7 @@ int __cpuexit __cpu_disable(void) + local_flush_tlb_all(); + + read_lock(&tasklist_lock); +- for_each_process(p) { ++ for_each_process_all(p) { + if (p->mm) + cpu_clear(cpu, p->mm->cpu_vm_mask); + } +diff -upr linux-2.6.16.orig/arch/frv/mm/mmu-context.c linux-2.6.16-026test015/arch/frv/mm/mmu-context.c +--- linux-2.6.16.orig/arch/frv/mm/mmu-context.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/frv/mm/mmu-context.c 2006-07-04 14:41:38.000000000 +0400 +@@ -181,7 +181,7 @@ int cxn_pin_by_pid(pid_t pid) + + /* get a handle on the mm_struct */ + read_lock(&tasklist_lock); +- tsk = find_task_by_pid(pid); ++ tsk = find_task_by_pid_ve(pid); + if (tsk) { + ret = -EINVAL; + +diff -upr linux-2.6.16.orig/arch/i386/Kconfig linux-2.6.16-026test015/arch/i386/Kconfig +--- linux-2.6.16.orig/arch/i386/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/Kconfig 2006-07-04 
14:41:39.000000000 +0400 +@@ -216,6 +216,8 @@ config NR_CPUS + This is purely to save memory - each supported CPU adds + approximately eight kilobytes to the kernel image. + ++source "kernel/Kconfig.fairsched" ++ + config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP +@@ -268,6 +270,14 @@ config X86_VISWS_APIC + depends on X86_VISWS + default y + ++config NMI_WATCHDOG ++ bool "NMI Watchdog" ++ default y ++ help ++ If you say Y here the kernel will activate NMI watchdog by default ++ on boot. You can still activate NMI watchdog via nmi_watchdog ++ command line option even if you say N here. ++ + config X86_MCE + bool "Machine Check Exception" + depends on !X86_VOYAGER +@@ -1071,12 +1081,16 @@ endmenu + + source "arch/i386/Kconfig.debug" + ++source "kernel/Kconfig.openvz" ++ + source "security/Kconfig" + + source "crypto/Kconfig" + + source "lib/Kconfig" + ++source "kernel/ub/Kconfig" ++ + # + # Use the generic interrupt handling code in kernel/irq/: + # +diff -upr linux-2.6.16.orig/arch/i386/kernel/apic.c linux-2.6.16-026test015/arch/i386/kernel/apic.c +--- linux-2.6.16.orig/arch/i386/kernel/apic.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/apic.c 2006-07-04 14:41:38.000000000 +0400 +@@ -1177,6 +1177,7 @@ inline void smp_local_timer_interrupt(st + fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) + { + int cpu = smp_processor_id(); ++ struct ve_struct *ve; + + /* + * the NMI deadlock-detector uses this. +@@ -1193,9 +1194,11 @@ fastcall void smp_apic_timer_interrupt(s + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. 
+ */ ++ ve = set_exec_env(get_ve0()); + irq_enter(); + smp_local_timer_interrupt(regs); + irq_exit(); ++ (void)set_exec_env(ve); + } + + #ifndef CONFIG_SMP +diff -upr linux-2.6.16.orig/arch/i386/kernel/apm.c linux-2.6.16-026test015/arch/i386/kernel/apm.c +--- linux-2.6.16.orig/arch/i386/kernel/apm.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/apm.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1081,7 +1081,7 @@ static int apm_console_blank(int blank) + break; + } + +- if (error == APM_NOT_ENGAGED && state != APM_STATE_READY) { ++ if (error == APM_NOT_ENGAGED) { + static int tried; + int eng_error; + if (tried++ == 0) { +diff -upr linux-2.6.16.orig/arch/i386/kernel/cpu/amd.c linux-2.6.16-026test015/arch/i386/kernel/cpu/amd.c +--- linux-2.6.16.orig/arch/i386/kernel/cpu/amd.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/cpu/amd.c 2006-07-04 14:41:36.000000000 +0400 +@@ -207,6 +207,8 @@ static void __init init_amd(struct cpuin + set_bit(X86_FEATURE_K7, c->x86_capability); + break; + } ++ if (c->x86 >= 6) ++ set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability); + + display_cacheinfo(c); + +diff -upr linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/Kconfig linux-2.6.16-026test015/arch/i386/kernel/cpu/cpufreq/Kconfig +--- linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/cpu/cpufreq/Kconfig 2006-07-04 14:41:36.000000000 +0400 +@@ -203,6 +203,7 @@ config X86_LONGRUN + config X86_LONGHAUL + tristate "VIA Cyrix III Longhaul" + select CPU_FREQ_TABLE ++ depends on BROKEN + help + This adds the CPUFreq driver for VIA Samuel/CyrixIII, + VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T +diff -upr linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c linux-2.6.16-026test015/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +--- linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2006-03-20 08:53:29.000000000 
+0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2006-07-04 14:41:36.000000000 +0400 +@@ -244,7 +244,7 @@ static int cpufreq_p4_cpu_init(struct cp + for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { + if ((i<2) && (has_N44_O17_errata[policy->cpu])) + p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; +- else if (has_N60_errata[policy->cpu] && p4clockmod_table[i].frequency < 2000000) ++ else if (has_N60_errata[policy->cpu] && ((stock_freq * i)/8) < 2000000) + p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; + else + p4clockmod_table[i].frequency = (stock_freq * i)/8; +diff -upr linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c linux-2.6.16-026test015/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +--- linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c 2006-07-04 14:41:36.000000000 +0400 +@@ -75,7 +75,9 @@ static int speedstep_smi_ownership (void + __asm__ __volatile__( + "out %%al, (%%dx)\n" + : "=D" (result) +- : "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic) ++ : "a" (command), "b" (function), "c" (0), "d" (smi_port), ++ "D" (0), "S" (magic) ++ : "memory" + ); + + dprintk("result is %x\n", result); +diff -upr linux-2.6.16.orig/arch/i386/kernel/cpu/mtrr/if.c linux-2.6.16-026test015/arch/i386/kernel/cpu/mtrr/if.c +--- linux-2.6.16.orig/arch/i386/kernel/cpu/mtrr/if.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/cpu/mtrr/if.c 2006-07-04 14:41:38.000000000 +0400 +@@ -392,7 +392,7 @@ static int __init mtrr_if_init(void) + return -ENODEV; + + proc_root_mtrr = +- create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); ++ create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL); + if (proc_root_mtrr) { + proc_root_mtrr->owner = THIS_MODULE; + proc_root_mtrr->proc_fops = &mtrr_fops; +diff -upr 
linux-2.6.16.orig/arch/i386/kernel/dmi_scan.c linux-2.6.16-026test015/arch/i386/kernel/dmi_scan.c +--- linux-2.6.16.orig/arch/i386/kernel/dmi_scan.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/dmi_scan.c 2006-07-04 14:41:36.000000000 +0400 +@@ -106,7 +106,7 @@ static void __init dmi_save_devices(stru + struct dmi_device *dev; + + for (i = 0; i < count; i++) { +- char *d = ((char *) dm) + (i * 2); ++ char *d = (char *)(dm + 1) + (i * 2); + + /* Skip disabled device */ + if ((*d & 0x80) == 0) +diff -upr linux-2.6.16.orig/arch/i386/kernel/irq.c linux-2.6.16-026test015/arch/i386/kernel/irq.c +--- linux-2.6.16.orig/arch/i386/kernel/irq.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/irq.c 2006-07-04 14:41:38.000000000 +0400 +@@ -59,7 +59,9 @@ fastcall unsigned int do_IRQ(struct pt_r + union irq_ctx *curctx, *irqctx; + u32 *isp; + #endif ++ struct ve_struct *ve; + ++ ve = set_exec_env(get_ve0()); + irq_enter(); + #ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 1KB free? 
*/ +@@ -108,6 +110,7 @@ fastcall unsigned int do_IRQ(struct pt_r + __do_IRQ(irq, regs); + + irq_exit(); ++ (void)set_exec_env(ve); + + return 1; + } +diff -upr linux-2.6.16.orig/arch/i386/kernel/ldt.c linux-2.6.16-026test015/arch/i386/kernel/ldt.c +--- linux-2.6.16.orig/arch/i386/kernel/ldt.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/ldt.c 2006-07-04 14:41:39.000000000 +0400 +@@ -13,6 +13,7 @@ + #include <linux/smp_lock.h> + #include <linux/vmalloc.h> + #include <linux/slab.h> ++#include <linux/module.h> + + #include <asm/uaccess.h> + #include <asm/system.h> +@@ -20,6 +21,8 @@ + #include <asm/desc.h> + #include <asm/mmu_context.h> + ++#include <ub/ub_mem.h> ++ + #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ + static void flush_ldt(void *null) + { +@@ -39,9 +42,9 @@ static int alloc_ldt(mm_context_t *pc, i + oldsize = pc->size; + mincount = (mincount+511)&(~511); + if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) +- newldt = vmalloc(mincount*LDT_ENTRY_SIZE); ++ newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE); + else +- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); ++ newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); + + if (!newldt) + return -ENOMEM; +@@ -105,6 +108,7 @@ int init_new_context(struct task_struct + } + return retval; + } ++EXPORT_SYMBOL_GPL(init_new_context); + + /* + * No need to lock the MM as we are the last user +@@ -251,3 +255,5 @@ asmlinkage int sys_modify_ldt(int func, + } + return ret; + } ++ ++EXPORT_SYMBOL_GPL(default_ldt); +diff -upr linux-2.6.16.orig/arch/i386/kernel/nmi.c linux-2.6.16-026test015/arch/i386/kernel/nmi.c +--- linux-2.6.16.orig/arch/i386/kernel/nmi.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/nmi.c 2006-07-04 14:41:37.000000000 +0400 +@@ -32,7 +32,13 @@ + + #include "mach_traps.h" + +-unsigned int nmi_watchdog = NMI_NONE; ++#ifdef CONFIG_NMI_WATCHDOG ++#define NMI_DEFAULT NMI_IO_APIC ++#else ++#define NMI_DEFAULT NMI_NONE 
++#endif ++ ++unsigned int nmi_watchdog = NMI_DEFAULT; + extern int unknown_nmi_panic; + static unsigned int nmi_hz = HZ; + static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +@@ -521,7 +527,22 @@ void touch_nmi_watchdog (void) + + extern void die_nmi(struct pt_regs *, const char *msg); + +-void nmi_watchdog_tick (struct pt_regs * regs) ++void smp_show_regs(struct pt_regs *regs, void *info) ++{ ++ static DEFINE_SPINLOCK(show_regs_lock); ++ ++ if (regs == NULL) ++ return; ++ ++ bust_spinlocks(1); ++ spin_lock(&show_regs_lock); ++ printk("----------- IPI show regs -----------"); ++ show_regs(regs); ++ spin_unlock(&show_regs_lock); ++ bust_spinlocks(0); ++} ++ ++void nmi_watchdog_tick(struct pt_regs *regs) + { + + /* +diff -upr linux-2.6.16.orig/arch/i386/kernel/process.c linux-2.6.16-026test015/arch/i386/kernel/process.c +--- linux-2.6.16.orig/arch/i386/kernel/process.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/process.c 2006-07-04 14:41:39.000000000 +0400 +@@ -59,6 +59,7 @@ + #include <asm/cpu.h> + + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); ++EXPORT_SYMBOL_GPL(ret_from_fork); + + static int hlt_counter; + +@@ -289,11 +290,15 @@ __setup("idle=", idle_setup); + void show_regs(struct pt_regs * regs) + { + unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; ++ extern int die_counter; + + printk("\n"); +- printk("Pid: %d, comm: %20s\n", current->pid, current->comm); +- printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); +- print_symbol("EIP is at %s\n", regs->eip); ++ printk("Pid: %d, comm: %20s, oopses: %d\n", ++ current->pid, current->comm, die_counter); ++ printk("EIP: %04x:[<%08lx>] CPU: %d, VCPU: %d:%d\n",0xffff & regs->xcs,regs->eip, smp_processor_id(), ++ task_vsched_id(current), task_cpu(current)); ++ if (decode_call_traces) ++ print_symbol("EIP is at %s\n", regs->eip); + + if (user_mode(regs)) + printk(" ESP: %04x:%08lx",0xffff & 
regs->xss,regs->esp); +@@ -314,6 +319,8 @@ void show_regs(struct pt_regs * regs) + cr4 = read_cr4_safe(); + printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + show_trace(NULL, ®s->esp); ++ if (!decode_call_traces) ++ printk(" EIP: [<%08lx>]\n",regs->eip); + } + + /* +@@ -339,6 +346,13 @@ int kernel_thread(int (*fn)(void *), voi + { + struct pt_regs regs; + ++ /* Don't allow kernel_thread() inside VE */ ++ if (!ve_is_super(get_exec_env())) { ++ printk("kernel_thread call inside VE\n"); ++ dump_stack(); ++ return -EPERM; ++ } ++ + memset(®s, 0, sizeof(regs)); + + regs.ebx = (unsigned long) fn; +diff -upr linux-2.6.16.orig/arch/i386/kernel/ptrace.c linux-2.6.16-026test015/arch/i386/kernel/ptrace.c +--- linux-2.6.16.orig/arch/i386/kernel/ptrace.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/ptrace.c 2006-07-04 14:41:39.000000000 +0400 +@@ -706,7 +706,9 @@ int do_syscall_trace(struct pt_regs *reg + /* the 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + /* Note that the debugger could change the result of test_thread_flag!*/ ++ set_pn_state(current, entryexit ? PN_STOP_LEAVE : PN_STOP_ENTRY); + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 
0x80:0)); ++ clear_pn_state(current); + + /* + * this isn't the same as continuing with a signal, but it will do +diff -upr linux-2.6.16.orig/arch/i386/kernel/signal.c linux-2.6.16-026test015/arch/i386/kernel/signal.c +--- linux-2.6.16.orig/arch/i386/kernel/signal.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/signal.c 2006-07-04 14:41:36.000000000 +0400 +@@ -582,7 +582,7 @@ static void fastcall do_signal(struct pt + if (!user_mode(regs)) + return; + +- if (try_to_freeze()) ++ if (try_to_freeze() && !signal_pending(current)) + goto no_signal; + + if (test_thread_flag(TIF_RESTORE_SIGMASK)) +diff -upr linux-2.6.16.orig/arch/i386/kernel/smp.c linux-2.6.16-026test015/arch/i386/kernel/smp.c +--- linux-2.6.16.orig/arch/i386/kernel/smp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/smp.c 2006-07-04 14:41:37.000000000 +0400 +@@ -21,6 +21,7 @@ + #include <linux/cpu.h> + #include <linux/module.h> + ++#include <asm/nmi.h> + #include <asm/mtrr.h> + #include <asm/tlbflush.h> + #include <mach_apic.h> +@@ -566,6 +567,89 @@ int smp_call_function (void (*func) (voi + } + EXPORT_SYMBOL(smp_call_function); + ++static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED; ++static struct nmi_call_data_struct { ++ smp_nmi_function func; ++ void *info; ++ atomic_t started; ++ atomic_t finished; ++ cpumask_t cpus_called; ++ int wait; ++} *nmi_call_data; ++ ++static int smp_nmi_callback(struct pt_regs * regs, int cpu) ++{ ++ smp_nmi_function func; ++ void *info; ++ int wait; ++ ++ func = nmi_call_data->func; ++ info = nmi_call_data->info; ++ wait = nmi_call_data->wait; ++ ack_APIC_irq(); ++ /* prevent from calling func() multiple times */ ++ if (cpu_test_and_set(cpu, nmi_call_data->cpus_called)) ++ return 0; ++ /* ++ * notify initiating CPU that I've grabbed the data and am ++ * about to execute the function ++ */ ++ mb(); ++ atomic_inc(&nmi_call_data->started); ++ /* at this point the nmi_call_data structure is out of 
scope */ ++ irq_enter(); ++ func(regs, info); ++ irq_exit(); ++ if (wait) ++ atomic_inc(&nmi_call_data->finished); ++ ++ return 0; ++} ++ ++/* ++ * This function tries to call func(regs, info) on each cpu. ++ * Func must be fast and non-blocking. ++ * May be called with disabled interrupts and from any context. ++ */ ++int smp_nmi_call_function(smp_nmi_function func, void *info, int wait) ++{ ++ struct nmi_call_data_struct data; ++ int cpus; ++ ++ cpus = num_online_cpus() - 1; ++ if (!cpus) ++ return 0; ++ ++ data.func = func; ++ data.info = info; ++ data.wait = wait; ++ atomic_set(&data.started, 0); ++ atomic_set(&data.finished, 0); ++ cpus_clear(data.cpus_called); ++ /* prevent this cpu from calling func if NMI happens */ ++ cpu_set(smp_processor_id(), data.cpus_called); ++ ++ if (!spin_trylock(&nmi_call_lock)) ++ return -1; ++ ++ nmi_call_data = &data; ++ set_nmi_ipi_callback(smp_nmi_callback); ++ mb(); ++ ++ /* Send a message to all other CPUs and wait for them to respond */ ++ send_IPI_allbutself(APIC_DM_NMI); ++ while (atomic_read(&data.started) != cpus) ++ barrier(); ++ ++ unset_nmi_ipi_callback(); ++ if (wait) ++ while (atomic_read(&data.finished) != cpus) ++ barrier(); ++ spin_unlock(&nmi_call_lock); ++ ++ return 0; ++} ++ + static void stop_this_cpu (void * dummy) + { + /* +diff -upr linux-2.6.16.orig/arch/i386/kernel/smpboot.c linux-2.6.16-026test015/arch/i386/kernel/smpboot.c +--- linux-2.6.16.orig/arch/i386/kernel/smpboot.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/smpboot.c 2006-07-04 14:41:38.000000000 +0400 +@@ -317,6 +317,10 @@ static void __init synchronize_tsc_bp (v + } + if (!buggy) + printk("passed.\n"); ++#ifdef CONFIG_VE ++ /* TSC reset. 
kill whatever might rely on old values */ ++ VE_TASK_INFO(current)->wakeup_stamp = 0; ++#endif + } + + static void __init synchronize_tsc_ap (void) +@@ -342,6 +346,10 @@ static void __init synchronize_tsc_ap (v + atomic_inc(&tsc_count_stop); + while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); + } ++#ifdef CONFIG_VE ++ /* TSC reset. kill whatever might rely on old values */ ++ VE_TASK_INFO(current)->wakeup_stamp = 0; ++#endif + } + #undef NR_LOOPS + +@@ -908,6 +916,13 @@ static int __devinit do_boot_cpu(int api + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + idle->thread.eip = (unsigned long) start_secondary; ++ ++#ifdef CONFIG_VE ++ /* Cosmetic: sleep_time won't be changed afterwards for the idle ++ * thread; keep it 0 rather than -cycles. */ ++ VE_TASK_INFO(idle)->sleep_time = 0; ++#endif ++ + /* start_eip had better be page-aligned! */ + start_eip = setup_trampoline(); + +diff -upr linux-2.6.16.orig/arch/i386/kernel/sys_i386.c linux-2.6.16-026test015/arch/i386/kernel/sys_i386.c +--- linux-2.6.16.orig/arch/i386/kernel/sys_i386.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/sys_i386.c 2006-07-04 14:41:38.000000000 +0400 +@@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsn + if (!name) + return -EFAULT; + down_read(&uts_sem); +- err=copy_to_user(name, &system_utsname, sizeof (*name)); ++ err=copy_to_user(name, &ve_utsname, sizeof (*name)); + up_read(&uts_sem); + return err?-EFAULT:0; + } +@@ -233,15 +233,15 @@ asmlinkage int sys_olduname(struct oldol + + down_read(&uts_sem); + +- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); ++ error = __copy_to_user(name->sysname,ve_utsname.sysname,__OLD_UTS_LEN); + error |= __put_user(0,name->sysname+__OLD_UTS_LEN); +- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); ++ error |= __copy_to_user(name->nodename,ve_utsname.nodename,__OLD_UTS_LEN); + error |= 
__put_user(0,name->nodename+__OLD_UTS_LEN); +- error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); ++ error |= __copy_to_user(name->release,ve_utsname.release,__OLD_UTS_LEN); + error |= __put_user(0,name->release+__OLD_UTS_LEN); +- error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); ++ error |= __copy_to_user(name->version,ve_utsname.version,__OLD_UTS_LEN); + error |= __put_user(0,name->version+__OLD_UTS_LEN); +- error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); ++ error |= __copy_to_user(name->machine,ve_utsname.machine,__OLD_UTS_LEN); + error |= __put_user(0,name->machine+__OLD_UTS_LEN); + + up_read(&uts_sem); +diff -upr linux-2.6.16.orig/arch/i386/kernel/syscall_table.S linux-2.6.16-026test015/arch/i386/kernel/syscall_table.S +--- linux-2.6.16.orig/arch/i386/kernel/syscall_table.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/syscall_table.S 2006-07-04 14:41:39.000000000 +0400 +@@ -310,3 +310,21 @@ ENTRY(sys_call_table) + .long sys_pselect6 + .long sys_ppoll + .long sys_unshare /* 310 */ ++ ++ .rept 500-(.-sys_call_table)/4 ++ .long sys_ni_syscall ++ .endr ++ .long sys_fairsched_mknod /* 500 */ ++ .long sys_fairsched_rmnod ++ .long sys_fairsched_chwt ++ .long sys_fairsched_mvpr ++ .long sys_fairsched_rate ++ ++ .rept 510-(.-sys_call_table)/4 ++ .long sys_ni_syscall ++ .endr ++ ++ .long sys_getluid /* 510 */ ++ .long sys_setluid ++ .long sys_setublimit ++ .long sys_ubstat +diff -upr linux-2.6.16.orig/arch/i386/kernel/timers/timer_tsc.c linux-2.6.16-026test015/arch/i386/kernel/timers/timer_tsc.c +--- linux-2.6.16.orig/arch/i386/kernel/timers/timer_tsc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/timers/timer_tsc.c 2006-07-04 14:41:38.000000000 +0400 +@@ -94,7 +94,7 @@ static int count2; /* counter for mark_o + * Equal to 2^32 * (1 / (clocks per usec) ). + * Initialized in time_init. 
+ */ +-static unsigned long fast_gettimeoffset_quotient; ++unsigned long fast_gettimeoffset_quotient; + + static unsigned long get_offset_tsc(void) + { +diff -upr linux-2.6.16.orig/arch/i386/kernel/traps.c linux-2.6.16-026test015/arch/i386/kernel/traps.c +--- linux-2.6.16.orig/arch/i386/kernel/traps.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/traps.c 2006-07-04 14:41:39.000000000 +0400 +@@ -116,8 +116,10 @@ static void print_addr_and_symbol(unsign + { + printk(log_lvl); + printk(" [<%08lx>] ", addr); +- print_symbol("%s", addr); +- printk("\n"); ++ if (decode_call_traces) { ++ print_symbol("%s", addr); ++ printk("\n"); ++ } + } + + static inline unsigned long print_context_stack(struct thread_info *tinfo, +@@ -167,7 +169,10 @@ static void show_trace_log_lvl(struct ta + if (!stack) + break; + printk(log_lvl); +- printk(" =======================\n"); ++ if (decode_call_traces) ++ printk(" =======================\n"); ++ else ++ printk(" =<ctx>= "); + } + } + +@@ -203,8 +208,13 @@ static void show_stack_log_lvl(struct ta + } + printk("\n"); + printk(log_lvl); +- printk("Call Trace:\n"); ++ if (decode_call_traces) ++ printk("Call Trace:\n"); ++ else ++ printk("Call Trace: "); + show_trace_log_lvl(task, esp, log_lvl); ++ if (!decode_call_traces) ++ printk("\n"); + } + + void show_stack(struct task_struct *task, unsigned long *esp) +@@ -220,6 +230,8 @@ void dump_stack(void) + unsigned long stack; + + show_trace(current, &stack); ++ if (!decode_call_traces) ++ printk("\n"); + } + + EXPORT_SYMBOL(dump_stack); +@@ -239,9 +251,10 @@ void show_registers(struct pt_regs *regs + ss = regs->xss & 0xffff; + } + print_modules(); +- printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" ++ printk(KERN_EMERG "CPU: %d, VCPU: %d:%d\nEIP: %04x:[<%08lx>] %s VLI\n" + "EFLAGS: %08lx (%s %.*s) \n", +- smp_processor_id(), 0xffff & regs->xcs, regs->eip, ++ smp_processor_id(), task_vsched_id(current), task_cpu(current), ++ 0xffff & regs->xcs, 
regs->eip, + print_tainted(), regs->eflags, system_utsname.release, + (int)strcspn(system_utsname.version, " "), + system_utsname.version); +@@ -252,8 +265,11 @@ void show_registers(struct pt_regs *regs + regs->esi, regs->edi, regs->ebp, esp); + printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss); +- printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)", +- current->comm, current->pid, current_thread_info(), current); ++ printk(KERN_EMERG "Process %s (pid: %d, veid=%d, threadinfo=%p task=%p)", ++ current->comm, current->pid, ++ VEID(VE_TASK_INFO(current)->owner_env), ++ current_thread_info(), current); ++ + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault.. +@@ -299,9 +315,9 @@ static void handle_BUG(struct pt_regs *r + goto no_bug; + if (ud2 != 0x0b0f) + goto no_bug; +- if (__get_user(line, (unsigned short __user *)(eip + 2))) ++ if (__get_user(line, (unsigned short __user *)(eip + 4))) + goto bug; +- if (__get_user(file, (char * __user *)(eip + 4)) || ++ if (__get_user(file, (char * __user *)(eip + 7)) || + (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) + file = "<bad filename>"; + +@@ -316,6 +332,15 @@ bug: + printk(KERN_EMERG "Kernel BUG\n"); + } + ++int die_counter = 0; ++ ++static void inline check_kernel_csum_bug(void) ++{ ++ if (kernel_text_csum_broken) ++ printk("Kernel code checksum mismatch detected %d times\n", ++ kernel_text_csum_broken); ++} ++ + /* This is gone through when something in the kernel + * has done something bad and is about to be terminated. 
+ */ +@@ -330,7 +355,6 @@ void die(const char * str, struct pt_reg + .lock_owner = -1, + .lock_owner_depth = 0 + }; +- static int die_counter; + unsigned long flags; + + if (die.lock_owner != raw_smp_processor_id()) { +@@ -370,6 +394,7 @@ void die(const char * str, struct pt_reg + } else + printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); + ++ check_kernel_csum_bug(); + bust_spinlocks(0); + die.lock_owner = -1; + spin_unlock_irqrestore(&die.lock, flags); +@@ -597,12 +622,27 @@ static void unknown_nmi_error(unsigned c + printk("Do you have a strange power saving mode enabled?\n"); + } + +-static DEFINE_SPINLOCK(nmi_print_lock); ++/* ++ * Voyager doesn't implement these ++ */ ++void __attribute__((weak)) smp_show_regs(struct pt_regs *regs, void *info) ++{ ++} ++ ++#ifdef CONFIG_SMP ++int __attribute__((weak)) ++smp_nmi_call_function(smp_nmi_function func, void *info, int wait) ++{ ++ return 0; ++} ++#endif + + void die_nmi (struct pt_regs *regs, const char *msg) + { ++ static DEFINE_SPINLOCK(nmi_print_lock); ++ + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) == +- NOTIFY_STOP) ++ NOTIFY_STOP) + return; + + spin_lock(&nmi_print_lock); +@@ -615,7 +655,11 @@ void die_nmi (struct pt_regs *regs, cons + printk(" on CPU%d, eip %08lx, registers:\n", + smp_processor_id(), regs->eip); + show_registers(regs); +- printk(KERN_EMERG "console shuts up ...\n"); ++ smp_nmi_call_function(smp_show_regs, NULL, 1); ++ bust_spinlocks(1); ++ /* current CPU messages should go bottom */ ++ if (!decode_call_traces) ++ smp_show_regs(regs, NULL); + console_silent(); + spin_unlock(&nmi_print_lock); + bust_spinlocks(0); +@@ -631,6 +675,14 @@ void die_nmi (struct pt_regs *regs, cons + do_exit(SIGSEGV); + } + ++static int dummy_nmi_callback(struct pt_regs * regs, int cpu) ++{ ++ return 0; ++} ++ ++static nmi_callback_t nmi_callback = dummy_nmi_callback; ++static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback; ++ + static void default_do_nmi(struct pt_regs * regs) + 
{ + unsigned char reason = 0; +@@ -653,6 +705,9 @@ static void default_do_nmi(struct pt_reg + return; + } + #endif ++ if (nmi_ipi_callback != dummy_nmi_callback) ++ return; ++ + unknown_nmi_error(reason, regs); + return; + } +@@ -669,13 +724,6 @@ static void default_do_nmi(struct pt_reg + reassert_nmi(); + } + +-static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +-{ +- return 0; +-} +- +-static nmi_callback_t nmi_callback = dummy_nmi_callback; +- + fastcall void do_nmi(struct pt_regs * regs, long error_code) + { + int cpu; +@@ -689,9 +737,20 @@ fastcall void do_nmi(struct pt_regs * re + if (!rcu_dereference(nmi_callback)(regs, cpu)) + default_do_nmi(regs); + ++ nmi_ipi_callback(regs, cpu); + nmi_exit(); + } + ++void set_nmi_ipi_callback(nmi_callback_t callback) ++{ ++ nmi_ipi_callback = callback; ++} ++ ++void unset_nmi_ipi_callback(void) ++{ ++ nmi_ipi_callback = dummy_nmi_callback; ++} ++ + void set_nmi_callback(nmi_callback_t callback) + { + rcu_assign_pointer(nmi_callback, callback); +diff -upr linux-2.6.16.orig/arch/i386/kernel/vm86.c linux-2.6.16-026test015/arch/i386/kernel/vm86.c +--- linux-2.6.16.orig/arch/i386/kernel/vm86.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/kernel/vm86.c 2006-07-04 14:41:36.000000000 +0400 +@@ -43,6 +43,7 @@ + #include <linux/smp_lock.h> + #include <linux/highmem.h> + #include <linux/ptrace.h> ++#include <linux/audit.h> + + #include <asm/uaccess.h> + #include <asm/io.h> +@@ -252,6 +253,7 @@ out: + static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) + { + struct tss_struct *tss; ++ long eax; + /* + * make sure the vm86() system call doesn't try to do anything silly + */ +@@ -305,13 +307,19 @@ static void do_sys_vm86(struct kernel_vm + tsk->thread.screen_bitmap = info->screen_bitmap; + if (info->flags & VM86_SCREEN_BITMAP) + mark_screen_rdonly(tsk->mm); ++ __asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t"); ++ __asm__ __volatile__("movl 
%%eax, %0\n" :"=r"(eax)); ++ ++ /*call audit_syscall_exit since we do not exit via the normal paths */ ++ if (unlikely(current->audit_context)) ++ audit_syscall_exit(current, AUDITSC_RESULT(eax), eax); ++ + __asm__ __volatile__( +- "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" + "movl %0,%%esp\n\t" + "movl %1,%%ebp\n\t" + "jmp resume_userspace" + : /* no outputs */ +- :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax"); ++ :"r" (&info->regs), "r" (task_thread_info(tsk))); + /* we never return here */ + } + +diff -upr linux-2.6.16.orig/arch/i386/mm/fault.c linux-2.6.16-026test015/arch/i386/mm/fault.c +--- linux-2.6.16.orig/arch/i386/mm/fault.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/mm/fault.c 2006-07-04 14:41:37.000000000 +0400 +@@ -31,32 +31,6 @@ + extern void die(const char *,struct pt_regs *,long); + + /* +- * Unlock any spinlocks which will prevent us from getting the +- * message out +- */ +-void bust_spinlocks(int yes) +-{ +- int loglevel_save = console_loglevel; +- +- if (yes) { +- oops_in_progress = 1; +- return; +- } +-#ifdef CONFIG_VT +- unblank_screen(); +-#endif +- oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk will give klogd +- * a poke. Hold onto your hats... +- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; +-} +- +-/* + * Return EIP plus the CS segment base. The segment limit is also + * adjusted, clamped to the kernel/user address space (whichever is + * appropriate), and returned in *eip_limit. 
+@@ -347,7 +321,6 @@ good_area: + goto bad_area; + } + +- survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo +@@ -485,14 +458,14 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (tsk->pid == 1) { +- yield(); +- down_read(&mm->mmap_sem); +- goto survive; ++ if (error_code & 4) { ++ /* ++ * 0-order allocation always success if something really ++ * fatal not happen: beancounter overdraft or OOM. ++ */ ++ force_sig(SIGKILL, tsk); ++ return; + } +- printk("VM: killing process %s\n", tsk->comm); +- if (error_code & 4) +- do_exit(SIGKILL); + goto no_context; + + do_sigbus: +diff -upr linux-2.6.16.orig/arch/i386/mm/hugetlbpage.c linux-2.6.16-026test015/arch/i386/mm/hugetlbpage.c +--- linux-2.6.16.orig/arch/i386/mm/hugetlbpage.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/mm/hugetlbpage.c 2006-07-04 14:41:39.000000000 +0400 +@@ -14,6 +14,7 @@ + #include <linux/slab.h> + #include <linux/err.h> + #include <linux/sysctl.h> ++#include <linux/module.h> + #include <asm/mman.h> + #include <asm/tlb.h> + #include <asm/tlbflush.h> +@@ -110,6 +111,7 @@ int pmd_huge(pmd_t pmd) + { + return !!(pmd_val(pmd) & _PAGE_PSE); + } ++EXPORT_SYMBOL(pmd_huge); + + struct page * + follow_huge_pmd(struct mm_struct *mm, unsigned long address, +diff -upr linux-2.6.16.orig/arch/i386/mm/init.c linux-2.6.16-026test015/arch/i386/mm/init.c +--- linux-2.6.16.orig/arch/i386/mm/init.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/mm/init.c 2006-07-04 14:41:37.000000000 +0400 +@@ -677,7 +677,7 @@ void __init pgtable_cache_init(void) + pmd_cache = kmem_cache_create("pmd", + PTRS_PER_PMD*sizeof(pmd_t), + PTRS_PER_PMD*sizeof(pmd_t), +- 0, ++ SLAB_UBC, + pmd_ctor, + NULL); + if (!pmd_cache) +@@ -686,7 +686,7 @@ void __init pgtable_cache_init(void) + pgd_cache = kmem_cache_create("pgd", + PTRS_PER_PGD*sizeof(pgd_t), + PTRS_PER_PGD*sizeof(pgd_t), +- 0, ++ 
SLAB_UBC, + pgd_ctor, + PTRS_PER_PMD == 1 ? pgd_dtor : NULL); + if (!pgd_cache) +diff -upr linux-2.6.16.orig/arch/i386/mm/pgtable.c linux-2.6.16-026test015/arch/i386/mm/pgtable.c +--- linux-2.6.16.orig/arch/i386/mm/pgtable.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/i386/mm/pgtable.c 2006-07-04 14:41:38.000000000 +0400 +@@ -5,8 +5,10 @@ + #include <linux/config.h> + #include <linux/sched.h> + #include <linux/kernel.h> ++#include <linux/module.h> + #include <linux/errno.h> + #include <linux/mm.h> ++#include <linux/vmalloc.h> + #include <linux/swap.h> + #include <linux/smp.h> + #include <linux/highmem.h> +@@ -64,7 +66,9 @@ void show_mem(void) + printk(KERN_INFO "%lu pages mapped\n", ps.nr_mapped); + printk(KERN_INFO "%lu pages slab\n", ps.nr_slab); + printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages); ++ vprintstat(); + } ++EXPORT_SYMBOL(show_mem); + + /* + * Associate a virtual page frame with a given physical page frame +@@ -159,9 +163,11 @@ struct page *pte_alloc_one(struct mm_str + struct page *pte; + + #ifdef CONFIG_HIGHPTE +- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); ++ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_HIGHMEM| ++ __GFP_REPEAT|__GFP_ZERO, 0); + #else +- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); ++ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC| ++ __GFP_REPEAT|__GFP_ZERO, 0); + #endif + return pte; + } +diff -upr linux-2.6.16.orig/arch/ia64/Kconfig linux-2.6.16-026test015/arch/ia64/Kconfig +--- linux-2.6.16.orig/arch/ia64/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/Kconfig 2006-07-04 14:41:39.000000000 +0400 +@@ -283,6 +283,8 @@ config PREEMPT + Say Y here if you are building a kernel for a desktop, embedded + or real-time system. Say N if you are unsure. 
+ ++source "kernel/Kconfig.fairsched" ++ + source "mm/Kconfig" + + config ARCH_SELECT_MEMORY_MODEL +@@ -464,6 +466,10 @@ endmenu + + source "arch/ia64/Kconfig.debug" + ++source "kernel/Kconfig.openvz" ++ + source "security/Kconfig" + + source "crypto/Kconfig" ++ ++source "kernel/ub/Kconfig" +diff -upr linux-2.6.16.orig/arch/ia64/ia32/binfmt_elf32.c linux-2.6.16-026test015/arch/ia64/ia32/binfmt_elf32.c +--- linux-2.6.16.orig/arch/ia64/ia32/binfmt_elf32.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/ia32/binfmt_elf32.c 2006-07-04 14:41:37.000000000 +0400 +@@ -136,6 +136,12 @@ ia64_elf32_init (struct pt_regs *regs) + up_write(¤t->mm->mmap_sem); + } + ++ if (ub_memory_charge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES * ++ IA32_LDT_ENTRY_SIZE), ++ VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, ++ NULL, UB_SOFT)) ++ goto skip; ++ + /* + * Install LDT as anonymous memory. This gives us all-zero segment descriptors + * until a task modifies them via modify_ldt(). +@@ -157,7 +163,12 @@ ia64_elf32_init (struct pt_regs *regs) + } + } + up_write(¤t->mm->mmap_sem); +- } ++ } else ++ ub_memory_uncharge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES * ++ IA32_LDT_ENTRY_SIZE), ++ VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, NULL); ++ ++skip: + + ia64_psr(regs)->ac = 0; /* turn off alignment checking */ + regs->loadrs = 0; +@@ -212,9 +223,15 @@ ia32_setup_arg_pages (struct linux_binpr + bprm->loader += stack_base; + bprm->exec += stack_base; + ++ ret = -ENOMEM; ++ if (ub_memory_charge(mm, IA32_STACK_TOP - ++ (PAGE_MASK & (unsigned long)bprm->p), ++ VM_STACK_FLAGS, NULL, UB_SOFT)) ++ goto err_charge; ++ + mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!mpnt) +- return -ENOMEM; ++ goto err_alloc; + + memset(mpnt, 0, sizeof(*mpnt)); + +@@ -231,11 +248,8 @@ ia32_setup_arg_pages (struct linux_binpr + mpnt->vm_flags = VM_STACK_FLAGS; + mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)? 
+ PAGE_COPY_EXEC: PAGE_COPY; +- if ((ret = insert_vm_struct(current->mm, mpnt))) { +- up_write(¤t->mm->mmap_sem); +- kmem_cache_free(vm_area_cachep, mpnt); +- return ret; +- } ++ if ((ret = insert_vm_struct(current->mm, mpnt))) ++ goto err_insert; + current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt); + } + +@@ -254,6 +268,16 @@ ia32_setup_arg_pages (struct linux_binpr + current->thread.ppl = ia32_init_pp_list(); + + return 0; ++ ++err_insert: ++ up_write(¤t->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, mpnt); ++err_alloc: ++ ub_memory_uncharge(mm, IA32_STACK_TOP - ++ (PAGE_MASK & (unsigned long)bprm->p), ++ VM_STACK_FLAGS, NULL); ++err_charge: ++ return ret; + } + + static void +diff -upr linux-2.6.16.orig/arch/ia64/kernel/asm-offsets.c linux-2.6.16-026test015/arch/ia64/kernel/asm-offsets.c +--- linux-2.6.16.orig/arch/ia64/kernel/asm-offsets.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/asm-offsets.c 2006-07-04 14:41:38.000000000 +0400 +@@ -44,11 +44,21 @@ void foo(void) + DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); + DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); + DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); ++#ifdef CONFIG_VE ++ DEFINE(IA64_TASK_PID_OFFSET, offsetof ++ (struct task_struct, pids[PIDTYPE_PID].vnr)); ++#else + DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); ++#endif + DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); + DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); + DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); ++#ifdef CONFIG_VE ++ DEFINE(IA64_TASK_TGID_OFFSET, offsetof ++ (struct task_struct, pids[PIDTYPE_TGID].vnr)); ++#else + DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); ++#endif + DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); 
+ DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); + +diff -upr linux-2.6.16.orig/arch/ia64/kernel/entry.S linux-2.6.16-026test015/arch/ia64/kernel/entry.S +--- linux-2.6.16.orig/arch/ia64/kernel/entry.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/entry.S 2006-07-04 14:41:39.000000000 +0400 +@@ -1620,4 +1620,17 @@ sys_call_table: + data8 sys_ni_syscall // 1295 reserved for ppoll + data8 sys_unshare + ++.rept 1500-1297 ++ data8 sys_ni_syscall ++.endr ++ data8 sys_fairsched_mknod // 1500 ++ data8 sys_fairsched_rmnod ++ data8 sys_fairsched_chwt ++ data8 sys_fairsched_mvpr ++ data8 sys_fairsched_rate ++ data8 sys_getluid // 1505 ++ data8 sys_setluid ++ data8 sys_setublimit ++ data8 sys_ubstat ++ + .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls +diff -upr linux-2.6.16.orig/arch/ia64/kernel/fsys.S linux-2.6.16-026test015/arch/ia64/kernel/fsys.S +--- linux-2.6.16.orig/arch/ia64/kernel/fsys.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/fsys.S 2006-07-04 14:41:38.000000000 +0400 +@@ -72,6 +72,7 @@ ENTRY(fsys_getpid) + FSYS_RETURN + END(fsys_getpid) + ++#ifndef CONFIG_VE + ENTRY(fsys_getppid) + .prologue + .altrp b6 +@@ -118,6 +119,7 @@ ENTRY(fsys_getppid) + #endif + FSYS_RETURN + END(fsys_getppid) ++#endif + + ENTRY(fsys_set_tid_address) + .prologue +@@ -665,7 +667,11 @@ fsyscall_table: + data8 0 // chown + data8 0 // lseek // 1040 + data8 fsys_getpid // getpid ++#ifdef CONFIG_VE ++ data8 0 ++#else + data8 fsys_getppid // getppid ++#endif + data8 0 // mount + data8 0 // umount + data8 0 // setuid // 1045 +diff -upr linux-2.6.16.orig/arch/ia64/kernel/irq.c linux-2.6.16-026test015/arch/ia64/kernel/irq.c +--- linux-2.6.16.orig/arch/ia64/kernel/irq.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/irq.c 2006-07-04 14:41:38.000000000 +0400 +@@ -163,7 +163,9 @@ void fixup_irqs(void) + { + 
unsigned int irq; + extern void ia64_process_pending_intr(void); ++ struct ve_struct *ve; + ++ ve = set_exec_env(get_ve0()); + ia64_set_itv(1<<16); + /* + * Phase 1: Locate irq's bound to this cpu and +@@ -197,5 +199,6 @@ void fixup_irqs(void) + */ + max_xtp(); + local_irq_disable(); ++ (void)set_exec_env(ve); + } + #endif +diff -upr linux-2.6.16.orig/arch/ia64/kernel/irq_ia64.c linux-2.6.16-026test015/arch/ia64/kernel/irq_ia64.c +--- linux-2.6.16.orig/arch/ia64/kernel/irq_ia64.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/irq_ia64.c 2006-07-04 14:41:38.000000000 +0400 +@@ -103,6 +103,7 @@ void + ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) + { + unsigned long saved_tpr; ++ struct ve_struct *ve; + + #if IRQ_DEBUG + { +@@ -139,6 +140,7 @@ ia64_handle_irq (ia64_vector vector, str + * 16 (without this, it would be ~240, which could easily lead + * to kernel stack overflows). + */ ++ ve = set_exec_env(get_ve0()); + irq_enter(); + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); +@@ -164,6 +166,7 @@ ia64_handle_irq (ia64_vector vector, str + * come through until ia64_eoi() has been done. 
+ 	 */
+ 	irq_exit();
++	(void)set_exec_env(ve);
+ }
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
+@@ -176,9 +179,11 @@ void ia64_process_pending_intr(void)
+ 	ia64_vector vector;
+ 	unsigned long saved_tpr;
+ 	extern unsigned int vectors_in_migration[NR_IRQS];
++	struct ve_struct *ve;
+ 
+ 	vector = ia64_get_ivr();
+ 
++	ve = set_exec_env(get_ve0());
+ 	irq_enter();
+ 	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+ 	ia64_srlz_d();
+@@ -210,6 +215,7 @@ void ia64_process_pending_intr(void)
+ 		vector = ia64_get_ivr();
+ 	}
+ 	irq_exit();
++	(void)set_exec_env(ve);
+ }
+ #endif
+ 
+diff -upr linux-2.6.16.orig/arch/ia64/kernel/mca.c linux-2.6.16-026test015/arch/ia64/kernel/mca.c
+--- linux-2.6.16.orig/arch/ia64/kernel/mca.c	2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16-026test015/arch/ia64/kernel/mca.c	2006-07-04 14:41:38.000000000 +0400
+@@ -1241,10 +1241,10 @@ default_monarch_init_process(struct noti
+ 	}
+ 	printk("\n\n");
+ 	if (read_trylock(&tasklist_lock)) {
+-		do_each_thread (g, t) {
++		do_each_thread_all (g, t) {
+ 			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+ 			show_stack(t, NULL);
+-		} while_each_thread (g, t);
++		} while_each_thread_all (g, t);
+ 		read_unlock(&tasklist_lock);
+ 	}
+ 	return NOTIFY_DONE;
+diff -upr linux-2.6.16.orig/arch/ia64/kernel/perfmon.c linux-2.6.16-026test015/arch/ia64/kernel/perfmon.c
+--- linux-2.6.16.orig/arch/ia64/kernel/perfmon.c	2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16-026test015/arch/ia64/kernel/perfmon.c	2006-07-04 14:41:38.000000000 +0400
+@@ -2624,7 +2624,7 @@ pfm_get_task(pfm_context_t *ctx, pid_t p
+ 
+ 	read_lock(&tasklist_lock);
+ 
+-	p = find_task_by_pid(pid);
++	p = find_task_by_pid_ve(pid);
+ 
+ 	/* make sure task cannot go away while we operate on it */
+ 	if (p) get_task_struct(p);
+@@ -4188,12 +4188,12 @@ pfm_check_task_exist(pfm_context_t *ctx)
+ 
+ 	read_lock(&tasklist_lock);
+ 
+-	do_each_thread (g, t) {
++	do_each_thread_ve (g, t) {
+ 		if (t->thread.pfm_context == ctx) {
+ 			ret = 0;
+ 			break;
+ 		}
+-	} while_each_thread (g, 
t); ++ } while_each_thread_ve (g, t); + + read_unlock(&tasklist_lock); + +diff -upr linux-2.6.16.orig/arch/ia64/kernel/process.c linux-2.6.16-026test015/arch/ia64/kernel/process.c +--- linux-2.6.16.orig/arch/ia64/kernel/process.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/process.c 2006-07-04 14:41:39.000000000 +0400 +@@ -109,7 +109,8 @@ show_regs (struct pt_regs *regs) + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + print_modules(); +- printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm); ++ printk("\nPid: %d, CPU %d, VCPU %d:%d, comm: %20s\n", current->pid, smp_processor_id(), ++ task_vsched_id(current), task_cpu(current), current->comm); + printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n", + regs->cr_ipsr, regs->cr_ifs, ip, print_tainted()); + print_symbol("ip is at %s\n", ip); +@@ -681,6 +682,13 @@ kernel_thread (int (*fn)(void *), void * + struct pt_regs pt; + } regs; + ++ /* Don't allow kernel_thread() inside VE */ ++ if (!ve_is_super(get_exec_env())) { ++ printk("kernel_thread call inside VE\n"); ++ dump_stack(); ++ return -EPERM; ++ } ++ + memset(®s, 0, sizeof(regs)); + regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */ + regs.pt.r1 = helper_fptr[1]; /* set GP */ +diff -upr linux-2.6.16.orig/arch/ia64/kernel/ptrace.c linux-2.6.16-026test015/arch/ia64/kernel/ptrace.c +--- linux-2.6.16.orig/arch/ia64/kernel/ptrace.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/ptrace.c 2006-07-04 14:41:38.000000000 +0400 +@@ -1433,7 +1433,7 @@ sys_ptrace (long request, pid_t pid, uns + ret = -ESRCH; + read_lock(&tasklist_lock); + { +- child = find_task_by_pid(pid); ++ child = find_task_by_pid_ve(pid); + if (child) { + if (peek_or_poke) + child = find_thread_for_addr(child, addr); +diff -upr linux-2.6.16.orig/arch/ia64/kernel/signal.c linux-2.6.16-026test015/arch/ia64/kernel/signal.c +--- linux-2.6.16.orig/arch/ia64/kernel/signal.c 
2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/signal.c 2006-07-04 14:41:38.000000000 +0400 +@@ -270,7 +270,7 @@ ia64_rt_sigreturn (struct sigscratch *sc + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; +- si.si_pid = current->pid; ++ si.si_pid = virt_pid(current); + si.si_uid = current->uid; + si.si_addr = sc; + force_sig_info(SIGSEGV, &si, current); +@@ -375,7 +375,7 @@ force_sigsegv_info (int sig, void __user + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; +- si.si_pid = current->pid; ++ si.si_pid = virt_pid(current); + si.si_uid = current->uid; + si.si_addr = addr; + force_sig_info(SIGSEGV, &si, current); +@@ -641,7 +641,7 @@ set_sigdelayed(pid_t pid, int signo, int + for (i = 1; i <= 3; ++i) { + switch (i) { + case 1: +- t = find_task_by_pid(pid); ++ t = find_task_by_pid_ve(pid); + if (t) + start_time = start_time_ul(t); + break; +@@ -682,7 +682,7 @@ do_sigdelayed(void) + siginfo.si_code = current_thread_info()->sigdelayed.code; + siginfo.si_addr = current_thread_info()->sigdelayed.addr; + pid = current_thread_info()->sigdelayed.pid; +- t = find_task_by_pid(pid); ++ t = find_task_by_pid_ve(pid); + if (!t) + return; + if (current_thread_info()->sigdelayed.start_time != start_time_ul(t)) +diff -upr linux-2.6.16.orig/arch/ia64/kernel/traps.c linux-2.6.16-026test015/arch/ia64/kernel/traps.c +--- linux-2.6.16.orig/arch/ia64/kernel/traps.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/traps.c 2006-07-04 14:41:37.000000000 +0400 +@@ -54,34 +54,6 @@ trap_init (void) + fpswa_interface = __va(ia64_boot_param->fpswa); + } + +-/* +- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock +- * is acquired through the console unblank code) +- */ +-void +-bust_spinlocks (int yes) +-{ +- int loglevel_save = console_loglevel; +- +- if (yes) { +- oops_in_progress = 1; +- return; +- } +- +-#ifdef CONFIG_VT +- unblank_screen(); 
+-#endif +- oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() without +- * oops_in_progress set so that printk will give klogd a poke. Hold onto +- * your hats... +- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; +-} +- + void + die (const char *str, struct pt_regs *regs, long err) + { +diff -upr linux-2.6.16.orig/arch/ia64/kernel/unaligned.c linux-2.6.16-026test015/arch/ia64/kernel/unaligned.c +--- linux-2.6.16.orig/arch/ia64/kernel/unaligned.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/kernel/unaligned.c 2006-07-04 14:41:37.000000000 +0400 +@@ -1290,7 +1290,7 @@ within_logging_rate_limit (void) + { + static unsigned long count, last_time; + +- if (jiffies - last_time > 5*HZ) ++ if (jiffies - last_time > 60 * HZ) + count = 0; + if (count < 5) { + last_time = jiffies; +diff -upr linux-2.6.16.orig/arch/ia64/mm/contig.c linux-2.6.16-026test015/arch/ia64/mm/contig.c +--- linux-2.6.16.orig/arch/ia64/mm/contig.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/mm/contig.c 2006-07-04 14:41:38.000000000 +0400 +@@ -64,6 +64,7 @@ show_mem (void) + printk("%ld pages in page table cache\n", + pgtable_quicklist_total_size()); + } ++EXPORT_SYMBOL(show_mem); + + /* physical address where the bootmem map is located */ + unsigned long bootmap_start; +diff -upr linux-2.6.16.orig/arch/ia64/mm/discontig.c linux-2.6.16-026test015/arch/ia64/mm/discontig.c +--- linux-2.6.16.orig/arch/ia64/mm/discontig.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/mm/discontig.c 2006-07-04 14:41:38.000000000 +0400 +@@ -594,6 +594,7 @@ void show_mem(void) + pgtable_quicklist_total_size()); + printk("%d free buffer pages\n", nr_free_buffer_pages()); + } ++EXPORT_SYMBOL(show_mem); + + /** + * call_pernode_memory - use SRAT to call callback functions with node info +diff -upr 
linux-2.6.16.orig/arch/ia64/mm/fault.c linux-2.6.16-026test015/arch/ia64/mm/fault.c +--- linux-2.6.16.orig/arch/ia64/mm/fault.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/mm/fault.c 2006-07-04 14:41:37.000000000 +0400 +@@ -116,7 +116,6 @@ ia64_do_page_fault (unsigned long addres + if ((vma->vm_flags & mask) != mask) + goto bad_area; + +- survive: + /* + * If for any reason at all we couldn't handle the fault, make + * sure we exit gracefully rather than endlessly redo the +@@ -241,13 +240,13 @@ ia64_do_page_fault (unsigned long addres + + out_of_memory: + up_read(&mm->mmap_sem); +- if (current->pid == 1) { +- yield(); +- down_read(&mm->mmap_sem); +- goto survive; +- } +- printk(KERN_CRIT "VM: killing process %s\n", current->comm); +- if (user_mode(regs)) +- do_exit(SIGKILL); ++ if (user_mode(regs)) { ++ /* ++ * 0-order allocation always success if something really ++ * fatal not happen: beancounter overdraft or OOM. ++ */ ++ force_sig(SIGKILL, current); ++ return; ++ } + goto no_context; + } +diff -upr linux-2.6.16.orig/arch/ia64/mm/init.c linux-2.6.16-026test015/arch/ia64/mm/init.c +--- linux-2.6.16.orig/arch/ia64/mm/init.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ia64/mm/init.c 2006-07-04 14:41:37.000000000 +0400 +@@ -37,6 +37,8 @@ + #include <asm/unistd.h> + #include <asm/mca.h> + ++#include <ub/ub_vmpages.h> ++ + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + + DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist); +@@ -96,7 +98,7 @@ check_pgt_cache(void) + preempt_disable(); + while (unlikely((pages_to_free = min_pages_to_free()) > 0)) { + while (pages_to_free--) { +- free_page((unsigned long)pgtable_quicklist_alloc()); ++ free_page((unsigned long)pgtable_quicklist_alloc(0)); + } + preempt_enable(); + preempt_disable(); +@@ -146,6 +148,10 @@ ia64_init_addr_space (void) + + ia64_set_rbs_bot(); + ++ if (ub_memory_charge(current->mm, PAGE_SIZE, VM_DATA_DEFAULT_FLAGS, ++ NULL, UB_SOFT)) ++ goto skip; 
++ + /* + * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore + * the problem. When the process attempts to write to the register backing store +@@ -166,8 +172,11 @@ ia64_init_addr_space (void) + return; + } + up_write(¤t->mm->mmap_sem); +- } ++ } else ++ ub_memory_uncharge(current->mm, PAGE_SIZE, ++ VM_DATA_DEFAULT_FLAGS, NULL); + ++skip: + /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ + if (!(current->personality & MMAP_PAGE_ZERO)) { + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); +diff -upr linux-2.6.16.orig/arch/m32r/kernel/m32r_ksyms.c linux-2.6.16-026test015/arch/m32r/kernel/m32r_ksyms.c +--- linux-2.6.16.orig/arch/m32r/kernel/m32r_ksyms.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/m32r/kernel/m32r_ksyms.c 2006-07-04 14:41:36.000000000 +0400 +@@ -38,10 +38,6 @@ EXPORT_SYMBOL(__udelay); + EXPORT_SYMBOL(__delay); + EXPORT_SYMBOL(__const_udelay); + +-EXPORT_SYMBOL(__get_user_1); +-EXPORT_SYMBOL(__get_user_2); +-EXPORT_SYMBOL(__get_user_4); +- + EXPORT_SYMBOL(strpbrk); + EXPORT_SYMBOL(strstr); + +diff -upr linux-2.6.16.orig/arch/m32r/kernel/setup.c linux-2.6.16-026test015/arch/m32r/kernel/setup.c +--- linux-2.6.16.orig/arch/m32r/kernel/setup.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/m32r/kernel/setup.c 2006-07-04 14:41:36.000000000 +0400 +@@ -9,6 +9,7 @@ + + #include <linux/config.h> + #include <linux/init.h> ++#include <linux/kernel.h> + #include <linux/stddef.h> + #include <linux/fs.h> + #include <linux/sched.h> +@@ -218,8 +219,6 @@ static unsigned long __init setup_memory + extern unsigned long setup_memory(void); + #endif /* CONFIG_DISCONTIGMEM */ + +-#define M32R_PCC_PCATCR 0x00ef7014 /* will move to m32r.h */ +- + void __init setup_arch(char **cmdline_p) + { + ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); +@@ -268,15 +267,14 @@ void __init setup_arch(char **cmdline_p) + paging_init(); + } + +-static struct cpu cpu[NR_CPUS]; ++static 
struct cpu cpu_devices[NR_CPUS]; + + static int __init topology_init(void) + { +- int cpu_id; ++ int i; + +- for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++) +- if (cpu_possible(cpu_id)) +- register_cpu(&cpu[cpu_id], cpu_id, NULL); ++ for_each_present_cpu(i) ++ register_cpu(&cpu_devices[i], i, NULL); + + return 0; + } +diff -upr linux-2.6.16.orig/arch/m32r/kernel/smpboot.c linux-2.6.16-026test015/arch/m32r/kernel/smpboot.c +--- linux-2.6.16.orig/arch/m32r/kernel/smpboot.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/m32r/kernel/smpboot.c 2006-07-04 14:41:36.000000000 +0400 +@@ -39,8 +39,10 @@ + * Martin J. Bligh : Added support for multi-quad systems + */ + ++#include <linux/module.h> + #include <linux/config.h> + #include <linux/init.h> ++#include <linux/kernel.h> + #include <linux/mm.h> + #include <linux/smp_lock.h> + #include <linux/irq.h> +@@ -72,11 +74,15 @@ physid_mask_t phys_cpu_present_map; + + /* Bitmask of currently online CPUs */ + cpumask_t cpu_online_map; ++EXPORT_SYMBOL(cpu_online_map); + + cpumask_t cpu_bootout_map; + cpumask_t cpu_bootin_map; +-cpumask_t cpu_callout_map; + static cpumask_t cpu_callin_map; ++cpumask_t cpu_callout_map; ++EXPORT_SYMBOL(cpu_callout_map); ++cpumask_t cpu_possible_map = CPU_MASK_ALL; ++EXPORT_SYMBOL(cpu_possible_map); + + /* Per CPU bogomips and other parameters */ + struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned; +@@ -110,7 +116,6 @@ static unsigned int calibration_result; + + void smp_prepare_boot_cpu(void); + void smp_prepare_cpus(unsigned int); +-static void smp_tune_scheduling(void); + static void init_ipi_lock(void); + static void do_boot_cpu(int); + int __cpu_up(unsigned int); +@@ -177,6 +182,9 @@ void __init smp_prepare_cpus(unsigned in + } + for (phys_id = 0 ; phys_id < nr_cpu ; phys_id++) + physid_set(phys_id, phys_cpu_present_map); ++#ifndef CONFIG_HOTPLUG_CPU ++ cpu_present_map = cpu_possible_map; ++#endif + + show_mp_info(nr_cpu); + +@@ -186,7 +194,6 @@ void __init 
smp_prepare_cpus(unsigned in + * Setup boot CPU information + */ + smp_store_cpu_info(0); /* Final full version of the data */ +- smp_tune_scheduling(); + + /* + * If SMP should be disabled, then really disable it! +@@ -230,11 +237,6 @@ smp_done: + Dprintk("Boot done.\n"); + } + +-static void __init smp_tune_scheduling(void) +-{ +- /* Nothing to do. */ +-} +- + /* + * init_ipi_lock : Initialize IPI locks. + */ +@@ -629,4 +631,3 @@ static void __init unmap_cpu_to_physid(i + physid_2_cpu[phys_id] = -1; + cpu_2_physid[cpu_id] = -1; + } +- +diff -upr linux-2.6.16.orig/arch/m32r/lib/Makefile linux-2.6.16-026test015/arch/m32r/lib/Makefile +--- linux-2.6.16.orig/arch/m32r/lib/Makefile 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/m32r/lib/Makefile 2006-07-04 14:41:36.000000000 +0400 +@@ -2,6 +2,6 @@ + # Makefile for M32R-specific library files.. + # + +-lib-y := checksum.o ashxdi3.o memset.o memcpy.o getuser.o \ +- putuser.o delay.o strlen.o usercopy.o csum_partial_copy.o ++lib-y := checksum.o ashxdi3.o memset.o memcpy.o \ ++ delay.o strlen.o usercopy.o csum_partial_copy.o + +diff -upr linux-2.6.16.orig/arch/mips/kernel/branch.c linux-2.6.16-026test015/arch/mips/kernel/branch.c +--- linux-2.6.16.orig/arch/mips/kernel/branch.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/mips/kernel/branch.c 2006-07-04 14:41:36.000000000 +0400 +@@ -184,7 +184,7 @@ int __compute_return_epc(struct pt_regs + bit = (insn.i_format.rt >> 2); + bit += (bit != 0); + bit += 23; +- switch (insn.i_format.rt) { ++ switch (insn.i_format.rt & 3) { + case 0: /* bc1f */ + case 2: /* bc1fl */ + if (~fcr31 & (1 << bit)) +diff -upr linux-2.6.16.orig/arch/mips/kernel/irixelf.c linux-2.6.16-026test015/arch/mips/kernel/irixelf.c +--- linux-2.6.16.orig/arch/mips/kernel/irixelf.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/mips/kernel/irixelf.c 2006-07-04 14:41:37.000000000 +0400 +@@ -432,7 +432,7 @@ static inline int 
look_for_irix_interpre + if (retval < 0) + goto out; + +- file = open_exec(*name); ++ file = open_exec(*name, bprm); + if (IS_ERR(file)) { + retval = PTR_ERR(file); + goto out; +diff -upr linux-2.6.16.orig/arch/mips/kernel/sysirix.c linux-2.6.16-026test015/arch/mips/kernel/sysirix.c +--- linux-2.6.16.orig/arch/mips/kernel/sysirix.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/mips/kernel/sysirix.c 2006-07-04 14:41:38.000000000 +0400 +@@ -110,7 +110,7 @@ asmlinkage int irix_prctl(unsigned optio + printk("irix_prctl[%s:%d]: Wants PR_ISBLOCKED\n", + current->comm, current->pid); + read_lock(&tasklist_lock); +- task = find_task_by_pid(va_arg(args, pid_t)); ++ task = find_task_by_pid_ve(va_arg(args, pid_t)); + error = -ESRCH; + if (error) + error = (task->run_list.next != NULL); +diff -upr linux-2.6.16.orig/arch/mips/mm/c-r4k.c linux-2.6.16-026test015/arch/mips/mm/c-r4k.c +--- linux-2.6.16.orig/arch/mips/mm/c-r4k.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/mips/mm/c-r4k.c 2006-07-04 14:41:36.000000000 +0400 +@@ -154,7 +154,8 @@ static inline void blast_icache32_r4600_ + + static inline void tx49_blast_icache32_page_indexed(unsigned long page) + { +- unsigned long start = page; ++ unsigned long indexmask = current_cpu_data.icache.waysize - 1; ++ unsigned long start = INDEX_BASE + (page & indexmask); + unsigned long end = start + PAGE_SIZE; + unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit; + unsigned long ws_end = current_cpu_data.icache.ways << +diff -upr linux-2.6.16.orig/arch/powerpc/Kconfig linux-2.6.16-026test015/arch/powerpc/Kconfig +--- linux-2.6.16.orig/arch/powerpc/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/Kconfig 2006-07-04 14:41:39.000000000 +0400 +@@ -517,6 +517,7 @@ config HIGHMEM + bool "High memory support" + depends on PPC32 + ++source "kernel/Kconfig.fairsched" + source kernel/Kconfig.hz + source kernel/Kconfig.preempt + source 
"fs/Kconfig.binfmt" +@@ -956,6 +957,8 @@ source "arch/powerpc/platforms/iseries/K + + source "lib/Kconfig" + ++source "kernel/ub/Kconfig" ++ + menu "Instrumentation Support" + depends on EXPERIMENTAL + +@@ -974,6 +977,8 @@ endmenu + + source "arch/powerpc/Kconfig.debug" + ++source "kernel/Kconfig.openvz" ++ + source "security/Kconfig" + + config KEYS_COMPAT +diff -upr linux-2.6.16.orig/arch/powerpc/kernel/irq.c linux-2.6.16-026test015/arch/powerpc/kernel/irq.c +--- linux-2.6.16.orig/arch/powerpc/kernel/irq.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/kernel/irq.c 2006-07-04 14:41:38.000000000 +0400 +@@ -50,6 +50,8 @@ + #include <linux/profile.h> + #include <linux/bitops.h> + ++#include <ub/beancounter.h> ++ + #include <asm/uaccess.h> + #include <asm/system.h> + #include <asm/io.h> +@@ -189,7 +191,11 @@ void do_IRQ(struct pt_regs *regs) + #ifdef CONFIG_IRQSTACKS + struct thread_info *curtp, *irqtp; + #endif ++ struct ve_struct *ve; ++ struct user_beancounter *ub; + ++ ve = set_exec_env(get_ve0()); ++ ub = set_exec_ub(get_ub0()); + irq_enter(); + + #ifdef CONFIG_DEBUG_STACKOVERFLOW +@@ -236,6 +242,8 @@ void do_IRQ(struct pt_regs *regs) + ppc_spurious_interrupts++; + + irq_exit(); ++ (void)set_exec_ub(ub); ++ (void)set_exec_env(ve); + + #ifdef CONFIG_PPC_ISERIES + if (get_lppaca()->int_dword.fields.decr_int) { +diff -upr linux-2.6.16.orig/arch/powerpc/kernel/misc_32.S linux-2.6.16-026test015/arch/powerpc/kernel/misc_32.S +--- linux-2.6.16.orig/arch/powerpc/kernel/misc_32.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/kernel/misc_32.S 2006-07-04 14:41:37.000000000 +0400 +@@ -973,7 +973,7 @@ _GLOBAL(_get_SP) + * Create a kernel thread + * kernel_thread(fn, arg, flags) + */ +-_GLOBAL(kernel_thread) ++_GLOBAL(ppc_kernel_thread) + stwu r1,-16(r1) + stw r30,8(r1) + stw r31,12(r1) +diff -upr linux-2.6.16.orig/arch/powerpc/kernel/misc_64.S linux-2.6.16-026test015/arch/powerpc/kernel/misc_64.S +--- 
linux-2.6.16.orig/arch/powerpc/kernel/misc_64.S	2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16-026test015/arch/powerpc/kernel/misc_64.S	2006-07-04 14:41:37.000000000 +0400
+@@ -677,7 +677,7 @@ _GLOBAL(scom970_write)
+  * Create a kernel thread
+  * kernel_thread(fn, arg, flags)
+  */
+-_GLOBAL(kernel_thread)
++_GLOBAL(ppc_kernel_thread)
+ 	std	r29,-24(r1)
+ 	std	r30,-16(r1)
+ 	stdu	r1,-STACK_FRAME_OVERHEAD(r1)
+diff -upr linux-2.6.16.orig/arch/powerpc/kernel/pci_64.c linux-2.6.16-026test015/arch/powerpc/kernel/pci_64.c
+--- linux-2.6.16.orig/arch/powerpc/kernel/pci_64.c	2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16-026test015/arch/powerpc/kernel/pci_64.c	2006-07-04 14:41:36.000000000 +0400
+@@ -78,6 +78,7 @@ int global_phb_number;	/* Global phb co
+ 
+ /* Cached ISA bridge dev. */
+ struct pci_dev *ppc64_isabridge_dev = NULL;
++EXPORT_SYMBOL_GPL(ppc64_isabridge_dev);
+ 
+ static void fixup_broken_pcnet32(struct pci_dev* dev)
+ {
+diff -upr linux-2.6.16.orig/arch/powerpc/kernel/process.c linux-2.6.16-026test015/arch/powerpc/kernel/process.c
+--- linux-2.6.16.orig/arch/powerpc/kernel/process.c	2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16-026test015/arch/powerpc/kernel/process.c	2006-07-04 14:41:39.000000000 +0400
+@@ -429,7 +429,7 @@ void show_regs(struct pt_regs * regs)
+ 	       current, current->pid, current->comm, task_thread_info(current));
+ 
+ #ifdef CONFIG_SMP
+-	printk(" CPU: %d", smp_processor_id());
++	printk(" CPU: %d VCPU %d:%d", smp_processor_id(), task_vsched_id(current), task_cpu(current));
+ #endif /* CONFIG_SMP */
+ 
+ 	for (i = 0; i < 32; i++) {
+@@ -774,12 +774,12 @@ static int validate_sp(unsigned long sp,
+ 		return 1;
+ 
+ #ifdef CONFIG_IRQSTACKS
+-	stack_page = (unsigned long) hardirq_ctx[task_cpu(p)];
++	stack_page = (unsigned long) hardirq_ctx[task_pcpu(p)];
+ 	if (sp >= stack_page + sizeof(struct thread_struct)
+ 	    && sp <= stack_page + THREAD_SIZE - nbytes)
+ 		return 1;
+ 
+-	stack_page = (unsigned long) softirq_ctx[task_cpu(p)];
++	stack_page 
= (unsigned long) softirq_ctx[task_pcpu(p)]; + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; +@@ -889,6 +889,20 @@ void dump_stack(void) + } + EXPORT_SYMBOL(dump_stack); + ++long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) ++{ ++ extern long ppc_kernel_thread(int (*fn)(void *), void *arg, ++ unsigned long flags); ++ ++ if (!ve_is_super(get_exec_env())) { ++ printk("kernel_thread call inside VE\n"); ++ dump_stack(); ++ return -EPERM; ++ } ++ ++ return ppc_kernel_thread(fn, arg, flags); ++} ++ + #ifdef CONFIG_PPC64 + void ppc64_runlatch_on(void) + { +diff -upr linux-2.6.16.orig/arch/powerpc/kernel/setup_64.c linux-2.6.16-026test015/arch/powerpc/kernel/setup_64.c +--- linux-2.6.16.orig/arch/powerpc/kernel/setup_64.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/kernel/setup_64.c 2006-07-04 14:41:36.000000000 +0400 +@@ -256,12 +256,10 @@ void __init early_setup(unsigned long dt + /* + * Initialize stab / SLB management except on iSeries + */ +- if (!firmware_has_feature(FW_FEATURE_ISERIES)) { +- if (cpu_has_feature(CPU_FTR_SLB)) +- slb_initialize(); +- else +- stab_initialize(lpaca->stab_real); +- } ++ if (cpu_has_feature(CPU_FTR_SLB)) ++ slb_initialize(); ++ else if (!firmware_has_feature(FW_FEATURE_ISERIES)) ++ stab_initialize(lpaca->stab_real); + + DBG(" <- early_setup()\n"); + } +diff -upr linux-2.6.16.orig/arch/powerpc/kernel/signal_32.c linux-2.6.16-026test015/arch/powerpc/kernel/signal_32.c +--- linux-2.6.16.orig/arch/powerpc/kernel/signal_32.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/kernel/signal_32.c 2006-07-04 14:41:36.000000000 +0400 +@@ -802,10 +802,13 @@ static int do_setcontext(struct ucontext + if (__get_user(cmcp, &ucp->uc_regs)) + return -EFAULT; + mcp = (struct mcontext __user *)(u64)cmcp; ++ /* no need to check access_ok(mcp), since mcp < 4GB */ + } + #else + if (__get_user(mcp, &ucp->uc_regs)) + 
return -EFAULT; ++ if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp))) ++ return -EFAULT; + #endif + restore_sigmask(&set); + if (restore_user_regs(regs, mcp, sig)) +@@ -907,13 +910,14 @@ int sys_debug_setcontext(struct ucontext + { + struct sig_dbg_op op; + int i; ++ unsigned char tmp; + unsigned long new_msr = regs->msr; + #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + unsigned long new_dbcr0 = current->thread.dbcr0; + #endif + + for (i=0; i<ndbg; i++) { +- if (__copy_from_user(&op, dbg, sizeof(op))) ++ if (copy_from_user(&op, dbg + i, sizeof(op))) + return -EFAULT; + switch (op.dbg_type) { + case SIG_DBG_SINGLE_STEPPING: +@@ -958,6 +962,11 @@ int sys_debug_setcontext(struct ucontext + current->thread.dbcr0 = new_dbcr0; + #endif + ++ if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) ++ || __get_user(tmp, (u8 __user *) ctx) ++ || __get_user(tmp, (u8 __user *) (ctx + 1) - 1)) ++ return -EFAULT; ++ + /* + * If we get a fault copying the context into the kernel's + * image of the user's registers, we can't just return -EFAULT +diff -upr linux-2.6.16.orig/arch/powerpc/kernel/signal_64.c linux-2.6.16-026test015/arch/powerpc/kernel/signal_64.c +--- linux-2.6.16.orig/arch/powerpc/kernel/signal_64.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/kernel/signal_64.c 2006-07-04 14:41:36.000000000 +0400 +@@ -183,6 +183,8 @@ static long restore_sigcontext(struct pt + err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + if (err) + return err; ++ if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128))) ++ return -EFAULT; + /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ + if (v_regs != 0 && (msr & MSR_VEC) != 0) + err |= __copy_from_user(current->thread.vr, v_regs, +@@ -213,7 +215,7 @@ static inline void __user * get_sigframe + /* Default to using normal stack */ + newsp = regs->gpr[1]; + +- if (ka->sa.sa_flags & SA_ONSTACK) { ++ if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) { + if (! 
on_sig_stack(regs->gpr[1])) + newsp = (current->sas_ss_sp + current->sas_ss_size); + } +diff -upr linux-2.6.16.orig/arch/powerpc/kernel/syscalls.c linux-2.6.16-026test015/arch/powerpc/kernel/syscalls.c +--- linux-2.6.16.orig/arch/powerpc/kernel/syscalls.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/kernel/syscalls.c 2006-07-04 14:41:38.000000000 +0400 +@@ -259,7 +259,7 @@ long ppc_newuname(struct new_utsname __u + int err = 0; + + down_read(&uts_sem); +- if (copy_to_user(name, &system_utsname, sizeof(*name))) ++ if (copy_to_user(name, &ve_utsname, sizeof(*name))) + err = -EFAULT; + up_read(&uts_sem); + if (!err) +@@ -272,7 +272,7 @@ int sys_uname(struct old_utsname __user + int err = 0; + + down_read(&uts_sem); +- if (copy_to_user(name, &system_utsname, sizeof(*name))) ++ if (copy_to_user(name, &ve_utsname, sizeof(*name))) + err = -EFAULT; + up_read(&uts_sem); + if (!err) +@@ -288,19 +288,19 @@ int sys_olduname(struct oldold_utsname _ + return -EFAULT; + + down_read(&uts_sem); +- error = __copy_to_user(&name->sysname, &system_utsname.sysname, ++ error = __copy_to_user(&name->sysname, &ve_utsname.sysname, + __OLD_UTS_LEN); + error |= __put_user(0, name->sysname + __OLD_UTS_LEN); +- error |= __copy_to_user(&name->nodename, &system_utsname.nodename, ++ error |= __copy_to_user(&name->nodename, &ve_utsname.nodename, + __OLD_UTS_LEN); + error |= __put_user(0, name->nodename + __OLD_UTS_LEN); +- error |= __copy_to_user(&name->release, &system_utsname.release, ++ error |= __copy_to_user(&name->release, &ve_utsname.release, + __OLD_UTS_LEN); + error |= __put_user(0, name->release + __OLD_UTS_LEN); +- error |= __copy_to_user(&name->version, &system_utsname.version, ++ error |= __copy_to_user(&name->version, &ve_utsname.version, + __OLD_UTS_LEN); + error |= __put_user(0, name->version + __OLD_UTS_LEN); +- error |= __copy_to_user(&name->machine, &system_utsname.machine, ++ error |= __copy_to_user(&name->machine, &ve_utsname.machine, + 
__OLD_UTS_LEN); + error |= override_machine(name->machine); + up_read(&uts_sem); +diff -upr linux-2.6.16.orig/arch/powerpc/kernel/systbl.S linux-2.6.16-026test015/arch/powerpc/kernel/systbl.S +--- linux-2.6.16.orig/arch/powerpc/kernel/systbl.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/kernel/systbl.S 2006-07-04 14:41:37.000000000 +0400 +@@ -322,3 +322,12 @@ SYSCALL(spu_create) + COMPAT_SYS(pselect6) + COMPAT_SYS(ppoll) + SYSCALL(unshare) ++ ++.rept 410 - (. - sys_call_table)/8 ++SYSX(sys_ni_syscall, sys_ni_syscall, sys_ni_syscall) ++.endr ++ ++SYSX(sys_getluid, sys_ni_syscall, sys_getluid) ++SYSX(sys_setluid, sys_ni_syscall, sys_setluid) ++SYSX(sys_setublimit, sys_ni_syscall, sys_setublimit) ++SYSX(sys_ubstat, sys_ni_syscall, sys_ubstat) +diff -upr linux-2.6.16.orig/arch/powerpc/kernel/time.c linux-2.6.16-026test015/arch/powerpc/kernel/time.c +--- linux-2.6.16.orig/arch/powerpc/kernel/time.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/kernel/time.c 2006-07-04 14:41:38.000000000 +0400 +@@ -431,12 +431,14 @@ void timer_interrupt(struct pt_regs * re + int next_dec; + int cpu = smp_processor_id(); + unsigned long ticks; ++ struct ve_struct *ve; + + #ifdef CONFIG_PPC32 + if (atomic_read(&ppc_n_lost_interrupts) != 0) + do_IRQ(regs); + #endif + ++ ve = set_exec_env(get_ve0()); + irq_enter(); + + profile_tick(CPU_PROFILING, regs); +@@ -496,6 +498,7 @@ void timer_interrupt(struct pt_regs * re + #endif + + irq_exit(); ++ (void)set_exec_env(ve); + } + + void wakeup_decrementer(void) +diff -upr linux-2.6.16.orig/arch/powerpc/mm/fault.c linux-2.6.16-026test015/arch/powerpc/mm/fault.c +--- linux-2.6.16.orig/arch/powerpc/mm/fault.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/mm/fault.c 2006-07-04 14:41:37.000000000 +0400 +@@ -307,7 +307,6 @@ good_area: + * make sure we exit gracefully rather than endlessly redo + * the fault. 
+ */ +- survive: + switch (handle_mm_fault(mm, vma, address, is_write)) { + + case VM_FAULT_MINOR: +@@ -351,14 +350,12 @@ bad_area_nosemaphore: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (current->pid == 1) { +- yield(); +- down_read(&mm->mmap_sem); +- goto survive; +- } +- printk("VM: killing process %s\n", current->comm); + if (user_mode(regs)) +- do_exit(SIGKILL); ++ /* ++ * 0-order allocation always success if something really ++ * fatal not happen: beancounter overdraft or OOM. Den ++ */ ++ force_sig(SIGKILL, current); + return SIGKILL; + + do_sigbus: +diff -upr linux-2.6.16.orig/arch/powerpc/mm/init_64.c linux-2.6.16-026test015/arch/powerpc/mm/init_64.c +--- linux-2.6.16.orig/arch/powerpc/mm/init_64.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/mm/init_64.c 2006-07-04 14:41:37.000000000 +0400 +@@ -225,7 +225,8 @@ void pgtable_cache_init(void) + pgtable_cache[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN | +- SLAB_MUST_HWCACHE_ALIGN, ++ SLAB_MUST_HWCACHE_ALIGN | ++ SLAB_UBC | SLAB_NO_CHARGE, + zero_ctor, + NULL); + if (! 
pgtable_cache[i]) +diff -upr linux-2.6.16.orig/arch/powerpc/mm/mem.c linux-2.6.16-026test015/arch/powerpc/mm/mem.c +--- linux-2.6.16.orig/arch/powerpc/mm/mem.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/mm/mem.c 2006-07-04 14:41:38.000000000 +0400 +@@ -222,6 +222,7 @@ void show_mem(void) + printk("%ld pages shared\n", shared); + printk("%ld pages swap cached\n", cached); + } ++EXPORT_SYMBOL(show_mem); + + /* + * Initialize the bootmem system and give it all the memory we +diff -upr linux-2.6.16.orig/arch/powerpc/mm/pgtable_32.c linux-2.6.16-026test015/arch/powerpc/mm/pgtable_32.c +--- linux-2.6.16.orig/arch/powerpc/mm/pgtable_32.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/mm/pgtable_32.c 2006-07-04 14:41:37.000000000 +0400 +@@ -85,7 +85,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + { + pgd_t *ret; + +- ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); ++ ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC | ++ __GFP_ZERO, PGDIR_ORDER); + return ret; + } + +@@ -119,6 +120,7 @@ struct page *pte_alloc_one(struct mm_str + #else + gfp_t flags = GFP_KERNEL | __GFP_REPEAT; + #endif ++ flags |= (__GFP_UBC | __GFP_SOFT_UBC); + + ptepage = alloc_pages(flags, 0); + if (ptepage) +diff -upr linux-2.6.16.orig/arch/powerpc/platforms/powermac/setup.c linux-2.6.16-026test015/arch/powerpc/platforms/powermac/setup.c +--- linux-2.6.16.orig/arch/powerpc/platforms/powermac/setup.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/powerpc/platforms/powermac/setup.c 2006-07-04 14:41:36.000000000 +0400 +@@ -456,11 +456,23 @@ static int pmac_pm_finish(suspend_state_ + return 0; + } + ++static int pmac_pm_valid(suspend_state_t state) ++{ ++ switch (state) { ++ case PM_SUSPEND_DISK: ++ return 1; ++ /* can't do any other states via generic mechanism yet */ ++ default: ++ return 0; ++ } ++} ++ + static struct pm_ops pmac_pm_ops = { + .pm_disk_mode = PM_DISK_SHUTDOWN, + 
.prepare = pmac_pm_prepare, + .enter = pmac_pm_enter, + .finish = pmac_pm_finish, ++ .valid = pmac_pm_valid, + }; + + #endif /* CONFIG_SOFTWARE_SUSPEND */ +diff -upr linux-2.6.16.orig/arch/ppc/Kconfig linux-2.6.16-026test015/arch/ppc/Kconfig +--- linux-2.6.16.orig/arch/ppc/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ppc/Kconfig 2006-07-04 14:41:39.000000000 +0400 +@@ -920,6 +920,7 @@ config NR_CPUS + config HIGHMEM + bool "High memory support" + ++source "kernel/Kconfig.fairsched" + source kernel/Kconfig.hz + source kernel/Kconfig.preempt + source "mm/Kconfig" +@@ -1394,6 +1395,10 @@ source "arch/powerpc/oprofile/Kconfig" + + source "arch/ppc/Kconfig.debug" + ++source "kernel/Kconfig.openvz" ++ + source "security/Kconfig" + ++source "kernel/ub/Kconfig" ++ + source "crypto/Kconfig" +diff -upr linux-2.6.16.orig/arch/ppc/kernel/misc.S linux-2.6.16-026test015/arch/ppc/kernel/misc.S +--- linux-2.6.16.orig/arch/ppc/kernel/misc.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ppc/kernel/misc.S 2006-07-04 14:41:37.000000000 +0400 +@@ -1004,7 +1004,7 @@ _GLOBAL(_get_SP) + * Create a kernel thread + * kernel_thread(fn, arg, flags) + */ +-_GLOBAL(kernel_thread) ++_GLOBAL(ppc_kernel_thread) + stwu r1,-16(r1) + stw r30,8(r1) + stw r31,12(r1) +diff -upr linux-2.6.16.orig/arch/ppc/kernel/time.c linux-2.6.16-026test015/arch/ppc/kernel/time.c +--- linux-2.6.16.orig/arch/ppc/kernel/time.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ppc/kernel/time.c 2006-07-04 14:41:38.000000000 +0400 +@@ -58,6 +58,8 @@ + #include <linux/init.h> + #include <linux/profile.h> + ++#include <ub/beancounter.h> ++ + #include <asm/io.h> + #include <asm/nvram.h> + #include <asm/cache.h> +@@ -136,10 +138,14 @@ void timer_interrupt(struct pt_regs * re + unsigned long cpu = smp_processor_id(); + unsigned jiffy_stamp = last_jiffy_stamp(cpu); + extern void do_IRQ(struct pt_regs *); ++ struct ve_struct *ve; ++ struct 
user_beancounter *ub; + + if (atomic_read(&ppc_n_lost_interrupts) != 0) + do_IRQ(regs); + ++ ve = set_exec_env(get_ve0()); ++ ub = set_exec_ub(get_ub0()); + irq_enter(); + + while ((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) <= 0) { +@@ -192,6 +198,8 @@ void timer_interrupt(struct pt_regs * re + ppc_md.heartbeat(); + + irq_exit(); ++ (void)set_exec_ub(ub); ++ (void)set_exec_env(ve); + } + + /* +diff -upr linux-2.6.16.orig/arch/ppc/mm/fault.c linux-2.6.16-026test015/arch/ppc/mm/fault.c +--- linux-2.6.16.orig/arch/ppc/mm/fault.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ppc/mm/fault.c 2006-07-04 14:41:37.000000000 +0400 +@@ -247,7 +247,6 @@ good_area: + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ +- survive: + switch (handle_mm_fault(mm, vma, address, is_write)) { + case VM_FAULT_MINOR: + current->min_flt++; +@@ -290,14 +289,12 @@ bad_area: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (current->pid == 1) { +- yield(); +- down_read(&mm->mmap_sem); +- goto survive; +- } +- printk("VM: killing process %s\n", current->comm); + if (user_mode(regs)) +- do_exit(SIGKILL); ++ /* ++ * 0-order allocation always success if something really ++ * fatal not happen: beancounter overdraft or OOM. 
Den ++ */ ++ force_sig(SIGKILL, current); + return SIGKILL; + + do_sigbus: +diff -upr linux-2.6.16.orig/arch/ppc/mm/init.c linux-2.6.16-026test015/arch/ppc/mm/init.c +--- linux-2.6.16.orig/arch/ppc/mm/init.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ppc/mm/init.c 2006-07-04 14:41:38.000000000 +0400 +@@ -132,6 +132,7 @@ void show_mem(void) + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); + } ++EXPORT_SYMBOL(show_mem); + + /* Free up now-unused memory */ + static void free_sec(unsigned long start, unsigned long end, const char *name) +diff -upr linux-2.6.16.orig/arch/ppc/mm/pgtable.c linux-2.6.16-026test015/arch/ppc/mm/pgtable.c +--- linux-2.6.16.orig/arch/ppc/mm/pgtable.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/ppc/mm/pgtable.c 2006-07-04 14:41:37.000000000 +0400 +@@ -84,7 +84,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + { + pgd_t *ret; + +- ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); ++ ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC | ++ __GFP_ZERO, PGDIR_ORDER); + return ret; + } + +@@ -118,6 +119,7 @@ struct page *pte_alloc_one(struct mm_str + #else + gfp_t flags = GFP_KERNEL | __GFP_REPEAT; + #endif ++ flags |= (__GFP_UBC | __GFP_SOFT_UBC); + + ptepage = alloc_pages(flags, 0); + if (ptepage) +diff -upr linux-2.6.16.orig/arch/s390/Kconfig linux-2.6.16-026test015/arch/s390/Kconfig +--- linux-2.6.16.orig/arch/s390/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/s390/Kconfig 2006-07-04 14:41:37.000000000 +0400 +@@ -472,8 +472,12 @@ source "arch/s390/oprofile/Kconfig" + + source "arch/s390/Kconfig.debug" + ++source "kernel/Kconfig.openvz" ++ + source "security/Kconfig" + + source "crypto/Kconfig" + + source "lib/Kconfig" ++ ++source "kernel/ub/Kconfig" +diff -upr linux-2.6.16.orig/arch/s390/kernel/process.c linux-2.6.16-026test015/arch/s390/kernel/process.c +--- linux-2.6.16.orig/arch/s390/kernel/process.c 
2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/s390/kernel/process.c 2006-07-04 14:41:38.000000000 +0400 +@@ -164,9 +164,10 @@ void show_regs(struct pt_regs *regs) + struct task_struct *tsk = current; + + printk("CPU: %d %s\n", task_thread_info(tsk)->cpu, print_tainted()); +- printk("Process %s (pid: %d, task: %p, ksp: %p)\n", +- current->comm, current->pid, (void *) tsk, +- (void *) tsk->thread.ksp); ++ printk("Process %s (pid: %d, veid: %d, task: %p, ksp: %p)\n", ++ current->comm, current->pid, ++ VEID(VE_TASK_INFO(current)->owner_env), ++ (void *) tsk, (void *) tsk->thread.ksp); + + show_registers(regs); + /* Show stack backtrace if pt_regs is from kernel mode */ +@@ -187,6 +188,13 @@ int kernel_thread(int (*fn)(void *), voi + { + struct pt_regs regs; + ++ if (!ve_is_super(get_exec_env())) { ++ /* Don't allow kernel_thread() inside VE */ ++ printk("kernel_thread call inside VE\n"); ++ dump_stack(); ++ return -EPERM; ++ } ++ + memset(®s, 0, sizeof(regs)); + regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; +diff -upr linux-2.6.16.orig/arch/s390/kernel/s390_ext.c linux-2.6.16-026test015/arch/s390/kernel/s390_ext.c +--- linux-2.6.16.orig/arch/s390/kernel/s390_ext.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/s390/kernel/s390_ext.c 2006-07-04 14:41:38.000000000 +0400 +@@ -114,7 +114,9 @@ void do_extint(struct pt_regs *regs, uns + { + ext_int_info_t *p; + int index; ++ struct ve_struct *envid; + ++ envid = set_exec_env(get_ve0()); + irq_enter(); + asm volatile ("mc 0,0"); + if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) +@@ -132,6 +134,7 @@ void do_extint(struct pt_regs *regs, uns + } + } + irq_exit(); ++ (void)set_exec_env(envid); + } + + EXPORT_SYMBOL(register_external_interrupt); +diff -upr linux-2.6.16.orig/arch/s390/kernel/smp.c linux-2.6.16-026test015/arch/s390/kernel/smp.c +--- linux-2.6.16.orig/arch/s390/kernel/smp.c 
2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/s390/kernel/smp.c 2006-07-04 14:41:38.000000000 +0400 +@@ -526,6 +526,17 @@ int __devinit start_secondary(void *cpuv + { + /* Setup the cpu */ + cpu_init(); ++ ++#ifdef CONFIG_VE ++ /* TSC reset. kill whatever might rely on old values */ ++ VE_TASK_INFO(current)->wakeup_stamp = 0; ++ /* ++ * Cosmetic: sleep_time won't be changed afterwards for the idle ++ * thread; keep it 0 rather than -cycles. ++ */ ++ VE_TASK_INFO(idle)->sleep_time = 0; ++#endif ++ + preempt_disable(); + /* init per CPU timer */ + init_cpu_timer(); +@@ -834,6 +845,11 @@ void __init smp_prepare_cpus(unsigned in + for_each_cpu(cpu) + if (cpu != smp_processor_id()) + smp_create_idle(cpu); ++ ++#ifdef CONFIG_VE ++ /* TSC reset. kill whatever might rely on old values */ ++ VE_TASK_INFO(current)->wakeup_stamp = 0; ++#endif + } + + void __devinit smp_prepare_boot_cpu(void) +diff -upr linux-2.6.16.orig/arch/s390/kernel/syscalls.S linux-2.6.16-026test015/arch/s390/kernel/syscalls.S +--- linux-2.6.16.orig/arch/s390/kernel/syscalls.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/s390/kernel/syscalls.S 2006-07-04 14:41:37.000000000 +0400 +@@ -312,3 +312,12 @@ SYSCALL(sys_faccessat,sys_faccessat,sys_ + SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper) + SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper) + SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper) ++ ++.rept 410-(.-sys_call_table)/4 ++ NI_SYSCALL ++.endr ++ ++SYSCALL(sys_getluid, sys_getluid, sys_ni_syscall) /* 410 */ ++SYSCALL(sys_setluid, sys_setluid, sys_ni_syscall) ++SYSCALL(sys_setublimit, sys_setublimit, sys_ni_syscall) ++SYSCALL(sys_ubstat, sys_ubstat, sys_ni_syscall) +diff -upr linux-2.6.16.orig/arch/s390/mm/fault.c linux-2.6.16-026test015/arch/s390/mm/fault.c +--- linux-2.6.16.orig/arch/s390/mm/fault.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/s390/mm/fault.c 2006-07-04 14:41:37.000000000 +0400 +@@ 
-61,17 +61,9 @@ void bust_spinlocks(int yes) + if (yes) { + oops_in_progress = 1; + } else { +- int loglevel_save = console_loglevel; + console_unblank(); + oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk will give klogd +- * a poke. Hold onto your hats... +- */ +- console_loglevel = 15; +- printk(" "); +- console_loglevel = loglevel_save; ++ wake_up_klogd(); + } + } + +diff -upr linux-2.6.16.orig/arch/s390/mm/init.c linux-2.6.16-026test015/arch/s390/mm/init.c +--- linux-2.6.16.orig/arch/s390/mm/init.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/s390/mm/init.c 2006-07-04 14:41:38.000000000 +0400 +@@ -89,6 +89,7 @@ void show_mem(void) + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); + } ++EXPORT_SYMBOL(show_mem); + + /* References to section boundaries */ + +diff -upr linux-2.6.16.orig/arch/sh/kernel/kgdb_stub.c linux-2.6.16-026test015/arch/sh/kernel/kgdb_stub.c +--- linux-2.6.16.orig/arch/sh/kernel/kgdb_stub.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/sh/kernel/kgdb_stub.c 2006-07-04 14:41:38.000000000 +0400 +@@ -412,7 +412,7 @@ static struct task_struct *get_thread(in + if (pid == PID_MAX) pid = 0; + + /* First check via PID */ +- thread = find_task_by_pid(pid); ++ thread = find_task_by_pid_all(pid); + + if (thread) + return thread; +diff -upr linux-2.6.16.orig/arch/sh64/kernel/process.c linux-2.6.16-026test015/arch/sh64/kernel/process.c +--- linux-2.6.16.orig/arch/sh64/kernel/process.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/sh64/kernel/process.c 2006-07-04 14:41:38.000000000 +0400 +@@ -906,7 +906,7 @@ asids_proc_info(char *buf, char **start, + int len=0; + struct task_struct *p; + read_lock(&tasklist_lock); +- for_each_process(p) { ++ for_each_process_ve(p) { + int pid = p->pid; + struct mm_struct *mm; + if (!pid) continue; +diff -upr 
linux-2.6.16.orig/arch/sparc64/kernel/pci_iommu.c linux-2.6.16-026test015/arch/sparc64/kernel/pci_iommu.c +--- linux-2.6.16.orig/arch/sparc64/kernel/pci_iommu.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/sparc64/kernel/pci_iommu.c 2006-07-04 14:41:36.000000000 +0400 +@@ -219,7 +219,7 @@ static inline void iommu_free_ctx(struct + * DMA for PCI device PDEV. Return non-NULL cpu-side address if + * successful and set *DMA_ADDRP to the PCI side dma address. + */ +-void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) ++void *__pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp) + { + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; +@@ -233,7 +233,7 @@ void *pci_alloc_consistent(struct pci_de + if (order >= 10) + return NULL; + +- first_page = __get_free_pages(GFP_ATOMIC, order); ++ first_page = __get_free_pages(gfp, order); + if (first_page == 0UL) + return NULL; + memset((char *)first_page, 0, PAGE_SIZE << order); +diff -upr linux-2.6.16.orig/arch/sparc64/kernel/setup.c linux-2.6.16-026test015/arch/sparc64/kernel/setup.c +--- linux-2.6.16.orig/arch/sparc64/kernel/setup.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/sparc64/kernel/setup.c 2006-07-04 14:41:38.000000000 +0400 +@@ -156,7 +156,7 @@ int prom_callback(long *args) + pte_t *ptep; + pte_t pte; + +- for_each_process(p) { ++ for_each_process_all(p) { + mm = p->mm; + if (CTX_NRBITS(mm->context) == ctx) + break; +diff -upr linux-2.6.16.orig/arch/sparc64/kernel/sparc64_ksyms.c linux-2.6.16-026test015/arch/sparc64/kernel/sparc64_ksyms.c +--- linux-2.6.16.orig/arch/sparc64/kernel/sparc64_ksyms.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/sparc64/kernel/sparc64_ksyms.c 2006-07-04 14:41:36.000000000 +0400 +@@ -221,7 +221,7 @@ EXPORT_SYMBOL(insl); + EXPORT_SYMBOL(ebus_chain); + EXPORT_SYMBOL(isa_chain); + EXPORT_SYMBOL(pci_memspace_mask); 
+-EXPORT_SYMBOL(pci_alloc_consistent); ++EXPORT_SYMBOL(__pci_alloc_consistent); + EXPORT_SYMBOL(pci_free_consistent); + EXPORT_SYMBOL(pci_map_single); + EXPORT_SYMBOL(pci_unmap_single); +diff -upr linux-2.6.16.orig/arch/sparc64/lib/checksum.S linux-2.6.16-026test015/arch/sparc64/lib/checksum.S +--- linux-2.6.16.orig/arch/sparc64/lib/checksum.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/sparc64/lib/checksum.S 2006-07-04 14:41:36.000000000 +0400 +@@ -165,8 +165,9 @@ csum_partial_end_cruft: + sll %g1, 8, %g1 + or %o5, %g1, %o4 + +-1: add %o2, %o4, %o2 ++1: addcc %o2, %o4, %o2 ++ addc %g0, %o2, %o2 + + csum_partial_finish: + retl +- mov %o2, %o0 ++ srl %o2, 0, %o0 +diff -upr linux-2.6.16.orig/arch/sparc64/lib/csum_copy.S linux-2.6.16-026test015/arch/sparc64/lib/csum_copy.S +--- linux-2.6.16.orig/arch/sparc64/lib/csum_copy.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/sparc64/lib/csum_copy.S 2006-07-04 14:41:36.000000000 +0400 +@@ -221,11 +221,12 @@ FUNC_NAME: /* %o0=src, %o1=dst, %o2=len + sll %g1, 8, %g1 + or %o5, %g1, %o4 + +-1: add %o3, %o4, %o3 ++1: addcc %o3, %o4, %o3 ++ addc %g0, %o3, %o3 + + 70: + retl +- mov %o3, %o0 ++ srl %o3, 0, %o0 + + 95: mov 0, GLOBAL_SPARE + brlez,pn %o2, 4f +diff -upr linux-2.6.16.orig/arch/um/drivers/mconsole_kern.c linux-2.6.16-026test015/arch/um/drivers/mconsole_kern.c +--- linux-2.6.16.orig/arch/um/drivers/mconsole_kern.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/um/drivers/mconsole_kern.c 2006-07-04 14:41:38.000000000 +0400 +@@ -600,7 +600,7 @@ static void do_stack_trace(struct mc_req + + from = current; + +- to = find_task_by_pid(pid_requested); ++ to = find_task_by_pid_all(pid_requested); + if((to == NULL) || (pid_requested == 0)) { + mconsole_reply(req, "Couldn't find that pid", 1, 0); + return; +diff -upr linux-2.6.16.orig/arch/um/kernel/skas/process_kern.c linux-2.6.16-026test015/arch/um/kernel/skas/process_kern.c +--- 
linux-2.6.16.orig/arch/um/kernel/skas/process_kern.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/um/kernel/skas/process_kern.c 2006-07-04 14:41:38.000000000 +0400 +@@ -197,7 +197,7 @@ void kill_off_processes_skas(void) + int pid, me; + + me = os_getpid(); +- for_each_process(p){ ++ for_each_process_all(p){ + if(p->mm == NULL) + continue; + +diff -upr linux-2.6.16.orig/arch/um/kernel/tt/process_kern.c linux-2.6.16-026test015/arch/um/kernel/tt/process_kern.c +--- linux-2.6.16.orig/arch/um/kernel/tt/process_kern.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/um/kernel/tt/process_kern.c 2006-07-04 14:41:38.000000000 +0400 +@@ -301,7 +301,7 @@ void kill_off_processes_tt(void) + int me; + + me = os_getpid(); +- for_each_process(p){ ++ for_each_process_all(p){ + if(p->thread.mode.tt.extern_pid != me) + os_kill_process(p->thread.mode.tt.extern_pid, 0); + } +@@ -444,7 +444,7 @@ int is_valid_pid(int pid) + struct task_struct *task; + + read_lock(&tasklist_lock); +- for_each_process(task){ ++ for_each_process_all(task){ + if(task->thread.mode.tt.extern_pid == pid){ + read_unlock(&tasklist_lock); + return(1); +diff -upr linux-2.6.16.orig/arch/x86_64/Kconfig linux-2.6.16-026test015/arch/x86_64/Kconfig +--- linux-2.6.16.orig/arch/x86_64/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/Kconfig 2006-07-04 14:41:39.000000000 +0400 +@@ -246,6 +246,8 @@ config SCHED_SMT + cost of slightly increased overhead in some places. If unsure say + N here. 
+ ++source "kernel/Kconfig.fairsched" ++ + source "kernel/Kconfig.preempt" + + config NUMA +@@ -588,8 +590,12 @@ endmenu + + source "arch/x86_64/Kconfig.debug" + ++source "kernel/Kconfig.openvz" ++ + source "security/Kconfig" + + source "crypto/Kconfig" + + source "lib/Kconfig" ++ ++source "kernel/ub/Kconfig" +diff -upr linux-2.6.16.orig/arch/x86_64/boot/compressed/head.S linux-2.6.16-026test015/arch/x86_64/boot/compressed/head.S +--- linux-2.6.16.orig/arch/x86_64/boot/compressed/head.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/boot/compressed/head.S 2006-07-04 14:41:39.000000000 +0400 +@@ -34,7 +34,7 @@ + startup_32: + cld + cli +- movl $(__KERNEL_DS),%eax ++ movl $(__BOOT_DS),%eax + movl %eax,%ds + movl %eax,%es + movl %eax,%fs +@@ -76,7 +76,7 @@ startup_32: + jnz 3f + addl $8,%esp + xorl %ebx,%ebx +- ljmp $(__KERNEL_CS), $__PHYSICAL_START ++ ljmp $(__BOOT_CS), $__PHYSICAL_START + + /* + * We come here, if we were loaded high. +@@ -104,7 +104,7 @@ startup_32: + popl %eax # hcount + movl $__PHYSICAL_START,%edi + cli # make sure we don't get interrupted +- ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine ++ ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine + + /* + * Routine (template) for moving the decompressed kernel in place, +@@ -127,7 +127,7 @@ move_routine_start: + movsl + movl %ebx,%esi # Restore setup pointer + xorl %ebx,%ebx +- ljmp $(__KERNEL_CS), $__PHYSICAL_START ++ ljmp $(__BOOT_CS), $__PHYSICAL_START + move_routine_end: + + +@@ -137,5 +137,5 @@ user_stack: + .fill 4096,4,0 + stack_start: + .long user_stack+4096 +- .word __KERNEL_DS ++ .word __BOOT_DS + +diff -upr linux-2.6.16.orig/arch/x86_64/boot/setup.S linux-2.6.16-026test015/arch/x86_64/boot/setup.S +--- linux-2.6.16.orig/arch/x86_64/boot/setup.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/boot/setup.S 2006-07-04 14:41:39.000000000 +0400 +@@ -729,7 +729,7 @@ flush_instr: + subw $DELTA_INITSEG, %si + shll $4, 
%esi # Convert to 32-bit pointer + # NOTE: For high loaded big kernels we need a +-# jmpi 0x100000,__KERNEL_CS ++# jmpi 0x100000,__BOOT_CS + # + # but we yet haven't reloaded the CS register, so the default size + # of the target offset still is 16 bit. +@@ -740,7 +740,7 @@ flush_instr: + .byte 0x66, 0xea # prefix + jmpi-opcode + code32: .long 0x1000 # will be set to 0x100000 + # for big kernels +- .word __KERNEL_CS ++ .word __BOOT_CS + + # Here's a bunch of information about your current kernel.. + kernel_version: .ascii UTS_RELEASE +diff -upr linux-2.6.16.orig/arch/x86_64/ia32/Makefile linux-2.6.16-026test015/arch/x86_64/ia32/Makefile +--- linux-2.6.16.orig/arch/x86_64/ia32/Makefile 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/ia32/Makefile 2006-07-04 14:41:36.000000000 +0400 +@@ -27,5 +27,5 @@ $(obj)/vsyscall-sysenter.so $(obj)/vsysc + $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE + $(call if_changed,syscall) + +-AFLAGS_vsyscall-sysenter.o = -m32 +-AFLAGS_vsyscall-syscall.o = -m32 ++AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 ++AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 +diff -upr linux-2.6.16.orig/arch/x86_64/ia32/ia32_aout.c linux-2.6.16-026test015/arch/x86_64/ia32/ia32_aout.c +--- linux-2.6.16.orig/arch/x86_64/ia32/ia32_aout.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/ia32/ia32_aout.c 2006-07-04 14:41:38.000000000 +0400 +@@ -347,14 +347,14 @@ static int load_aout_binary(struct linux + if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && + (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) + { +- printk(KERN_NOTICE "executable not page aligned\n"); ++ ve_printk(VE_LOG, KERN_NOTICE "executable not page aligned\n"); + error_time2 = jiffies; + } + + if ((fd_offset & ~PAGE_MASK) != 0 && + (jiffies-error_time) > 5*HZ) + { +- printk(KERN_WARNING ++ ve_printk(VE_LOG, KERN_WARNING + "fd_offset is not page aligned. 
Please convert program: %s\n", + bprm->file->f_dentry->d_name.name); + error_time = jiffies; +@@ -467,7 +467,7 @@ static int load_aout_library(struct file + static unsigned long error_time; + if ((jiffies-error_time) > 5*HZ) + { +- printk(KERN_WARNING ++ ve_printk(VE_LOG, KERN_WARNING + "N_TXTOFF is not page aligned. Please convert library: %s\n", + file->f_dentry->d_name.name); + error_time = jiffies; +diff -upr linux-2.6.16.orig/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.16-026test015/arch/x86_64/ia32/ia32_binfmt.c +--- linux-2.6.16.orig/arch/x86_64/ia32/ia32_binfmt.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/ia32/ia32_binfmt.c 2006-07-04 14:41:39.000000000 +0400 +@@ -27,12 +27,14 @@ + #include <asm/ia32.h> + #include <asm/vsyscall32.h> + ++#include <ub/ub_vmpages.h> ++ + #define ELF_NAME "elf/i386" + + #define AT_SYSINFO 32 + #define AT_SYSINFO_EHDR 33 + +-int sysctl_vsyscall32 = 1; ++int sysctl_vsyscall32 = 0; + + #define ARCH_DLINFO do { \ + if (sysctl_vsyscall32) { \ +@@ -347,9 +349,15 @@ int ia32_setup_arg_pages(struct linux_bi + bprm->loader += stack_base; + bprm->exec += stack_base; + ++ ret = -ENOMEM; ++ if (ub_memory_charge(mm, IA32_STACK_TOP - ++ (PAGE_MASK & (unsigned long)bprm->p), ++ VM_STACK_FLAGS, NULL, UB_SOFT)) ++ goto err_charge; ++ + mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!mpnt) +- return -ENOMEM; ++ goto err_alloc; + + memset(mpnt, 0, sizeof(*mpnt)); + +@@ -366,11 +374,8 @@ int ia32_setup_arg_pages(struct linux_bi + mpnt->vm_flags = VM_STACK_FLAGS; + mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? 
+ PAGE_COPY_EXEC : PAGE_COPY; +- if ((ret = insert_vm_struct(mm, mpnt))) { +- up_write(&mm->mmap_sem); +- kmem_cache_free(vm_area_cachep, mpnt); +- return ret; +- } ++ if ((ret = insert_vm_struct(mm, mpnt))) ++ goto err_insert; + mm->stack_vm = mm->total_vm = vma_pages(mpnt); + } + +@@ -385,6 +390,16 @@ int ia32_setup_arg_pages(struct linux_bi + up_write(&mm->mmap_sem); + + return 0; ++ ++err_insert: ++ up_write(&mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, mpnt); ++err_alloc: ++ ub_memory_uncharge(mm, IA32_STACK_TOP - ++ (PAGE_MASK & (unsigned long)bprm->p), ++ VM_STACK_FLAGS, NULL); ++err_charge: ++ return ret; + } + EXPORT_SYMBOL(ia32_setup_arg_pages); + +diff -upr linux-2.6.16.orig/arch/x86_64/ia32/ia32_signal.c linux-2.6.16-026test015/arch/x86_64/ia32/ia32_signal.c +--- linux-2.6.16.orig/arch/x86_64/ia32/ia32_signal.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/ia32/ia32_signal.c 2006-07-04 14:41:39.000000000 +0400 +@@ -39,7 +39,6 @@ + + #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); + void signal_fault(struct pt_regs *regs, void __user *frame, char *where); + + int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +@@ -118,22 +117,17 @@ asmlinkage long + sys32_sigsuspend(int history0, int history1, old_sigset_t mask, + struct pt_regs *regs) + { +- sigset_t saveset; +- + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sighand->siglock); +- saveset = current->blocked; ++ current->saved_sigmask = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + +- regs->rax = -EINTR; +- while (1) { +- current->state = TASK_INTERRUPTIBLE; +- schedule(); +- if (do_signal(regs, &saveset)) +- return -EINTR; +- } ++ current->state = TASK_INTERRUPTIBLE; ++ schedule(); ++ set_thread_flag(TIF_RESTORE_SIGMASK); ++ return -ERESTARTNOHAND; + } + + asmlinkage long +@@ -510,11 +504,11 @@ int 
ia32_setup_frame(int sig, struct k_s + current->comm, current->pid, frame, regs->rip, frame->pretcode); + #endif + +- return 1; ++ return 0; + + give_sigsegv: + force_sigsegv(sig, current); +- return 0; ++ return -EFAULT; + } + + int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +@@ -606,9 +600,9 @@ int ia32_setup_rt_frame(int sig, struct + current->comm, current->pid, frame, regs->rip, frame->pretcode); + #endif + +- return 1; ++ return 0; + + give_sigsegv: + force_sigsegv(sig, current); +- return 0; ++ return -EFAULT; + } +diff -upr linux-2.6.16.orig/arch/x86_64/ia32/sys_ia32.c linux-2.6.16-026test015/arch/x86_64/ia32/sys_ia32.c +--- linux-2.6.16.orig/arch/x86_64/ia32/sys_ia32.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/ia32/sys_ia32.c 2006-07-04 14:41:38.000000000 +0400 +@@ -527,7 +527,7 @@ int sys32_ni_syscall(int call) + static char lastcomm[sizeof(me->comm)]; + + if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { +- printk(KERN_INFO "IA32 syscall %d from %s not implemented\n", ++ ve_printk(VE_LOG, KERN_INFO "IA32 syscall %d from %s not implemented\n", + call, me->comm); + strncpy(lastcomm, me->comm, sizeof(lastcomm)); + } +@@ -890,13 +890,13 @@ asmlinkage long sys32_olduname(struct ol + + down_read(&uts_sem); + +- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); ++ error = __copy_to_user(&name->sysname,&ve_utsname.sysname,__OLD_UTS_LEN); + __put_user(0,name->sysname+__OLD_UTS_LEN); +- __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); ++ __copy_to_user(&name->nodename,&ve_utsname.nodename,__OLD_UTS_LEN); + __put_user(0,name->nodename+__OLD_UTS_LEN); +- __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); ++ __copy_to_user(&name->release,&ve_utsname.release,__OLD_UTS_LEN); + __put_user(0,name->release+__OLD_UTS_LEN); +- __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); ++ 
__copy_to_user(&name->version,&ve_utsname.version,__OLD_UTS_LEN); + __put_user(0,name->version+__OLD_UTS_LEN); + { + char *arch = "x86_64"; +@@ -919,7 +919,7 @@ long sys32_uname(struct old_utsname __us + if (!name) + return -EFAULT; + down_read(&uts_sem); +- err=copy_to_user(name, &system_utsname, sizeof (*name)); ++ err=copy_to_user(name, &ve_utsname, sizeof (*name)); + up_read(&uts_sem); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); +@@ -1005,7 +1005,7 @@ long sys32_vm86_warning(void) + struct task_struct *me = current; + static char lastcomm[sizeof(me->comm)]; + if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { +- printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", ++ ve_printk(VE_LOG, KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", + me->comm); + strncpy(lastcomm, me->comm, sizeof(lastcomm)); + } +diff -upr linux-2.6.16.orig/arch/x86_64/ia32/syscall32.c linux-2.6.16-026test015/arch/x86_64/ia32/syscall32.c +--- linux-2.6.16.orig/arch/x86_64/ia32/syscall32.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/ia32/syscall32.c 2006-07-04 14:41:37.000000000 +0400 +@@ -14,6 +14,8 @@ + #include <asm/tlbflush.h> + #include <asm/ia32_unistd.h> + ++#include <ub/ub_vmpages.h> ++ + extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; + extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; + extern int sysctl_vsyscall32; +@@ -47,32 +49,45 @@ int syscall32_setup_pages(struct linux_b + int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; ++ unsigned long flags; + int ret; + ++ flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | VM_MAYWRITE | ++ mm->def_flags; ++ ++ ret = -ENOMEM; ++ if (ub_memory_charge(mm, VSYSCALL32_END - VSYSCALL32_BASE, ++ flags, NULL, UB_SOFT)) ++ goto err_charge; ++ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!vma)
+- return -ENOMEM; ++ goto err_alloc; + + memset(vma, 0, sizeof(struct vm_area_struct)); + /* Could randomize here */ + vma->vm_start = VSYSCALL32_BASE; + vma->vm_end = VSYSCALL32_END; + /* MAYWRITE to allow gdb to COW and set breakpoints */ +- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; +- vma->vm_flags |= mm->def_flags; ++ vma->vm_flags = flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + vma->vm_ops = &syscall32_vm_ops; + vma->vm_mm = mm; + + down_write(&mm->mmap_sem); +- if ((ret = insert_vm_struct(mm, vma))) { +- up_write(&mm->mmap_sem); +- kmem_cache_free(vm_area_cachep, vma); +- return ret; +- } ++ if ((ret = insert_vm_struct(mm, vma))) ++ goto err_ins; + mm->total_vm += npages; + up_write(&mm->mmap_sem); + return 0; ++ ++err_ins: ++ up_write(&mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++err_alloc: ++ ub_memory_uncharge(mm, VSYSCALL32_END - VSYSCALL32_BASE, flags, NULL); ++err_charge: ++ return ret; + } + + static int __init init_syscall32(void) +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/acpi/wakeup.S linux-2.6.16-026test015/arch/x86_64/kernel/acpi/wakeup.S +--- linux-2.6.16.orig/arch/x86_64/kernel/acpi/wakeup.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/acpi/wakeup.S 2006-07-04 14:41:39.000000000 +0400 +@@ -77,7 +77,7 @@ wakeup_code: + + .byte 0x66, 0xea # prefix + jmpi-opcode + .long wakeup_32 - __START_KERNEL_map +- .word __KERNEL_CS ++ .word __BOOT_CS + + .code32 + wakeup_32: +@@ -96,13 +96,13 @@ wakeup_32: + jnc bogus_cpu + movl %edx,%edi + +- movw $__KERNEL_DS, %ax ++ movw $__BOOT_DS, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %fs + movw %ax, %gs + +- movw $__KERNEL_DS, %ax ++ movw $__BOOT_DS, %ax + movw %ax, %ss + + mov $(wakeup_stack - __START_KERNEL_map), %esp +@@ -187,7 +187,7 @@ reach_compatibility_mode: + + wakeup_jumpvector: + .long wakeup_long64 - __START_KERNEL_map +- .word __KERNEL_CS ++ .word __BOOT_CS + + .code64 + +diff -upr 
linux-2.6.16.orig/arch/x86_64/kernel/apic.c linux-2.6.16-026test015/arch/x86_64/kernel/apic.c +--- linux-2.6.16.orig/arch/x86_64/kernel/apic.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/apic.c 2006-07-04 14:41:38.000000000 +0400 +@@ -941,6 +941,7 @@ void smp_local_timer_interrupt(struct pt + */ + void smp_apic_timer_interrupt(struct pt_regs *regs) + { ++ struct ve_struct *ve; + /* + * the NMI deadlock-detector uses this. + */ +@@ -957,9 +958,11 @@ void smp_apic_timer_interrupt(struct pt_ + * interrupt lock, which is the WrongThing (tm) to do. + */ + exit_idle(); ++ ve = set_exec_env(get_ve0()); + irq_enter(); + smp_local_timer_interrupt(regs); + irq_exit(); ++ (void)set_exec_env(ve); + } + + /* +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/entry.S linux-2.6.16-026test015/arch/x86_64/kernel/entry.S +--- linux-2.6.16.orig/arch/x86_64/kernel/entry.S 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/entry.S 2006-07-04 14:41:39.000000000 +0400 +@@ -180,6 +180,10 @@ rff_trace: + * + * XXX if we had a free scratch register we could save the RSP into the stack frame + * and report it properly in ps. Unfortunately we haven't. ++ * ++ * When user can change the frames always force IRET. That is because ++ * it deals with uncanonical addresses better. SYSRET has trouble ++ * with them due to bugs in both AMD and Intel CPUs. + */ + + ENTRY(system_call) +@@ -244,7 +248,7 @@ sysret_careful: + /* Handle a signal */ + sysret_signal: + sti +- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx ++ testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx + jz 1f + + /* Really a signal */ +@@ -254,7 +258,10 @@ sysret_signal: + xorl %esi,%esi # oldset -> arg2 + call ptregscall_common + 1: movl $_TIF_NEED_RESCHED,%edi +- jmp sysret_check ++ /* Use IRET because user could have changed frame. This ++ works because ptregscall_common has called FIXUP_TOP_OF_STACK. 
*/ ++ cli ++ jmp int_with_check + + badsys: + movq $-ENOSYS,RAX-ARGOFFSET(%rsp) +@@ -274,13 +281,9 @@ tracesys: + ja 1f + movq %r10,%rcx /* fixup for C */ + call *sys_call_table(,%rax,8) +- movq %rax,RAX-ARGOFFSET(%rsp) +-1: SAVE_REST +- movq %rsp,%rdi +- call syscall_trace_leave +- RESTORE_TOP_OF_STACK %rbx +- RESTORE_REST +- jmp ret_from_sys_call ++1: movq %rax,RAX-ARGOFFSET(%rsp) ++ /* Use IRET because user could have changed frame */ ++ jmp int_ret_from_sys_call + CFI_ENDPROC + + /* +@@ -350,7 +353,7 @@ int_very_careful: + jmp int_restore_rest + + int_signal: +- testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx ++ testl $(_TIF_NOTIFY_RESUME|_TIF_RESTORE_SIGMASK|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx + jz 1f + movq %rsp,%rdi # &ptregs -> arg1 + xorl %esi,%esi # oldset -> arg2 +@@ -408,25 +411,9 @@ ENTRY(stub_execve) + CFI_ADJUST_CFA_OFFSET -8 + CFI_REGISTER rip, r11 + SAVE_REST +- movq %r11, %r15 +- CFI_REGISTER rip, r15 + FIXUP_TOP_OF_STACK %r11 + call sys_execve +- GET_THREAD_INFO(%rcx) +- bt $TIF_IA32,threadinfo_flags(%rcx) +- CFI_REMEMBER_STATE +- jc exec_32bit + RESTORE_TOP_OF_STACK %r11 +- movq %r15, %r11 +- CFI_REGISTER rip, r11 +- RESTORE_REST +- pushq %r11 +- CFI_ADJUST_CFA_OFFSET 8 +- CFI_REL_OFFSET rip, 0 +- ret +- +-exec_32bit: +- CFI_RESTORE_STATE + movq %rax,RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call +@@ -574,7 +561,7 @@ retint_careful: + jmp retint_check + + retint_signal: +- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx ++ testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx + jz retint_swapgs + sti + SAVE_REST +@@ -845,7 +832,7 @@ ENTRY(kernel_thread) + xorl %r9d,%r9d + + # clone now +- call do_fork ++ call do_fork_kthread + movq %rax,RAX(%rsp) + xorl %edi,%edi + +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/head.S linux-2.6.16-026test015/arch/x86_64/kernel/head.S +--- linux-2.6.16.orig/arch/x86_64/kernel/head.S 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16-026test015/arch/x86_64/kernel/head.S 2006-07-04 14:41:39.000000000 +0400 +@@ -40,7 +40,7 @@ startup_32: + */ + + /* Initialize the %ds segment register */ +- movl $__KERNEL_DS,%eax ++ movl $__BOOT_DS,%eax + movl %eax,%ds + + /* Load new GDT with the 64bit segments using 32bit descriptor */ +@@ -183,7 +183,14 @@ startup_64: + /* esi is pointer to real mode structure with interesting info. + pass it to C */ + movl %esi, %edi +- ++ ++ /* Switch to __KERNEL_CS. The segment is the same, but selector ++ * is different. */ ++ pushq $__KERNEL_CS ++ pushq $switch_cs ++ lretq ++switch_cs: ++ + /* Finally jump to run C code and to be on real kernel address + * Since we are running on identity-mapped space we have to jump + * to the full 64bit address , this is only possible as indirect +@@ -243,7 +250,7 @@ pGDT32: + .org 0xf10 + ljumpvector: + .long startup_64-__START_KERNEL_map +- .word __KERNEL_CS ++ .word __BOOT_CS + + ENTRY(stext) + ENTRY(_stext) +@@ -355,21 +362,30 @@ gdt: + .align PAGE_SIZE + + /* The TLS descriptors are currently at a different place compared to i386. +- Hopefully nobody expects them at a fixed place (Wine?) */ ++ Hopefully nobody expects them at a fixed place (Wine?) ++ Descriptors rearranged to place 32bit and TLS selectors in the same ++ places, because it is really necessary. sysret/exit mandates order ++ of kernel/user cs/ds, so we have to extend gdt.
++*/ + + ENTRY(cpu_gdt_table) +- .quad 0x0000000000000000 /* NULL descriptor */ +- .quad 0x0 /* unused */ +- .quad 0x00af9a000000ffff /* __KERNEL_CS */ +- .quad 0x00cf92000000ffff /* __KERNEL_DS */ +- .quad 0x00cffa000000ffff /* __USER32_CS */ +- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */ +- .quad 0x00affa000000ffff /* __USER_CS */ +- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ +- .quad 0,0 /* TSS */ +- .quad 0,0 /* LDT */ +- .quad 0,0,0 /* three TLS descriptors */ +- .quad 0 /* unused */ ++ .quad 0x0000000000000000 /* 0 NULL descriptor */ ++ .quad 0x0 /* 1 unused */ ++ .quad 0x00af9a000000ffff /* 2 __BOOT_CS */ ++ .quad 0x00cf92000000ffff /* 3 __BOOT_DS */ ++ .quad 0,0 /* 4,5 TSS */ ++ .quad 0,0,0 /* 6-8 three TLS descriptors */ ++ .quad 0,0 /* 9,10 LDT */ ++ .quad 0x00cf9a000000ffff /* 11 __KERNEL32_CS */ ++ .quad 0x00af9a000000ffff /* 12 __KERNEL_CS */ ++ .quad 0x00cf92000000ffff /* 13 __KERNEL_DS */ ++ .quad 0x00cffa000000ffff /* 14 __USER32_CS */ ++ .quad 0x00cff2000000ffff /* 15 __USER_DS, __USER32_DS */ ++ .quad 0x00affa000000ffff /* 16 __USER_CS */ ++ .quad 0x0 /* 17 unused */ ++ .quad 0,0,0,0,0,0 ++ .quad 0,0,0,0,0,0,0,0 ++ + gdt_end: + /* asm/segment.h:GDT_ENTRIES must match this */ + /* This should be a multiple of the cache line size */ +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/irq.c linux-2.6.16-026test015/arch/x86_64/kernel/irq.c +--- linux-2.6.16.orig/arch/x86_64/kernel/irq.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/irq.c 2006-07-04 14:41:38.000000000 +0400 +@@ -98,12 +98,15 @@ asmlinkage unsigned int do_IRQ(struct pt + { + /* high bits used in ret_from_ code */ + unsigned irq = regs->orig_rax & 0xff; ++ struct ve_struct *ve; + + exit_idle(); ++ ve = set_exec_env(get_ve0()); + irq_enter(); + + __do_IRQ(irq, regs); + irq_exit(); ++ (void)set_exec_env(ve); + + return 1; + } +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/ldt.c linux-2.6.16-026test015/arch/x86_64/kernel/ldt.c +--- 
linux-2.6.16.orig/arch/x86_64/kernel/ldt.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/ldt.c 2006-07-04 14:41:39.000000000 +0400 +@@ -16,6 +16,7 @@ + #include <linux/smp_lock.h> + #include <linux/vmalloc.h> + #include <linux/slab.h> ++#include <linux/module.h> + + #include <asm/uaccess.h> + #include <asm/system.h> +@@ -23,6 +24,8 @@ + #include <asm/desc.h> + #include <asm/proto.h> + ++#include <ub/ub_mem.h> ++ + #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ + static void flush_ldt(void *null) + { +@@ -42,9 +45,9 @@ static int alloc_ldt(mm_context_t *pc, u + oldsize = pc->size; + mincount = (mincount+511)&(~511); + if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) +- newldt = vmalloc(mincount*LDT_ENTRY_SIZE); ++ newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE); + else +- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); ++ newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); + + if (!newldt) + return -ENOMEM; +@@ -109,6 +112,7 @@ int init_new_context(struct task_struct + } + return retval; + } ++EXPORT_SYMBOL_GPL(init_new_context); + + /* + * +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/nmi.c linux-2.6.16-026test015/arch/x86_64/kernel/nmi.c +--- linux-2.6.16.orig/arch/x86_64/kernel/nmi.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/nmi.c 2006-07-04 14:41:37.000000000 +0400 +@@ -522,6 +522,7 @@ static __kprobes int dummy_nmi_callback( + } + + static nmi_callback_t nmi_callback = dummy_nmi_callback; ++static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback; + + asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) + { +@@ -531,9 +532,21 @@ asmlinkage __kprobes void do_nmi(struct + add_pda(__nmi_count,1); + if (!rcu_dereference(nmi_callback)(regs, cpu)) + default_do_nmi(regs); ++ ++ nmi_ipi_callback(regs, cpu); + nmi_exit(); + } + ++void set_nmi_ipi_callback(nmi_callback_t callback) ++{ ++ nmi_ipi_callback = callback; ++} ++ ++void 
unset_nmi_ipi_callback(void) ++{ ++ nmi_ipi_callback = dummy_nmi_callback; ++} ++ + void set_nmi_callback(nmi_callback_t callback) + { + rcu_assign_pointer(nmi_callback, callback); +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/pci-gart.c linux-2.6.16-026test015/arch/x86_64/kernel/pci-gart.c +--- linux-2.6.16.orig/arch/x86_64/kernel/pci-gart.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/pci-gart.c 2006-07-04 14:41:36.000000000 +0400 +@@ -114,10 +114,6 @@ static unsigned long alloc_iommu(int siz + static void free_iommu(unsigned long offset, int size) + { + unsigned long flags; +- if (size == 1) { +- clear_bit(offset, iommu_gart_bitmap); +- return; +- } + spin_lock_irqsave(&iommu_bitmap_lock, flags); + __clear_bit_string(iommu_gart_bitmap, offset, size); + spin_unlock_irqrestore(&iommu_bitmap_lock, flags); +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/process.c linux-2.6.16-026test015/arch/x86_64/kernel/process.c +--- linux-2.6.16.orig/arch/x86_64/kernel/process.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/process.c 2006-07-04 14:41:39.000000000 +0400 +@@ -54,6 +54,11 @@ + #include <asm/idle.h> + + asmlinkage extern void ret_from_fork(void); ++asmlinkage extern void int_ret_from_sys_call(void); ++asmlinkage extern void execve(void); ++EXPORT_SYMBOL_GPL(ret_from_fork); ++EXPORT_SYMBOL_GPL(int_ret_from_sys_call); ++EXPORT_SYMBOL_GPL(execve); + + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; + +@@ -303,7 +308,8 @@ void __show_regs(struct pt_regs * regs) + (int)strcspn(system_utsname.version, " "), + system_utsname.version); + printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); +- printk_address(regs->rip); ++ if (decode_call_traces) ++ printk_address(regs->rip); + printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, + regs->eflags); + printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", +@@ -340,11 +346,26 @@ void __show_regs(struct pt_regs * regs) 
+ + void show_regs(struct pt_regs *regs) + { +- printk("CPU %d:", smp_processor_id()); ++ printk("CPU %d, VCPU %d:%d", smp_processor_id(), task_vsched_id(current), task_cpu(current)); + __show_regs(regs); + show_trace(&regs->rsp); + } + ++void smp_show_regs(struct pt_regs *regs, void *data) ++{ ++ static DEFINE_SPINLOCK(show_regs_lock); ++ ++ if (regs == NULL) ++ return; ++ ++ bust_spinlocks(1); ++ spin_lock(&show_regs_lock); ++ printk("----------- IPI show regs -----------\n"); ++ show_regs(regs); ++ spin_unlock(&show_regs_lock); ++ bust_spinlocks(0); ++} ++ + /* + * Free current thread data structures etc.. + */ +@@ -527,8 +548,6 @@ __switch_to(struct task_struct *prev_p, + int cpu = smp_processor_id(); + struct tss_struct *tss = &per_cpu(init_tss, cpu); + +- unlazy_fpu(prev_p); +- + /* + * Reload esp0, LDT and the page table pointer: + */ +@@ -591,6 +610,12 @@ __switch_to(struct task_struct *prev_p, + prev->userrsp = read_pda(oldrsp); + write_pda(oldrsp, next->userrsp); + write_pda(pcurrent, next_p); ++ ++ /* This must be here to ensure both math_state_restore() and ++ kernel_fpu_begin() work consistently. ++ And the AMD workaround requires it to be after DS reload.
*/ ++ unlazy_fpu(prev_p); ++ + write_pda(kernelstack, + task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); + +@@ -841,3 +866,20 @@ unsigned long arch_align_stack(unsigned + sp -= get_random_int() % 8192; + return sp & ~0xf; + } ++ ++long do_fork_kthread(unsigned long clone_flags, ++ unsigned long stack_start, ++ struct pt_regs *regs, ++ unsigned long stack_size, ++ int __user *parent_tidptr, ++ int __user *child_tidptr) ++{ ++ if (ve_is_super(get_exec_env())) ++ return do_fork(clone_flags, stack_start, regs, stack_size, ++ parent_tidptr, child_tidptr); ++ ++ /* Don't allow kernel_thread() inside VE */ ++ printk("kernel_thread call inside VE\n"); ++ dump_stack(); ++ return -EPERM; ++} +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/ptrace.c linux-2.6.16-026test015/arch/x86_64/kernel/ptrace.c +--- linux-2.6.16.orig/arch/x86_64/kernel/ptrace.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/ptrace.c 2006-07-04 14:41:39.000000000 +0400 +@@ -300,6 +300,15 @@ static unsigned long getreg(struct task_ + return child->thread.fs; + case offsetof(struct user_regs_struct, gs_base): + return child->thread.gs; ++ case offsetof(struct user_regs_struct, cs): ++ if (test_tsk_thread_flag(child, TIF_SYSCALL_TRACE)) { ++ val = get_stack_long(child, regno - sizeof(struct pt_regs)); ++ if (val == __USER_CS) ++ return 0x33; ++ if (val == __USER32_CS) ++ return 0x23; ++ } ++ /* fall through */ + default: + regno = regno - sizeof(struct pt_regs); + val = get_stack_long(child, regno); +@@ -581,8 +590,10 @@ static void syscall_trace(struct pt_regs + current_thread_info()->flags, current->ptrace); + #endif + ++ set_pn_state(current, (regs->rax != -ENOSYS) ? PN_STOP_LEAVE : PN_STOP_ENTRY); + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); ++ clear_pn_state(current); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. 
strace only continues with a signal if the +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/setup.c linux-2.6.16-026test015/arch/x86_64/kernel/setup.c +--- linux-2.6.16.orig/arch/x86_64/kernel/setup.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/setup.c 2006-07-04 14:41:36.000000000 +0400 +@@ -909,6 +909,10 @@ static int __init init_amd(struct cpuinf + if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + ++ /* Enable workaround for FXSAVE leak */ ++ if (c->x86 >= 6) ++ set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); ++ + r = get_model_name(c); + if (!r) { + switch (c->x86) { +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/setup64.c linux-2.6.16-026test015/arch/x86_64/kernel/setup64.c +--- linux-2.6.16.orig/arch/x86_64/kernel/setup64.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/setup64.c 2006-07-04 14:41:39.000000000 +0400 +@@ -290,3 +290,5 @@ void __cpuinit cpu_init (void) + + fpu_init(); + } ++ ++EXPORT_SYMBOL_GPL(cpu_gdt_descr); +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/signal.c linux-2.6.16-026test015/arch/x86_64/kernel/signal.c +--- linux-2.6.16.orig/arch/x86_64/kernel/signal.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/signal.c 2006-07-04 14:41:39.000000000 +0400 +@@ -40,37 +40,6 @@ int ia32_setup_frame(int sig, struct k_s + sigset_t *set, struct pt_regs * regs); + + asmlinkage long +-sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs) +-{ +- sigset_t saveset, newset; +- +- /* XXX: Don't preclude handling different sized sigset_t's. 
*/ +- if (sigsetsize != sizeof(sigset_t)) +- return -EINVAL; +- +- if (copy_from_user(&newset, unewset, sizeof(newset))) +- return -EFAULT; +- sigdelsetmask(&newset, ~_BLOCKABLE); +- +- spin_lock_irq(¤t->sighand->siglock); +- saveset = current->blocked; +- current->blocked = newset; +- recalc_sigpending(); +- spin_unlock_irq(¤t->sighand->siglock); +-#ifdef DEBUG_SIG +- printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n", +- saveset, newset, regs, regs->rip); +-#endif +- regs->rax = -EINTR; +- while (1) { +- current->state = TASK_INTERRUPTIBLE; +- schedule(); +- if (do_signal(regs, &saveset)) +- return -EINTR; +- } +-} +- +-asmlinkage long + sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, + struct pt_regs *regs) + { +@@ -344,11 +313,11 @@ static int setup_rt_frame(int sig, struc + current->comm, current->pid, frame, regs->rip, frame->pretcode); + #endif + +- return 1; ++ return 0; + + give_sigsegv: + force_sigsegv(sig, current); +- return 0; ++ return -EFAULT; + } + + /* +@@ -411,7 +380,7 @@ handle_signal(unsigned long sig, siginfo + #endif + ret = setup_rt_frame(sig, ka, info, oldset, regs); + +- if (ret) { ++ if (ret == 0) { + spin_lock_irq(¤t->sighand->siglock); + sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) +@@ -428,9 +397,10 @@ handle_signal(unsigned long sig, siginfo + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +-int do_signal(struct pt_regs *regs, sigset_t *oldset) ++static void do_signal(struct pt_regs *regs) + { + struct k_sigaction ka; ++ sigset_t *oldset; + siginfo_t info; + int signr; + +@@ -441,12 +411,14 @@ int do_signal(struct pt_regs *regs, sigs + * if so. 
+ */ + if (!user_mode(regs)) +- return 1; ++ return; + +- if (try_to_freeze()) ++ if (try_to_freeze() && !signal_pending(current)) + goto no_signal; + +- if (!oldset) ++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) ++ oldset = ¤t->saved_sigmask; ++ else + oldset = ¤t->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); +@@ -460,7 +432,15 @@ int do_signal(struct pt_regs *regs, sigs + set_debugreg(current->thread.debugreg7, 7); + + /* Whee! Actually deliver the signal. */ +- return handle_signal(signr, &info, &ka, oldset, regs); ++ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { ++ /* a signal was successfully delivered; the saved ++ * sigmask will have been stored in the signal frame, ++ * and will be restored by sigreturn, so we can simply ++ * clear the TIF_RESTORE_SIGMASK flag */ ++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) ++ clear_thread_flag(TIF_RESTORE_SIGMASK); ++ } ++ return; + } + + no_signal: +@@ -481,10 +461,16 @@ int do_signal(struct pt_regs *regs, sigs + regs->rip -= 2; + } + } +- return 0; ++ ++ /* if there's no signal to deliver, we just put the saved sigmask ++ * back */ ++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) { ++ clear_thread_flag(TIF_RESTORE_SIGMASK); ++ sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); ++ } + } + +-void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags) ++void do_notify_resume(struct pt_regs *regs, sigset_t *unused, __u32 thread_info_flags) + { + #ifdef DEBUG_SIG + printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", +@@ -498,8 +484,8 @@ void do_notify_resume(struct pt_regs *re + } + + /* deal with pending signal delivery */ +- if (thread_info_flags & _TIF_SIGPENDING) +- do_signal(regs,oldset); ++ if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) ++ do_signal(regs); + } + + void signal_fault(struct pt_regs *regs, void __user *frame, char *where) +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/smp.c 
linux-2.6.16-026test015/arch/x86_64/kernel/smp.c +--- linux-2.6.16.orig/arch/x86_64/kernel/smp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/smp.c 2006-07-04 14:41:37.000000000 +0400 +@@ -28,6 +28,7 @@ + #include <asm/proto.h> + #include <asm/apicdef.h> + #include <asm/idle.h> ++#include <asm/nmi.h> + + /* + * Smarter SMP flushing macros. +@@ -444,6 +445,84 @@ int smp_call_function (void (*func) (voi + return 0; + } + ++static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED; ++static struct nmi_call_data_struct { ++ smp_nmi_function func; ++ void *info; ++ atomic_t started; ++ atomic_t finished; ++ cpumask_t cpus_called; ++ int wait; ++} *nmi_call_data; ++ ++static int smp_nmi_callback(struct pt_regs * regs, int cpu) ++{ ++ smp_nmi_function func; ++ void *info; ++ int wait; ++ ++ func = nmi_call_data->func; ++ info = nmi_call_data->info; ++ wait = nmi_call_data->wait; ++ ack_APIC_irq(); ++ /* prevent from calling func() multiple times */ ++ if (cpu_test_and_set(cpu, nmi_call_data->cpus_called)) ++ return 0; ++ /* ++ * notify initiating CPU that I've grabbed the data and am ++ * about to execute the function ++ */ ++ mb(); ++ atomic_inc(&nmi_call_data->started); ++ /* at this point the nmi_call_data structure is out of scope */ ++ irq_enter(); ++ func(regs, info); ++ irq_exit(); ++ if (wait) ++ atomic_inc(&nmi_call_data->finished); ++ ++ return 0; ++} ++ ++int smp_nmi_call_function(smp_nmi_function func, void *info, int wait) ++{ ++ struct nmi_call_data_struct data; ++ int cpus; ++ ++ cpus = num_online_cpus() - 1; ++ if (!cpus) ++ return 0; ++ ++ data.func = func; ++ data.info = info; ++ data.wait = wait; ++ atomic_set(&data.started, 0); ++ atomic_set(&data.finished, 0); ++ cpus_clear(data.cpus_called); ++ /* prevent this cpu from calling func if NMI happens */ ++ cpu_set(smp_processor_id(), data.cpus_called); ++ ++ if (!spin_trylock(&nmi_call_lock)) ++ return -1; ++ ++ nmi_call_data = &data; ++ 
set_nmi_ipi_callback(smp_nmi_callback); ++ mb(); ++ ++ /* Send a message to all other CPUs and wait for them to respond */ ++ send_IPI_allbutself(APIC_DM_NMI); ++ while (atomic_read(&data.started) != cpus) ++ barrier(); ++ ++ unset_nmi_ipi_callback(); ++ if (wait) ++ while (atomic_read(&data.finished) != cpus) ++ barrier(); ++ spin_unlock(&nmi_call_lock); ++ ++ return 0; ++} ++ + void smp_stop_cpu(void) + { + unsigned long flags; +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/sys_x86_64.c linux-2.6.16-026test015/arch/x86_64/kernel/sys_x86_64.c +--- linux-2.6.16.orig/arch/x86_64/kernel/sys_x86_64.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/sys_x86_64.c 2006-07-04 14:41:38.000000000 +0400 +@@ -148,7 +148,7 @@ asmlinkage long sys_uname(struct new_uts + { + int err; + down_read(&uts_sem); +- err = copy_to_user(name, &system_utsname, sizeof (*name)); ++ err = copy_to_user(name, &ve_utsname, sizeof (*name)); + up_read(&uts_sem); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/time.c linux-2.6.16-026test015/arch/x86_64/kernel/time.c +--- linux-2.6.16.orig/arch/x86_64/kernel/time.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/time.c 2006-07-04 14:41:38.000000000 +0400 +@@ -66,6 +66,8 @@ unsigned long vxtime_hz = PIT_TICK_RATE; + int report_lost_ticks; /* command line option */ + unsigned long long monotonic_base; + ++EXPORT_SYMBOL(cpu_khz); ++ + struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ + + volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; +diff -upr linux-2.6.16.orig/arch/x86_64/kernel/traps.c linux-2.6.16-026test015/arch/x86_64/kernel/traps.c +--- linux-2.6.16.orig/arch/x86_64/kernel/traps.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/kernel/traps.c 2006-07-04 14:41:38.000000000 +0400 +@@ -30,6 +30,7 @@ + 
#include <linux/moduleparam.h> + #include <linux/nmi.h> + #include <linux/kprobes.h> ++#include <linux/kexec.h> + + #include <asm/system.h> + #include <asm/uaccess.h> +@@ -116,6 +117,9 @@ int printk_address(unsigned long address + char *delim = ":"; + char namebuf[128]; + ++ if (!decode_call_traces) ++ return printk("[<%016lx>]", address); ++ + symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); + if (!symname) + return printk("[<%016lx>]", address); +@@ -208,7 +212,7 @@ void show_trace(unsigned long *stack) + do while (cond) { \ + unsigned long addr = *stack++; \ + if (kernel_text_address(addr)) { \ +- if (i > 50) { \ ++ if (i > 50 && decode_call_traces) { \ + printk("\n "); \ + i = 0; \ + } \ +@@ -290,7 +294,7 @@ void show_stack(struct task_struct *tsk, + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + } +- if (i && ((i % 4) == 0)) ++ if (i && ((i % 4) == 0) && decode_call_traces) + printk("\n "); + printk("%016lx ", *stack++); + touch_nmi_watchdog(); +@@ -319,10 +323,12 @@ void show_registers(struct pt_regs *regs + + rsp = regs->rsp; + +- printk("CPU %d ", cpu); ++ printk("CPU: %d ", cpu); + __show_regs(regs); +- printk("Process %s (pid: %d, threadinfo %p, task %p)\n", +- cur->comm, cur->pid, task_thread_info(cur), cur); ++ printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n", ++ cur->comm, cur->pid, ++ VEID(VE_TASK_INFO(current)->owner_env), ++ task_thread_info(cur), cur); + + /* + * When in-kernel, we also print out the stack and code at the +@@ -434,6 +440,8 @@ void __kprobes __die(const char * str, s + printk(KERN_ALERT "RIP "); + printk_address(regs->rip); + printk(" RSP <%016lx>\n", regs->rsp); ++ if (kexec_should_crash(current)) ++ crash_kexec(regs); + } + + void die(const char * str, struct pt_regs * regs, long err) +@@ -456,8 +464,11 @@ void __kprobes die_nmi(char *str, struct + */ + printk(str, safe_smp_processor_id()); + show_registers(regs); ++ if (kexec_should_crash(current)) ++ crash_kexec(regs); + if 
(panic_on_timeout || panic_on_oops) + panic("nmi watchdog"); ++ smp_nmi_call_function(smp_show_regs, NULL, 1); + printk("console shuts up ...\n"); + oops_end(flags); + do_exit(SIGSEGV); +diff -upr linux-2.6.16.orig/arch/x86_64/mm/fault.c linux-2.6.16-026test015/arch/x86_64/mm/fault.c +--- linux-2.6.16.orig/arch/x86_64/mm/fault.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/mm/fault.c 2006-07-04 14:41:38.000000000 +0400 +@@ -41,27 +41,6 @@ + #define PF_RSVD (1<<3) + #define PF_INSTR (1<<4) + +-void bust_spinlocks(int yes) +-{ +- int loglevel_save = console_loglevel; +- if (yes) { +- oops_in_progress = 1; +- } else { +-#ifdef CONFIG_VT +- unblank_screen(); +-#endif +- oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk will give klogd +- * a poke. Hold onto your hats... +- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; +- } +-} +- + /* Sometimes the CPU reports invalid exceptions on prefetch. + Check that here and ignore. + Opcode checker based on code by Richard Brunner */ +@@ -293,7 +272,7 @@ static int vmalloc_fault(unsigned long a + } + + int page_fault_trace = 0; +-int exception_trace = 1; ++int exception_trace = 0; + + /* + * This routine handles page faults. 
It determines the address, +@@ -322,7 +301,7 @@ asmlinkage void __kprobes do_page_fault( + local_irq_enable(); + + if (unlikely(page_fault_trace)) +- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", ++ ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", + regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); + + tsk = current; +@@ -372,7 +351,6 @@ asmlinkage void __kprobes do_page_fault( + if (unlikely(in_atomic() || !mm)) + goto bad_area_nosemaphore; + +- again: + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunatly, in the case of an +@@ -476,7 +454,7 @@ bad_area_nosemaphore: + return; + + if (exception_trace && unhandled_signal(tsk, SIGSEGV)) { +- printk( ++ ve_printk(VE_LOG, + "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + tsk->pid > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, tsk->pid, address, regs->rip, +@@ -526,8 +504,10 @@ no_context: + else + printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(" at %016lx RIP: \n" KERN_ALERT,address); +- printk_address(regs->rip); +- printk("\n"); ++ if (decode_call_traces) { ++ printk_address(regs->rip); ++ printk("\n"); ++ } + dump_pagetable(address); + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; +@@ -544,13 +524,14 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (current->pid == 1) { +- yield(); +- goto again; +- } +- printk("VM: killing process %s\n", tsk->comm); +- if (error_code & 4) +- do_exit(SIGKILL); ++ if (error_code & 4) { ++ /* ++ * 0-order allocation always success if something really ++ * fatal not happen: beancounter overdraft or OOM. 
++ */ ++ force_sig(SIGKILL, tsk); ++ return; ++ } + goto no_context; + + do_sigbus: +diff -upr linux-2.6.16.orig/arch/x86_64/mm/init.c linux-2.6.16-026test015/arch/x86_64/mm/init.c +--- linux-2.6.16.orig/arch/x86_64/mm/init.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/arch/x86_64/mm/init.c 2006-07-04 14:41:38.000000000 +0400 +@@ -89,6 +89,7 @@ void show_mem(void) + printk(KERN_INFO "%lu pages shared\n",shared); + printk(KERN_INFO "%lu pages swap cached\n",cached); + } ++EXPORT_SYMBOL(show_mem); + + /* References to section boundaries */ + +diff -upr linux-2.6.16.orig/block/elevator.c linux-2.6.16-026test015/block/elevator.c +--- linux-2.6.16.orig/block/elevator.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/block/elevator.c 2006-07-04 14:41:38.000000000 +0400 +@@ -314,6 +314,7 @@ void elv_insert(request_queue_t *q, stru + { + struct list_head *pos; + unsigned ordseq; ++ int unplug_it = 1; + + rq->q = q; + +@@ -378,6 +379,11 @@ void elv_insert(request_queue_t *q, stru + } + + list_add_tail(&rq->queuelist, pos); ++ /* ++ * most requeues happen because of a busy condition, don't ++ * force unplug of the queue for that case. ++ */ ++ unplug_it = 0; + break; + + default: +@@ -386,7 +392,7 @@ void elv_insert(request_queue_t *q, stru + BUG(); + } + +- if (blk_queue_plugged(q)) { ++ if (unplug_it && blk_queue_plugged(q)) { + int nrq = q->rq.count[READ] + q->rq.count[WRITE] + - q->in_flight; + +@@ -676,7 +682,7 @@ void elv_unregister(struct elevator_type + * Iterate every thread in the process to remove the io contexts. 
+ */ + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + struct io_context *ioc = p->io_context; + if (ioc && ioc->cic) { + ioc->cic->exit(ioc->cic); +@@ -688,7 +694,7 @@ void elv_unregister(struct elevator_type + ioc->aic->dtor(ioc->aic); + ioc->aic = NULL; + } +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + read_unlock(&tasklist_lock); + + spin_lock_irq(&elv_list_lock); +diff -upr linux-2.6.16.orig/block/genhd.c linux-2.6.16-026test015/block/genhd.c +--- linux-2.6.16.orig/block/genhd.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/block/genhd.c 2006-07-04 14:41:38.000000000 +0400 +@@ -16,9 +16,8 @@ + #include <linux/kobj_map.h> + #include <linux/buffer_head.h> + +-#define MAX_PROBE_HASH 255 /* random */ +- +-static struct subsystem block_subsys; ++struct subsystem block_subsys; ++EXPORT_SYMBOL(block_subsys); + + static DECLARE_MUTEX(block_subsys_sem); + +@@ -30,108 +29,29 @@ static struct blk_major_name { + struct blk_major_name *next; + int major; + char name[16]; +-} *major_names[MAX_PROBE_HASH]; ++} *major_names[BLKDEV_MAJOR_HASH_SIZE]; + + /* index in the above - for now: assume no multimajor ranges */ + static inline int major_to_index(int major) + { +- return major % MAX_PROBE_HASH; +-} +- +-struct blkdev_info { +- int index; +- struct blk_major_name *bd; +-}; +- +-/* +- * iterate over a list of blkdev_info structures. 
allows +- * the major_names array to be iterated over from outside this file +- * must be called with the block_subsys_sem held +- */ +-void *get_next_blkdev(void *dev) +-{ +- struct blkdev_info *info; +- +- if (dev == NULL) { +- info = kmalloc(sizeof(*info), GFP_KERNEL); +- if (!info) +- goto out; +- info->index=0; +- info->bd = major_names[info->index]; +- if (info->bd) +- goto out; +- } else { +- info = dev; +- } +- +- while (info->index < ARRAY_SIZE(major_names)) { +- if (info->bd) +- info->bd = info->bd->next; +- if (info->bd) +- goto out; +- /* +- * No devices on this chain, move to the next +- */ +- info->index++; +- info->bd = (info->index < ARRAY_SIZE(major_names)) ? +- major_names[info->index] : NULL; +- if (info->bd) +- goto out; +- } +- +-out: +- return info; +-} +- +-void *acquire_blkdev_list(void) +-{ +- down(&block_subsys_sem); +- return get_next_blkdev(NULL); +-} +- +-void release_blkdev_list(void *dev) +-{ +- up(&block_subsys_sem); +- kfree(dev); ++ return major % BLKDEV_MAJOR_HASH_SIZE; + } + ++#ifdef CONFIG_PROC_FS + +-/* +- * Count the number of records in the blkdev_list. +- * must be called with the block_subsys_sem held +- */ +-int count_blkdev_list(void) ++void blkdev_show(struct seq_file *f, off_t offset) + { +- struct blk_major_name *n; +- int i, count; +- +- count = 0; ++ struct blk_major_name *dp; + +- for (i = 0; i < ARRAY_SIZE(major_names); i++) { +- for (n = major_names[i]; n; n = n->next) +- count++; ++ if (offset < BLKDEV_MAJOR_HASH_SIZE) { ++ down(&block_subsys_sem); ++ for (dp = major_names[offset]; dp; dp = dp->next) ++ seq_printf(f, "%3d %s\n", dp->major, dp->name); ++ up(&block_subsys_sem); + } +- +- return count; +-} +- +-/* +- * extract the major and name values from a blkdev_info struct +- * passed in as a void to *dev. 
Must be called with +- * block_subsys_sem held +- */ +-int get_blkdev_info(void *dev, int *major, char **name) +-{ +- struct blkdev_info *info = dev; +- +- if (info->bd == NULL) +- return 1; +- +- *major = info->bd->major; +- *name = info->bd->name; +- return 0; + } + ++#endif /* CONFIG_PROC_FS */ + + int register_blkdev(unsigned int major, const char *name) + { +@@ -592,7 +512,7 @@ static struct kset_uevent_ops block_ueve + }; + + /* declare block_subsys. */ +-static decl_subsys(block, &ktype_block, &block_uevent_ops); ++decl_subsys(block, &ktype_block, &block_uevent_ops); + + + /* +diff -upr linux-2.6.16.orig/block/ll_rw_blk.c linux-2.6.16-026test015/block/ll_rw_blk.c +--- linux-2.6.16.orig/block/ll_rw_blk.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/block/ll_rw_blk.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1719,8 +1719,21 @@ void blk_run_queue(struct request_queue + + spin_lock_irqsave(q->queue_lock, flags); + blk_remove_plug(q); +- if (!elv_queue_empty(q)) +- q->request_fn(q); ++ ++ /* ++ * Only recurse once to avoid overrunning the stack, let the unplug ++ * handling reinvoke the handler shortly if we already got there. 
++ */ ++ if (!elv_queue_empty(q)) { ++ if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { ++ q->request_fn(q); ++ clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); ++ } else { ++ blk_plug_device(q); ++ kblockd_schedule_work(&q->unplug_work); ++ } ++ } ++ + spin_unlock_irqrestore(q->queue_lock, flags); + } + EXPORT_SYMBOL(blk_run_queue); +diff -upr linux-2.6.16.orig/drivers/acpi/processor_perflib.c linux-2.6.16-026test015/drivers/acpi/processor_perflib.c +--- linux-2.6.16.orig/drivers/acpi/processor_perflib.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/acpi/processor_perflib.c 2006-07-04 14:41:36.000000000 +0400 +@@ -577,6 +577,8 @@ acpi_processor_register_performance(stru + return_VALUE(-EBUSY); + } + ++ WARN_ON(!performance); ++ + pr->performance = performance; + + if (acpi_processor_get_performance_info(pr)) { +@@ -609,7 +611,8 @@ acpi_processor_unregister_performance(st + return_VOID; + } + +- kfree(pr->performance->states); ++ if (pr->performance) ++ kfree(pr->performance->states); + pr->performance = NULL; + + acpi_cpufreq_remove_file(pr); +diff -upr linux-2.6.16.orig/drivers/base/class.c linux-2.6.16-026test015/drivers/base/class.c +--- linux-2.6.16.orig/drivers/base/class.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/base/class.c 2006-07-04 14:41:38.000000000 +0400 +@@ -72,8 +72,13 @@ static struct kobj_type ktype_class = { + }; + + /* Hotplug events for classes go to the class_obj subsys */ +-static decl_subsys(class, &ktype_class, NULL); ++decl_subsys(class, &ktype_class, NULL); + ++#ifndef CONFIG_VE ++#define visible_class_subsys class_subsys ++#else ++#define visible_class_subsys (*get_exec_env()->class_subsys) ++#endif + + int class_create_file(struct class * cls, const struct class_attribute * attr) + { +@@ -148,7 +153,7 @@ int class_register(struct class * cls) + if (error) + return error; + +- subsys_set_kset(cls, class_subsys); ++ subsys_set_kset(cls, visible_class_subsys); + + error 
= subsystem_register(&cls->subsys); + if (!error) { +@@ -420,8 +425,13 @@ static struct kset_uevent_ops class_ueve + .uevent = class_uevent, + }; + +-static decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops); ++decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops); + ++#ifndef CONFIG_VE ++#define visible_class_obj_subsys class_obj_subsys ++#else ++#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys) ++#endif + + static int class_device_add_attrs(struct class_device * cd) + { +@@ -470,7 +480,7 @@ static ssize_t store_uevent(struct class + + void class_device_initialize(struct class_device *class_dev) + { +- kobj_set_kset_s(class_dev, class_obj_subsys); ++ kobj_set_kset_s(class_dev, visible_class_obj_subsys); + kobject_init(&class_dev->kobj); + INIT_LIST_HEAD(&class_dev->node); + } +@@ -805,12 +815,19 @@ void class_interface_unregister(struct c + class_put(parent); + } + +- ++void prepare_sysfs_classes(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->class_subsys = &class_subsys; ++ get_ve0()->class_obj_subsys = &class_obj_subsys; ++#endif ++} + + int __init classes_init(void) + { + int retval; + ++ prepare_sysfs_classes(); + retval = subsystem_register(&class_subsys); + if (retval) + return retval; +@@ -848,3 +865,6 @@ EXPORT_SYMBOL_GPL(class_device_remove_bi + + EXPORT_SYMBOL_GPL(class_interface_register); + EXPORT_SYMBOL_GPL(class_interface_unregister); ++ ++EXPORT_SYMBOL(class_subsys); ++EXPORT_SYMBOL(class_obj_subsys); +diff -upr linux-2.6.16.orig/drivers/base/cpu.c linux-2.6.16-026test015/drivers/base/cpu.c +--- linux-2.6.16.orig/drivers/base/cpu.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/base/cpu.c 2006-07-04 14:41:36.000000000 +0400 +@@ -141,7 +141,7 @@ int __devinit register_cpu(struct cpu *c + return error; + } + +-struct sys_device *get_cpu_sysdev(int cpu) ++struct sys_device *get_cpu_sysdev(unsigned cpu) + { + if (cpu < NR_CPUS) + return cpu_sys_devices[cpu]; +diff -upr 
linux-2.6.16.orig/drivers/base/firmware_class.c linux-2.6.16-026test015/drivers/base/firmware_class.c +--- linux-2.6.16.orig/drivers/base/firmware_class.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/base/firmware_class.c 2006-07-04 14:41:36.000000000 +0400 +@@ -211,18 +211,20 @@ static int + fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size) + { + u8 *new_data; ++ int new_size = fw_priv->alloc_size; + + if (min_size <= fw_priv->alloc_size) + return 0; + +- new_data = vmalloc(fw_priv->alloc_size + PAGE_SIZE); ++ new_size = ALIGN(min_size, PAGE_SIZE); ++ new_data = vmalloc(new_size); + if (!new_data) { + printk(KERN_ERR "%s: unable to alloc buffer\n", __FUNCTION__); + /* Make sure that we don't keep incomplete data */ + fw_load_abort(fw_priv); + return -ENOMEM; + } +- fw_priv->alloc_size += PAGE_SIZE; ++ fw_priv->alloc_size = new_size; + if (fw_priv->fw->data) { + memcpy(new_data, fw_priv->fw->data, fw_priv->fw->size); + vfree(fw_priv->fw->data); +diff -upr linux-2.6.16.orig/drivers/base/node.c linux-2.6.16-026test015/drivers/base/node.c +--- linux-2.6.16.orig/drivers/base/node.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/base/node.c 2006-07-04 14:41:36.000000000 +0400 +@@ -106,7 +106,7 @@ static ssize_t node_read_numastat(struct + other_node = 0; + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zone *z = &pg->node_zones[i]; +- for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ for_each_online_cpu(cpu) { + struct per_cpu_pageset *ps = zone_pcp(z,cpu); + numa_hit += ps->numa_hit; + numa_miss += ps->numa_miss; +diff -upr linux-2.6.16.orig/drivers/block/cciss.c linux-2.6.16-026test015/drivers/block/cciss.c +--- linux-2.6.16.orig/drivers/block/cciss.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/block/cciss.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1181,6 +1181,53 @@ static int revalidate_allvol(ctlr_info_t + return 0; + } + ++static inline void complete_buffers(struct bio *bio, int 
status) ++{ ++ while (bio) { ++ struct bio *xbh = bio->bi_next; ++ int nr_sectors = bio_sectors(bio); ++ ++ bio->bi_next = NULL; ++ blk_finished_io(len); ++ bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO); ++ bio = xbh; ++ } ++ ++} ++ ++static void cciss_softirq_done(struct request *rq) ++{ ++ CommandList_struct *cmd = rq->completion_data; ++ ctlr_info_t *h = hba[cmd->ctlr]; ++ unsigned long flags; ++ u64bit temp64; ++ int i, ddir; ++ ++ if (cmd->Request.Type.Direction == XFER_READ) ++ ddir = PCI_DMA_FROMDEVICE; ++ else ++ ddir = PCI_DMA_TODEVICE; ++ ++ /* command did not need to be retried */ ++ /* unmap the DMA mapping for all the scatter gather elements */ ++ for(i=0; i<cmd->Header.SGList; i++) { ++ temp64.val32.lower = cmd->SG[i].Addr.lower; ++ temp64.val32.upper = cmd->SG[i].Addr.upper; ++ pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir); ++ } ++ ++ complete_buffers(rq->bio, rq->errors); ++ ++#ifdef CCISS_DEBUG ++ printk("Done with %p\n", rq); ++#endif /* CCISS_DEBUG */ ++ ++ spin_lock_irqsave(&h->lock, flags); ++ end_that_request_last(rq, rq->errors); ++ cmd_free(h, cmd,1); ++ spin_unlock_irqrestore(&h->lock, flags); ++} ++ + /* This function will check the usage_count of the drive to be updated/added. + * If the usage_count is zero then the drive information will be updated and + * the disk will be re-registered with the kernel. If not then it will be +@@ -1249,6 +1296,8 @@ static void cciss_update_drive_info(int + + blk_queue_max_sectors(disk->queue, 512); + ++ blk_queue_softirq_done(disk->queue, cciss_softirq_done); ++ + disk->queue->queuedata = hba[ctlr]; + + blk_queue_hardsect_size(disk->queue, +@@ -2148,20 +2197,6 @@ static void start_io( ctlr_info_t *h) + addQ (&(h->cmpQ), c); + } + } +- +-static inline void complete_buffers(struct bio *bio, int status) +-{ +- while (bio) { +- struct bio *xbh = bio->bi_next; +- int nr_sectors = bio_sectors(bio); +- +- bio->bi_next = NULL; +- blk_finished_io(len); +- bio_endio(bio, nr_sectors << 9, status ? 
0 : -EIO); +- bio = xbh; +- } +- +-} + /* Assumes that CCISS_LOCK(h->ctlr) is held. */ + /* Zeros out the error record and then resends the command back */ + /* to the controller */ +@@ -2179,39 +2214,6 @@ static inline void resend_cciss_cmd( ctl + start_io(h); + } + +-static void cciss_softirq_done(struct request *rq) +-{ +- CommandList_struct *cmd = rq->completion_data; +- ctlr_info_t *h = hba[cmd->ctlr]; +- unsigned long flags; +- u64bit temp64; +- int i, ddir; +- +- if (cmd->Request.Type.Direction == XFER_READ) +- ddir = PCI_DMA_FROMDEVICE; +- else +- ddir = PCI_DMA_TODEVICE; +- +- /* command did not need to be retried */ +- /* unmap the DMA mapping for all the scatter gather elements */ +- for(i=0; i<cmd->Header.SGList; i++) { +- temp64.val32.lower = cmd->SG[i].Addr.lower; +- temp64.val32.upper = cmd->SG[i].Addr.upper; +- pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir); +- } +- +- complete_buffers(rq->bio, rq->errors); +- +-#ifdef CCISS_DEBUG +- printk("Done with %p\n", rq); +-#endif /* CCISS_DEBUG */ +- +- spin_lock_irqsave(&h->lock, flags); +- end_that_request_last(rq, rq->errors); +- cmd_free(h, cmd,1); +- spin_unlock_irqrestore(&h->lock, flags); +-} +- + /* checks the status of the job and calls complete buffers to mark all + * buffers for the completed job. Note that this function does not need + * to hold the hba/queue lock. 
+@@ -3269,8 +3271,8 @@ clean2: + unregister_blkdev(hba[i]->major, hba[i]->devname); + clean1: + release_io_mem(hba[i]); +- free_hba(i); + hba[i]->busy_initializing = 0; ++ free_hba(i); + return(-1); + } + +diff -upr linux-2.6.16.orig/drivers/block/ub.c linux-2.6.16-026test015/drivers/block/ub.c +--- linux-2.6.16.orig/drivers/block/ub.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/block/ub.c 2006-07-04 14:41:36.000000000 +0400 +@@ -704,6 +704,9 @@ static void ub_cleanup(struct ub_dev *sc + kfree(lun); + } + ++ usb_set_intfdata(sc->intf, NULL); ++ usb_put_intf(sc->intf); ++ usb_put_dev(sc->dev); + kfree(sc); + } + +@@ -2428,7 +2431,12 @@ static int ub_probe(struct usb_interface + // sc->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; + usb_set_intfdata(intf, sc); + usb_get_dev(sc->dev); +- // usb_get_intf(sc->intf); /* Do we need this? */ ++ /* ++ * Since we give the interface struct to the block level through ++ * disk->driverfs_dev, we have to pin it. Otherwise, block_uevent ++ * oopses on close after a disconnect (kernels 2.6.16 and up). 
++ */ ++ usb_get_intf(sc->intf); + + snprintf(sc->name, 12, DRV_NAME "(%d.%d)", + sc->dev->bus->busnum, sc->dev->devnum); +@@ -2509,7 +2517,7 @@ static int ub_probe(struct usb_interface + err_diag: + err_dev_desc: + usb_set_intfdata(intf, NULL); +- // usb_put_intf(sc->intf); ++ usb_put_intf(sc->intf); + usb_put_dev(sc->dev); + kfree(sc); + err_core: +@@ -2688,12 +2696,6 @@ static void ub_disconnect(struct usb_int + */ + + device_remove_file(&sc->intf->dev, &dev_attr_diag); +- usb_set_intfdata(intf, NULL); +- // usb_put_intf(sc->intf); +- sc->intf = NULL; +- usb_put_dev(sc->dev); +- sc->dev = NULL; +- + ub_put(sc); + } + +diff -upr linux-2.6.16.orig/drivers/char/Kconfig linux-2.6.16-026test015/drivers/char/Kconfig +--- linux-2.6.16.orig/drivers/char/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/Kconfig 2006-07-04 14:41:36.000000000 +0400 +@@ -187,6 +187,7 @@ config MOXA_SMARTIO + config ISI + tristate "Multi-Tech multiport card support (EXPERIMENTAL)" + depends on SERIAL_NONSTANDARD ++ select FW_LOADER + help + This is a driver for the Multi-Tech cards which provide several + serial ports. The driver is experimental and can currently only be +diff -upr linux-2.6.16.orig/drivers/char/agp/efficeon-agp.c linux-2.6.16-026test015/drivers/char/agp/efficeon-agp.c +--- linux-2.6.16.orig/drivers/char/agp/efficeon-agp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/agp/efficeon-agp.c 2006-07-04 14:41:36.000000000 +0400 +@@ -64,6 +64,12 @@ static struct gatt_mask efficeon_generic + {.mask = 0x00000001, .type = 0} + }; + ++/* This function does the same thing as mask_memory() for this chipset... 
*/ ++static inline unsigned long efficeon_mask_memory(unsigned long addr) ++{ ++ return addr | 0x00000001; ++} ++ + static struct aper_size_info_lvl2 efficeon_generic_sizes[4] = + { + {256, 65536, 0}, +@@ -251,7 +257,7 @@ static int efficeon_insert_memory(struct + last_page = NULL; + for (i = 0; i < count; i++) { + int index = pg_start + i; +- unsigned long insert = mem->memory[i]; ++ unsigned long insert = efficeon_mask_memory(mem->memory[i]); + + page = (unsigned int *) efficeon_private.l1_table[index >> 10]; + +diff -upr linux-2.6.16.orig/drivers/char/cs5535_gpio.c linux-2.6.16-026test015/drivers/char/cs5535_gpio.c +--- linux-2.6.16.orig/drivers/char/cs5535_gpio.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/cs5535_gpio.c 2006-07-04 14:41:36.000000000 +0400 +@@ -241,9 +241,10 @@ static int __init cs5535_gpio_init(void) + static void __exit cs5535_gpio_cleanup(void) + { + dev_t dev_id = MKDEV(major, 0); ++ ++ cdev_del(&cs5535_gpio_cdev); + unregister_chrdev_region(dev_id, CS5535_GPIO_COUNT); +- if (gpio_base != 0) +- release_region(gpio_base, CS5535_GPIO_SIZE); ++ release_region(gpio_base, CS5535_GPIO_SIZE); + } + + module_init(cs5535_gpio_init); +diff -upr linux-2.6.16.orig/drivers/char/ipmi/ipmi_bt_sm.c linux-2.6.16-026test015/drivers/char/ipmi/ipmi_bt_sm.c +--- linux-2.6.16.orig/drivers/char/ipmi/ipmi_bt_sm.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/ipmi/ipmi_bt_sm.c 2006-07-04 14:41:36.000000000 +0400 +@@ -165,7 +165,7 @@ static int bt_start_transaction(struct s + { + unsigned int i; + +- if ((size < 2) || (size > IPMI_MAX_MSG_LENGTH)) ++ if ((size < 2) || (size > (IPMI_MAX_MSG_LENGTH - 2))) + return -1; + + if ((bt->state != BT_STATE_IDLE) && (bt->state != BT_STATE_HOSED)) +diff -upr linux-2.6.16.orig/drivers/char/pcmcia/cm4000_cs.c linux-2.6.16-026test015/drivers/char/pcmcia/cm4000_cs.c +--- linux-2.6.16.orig/drivers/char/pcmcia/cm4000_cs.c 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16-026test015/drivers/char/pcmcia/cm4000_cs.c 2006-07-04 14:41:36.000000000 +0400 +@@ -2010,10 +2010,6 @@ static int __init cmm_init(void) + if (!cmm_class) + return -1; + +- rc = pcmcia_register_driver(&cm4000_driver); +- if (rc < 0) +- return rc; +- + major = register_chrdev(0, DEVICE_NAME, &cm4000_fops); + if (major < 0) { + printk(KERN_WARNING MODULE_NAME +@@ -2021,6 +2017,12 @@ static int __init cmm_init(void) + return -1; + } + ++ rc = pcmcia_register_driver(&cm4000_driver); ++ if (rc < 0) { ++ unregister_chrdev(major, DEVICE_NAME); ++ return rc; ++ } ++ + return 0; + } + +diff -upr linux-2.6.16.orig/drivers/char/pcmcia/cm4040_cs.c linux-2.6.16-026test015/drivers/char/pcmcia/cm4040_cs.c +--- linux-2.6.16.orig/drivers/char/pcmcia/cm4040_cs.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/pcmcia/cm4040_cs.c 2006-07-04 14:41:36.000000000 +0400 +@@ -769,16 +769,19 @@ static int __init cm4040_init(void) + if (!cmx_class) + return -1; + +- rc = pcmcia_register_driver(&reader_driver); +- if (rc < 0) +- return rc; +- + major = register_chrdev(0, DEVICE_NAME, &reader_fops); + if (major < 0) { + printk(KERN_WARNING MODULE_NAME + ": could not get major number\n"); + return -1; + } ++ ++ rc = pcmcia_register_driver(&reader_driver); ++ if (rc < 0) { ++ unregister_chrdev(major, DEVICE_NAME); ++ return rc; ++ } ++ + return 0; + } + +diff -upr linux-2.6.16.orig/drivers/char/pty.c linux-2.6.16-026test015/drivers/char/pty.c +--- linux-2.6.16.orig/drivers/char/pty.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/pty.c 2006-07-04 14:41:38.000000000 +0400 +@@ -32,16 +32,30 @@ + #include <linux/bitops.h> + #include <linux/devpts_fs.h> + ++#include <ub/ub_misc.h> ++ + /* These are global because they are accessed in tty_io.c */ + #ifdef CONFIG_UNIX98_PTYS + struct tty_driver *ptm_driver; +-static struct tty_driver *pts_driver; ++struct tty_driver *pts_driver; ++EXPORT_SYMBOL(ptm_driver); 
++EXPORT_SYMBOL(pts_driver); ++ ++void prepare_pty(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->ptm_driver = ptm_driver; ++ /* don't clean ptm_driver and co. here, they are used in vecalls.c */ ++#endif ++} + #endif + + static void pty_close(struct tty_struct * tty, struct file * filp) + { + if (!tty) + return; ++ ++ ub_pty_uncharge(tty); + if (tty->driver->subtype == PTY_TYPE_MASTER) { + if (tty->count > 1) + printk("master pty_close: count = %d!!\n", tty->count); +@@ -61,8 +75,12 @@ static void pty_close(struct tty_struct + if (tty->driver->subtype == PTY_TYPE_MASTER) { + set_bit(TTY_OTHER_CLOSED, &tty->flags); + #ifdef CONFIG_UNIX98_PTYS +- if (tty->driver == ptm_driver) ++ if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) { ++ struct ve_struct *old_env; ++ old_env = set_exec_env(VE_OWNER_TTY(tty)); + devpts_pty_kill(tty->index); ++ (void)set_exec_env(old_env); ++ } + #endif + tty_vhangup(tty->link); + } +@@ -212,6 +230,10 @@ static int pty_open(struct tty_struct *t + if (tty->link->count != 1) + goto out; + ++ retval = -ENODEV; ++ if (ub_pty_charge(tty)) ++ goto out; ++ + clear_bit(TTY_OTHER_CLOSED, &tty->link->flags); + set_bit(TTY_THROTTLED, &tty->flags); + set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); +@@ -239,7 +261,9 @@ static struct tty_operations pty_ops = { + + /* Traditional BSD devices */ + #ifdef CONFIG_LEGACY_PTYS +-static struct tty_driver *pty_driver, *pty_slave_driver; ++struct tty_driver *pty_driver, *pty_slave_driver; ++EXPORT_SYMBOL(pty_driver); ++EXPORT_SYMBOL(pty_slave_driver); + + static int pty_bsd_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) +@@ -397,6 +421,7 @@ static void __init unix98_pty_init(void) + panic("Couldn't register Unix98 pts driver"); + + pty_table[1].data = &ptm_driver->refcount; ++ prepare_pty(); + } + #else + static inline void unix98_pty_init(void) { } +diff -upr linux-2.6.16.orig/drivers/char/snsc.c linux-2.6.16-026test015/drivers/char/snsc.c +--- 
linux-2.6.16.orig/drivers/char/snsc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/snsc.c 2006-07-04 14:41:36.000000000 +0400 +@@ -391,7 +391,8 @@ scdrv_init(void) + format_module_id(devnamep, geo_module(geoid), + MODULE_FORMAT_BRIEF); + devnamep = devname + strlen(devname); +- sprintf(devnamep, "#%d", geo_slab(geoid)); ++ sprintf(devnamep, "^%d#%d", geo_slot(geoid), ++ geo_slab(geoid)); + + /* allocate sysctl device data */ + scd = kmalloc(sizeof (struct sysctl_data_s), +diff -upr linux-2.6.16.orig/drivers/char/snsc_event.c linux-2.6.16-026test015/drivers/char/snsc_event.c +--- linux-2.6.16.orig/drivers/char/snsc_event.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/snsc_event.c 2006-07-04 14:41:38.000000000 +0400 +@@ -206,7 +206,7 @@ scdrv_dispatch_event(char *event, int le + + /* first find init's task */ + read_lock(&tasklist_lock); +- for_each_process(p) { ++ for_each_process_all(p) { + if (p->pid == 1) + break; + } +diff -upr linux-2.6.16.orig/drivers/char/sonypi.c linux-2.6.16-026test015/drivers/char/sonypi.c +--- linux-2.6.16.orig/drivers/char/sonypi.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/sonypi.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1341,6 +1341,9 @@ static int __devinit sonypi_probe(struct + else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_ICH6_1, NULL))) + sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3; ++ else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, ++ PCI_DEVICE_ID_INTEL_ICH7_1, NULL))) ++ sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3; + else + sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE2; + +diff -upr linux-2.6.16.orig/drivers/char/sysrq.c linux-2.6.16-026test015/drivers/char/sysrq.c +--- linux-2.6.16.orig/drivers/char/sysrq.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/sysrq.c 2006-07-04 14:41:39.000000000 +0400 +@@ -174,8 +174,13 @@ static struct sysrq_key_op 
sysrq_showloc + static void sysrq_handle_showregs(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) + { ++ bust_spinlocks(1); + if (pt_regs) + show_regs(pt_regs); ++ bust_spinlocks(0); ++#if defined(__i386__) || defined(__x86_64__) ++ smp_nmi_call_function(smp_show_regs, NULL, 0); ++#endif + } + static struct sysrq_key_op sysrq_showregs_op = { + .handler = sysrq_handle_showregs, +@@ -221,7 +226,7 @@ static void send_sig_all(int sig) + { + struct task_struct *p; + +- for_each_process(p) { ++ for_each_process_all(p) { + if (p->mm && p->pid != 1) + /* Not swapper, init nor kernel thread */ + force_sig(sig, p); +@@ -272,6 +277,19 @@ static struct sysrq_key_op sysrq_kill_op + .enable_mask = SYSRQ_ENABLE_SIGNAL, + }; + ++#ifdef CONFIG_SCHED_VCPU ++static void sysrq_handle_vschedstate(int key, struct pt_regs *pt_regs, ++ struct tty_struct *tty) ++{ ++ show_vsched(); ++} ++static struct sysrq_key_op sysrq_vschedstate_op = { ++ .handler = sysrq_handle_vschedstate, ++ .help_msg = "vsced_stAte", ++ .action_msg = "Show Vsched", ++}; ++#endif ++ + /* END SIGNAL SYSRQ HANDLERS BLOCK */ + + static void sysrq_handle_unrt(int key, struct pt_regs *pt_regs, +@@ -300,9 +318,13 @@ static struct sysrq_key_op *sysrq_key_ta + /* 7 */ &sysrq_loglevel_op, + /* 8 */ &sysrq_loglevel_op, + /* 9 */ &sysrq_loglevel_op, ++#ifdef CONFIG_SCHED_VCPU ++/* a */ &sysrq_vschedstate_op, ++#else + /* a */ NULL, /* Don't use for system provided sysrqs, + it is handled specially on the sparc + and will never arrive */ ++#endif + /* b */ &sysrq_reboot_op, + #ifdef CONFIG_KEXEC + /* c */ &sysrq_crashdump_op, +diff -upr linux-2.6.16.orig/drivers/char/tipar.c linux-2.6.16-026test015/drivers/char/tipar.c +--- linux-2.6.16.orig/drivers/char/tipar.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/tipar.c 2006-07-04 14:41:36.000000000 +0400 +@@ -515,7 +515,7 @@ tipar_init_module(void) + err = PTR_ERR(tipar_class); + goto out_chrdev; + } +- if 
(parport_register_driver(&tipar_driver) || tp_count == 0) { ++ if (parport_register_driver(&tipar_driver)) { + printk(KERN_ERR "tipar: unable to register with parport\n"); + err = -EIO; + goto out_class; +diff -upr linux-2.6.16.orig/drivers/char/tlclk.c linux-2.6.16-026test015/drivers/char/tlclk.c +--- linux-2.6.16.orig/drivers/char/tlclk.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/tlclk.c 2006-07-04 14:41:36.000000000 +0400 +@@ -327,7 +327,7 @@ static ssize_t store_received_ref_clk3a( + return strnlen(buf, count); + } + +-static DEVICE_ATTR(received_ref_clk3a, S_IWUGO, NULL, ++static DEVICE_ATTR(received_ref_clk3a, (S_IWUSR|S_IWGRP), NULL, + store_received_ref_clk3a); + + +@@ -349,7 +349,7 @@ static ssize_t store_received_ref_clk3b( + return strnlen(buf, count); + } + +-static DEVICE_ATTR(received_ref_clk3b, S_IWUGO, NULL, ++static DEVICE_ATTR(received_ref_clk3b, (S_IWUSR|S_IWGRP), NULL, + store_received_ref_clk3b); + + +@@ -371,7 +371,7 @@ static ssize_t store_enable_clk3b_output + return strnlen(buf, count); + } + +-static DEVICE_ATTR(enable_clk3b_output, S_IWUGO, NULL, ++static DEVICE_ATTR(enable_clk3b_output, (S_IWUSR|S_IWGRP), NULL, + store_enable_clk3b_output); + + static ssize_t store_enable_clk3a_output(struct device *d, +@@ -392,7 +392,7 @@ static ssize_t store_enable_clk3a_output + return strnlen(buf, count); + } + +-static DEVICE_ATTR(enable_clk3a_output, S_IWUGO, NULL, ++static DEVICE_ATTR(enable_clk3a_output, (S_IWUSR|S_IWGRP), NULL, + store_enable_clk3a_output); + + static ssize_t store_enable_clkb1_output(struct device *d, +@@ -413,7 +413,7 @@ static ssize_t store_enable_clkb1_output + return strnlen(buf, count); + } + +-static DEVICE_ATTR(enable_clkb1_output, S_IWUGO, NULL, ++static DEVICE_ATTR(enable_clkb1_output, (S_IWUSR|S_IWGRP), NULL, + store_enable_clkb1_output); + + +@@ -435,7 +435,7 @@ static ssize_t store_enable_clka1_output + return strnlen(buf, count); + } + +-static DEVICE_ATTR(enable_clka1_output, 
S_IWUGO, NULL, ++static DEVICE_ATTR(enable_clka1_output, (S_IWUSR|S_IWGRP), NULL, + store_enable_clka1_output); + + static ssize_t store_enable_clkb0_output(struct device *d, +@@ -456,7 +456,7 @@ static ssize_t store_enable_clkb0_output + return strnlen(buf, count); + } + +-static DEVICE_ATTR(enable_clkb0_output, S_IWUGO, NULL, ++static DEVICE_ATTR(enable_clkb0_output, (S_IWUSR|S_IWGRP), NULL, + store_enable_clkb0_output); + + static ssize_t store_enable_clka0_output(struct device *d, +@@ -477,7 +477,7 @@ static ssize_t store_enable_clka0_output + return strnlen(buf, count); + } + +-static DEVICE_ATTR(enable_clka0_output, S_IWUGO, NULL, ++static DEVICE_ATTR(enable_clka0_output, (S_IWUSR|S_IWGRP), NULL, + store_enable_clka0_output); + + static ssize_t store_select_amcb2_transmit_clock(struct device *d, +@@ -519,7 +519,7 @@ static ssize_t store_select_amcb2_transm + return strnlen(buf, count); + } + +-static DEVICE_ATTR(select_amcb2_transmit_clock, S_IWUGO, NULL, ++static DEVICE_ATTR(select_amcb2_transmit_clock, (S_IWUSR|S_IWGRP), NULL, + store_select_amcb2_transmit_clock); + + static ssize_t store_select_amcb1_transmit_clock(struct device *d, +@@ -560,7 +560,7 @@ static ssize_t store_select_amcb1_transm + return strnlen(buf, count); + } + +-static DEVICE_ATTR(select_amcb1_transmit_clock, S_IWUGO, NULL, ++static DEVICE_ATTR(select_amcb1_transmit_clock, (S_IWUSR|S_IWGRP), NULL, + store_select_amcb1_transmit_clock); + + static ssize_t store_select_redundant_clock(struct device *d, +@@ -581,7 +581,7 @@ static ssize_t store_select_redundant_cl + return strnlen(buf, count); + } + +-static DEVICE_ATTR(select_redundant_clock, S_IWUGO, NULL, ++static DEVICE_ATTR(select_redundant_clock, (S_IWUSR|S_IWGRP), NULL, + store_select_redundant_clock); + + static ssize_t store_select_ref_frequency(struct device *d, +@@ -602,7 +602,7 @@ static ssize_t store_select_ref_frequenc + return strnlen(buf, count); + } + +-static DEVICE_ATTR(select_ref_frequency, S_IWUGO, NULL, ++static 
DEVICE_ATTR(select_ref_frequency, (S_IWUSR|S_IWGRP), NULL, + store_select_ref_frequency); + + static ssize_t store_filter_select(struct device *d, +@@ -623,7 +623,7 @@ static ssize_t store_filter_select(struc + return strnlen(buf, count); + } + +-static DEVICE_ATTR(filter_select, S_IWUGO, NULL, store_filter_select); ++static DEVICE_ATTR(filter_select, (S_IWUSR|S_IWGRP), NULL, store_filter_select); + + static ssize_t store_hardware_switching_mode(struct device *d, + struct device_attribute *attr, const char *buf, size_t count) +@@ -643,7 +643,7 @@ static ssize_t store_hardware_switching_ + return strnlen(buf, count); + } + +-static DEVICE_ATTR(hardware_switching_mode, S_IWUGO, NULL, ++static DEVICE_ATTR(hardware_switching_mode, (S_IWUSR|S_IWGRP), NULL, + store_hardware_switching_mode); + + static ssize_t store_hardware_switching(struct device *d, +@@ -664,7 +664,7 @@ static ssize_t store_hardware_switching( + return strnlen(buf, count); + } + +-static DEVICE_ATTR(hardware_switching, S_IWUGO, NULL, ++static DEVICE_ATTR(hardware_switching, (S_IWUSR|S_IWGRP), NULL, + store_hardware_switching); + + static ssize_t store_refalign (struct device *d, +@@ -684,7 +684,7 @@ static ssize_t store_refalign (struct de + return strnlen(buf, count); + } + +-static DEVICE_ATTR(refalign, S_IWUGO, NULL, store_refalign); ++static DEVICE_ATTR(refalign, (S_IWUSR|S_IWGRP), NULL, store_refalign); + + static ssize_t store_mode_select (struct device *d, + struct device_attribute *attr, const char *buf, size_t count) +@@ -704,7 +704,7 @@ static ssize_t store_mode_select (struct + return strnlen(buf, count); + } + +-static DEVICE_ATTR(mode_select, S_IWUGO, NULL, store_mode_select); ++static DEVICE_ATTR(mode_select, (S_IWUSR|S_IWGRP), NULL, store_mode_select); + + static ssize_t store_reset (struct device *d, + struct device_attribute *attr, const char *buf, size_t count) +@@ -724,7 +724,7 @@ static ssize_t store_reset (struct devic + return strnlen(buf, count); + } + +-static DEVICE_ATTR(reset, 
S_IWUGO, NULL, store_reset); ++static DEVICE_ATTR(reset, (S_IWUSR|S_IWGRP), NULL, store_reset); + + static struct attribute *tlclk_sysfs_entries[] = { + &dev_attr_current_ref.attr, +@@ -767,6 +767,7 @@ static int __init tlclk_init(void) + printk(KERN_ERR "tlclk: can't get major %d.\n", tlclk_major); + return ret; + } ++ tlclk_major = ret; + alarm_events = kzalloc( sizeof(struct tlclk_alarms), GFP_KERNEL); + if (!alarm_events) + goto out1; +diff -upr linux-2.6.16.orig/drivers/char/tty_io.c linux-2.6.16-026test015/drivers/char/tty_io.c +--- linux-2.6.16.orig/drivers/char/tty_io.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/char/tty_io.c 2006-07-04 14:41:38.000000000 +0400 +@@ -86,6 +86,7 @@ + #include <linux/string.h> + #include <linux/slab.h> + #include <linux/poll.h> ++#include <linux/ve_owner.h> + #include <linux/proc_fs.h> + #include <linux/init.h> + #include <linux/module.h> +@@ -105,6 +106,7 @@ + #include <linux/devfs_fs_kernel.h> + + #include <linux/kmod.h> ++#include <ub/ub_mem.h> + + #undef TTY_DEBUG_HANGUP + +@@ -122,11 +124,16 @@ struct termios tty_std_termios = { /* fo + + EXPORT_SYMBOL(tty_std_termios); + ++/* this lock protects tty_drivers list, this pretty guys do no locking */ ++rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED; ++EXPORT_SYMBOL(tty_driver_guard); ++ + /* This list gets poked at by procfs and various bits of boot up code. This + could do with some rationalisation such as pulling the tty proc function + into this file */ + + LIST_HEAD(tty_drivers); /* linked list of tty drivers */ ++EXPORT_SYMBOL(tty_drivers); + + /* Semaphore to protect creating and releasing a tty. 
This is shared with + vt.c for deeply disgusting hack reasons */ +@@ -136,6 +143,15 @@ DECLARE_MUTEX(tty_sem); + extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */ + extern int pty_limit; /* Config limit on Unix98 ptys */ + static DEFINE_IDR(allocated_ptys); ++#ifdef CONFIG_VE ++#define __ve_allocated_ptys(ve) (*((ve)->allocated_ptys)) ++#define ve_allocated_ptys __ve_allocated_ptys(get_exec_env()) ++#define ve_ptm_driver (get_exec_env()->ptm_driver) ++#else ++#define __ve_allocated_ptys(ve) allocated_ptys ++#define ve_allocated_ptys allocated_ptys ++#define ve_ptm_driver ptm_driver ++#endif + static DECLARE_MUTEX(allocated_ptys_lock); + static int ptmx_open(struct inode *, struct file *); + #endif +@@ -156,11 +172,25 @@ static int tty_fasync(int fd, struct fil + static void release_mem(struct tty_struct *tty, int idx); + + ++DCL_VE_OWNER(TTYDRV, struct tty_driver, owner_env) ++DCL_VE_OWNER(TTY, struct tty_struct, owner_env) ++ ++void prepare_tty(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->allocated_ptys = &allocated_ptys; ++ /* ++ * in this case, tty_register_driver() setups ++ * owner_env correctly right from the bootup ++ */ ++#endif ++} ++ + static struct tty_struct *alloc_tty_struct(void) + { + struct tty_struct *tty; + +- tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL); ++ tty = ub_kmalloc(sizeof(struct tty_struct), GFP_KERNEL); + if (tty) + memset(tty, 0, sizeof(struct tty_struct)); + return tty; +@@ -857,14 +887,37 @@ static struct tty_driver *get_tty_driver + { + struct tty_driver *p; + ++ read_lock(&tty_driver_guard); + list_for_each_entry(p, &tty_drivers, tty_drivers) { + dev_t base = MKDEV(p->major, p->minor_start); + if (device < base || device >= base + p->num) + continue; + *index = device - base; +- return p; ++#ifdef CONFIG_VE ++ if (in_interrupt()) ++ goto found; ++ if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR ++#ifdef CONFIG_UNIX98_PTYS ++ && (p->major<UNIX98_PTY_MASTER_MAJOR || ++ 
p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) && ++ (p->major<UNIX98_PTY_SLAVE_MAJOR || ++ p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) ++#endif ++ ) goto found; ++ if (ve_is_super(VE_OWNER_TTYDRV(p)) && ++ ve_is_super(get_exec_env())) ++ goto found; ++ if (!ve_accessible_strict(VE_OWNER_TTYDRV(p), get_exec_env())) ++ continue; ++#endif ++ goto found; + } ++ read_unlock(&tty_driver_guard); + return NULL; ++ ++found: ++ read_unlock(&tty_driver_guard); ++ return p; + } + + /* +@@ -1092,7 +1145,7 @@ static void do_tty_hangup(void *data) + + read_lock(&tasklist_lock); + if (tty->session > 0) { +- do_each_task_pid(tty->session, PIDTYPE_SID, p) { ++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) { + if (p->signal->tty == tty) + p->signal->tty = NULL; + if (!p->signal->leader) +@@ -1101,7 +1154,7 @@ static void do_tty_hangup(void *data) + send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p); + if (tty->pgrp > 0) + p->signal->tty_old_pgrp = tty->pgrp; +- } while_each_task_pid(tty->session, PIDTYPE_SID, p); ++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p); + } + read_unlock(&tasklist_lock); + +@@ -1218,9 +1271,9 @@ void disassociate_ctty(int on_exit) + + /* Now clear signal->tty under the lock */ + read_lock(&tasklist_lock); +- do_each_task_pid(current->signal->session, PIDTYPE_SID, p) { ++ do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) { + p->signal->tty = NULL; +- } while_each_task_pid(current->signal->session, PIDTYPE_SID, p); ++ } while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p); + read_unlock(&tasklist_lock); + up(&tty_sem); + unlock_kernel(); +@@ -1446,21 +1499,28 @@ static inline void tty_line_name(struct + * really quite straightforward. The semaphore locking can probably be + * relaxed for the (most common) case of reopening a tty. 
+ */ +-static int init_dev(struct tty_driver *driver, int idx, +- struct tty_struct **ret_tty) ++static int init_dev(struct tty_driver *driver, int idx, ++ struct tty_struct *i_tty, struct tty_struct **ret_tty) + { + struct tty_struct *tty, *o_tty; + struct termios *tp, **tp_loc, *o_tp, **o_tp_loc; + struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; ++ struct ve_struct * owner; + int retval=0; + +- /* check whether we're reopening an existing tty */ +- if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { +- tty = devpts_get_tty(idx); +- if (tty && driver->subtype == PTY_TYPE_MASTER) +- tty = tty->link; +- } else { +- tty = driver->ttys[idx]; ++ owner = VE_OWNER_TTYDRV(driver); ++ ++ if (i_tty) ++ tty = i_tty; ++ else { ++ /* check whether we're reopening an existing tty */ ++ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { ++ tty = devpts_get_tty(idx); ++ if (tty && driver->subtype == PTY_TYPE_MASTER) ++ tty = tty->link; ++ } else { ++ tty = driver->ttys[idx]; ++ } + } + if (tty) goto fast_track; + +@@ -1488,6 +1548,7 @@ static int init_dev(struct tty_driver *d + tty->driver = driver; + tty->index = idx; + tty_line_name(driver, idx, tty->name); ++ SET_VE_OWNER_TTY(tty, owner); + + if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { + tp_loc = &tty->termios; +@@ -1498,7 +1559,7 @@ static int init_dev(struct tty_driver *d + } + + if (!*tp_loc) { +- tp = (struct termios *) kmalloc(sizeof(struct termios), ++ tp = (struct termios *) ub_kmalloc(sizeof(struct termios), + GFP_KERNEL); + if (!tp) + goto free_mem_out; +@@ -1506,7 +1567,7 @@ static int init_dev(struct tty_driver *d + } + + if (!*ltp_loc) { +- ltp = (struct termios *) kmalloc(sizeof(struct termios), ++ ltp = (struct termios *) ub_kmalloc(sizeof(struct termios), + GFP_KERNEL); + if (!ltp) + goto free_mem_out; +@@ -1521,6 +1582,7 @@ static int init_dev(struct tty_driver *d + o_tty->driver = driver->other; + o_tty->index = idx; + tty_line_name(driver->other, idx, o_tty->name); ++ SET_VE_OWNER_TTY(o_tty, owner); + + if 
(driver->flags & TTY_DRIVER_DEVPTS_MEM) { + o_tp_loc = &o_tty->termios; +@@ -1532,7 +1594,7 @@ static int init_dev(struct tty_driver *d + + if (!*o_tp_loc) { + o_tp = (struct termios *) +- kmalloc(sizeof(struct termios), GFP_KERNEL); ++ ub_kmalloc(sizeof(struct termios), GFP_KERNEL); + if (!o_tp) + goto free_mem_out; + *o_tp = driver->other->init_termios; +@@ -1540,7 +1602,7 @@ static int init_dev(struct tty_driver *d + + if (!*o_ltp_loc) { + o_ltp = (struct termios *) +- kmalloc(sizeof(struct termios), GFP_KERNEL); ++ ub_kmalloc(sizeof(struct termios), GFP_KERNEL); + if (!o_ltp) + goto free_mem_out; + memset(o_ltp, 0, sizeof(struct termios)); +@@ -1558,6 +1620,10 @@ static int init_dev(struct tty_driver *d + *o_ltp_loc = o_ltp; + o_tty->termios = *o_tp_loc; + o_tty->termios_locked = *o_ltp_loc; ++#ifdef CONFIG_VE ++ if (driver->other->refcount == 0) ++ (void)get_ve(owner); ++#endif + driver->other->refcount++; + if (driver->subtype == PTY_TYPE_MASTER) + o_tty->count++; +@@ -1582,6 +1648,10 @@ static int init_dev(struct tty_driver *d + *ltp_loc = ltp; + tty->termios = *tp_loc; + tty->termios_locked = *ltp_loc; ++#ifdef CONFIG_VE ++ if (driver->refcount == 0) ++ (void)get_ve(owner); ++#endif + driver->refcount++; + tty->count++; + +@@ -1692,6 +1762,10 @@ static void release_mem(struct tty_struc + } + o_tty->magic = 0; + o_tty->driver->refcount--; ++#ifdef CONFIG_VE ++ if (o_tty->driver->refcount == 0) ++ put_ve(VE_OWNER_TTY(o_tty)); ++#endif + file_list_lock(); + list_del_init(&o_tty->tty_files); + file_list_unlock(); +@@ -1714,6 +1788,10 @@ static void release_mem(struct tty_struc + + tty->magic = 0; + tty->driver->refcount--; ++#ifdef CONFIG_VE ++ if (tty->driver->refcount == 0) ++ put_ve(VE_OWNER_TTY(tty)); ++#endif + file_list_lock(); + list_del_init(&tty->tty_files); + file_list_unlock(); +@@ -1737,7 +1815,10 @@ static void release_dev(struct file * fi + int idx; + char buf[64]; + unsigned long flags; +- ++#ifdef CONFIG_UNIX98_PTYS ++ struct idr *idr_alloced; 
++#endif ++ + tty = (struct tty_struct *)filp->private_data; + if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev")) + return; +@@ -1752,6 +1833,9 @@ static void release_dev(struct file * fi + devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0; + devpts_master = pty_master && devpts; + o_tty = tty->link; ++#ifdef CONFIG_UNIX98_PTYS ++ idr_alloced = &__ve_allocated_ptys(tty->owner_env); ++#endif + + #ifdef TTY_PARANOIA_CHECK + if (idx < 0 || idx >= tty->driver->num) { +@@ -1924,13 +2008,13 @@ static void release_dev(struct file * fi + struct task_struct *p; + + read_lock(&tasklist_lock); +- do_each_task_pid(tty->session, PIDTYPE_SID, p) { ++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) { + p->signal->tty = NULL; +- } while_each_task_pid(tty->session, PIDTYPE_SID, p); ++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p); + if (o_tty) +- do_each_task_pid(o_tty->session, PIDTYPE_SID, p) { ++ do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) { + p->signal->tty = NULL; +- } while_each_task_pid(o_tty->session, PIDTYPE_SID, p); ++ } while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p); + read_unlock(&tasklist_lock); + } + +@@ -2005,7 +2089,7 @@ static void release_dev(struct file * fi + /* Make this pty number available for reallocation */ + if (devpts) { + down(&allocated_ptys_lock); +- idr_remove(&allocated_ptys, idx); ++ idr_remove(idr_alloced, idx); + up(&allocated_ptys_lock); + } + #endif +@@ -2026,7 +2110,7 @@ static void release_dev(struct file * fi + */ + static int tty_open(struct inode * inode, struct file * filp) + { +- struct tty_struct *tty; ++ struct tty_struct *tty, *c_tty; + int noctty, retval; + struct tty_driver *driver; + int index; +@@ -2039,6 +2123,7 @@ retry_open: + noctty = filp->f_flags & O_NOCTTY; + index = -1; + retval = 0; ++ c_tty = NULL; + + down(&tty_sem); + +@@ -2049,6 +2134,7 @@ retry_open: + } + driver = current->signal->tty->driver; + index = current->signal->tty->index; ++ c_tty = 
current->signal->tty; + filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ + /* noctty = 1; */ + goto got_driver; +@@ -2056,6 +2142,12 @@ retry_open: + #ifdef CONFIG_VT + if (device == MKDEV(TTY_MAJOR,0)) { + extern struct tty_driver *console_driver; ++#ifdef CONFIG_VE ++ if (!ve_is_super(get_exec_env())) { ++ up(&tty_sem); ++ return -ENODEV; ++ } ++#endif + driver = console_driver; + index = fg_console; + noctty = 1; +@@ -2063,6 +2155,12 @@ retry_open: + } + #endif + if (device == MKDEV(TTYAUX_MAJOR,1)) { ++#ifdef CONFIG_VE ++ if (!ve_is_super(get_exec_env())) { ++ up(&tty_sem); ++ return -ENODEV; ++ } ++#endif + driver = console_device(&index); + if (driver) { + /* Don't let /dev/console block */ +@@ -2080,7 +2178,7 @@ retry_open: + return -ENODEV; + } + got_driver: +- retval = init_dev(driver, index, &tty); ++ retval = init_dev(driver, index, c_tty, &tty); + up(&tty_sem); + if (retval) + return retval; +@@ -2149,11 +2247,11 @@ static int ptmx_open(struct inode * inod + + /* find a device that is not in use. 
*/ + down(&allocated_ptys_lock); +- if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { ++ if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) { + up(&allocated_ptys_lock); + return -ENOMEM; + } +- idr_ret = idr_get_new(&allocated_ptys, NULL, &index); ++ idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index); + if (idr_ret < 0) { + up(&allocated_ptys_lock); + if (idr_ret == -EAGAIN) +@@ -2161,14 +2259,14 @@ static int ptmx_open(struct inode * inod + return -EIO; + } + if (index >= pty_limit) { +- idr_remove(&allocated_ptys, index); ++ idr_remove(&ve_allocated_ptys, index); + up(&allocated_ptys_lock); + return -EIO; + } + up(&allocated_ptys_lock); + + down(&tty_sem); +- retval = init_dev(ptm_driver, index, &tty); ++ retval = init_dev(ve_ptm_driver, index, NULL, &tty); + up(&tty_sem); + + if (retval) +@@ -2183,14 +2281,14 @@ static int ptmx_open(struct inode * inod + goto out1; + + check_tty_count(tty, "tty_open"); +- retval = ptm_driver->open(tty, filp); ++ retval = ve_ptm_driver->open(tty, filp); + if (!retval) + return 0; + out1: + release_dev(filp); + out: + down(&allocated_ptys_lock); +- idr_remove(&allocated_ptys, index); ++ idr_remove(&ve_allocated_ptys, index); + up(&allocated_ptys_lock); + return retval; + } +@@ -2303,6 +2401,8 @@ static int tioccons(struct file *file) + { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; ++ if (!ve_is_super(get_exec_env())) ++ return -EACCES; + if (file->f_op->write == redirected_tty_write) { + struct file *f; + spin_lock(&redirect_lock); +@@ -2363,9 +2463,9 @@ static int tiocsctty(struct tty_struct * + */ + + read_lock(&tasklist_lock); +- do_each_task_pid(tty->session, PIDTYPE_SID, p) { ++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) { + p->signal->tty = NULL; +- } while_each_task_pid(tty->session, PIDTYPE_SID, p); ++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p); + read_unlock(&tasklist_lock); + } else + return -EPERM; +@@ -2387,7 +2487,7 @@ static int tiocgpgrp(struct tty_struct * + */ + if (tty == real_tty && 
current->signal->tty != real_tty) + return -ENOTTY; +- return put_user(real_tty->pgrp, p); ++ return put_user(pid_type_to_vpid(PIDTYPE_PGID, real_tty->pgrp), p); + } + + static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) +@@ -2407,6 +2507,9 @@ static int tiocspgrp(struct tty_struct * + return -EFAULT; + if (pgrp < 0) + return -EINVAL; ++ pgrp = vpid_to_pid(pgrp); ++ if (pgrp < 0) ++ return -EPERM; + if (session_of_pgrp(pgrp) != current->signal->session) + return -EPERM; + real_tty->pgrp = pgrp; +@@ -2423,7 +2526,7 @@ static int tiocgsid(struct tty_struct *t + return -ENOTTY; + if (real_tty->session <= 0) + return -ENOTTY; +- return put_user(real_tty->session, p); ++ return put_user(pid_type_to_vpid(PIDTYPE_SID, real_tty->session), p); + } + + static int tiocsetd(struct tty_struct *tty, int __user *p) +@@ -2696,7 +2799,7 @@ static void __do_SAK(void *arg) + tty->driver->flush_buffer(tty); + + read_lock(&tasklist_lock); +- do_each_task_pid(session, PIDTYPE_SID, p) { ++ do_each_task_pid_all(session, PIDTYPE_SID, p) { + if (p->signal->tty == tty || session > 0) { + printk(KERN_NOTICE "SAK: killed process %d" + " (%s): p->signal->session==tty->session\n", +@@ -2706,7 +2809,11 @@ static void __do_SAK(void *arg) + } + task_lock(p); + if (p->files) { +- rcu_read_lock(); ++ /* ++ * We don't take a ref to the file, so we must ++ * hold ->file_lock instead. 
++ */ ++ spin_lock(&p->files->file_lock); + fdt = files_fdtable(p->files); + for (i=0; i < fdt->max_fds; i++) { + filp = fcheck_files(p->files, i); +@@ -2721,10 +2828,10 @@ static void __do_SAK(void *arg) + break; + } + } +- rcu_read_unlock(); ++ spin_unlock(&p->files->file_lock); + } + task_unlock(p); +- } while_each_task_pid(session, PIDTYPE_SID, p); ++ } while_each_task_pid_all(session, PIDTYPE_SID, p); + read_unlock(&tasklist_lock); + #endif + } +@@ -3095,8 +3202,11 @@ int tty_register_driver(struct tty_drive + + if (!driver->put_char) + driver->put_char = tty_default_put_char; +- ++ ++ SET_VE_OWNER_TTYDRV(driver, get_exec_env()); ++ write_lock_irq(&tty_driver_guard); + list_add(&driver->tty_drivers, &tty_drivers); ++ write_unlock_irq(&tty_driver_guard); + + if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) { + for(i = 0; i < driver->num; i++) +@@ -3123,7 +3233,9 @@ int tty_unregister_driver(struct tty_dri + unregister_chrdev_region(MKDEV(driver->major, driver->minor_start), + driver->num); + ++ write_lock_irq(&tty_driver_guard); + list_del(&driver->tty_drivers); ++ write_unlock_irq(&tty_driver_guard); + + /* + * Free the termios and termios_locked structures because +@@ -3246,6 +3358,7 @@ static int __init tty_init(void) + + vty_init(); + #endif ++ prepare_tty(); + return 0; + } + module_init(tty_init); +diff -upr linux-2.6.16.orig/drivers/edac/Kconfig linux-2.6.16-026test015/drivers/edac/Kconfig +--- linux-2.6.16.orig/drivers/edac/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/edac/Kconfig 2006-07-04 14:41:36.000000000 +0400 +@@ -71,7 +71,7 @@ config EDAC_E7XXX + + config EDAC_E752X + tristate "Intel e752x (e7520, e7525, e7320)" +- depends on EDAC_MM_EDAC && PCI ++ depends on EDAC_MM_EDAC && PCI && HOTPLUG + help + Support for error detection and correction on the Intel + E7520, E7525, E7320 server chipsets. 
+diff -upr linux-2.6.16.orig/drivers/i2c/busses/i2c-i801.c linux-2.6.16-026test015/drivers/i2c/busses/i2c-i801.c +--- linux-2.6.16.orig/drivers/i2c/busses/i2c-i801.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/i2c/busses/i2c-i801.c 2006-07-04 14:41:36.000000000 +0400 +@@ -478,6 +478,11 @@ static s32 i801_access(struct i2c_adapte + ret = i801_transaction(); + } + ++ /* Some BIOSes don't like it when PEC is enabled at reboot or resume ++ time, so we forcibly disable it after every transaction. */ ++ if (hwpec) ++ outb_p(0, SMBAUXCTL); ++ + if(block) + return ret; + if(ret) +diff -upr linux-2.6.16.orig/drivers/i2c/busses/scx200_acb.c linux-2.6.16-026test015/drivers/i2c/busses/scx200_acb.c +--- linux-2.6.16.orig/drivers/i2c/busses/scx200_acb.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/i2c/busses/scx200_acb.c 2006-07-04 14:41:36.000000000 +0400 +@@ -440,7 +440,6 @@ static int __init scx200_acb_create(int + struct scx200_acb_iface *iface; + struct i2c_adapter *adapter; + int rc = 0; +- char description[64]; + + iface = kzalloc(sizeof(*iface), GFP_KERNEL); + if (!iface) { +@@ -459,8 +458,7 @@ static int __init scx200_acb_create(int + + init_MUTEX(&iface->sem); + +- snprintf(description, sizeof(description), "NatSemi SCx200 ACCESS.bus [%s]", adapter->name); +- if (request_region(base, 8, description) == 0) { ++ if (!request_region(base, 8, adapter->name)) { + dev_err(&adapter->dev, "can't allocate io 0x%x-0x%x\n", + base, base + 8-1); + rc = -EBUSY; +diff -upr linux-2.6.16.orig/drivers/i2c/chips/m41t00.c linux-2.6.16-026test015/drivers/i2c/chips/m41t00.c +--- linux-2.6.16.orig/drivers/i2c/chips/m41t00.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/i2c/chips/m41t00.c 2006-07-04 14:41:36.000000000 +0400 +@@ -129,13 +129,13 @@ m41t00_set_tlet(ulong arg) + if ((i2c_smbus_write_byte_data(save_client, 0, tm.tm_sec & 0x7f) < 0) + || (i2c_smbus_write_byte_data(save_client, 1, tm.tm_min & 0x7f) 
+ < 0) +- || (i2c_smbus_write_byte_data(save_client, 2, tm.tm_hour & 0x7f) ++ || (i2c_smbus_write_byte_data(save_client, 2, tm.tm_hour & 0x3f) + < 0) +- || (i2c_smbus_write_byte_data(save_client, 4, tm.tm_mday & 0x7f) ++ || (i2c_smbus_write_byte_data(save_client, 4, tm.tm_mday & 0x3f) + < 0) +- || (i2c_smbus_write_byte_data(save_client, 5, tm.tm_mon & 0x7f) ++ || (i2c_smbus_write_byte_data(save_client, 5, tm.tm_mon & 0x1f) + < 0) +- || (i2c_smbus_write_byte_data(save_client, 6, tm.tm_year & 0x7f) ++ || (i2c_smbus_write_byte_data(save_client, 6, tm.tm_year & 0xff) + < 0)) + + dev_warn(&save_client->dev,"m41t00: can't write to rtc chip\n"); +diff -upr linux-2.6.16.orig/drivers/ide/pci/alim15x3.c linux-2.6.16-026test015/drivers/ide/pci/alim15x3.c +--- linux-2.6.16.orig/drivers/ide/pci/alim15x3.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/ide/pci/alim15x3.c 2006-07-04 14:41:36.000000000 +0400 +@@ -731,6 +731,8 @@ static unsigned int __devinit ata66_ali1 + + if(m5229_revision <= 0x20) + tmpbyte = (tmpbyte & (~0x02)) | 0x01; ++ else if (m5229_revision == 0xc7) ++ tmpbyte |= 0x03; + else + tmpbyte |= 0x01; + +diff -upr linux-2.6.16.orig/drivers/ieee1394/ohci1394.c linux-2.6.16-026test015/drivers/ieee1394/ohci1394.c +--- linux-2.6.16.orig/drivers/ieee1394/ohci1394.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/ieee1394/ohci1394.c 2006-07-04 14:41:36.000000000 +0400 +@@ -2525,7 +2525,7 @@ static irqreturn_t ohci_irq_handler(int + if (phys_dma) { + reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0xffffffff); + reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0xffffffff); +- reg_write(ohci,OHCI1394_PhyUpperBound, 0xffff0000); ++ reg_write(ohci,OHCI1394_PhyUpperBound, 0x01000000); + } else { + reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0x00000000); + reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0x00000000); +diff -upr linux-2.6.16.orig/drivers/ieee1394/sbp2.c linux-2.6.16-026test015/drivers/ieee1394/sbp2.c +--- 
linux-2.6.16.orig/drivers/ieee1394/sbp2.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/ieee1394/sbp2.c 2006-07-04 14:41:36.000000000 +0400 +@@ -495,22 +495,17 @@ static struct sbp2_command_info *sbp2uti + /* + * This function finds the sbp2_command for a given outstanding SCpnt. + * Only looks at the inuse list. ++ * Must be called with scsi_id->sbp2_command_orb_lock held. + */ +-static struct sbp2_command_info *sbp2util_find_command_for_SCpnt(struct scsi_id_instance_data *scsi_id, void *SCpnt) ++static struct sbp2_command_info *sbp2util_find_command_for_SCpnt( ++ struct scsi_id_instance_data *scsi_id, void *SCpnt) + { + struct sbp2_command_info *command; +- unsigned long flags; + +- spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags); +- if (!list_empty(&scsi_id->sbp2_command_orb_inuse)) { +- list_for_each_entry(command, &scsi_id->sbp2_command_orb_inuse, list) { +- if (command->Current_SCpnt == SCpnt) { +- spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags); ++ if (!list_empty(&scsi_id->sbp2_command_orb_inuse)) ++ list_for_each_entry(command, &scsi_id->sbp2_command_orb_inuse, list) ++ if (command->Current_SCpnt == SCpnt) + return command; +- } +- } +- } +- spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags); + return NULL; + } + +@@ -579,17 +574,15 @@ static void sbp2util_free_command_dma(st + + /* + * This function moves a command to the completed orb list. ++ * Must be called with scsi_id->sbp2_command_orb_lock held. 
+ */ +-static void sbp2util_mark_command_completed(struct scsi_id_instance_data *scsi_id, +- struct sbp2_command_info *command) ++static void sbp2util_mark_command_completed( ++ struct scsi_id_instance_data *scsi_id, ++ struct sbp2_command_info *command) + { +- unsigned long flags; +- +- spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags); + list_del(&command->list); + sbp2util_free_command_dma(command); + list_add_tail(&command->list, &scsi_id->sbp2_command_orb_completed); +- spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags); + } + + /* +@@ -761,12 +754,17 @@ static struct scsi_id_instance_data *sbp + + /* Register the status FIFO address range. We could use the same FIFO + * for targets at different nodes. However we need different FIFOs per +- * target in order to support multi-unit devices. */ ++ * target in order to support multi-unit devices. ++ * The FIFO is located out of the local host controller's physical range ++ * but, if possible, within the posted write area. Status writes will ++ * then be performed as unified transactions. This slightly reduces ++ * bandwidth usage, and some Prolific based devices seem to require it. 
++ */ + scsi_id->status_fifo_addr = hpsb_allocate_and_register_addrspace( + &sbp2_highlevel, ud->ne->host, &sbp2_ops, + sizeof(struct sbp2_status_block), sizeof(quadlet_t), +- ~0ULL, ~0ULL); +- if (!scsi_id->status_fifo_addr) { ++ 0x010000000000ULL, CSR1212_ALL_SPACE_END); ++ if (scsi_id->status_fifo_addr == ~0ULL) { + SBP2_ERR("failed to allocate status FIFO address range"); + goto failed_alloc; + } +@@ -2177,7 +2175,9 @@ static int sbp2_handle_status_write(stru + * Matched status with command, now grab scsi command pointers and check status + */ + SCpnt = command->Current_SCpnt; ++ spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags); + sbp2util_mark_command_completed(scsi_id, command); ++ spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags); + + if (SCpnt) { + +@@ -2491,9 +2491,20 @@ static int sbp2scsi_slave_alloc(struct s + + static int sbp2scsi_slave_configure(struct scsi_device *sdev) + { ++ struct scsi_id_instance_data *scsi_id = ++ (struct scsi_id_instance_data *)sdev->host->hostdata[0]; ++ + blk_queue_dma_alignment(sdev->request_queue, (512 - 1)); + sdev->use_10_for_rw = 1; + sdev->use_10_for_ms = 1; ++ ++ if ((scsi_id->sbp2_firmware_revision & 0xffff00) == 0x0a2700 && ++ (scsi_id->ud->model_id == 0x000021 /* gen.4 iPod */ || ++ scsi_id->ud->model_id == 0x000023 /* iPod mini */ || ++ scsi_id->ud->model_id == 0x00007e /* iPod Photo */ )) { ++ SBP2_INFO("enabling iPod workaround: decrement disk capacity"); ++ sdev->fix_capacity = 1; ++ } + return 0; + } + +@@ -2513,6 +2524,7 @@ static int sbp2scsi_abort(struct scsi_cm + (struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0]; + struct sbp2scsi_host_info *hi = scsi_id->hi; + struct sbp2_command_info *command; ++ unsigned long flags; + + SBP2_ERR("aborting sbp2 command"); + scsi_print_command(SCpnt); +@@ -2523,6 +2535,7 @@ static int sbp2scsi_abort(struct scsi_cm + * Right now, just return any matching command structures + * to the free pool. 
+ */ ++ spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags); + command = sbp2util_find_command_for_SCpnt(scsi_id, SCpnt); + if (command) { + SBP2_DEBUG("Found command to abort"); +@@ -2540,6 +2553,7 @@ static int sbp2scsi_abort(struct scsi_cm + command->Current_done(command->Current_SCpnt); + } + } ++ spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags); + + /* + * Initiate a fetch agent reset. +diff -upr linux-2.6.16.orig/drivers/input/mouse/psmouse-base.c linux-2.6.16-026test015/drivers/input/mouse/psmouse-base.c +--- linux-2.6.16.orig/drivers/input/mouse/psmouse-base.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/input/mouse/psmouse-base.c 2006-07-04 14:41:36.000000000 +0400 +@@ -300,8 +300,10 @@ static irqreturn_t psmouse_interrupt(str + * Check if this is a new device announcement (0xAA 0x00) + */ + if (unlikely(psmouse->packet[0] == PSMOUSE_RET_BAT && psmouse->pktcnt <= 2)) { +- if (psmouse->pktcnt == 1) ++ if (psmouse->pktcnt == 1) { ++ psmouse->last = jiffies; + goto out; ++ } + + if (psmouse->packet[1] == PSMOUSE_RET_ID) { + __psmouse_set_state(psmouse, PSMOUSE_IGNORE); +diff -upr linux-2.6.16.orig/drivers/macintosh/therm_adt746x.c linux-2.6.16-026test015/drivers/macintosh/therm_adt746x.c +--- linux-2.6.16.orig/drivers/macintosh/therm_adt746x.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/macintosh/therm_adt746x.c 2006-07-04 14:41:36.000000000 +0400 +@@ -627,8 +627,8 @@ thermostat_init(void) + if(therm_type == ADT7460) + device_create_file(&of_dev->dev, &dev_attr_sensor2_fan_speed); + +-#ifndef CONFIG_I2C_KEYWEST +- request_module("i2c-keywest"); ++#ifndef CONFIG_I2C_POWERMAC ++ request_module("i2c-powermac"); + #endif + + return i2c_add_driver(&thermostat_driver); +diff -upr linux-2.6.16.orig/drivers/md/dm-snap.c linux-2.6.16-026test015/drivers/md/dm-snap.c +--- linux-2.6.16.orig/drivers/md/dm-snap.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/md/dm-snap.c 
2006-07-04 14:41:36.000000000 +0400 +@@ -542,8 +542,12 @@ static void snapshot_dtr(struct dm_targe + { + struct dm_snapshot *s = (struct dm_snapshot *) ti->private; + ++ /* Prevent further origin writes from using this snapshot. */ ++ /* After this returns there can be no new kcopyd jobs. */ + unregister_snapshot(s); + ++ kcopyd_client_destroy(s->kcopyd_client); ++ + exit_exception_table(&s->pending, pending_cache); + exit_exception_table(&s->complete, exception_cache); + +@@ -552,7 +556,7 @@ static void snapshot_dtr(struct dm_targe + + dm_put_device(ti, s->origin); + dm_put_device(ti, s->cow); +- kcopyd_client_destroy(s->kcopyd_client); ++ + kfree(s); + } + +diff -upr linux-2.6.16.orig/drivers/md/dm.c linux-2.6.16-026test015/drivers/md/dm.c +--- linux-2.6.16.orig/drivers/md/dm.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/md/dm.c 2006-07-04 14:41:36.000000000 +0400 +@@ -533,30 +533,35 @@ static void __clone_and_map(struct clone + + } else { + /* +- * Create two copy bios to deal with io that has +- * been split across a target. ++ * Handle a bvec that must be split between two or more targets. 
+ */ + struct bio_vec *bv = bio->bi_io_vec + ci->idx; ++ sector_t remaining = to_sector(bv->bv_len); ++ unsigned int offset = 0; + +- clone = split_bvec(bio, ci->sector, ci->idx, +- bv->bv_offset, max); +- __map_bio(ti, clone, tio); +- +- ci->sector += max; +- ci->sector_count -= max; +- ti = dm_table_find_target(ci->map, ci->sector); +- +- len = to_sector(bv->bv_len) - max; +- clone = split_bvec(bio, ci->sector, ci->idx, +- bv->bv_offset + to_bytes(max), len); +- tio = alloc_tio(ci->md); +- tio->io = ci->io; +- tio->ti = ti; +- memset(&tio->info, 0, sizeof(tio->info)); +- __map_bio(ti, clone, tio); ++ do { ++ if (offset) { ++ ti = dm_table_find_target(ci->map, ci->sector); ++ max = max_io_len(ci->md, ci->sector, ti); ++ ++ tio = alloc_tio(ci->md); ++ tio->io = ci->io; ++ tio->ti = ti; ++ memset(&tio->info, 0, sizeof(tio->info)); ++ } ++ ++ len = min(remaining, max); ++ ++ clone = split_bvec(bio, ci->sector, ci->idx, ++ bv->bv_offset + offset, len); ++ ++ __map_bio(ti, clone, tio); ++ ++ ci->sector += len; ++ ci->sector_count -= len; ++ offset += to_bytes(len); ++ } while (remaining -= len); + +- ci->sector += len; +- ci->sector_count -= len; + ci->idx++; + } + } +@@ -1093,6 +1098,7 @@ int dm_suspend(struct mapped_device *md, + { + struct dm_table *map = NULL; + DECLARE_WAITQUEUE(wait, current); ++ struct bio *def; + int r = -EINVAL; + + down(&md->suspend_lock); +@@ -1152,9 +1158,11 @@ int dm_suspend(struct mapped_device *md, + /* were we interrupted ? 
*/ + r = -EINTR; + if (atomic_read(&md->pending)) { ++ clear_bit(DMF_BLOCK_IO, &md->flags); ++ def = bio_list_get(&md->deferred); ++ __flush_deferred_io(md, def); + up_write(&md->io_lock); + unlock_fs(md); +- clear_bit(DMF_BLOCK_IO, &md->flags); + goto out; + } + up_write(&md->io_lock); +diff -upr linux-2.6.16.orig/drivers/md/kcopyd.c linux-2.6.16-026test015/drivers/md/kcopyd.c +--- linux-2.6.16.orig/drivers/md/kcopyd.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/md/kcopyd.c 2006-07-04 14:41:36.000000000 +0400 +@@ -44,6 +44,9 @@ struct kcopyd_client { + struct page_list *pages; + unsigned int nr_pages; + unsigned int nr_free_pages; ++ ++ wait_queue_head_t destroyq; ++ atomic_t nr_jobs; + }; + + static struct page_list *alloc_pl(void) +@@ -293,10 +296,15 @@ static int run_complete_job(struct kcopy + int read_err = job->read_err; + unsigned int write_err = job->write_err; + kcopyd_notify_fn fn = job->fn; ++ struct kcopyd_client *kc = job->kc; + +- kcopyd_put_pages(job->kc, job->pages); ++ kcopyd_put_pages(kc, job->pages); + mempool_free(job, _job_pool); + fn(read_err, write_err, context); ++ ++ if (atomic_dec_and_test(&kc->nr_jobs)) ++ wake_up(&kc->destroyq); ++ + return 0; + } + +@@ -431,6 +439,7 @@ static void do_work(void *ignored) + */ + static void dispatch_job(struct kcopyd_job *job) + { ++ atomic_inc(&job->kc->nr_jobs); + push(&_pages_jobs, job); + wake(); + } +@@ -670,6 +679,9 @@ int kcopyd_client_create(unsigned int nr + return r; + } + ++ init_waitqueue_head(&kc->destroyq); ++ atomic_set(&kc->nr_jobs, 0); ++ + client_add(kc); + *result = kc; + return 0; +@@ -677,6 +689,9 @@ int kcopyd_client_create(unsigned int nr + + void kcopyd_client_destroy(struct kcopyd_client *kc) + { ++ /* Wait for completion of all jobs submitted by this client. 
*/ ++ wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); ++ + dm_io_put(kc->nr_pages); + client_free_pages(kc); + client_del(kc); +diff -upr linux-2.6.16.orig/drivers/md/raid10.c linux-2.6.16-026test015/drivers/md/raid10.c +--- linux-2.6.16.orig/drivers/md/raid10.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/md/raid10.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1436,9 +1436,9 @@ static void raid10d(mddev_t *mddev) + sl--; + d = r10_bio->devs[sl].devnum; + rdev = conf->mirrors[d].rdev; +- atomic_add(s, &rdev->corrected_errors); + if (rdev && + test_bit(In_sync, &rdev->flags)) { ++ atomic_add(s, &rdev->corrected_errors); + if (sync_page_io(rdev->bdev, + r10_bio->devs[sl].addr + + sect + rdev->data_offset, +diff -upr linux-2.6.16.orig/drivers/media/dvb/dvb-usb/cxusb.c linux-2.6.16-026test015/drivers/media/dvb/dvb-usb/cxusb.c +--- linux-2.6.16.orig/drivers/media/dvb/dvb-usb/cxusb.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/media/dvb/dvb-usb/cxusb.c 2006-07-04 14:41:36.000000000 +0400 +@@ -149,6 +149,15 @@ static int cxusb_power_ctrl(struct dvb_u + return cxusb_ctrl_msg(d, CMD_POWER_OFF, &b, 1, NULL, 0); + } + ++static int cxusb_bluebird_power_ctrl(struct dvb_usb_device *d, int onoff) ++{ ++ u8 b = 0; ++ if (onoff) ++ return cxusb_ctrl_msg(d, CMD_POWER_ON, &b, 1, NULL, 0); ++ else ++ return 0; ++} ++ + static int cxusb_streaming_ctrl(struct dvb_usb_device *d, int onoff) + { + u8 buf[2] = { 0x03, 0x00 }; +@@ -505,7 +514,7 @@ static struct dvb_usb_properties cxusb_b + .size_of_priv = sizeof(struct cxusb_state), + + .streaming_ctrl = cxusb_streaming_ctrl, +- .power_ctrl = cxusb_power_ctrl, ++ .power_ctrl = cxusb_bluebird_power_ctrl, + .frontend_attach = cxusb_lgdt3303_frontend_attach, + .tuner_attach = cxusb_lgh064f_tuner_attach, + +@@ -545,7 +554,7 @@ static struct dvb_usb_properties cxusb_b + .size_of_priv = sizeof(struct cxusb_state), + + .streaming_ctrl = cxusb_streaming_ctrl, +- .power_ctrl = 
cxusb_power_ctrl, ++ .power_ctrl = cxusb_bluebird_power_ctrl, + .frontend_attach = cxusb_dee1601_frontend_attach, + .tuner_attach = cxusb_dee1601_tuner_attach, + +@@ -594,7 +603,7 @@ static struct dvb_usb_properties cxusb_b + .size_of_priv = sizeof(struct cxusb_state), + + .streaming_ctrl = cxusb_streaming_ctrl, +- .power_ctrl = cxusb_power_ctrl, ++ .power_ctrl = cxusb_bluebird_power_ctrl, + .frontend_attach = cxusb_mt352_frontend_attach, + .tuner_attach = cxusb_lgz201_tuner_attach, + +@@ -634,7 +643,7 @@ static struct dvb_usb_properties cxusb_b + .size_of_priv = sizeof(struct cxusb_state), + + .streaming_ctrl = cxusb_streaming_ctrl, +- .power_ctrl = cxusb_power_ctrl, ++ .power_ctrl = cxusb_bluebird_power_ctrl, + .frontend_attach = cxusb_mt352_frontend_attach, + .tuner_attach = cxusb_dtt7579_tuner_attach, + +diff -upr linux-2.6.16.orig/drivers/media/video/Kconfig linux-2.6.16-026test015/drivers/media/video/Kconfig +--- linux-2.6.16.orig/drivers/media/video/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/media/video/Kconfig 2006-07-04 14:41:36.000000000 +0400 +@@ -349,6 +349,7 @@ config VIDEO_AUDIO_DECODER + config VIDEO_DECODER + tristate "Add support for additional video chipsets" + depends on VIDEO_DEV && I2C && EXPERIMENTAL ++ select FW_LOADER + ---help--- + Say Y here to compile drivers for SAA7115, SAA7127 and CX25840 + video decoders. 
+diff -upr linux-2.6.16.orig/drivers/media/video/saa7127.c linux-2.6.16-026test015/drivers/media/video/saa7127.c +--- linux-2.6.16.orig/drivers/media/video/saa7127.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/media/video/saa7127.c 2006-07-04 14:41:36.000000000 +0400 +@@ -141,6 +141,7 @@ struct i2c_reg_value { + static const struct i2c_reg_value saa7129_init_config_extra[] = { + { SAA7127_REG_OUTPUT_PORT_CONTROL, 0x38 }, + { SAA7127_REG_VTRIG, 0xfa }, ++ { 0, 0 } + }; + + static const struct i2c_reg_value saa7127_init_config_common[] = { +diff -upr linux-2.6.16.orig/drivers/media/video/tuner-types.c linux-2.6.16-026test015/drivers/media/video/tuner-types.c +--- linux-2.6.16.orig/drivers/media/video/tuner-types.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/media/video/tuner-types.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1087,8 +1087,8 @@ static struct tuner_params tuner_tnf_533 + /* ------------ TUNER_SAMSUNG_TCPN_2121P30A - Samsung NTSC ------------ */ + + static struct tuner_range tuner_samsung_tcpn_2121p30a_ntsc_ranges[] = { +- { 16 * 175.75 /*MHz*/, 0x01, }, +- { 16 * 410.25 /*MHz*/, 0x02, }, ++ { 16 * 130.00 /*MHz*/, 0x01, }, ++ { 16 * 364.50 /*MHz*/, 0x02, }, + { 16 * 999.99 , 0x08, }, + }; + +diff -upr linux-2.6.16.orig/drivers/message/i2o/exec-osm.c linux-2.6.16-026test015/drivers/message/i2o/exec-osm.c +--- linux-2.6.16.orig/drivers/message/i2o/exec-osm.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/message/i2o/exec-osm.c 2006-07-04 14:41:36.000000000 +0400 +@@ -55,6 +55,7 @@ struct i2o_exec_wait { + u32 m; /* message id */ + struct i2o_message *msg; /* pointer to the reply message */ + struct list_head list; /* node in global wait list */ ++ spinlock_t lock; /* lock before modifying */ + }; + + /* Exec OSM class handling definition */ +@@ -80,6 +81,7 @@ static struct i2o_exec_wait *i2o_exec_wa + return NULL; + + INIT_LIST_HEAD(&wait->list); ++ 
spin_lock_init(&wait->lock); + + return wait; + }; +@@ -118,6 +120,7 @@ int i2o_msg_post_wait_mem(struct i2o_con + DECLARE_WAIT_QUEUE_HEAD(wq); + struct i2o_exec_wait *wait; + static u32 tcntxt = 0x80000000; ++ long flags; + int rc = 0; + + wait = i2o_exec_wait_alloc(); +@@ -139,33 +142,28 @@ int i2o_msg_post_wait_mem(struct i2o_con + wait->tcntxt = tcntxt++; + msg->u.s.tcntxt = cpu_to_le32(wait->tcntxt); + ++ wait->wq = &wq; ++ /* ++ * we add elements to the head, because if a entry in the list will ++ * never be removed, we have to iterate over it every time ++ */ ++ list_add(&wait->list, &i2o_exec_wait_list); ++ + /* + * Post the message to the controller. At some point later it will + * return. If we time out before it returns then complete will be zero. + */ + i2o_msg_post(c, msg); + +- if (!wait->complete) { +- wait->wq = &wq; +- /* +- * we add elements add the head, because if a entry in the list +- * will never be removed, we have to iterate over it every time +- */ +- list_add(&wait->list, &i2o_exec_wait_list); +- +- wait_event_interruptible_timeout(wq, wait->complete, +- timeout * HZ); ++ wait_event_interruptible_timeout(wq, wait->complete, timeout * HZ); + +- wait->wq = NULL; +- } ++ spin_lock_irqsave(&wait->lock, flags); + +- barrier(); ++ wait->wq = NULL; + +- if (wait->complete) { ++ if (wait->complete) + rc = le32_to_cpu(wait->msg->body[0]) >> 24; +- i2o_flush_reply(c, wait->m); +- i2o_exec_wait_free(wait); +- } else { ++ else { + /* + * We cannot remove it now. This is important. 
When it does + * terminate (which it must do if the controller has not +@@ -179,6 +177,13 @@ int i2o_msg_post_wait_mem(struct i2o_con + rc = -ETIMEDOUT; + } + ++ spin_unlock_irqrestore(&wait->lock, flags); ++ ++ if (rc != -ETIMEDOUT) { ++ i2o_flush_reply(c, wait->m); ++ i2o_exec_wait_free(wait); ++ } ++ + return rc; + }; + +@@ -206,7 +211,6 @@ static int i2o_msg_post_wait_complete(st + { + struct i2o_exec_wait *wait, *tmp; + unsigned long flags; +- static spinlock_t lock = SPIN_LOCK_UNLOCKED; + int rc = 1; + + /* +@@ -216,23 +220,24 @@ static int i2o_msg_post_wait_complete(st + * already expired. Not much we can do about that except log it for + * debug purposes, increase timeout, and recompile. + */ +- spin_lock_irqsave(&lock, flags); + list_for_each_entry_safe(wait, tmp, &i2o_exec_wait_list, list) { + if (wait->tcntxt == context) { +- list_del(&wait->list); ++ spin_lock_irqsave(&wait->lock, flags); + +- spin_unlock_irqrestore(&lock, flags); ++ list_del(&wait->list); + + wait->m = m; + wait->msg = msg; + wait->complete = 1; + +- barrier(); +- +- if (wait->wq) { +- wake_up_interruptible(wait->wq); ++ if (wait->wq) + rc = 0; +- } else { ++ else ++ rc = -1; ++ ++ spin_unlock_irqrestore(&wait->lock, flags); ++ ++ if (rc) { + struct device *dev; + + dev = &c->pdev->dev; +@@ -241,15 +246,13 @@ static int i2o_msg_post_wait_complete(st + c->name); + i2o_dma_free(dev, &wait->dma); + i2o_exec_wait_free(wait); +- rc = -1; +- } ++ } else ++ wake_up_interruptible(wait->wq); + + return rc; + } + } + +- spin_unlock_irqrestore(&lock, flags); +- + osm_warn("%s: Bogus reply in POST WAIT (tr-context: %08x)!\n", c->name, + context); + +@@ -315,14 +318,9 @@ static DEVICE_ATTR(product_id, S_IRUGO, + static int i2o_exec_probe(struct device *dev) + { + struct i2o_device *i2o_dev = to_i2o_device(dev); +- struct i2o_controller *c = i2o_dev->iop; + + i2o_event_register(i2o_dev, &i2o_exec_driver, 0, 0xffffffff); + +- c->exec = i2o_dev; +- +- i2o_exec_lct_notify(c, c->lct->change_ind + 1); +- 
+ device_create_file(dev, &dev_attr_vendor_id); + device_create_file(dev, &dev_attr_product_id); + +@@ -510,6 +508,8 @@ static int i2o_exec_lct_notify(struct i2 + struct device *dev; + struct i2o_message *msg; + ++ down(&c->lct_lock); ++ + dev = &c->pdev->dev; + + if (i2o_dma_realloc +@@ -532,6 +532,8 @@ static int i2o_exec_lct_notify(struct i2 + + i2o_msg_post(c, msg); + ++ up(&c->lct_lock); ++ + return 0; + }; + +diff -upr linux-2.6.16.orig/drivers/message/i2o/iop.c linux-2.6.16-026test015/drivers/message/i2o/iop.c +--- linux-2.6.16.orig/drivers/message/i2o/iop.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/message/i2o/iop.c 2006-07-04 14:41:36.000000000 +0400 +@@ -804,8 +804,6 @@ void i2o_iop_remove(struct i2o_controlle + + /* Ask the IOP to switch to RESET state */ + i2o_iop_reset(c); +- +- put_device(&c->device); + } + + /** +@@ -1059,7 +1057,7 @@ struct i2o_controller *i2o_iop_alloc(voi + + snprintf(poolname, sizeof(poolname), "i2o_%s_msg_inpool", c->name); + if (i2o_pool_alloc +- (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4, ++ (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4 + sizeof(u32), + I2O_MSG_INPOOL_MIN)) { + kfree(c); + return ERR_PTR(-ENOMEM); +diff -upr linux-2.6.16.orig/drivers/mtd/nand/Kconfig linux-2.6.16-026test015/drivers/mtd/nand/Kconfig +--- linux-2.6.16.orig/drivers/mtd/nand/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/mtd/nand/Kconfig 2006-07-04 14:41:36.000000000 +0400 +@@ -178,17 +178,16 @@ config MTD_NAND_DISKONCHIP_BBTWRITE + Even if you leave this disabled, you can enable BBT writes at module + load time (assuming you build diskonchip as a module) with the module + parameter "inftl_bbt_write=1". 
+- +- config MTD_NAND_SHARPSL +- bool "Support for NAND Flash on Sharp SL Series (C7xx + others)" +- depends on MTD_NAND && ARCH_PXA +- +- config MTD_NAND_NANDSIM +- bool "Support for NAND Flash Simulator" +- depends on MTD_NAND && MTD_PARTITIONS + ++config MTD_NAND_SHARPSL ++ tristate "Support for NAND Flash on Sharp SL Series (C7xx + others)" ++ depends on MTD_NAND && ARCH_PXA ++ ++config MTD_NAND_NANDSIM ++ tristate "Support for NAND Flash Simulator" ++ depends on MTD_NAND && MTD_PARTITIONS + help + The simulator may simulate verious NAND flash chips for the + MTD nand layer. +- ++ + endmenu +diff -upr linux-2.6.16.orig/drivers/net/Makefile linux-2.6.16-026test015/drivers/net/Makefile +--- linux-2.6.16.orig/drivers/net/Makefile 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/Makefile 2006-07-04 14:41:39.000000000 +0400 +@@ -18,6 +18,12 @@ gianfar_driver-objs := gianfar.o \ + gianfar_mii.o \ + gianfar_sysfs.o + ++obj-$(CONFIG_VE_NETDEV) += vznetdev.o ++vznetdev-objs := open_vznet.o venet_core.o ++ ++obj-$(CONFIG_VE_ETHDEV) += vzethdev.o ++vzethdev-objs := veth.o ++ + # + # link order important here + # +diff -upr linux-2.6.16.orig/drivers/net/e1000/e1000_main.c linux-2.6.16-026test015/drivers/net/e1000/e1000_main.c +--- linux-2.6.16.orig/drivers/net/e1000/e1000_main.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/e1000/e1000_main.c 2006-07-04 14:41:36.000000000 +0400 +@@ -3851,6 +3851,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapt + skb_shinfo(skb)->nr_frags++; + skb->len += length; + skb->data_len += length; ++ skb->truesize += length; + } + + e1000_rx_checksum(adapter, staterr, +diff -upr linux-2.6.16.orig/drivers/net/irda/irda-usb.c linux-2.6.16-026test015/drivers/net/irda/irda-usb.c +--- linux-2.6.16.orig/drivers/net/irda/irda-usb.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/irda/irda-usb.c 2006-07-04 14:41:36.000000000 +0400 +@@ -740,7 +740,7 @@ static void 
irda_usb_receive(struct urb + struct sk_buff *newskb; + struct sk_buff *dataskb; + struct urb *next_urb; +- int docopy; ++ unsigned int len, docopy; + + IRDA_DEBUG(2, "%s(), len=%d\n", __FUNCTION__, urb->actual_length); + +@@ -851,10 +851,11 @@ static void irda_usb_receive(struct urb + dataskb->dev = self->netdev; + dataskb->mac.raw = dataskb->data; + dataskb->protocol = htons(ETH_P_IRDA); ++ len = dataskb->len; + netif_rx(dataskb); + + /* Keep stats up to date */ +- self->stats.rx_bytes += dataskb->len; ++ self->stats.rx_bytes += len; + self->stats.rx_packets++; + self->netdev->last_rx = jiffies; + +diff -upr linux-2.6.16.orig/drivers/net/loopback.c linux-2.6.16-026test015/drivers/net/loopback.c +--- linux-2.6.16.orig/drivers/net/loopback.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/loopback.c 2006-07-04 14:41:39.000000000 +0400 +@@ -130,6 +130,11 @@ static int loopback_xmit(struct sk_buff + { + struct net_device_stats *lb_stats; + ++ if (unlikely(get_exec_env()->disable_net)) { ++ kfree_skb(skb); ++ return 0; ++ } ++ + skb_orphan(skb); + + skb->protocol = eth_type_trans(skb,dev); +@@ -198,6 +203,34 @@ static struct ethtool_ops loopback_ethto + .set_tso = ethtool_op_set_tso, + }; + ++static void loopback_destructor(struct net_device *dev) ++{ ++ kfree(dev->priv); ++ dev->priv = NULL; ++} ++ ++struct net_device templ_loopback_dev = { ++ .name = "lo", ++ .mtu = (16 * 1024) + 20 + 20 + 12, ++ .hard_start_xmit = loopback_xmit, ++ .hard_header = eth_header, ++ .hard_header_cache = eth_header_cache, ++ .header_cache_update = eth_header_cache_update, ++ .hard_header_len = ETH_HLEN, /* 14 */ ++ .addr_len = ETH_ALEN, /* 6 */ ++ .tx_queue_len = 0, ++ .type = ARPHRD_LOOPBACK, /* 0x0001*/ ++ .rebuild_header = eth_rebuild_header, ++ .flags = IFF_LOOPBACK, ++ .features = NETIF_F_SG|NETIF_F_FRAGLIST ++ |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA ++ |NETIF_F_LLTX|NETIF_F_VIRTUAL, ++}; ++ ++#ifdef loopback_dev ++#undef loopback_dev ++#endif ++ + struct 
net_device loopback_dev = { + .name = "lo", + .mtu = (16 * 1024) + 20 + 20 + 12, +@@ -231,9 +264,13 @@ int __init loopback_init(void) + memset(stats, 0, sizeof(struct net_device_stats)); + loopback_dev.priv = stats; + loopback_dev.get_stats = &get_stats; ++ loopback_dev.destructor = &loopback_destructor; + } +- ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ get_ve0()->_loopback_dev = &loopback_dev; ++#endif + return register_netdev(&loopback_dev); + }; + + EXPORT_SYMBOL(loopback_dev); ++EXPORT_SYMBOL(templ_loopback_dev); +diff -upr linux-2.6.16.orig/drivers/net/open_vznet.c linux-2.6.16-026test015/drivers/net/open_vznet.c +--- linux-2.6.16.orig/drivers/net/open_vznet.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/drivers/net/open_vznet.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,227 @@ ++/* ++ * open_vznet.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++/* ++ * Virtual Networking device used to change VE ownership on packets ++ */ ++ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/seq_file.h> ++ ++#include <linux/inet.h> ++#include <net/ip.h> ++#include <linux/skbuff.h> ++#include <linux/venet.h> ++ ++void veip_stop(struct ve_struct *ve) ++{ ++ struct list_head *p, *tmp; ++ ++ write_lock_irq(&veip_hash_lock); ++ if (ve->veip == NULL) ++ goto unlock; ++ list_for_each_safe(p, tmp, &ve->veip->ip_lh) { ++ struct ip_entry_struct *ptr; ++ ptr = list_entry(p, struct ip_entry_struct, ve_list); ++ ptr->active_env = NULL; ++ list_del(&ptr->ve_list); ++ list_del(&ptr->ip_hash); ++ kfree(ptr); ++ } ++ veip_put(ve->veip); ++ ve->veip = NULL; ++unlock: ++ write_unlock_irq(&veip_hash_lock); ++} ++ ++int veip_start(struct ve_struct *ve) ++{ ++ int err; ++ ++ err = 0; ++ write_lock_irq(&veip_hash_lock); ++ ve->veip = veip_findcreate(ve->veid); ++ if (ve->veip == NULL) ++ err = -ENOMEM; ++ 
write_unlock_irq(&veip_hash_lock); ++ return err; ++} ++ ++int veip_entry_add(struct ve_struct *ve, struct sockaddr *addr) ++{ ++ struct ip_entry_struct *entry, *found; ++ int err; ++ ++ entry = kmalloc(sizeof(struct ip_entry_struct), GFP_KERNEL); ++ if (entry == NULL) ++ return -ENOMEM; ++ ++ memset(entry, 0, sizeof(struct ip_entry_struct)); ++ entry->family = addr->sa_family; ++ if (addr->sa_family == AF_INET) { ++ entry->key[3] = ((struct sockaddr_in*)addr)->sin_addr.s_addr; ++ } else if (addr->sa_family == AF_INET6) { ++ memcpy(entry->key, &((struct sockaddr_in6*)addr)->sin6_addr, 16); ++ } else { ++ kfree(entry); ++ return -EAFNOSUPPORT; ++ } ++ ++ write_lock_irq(&veip_hash_lock); ++ err = -EADDRINUSE; ++ found = venet_entry_lookup(entry->key, entry->family); ++ if (found != NULL) ++ goto out_unlock; ++ else { ++ ip_entry_hash(entry, ve->veip); ++ found = entry; ++ entry = NULL; ++ } ++ err = 0; ++ found->active_env = ve; ++out_unlock: ++ write_unlock_irq(&veip_hash_lock); ++ if (entry != NULL) ++ kfree(entry); ++ return err; ++} ++ ++int veip_entry_del(envid_t veid, struct sockaddr *addr) ++{ ++ struct ip_entry_struct *found; ++ u32 key[4]; ++ int err; ++ ++ if (addr->sa_family == AF_INET) { ++ memset(key, 0, sizeof(key)); ++ key[3] = ((struct sockaddr_in*)addr)->sin_addr.s_addr; ++ } else if (addr->sa_family == AF_INET6) { ++ memcpy(key, &((struct sockaddr_in6*)addr)->sin6_addr, 16); ++ } else { ++ return -EAFNOSUPPORT; ++ } ++ ++ err = -EADDRNOTAVAIL; ++ write_lock_irq(&veip_hash_lock); ++ found = venet_entry_lookup(key, addr->sa_family); ++ if (found == NULL) ++ goto out; ++ if (found->active_env->veid != veid) ++ goto out; ++ ++ err = 0; ++ found->active_env = NULL; ++ ++ list_del(&found->ip_hash); ++ list_del(&found->ve_list); ++ kfree(found); ++out: ++ write_unlock_irq(&veip_hash_lock); ++ return err; ++} ++ ++static struct ve_struct *venet_find_ve(struct sk_buff *skb, int dir) ++{ ++ struct ip_entry_struct *entry; ++ ++ if (skb->protocol == 
__constant_htons(ETH_P_IP)) { ++ entry = ip_entry_lookup(dir ? skb->nh.iph->daddr : ++ skb->nh.iph->saddr); ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ } else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { ++ entry = venet_entry_lookup(dir ? skb->nh.ipv6h->daddr.s6_addr32 : ++ skb->nh.ipv6h->saddr.s6_addr32, AF_INET6); ++#endif ++ } else { ++ return NULL; ++ } ++ if (entry == NULL) ++ return NULL; ++ ++ return entry->active_env; ++} ++ ++int venet_change_skb_owner(struct sk_buff *skb) ++{ ++ struct ve_struct *ve, *ve_old; ++ ++ ve_old = skb->owner_env; ++ ++ read_lock(&veip_hash_lock); ++ if (!ve_is_super(ve_old)) { ++ /* from VE to host */ ++ ve = venet_find_ve(skb, 0); ++ if (ve == NULL) ++ goto out_drop; ++ if (!ve_accessible_strict(ve, ve_old)) ++ goto out_source; ++ skb->owner_env = get_ve0(); ++ } else { ++ /* from host to VE */ ++ ve = venet_find_ve(skb, 1); ++ if (ve == NULL) ++ goto out_drop; ++ skb->owner_env = ve; ++ } ++ read_unlock(&veip_hash_lock); ++ ++ return 0; ++ ++out_drop: ++ read_unlock(&veip_hash_lock); ++ return -ESRCH; ++ ++out_source: ++ read_unlock(&veip_hash_lock); ++ if (net_ratelimit() && skb->protocol == __constant_htons(ETH_P_IP)) { ++ printk(KERN_WARNING "Dropped packet, source wrong " ++ "veid=%u src-IP=%u.%u.%u.%u " ++ "dst-IP=%u.%u.%u.%u\n", ++ skb->owner_env->veid, ++ NIPQUAD(skb->nh.iph->saddr), ++ NIPQUAD(skb->nh.iph->daddr)); ++ } ++ return -EACCES; ++} ++ ++#ifdef CONFIG_PROC_FS ++int veip_seq_show(struct seq_file *m, void *v) ++{ ++ struct list_head *p; ++ struct ip_entry_struct *entry; ++ char s[40]; ++ ++ p = (struct list_head *)v; ++ if (p == ip_entry_hash_table) { ++ seq_puts(m, "Version: 2.5\n"); ++ return 0; ++ } ++ entry = list_entry(p, struct ip_entry_struct, ip_hash); ++ if (entry->family == AF_INET) ++ sprintf(s, "%u.%u.%u.%u", NIPQUAD(entry->key[3])); ++ else ++ sprintf(s, "%x:%x:%x:%x:%x:%x:%x:%x", ++ ntohl(entry->key[0])>>16, ++ ntohl(entry->key[0])&0xFFFF, ++ ntohl(entry->key[1])>>16, ++ 
ntohl(entry->key[1])&0xFFFF, ++ ntohl(entry->key[2])>>16, ++ ntohl(entry->key[2])&0xFFFF, ++ ntohl(entry->key[3])>>16, ++ ntohl(entry->key[3])&0xFFFF); ++ seq_printf(m, "%39s %10u\n", s, 0); ++ return 0; ++} ++#endif ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo Virtual Network Device"); ++MODULE_LICENSE("GPL v2"); +diff -upr linux-2.6.16.orig/drivers/net/sky2.c linux-2.6.16-026test015/drivers/net/sky2.c +--- linux-2.6.16.orig/drivers/net/sky2.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/sky2.c 2006-07-04 14:41:36.000000000 +0400 +@@ -579,8 +579,8 @@ static void sky2_mac_init(struct sky2_hw + reg = gma_read16(hw, port, GM_PHY_ADDR); + gma_write16(hw, port, GM_PHY_ADDR, reg | GM_PAR_MIB_CLR); + +- for (i = 0; i < GM_MIB_CNT_SIZE; i++) +- gma_read16(hw, port, GM_MIB_CNT_BASE + 8 * i); ++ for (i = GM_MIB_CNT_BASE; i <= GM_MIB_CNT_END; i += 4) ++ gma_read16(hw, port, i); + gma_write16(hw, port, GM_PHY_ADDR, reg); + + /* transmit control */ +diff -upr linux-2.6.16.orig/drivers/net/sky2.h linux-2.6.16-026test015/drivers/net/sky2.h +--- linux-2.6.16.orig/drivers/net/sky2.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/sky2.h 2006-07-04 14:41:36.000000000 +0400 +@@ -1380,6 +1380,7 @@ enum { + /* MIB Counters */ + #define GM_MIB_CNT_BASE 0x0100 /* Base Address of MIB Counters */ + #define GM_MIB_CNT_SIZE 44 /* Number of MIB Counters */ ++#define GM_MIB_CNT_END 0x025C /* Last MIB counter */ + + /* + * MIB Counters base address definitions (low word) - +diff -upr linux-2.6.16.orig/drivers/net/tg3.c linux-2.6.16-026test015/drivers/net/tg3.c +--- linux-2.6.16.orig/drivers/net/tg3.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/tg3.c 2006-07-04 14:41:36.000000000 +0400 +@@ -7368,21 +7368,23 @@ static int tg3_get_settings(struct net_d + cmd->supported |= (SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full); + +- if (!(tp->tg3_flags2 & 
TG3_FLG2_ANY_SERDES)) ++ if (!(tp->tg3_flags2 & TG3_FLG2_ANY_SERDES)) { + cmd->supported |= (SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_MII); +- else ++ cmd->port = PORT_TP; ++ } else { + cmd->supported |= SUPPORTED_FIBRE; ++ cmd->port = PORT_FIBRE; ++ } + + cmd->advertising = tp->link_config.advertising; + if (netif_running(dev)) { + cmd->speed = tp->link_config.active_speed; + cmd->duplex = tp->link_config.active_duplex; + } +- cmd->port = 0; + cmd->phy_address = PHY_ADDR; + cmd->transceiver = 0; + cmd->autoneg = tp->link_config.autoneg; +diff -upr linux-2.6.16.orig/drivers/net/tun.c linux-2.6.16-026test015/drivers/net/tun.c +--- linux-2.6.16.orig/drivers/net/tun.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/tun.c 2006-07-04 14:41:38.000000000 +0400 +@@ -62,6 +62,7 @@ + + #include <asm/system.h> + #include <asm/uaccess.h> ++#include <ub/beancounter.h> + + #ifdef TUN_DEBUG + static int debug; +@@ -90,6 +91,7 @@ static int tun_net_close(struct net_devi + static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct tun_struct *tun = netdev_priv(dev); ++ struct user_beancounter *ub; + + DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len); + +@@ -114,6 +116,18 @@ static int tun_net_xmit(struct sk_buff * + } + } + ++ ub = netdev_bc(dev)->exec_ub; ++ if (ub && (skb_bc(skb)->charged == 0)) { ++ unsigned long charge; ++ charge = skb_charge_fullsize(skb); ++ if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1)) ++ goto drop; ++ get_beancounter(ub); ++ skb_bc(skb)->ub = ub; ++ skb_bc(skb)->charged = charge; ++ skb_bc(skb)->resource = UB_OTHERSOCKBUF; ++ } ++ + /* Queue packet */ + skb_queue_tail(&tun->readq, skb); + dev->trans_start = jiffies; +@@ -410,12 +424,14 @@ static ssize_t tun_chr_readv(struct file + tun->dev->name, addr[0], addr[1], addr[2], + addr[3], addr[4], addr[5]); + ret = tun_put_user(tun, skb, (struct iovec *) iv, 
len); ++ /* skb will be uncharged in kfree_skb() */ + kfree_skb(skb); + break; + } else { + DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n", + tun->dev->name, addr[0], addr[1], addr[2], + addr[3], addr[4], addr[5]); ++ /* skb will be uncharged in kfree_skb() */ + kfree_skb(skb); + continue; + } +@@ -451,6 +467,7 @@ static void tun_setup(struct net_device + dev->get_stats = tun_net_stats; + dev->ethtool_ops = &tun_ethtool_ops; + dev->destructor = free_netdev; ++ dev->features |= NETIF_F_VIRTUAL; + } + + static struct tun_struct *tun_get_by_name(const char *name) +@@ -459,8 +476,9 @@ static struct tun_struct *tun_get_by_nam + + ASSERT_RTNL(); + list_for_each_entry(tun, &tun_dev_list, list) { +- if (!strncmp(tun->dev->name, name, IFNAMSIZ)) +- return tun; ++ if (ve_accessible_strict(tun->dev->owner_env, get_exec_env()) && ++ !strncmp(tun->dev->name, name, IFNAMSIZ)) ++ return tun; + } + + return NULL; +@@ -479,7 +497,8 @@ static int tun_set_iff(struct file *file + + /* Check permissions */ + if (tun->owner != -1 && +- current->euid != tun->owner && !capable(CAP_NET_ADMIN)) ++ current->euid != tun->owner && ++ !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN)) + return -EPERM; + } + else if (__dev_get_by_name(ifr->ifr_name)) +diff -upr linux-2.6.16.orig/drivers/net/venet_core.c linux-2.6.16-026test015/drivers/net/venet_core.c +--- linux-2.6.16.orig/drivers/net/venet_core.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/drivers/net/venet_core.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,675 @@ ++/* ++ * venet_core.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++/* ++ * Common part for Virtuozzo virtual network devices ++ */ ++ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/interrupt.h> ++#include <linux/fs.h> ++#include <linux/types.h> ++#include <linux/string.h> ++#include <linux/socket.h> ++#include <linux/errno.h> ++#include <linux/fcntl.h> ++#include <linux/in.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/tcp.h> ++#include <linux/proc_fs.h> ++#include <linux/seq_file.h> ++#include <net/addrconf.h> ++ ++#include <asm/system.h> ++#include <asm/uaccess.h> ++#include <asm/io.h> ++#include <asm/unistd.h> ++ ++#include <linux/inet.h> ++#include <linux/netdevice.h> ++#include <linux/etherdevice.h> ++#include <net/ip.h> ++#include <linux/skbuff.h> ++#include <net/sock.h> ++#include <linux/if_ether.h> /* For the statistics structure. */ ++#include <linux/if_arp.h> /* For ARPHRD_ETHER */ ++#include <linux/venet.h> ++#include <linux/ve_proto.h> ++#include <linux/vzctl.h> ++#include <linux/vzctl_venet.h> ++ ++struct list_head ip_entry_hash_table[VEIP_HASH_SZ]; ++rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED; ++LIST_HEAD(veip_lh); ++ ++#define ip_entry_hash_function(ip) (ntohl(ip) & (VEIP_HASH_SZ - 1)) ++ ++void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip) ++{ ++ list_add(&entry->ip_hash, ++ ip_entry_hash_table + ip_entry_hash_function(entry->key[3])); ++ list_add(&entry->ve_list, &veip->ip_lh); ++} ++ ++void veip_put(struct veip_struct *veip) ++{ ++ if (!list_empty(&veip->ip_lh)) ++ return; ++ if (!list_empty(&veip->src_lh)) ++ return; ++ if (!list_empty(&veip->dst_lh)) ++ return; ++ ++ list_del(&veip->list); ++ kfree(veip); ++} ++ ++struct ip_entry_struct *ip_entry_lookup(u32 addr) ++{ ++ struct ip_entry_struct *entry; ++ struct list_head *tmp; ++ ++ list_for_each(tmp, ip_entry_hash_table + ip_entry_hash_function(addr)) { ++ entry = list_entry(tmp, struct ip_entry_struct, ip_hash); ++ if (entry->key[3] != addr || entry->family != 
AF_INET) ++ continue; ++ return entry; ++ } ++ return NULL; ++} ++ ++struct ip_entry_struct *venet_entry_lookup(u32 *addr, int family) ++{ ++ struct ip_entry_struct *entry; ++ struct list_head *tmp; ++ ++ list_for_each(tmp, ip_entry_hash_table + ip_entry_hash_function(addr[3])) { ++ entry = list_entry(tmp, struct ip_entry_struct, ip_hash); ++ if (memcmp(entry->key, addr, 16) != 0 ++ || entry->family != family) ++ continue; ++ return entry; ++ } ++ return NULL; ++} ++ ++struct veip_struct *veip_find(envid_t veid) ++{ ++ struct veip_struct *ptr; ++ list_for_each_entry(ptr, &veip_lh, list) { ++ if (ptr->veid != veid) ++ continue; ++ return ptr; ++ } ++ return NULL; ++} ++ ++struct veip_struct *veip_findcreate(envid_t veid) ++{ ++ struct veip_struct *ptr; ++ ++ ptr = veip_find(veid); ++ if (ptr != NULL) ++ return ptr; ++ ++ ptr = kmalloc(sizeof(struct veip_struct), GFP_ATOMIC); ++ if (ptr == NULL) ++ return NULL; ++ memset(ptr, 0, sizeof(struct veip_struct)); ++ INIT_LIST_HEAD(&ptr->ip_lh); ++ INIT_LIST_HEAD(&ptr->src_lh); ++ INIT_LIST_HEAD(&ptr->dst_lh); ++ list_add(&ptr->list, &veip_lh); ++ ptr->veid = veid; ++ return ptr; ++} ++ ++/* ++ * Device functions ++ */ ++ ++static int venet_open(struct net_device *dev) ++{ ++ if (!try_module_get(THIS_MODULE)) ++ return -EBUSY; ++ return 0; ++} ++ ++static int venet_close(struct net_device *master) ++{ ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++static void venet_destructor(struct net_device *dev) ++{ ++ kfree(dev->priv); ++ dev->priv = NULL; ++} ++ ++/* ++ * The higher levels take care of making this non-reentrant (it's ++ * called with bh's disabled). 
++ */ ++static int venet_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct net_device_stats *stats = (struct net_device_stats *)dev->priv; ++ struct net_device *rcv = NULL; ++ int length; ++ ++ if (unlikely(get_exec_env()->disable_net)) ++ goto outf; ++ ++ /* ++ * Optimise so buffers with skb->free=1 are not copied but ++ * instead are lobbed from tx queue to rx queue ++ */ ++ if (atomic_read(&skb->users) != 1) { ++ struct sk_buff *skb2 = skb; ++ skb = skb_clone(skb, GFP_ATOMIC); /* Clone the buffer */ ++ if (skb == NULL) { ++ kfree_skb(skb2); ++ goto out; ++ } ++ kfree_skb(skb2); ++ } else ++ skb_orphan(skb); ++ ++ if (skb->protocol == __constant_htons(ETH_P_IP)) { ++ struct iphdr *iph; ++ iph = skb->nh.iph; ++ if (MULTICAST(iph->daddr)) ++ goto outf; ++ } else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { ++ struct ipv6hdr *ip6h; ++ ip6h = skb->nh.ipv6h; ++ if (ipv6_addr_is_multicast(&ip6h->daddr)) ++ goto outf; ++ } else { ++ goto outf; ++ } ++ ++ if (venet_change_skb_owner(skb) < 0) ++ goto outf; ++ ++ if (unlikely(VE_OWNER_SKB(skb)->disable_net)) ++ goto outf; ++ ++ rcv = VE_OWNER_SKB(skb)->_venet_dev; ++ if (!rcv) ++ /* VE going down */ ++ goto outf; ++ ++ dev_hold(rcv); ++ ++ if (!(rcv->flags & IFF_UP)) { ++ /* Target VE does not want to receive packets */ ++ dev_put(rcv); ++ goto outf; ++ } ++ ++ skb->pkt_type = PACKET_HOST; ++ skb->dev = rcv; ++ ++ skb->mac.raw = skb->data; ++ memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len); ++ ++ dst_release(skb->dst); ++ skb->dst = NULL; ++#ifdef CONFIG_NETFILTER ++ nf_conntrack_put(skb->nfct); ++ skb->nfct = NULL; ++#ifdef CONFIG_NETFILTER_DEBUG ++ skb->nf_debug = 0; ++#endif ++#endif ++ length = skb->len; ++ ++ netif_rx(skb); ++ ++ stats->tx_bytes += length; ++ stats->tx_packets++; ++ if (rcv) { ++ struct net_device_stats *rcv_stats = ++ (struct net_device_stats *)rcv->priv; ++ rcv_stats->rx_bytes += length; ++ rcv_stats->rx_packets++; ++ dev_put(rcv); ++ } ++ ++ return 0; ++ 
++outf: ++ kfree_skb(skb); ++ ++stats->tx_dropped; ++out: ++ return 0; ++} ++ ++static struct net_device_stats *get_stats(struct net_device *dev) ++{ ++ return (struct net_device_stats *)dev->priv; ++} ++ ++/* Initialize the rest of the LOOPBACK device. */ ++int venet_init_dev(struct net_device *dev) ++{ ++ dev->hard_start_xmit = venet_xmit; ++ dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); ++ if (dev->priv == NULL) ++ return -ENOMEM; ++ memset(dev->priv, 0, sizeof(struct net_device_stats)); ++ dev->get_stats = get_stats; ++ dev->open = venet_open; ++ dev->stop = venet_close; ++ dev->destructor = venet_destructor; ++ ++ /* ++ * Fill in the generic fields of the device structure. ++ */ ++ dev->type = ARPHRD_VOID; ++ dev->hard_header_len = ETH_HLEN; ++ dev->mtu = 1500; /* eth_mtu */ ++ dev->tx_queue_len = 0; ++ ++ memset(dev->broadcast, 0xFF, ETH_ALEN); ++ ++ /* New-style flags. */ ++ dev->flags = IFF_BROADCAST|IFF_NOARP|IFF_POINTOPOINT; ++ return 0; ++} ++ ++static void venet_setup(struct net_device *dev) ++{ ++ dev->init = venet_init_dev; ++ /* ++ * No other features, as they are: ++ * - checksumming is required, and nobody else will done our job ++ */ ++ dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL; ++} ++ ++#ifdef CONFIG_PROC_FS ++static int veinfo_seq_show(struct seq_file *m, void *v) ++{ ++ struct ve_struct *ve = (struct ve_struct *)v; ++ struct list_head *tmp; ++ ++ seq_printf(m, "%10u %5u %5u", ve->veid, ++ ve->class_id, atomic_read(&ve->pcounter)); ++ read_lock(&veip_hash_lock); ++ if (ve->veip == NULL) ++ goto unlock; ++ list_for_each(tmp, &ve->veip->ip_lh) { ++ char ip[40]; ++ struct ip_entry_struct *entry; ++ ++ entry = list_entry(tmp, struct ip_entry_struct, ve_list); ++ if (entry->active_env == NULL) ++ continue; ++ ++ if (entry->family == AF_INET) ++ sprintf(ip, "%u.%u.%u.%u", NIPQUAD(entry->key[3])); ++ else ++ sprintf(ip, "%x:%x:%x:%x:%x:%x:%x:%x", ++ ntohl(entry->key[0])>>16, ++ ntohl(entry->key[0])&0xFFFF, ++ 
ntohl(entry->key[1])>>16, ++ ntohl(entry->key[1])&0xFFFF, ++ ntohl(entry->key[2])>>16, ++ ntohl(entry->key[2])&0xFFFF, ++ ntohl(entry->key[3])>>16, ++ ntohl(entry->key[3])&0xFFFF); ++ seq_printf(m, " %39s", ip); ++ } ++unlock: ++ read_unlock(&veip_hash_lock); ++ seq_putc(m, '\n'); ++ return 0; ++} ++ ++static void *ve_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ve_struct *ve, *curve; ++ loff_t l; ++ ++ curve = get_exec_env(); ++ read_lock(&ve_list_guard); ++ if (!ve_is_super(curve)) { ++ if (*pos != 0) ++ return NULL; ++ return curve; ++ } ++ for (ve = ve_list_head, l = *pos; ++ ve != NULL && l > 0; ++ ve = ve->next, l--); ++ return ve; ++} ++ ++static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ve_struct *ve = (struct ve_struct *)v; ++ ++ if (!ve_is_super(get_exec_env())) ++ return NULL; ++ (*pos)++; ++ return ve->next; ++} ++ ++static void ve_seq_stop(struct seq_file *m, void *v) ++{ ++ read_unlock(&ve_list_guard); ++} ++ ++ ++static struct seq_operations veinfo_seq_op = { ++ start: ve_seq_start, ++ next: ve_seq_next, ++ stop: ve_seq_stop, ++ show: veinfo_seq_show ++}; ++ ++static int veinfo_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &veinfo_seq_op); ++} ++ ++static struct file_operations proc_veinfo_operations = { ++ open: veinfo_open, ++ read: seq_read, ++ llseek: seq_lseek, ++ release: seq_release ++}; ++ ++static void *veip_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t l; ++ struct list_head *p; ++ int i; ++ ++ l = *pos; ++ write_lock_irq(&veip_hash_lock); ++ if (l == 0) ++ return ip_entry_hash_table; ++ for (i = 0; i < VEIP_HASH_SZ; i++) { ++ list_for_each(p, ip_entry_hash_table + i) { ++ if (--l == 0) ++ return p; ++ } ++ } ++ return NULL; ++} ++ ++static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct list_head *p; ++ ++ p = (struct list_head *)v; ++ while (1) { ++ p = p->next; ++ if (p < ip_entry_hash_table || ++ p >= ip_entry_hash_table + 
VEIP_HASH_SZ) { ++ (*pos)++; ++ return p; ++ } ++ if (++p >= ip_entry_hash_table + VEIP_HASH_SZ) ++ return NULL; ++ } ++ return NULL; ++} ++ ++static void veip_seq_stop(struct seq_file *m, void *v) ++{ ++ write_unlock_irq(&veip_hash_lock); ++} ++ ++static struct seq_operations veip_seq_op = { ++ start: veip_seq_start, ++ next: veip_seq_next, ++ stop: veip_seq_stop, ++ show: veip_seq_show ++}; ++ ++static int veip_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &veip_seq_op); ++} ++ ++static struct file_operations proc_veip_operations = { ++ open: veip_open, ++ read: seq_read, ++ llseek: seq_lseek, ++ release: seq_release ++}; ++#endif ++ ++int real_ve_ip_map(envid_t veid, int op, struct sockaddr *uservaddr, int addrlen) ++{ ++ int err; ++ union { ++ struct sockaddr g; ++ struct sockaddr_in a4; ++ struct sockaddr_in6 a6; ++ } addr; ++ struct ve_struct *ve; ++ ++ err = -EPERM; ++ if (!capable(CAP_SETVEID)) ++ goto out; ++ ++ err = -EINVAL; ++ if (addrlen > sizeof(addr) || addrlen < sizeof(struct sockaddr_in)) ++ goto out; ++ ++ err = move_addr_to_kernel(uservaddr, addrlen, &addr); ++ if (err < 0) ++ goto out; ++ ++ err = -EINVAL; ++ if (addr.g.sa_family == AF_INET) { ++ if (addrlen != sizeof(struct sockaddr_in)) ++ goto out; ++ } else if (addr.g.sa_family == AF_INET6) { ++ if (addrlen != sizeof(struct sockaddr_in6)) ++ goto out; ++ } else { ++ err = -EAFNOSUPPORT; ++ goto out; ++ } ++ ++ switch (op) ++ { ++ case VE_IP_ADD: ++ ve = get_ve_by_id(veid); ++ err = -ESRCH; ++ if (!ve) ++ goto out; ++ ++ down_read(&ve->op_sem); ++ if (ve->is_running) ++ err = veip_entry_add(ve, &addr.g); ++ up_read(&ve->op_sem); ++ put_ve(ve); ++ break; ++ ++ case VE_IP_DEL: ++ err = veip_entry_del(veid, &addr.g); ++ break; ++ default: ++ err = -EINVAL; ++ } ++ ++out: ++ return err; ++} ++ ++int venet_ioctl(struct inode *ino, struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err; ++ ++ err = -ENOTTY; ++ switch(cmd) { ++ case VENETCTL_VE_IP_MAP: 
{ ++ struct vzctl_ve_ip_map s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = real_ve_ip_map(s.veid, s.op, s.addr, s.addrlen); ++ } ++ break; ++ } ++ return err; ++} ++ ++static struct vzioctlinfo venetcalls = { ++ type: VENETCTLTYPE, ++ func: venet_ioctl, ++ owner: THIS_MODULE, ++}; ++ ++int venet_dev_start(struct ve_struct *env) ++{ ++ struct net_device *dev_venet; ++ int err; ++ ++ dev_venet = alloc_netdev(0, "venet%d", venet_setup); ++ if (!dev_venet) ++ return -ENOMEM; ++ err = dev_alloc_name(dev_venet, dev_venet->name); ++ if (err<0) ++ goto err; ++ if ((err = register_netdev(dev_venet)) != 0) ++ goto err; ++ env->_venet_dev = dev_venet; ++ return 0; ++err: ++ free_netdev(dev_venet); ++ printk(KERN_ERR "VENET initialization error err=%d\n", err); ++ return err; ++} ++ ++static int venet_start(unsigned int hooknum, void *data) ++{ ++ struct ve_struct *env; ++ int err; ++ ++ env = (struct ve_struct *)data; ++ if (env->veip) ++ return -EEXIST; ++ if (!ve_is_super(env) && !try_module_get(THIS_MODULE)) ++ return 0; ++ ++ err = veip_start(env); ++ if (err) ++ goto err; ++ ++ err = venet_dev_start(env); ++ if (err) ++ goto err_free; ++ return 0; ++ ++err_free: ++ veip_stop(env); ++err: ++ if (!ve_is_super(env)) ++ module_put(THIS_MODULE); ++ return err; ++} ++ ++static int venet_stop(unsigned int hooknum, void *data) ++{ ++ struct ve_struct *env; ++ ++ env = (struct ve_struct *)data; ++ veip_stop(env); ++ if (!ve_is_super(env)) ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++#define VE_HOOK_PRI_NET 0 ++ ++static struct ve_hook venet_ve_hook_init = { ++ hook: venet_start, ++ undo: venet_stop, ++ hooknum: VE_HOOK_INIT, ++ priority: VE_HOOK_PRI_NET ++}; ++ ++static struct ve_hook venet_ve_hook_fini = { ++ hook: venet_stop, ++ hooknum: VE_HOOK_FINI, ++ priority: VE_HOOK_PRI_NET ++}; ++ ++__init int venet_init(void) ++{ ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *de; ++#endif ++ int i, err; ++ ++ if (get_ve0()->_venet_dev 
!= NULL) ++ return -EEXIST; ++ ++ for (i = 0; i < VEIP_HASH_SZ; i++) ++ INIT_LIST_HEAD(ip_entry_hash_table + i); ++ ++ err = venet_start(VE_HOOK_INIT, (void *)get_ve0()); ++ if (err) ++ return err; ++ ++#ifdef CONFIG_PROC_FS ++ de = create_proc_glob_entry("vz/veinfo", ++ S_IFREG|S_IRUSR, NULL); ++ if (de) ++ de->proc_fops = &proc_veinfo_operations; ++ else ++ printk(KERN_WARNING "venet: can't make veinfo proc entry\n"); ++ ++ de = create_proc_entry("vz/veip", S_IFREG|S_IRUSR, NULL); ++ if (de) ++ de->proc_fops = &proc_veip_operations; ++ else ++ printk(KERN_WARNING "venet: can't make veip proc entry\n"); ++#endif ++ ++ ve_hook_register(&venet_ve_hook_init); ++ ve_hook_register(&venet_ve_hook_fini); ++ vzioctl_register(&venetcalls); ++ return 0; ++} ++ ++__exit void venet_exit(void) ++{ ++ struct net_device *dev_venet; ++ ++ vzioctl_unregister(&venetcalls); ++ ve_hook_unregister(&venet_ve_hook_fini); ++ ve_hook_unregister(&venet_ve_hook_init); ++#ifdef CONFIG_PROC_FS ++ remove_proc_entry("vz/veip", NULL); ++ remove_proc_entry("vz/veinfo", NULL); ++#endif ++ ++ dev_venet = get_ve0()->_venet_dev; ++ if (dev_venet != NULL) { ++ get_ve0()->_venet_dev = NULL; ++ unregister_netdev(dev_venet); ++ free_netdev(dev_venet); ++ } ++ veip_stop(get_ve0()); ++} ++ ++module_init(venet_init); ++module_exit(venet_exit); +diff -upr linux-2.6.16.orig/drivers/net/veth.c linux-2.6.16-026test015/drivers/net/veth.c +--- linux-2.6.16.orig/drivers/net/veth.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/drivers/net/veth.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,582 @@ ++/* ++ * veth.c ++ * ++ * Copyright (C) 2006 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++/* ++ * Virtual ethernet device used to change VE ownership on packets ++ */ ++ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/interrupt.h> ++#include <linux/fs.h> ++#include <linux/types.h> ++#include <linux/string.h> ++#include <linux/socket.h> ++#include <linux/errno.h> ++#include <linux/fcntl.h> ++#include <linux/in.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/tcp.h> ++#include <linux/proc_fs.h> ++#include <linux/seq_file.h> ++ ++#include <asm/system.h> ++#include <asm/uaccess.h> ++#include <asm/io.h> ++#include <asm/unistd.h> ++ ++#include <linux/inet.h> ++#include <linux/netdevice.h> ++#include <linux/etherdevice.h> ++#include <net/ip.h> ++#include <linux/skbuff.h> ++#include <net/sock.h> ++#include <linux/if_ether.h> /* For the statistics structure. */ ++#include <linux/if_arp.h> /* For ARPHRD_ETHER */ ++#include <linux/ve_proto.h> ++#include <linux/vzctl.h> ++#include <linux/vzctl_veth.h> ++ ++#include <linux/list.h> ++#include <linux/spinlock.h> ++#include <linux/vzcalluser.h> ++ ++struct veth_struct ++{ ++ struct net_device_stats stats; ++ struct net_device *pair; ++ struct list_head hwaddr_list; ++}; ++ ++struct list_head veth_hwaddr_list; ++rwlock_t ve_hwaddr_lock = RW_LOCK_UNLOCKED; ++DECLARE_MUTEX(hwaddr_sem); ++ ++#define veth_from_netdev(dev) \ ++ ((struct veth_struct *)(netdev_priv(dev))) ++#define veth_to_netdev(veth) \ ++ ((struct net_device*)((char*)veth - \ ++ (unsigned long)netdev_priv(NULL))) ++ ++struct net_device * veth_dev_start(char *dev_addr, char *name); ++ ++struct veth_struct *hwaddr_entry_lookup(char *name) ++{ ++ struct veth_struct *entry; ++ struct list_head *tmp; ++ ++ list_for_each(tmp, &veth_hwaddr_list) { ++ entry = list_entry(tmp, struct veth_struct, hwaddr_list); ++ BUG_ON(entry->pair == NULL); ++ if (strncmp(name, entry->pair->name, IFNAMSIZ) == 0) ++ return entry; ++ } ++ return NULL; ++} ++ ++int veth_entry_add(struct ve_struct *ve, char *dev_addr, 
char *name, ++ char *dev_addr_ve, char *name_ve) ++{ ++ struct net_device *dev_ve; ++ struct net_device *dev_ve0; ++ struct ve_struct *old_env; ++ char dev_name[IFNAMSIZ]; ++ int err; ++ ++ down(&hwaddr_sem); ++ ++ if (name[0] == '\0') ++ snprintf(dev_name, sizeof(dev_name), "vz%d.%%d", ve->veid); ++ else { ++ memcpy(dev_name, name, IFNAMSIZ - 1); ++ dev_name[IFNAMSIZ - 1] = '\0'; ++ } ++ dev_ve0 = veth_dev_start(dev_addr, dev_name); ++ if (IS_ERR(dev_ve0)) { ++ err = PTR_ERR(dev_ve0); ++ goto err; ++ } ++ ++ old_env = set_exec_env(ve); ++ if (name_ve[0] == '\0') ++ sprintf(dev_name, "eth%%d"); ++ else { ++ memcpy(dev_name, name_ve, IFNAMSIZ - 1); ++ dev_name[IFNAMSIZ - 1] = '\0'; ++ } ++ dev_ve = veth_dev_start(dev_addr_ve, dev_name); ++ if (IS_ERR(dev_ve)) { ++ err = PTR_ERR(dev_ve); ++ goto err_ve; ++ } ++ set_exec_env(old_env); ++ veth_from_netdev(dev_ve)->pair = dev_ve0; ++ veth_from_netdev(dev_ve0)->pair = dev_ve; ++ ++ write_lock(&ve_hwaddr_lock); ++ list_add(&(veth_from_netdev(dev_ve)->hwaddr_list), &veth_hwaddr_list); ++ write_unlock(&ve_hwaddr_lock); ++ ++ up(&hwaddr_sem); ++ return 0; ++ ++err_ve: ++ set_exec_env(old_env); ++ unregister_netdev(dev_ve0); ++err: ++ up(&hwaddr_sem); ++ return err; ++} ++ ++int veth_entry_del(struct ve_struct *ve, char *name) ++{ ++ struct veth_struct *found; ++ struct ve_struct *old_env; ++ struct net_device *dev; ++ int err; ++ ++ err = -ENODEV; ++ down(&hwaddr_sem); ++ found = hwaddr_entry_lookup(name); ++ if (found == NULL) ++ goto out; ++ if (veth_to_netdev(found)->owner_env != ve) ++ goto out; ++ ++ write_lock(&ve_hwaddr_lock); ++ list_del(&found->hwaddr_list); ++ write_unlock(&ve_hwaddr_lock); ++ err = 0; ++ dev = found->pair; ++ BUG_ON(found->pair == NULL); ++ ++ old_env = get_exec_env(); ++ set_exec_env(ve); ++ unregister_netdev(veth_to_netdev(found)); ++ set_exec_env(old_env); ++ ++ unregister_netdev(dev); ++ ++out: ++ up(&hwaddr_sem); ++ return err; ++} ++ ++/* ++ * Device functions ++ */ ++ ++static int 
veth_open(struct net_device *dev) ++{ ++ return 0; ++} ++ ++static int veth_close(struct net_device *master) ++{ ++ return 0; ++} ++ ++static void veth_destructor(struct net_device *dev) ++{ ++ free_netdev(dev); ++} ++ ++static struct net_device_stats *get_stats(struct net_device *dev) ++{ ++ return &veth_from_netdev(dev)->stats; ++} ++ ++/* ++ * The higher levels take care of making this non-reentrant (it's ++ * called with bh's disabled). ++ */ ++static int veth_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct net_device_stats *stats = get_stats(dev); ++ struct net_device *rcv = NULL; ++ struct veth_struct *entry; ++ int length; ++ ++ if (unlikely(get_exec_env()->disable_net)) ++ goto outf; ++ ++ skb_orphan(skb); ++ ++ entry = veth_from_netdev(dev); ++ rcv = entry->pair; ++ if (!rcv) ++ /* VE going down */ ++ goto outf; ++ ++ if (unlikely(rcv->owner_env->disable_net)) ++ goto outf; ++ ++ skb->owner_env = rcv->owner_env; ++ ++ if (!(rcv->flags & IFF_UP)) { ++ /* Target VE does not want to receive packets */ ++ goto outf; ++ } ++ ++ skb->dev = rcv; ++ skb->pkt_type = PACKET_HOST; ++ skb->protocol = eth_type_trans(skb, rcv); ++ ++ dst_release(skb->dst); ++ skb->dst = NULL; ++#ifdef CONFIG_NETFILTER ++ nf_conntrack_put(skb->nfct); ++ skb->nfct = NULL; ++#ifdef CONFIG_NETFILTER_DEBUG ++ skb->nf_debug = 0; ++#endif ++#endif ++ length = skb->len; ++ ++ netif_rx(skb); ++ ++ stats->tx_bytes += length; ++ stats->tx_packets++; ++ if (rcv) { ++ struct net_device_stats *rcv_stats = get_stats(rcv); ++ rcv_stats->rx_bytes += length; ++ rcv_stats->rx_packets++; ++ } ++ ++ return 0; ++ ++outf: ++ kfree_skb(skb); ++ stats->tx_dropped++; ++ return 0; ++} ++ ++int veth_init_dev(struct net_device *dev) ++{ ++ dev->hard_start_xmit = veth_xmit; ++ dev->get_stats = get_stats; ++ dev->open = veth_open; ++ dev->stop = veth_close; ++ dev->destructor = veth_destructor; ++ ++ ether_setup(dev); ++ ++ dev->tx_queue_len = 0; ++ return 0; ++} ++ ++static void veth_setup(struct 
net_device *dev) ++{ ++ dev->init = veth_init_dev; ++ /* ++ * No other features, as they are: ++ * - checksumming is required, and nobody else will done our job ++ */ ++ dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL; ++} ++ ++#ifdef CONFIG_PROC_FS ++#define ADDR_FMT "%02x:%02x:%02x:%02x:%02x:%02x" ++#define ADDR(x) (x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5] ++static int vehwaddr_seq_show(struct seq_file *m, void *v) ++{ ++ struct list_head *p; ++ struct veth_struct *entry; ++ ++ p = (struct list_head *)v; ++ if (p == &veth_hwaddr_list) { ++ seq_puts(m, "Version: 1.0\n"); ++ return 0; ++ } ++ entry = list_entry(p, struct veth_struct, hwaddr_list); ++ seq_printf(m, ADDR_FMT " %16s ", ++ ADDR(entry->pair->dev_addr), entry->pair->name); ++ seq_printf(m, ADDR_FMT " %16s %10u\n", ++ ADDR(veth_to_netdev(entry)->dev_addr), ++ veth_to_netdev(entry)->name, ++ VEID(veth_to_netdev(entry)->owner_env)); ++ return 0; ++} ++ ++static void *vehwaddr_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t l; ++ struct list_head *p; ++ ++ l = *pos; ++ read_lock(&ve_hwaddr_lock); ++ if (l == 0) ++ return &veth_hwaddr_list; ++ list_for_each(p, &veth_hwaddr_list) { ++ if (--l == 0) ++ return p; ++ } ++ return NULL; ++} ++ ++static void *vehwaddr_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct list_head *p; ++ ++ p = (struct list_head *)v; ++ (*pos)++; ++ return p->next == &veth_hwaddr_list ? 
NULL : p->next; ++} ++ ++static void vehwaddr_seq_stop(struct seq_file *m, void *v) ++{ ++ read_unlock(&ve_hwaddr_lock); ++} ++ ++static struct seq_operations vehwaddr_seq_op = { ++ .start = vehwaddr_seq_start, ++ .next = vehwaddr_seq_next, ++ .stop = vehwaddr_seq_stop, ++ .show = vehwaddr_seq_show ++}; ++ ++static int vehwaddr_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &vehwaddr_seq_op); ++} ++ ++static struct file_operations proc_vehwaddr_operations = { ++ .open = vehwaddr_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release ++}; ++#endif ++ ++int real_ve_hwaddr(envid_t veid, int op, ++ unsigned char *dev_addr, int addrlen, char *name, ++ unsigned char *dev_addr_ve, int addrlen_ve, char *name_ve) ++{ ++ int err; ++ struct ve_struct *ve; ++ char ve_addr[ETH_ALEN]; ++ ++ err = -EPERM; ++ if (!capable(CAP_NET_ADMIN)) ++ goto out; ++ ++ err = -EINVAL; ++ switch (op) ++ { ++ case VE_ETH_ADD: ++ if (addrlen != ETH_ALEN) ++ goto out; ++ if (addrlen_ve != ETH_ALEN && addrlen_ve != 0) ++ goto out; ++ /* If ve addr is not set then we use dev_addr[3] & 0x80 for it */ ++ if (addrlen_ve == 0 && (dev_addr[3] & 0x80)) ++ goto out; ++ if (addrlen_ve == 0) { ++ memcpy(ve_addr, dev_addr, ETH_ALEN); ++ ve_addr[3] |= 0x80; ++ } else { ++ memcpy(ve_addr, dev_addr_ve, ETH_ALEN); ++ } ++ ++ ve = get_ve_by_id(veid); ++ err = -ESRCH; ++ if (!ve) ++ goto out; ++ ++ down_read(&ve->op_sem); ++ if (ve->is_running) ++ err = veth_entry_add(ve, dev_addr, name, ++ ve_addr, name_ve); ++ up_read(&ve->op_sem); ++ put_ve(ve); ++ break; ++ ++ case VE_ETH_DEL: ++ if (name[0] == '\0') ++ goto out; ++ ve = get_ve_by_id(veid); ++ err = -ESRCH; ++ if (!ve) ++ goto out; ++ ++ down_read(&ve->op_sem); ++ if (ve->is_running) ++ err = veth_entry_del(ve, name); ++ up_read(&ve->op_sem); ++ put_ve(ve); ++ break; ++ } ++ ++out: ++ return err; ++} ++ ++int veth_ioctl(struct inode *ino, struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err; 
++ ++ err = -ENOTTY; ++ switch(cmd) { ++ case VETHCTL_VE_HWADDR: { ++ struct vzctl_ve_hwaddr s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = real_ve_hwaddr(s.veid, s.op, ++ s.dev_addr, s.addrlen, s.dev_name, ++ s.dev_addr_ve, s.addrlen_ve, s.dev_name_ve); ++ } ++ break; ++ } ++ return err; ++} ++ ++static struct vzioctlinfo vethcalls = { ++ .type = VETHCTLTYPE, ++ .func = veth_ioctl, ++ .owner = THIS_MODULE, ++}; ++ ++struct net_device * veth_dev_start(char *dev_addr, char *name) ++{ ++ struct net_device *dev; ++ int err; ++ ++ dev = alloc_netdev(sizeof(struct veth_struct), name, veth_setup); ++ if (!dev) ++ return ERR_PTR(-ENOMEM); ++ if (strchr(dev->name, '%')) { ++ err = dev_alloc_name(dev, dev->name); ++ if (err < 0) ++ goto err; ++ } ++ if ((err = register_netdev(dev)) != 0) ++ goto err; ++ ++ memcpy(dev->dev_addr, dev_addr, ETH_ALEN); ++ dev->addr_len = ETH_ALEN; ++ ++ return dev; ++err: ++ free_netdev(dev); ++ printk(KERN_ERR "%s initialization error err=%d\n", name, err); ++ return ERR_PTR(err); ++} ++ ++static int veth_stop(unsigned int hooknum, void *data) ++{ ++ struct ve_struct *old_env; ++ struct ve_struct *env; ++ struct list_head *tmp, *n; ++ ++ env = (struct ve_struct *)data; ++ down(&hwaddr_sem); ++ list_for_each_safe(tmp, n, &veth_hwaddr_list) { ++ struct veth_struct *entry; ++ struct net_device *dev; ++ entry = list_entry(tmp, struct veth_struct, hwaddr_list); ++ if (VEID(env) != VEID(veth_to_netdev(entry)->owner_env)) ++ continue; ++ ++ write_lock(&ve_hwaddr_lock); ++ list_del(&entry->hwaddr_list); ++ write_unlock(&ve_hwaddr_lock); ++ ++ dev = entry->pair; ++ BUG_ON(entry->pair == NULL); ++ old_env = set_exec_env(env); ++ unregister_netdev(veth_to_netdev(entry)); ++ set_exec_env(old_env); ++ ++ old_env = set_exec_env(get_ve0()); ++ unregister_netdev(dev); ++ set_exec_env(old_env); ++ } ++ up(&hwaddr_sem); ++ return 0; ++} ++ ++#define VE_HOOK_PRI_NET 0 ++ ++static struct ve_hook veth_ve_hook_fini = { ++ 
.hook = veth_stop, ++ .hooknum = VE_HOOK_FINI, ++ .priority = VE_HOOK_PRI_NET, ++ .owner = THIS_MODULE, ++}; ++ ++__init int veth_init(void) ++{ ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *de; ++#endif ++ ++ INIT_LIST_HEAD(&veth_hwaddr_list); ++ ++#ifdef CONFIG_PROC_FS ++ de = create_proc_glob_entry("vz/veth", ++ S_IFREG|S_IRUSR, NULL); ++ if (de) ++ de->proc_fops = &proc_vehwaddr_operations; ++ else ++ printk(KERN_WARNING "veth: can't make vehwaddr proc entry\n"); ++ ++#endif ++ ++ ve_hook_register(&veth_ve_hook_fini); ++ vzioctl_register(&vethcalls); ++ return 0; ++} ++ ++__exit void veth_exit(void) ++{ ++ struct veth_struct *entry; ++ struct list_head *tmp, *n; ++ struct ve_struct *ve; ++ struct ve_struct *old_env; ++ ++ vzioctl_unregister(&vethcalls); ++ ve_hook_unregister(&veth_ve_hook_fini); ++#ifdef CONFIG_PROC_FS ++ remove_proc_entry("vz/veth", NULL); ++#endif ++ ++ down(&hwaddr_sem); ++ list_for_each_safe(tmp, n, &veth_hwaddr_list) { ++ struct net_device *dev; ++ entry = list_entry(tmp, struct veth_struct, hwaddr_list); ++ ve = get_ve(veth_to_netdev(entry)->owner_env); ++ ++ write_lock(&ve_hwaddr_lock); ++ list_del(&entry->hwaddr_list); ++ write_unlock(&ve_hwaddr_lock); ++ ++ dev = entry->pair; ++ BUG_ON(entry->pair == NULL); ++ old_env = set_exec_env(ve); ++ unregister_netdev(veth_to_netdev(entry)); ++ set_exec_env(old_env); ++ ++ unregister_netdev(dev); ++ ++ put_ve(ve); ++ } ++ up(&hwaddr_sem); ++} ++ ++module_init(veth_init); ++module_exit(veth_exit); ++ ++MODULE_AUTHOR("Andrey Mirkin <amirkin@sw.ru>"); ++MODULE_DESCRIPTION("Virtuozzo Virtual Ethernet Device"); ++MODULE_LICENSE("GPL v2"); ++ +diff -upr linux-2.6.16.orig/drivers/net/via-rhine.c linux-2.6.16-026test015/drivers/net/via-rhine.c +--- linux-2.6.16.orig/drivers/net/via-rhine.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/via-rhine.c 2006-07-04 14:41:36.000000000 +0400 +@@ -129,6 +129,7 @@ + - Massive clean-up + - Rewrite PHY, media handling (remove 
options, full_duplex, backoff) + - Fix Tx engine race for good ++ - Craig Brind: Zero padded aligned buffers for short packets. + + */ + +@@ -1306,7 +1307,12 @@ static int rhine_start_tx(struct sk_buff + rp->stats.tx_dropped++; + return 0; + } ++ ++ /* Padding is not copied and so must be redone. */ + skb_copy_and_csum_dev(skb, rp->tx_buf[entry]); ++ if (skb->len < ETH_ZLEN) ++ memset(rp->tx_buf[entry] + skb->len, 0, ++ ETH_ZLEN - skb->len); + rp->tx_skbuff_dma[entry] = 0; + rp->tx_ring[entry].addr = cpu_to_le32(rp->tx_bufs_dma + + (rp->tx_buf[entry] - +diff -upr linux-2.6.16.orig/drivers/net/wireless/Kconfig linux-2.6.16-026test015/drivers/net/wireless/Kconfig +--- linux-2.6.16.orig/drivers/net/wireless/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/wireless/Kconfig 2006-07-04 14:41:36.000000000 +0400 +@@ -239,7 +239,8 @@ config IPW2200_DEBUG + + config AIRO + tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards" +- depends on NET_RADIO && ISA_DMA_API && CRYPTO && (PCI || BROKEN) ++ depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN) ++ select CRYPTO + ---help--- + This is the standard Linux driver to support Cisco/Aironet ISA and + PCI 802.11 wireless cards. +@@ -374,6 +375,7 @@ config PCMCIA_HERMES + config PCMCIA_SPECTRUM + tristate "Symbol Spectrum24 Trilogy PCMCIA card support" + depends on NET_RADIO && PCMCIA && HERMES ++ select FW_LOADER + ---help--- + + This is a driver for 802.11b cards using RAM-loadable Symbol +@@ -387,6 +389,7 @@ config PCMCIA_SPECTRUM + config AIRO_CS + tristate "Cisco/Aironet 34X/35X/4500/4800 PCMCIA cards" + depends on NET_RADIO && PCMCIA && (BROKEN || !M32R) ++ select CRYPTO + ---help--- + This is the standard Linux driver to support Cisco/Aironet PCMCIA + 802.11 wireless cards. 
This driver is the same as the Aironet +diff -upr linux-2.6.16.orig/drivers/net/wireless/hostap/hostap_80211_tx.c linux-2.6.16-026test015/drivers/net/wireless/hostap/hostap_80211_tx.c +--- linux-2.6.16.orig/drivers/net/wireless/hostap/hostap_80211_tx.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/wireless/hostap/hostap_80211_tx.c 2006-07-04 14:41:36.000000000 +0400 +@@ -469,7 +469,7 @@ int hostap_master_start_xmit(struct sk_b + } + + if (local->ieee_802_1x && meta->ethertype == ETH_P_PAE && tx.crypt && +- !(fc & IEEE80211_FCTL_VERS)) { ++ !(fc & IEEE80211_FCTL_PROTECTED)) { + no_encrypt = 1; + PDEBUG(DEBUG_EXTRA2, "%s: TX: IEEE 802.1X - passing " + "unencrypted EAPOL frame\n", dev->name); +diff -upr linux-2.6.16.orig/drivers/net/wireless/ipw2200.c linux-2.6.16-026test015/drivers/net/wireless/ipw2200.c +--- linux-2.6.16.orig/drivers/net/wireless/ipw2200.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/net/wireless/ipw2200.c 2006-07-04 14:41:36.000000000 +0400 +@@ -8391,20 +8391,28 @@ static int ipw_wx_get_range(struct net_d + + i = 0; + if (priv->ieee->mode & (IEEE_B | IEEE_G)) { +- for (j = 0; j < geo->bg_channels && i < IW_MAX_FREQUENCIES; +- i++, j++) { ++ for (j = 0; j < geo->bg_channels && i < IW_MAX_FREQUENCIES; j++) { ++ if ((priv->ieee->iw_mode == IW_MODE_ADHOC) && ++ (geo->bg[j].flags & IEEE80211_CH_PASSIVE_ONLY)) ++ continue; ++ + range->freq[i].i = geo->bg[j].channel; + range->freq[i].m = geo->bg[j].freq * 100000; + range->freq[i].e = 1; ++ i++; + } + } + + if (priv->ieee->mode & IEEE_A) { +- for (j = 0; j < geo->a_channels && i < IW_MAX_FREQUENCIES; +- i++, j++) { ++ for (j = 0; j < geo->a_channels && i < IW_MAX_FREQUENCIES; j++) { ++ if ((priv->ieee->iw_mode == IW_MODE_ADHOC) && ++ (geo->a[j].flags & IEEE80211_CH_PASSIVE_ONLY)) ++ continue; ++ + range->freq[i].i = geo->a[j].channel; + range->freq[i].m = geo->a[j].freq * 100000; + range->freq[i].e = 1; ++ i++; + } + } + +@@ -9956,9 +9964,8 @@ 
static int ipw_ethtool_set_eeprom(struct + return -EINVAL; + down(&p->sem); + memcpy(&p->eeprom[eeprom->offset], bytes, eeprom->len); +- for (i = IPW_EEPROM_DATA; +- i < IPW_EEPROM_DATA + IPW_EEPROM_IMAGE_SIZE; i++) +- ipw_write8(p, i, p->eeprom[i]); ++ for (i = 0; i < IPW_EEPROM_IMAGE_SIZE; i++) ++ ipw_write8(p, i + IPW_EEPROM_DATA, p->eeprom[i]); + up(&p->sem); + return 0; + } +diff -upr linux-2.6.16.orig/drivers/pci/pci-acpi.c linux-2.6.16-026test015/drivers/pci/pci-acpi.c +--- linux-2.6.16.orig/drivers/pci/pci-acpi.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/pci/pci-acpi.c 2006-07-04 14:41:36.000000000 +0400 +@@ -33,13 +33,10 @@ acpi_query_osc ( + acpi_status status; + struct acpi_object_list input; + union acpi_object in_params[4]; +- struct acpi_buffer output; +- union acpi_object out_obj; ++ struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; ++ union acpi_object *out_obj; + u32 osc_dw0; + +- /* Setting up output buffer */ +- output.length = sizeof(out_obj) + 3*sizeof(u32); +- output.pointer = &out_obj; + + /* Setting up input parameters */ + input.count = 4; +@@ -61,12 +58,15 @@ acpi_query_osc ( + "Evaluate _OSC Set fails. 
Status = 0x%04x\n", status); + return status; + } +- if (out_obj.type != ACPI_TYPE_BUFFER) { ++ out_obj = output.pointer; ++ ++ if (out_obj->type != ACPI_TYPE_BUFFER) { + printk(KERN_DEBUG + "Evaluate _OSC returns wrong type\n"); +- return AE_TYPE; ++ status = AE_TYPE; ++ goto query_osc_out; + } +- osc_dw0 = *((u32 *) out_obj.buffer.pointer); ++ osc_dw0 = *((u32 *) out_obj->buffer.pointer); + if (osc_dw0) { + if (osc_dw0 & OSC_REQUEST_ERROR) + printk(KERN_DEBUG "_OSC request fails\n"); +@@ -76,15 +76,21 @@ acpi_query_osc ( + printk(KERN_DEBUG "_OSC invalid revision\n"); + if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) { + /* Update Global Control Set */ +- global_ctrlsets = *((u32 *)(out_obj.buffer.pointer+8)); +- return AE_OK; ++ global_ctrlsets = *((u32 *)(out_obj->buffer.pointer+8)); ++ status = AE_OK; ++ goto query_osc_out; + } +- return AE_ERROR; ++ status = AE_ERROR; ++ goto query_osc_out; + } + + /* Update Global Control Set */ +- global_ctrlsets = *((u32 *)(out_obj.buffer.pointer + 8)); +- return AE_OK; ++ global_ctrlsets = *((u32 *)(out_obj->buffer.pointer + 8)); ++ status = AE_OK; ++ ++query_osc_out: ++ kfree(output.pointer); ++ return status; + } + + +@@ -96,14 +102,10 @@ acpi_run_osc ( + acpi_status status; + struct acpi_object_list input; + union acpi_object in_params[4]; +- struct acpi_buffer output; +- union acpi_object out_obj; ++ struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; ++ union acpi_object *out_obj; + u32 osc_dw0; + +- /* Setting up output buffer */ +- output.length = sizeof(out_obj) + 3*sizeof(u32); +- output.pointer = &out_obj; +- + /* Setting up input parameters */ + input.count = 4; + input.pointer = in_params; +@@ -124,12 +126,14 @@ acpi_run_osc ( + "Evaluate _OSC Set fails. 
Status = 0x%04x\n", status); + return status; + } +- if (out_obj.type != ACPI_TYPE_BUFFER) { ++ out_obj = output.pointer; ++ if (out_obj->type != ACPI_TYPE_BUFFER) { + printk(KERN_DEBUG + "Evaluate _OSC returns wrong type\n"); +- return AE_TYPE; ++ status = AE_TYPE; ++ goto run_osc_out; + } +- osc_dw0 = *((u32 *) out_obj.buffer.pointer); ++ osc_dw0 = *((u32 *) out_obj->buffer.pointer); + if (osc_dw0) { + if (osc_dw0 & OSC_REQUEST_ERROR) + printk(KERN_DEBUG "_OSC request fails\n"); +@@ -139,11 +143,17 @@ acpi_run_osc ( + printk(KERN_DEBUG "_OSC invalid revision\n"); + if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) { + printk(KERN_DEBUG "_OSC FW not grant req. control\n"); +- return AE_SUPPORT; ++ status = AE_SUPPORT; ++ goto run_osc_out; + } +- return AE_ERROR; ++ status = AE_ERROR; ++ goto run_osc_out; + } +- return AE_OK; ++ status = AE_OK; ++ ++run_osc_out: ++ kfree(output.pointer); ++ return status; + } + + /** +diff -upr linux-2.6.16.orig/drivers/pci/probe.c linux-2.6.16-026test015/drivers/pci/probe.c +--- linux-2.6.16.orig/drivers/pci/probe.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/pci/probe.c 2006-07-04 14:41:38.000000000 +0400 +@@ -21,6 +21,7 @@ LIST_HEAD(pci_root_buses); + EXPORT_SYMBOL(pci_root_buses); + + LIST_HEAD(pci_devices); ++EXPORT_SYMBOL(pci_devices); + + #ifdef HAVE_PCI_LEGACY + /** +diff -upr linux-2.6.16.orig/drivers/pci/quirks.c linux-2.6.16-026test015/drivers/pci/quirks.c +--- linux-2.6.16.orig/drivers/pci/quirks.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/pci/quirks.c 2006-07-04 14:41:36.000000000 +0400 +@@ -631,6 +631,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_V + * non-x86 architectures (yes Via exists on PPC among other places), + * we must mask the PCI_INTERRUPT_LINE value versus 0xf to get + * interrupts delivered properly. ++ * ++ * Some of the on-chip devices are actually '586 devices' so they are ++ * listed here. 
+ */ + static void quirk_via_irq(struct pci_dev *dev) + { +@@ -639,13 +642,19 @@ static void quirk_via_irq(struct pci_dev + new_irq = dev->irq & 0xf; + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); + if (new_irq != irq) { +- printk(KERN_INFO "PCI: Via IRQ fixup for %s, from %d to %d\n", ++ printk(KERN_INFO "PCI: VIA IRQ fixup for %s, from %d to %d\n", + pci_name(dev), irq, new_irq); + udelay(15); /* unknown if delay really needed */ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, new_irq); + } + } +-DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_ANY_ID, quirk_via_irq); ++DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, quirk_via_irq); ++DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1, quirk_via_irq); ++DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_2, quirk_via_irq); ++DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_via_irq); ++DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_irq); ++DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_via_irq); ++DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, quirk_via_irq); + + /* + * VIA VT82C598 has its device ID settable and many BIOSes +@@ -861,6 +870,7 @@ static void __init quirk_eisa_bridge(str + } + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82375, quirk_eisa_bridge ); + ++#ifndef CONFIG_ACPI_SLEEP + /* + * On ASUS P4B boards, the SMBus PCI Device within the ICH2/4 southbridge + * is not activated. The myth is that Asus said that they do not want the +@@ -872,8 +882,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I + * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it + * becomes necessary to do this tweak in two steps -- I've chosen the Host + * bridge as trigger. 
++ * ++ * Actually, leaving it unhidden and not redoing the quirk over suspend2ram ++ * will cause thermal management to break down, and causing machine to ++ * overheat. + */ +-static int __initdata asus_hides_smbus = 0; ++static int __initdata asus_hides_smbus; + + static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev) + { +@@ -1008,6 +1022,8 @@ static void __init asus_hides_smbus_lpc_ + } + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6 ); + ++#endif ++ + /* + * SiS 96x south bridge: BIOS typically hides SMBus device... + */ +diff -upr linux-2.6.16.orig/drivers/pcmcia/ds.c linux-2.6.16-026test015/drivers/pcmcia/ds.c +--- linux-2.6.16.orig/drivers/pcmcia/ds.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/pcmcia/ds.c 2006-07-04 14:41:36.000000000 +0400 +@@ -546,7 +546,7 @@ static int pcmcia_device_query(struct pc + tmp = vers1->str + vers1->ofs[i]; + + length = strlen(tmp) + 1; +- if ((length < 3) || (length > 255)) ++ if ((length < 2) || (length > 255)) + continue; + + p_dev->prod_id[i] = kmalloc(sizeof(char) * length, +diff -upr linux-2.6.16.orig/drivers/s390/cio/cio.c linux-2.6.16-026test015/drivers/s390/cio/cio.c +--- linux-2.6.16.orig/drivers/s390/cio/cio.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/s390/cio/cio.c 2006-07-04 14:41:38.000000000 +0400 +@@ -610,7 +610,11 @@ do_IRQ (struct pt_regs *regs) + struct tpi_info *tpi_info; + struct subchannel *sch; + struct irb *irb; ++ struct ve_struct *ve; ++ struct user_beancounter *ub; + ++ ve = set_exec_env(get_ve0()); ++ ub = set_exec_ub(get_ub0()); + irq_enter (); + asm volatile ("mc 0,0"); + if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) +@@ -657,6 +661,8 @@ do_IRQ (struct pt_regs *regs) + */ + } while (!MACHINE_IS_VM && tpi (NULL) != 0); + irq_exit (); ++ (void)set_exec_ub(ub); ++ (void)set_exec_env(ve); + } + + #ifdef CONFIG_CCW_CONSOLE +diff -upr 
linux-2.6.16.orig/drivers/scsi/3w-9xxx.c linux-2.6.16-026test015/drivers/scsi/3w-9xxx.c +--- linux-2.6.16.orig/drivers/scsi/3w-9xxx.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/scsi/3w-9xxx.c 2006-07-04 14:41:36.000000000 +0400 +@@ -85,7 +85,7 @@ + #include "3w-9xxx.h" + + /* Globals */ +-#define TW_DRIVER_VERSION "2.26.02.005" ++#define TW_DRIVER_VERSION "2.26.02.007" + static TW_Device_Extension *twa_device_extension_list[TW_MAX_SLOT]; + static unsigned int twa_device_extension_count; + static int twa_major = -1; +@@ -1944,9 +1944,13 @@ static void twa_scsiop_execute_scsi_comp + } + if (tw_dev->srb[request_id]->use_sg == 1) { + struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer; +- char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; ++ char *buf; ++ unsigned long flags = 0; ++ local_irq_save(flags); ++ buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + memcpy(buf, tw_dev->generic_buffer_virt[request_id], sg->length); + kunmap_atomic(buf - sg->offset, KM_IRQ0); ++ local_irq_restore(flags); + } + } + } /* End twa_scsiop_execute_scsi_complete() */ +diff -upr linux-2.6.16.orig/drivers/scsi/3w-xxxx.c linux-2.6.16-026test015/drivers/scsi/3w-xxxx.c +--- linux-2.6.16.orig/drivers/scsi/3w-xxxx.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/scsi/3w-xxxx.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1508,10 +1508,12 @@ static void tw_transfer_internal(TW_Devi + struct scsi_cmnd *cmd = tw_dev->srb[request_id]; + void *buf; + unsigned int transfer_len; ++ unsigned long flags = 0; + + if (cmd->use_sg) { + struct scatterlist *sg = + (struct scatterlist *)cmd->request_buffer; ++ local_irq_save(flags); + buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + transfer_len = min(sg->length, len); + } else { +@@ -1526,6 +1528,7 @@ static void tw_transfer_internal(TW_Devi + + sg = (struct scatterlist *)cmd->request_buffer; + kunmap_atomic(buf - sg->offset, KM_IRQ0); ++ 
local_irq_restore(flags); + } + } + +diff -upr linux-2.6.16.orig/drivers/scsi/libata-core.c linux-2.6.16-026test015/drivers/scsi/libata-core.c +--- linux-2.6.16.orig/drivers/scsi/libata-core.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/scsi/libata-core.c 2006-07-04 14:41:36.000000000 +0400 +@@ -4293,6 +4293,7 @@ static int ata_start_drive(struct ata_po + int ata_device_resume(struct ata_port *ap, struct ata_device *dev) + { + if (ap->flags & ATA_FLAG_SUSPENDED) { ++ ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 200000); + ap->flags &= ~ATA_FLAG_SUSPENDED; + ata_set_mode(ap); + } +diff -upr linux-2.6.16.orig/drivers/scsi/sata_mv.c linux-2.6.16-026test015/drivers/scsi/sata_mv.c +--- linux-2.6.16.orig/drivers/scsi/sata_mv.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/scsi/sata_mv.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1102,6 +1102,7 @@ static u8 mv_get_crpb_status(struct ata_ + void __iomem *port_mmio = mv_ap_base(ap); + struct mv_port_priv *pp = ap->private_data; + u32 out_ptr; ++ u8 ata_status; + + out_ptr = readl(port_mmio + EDMA_RSP_Q_OUT_PTR_OFS); + +@@ -1109,6 +1110,8 @@ static u8 mv_get_crpb_status(struct ata_ + assert(((out_ptr >> EDMA_RSP_Q_PTR_SHIFT) & MV_MAX_Q_DEPTH_MASK) == + pp->rsp_consumer); + ++ ata_status = pp->crpb[pp->rsp_consumer].flags >> CRPB_FLAG_STATUS_SHIFT; ++ + /* increment our consumer index... 
*/ + pp->rsp_consumer = mv_inc_q_index(&pp->rsp_consumer); + +@@ -1123,7 +1126,7 @@ static u8 mv_get_crpb_status(struct ata_ + writelfl(out_ptr, port_mmio + EDMA_RSP_Q_OUT_PTR_OFS); + + /* Return ATA status register for completed CRPB */ +- return (pp->crpb[pp->rsp_consumer].flags >> CRPB_FLAG_STATUS_SHIFT); ++ return ata_status; + } + + /** +@@ -1192,7 +1195,6 @@ static void mv_host_intr(struct ata_host + u32 hc_irq_cause; + int shift, port, port0, hard_port, handled; + unsigned int err_mask; +- u8 ata_status = 0; + + if (hc == 0) { + port0 = 0; +@@ -1210,6 +1212,7 @@ static void mv_host_intr(struct ata_host + hc,relevant,hc_irq_cause); + + for (port = port0; port < port0 + MV_PORTS_PER_HC; port++) { ++ u8 ata_status = 0; + ap = host_set->ports[port]; + hard_port = port & MV_PORT_MASK; /* range 0-3 */ + handled = 0; /* ensure ata_status is set if handled++ */ +diff -upr linux-2.6.16.orig/drivers/scsi/scsi_lib.c linux-2.6.16-026test015/drivers/scsi/scsi_lib.c +--- linux-2.6.16.orig/drivers/scsi/scsi_lib.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/scsi/scsi_lib.c 2006-07-04 14:41:36.000000000 +0400 +@@ -368,7 +368,7 @@ static int scsi_req_map_sg(struct reques + int nsegs, unsigned bufflen, gfp_t gfp) + { + struct request_queue *q = rq->q; +- int nr_pages = (bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ int nr_pages = (bufflen + sgl[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned int data_len = 0, len, bytes, off; + struct page *page; + struct bio *bio = NULL; +diff -upr linux-2.6.16.orig/drivers/sn/ioc3.c linux-2.6.16-026test015/drivers/sn/ioc3.c +--- linux-2.6.16.orig/drivers/sn/ioc3.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/sn/ioc3.c 2006-07-04 14:41:36.000000000 +0400 +@@ -677,7 +677,7 @@ static int ioc3_probe(struct pci_dev *pd + /* Track PCI-device specific data */ + pci_set_drvdata(pdev, idd); + down_write(&ioc3_devices_rwsem); +- list_add(&idd->list, &ioc3_devices); ++ list_add_tail(&idd->list, 
&ioc3_devices); + idd->id = ioc3_counter++; + up_write(&ioc3_devices_rwsem); + +diff -upr linux-2.6.16.orig/drivers/sn/ioc4.c linux-2.6.16-026test015/drivers/sn/ioc4.c +--- linux-2.6.16.orig/drivers/sn/ioc4.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/sn/ioc4.c 2006-07-04 14:41:36.000000000 +0400 +@@ -313,7 +313,7 @@ ioc4_probe(struct pci_dev *pdev, const s + idd->idd_serial_data = NULL; + pci_set_drvdata(idd->idd_pdev, idd); + down_write(&ioc4_devices_rwsem); +- list_add(&idd->idd_list, &ioc4_devices); ++ list_add_tail(&idd->idd_list, &ioc4_devices); + up_write(&ioc4_devices_rwsem); + + /* Add this IOC4 to all submodules */ +diff -upr linux-2.6.16.orig/drivers/usb/core/message.c linux-2.6.16-026test015/drivers/usb/core/message.c +--- linux-2.6.16.orig/drivers/usb/core/message.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/usb/core/message.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1388,11 +1388,13 @@ free_interfaces: + if (dev->state != USB_STATE_ADDRESS) + usb_disable_device (dev, 1); // Skip ep0 + +- i = dev->bus_mA - cp->desc.bMaxPower * 2; +- if (i < 0) +- dev_warn(&dev->dev, "new config #%d exceeds power " +- "limit by %dmA\n", +- configuration, -i); ++ if (cp) { ++ i = dev->bus_mA - cp->desc.bMaxPower * 2; ++ if (i < 0) ++ dev_warn(&dev->dev, "new config #%d exceeds power " ++ "limit by %dmA\n", ++ configuration, -i); ++ } + + if ((ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), + USB_REQ_SET_CONFIGURATION, 0, configuration, 0, +diff -upr linux-2.6.16.orig/drivers/usb/host/ehci-sched.c linux-2.6.16-026test015/drivers/usb/host/ehci-sched.c +--- linux-2.6.16.orig/drivers/usb/host/ehci-sched.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/usb/host/ehci-sched.c 2006-07-04 14:41:36.000000000 +0400 +@@ -707,6 +707,7 @@ iso_stream_init ( + } else { + u32 addr; + int think_time; ++ int hs_transfers; + + addr = dev->ttport << 24; + if (!ehci_is_TDI(ehci) +@@ -719,6 +720,7 @@ 
iso_stream_init ( + think_time = dev->tt ? dev->tt->think_time : 0; + stream->tt_usecs = NS_TO_US (think_time + usb_calc_bus_time ( + dev->speed, is_input, 1, maxp)); ++ hs_transfers = max (1u, (maxp + 187) / 188); + if (is_input) { + u32 tmp; + +@@ -727,12 +729,11 @@ iso_stream_init ( + stream->usecs = HS_USECS_ISO (1); + stream->raw_mask = 1; + +- /* pessimistic c-mask */ +- tmp = usb_calc_bus_time (USB_SPEED_FULL, 1, 0, maxp) +- / (125 * 1000); +- stream->raw_mask |= 3 << (tmp + 9); ++ /* c-mask as specified in USB 2.0 11.18.4 3.c */ ++ tmp = (1 << (hs_transfers + 2)) - 1; ++ stream->raw_mask |= tmp << (8 + 2); + } else +- stream->raw_mask = smask_out [maxp / 188]; ++ stream->raw_mask = smask_out [hs_transfers - 1]; + bandwidth = stream->usecs + stream->c_usecs; + bandwidth /= 1 << (interval + 2); + +diff -upr linux-2.6.16.orig/drivers/usb/serial/console.c linux-2.6.16-026test015/drivers/usb/serial/console.c +--- linux-2.6.16.orig/drivers/usb/serial/console.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/usb/serial/console.c 2006-07-04 14:41:36.000000000 +0400 +@@ -54,7 +54,7 @@ static struct console usbcons; + * serial.c code, except that the specifier is "ttyUSB" instead + * of "ttyS". 
+ */ +-static int __init usb_console_setup(struct console *co, char *options) ++static int usb_console_setup(struct console *co, char *options) + { + struct usbcons_info *info = &usbcons_info; + int baud = 9600; +diff -upr linux-2.6.16.orig/drivers/usb/serial/option.c linux-2.6.16-026test015/drivers/usb/serial/option.c +--- linux-2.6.16.orig/drivers/usb/serial/option.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/usb/serial/option.c 2006-07-04 14:41:36.000000000 +0400 +@@ -582,14 +582,14 @@ static void option_setup_urbs(struct usb + portdata = usb_get_serial_port_data(port); + + /* Do indat endpoints first */ +- for (j = 0; j <= N_IN_URB; ++j) { ++ for (j = 0; j < N_IN_URB; ++j) { + portdata->in_urbs[j] = option_setup_urb (serial, + port->bulk_in_endpointAddress, USB_DIR_IN, port, + portdata->in_buffer[j], IN_BUFLEN, option_indat_callback); + } + + /* outdat endpoints */ +- for (j = 0; j <= N_OUT_URB; ++j) { ++ for (j = 0; j < N_OUT_URB; ++j) { + portdata->out_urbs[j] = option_setup_urb (serial, + port->bulk_out_endpointAddress, USB_DIR_OUT, port, + portdata->out_buffer[j], OUT_BUFLEN, option_outdat_callback); +diff -upr linux-2.6.16.orig/drivers/usb/serial/whiteheat.c linux-2.6.16-026test015/drivers/usb/serial/whiteheat.c +--- linux-2.6.16.orig/drivers/usb/serial/whiteheat.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/usb/serial/whiteheat.c 2006-07-04 14:41:36.000000000 +0400 +@@ -388,7 +388,7 @@ static int whiteheat_attach (struct usb_ + if (ret) { + err("%s: Couldn't send command [%d]", serial->type->description, ret); + goto no_firmware; +- } else if (alen != sizeof(command)) { ++ } else if (alen != 2) { + err("%s: Send command incomplete [%d]", serial->type->description, alen); + goto no_firmware; + } +@@ -400,7 +400,7 @@ static int whiteheat_attach (struct usb_ + if (ret) { + err("%s: Couldn't get results [%d]", serial->type->description, ret); + goto no_firmware; +- } else if (alen != sizeof(result)) { 
++ } else if (alen != sizeof(*hw_info) + 1) { + err("%s: Get results incomplete [%d]", serial->type->description, alen); + goto no_firmware; + } else if (result[0] != command[0]) { +diff -upr linux-2.6.16.orig/drivers/usb/storage/Kconfig linux-2.6.16-026test015/drivers/usb/storage/Kconfig +--- linux-2.6.16.orig/drivers/usb/storage/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/usb/storage/Kconfig 2006-07-04 14:41:36.000000000 +0400 +@@ -48,7 +48,8 @@ config USB_STORAGE_FREECOM + + config USB_STORAGE_ISD200 + bool "ISD-200 USB/ATA Bridge support" +- depends on USB_STORAGE && BLK_DEV_IDE ++ depends on USB_STORAGE ++ depends on BLK_DEV_IDE=y || BLK_DEV_IDE=USB_STORAGE + ---help--- + Say Y here if you want to use USB Mass Store devices based + on the In-Systems Design ISD-200 USB/ATA bridge. +diff -upr linux-2.6.16.orig/drivers/video/cfbimgblt.c linux-2.6.16-026test015/drivers/video/cfbimgblt.c +--- linux-2.6.16.orig/drivers/video/cfbimgblt.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/video/cfbimgblt.c 2006-07-04 14:41:36.000000000 +0400 +@@ -169,7 +169,7 @@ static inline void slow_imageblit(const + + while (j--) { + l--; +- color = (*s & 1 << (FB_BIT_NR(l))) ? fgcolor : bgcolor; ++ color = (*s & (1 << l)) ? fgcolor : bgcolor; + val |= FB_SHIFT_HIGH(color, shift); + + /* Did the bitshift spill bits to the next long? 
*/ +diff -upr linux-2.6.16.orig/drivers/video/fbmem.c linux-2.6.16-026test015/drivers/video/fbmem.c +--- linux-2.6.16.orig/drivers/video/fbmem.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/video/fbmem.c 2006-07-04 14:41:36.000000000 +0400 +@@ -669,13 +669,19 @@ fb_write(struct file *file, const char _ + total_size = info->fix.smem_len; + + if (p > total_size) +- return 0; ++ return -EFBIG; + +- if (count >= total_size) ++ if (count > total_size) { ++ err = -EFBIG; + count = total_size; ++ } ++ ++ if (count + p > total_size) { ++ if (!err) ++ err = -ENOSPC; + +- if (count + p > total_size) + count = total_size - p; ++ } + + buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, + GFP_KERNEL); +@@ -717,7 +723,7 @@ fb_write(struct file *file, const char _ + + kfree(buffer); + +- return (err) ? err : cnt; ++ return (cnt) ? cnt : err; + } + + #ifdef CONFIG_KMOD +diff -upr linux-2.6.16.orig/drivers/video/i810/i810_main.c linux-2.6.16-026test015/drivers/video/i810/i810_main.c +--- linux-2.6.16.orig/drivers/video/i810/i810_main.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/drivers/video/i810/i810_main.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1508,7 +1508,7 @@ static int i810fb_cursor(struct fb_info + int size = ((cursor->image.width + 7) >> 3) * + cursor->image.height; + int i; +- u8 *data = kmalloc(64 * 8, GFP_KERNEL); ++ u8 *data = kmalloc(64 * 8, GFP_ATOMIC); + + if (data == NULL) + return -ENOMEM; +diff -upr linux-2.6.16.orig/fs/9p/vfs_inode.c linux-2.6.16-026test015/fs/9p/vfs_inode.c +--- linux-2.6.16.orig/fs/9p/vfs_inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/9p/vfs_inode.c 2006-07-04 14:41:36.000000000 +0400 +@@ -614,6 +614,7 @@ static struct dentry *v9fs_vfs_lookup(st + + sb = dir->i_sb; + v9ses = v9fs_inode2v9ses(dir); ++ dentry->d_op = &v9fs_dentry_operations; + dirfid = v9fs_fid_lookup(dentry->d_parent); + + if (!dirfid) { +@@ -681,8 +682,6 @@ static struct dentry 
*v9fs_vfs_lookup(st + goto FreeFcall; + + fid->qid = fcall->params.rstat.stat.qid; +- +- dentry->d_op = &v9fs_dentry_operations; + v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); + + d_add(dentry, inode); +diff -upr linux-2.6.16.orig/fs/Kconfig linux-2.6.16-026test015/fs/Kconfig +--- linux-2.6.16.orig/fs/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/Kconfig 2006-07-04 14:41:39.000000000 +0400 +@@ -418,6 +418,15 @@ config QUOTA + with the quota tools. Probably the quota support is only useful for + multi user systems. If unsure, say N. + ++config QUOTA_COMPAT ++ bool "Compatibility with older quotactl interface" ++ depends on QUOTA ++ help ++ This option enables compatibility layer for older version ++ of quotactl interface with byte granularity (QUOTAON at 0x0100, ++ GETQUOTA at 0x0D00). Interface versions older than that one and ++ with block granularity are still not supported. ++ + config QFMT_V1 + tristate "Old quota format support" + depends on QUOTA +@@ -433,6 +442,38 @@ config QFMT_V2 + This quota format allows using quotas with 32-bit UIDs/GIDs. If you + need this functionality say Y here. + ++config SIM_FS ++ tristate "VPS filesystem" ++ depends on VZ_QUOTA ++ default m ++ help ++ This file system is a part of Virtuozzo. It intoduces a fake ++ superblock and blockdev to VE to hide real device and show ++ statfs results taken from quota. ++ ++config VZ_QUOTA ++ tristate "Virtuozzo Disk Quota support" ++ depends on QUOTA ++ default m ++ help ++ Virtuozzo Disk Quota imposes disk quota on directories with their ++ files and subdirectories in total. Such disk quota is used to ++ account and limit disk usage by Virtuozzo VPS, but also may be used ++ separately. ++ ++config VZ_QUOTA_UNLOAD ++ bool "Unloadable Virtuozzo Disk Quota module" ++ depends on VZ_QUOTA=m ++ default n ++ help ++ Make Virtuozzo Disk Quota module unloadable. ++ Doesn't work reliably now. 
++ ++config VZ_QUOTA_UGID ++ bool "Per-user and per-group quota in Virtuozzo quota partitions" ++ depends on VZ_QUOTA!=n ++ default y ++ + config QUOTACTL + bool + depends on XFS_QUOTA || QUOTA +diff -upr linux-2.6.16.orig/fs/Makefile linux-2.6.16-026test015/fs/Makefile +--- linux-2.6.16.orig/fs/Makefile 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/Makefile 2006-07-04 14:41:39.000000000 +0400 +@@ -39,9 +39,15 @@ obj-$(CONFIG_QUOTA) += dquot.o + obj-$(CONFIG_QFMT_V1) += quota_v1.o + obj-$(CONFIG_QFMT_V2) += quota_v2.o + obj-$(CONFIG_QUOTACTL) += quota.o ++obj-$(CONFIG_VZ_QUOTA) += vzdquota.o ++vzdquota-y += vzdquot.o vzdq_mgmt.o vzdq_ops.o vzdq_tree.o ++vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_ugid.o ++vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o + + obj-$(CONFIG_DNOTIFY) += dnotify.o + ++obj-$(CONFIG_SIM_FS) += simfs.o ++ + obj-$(CONFIG_PROC_FS) += proc/ + obj-y += partitions/ + obj-$(CONFIG_SYSFS) += sysfs/ +diff -upr linux-2.6.16.orig/fs/aio.c linux-2.6.16-026test015/fs/aio.c +--- linux-2.6.16.orig/fs/aio.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/aio.c 2006-07-04 14:41:39.000000000 +0400 +@@ -41,13 +41,16 @@ + #endif + + /*------ sysctl variables----*/ +-static DEFINE_SPINLOCK(aio_nr_lock); ++DEFINE_SPINLOCK(aio_nr_lock); + unsigned long aio_nr; /* current system wide number of aio requests */ + unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ ++EXPORT_SYMBOL_GPL(aio_nr_lock); ++EXPORT_SYMBOL_GPL(aio_nr); + /*----end sysctl variables---*/ + + static kmem_cache_t *kiocb_cachep; +-static kmem_cache_t *kioctx_cachep; ++kmem_cache_t *kioctx_cachep; ++EXPORT_SYMBOL_GPL(kioctx_cachep); + + static struct workqueue_struct *aio_wq; + +@@ -58,7 +61,7 @@ static DECLARE_WORK(fput_work, aio_fput_ + static DEFINE_SPINLOCK(fput_lock); + static LIST_HEAD(fput_head); + +-static void aio_kick_handler(void *); ++void aio_kick_handler(void *); + static void aio_queue_work(struct kioctx *); + + 
/* aio_setup +@@ -293,7 +296,7 @@ static void aio_cancel_all(struct kioctx + spin_unlock_irq(&ctx->ctx_lock); + } + +-static void wait_for_all_aios(struct kioctx *ctx) ++void wait_for_all_aios(struct kioctx *ctx) + { + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); +@@ -310,6 +313,7 @@ static void wait_for_all_aios(struct kio + __set_task_state(tsk, TASK_RUNNING); + remove_wait_queue(&ctx->wait, &wait); + } ++EXPORT_SYMBOL_GPL(wait_for_all_aios); + + /* wait_on_sync_kiocb: + * Waits on the given sync kiocb to complete. +@@ -856,7 +860,7 @@ static inline void aio_run_all_iocbs(str + * space. + * Run on aiod's context. + */ +-static void aio_kick_handler(void *data) ++void aio_kick_handler(void *data) + { + struct kioctx *ctx = data; + mm_segment_t oldfs = get_fs(); +@@ -875,6 +879,7 @@ static void aio_kick_handler(void *data) + if (requeue) + queue_work(aio_wq, &ctx->wq); + } ++EXPORT_SYMBOL_GPL(aio_kick_handler); + + + /* +diff -upr linux-2.6.16.orig/fs/autofs/autofs_i.h linux-2.6.16-026test015/fs/autofs/autofs_i.h +--- linux-2.6.16.orig/fs/autofs/autofs_i.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/autofs/autofs_i.h 2006-07-04 14:41:38.000000000 +0400 +@@ -124,7 +124,7 @@ static inline struct autofs_sb_info *aut + filesystem without "magic".) 
*/ + + static inline int autofs_oz_mode(struct autofs_sb_info *sbi) { +- return sbi->catatonic || process_group(current) == sbi->oz_pgrp; ++ return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp; + } + + /* Hash operations */ +diff -upr linux-2.6.16.orig/fs/autofs/init.c linux-2.6.16-026test015/fs/autofs/init.c +--- linux-2.6.16.orig/fs/autofs/init.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/autofs/init.c 2006-07-04 14:41:38.000000000 +0400 +@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs + .name = "autofs", + .get_sb = autofs_get_sb, + .kill_sb = kill_anon_super, ++ .fs_flags = FS_VIRTUALIZED, + }; + + static int __init init_autofs_fs(void) +diff -upr linux-2.6.16.orig/fs/autofs/inode.c linux-2.6.16-026test015/fs/autofs/inode.c +--- linux-2.6.16.orig/fs/autofs/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/autofs/inode.c 2006-07-04 14:41:38.000000000 +0400 +@@ -66,7 +66,7 @@ static int parse_options(char *options, + + *uid = current->uid; + *gid = current->gid; +- *pgrp = process_group(current); ++ *pgrp = virt_pgid(current); + + *minproto = *maxproto = AUTOFS_PROTO_VERSION; + +@@ -138,7 +138,7 @@ int autofs_fill_super(struct super_block + sbi->magic = AUTOFS_SBI_MAGIC; + sbi->catatonic = 0; + sbi->exp_timeout = 0; +- sbi->oz_pgrp = process_group(current); ++ sbi->oz_pgrp = virt_pgid(current); + autofs_initialize_hash(&sbi->dirhash); + sbi->queues = NULL; + memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN); +diff -upr linux-2.6.16.orig/fs/autofs/root.c linux-2.6.16-026test015/fs/autofs/root.c +--- linux-2.6.16.orig/fs/autofs/root.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/autofs/root.c 2006-07-04 14:41:38.000000000 +0400 +@@ -354,7 +354,7 @@ static int autofs_root_unlink(struct ino + + /* This allows root to remove symlinks */ + lock_kernel(); +- if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) { ++ if ( !autofs_oz_mode(sbi) && 
!capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) { + unlock_kernel(); + return -EACCES; + } +@@ -541,7 +541,7 @@ static int autofs_root_ioctl(struct inod + _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT ) + return -ENOTTY; + +- if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) ++ if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) + return -EPERM; + + switch(cmd) { +diff -upr linux-2.6.16.orig/fs/autofs4/autofs_i.h linux-2.6.16-026test015/fs/autofs4/autofs_i.h +--- linux-2.6.16.orig/fs/autofs4/autofs_i.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/autofs4/autofs_i.h 2006-07-04 14:41:38.000000000 +0400 +@@ -122,7 +122,7 @@ static inline struct autofs_info *autofs + filesystem without "magic".) */ + + static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { +- return sbi->catatonic || process_group(current) == sbi->oz_pgrp; ++ return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp; + } + + /* Does a dentry have some pending activity? 
*/ +diff -upr linux-2.6.16.orig/fs/autofs4/init.c linux-2.6.16-026test015/fs/autofs4/init.c +--- linux-2.6.16.orig/fs/autofs4/init.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/autofs4/init.c 2006-07-04 14:41:38.000000000 +0400 +@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs + .name = "autofs", + .get_sb = autofs_get_sb, + .kill_sb = kill_anon_super, ++ .fs_flags = FS_VIRTUALIZED, + }; + + static int __init init_autofs4_fs(void) +diff -upr linux-2.6.16.orig/fs/autofs4/inode.c linux-2.6.16-026test015/fs/autofs4/inode.c +--- linux-2.6.16.orig/fs/autofs4/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/autofs4/inode.c 2006-07-04 14:41:38.000000000 +0400 +@@ -179,7 +179,7 @@ static int parse_options(char *options, + + *uid = current->uid; + *gid = current->gid; +- *pgrp = process_group(current); ++ *pgrp = virt_pgid(current); + + *minproto = AUTOFS_MIN_PROTO_VERSION; + *maxproto = AUTOFS_MAX_PROTO_VERSION; +@@ -265,7 +265,7 @@ int autofs4_fill_super(struct super_bloc + sbi->root = NULL; + sbi->catatonic = 0; + sbi->exp_timeout = 0; +- sbi->oz_pgrp = process_group(current); ++ sbi->oz_pgrp = virt_pgid(current); + sbi->sb = s; + sbi->version = 0; + sbi->sub_version = 0; +diff -upr linux-2.6.16.orig/fs/autofs4/root.c linux-2.6.16-026test015/fs/autofs4/root.c +--- linux-2.6.16.orig/fs/autofs4/root.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/autofs4/root.c 2006-07-04 14:41:38.000000000 +0400 +@@ -592,7 +592,7 @@ static int autofs4_dir_unlink(struct ino + struct autofs_info *ino = autofs4_dentry_ino(dentry); + + /* This allows root to remove symlinks */ +- if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) ++ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) + return -EACCES; + + dput(ino->dentry); +@@ -784,7 +784,7 @@ static int autofs4_root_ioctl(struct ino + _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT ) + return -ENOTTY; + +- 
if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) ++ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) + return -EPERM; + + switch(cmd) { +diff -upr linux-2.6.16.orig/fs/binfmt_aout.c linux-2.6.16-026test015/fs/binfmt_aout.c +--- linux-2.6.16.orig/fs/binfmt_aout.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/binfmt_aout.c 2006-07-04 14:41:39.000000000 +0400 +@@ -446,9 +446,11 @@ beyond_if: + #endif + start_thread(regs, ex.a_entry, current->mm->start_stack); + if (unlikely(current->ptrace & PT_PTRACED)) { +- if (current->ptrace & PT_TRACE_EXEC) ++ if (current->ptrace & PT_TRACE_EXEC) { ++ set_pn_state(current, PN_STOP_EXEC); + ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); +- else ++ clear_pn_state(current); ++ } else + send_sig(SIGTRAP, current, 0); + } + return 0; +diff -upr linux-2.6.16.orig/fs/binfmt_elf.c linux-2.6.16-026test015/fs/binfmt_elf.c +--- linux-2.6.16.orig/fs/binfmt_elf.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/binfmt_elf.c 2006-07-04 14:41:39.000000000 +0400 +@@ -361,7 +361,7 @@ static unsigned long load_elf_interp(str + eppnt = elf_phdata; + for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) { + if (eppnt->p_type == PT_LOAD) { +- int elf_type = MAP_PRIVATE | MAP_DENYWRITE; ++ int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECPRIO; + int elf_prot = 0; + unsigned long vaddr = 0; + unsigned long k, map_addr; +@@ -669,7 +669,7 @@ static int load_elf_binary(struct linux_ + */ + SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); + +- interpreter = open_exec(elf_interpreter); ++ interpreter = open_exec(elf_interpreter, NULL); + retval = PTR_ERR(interpreter); + if (IS_ERR(interpreter)) + goto out_free_interp; +@@ -834,7 +834,7 @@ static int load_elf_binary(struct linux_ + if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; + if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; + +- elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE; ++ elf_flags = 
MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE|MAP_EXECPRIO; + + vaddr = elf_ppnt->p_vaddr; + if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { +@@ -1000,9 +1000,11 @@ static int load_elf_binary(struct linux_ + + start_thread(regs, elf_entry, bprm->p); + if (unlikely(current->ptrace & PT_PTRACED)) { +- if (current->ptrace & PT_TRACE_EXEC) ++ if (current->ptrace & PT_TRACE_EXEC) { ++ set_pn_state(current, PN_STOP_EXEC); + ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); +- else ++ clear_pn_state(current); ++ } else + send_sig(SIGTRAP, current, 0); + } + retval = 0; +@@ -1022,8 +1024,13 @@ out_free_file: + sys_close(elf_exec_fileno); + out_free_fh: + if (files) { +- put_files_struct(current->files); ++ struct files_struct *old; ++ ++ old = current->files; ++ task_lock(current); + current->files = files; ++ task_unlock(current); ++ put_files_struct(old); + } + out_free_ph: + kfree(elf_phdata); +@@ -1281,10 +1288,10 @@ static void fill_prstatus(struct elf_prs + prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; + prstatus->pr_sigpend = p->pending.signal.sig[0]; + prstatus->pr_sighold = p->blocked.sig[0]; +- prstatus->pr_pid = p->pid; +- prstatus->pr_ppid = p->parent->pid; +- prstatus->pr_pgrp = process_group(p); +- prstatus->pr_sid = p->signal->session; ++ prstatus->pr_pid = virt_pid(p); ++ prstatus->pr_ppid = virt_pid(p->parent); ++ prstatus->pr_pgrp = virt_pgid(p); ++ prstatus->pr_sid = virt_sid(p); + if (thread_group_leader(p)) { + /* + * This is the record for the group leader. Add in the +@@ -1327,10 +1334,10 @@ static int fill_psinfo(struct elf_prpsin + psinfo->pr_psargs[i] = ' '; + psinfo->pr_psargs[len] = 0; + +- psinfo->pr_pid = p->pid; +- psinfo->pr_ppid = p->parent->pid; +- psinfo->pr_pgrp = process_group(p); +- psinfo->pr_sid = p->signal->session; ++ psinfo->pr_pid = virt_pid(p); ++ psinfo->pr_ppid = virt_pid(p->parent); ++ psinfo->pr_pgrp = virt_pgid(p); ++ psinfo->pr_sid = virt_sid(p); + + i = p->state ? 
ffz(~p->state) + 1 : 0; + psinfo->pr_state = i; +@@ -1463,7 +1470,7 @@ static int elf_core_dump(long signr, str + if (signr) { + struct elf_thread_status *tmp; + read_lock(&tasklist_lock); +- do_each_thread(g,p) ++ do_each_thread_ve(g,p) + if (current->mm == p->mm && current != p) { + tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC); + if (!tmp) { +@@ -1475,7 +1482,7 @@ static int elf_core_dump(long signr, str + tmp->thread = p; + list_add(&tmp->list, &thread_list); + } +- while_each_thread(g,p); ++ while_each_thread_ve(g,p); + read_unlock(&tasklist_lock); + list_for_each(t, &thread_list) { + struct elf_thread_status *tmp; +diff -upr linux-2.6.16.orig/fs/binfmt_elf_fdpic.c linux-2.6.16-026test015/fs/binfmt_elf_fdpic.c +--- linux-2.6.16.orig/fs/binfmt_elf_fdpic.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/binfmt_elf_fdpic.c 2006-07-04 14:41:37.000000000 +0400 +@@ -205,7 +205,7 @@ static int load_elf_fdpic_binary(struct + kdebug("Using ELF interpreter %s", interpreter_name); + + /* replace the program with the interpreter */ +- interpreter = open_exec(interpreter_name); ++ interpreter = open_exec(interpreter_name, bprm); + retval = PTR_ERR(interpreter); + if (IS_ERR(interpreter)) { + interpreter = NULL; +diff -upr linux-2.6.16.orig/fs/binfmt_em86.c linux-2.6.16-026test015/fs/binfmt_em86.c +--- linux-2.6.16.orig/fs/binfmt_em86.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/binfmt_em86.c 2006-07-04 14:41:37.000000000 +0400 +@@ -82,7 +82,7 @@ static int load_em86(struct linux_binprm + * Note that we use open_exec() as the name is now in kernel + * space, and we don't need to copy it. 
+ */ +- file = open_exec(interp); ++ file = open_exec(interp, bprm); + if (IS_ERR(file)) + return PTR_ERR(file); + +diff -upr linux-2.6.16.orig/fs/binfmt_flat.c linux-2.6.16-026test015/fs/binfmt_flat.c +--- linux-2.6.16.orig/fs/binfmt_flat.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/binfmt_flat.c 2006-07-04 14:41:37.000000000 +0400 +@@ -774,7 +774,7 @@ static int load_flat_shared_library(int + + /* Open the file up */ + bprm.filename = buf; +- bprm.file = open_exec(bprm.filename); ++ bprm.file = open_exec(bprm.filename, bprm); + res = PTR_ERR(bprm.file); + if (IS_ERR(bprm.file)) + return res; +diff -upr linux-2.6.16.orig/fs/binfmt_misc.c linux-2.6.16-026test015/fs/binfmt_misc.c +--- linux-2.6.16.orig/fs/binfmt_misc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/binfmt_misc.c 2006-07-04 14:41:37.000000000 +0400 +@@ -179,7 +179,7 @@ static int load_misc_binary(struct linux + + bprm->interp = iname; /* for binfmt_script */ + +- interp_file = open_exec (iname); ++ interp_file = open_exec (iname, bprm); + retval = PTR_ERR (interp_file); + if (IS_ERR (interp_file)) + goto _error; +@@ -216,8 +216,13 @@ _error: + bprm->interp_data = 0; + _unshare: + if (files) { +- put_files_struct(current->files); ++ struct files_struct *old; ++ ++ old = current->files; ++ task_lock(current); + current->files = files; ++ task_unlock(current); ++ put_files_struct(old); + } + goto _ret; + } +diff -upr linux-2.6.16.orig/fs/binfmt_script.c linux-2.6.16-026test015/fs/binfmt_script.c +--- linux-2.6.16.orig/fs/binfmt_script.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/binfmt_script.c 2006-07-04 14:41:37.000000000 +0400 +@@ -85,7 +85,7 @@ static int load_script(struct linux_binp + /* + * OK, now restart the process with the interpreter's dentry. 
+ */ +- file = open_exec(interp); ++ file = open_exec(interp, bprm); + if (IS_ERR(file)) + return PTR_ERR(file); + +diff -upr linux-2.6.16.orig/fs/block_dev.c linux-2.6.16-026test015/fs/block_dev.c +--- linux-2.6.16.orig/fs/block_dev.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/block_dev.c 2006-07-04 14:41:37.000000000 +0400 +@@ -561,9 +561,16 @@ static int do_open(struct block_device * + { + struct module *owner = NULL; + struct gendisk *disk; +- int ret = -ENXIO; ++ int ret; + int part; + ++#ifdef CONFIG_VE ++ ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev, ++ file->f_mode&(FMODE_READ|FMODE_WRITE)); ++ if (ret) ++ return ret; ++#endif ++ ret = -ENXIO; + file->f_mapping = bdev->bd_inode->i_mapping; + lock_kernel(); + disk = get_gendisk(bdev->bd_dev, &part); +@@ -832,7 +839,7 @@ EXPORT_SYMBOL(ioctl_by_bdev); + * namespace if possible and return it. Return ERR_PTR(error) + * otherwise. + */ +-struct block_device *lookup_bdev(const char *path) ++struct block_device *lookup_bdev(const char *path, int mode) + { + struct block_device *bdev; + struct inode *inode; +@@ -850,6 +857,11 @@ struct block_device *lookup_bdev(const c + error = -ENOTBLK; + if (!S_ISBLK(inode->i_mode)) + goto fail; ++#ifdef CONFIG_VE ++ error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode); ++ if (error) ++ goto fail; ++#endif + error = -EACCES; + if (nd.mnt->mnt_flags & MNT_NODEV) + goto fail; +@@ -881,12 +893,13 @@ struct block_device *open_bdev_excl(cons + mode_t mode = FMODE_READ; + int error = 0; + +- bdev = lookup_bdev(path); ++ if (!(flags & MS_RDONLY)) ++ mode |= FMODE_WRITE; ++ ++ bdev = lookup_bdev(path, mode); + if (IS_ERR(bdev)) + return bdev; + +- if (!(flags & MS_RDONLY)) +- mode |= FMODE_WRITE; + error = blkdev_get(bdev, mode, 0); + if (error) + return ERR_PTR(error); +diff -upr linux-2.6.16.orig/fs/buffer.c linux-2.6.16-026test015/fs/buffer.c +--- linux-2.6.16.orig/fs/buffer.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/buffer.c 
2006-07-04 14:41:37.000000000 +0400 +@@ -1942,8 +1942,9 @@ static int __block_prepare_write(struct + if (err) + break; + if (buffer_new(bh)) { +- unmap_underlying_metadata(bh->b_bdev, +- bh->b_blocknr); ++ if (buffer_mapped(bh)) ++ unmap_underlying_metadata(bh->b_bdev, ++ bh->b_blocknr); + if (PageUptodate(page)) { + set_buffer_uptodate(bh); + continue; +diff -upr linux-2.6.16.orig/fs/char_dev.c linux-2.6.16-026test015/fs/char_dev.c +--- linux-2.6.16.orig/fs/char_dev.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/char_dev.c 2006-07-04 14:41:37.000000000 +0400 +@@ -15,6 +15,7 @@ + #include <linux/module.h> + #include <linux/smp_lock.h> + #include <linux/devfs_fs_kernel.h> ++#include <linux/seq_file.h> + + #include <linux/kobject.h> + #include <linux/kobj_map.h> +@@ -26,8 +27,6 @@ + + static struct kobj_map *cdev_map; + +-#define MAX_PROBE_HASH 255 /* random */ +- + static DECLARE_MUTEX(chrdevs_lock); + + static struct char_device_struct { +@@ -38,93 +37,29 @@ static struct char_device_struct { + char name[64]; + struct file_operations *fops; + struct cdev *cdev; /* will die */ +-} *chrdevs[MAX_PROBE_HASH]; ++} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; + + /* index in the above */ + static inline int major_to_index(int major) + { +- return major % MAX_PROBE_HASH; +-} +- +-struct chrdev_info { +- int index; +- struct char_device_struct *cd; +-}; +- +-void *get_next_chrdev(void *dev) +-{ +- struct chrdev_info *info; +- +- if (dev == NULL) { +- info = kmalloc(sizeof(*info), GFP_KERNEL); +- if (!info) +- goto out; +- info->index=0; +- info->cd = chrdevs[info->index]; +- if (info->cd) +- goto out; +- } else { +- info = dev; +- } +- +- while (info->index < ARRAY_SIZE(chrdevs)) { +- if (info->cd) +- info->cd = info->cd->next; +- if (info->cd) +- goto out; +- /* +- * No devices on this chain, move to the next +- */ +- info->index++; +- info->cd = (info->index < ARRAY_SIZE(chrdevs)) ? 
+- chrdevs[info->index] : NULL; +- if (info->cd) +- goto out; +- } +- +-out: +- return info; +-} +- +-void *acquire_chrdev_list(void) +-{ +- down(&chrdevs_lock); +- return get_next_chrdev(NULL); +-} +- +-void release_chrdev_list(void *dev) +-{ +- up(&chrdevs_lock); +- kfree(dev); ++ return major % CHRDEV_MAJOR_HASH_SIZE; + } + ++#ifdef CONFIG_PROC_FS + +-int count_chrdev_list(void) ++void chrdev_show(struct seq_file *f, off_t offset) + { + struct char_device_struct *cd; +- int i, count; + +- count = 0; +- +- for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) { +- for (cd = chrdevs[i]; cd; cd = cd->next) +- count++; ++ if (offset < CHRDEV_MAJOR_HASH_SIZE) { ++ down(&chrdevs_lock); ++ for (cd = chrdevs[offset]; cd; cd = cd->next) ++ seq_printf(f, "%3d %s\n", cd->major, cd->name); ++ up(&chrdevs_lock); + } +- +- return count; + } + +-int get_chrdev_info(void *dev, int *major, char **name) +-{ +- struct chrdev_info *info = dev; +- +- if (info->cd == NULL) +- return 1; +- +- *major = info->cd->major; +- *name = info->cd->name; +- return 0; +-} ++#endif /* CONFIG_PROC_FS */ + + /* + * Register a single major with a specified minor range. 
+@@ -342,6 +277,13 @@ int chrdev_open(struct inode * inode, st + struct cdev *new = NULL; + int ret = 0; + ++#ifdef CONFIG_VE ++ ret = get_device_perms_ve(S_IFCHR, inode->i_rdev, ++ filp->f_mode&(FMODE_READ|FMODE_WRITE)); ++ if (ret) ++ return ret; ++#endif ++ + spin_lock(&cdev_lock); + p = inode->i_cdev; + if (!p) { +diff -upr linux-2.6.16.orig/fs/cifs/cifsencrypt.c linux-2.6.16-026test015/fs/cifs/cifsencrypt.c +--- linux-2.6.16.orig/fs/cifs/cifsencrypt.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/cifs/cifsencrypt.c 2006-07-04 14:41:36.000000000 +0400 +@@ -56,9 +56,6 @@ int cifs_sign_smb(struct smb_hdr * cifs_ + int rc = 0; + char smb_signature[20]; + +- /* BB remember to initialize sequence number elsewhere and initialize mac_signing key elsewhere BB */ +- /* BB remember to add code to save expected sequence number in midQ entry BB */ +- + if((cifs_pdu == NULL) || (server == NULL)) + return -EINVAL; + +@@ -85,20 +82,33 @@ int cifs_sign_smb(struct smb_hdr * cifs_ + static int cifs_calc_signature2(const struct kvec * iov, int n_vec, + const char * key, char * signature) + { +- struct MD5Context context; +- +- if((iov == NULL) || (signature == NULL)) +- return -EINVAL; ++ struct MD5Context context; ++ int i; + +- MD5Init(&context); +- MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16); ++ if((iov == NULL) || (signature == NULL)) ++ return -EINVAL; + +-/* MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); */ /* BB FIXME BB */ ++ MD5Init(&context); ++ MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16); ++ for(i=0;i<n_vec;i++) { ++ if(iov[i].iov_base == NULL) { ++ cERROR(1,("null iovec entry")); ++ return -EIO; ++ } else if(iov[i].iov_len == 0) ++ break; /* bail out if we are sent nothing to sign */ ++ /* The first entry includes a length field (which does not get ++ signed that occupies the first 4 bytes before the header */ ++ if(i==0) { ++ if (iov[0].iov_len <= 8 ) /* cmd field at offset 9 */ ++ break; /* nothing to sign or corrupt 
header */ ++ MD5Update(&context,iov[0].iov_base+4, iov[0].iov_len-4); ++ } else ++ MD5Update(&context,iov[i].iov_base, iov[i].iov_len); ++ } + +- MD5Final(signature,&context); ++ MD5Final(signature,&context); + +- return -EOPNOTSUPP; +-/* return 0; */ ++ return 0; + } + + +diff -upr linux-2.6.16.orig/fs/cifs/cifsfs.c linux-2.6.16-026test015/fs/cifs/cifsfs.c +--- linux-2.6.16.orig/fs/cifs/cifsfs.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/cifs/cifsfs.c 2006-07-04 14:41:37.000000000 +0400 +@@ -220,7 +220,8 @@ cifs_statfs(struct super_block *sb, stru + longer available? */ + } + +-static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) ++static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + struct cifs_sb_info *cifs_sb; + +@@ -232,7 +233,7 @@ static int cifs_permission(struct inode + on the client (above and beyond ACL on servers) for + servers which do not support setting and viewing mode bits, + so allowing client to check permissions is useful */ +- return generic_permission(inode, mask, NULL); ++ return generic_permission(inode, mask, NULL, perm); + } + + static kmem_cache_t *cifs_inode_cachep; +diff -upr linux-2.6.16.orig/fs/cifs/dir.c linux-2.6.16-026test015/fs/cifs/dir.c +--- linux-2.6.16.orig/fs/cifs/dir.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/cifs/dir.c 2006-07-04 14:41:36.000000000 +0400 +@@ -441,6 +441,20 @@ cifs_lookup(struct inode *parent_dir_ino + cifs_sb = CIFS_SB(parent_dir_inode->i_sb); + pTcon = cifs_sb->tcon; + ++ /* ++ * Don't allow the separator character in a path component. ++ * The VFS will not allow "/", but "\" is allowed by posix. 
++ */ ++ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { ++ int i; ++ for (i = 0; i < direntry->d_name.len; i++) ++ if (direntry->d_name.name[i] == '\\') { ++ cFYI(1, ("Invalid file name")); ++ FreeXid(xid); ++ return ERR_PTR(-EINVAL); ++ } ++ } ++ + /* can not grab the rename sem here since it would + deadlock in the cases (beginning of sys_rename itself) + in which we already have the sb rename sem */ +diff -upr linux-2.6.16.orig/fs/coda/dir.c linux-2.6.16-026test015/fs/coda/dir.c +--- linux-2.6.16.orig/fs/coda/dir.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/coda/dir.c 2006-07-04 14:41:37.000000000 +0400 +@@ -151,7 +151,8 @@ exit: + } + + +-int coda_permission(struct inode *inode, int mask, struct nameidata *nd) ++int coda_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + int error = 0; + +diff -upr linux-2.6.16.orig/fs/coda/pioctl.c linux-2.6.16-026test015/fs/coda/pioctl.c +--- linux-2.6.16.orig/fs/coda/pioctl.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/coda/pioctl.c 2006-07-04 14:41:37.000000000 +0400 +@@ -25,7 +25,7 @@ + + /* pioctl ops */ + static int coda_ioctl_permission(struct inode *inode, int mask, +- struct nameidata *nd); ++ struct nameidata *nd, struct exec_perm *perm); + static int coda_pioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long user_data); + +@@ -43,7 +43,7 @@ struct file_operations coda_ioctl_operat + + /* the coda pioctl inode ops */ + static int coda_ioctl_permission(struct inode *inode, int mask, +- struct nameidata *nd) ++ struct nameidata *nd, struct exec_perm *perm) + { + return 0; + } +diff -upr linux-2.6.16.orig/fs/compat.c linux-2.6.16-026test015/fs/compat.c +--- linux-2.6.16.orig/fs/compat.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/compat.c 2006-07-04 14:41:39.000000000 +0400 +@@ -197,6 +197,8 @@ asmlinkage long compat_sys_statfs(const + struct kstatfs tmp; + error 
= vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); + if (!error) ++ error = faudit_statfs(nd.mnt->mnt_sb, &tmp); ++ if (!error) + error = put_compat_statfs(buf, &tmp); + path_release(&nd); + } +@@ -215,6 +217,8 @@ asmlinkage long compat_sys_fstatfs(unsig + goto out; + error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); + if (!error) ++ error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp); ++ if (!error) + error = put_compat_statfs(buf, &tmp); + fput(file); + out: +@@ -265,6 +269,8 @@ asmlinkage long compat_sys_statfs64(cons + struct kstatfs tmp; + error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); + if (!error) ++ error = faudit_statfs(nd.mnt->mnt_sb, &tmp); ++ if (!error) + error = put_compat_statfs64(buf, &tmp); + path_release(&nd); + } +@@ -286,6 +292,8 @@ asmlinkage long compat_sys_fstatfs64(uns + goto out; + error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); + if (!error) ++ error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp); ++ if (!error) + error = put_compat_statfs64(buf, &tmp); + fput(file); + out: +@@ -1215,6 +1223,10 @@ static ssize_t compat_do_readv_writev(in + if (ret < 0) + goto out; + ++ ret = security_file_permission(file, type == READ ? 
MAY_READ:MAY_WRITE); ++ if (ret) ++ goto out; ++ + fnv = NULL; + if (type == READ) { + fn = file->f_op->read; +@@ -1479,7 +1491,7 @@ int compat_do_execve(char * filename, + goto out_ret; + memset(bprm, 0, sizeof(*bprm)); + +- file = open_exec(filename); ++ file = open_exec(filename, bprm); + retval = PTR_ERR(file); + if (IS_ERR(file)) + goto out_kfree; +@@ -1897,7 +1909,7 @@ asmlinkage long compat_sys_ppoll(struct + } + + if (sigmask) { +- if (sigsetsize |= sizeof(compat_sigset_t)) ++ if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (copy_from_user(&ss32, sigmask, sizeof(ss32))) + return -EFAULT; +diff -upr linux-2.6.16.orig/fs/dcache.c linux-2.6.16-026test015/fs/dcache.c +--- linux-2.6.16.orig/fs/dcache.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/dcache.c 2006-07-04 14:41:38.000000000 +0400 +@@ -28,11 +28,16 @@ + #include <linux/module.h> + #include <linux/mount.h> + #include <linux/file.h> ++#include <linux/namei.h> + #include <asm/uaccess.h> + #include <linux/security.h> + #include <linux/seqlock.h> + #include <linux/swap.h> + #include <linux/bootmem.h> ++#include <linux/kernel_stat.h> ++#include <net/inet_sock.h> ++ ++#include <ub/ub_dcache.h> + + /* #define DCACHE_DEBUG 1 */ + +@@ -44,7 +49,7 @@ static seqlock_t rename_lock __cacheline + + EXPORT_SYMBOL(dcache_lock); + +-static kmem_cache_t *dentry_cache; ++kmem_cache_t *dentry_cache; + + #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) + +@@ -143,11 +148,8 @@ static void dentry_iput(struct dentry * + * no dcache lock, please. 
+ */ + +-void dput(struct dentry *dentry) ++static void dput_recursive(struct dentry *dentry) + { +- if (!dentry) +- return; +- + repeat: + if (atomic_read(&dentry->d_count) == 1) + might_sleep(); +@@ -206,6 +208,17 @@ kill_it: { + } + } + ++void dput(struct dentry *dentry) ++{ ++ if (!dentry) ++ return; ++ ++ spin_lock(&dcache_lock); ++ ub_dentry_uncharge(dentry); ++ spin_unlock(&dcache_lock); ++ dput_recursive(dentry); ++} ++ + /** + * d_invalidate - invalidate a dentry + * @dentry: dentry to invalidate +@@ -272,6 +285,8 @@ static inline struct dentry * __dget_loc + dentry_stat.nr_unused--; + list_del_init(&dentry->d_lru); + } ++ ++ ub_dentry_charge_nofail(dentry); + return dentry; + } + +@@ -373,13 +388,19 @@ static inline void prune_one_dentry(stru + parent = dentry->d_parent; + d_free(dentry); + if (parent != dentry) +- dput(parent); ++ /* ++ * dentry is not in use, only child (not outside) ++ * references change, so parent->d_inuse does not change ++ */ ++ dput_recursive(parent); + spin_lock(&dcache_lock); + } + + /** + * prune_dcache - shrink the dcache + * @count: number of entries to try and free ++ * @sb: if given, ignore dentries for other superblocks ++ * which are being unmounted. + * + * Shrink the dcache. This is done when we need + * more memory, or simply when we need to unmount +@@ -390,16 +411,29 @@ static inline void prune_one_dentry(stru + * all the dentries are in use. 
+ */ + +-static void prune_dcache(int count) ++static void prune_dcache(int count, struct super_block *sb) + { + spin_lock(&dcache_lock); + for (; count ; count--) { + struct dentry *dentry; + struct list_head *tmp; ++ struct rw_semaphore *s_umount; + + cond_resched_lock(&dcache_lock); + + tmp = dentry_unused.prev; ++ if (unlikely(sb)) { ++ /* Try to find a dentry for this sb, but don't try ++ * too hard, if they aren't near the tail they will ++ * be moved down again soon ++ */ ++ int skip = count; ++ while (skip && tmp != &dentry_unused && ++ list_entry(tmp, struct dentry, d_lru)->d_sb != sb) { ++ skip--; ++ tmp = tmp->prev; ++ } ++ } + if (tmp == &dentry_unused) + break; + list_del_init(tmp); +@@ -425,7 +459,45 @@ static void prune_dcache(int count) + spin_unlock(&dentry->d_lock); + continue; + } +- prune_one_dentry(dentry); ++ /* ++ * If the dentry is not DCACHED_REFERENCED, it is time ++ * to remove it from the dcache, provided the super block is ++ * NULL (which means we are trying to reclaim memory) ++ * or this dentry belongs to the same super block that ++ * we want to shrink. ++ */ ++ /* ++ * If this dentry is for "my" filesystem, then I can prune it ++ * without taking the s_umount lock (I already hold it). ++ */ ++ if (sb && dentry->d_sb == sb) { ++ prune_one_dentry(dentry); ++ continue; ++ } ++ /* ++ * ...otherwise we need to be sure this filesystem isn't being ++ * unmounted, otherwise we could race with ++ * generic_shutdown_super(), and end up holding a reference to ++ * an inode while the filesystem is unmounted. ++ * So we try to get s_umount, and make sure s_root isn't NULL. ++ * (Take a local copy of s_umount to avoid a use-after-free of ++ * `dentry'). 
++ */ ++ s_umount = &dentry->d_sb->s_umount; ++ if (down_read_trylock(s_umount)) { ++ if (dentry->d_sb->s_root != NULL) { ++ prune_one_dentry(dentry); ++ up_read(s_umount); ++ continue; ++ } ++ up_read(s_umount); ++ } ++ spin_unlock(&dentry->d_lock); ++ /* Cannot remove the first dentry, and it isn't appropriate ++ * to move it to the head of the list, so give up, and try ++ * later ++ */ ++ break; + } + spin_unlock(&dcache_lock); + } +@@ -486,6 +558,7 @@ repeat: + continue; + } + prune_one_dentry(dentry); ++ cond_resched_lock(&dcache_lock); + goto repeat; + } + spin_unlock(&dcache_lock); +@@ -635,7 +708,7 @@ void shrink_dcache_parent(struct dentry + int found; + + while ((found = select_parent(parent)) != 0) +- prune_dcache(found); ++ prune_dcache(found, parent->d_sb); + } + + /** +@@ -648,9 +721,10 @@ void shrink_dcache_parent(struct dentry + * done under dcache_lock. + * + */ +-void shrink_dcache_anon(struct hlist_head *head) ++void shrink_dcache_anon(struct super_block *sb) + { + struct hlist_node *lp; ++ struct hlist_head *head = &sb->s_anon; + int found; + do { + found = 0; +@@ -673,7 +747,7 @@ void shrink_dcache_anon(struct hlist_hea + } + } + spin_unlock(&dcache_lock); +- prune_dcache(found); ++ prune_dcache(found, sb); + } while(found); + } + +@@ -691,12 +765,18 @@ void shrink_dcache_anon(struct hlist_hea + */ + static int shrink_dcache_memory(int nr, gfp_t gfp_mask) + { ++ int res = -1; ++ ++ KSTAT_PERF_ENTER(shrink_dcache) + if (nr) { + if (!(gfp_mask & __GFP_FS)) +- return -1; +- prune_dcache(nr); ++ goto out; ++ prune_dcache(nr, NULL); + } +- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; ++ res = (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; ++out: ++ KSTAT_PERF_LEAVE(shrink_dcache) ++ return res; + } + + /** +@@ -716,19 +796,20 @@ struct dentry *d_alloc(struct dentry * p + + dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); + if (!dentry) +- return NULL; ++ goto err_alloc; + + if (name->len > DNAME_INLINE_LEN-1) { + 
dname = kmalloc(name->len + 1, GFP_KERNEL); +- if (!dname) { +- kmem_cache_free(dentry_cache, dentry); +- return NULL; +- } ++ if (!dname) ++ goto err_name; + } else { + dname = dentry->d_iname; + } + dentry->d_name.name = dname; + ++ if (ub_dentry_alloc(dentry)) ++ goto err_charge; ++ + dentry->d_name.len = name->len; + dentry->d_name.hash = name->hash; + memcpy(dname, name->name, name->len); +@@ -759,12 +840,23 @@ struct dentry *d_alloc(struct dentry * p + } + + spin_lock(&dcache_lock); +- if (parent) ++ if (parent) { + list_add(&dentry->d_u.d_child, &parent->d_subdirs); ++ if (parent->d_flags & DCACHE_VIRTUAL) ++ dentry->d_flags |= DCACHE_VIRTUAL; ++ } + dentry_stat.nr_dentry++; + spin_unlock(&dcache_lock); + + return dentry; ++ ++err_charge: ++ if (name->len > DNAME_INLINE_LEN - 1) ++ kfree(dname); ++err_name: ++ kmem_cache_free(dentry_cache, dentry); ++err_alloc: ++ return NULL; + } + + struct dentry *d_alloc_name(struct dentry *parent, const char *name) +@@ -1048,7 +1140,6 @@ struct dentry * __d_lookup(struct dentry + unsigned int hash = name->hash; + const unsigned char *str = name->name; + struct hlist_head *head = d_hash(parent,hash); +- struct dentry *found = NULL; + struct hlist_node *node; + struct dentry *dentry; + +@@ -1089,7 +1180,7 @@ struct dentry * __d_lookup(struct dentry + + if (!d_unhashed(dentry)) { + atomic_inc(&dentry->d_count); +- found = dentry; ++ goto found; + } + spin_unlock(&dentry->d_lock); + break; +@@ -1098,7 +1189,18 @@ next: + } + rcu_read_unlock(); + +- return found; ++ return NULL; ++ ++found: ++ /* ++ * d_lock and rcu_read_lock ++ * are dropped in ub_dentry_charge() ++ */ ++ if (ub_dentry_charge(dentry)) { ++ dput(dentry); ++ dentry = NULL; ++ } ++ return dentry; + } + + /** +@@ -1345,6 +1447,32 @@ already_unhashed: + } + + /** ++ * __d_path_add_deleted - prepend "(deleted) " text ++ * @end: a pointer to the character after free space at the beginning of the ++ * buffer ++ * @buflen: remaining free space ++ */ ++static inline 
char * __d_path_add_deleted(char * end, int buflen) ++{ ++ buflen -= 10; ++ if (buflen < 0) ++ return ERR_PTR(-ENAMETOOLONG); ++ end -= 10; ++ memcpy(end, "(deleted) ", 10); ++ return end; ++} ++ ++/** ++ * d_root_check - checks if dentry is accessible from current's fs root ++ * @dentry: dentry to be verified ++ * @vfsmnt: vfsmnt to which the dentry belongs ++ */ ++int d_root_check(struct dentry *dentry, struct vfsmount *vfsmnt) ++{ ++ return PTR_ERR(d_path(dentry, vfsmnt, NULL, 0)); ++} ++ ++/** + * d_path - return the path of a dentry + * @dentry: dentry to report + * @vfsmnt: vfsmnt to which the dentry belongs +@@ -1365,36 +1493,35 @@ static char * __d_path( struct dentry *d + char *buffer, int buflen) + { + char * end = buffer+buflen; +- char * retval; ++ char * retval = NULL; + int namelen; ++ int deleted; ++ struct vfsmount *oldvfsmnt; + +- *--end = '\0'; +- buflen--; +- if (!IS_ROOT(dentry) && d_unhashed(dentry)) { +- buflen -= 10; +- end -= 10; +- if (buflen < 0) ++ oldvfsmnt = vfsmnt; ++ deleted = (!IS_ROOT(dentry) && d_unhashed(dentry)); ++ if (buffer != NULL) { ++ *--end = '\0'; ++ buflen--; ++ ++ if (buflen < 1) + goto Elong; +- memcpy(end, " (deleted)", 10); ++ /* Get '/' right */ ++ retval = end-1; ++ *retval = '/'; + } + +- if (buflen < 1) +- goto Elong; +- /* Get '/' right */ +- retval = end-1; +- *retval = '/'; +- + for (;;) { + struct dentry * parent; + + if (dentry == root && vfsmnt == rootmnt) + break; + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { +- /* Global root? */ ++ /* root of a tree? 
*/ + spin_lock(&vfsmount_lock); + if (vfsmnt->mnt_parent == vfsmnt) { + spin_unlock(&vfsmount_lock); +- goto global_root; ++ goto other_root; + } + dentry = vfsmnt->mnt_mountpoint; + vfsmnt = vfsmnt->mnt_parent; +@@ -1403,27 +1530,51 @@ static char * __d_path( struct dentry *d + } + parent = dentry->d_parent; + prefetch(parent); ++ if (buffer != NULL) { ++ namelen = dentry->d_name.len; ++ buflen -= namelen + 1; ++ if (buflen < 0) ++ goto Elong; ++ end -= namelen; ++ memcpy(end, dentry->d_name.name, namelen); ++ *--end = '/'; ++ retval = end; ++ } ++ dentry = parent; ++ } ++ /* the given root point is reached */ ++finish: ++ if (buffer != NULL && deleted) ++ retval = __d_path_add_deleted(end, buflen); ++ return retval; ++ ++other_root: ++ /* ++ * We traversed the tree upward and reached a root, but the given ++ * lookup terminal point wasn't encountered. It means either that the ++ * dentry is out of our scope or belongs to an abstract space like ++ * sock_mnt or pipe_mnt. Check for it. ++ * ++ * There are different options to check it. ++ * We may assume that any dentry tree is unreachable unless it's ++ * connected to `root' (defined as fs root of init aka child reaper) ++ * and expose all paths that are not connected to it. ++ * The other option is to allow exposing of known abstract spaces ++ * explicitly and hide the path information for other cases. ++ * This approach is more safe, let's take it. 
2001/04/22 SAW ++ */ ++ if (!(oldvfsmnt->mnt_sb->s_flags & MS_NOUSER)) ++ return ERR_PTR(-EINVAL); ++ if (buffer != NULL) { + namelen = dentry->d_name.len; +- buflen -= namelen + 1; ++ buflen -= namelen; + if (buflen < 0) + goto Elong; +- end -= namelen; +- memcpy(end, dentry->d_name.name, namelen); +- *--end = '/'; +- retval = end; +- dentry = parent; ++ retval -= namelen-1; /* hit the slash */ ++ memcpy(retval, dentry->d_name.name, namelen); + } ++ goto finish; + +- return retval; +- +-global_root: +- namelen = dentry->d_name.len; +- buflen -= namelen; +- if (buflen < 0) +- goto Elong; +- retval -= namelen-1; /* hit the slash */ +- memcpy(retval, dentry->d_name.name, namelen); +- return retval; + Elong: + return ERR_PTR(-ENAMETOOLONG); + } +@@ -1448,6 +1599,229 @@ char * d_path(struct dentry *dentry, str + return res; + } + ++#ifdef CONFIG_VE ++#include <net/sock.h> ++#include <linux/ip.h> ++#include <linux/file.h> ++#include <linux/namespace.h> ++#include <linux/vzratelimit.h> ++ ++static void mark_sub_tree_virtual(struct dentry *d) ++{ ++ struct dentry *orig_root; ++ ++ orig_root = d; ++ while (1) { ++ spin_lock(&d->d_lock); ++ d->d_flags |= DCACHE_VIRTUAL; ++ spin_unlock(&d->d_lock); ++ ++ if (!list_empty(&d->d_subdirs)) { ++ d = list_entry(d->d_subdirs.next, ++ struct dentry, d_u.d_child); ++ continue; ++ } ++ if (d == orig_root) ++ break; ++ while (d == list_entry(d->d_parent->d_subdirs.prev, ++ struct dentry, d_u.d_child)) { ++ d = d->d_parent; ++ if (d == orig_root) ++ goto out; ++ } ++ d = list_entry(d->d_u.d_child.next, ++ struct dentry, d_u.d_child); ++ } ++out: ++ return; ++} ++ ++void mark_tree_virtual(struct vfsmount *m, struct dentry *d) ++{ ++ struct vfsmount *orig_rootmnt; ++ ++ spin_lock(&dcache_lock); ++ spin_lock(&vfsmount_lock); ++ orig_rootmnt = m; ++ while (1) { ++ mark_sub_tree_virtual(d); ++ if (!list_empty(&m->mnt_mounts)) { ++ m = list_entry(m->mnt_mounts.next, ++ struct vfsmount, mnt_child); ++ d = m->mnt_root; ++ continue; ++ } ++ if 
(m == orig_rootmnt) ++ break; ++ while (m == list_entry(m->mnt_parent->mnt_mounts.prev, ++ struct vfsmount, mnt_child)) { ++ m = m->mnt_parent; ++ if (m == orig_rootmnt) ++ goto out; ++ } ++ m = list_entry(m->mnt_child.next, ++ struct vfsmount, mnt_child); ++ d = m->mnt_root; ++ } ++out: ++ spin_unlock(&vfsmount_lock); ++ spin_unlock(&dcache_lock); ++} ++EXPORT_SYMBOL(mark_tree_virtual); ++ ++static struct vz_rate_info area_ri = { 20, 10*HZ }; ++#define VE_AREA_ACC_CHECK 0x0001 ++#define VE_AREA_ACC_DENY 0x0002 ++#define VE_AREA_EXEC_CHECK 0x0010 ++#define VE_AREA_EXEC_DENY 0x0020 ++#define VE0_AREA_ACC_CHECK 0x0100 ++#define VE0_AREA_ACC_DENY 0x0200 ++#define VE0_AREA_EXEC_CHECK 0x1000 ++#define VE0_AREA_EXEC_DENY 0x2000 ++int ve_area_access_check = 0; ++ ++static void print_connection_info(struct task_struct *tsk) ++{ ++ struct files_struct *files; ++ struct fdtable *fdt; ++ int fd; ++ ++ files = get_files_struct(tsk); ++ if (!files) ++ return; ++ ++ spin_lock(&files->file_lock); ++ fdt = files_fdtable(files); ++ for (fd = 0; fd < fdt->max_fds; fd++) { ++ struct file *file; ++ struct inode *inode; ++ struct socket *socket; ++ struct sock *sk; ++ struct inet_sock *inet; ++ ++ file = fdt->fd[fd]; ++ if (file == NULL) ++ continue; ++ ++ inode = file->f_dentry->d_inode; ++ if (!S_ISSOCK(inode->i_mode)) ++ continue; ++ ++ socket = SOCKET_I(inode); ++ if (socket == NULL) ++ continue; ++ ++ sk = socket->sk; ++ if ((sk->sk_family != PF_INET && sk->sk_family != PF_INET6) ++ || sk->sk_type != SOCK_STREAM) ++ continue; ++ ++ inet = inet_sk(sk); ++ printk(KERN_ALERT "connection from %u.%u.%u.%u:%u to port %u\n", ++ NIPQUAD(inet->daddr), ntohs(inet->dport), ++ inet->num); ++ } ++ spin_unlock(&files->file_lock); ++ put_files_struct(files); ++} ++ ++static void check_alert(struct vfsmount *vfsmnt, struct dentry *dentry, ++ char *str) ++{ ++ struct task_struct *tsk; ++ unsigned long page; ++ struct super_block *sb; ++ char *p; ++ ++ if (!vz_ratelimit(&area_ri)) ++ return; ++ ++ 
tsk = current; ++ p = ERR_PTR(-ENOMEM); ++ page = __get_free_page(GFP_KERNEL); ++ if (page) { ++ spin_lock(&dcache_lock); ++ p = __d_path(dentry, vfsmnt, tsk->fs->root, tsk->fs->rootmnt, ++ (char *)page, PAGE_SIZE); ++ spin_unlock(&dcache_lock); ++ } ++ if (IS_ERR(p)) ++ p = "(undefined)"; ++ ++ sb = dentry->d_sb; ++ printk(KERN_ALERT "%s check alert! file:[%s] from %d/%s, dev%x\n" ++ "Task %d/%d[%s] from VE%d, execenv %d\n", ++ str, p, VE_OWNER_FSTYPE(sb->s_type)->veid, ++ sb->s_type->name, sb->s_dev, ++ tsk->pid, virt_pid(tsk), tsk->comm, ++ VE_TASK_INFO(tsk)->owner_env->veid, ++ get_exec_env()->veid); ++ ++ free_page(page); ++ ++ print_connection_info(tsk); ++ ++ read_lock(&tasklist_lock); ++ tsk = tsk->real_parent; ++ get_task_struct(tsk); ++ read_unlock(&tasklist_lock); ++ ++ printk(KERN_ALERT "Parent %d/%d[%s] from VE%d\n", ++ tsk->pid, virt_pid(tsk), tsk->comm, ++ VE_TASK_INFO(tsk)->owner_env->veid); ++ ++ print_connection_info(tsk); ++ put_task_struct(tsk); ++ dump_stack(); ++} ++#endif ++ ++int check_area_access_ve(struct dentry *dentry, struct vfsmount *mnt) ++{ ++#ifdef CONFIG_VE ++ int check, alert, deny; ++ ++ if (ve_is_super(get_exec_env())) { ++ check = ve_area_access_check & VE0_AREA_ACC_CHECK; ++ alert = dentry->d_flags & DCACHE_VIRTUAL; ++ deny = ve_area_access_check & VE0_AREA_ACC_DENY; ++ } else { ++ check = ve_area_access_check & VE_AREA_ACC_CHECK; ++ alert = !(dentry->d_flags & DCACHE_VIRTUAL); ++ deny = ve_area_access_check & VE_AREA_ACC_DENY; ++ } ++ ++ if (check && alert) ++ check_alert(mnt, dentry, "Access"); ++ if (deny && alert) ++ return -EACCES; ++#endif ++ return 0; ++} ++ ++int check_area_execute_ve(struct dentry *dentry, struct vfsmount *mnt) ++{ ++#ifdef CONFIG_VE ++ int check, alert, deny; ++ ++ if (ve_is_super(get_exec_env())) { ++ check = ve_area_access_check & VE0_AREA_EXEC_CHECK; ++ alert = dentry->d_flags & DCACHE_VIRTUAL; ++ deny = ve_area_access_check & VE0_AREA_EXEC_DENY; ++ } else { ++ check = ve_area_access_check & 
VE_AREA_EXEC_CHECK; ++ alert = !(dentry->d_flags & DCACHE_VIRTUAL); ++ deny = ve_area_access_check & VE_AREA_EXEC_DENY; ++ } ++ ++ if (check && alert) ++ check_alert(mnt, dentry, "Exec"); ++ if (deny && alert) ++ return -EACCES; ++#endif ++ return 0; ++} ++ + /* + * NOTE! The user-level library version returns a + * character pointer. The kernel system call just +@@ -1584,10 +1958,12 @@ resume: + goto repeat; + } + atomic_dec(&dentry->d_count); ++ ub_dentry_uncharge(dentry); + } + if (this_parent != root) { + next = this_parent->d_u.d_child.next; + atomic_dec(&this_parent->d_count); ++ ub_dentry_uncharge(this_parent); + this_parent = this_parent->d_parent; + goto resume; + } +@@ -1736,7 +2112,8 @@ void __init vfs_caches_init(unsigned lon + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + + filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC, ++ NULL, NULL); + + dcache_init(mempages); + inode_init(mempages); +diff -upr linux-2.6.16.orig/fs/devpts/inode.c linux-2.6.16-026test015/fs/devpts/inode.c +--- linux-2.6.16.orig/fs/devpts/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/devpts/inode.c 2006-07-04 14:41:38.000000000 +0400 +@@ -12,6 +12,7 @@ + + #include <linux/module.h> + #include <linux/init.h> ++#include <linux/ve.h> + #include <linux/fs.h> + #include <linux/sched.h> + #include <linux/namei.h> +@@ -21,16 +22,17 @@ + + #define DEVPTS_SUPER_MAGIC 0x1cd1 + ++struct devpts_config devpts_config = {.mode = 0600}; ++ ++#ifndef CONFIG_VE + static struct vfsmount *devpts_mnt; + static struct dentry *devpts_root; +- +-static struct { +- int setuid; +- int setgid; +- uid_t uid; +- gid_t gid; +- umode_t mode; +-} config = {.mode = 0600}; ++#define config devpts_config ++#else ++#define devpts_mnt (get_exec_env()->devpts_mnt) ++#define devpts_root (get_exec_env()->devpts_root) ++#define config (*(get_exec_env()->devpts_config)) ++#endif + + 
static int devpts_remount(struct super_block *sb, int *flags, char *data) + { +@@ -56,7 +58,8 @@ static int devpts_remount(struct super_b + } else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1) + mode = n & ~S_IFMT; + else { +- printk("devpts: called with bogus options\n"); ++ ve_printk(VE_LOG, ++ "devpts: called with bogus options\n"); + return -EINVAL; + } + } +@@ -114,13 +117,15 @@ static struct super_block *devpts_get_sb + return get_sb_single(fs_type, flags, data, devpts_fill_super); + } + +-static struct file_system_type devpts_fs_type = { ++struct file_system_type devpts_fs_type = { + .owner = THIS_MODULE, + .name = "devpts", + .get_sb = devpts_get_sb, + .kill_sb = kill_anon_super, + }; + ++EXPORT_SYMBOL(devpts_fs_type); ++ + /* + * The normal naming convention is simply /dev/pts/<number>; this conforms + * to the System V naming convention +@@ -212,6 +217,7 @@ static int __init init_devpts_fs(void) + + static void __exit exit_devpts_fs(void) + { ++ /* the code is never called, the argument is irrelevant */ + unregister_filesystem(&devpts_fs_type); + mntput(devpts_mnt); + } +diff -upr linux-2.6.16.orig/fs/eventpoll.c linux-2.6.16-026test015/fs/eventpoll.c +--- linux-2.6.16.orig/fs/eventpoll.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/eventpoll.c 2006-07-04 14:41:39.000000000 +0400 +@@ -105,11 +105,6 @@ + #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) + + +-struct epoll_filefd { +- struct file *file; +- int fd; +-}; +- + /* + * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". + * It is used to keep track on all tasks that are currently inside the wake_up() code +@@ -132,36 +127,6 @@ struct poll_safewake { + spinlock_t lock; + }; + +-/* +- * This structure is stored inside the "private_data" member of the file +- * structure and rapresent the main data sructure for the eventpoll +- * interface. 
+- */ +-struct eventpoll { +- /* Protect the this structure access */ +- rwlock_t lock; +- +- /* +- * This semaphore is used to ensure that files are not removed +- * while epoll is using them. This is read-held during the event +- * collection loop and it is write-held during the file cleanup +- * path, the epoll file exit code and the ctl operations. +- */ +- struct rw_semaphore sem; +- +- /* Wait queue used by sys_epoll_wait() */ +- wait_queue_head_t wq; +- +- /* Wait queue used by file->poll() */ +- wait_queue_head_t poll_wait; +- +- /* List of ready file descriptors */ +- struct list_head rdllist; +- +- /* RB-Tree root used to store monitored fd structs */ +- struct rb_root rbr; +-}; +- + /* Wait structure used by the poll hooks */ + struct eppoll_entry { + /* List header used to link this structure to the "struct epitem" */ +@@ -180,51 +145,6 @@ struct eppoll_entry { + wait_queue_head_t *whead; + }; + +-/* +- * Each file descriptor added to the eventpoll interface will +- * have an entry of this type linked to the hash. +- */ +-struct epitem { +- /* RB-Tree node used to link this structure to the eventpoll rb-tree */ +- struct rb_node rbn; +- +- /* List header used to link this structure to the eventpoll ready list */ +- struct list_head rdllink; +- +- /* The file descriptor information this item refers to */ +- struct epoll_filefd ffd; +- +- /* Number of active wait queue attached to poll operations */ +- int nwait; +- +- /* List containing poll wait queues */ +- struct list_head pwqlist; +- +- /* The "container" of this item */ +- struct eventpoll *ep; +- +- /* The structure that describe the interested events and the source fd */ +- struct epoll_event event; +- +- /* +- * Used to keep track of the usage count of the structure. This avoids +- * that the structure will desappear from underneath our processing. 
+- */ +- atomic_t usecnt; +- +- /* List header used to link this item to the "struct file" items list */ +- struct list_head fllink; +- +- /* List header used to link the item to the transfer list */ +- struct list_head txlink; +- +- /* +- * This is used during the collection/transfer of events to userspace +- * to pin items empty events set. +- */ +- unsigned int revents; +-}; +- + /* Wrapper struct used by poll queueing */ + struct ep_pqueue { + poll_table pt; +@@ -239,14 +159,10 @@ static int ep_getfd(int *efd, struct ino + struct eventpoll *ep); + static int ep_alloc(struct eventpoll **pep); + static void ep_free(struct eventpoll *ep); +-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); + static void ep_use_epitem(struct epitem *epi); +-static void ep_release_epitem(struct epitem *epi); + static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, + poll_table *pt); + static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi); +-static int ep_insert(struct eventpoll *ep, struct epoll_event *event, +- struct file *tfile, int fd); + static int ep_modify(struct eventpoll *ep, struct epitem *epi, + struct epoll_event *event); + static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi); +@@ -274,7 +190,8 @@ static struct super_block *eventpollfs_g + /* + * This semaphore is used to serialize ep_free() and eventpoll_release_file(). 
+ */ +-static struct semaphore epsem; ++struct semaphore epsem; ++EXPORT_SYMBOL_GPL(epsem); + + /* Safe wake up implementation */ + static struct poll_safewake psw; +@@ -289,10 +206,11 @@ static kmem_cache_t *pwq_cache; + static struct vfsmount *eventpoll_mnt; + + /* File callbacks that implement the eventpoll file behaviour */ +-static struct file_operations eventpoll_fops = { ++struct file_operations eventpoll_fops = { + .release = ep_eventpoll_close, + .poll = ep_eventpoll_poll + }; ++EXPORT_SYMBOL_GPL(eventpoll_fops); + + /* + * This is used to register the virtual file system from where +@@ -542,7 +460,7 @@ eexit_1: + current, size, error)); + return error; + } +- ++EXPORT_SYMBOL_GPL(sys_epoll_create); + + /* + * The following function implements the controller interface for +@@ -852,7 +770,7 @@ static void ep_free(struct eventpoll *ep + * the returned item, so the caller must call ep_release_epitem() + * after finished using the "struct epitem". + */ +-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) ++struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) + { + int kcmp; + unsigned long flags; +@@ -882,6 +800,7 @@ static struct epitem *ep_find(struct eve + + return epir; + } ++EXPORT_SYMBOL_GPL(ep_find); + + + /* +@@ -900,13 +819,13 @@ static void ep_use_epitem(struct epitem + * has finished using the structure. It might lead to freeing the + * structure itself if the count goes to zero. 
+ */ +-static void ep_release_epitem(struct epitem *epi) ++void ep_release_epitem(struct epitem *epi) + { + + if (atomic_dec_and_test(&epi->usecnt)) + kmem_cache_free(epi_cache, epi); + } +- ++EXPORT_SYMBOL_GPL(ep_release_epitem); + + /* + * This is the callback that is used to add our wait queue to the +@@ -952,7 +871,7 @@ static void ep_rbtree_insert(struct even + } + + +-static int ep_insert(struct eventpoll *ep, struct epoll_event *event, ++int ep_insert(struct eventpoll *ep, struct epoll_event *event, + struct file *tfile, int fd) + { + int error, revents, pwake = 0; +@@ -1044,6 +963,7 @@ eexit_2: + eexit_1: + return error; + } ++EXPORT_SYMBOL_GPL(ep_insert); + + + /* +diff -upr linux-2.6.16.orig/fs/exec.c linux-2.6.16-026test015/fs/exec.c +--- linux-2.6.16.orig/fs/exec.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/exec.c 2006-07-04 14:41:39.000000000 +0400 +@@ -53,6 +53,8 @@ + #include <asm/uaccess.h> + #include <asm/mmu_context.h> + ++#include <ub/ub_vmpages.h> ++ + #ifdef CONFIG_KMOD + #include <linux/kmod.h> + #endif +@@ -64,6 +66,8 @@ int suid_dumpable = 0; + EXPORT_SYMBOL(suid_dumpable); + /* The maximal length of core_pattern is also specified in sysctl.c */ + ++int sysctl_at_vsyscall; ++ + static struct linux_binfmt *formats; + static DEFINE_RWLOCK(binfmt_lock); + +@@ -135,7 +139,7 @@ asmlinkage long sys_uselib(const char __ + if (!S_ISREG(nd.dentry->d_inode->i_mode)) + goto exit; + +- error = vfs_permission(&nd, MAY_READ | MAY_EXEC); ++ error = vfs_permission(&nd, MAY_READ | MAY_EXEC, NULL); + if (error) + goto exit; + +@@ -308,6 +312,10 @@ void install_arg_page(struct vm_area_str + struct mm_struct *mm = vma->vm_mm; + pte_t * pte; + spinlock_t *ptl; ++ struct page_beancounter *pb; ++ ++ if (unlikely(pb_alloc(&pb))) ++ goto out_nopb; + + if (unlikely(anon_vma_prepare(vma))) + goto out; +@@ -321,15 +329,21 @@ void install_arg_page(struct vm_area_str + goto out; + } + inc_mm_counter(mm, anon_rss); ++ inc_vma_rss(vma); + 
lru_cache_add_active(page); + set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( + page, vma->vm_page_prot)))); ++ pb_add_ref(page, mm, &pb); ++ ub_unused_privvm_dec(mm, vma); ++ pb_free(&pb); + page_add_new_anon_rmap(page, vma, address); + pte_unmap_unlock(pte, ptl); + + /* no need for flush_tlb */ + return; + out: ++ pb_free(&pb); ++out_nopb: + __free_page(page); + force_sig(SIGKILL, current); + } +@@ -404,9 +418,13 @@ int setup_arg_pages(struct linux_binprm + bprm->loader += stack_base; + bprm->exec += stack_base; + +- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); ++ if (ub_memory_charge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags, ++ NULL, UB_SOFT)) ++ goto fail_charge; ++ ++ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | __GFP_SOFT_UBC); + if (!mpnt) +- return -ENOMEM; ++ goto fail_alloc; + + memset(mpnt, 0, sizeof(*mpnt)); + +@@ -450,6 +468,11 @@ int setup_arg_pages(struct linux_binprm + up_write(&mm->mmap_sem); + + return 0; ++ ++fail_alloc: ++ ub_memory_uncharge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags, NULL); ++fail_charge: ++ return -ENOMEM; + } + + EXPORT_SYMBOL(setup_arg_pages); +@@ -471,7 +494,7 @@ static inline void free_arg_pages(struct + + #endif /* CONFIG_MMU */ + +-struct file *open_exec(const char *name) ++struct file *open_exec(const char *name, struct linux_binprm *bprm) + { + struct nameidata nd; + int err; +@@ -485,9 +508,16 @@ struct file *open_exec(const char *name) + file = ERR_PTR(-EACCES); + if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && + S_ISREG(inode->i_mode)) { +- int err = vfs_permission(&nd, MAY_EXEC); +- if (!err && !(inode->i_mode & 0111)) +- err = -EACCES; ++ int err; ++ struct exec_perm *perm; ++ ++ if (bprm != NULL) { ++ perm = &bprm->perm; ++ perm->set = 0; ++ } else ++ perm = NULL; ++ ++ err = vfs_permission(&nd, MAY_EXEC, perm); + file = ERR_PTR(err); + if (!err) { + file = nameidata_to_filp(&nd, O_RDONLY); +@@ -657,7 +687,7 @@ static int de_thread(struct task_struct + */ + if 
(!thread_group_leader(current)) { + struct task_struct *parent; +- struct dentry *proc_dentry1, *proc_dentry2; ++ struct dentry *proc_dentry1[2], *proc_dentry2[2]; + unsigned long ptrace; + + /* +@@ -671,8 +701,8 @@ static int de_thread(struct task_struct + + spin_lock(&leader->proc_lock); + spin_lock(¤t->proc_lock); +- proc_dentry1 = proc_pid_unhash(current); +- proc_dentry2 = proc_pid_unhash(leader); ++ proc_pid_unhash(current, proc_dentry1); ++ proc_pid_unhash(leader, proc_dentry2); + write_lock_irq(&tasklist_lock); + + BUG_ON(leader->tgid != current->tgid); +@@ -829,7 +859,7 @@ int flush_old_exec(struct linux_binprm * + { + char * name; + int i, ch, retval; +- struct files_struct *files; ++ struct files_struct *files, *old; + char tcomm[sizeof(current->comm)]; + + /* +@@ -897,6 +927,7 @@ int flush_old_exec(struct linux_binprm * + suid_keys(current); + current->mm->dumpable = suid_dumpable; + } ++ current->mm->vps_dumpable = 1; + + /* An exec changes our domain. We are no longer part of the thread + group */ +@@ -909,8 +940,11 @@ int flush_old_exec(struct linux_binprm * + return 0; + + mmap_failed: +- put_files_struct(current->files); ++ old = current->files; ++ task_lock(current); + current->files = files; ++ task_unlock(current); ++ put_files_struct(old); + out: + return retval; + } +@@ -927,13 +961,6 @@ int prepare_binprm(struct linux_binprm * + struct inode * inode = bprm->file->f_dentry->d_inode; + int retval; + +- mode = inode->i_mode; +- /* +- * Check execute perms again - if the caller has CAP_DAC_OVERRIDE, +- * generic_permission lets a non-executable through +- */ +- if (!(mode & 0111)) /* with at least _one_ execute bit set */ +- return -EACCES; + if (bprm->file->f_op == NULL) + return -EACCES; + +@@ -941,10 +968,24 @@ int prepare_binprm(struct linux_binprm * + bprm->e_gid = current->egid; + + if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) { ++ if (!bprm->perm.set) { ++ /* ++ * This piece of code creates a time window between ++ * MAY_EXEC 
permission check and setuid/setgid ++ * operations and may be considered as a security hole. ++ * This code is here for compatibility reasons, ++ * if the filesystem is unable to return info now. ++ */ ++ bprm->perm.mode = inode->i_mode; ++ bprm->perm.uid = inode->i_uid; ++ bprm->perm.gid = inode->i_gid; ++ } ++ mode = bprm->perm.mode; ++ + /* Set-uid? */ + if (mode & S_ISUID) { + current->personality &= ~PER_CLEAR_ON_SETID; +- bprm->e_uid = inode->i_uid; ++ bprm->e_uid = bprm->perm.uid; + } + + /* Set-gid? */ +@@ -955,7 +996,7 @@ int prepare_binprm(struct linux_binprm * + */ + if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { + current->personality &= ~PER_CLEAR_ON_SETID; +- bprm->e_gid = inode->i_gid; ++ bprm->e_gid = bprm->perm.gid; + } + } + +@@ -1054,7 +1095,7 @@ int search_binary_handler(struct linux_b + + loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); + +- file = open_exec("/sbin/loader"); ++ file = open_exec("/sbin/loader", bprm); + retval = PTR_ERR(file); + if (IS_ERR(file)) + return retval; +@@ -1148,7 +1189,7 @@ int do_execve(char * filename, + goto out_ret; + memset(bprm, 0, sizeof(*bprm)); + +- file = open_exec(filename); ++ file = open_exec(filename, bprm); + retval = PTR_ERR(file); + if (IS_ERR(file)) + goto out_kfree; +@@ -1288,7 +1329,7 @@ static void format_corename(char *corena + case 'p': + pid_in_pattern = 1; + rc = snprintf(out_ptr, out_end - out_ptr, +- "%d", current->tgid); ++ "%d", virt_tgid(current)); + if (rc > out_end - out_ptr) + goto out; + out_ptr += rc; +@@ -1332,7 +1373,7 @@ static void format_corename(char *corena + case 'h': + down_read(&uts_sem); + rc = snprintf(out_ptr, out_end - out_ptr, +- "%s", system_utsname.nodename); ++ "%s", ve_utsname.nodename); + up_read(&uts_sem); + if (rc > out_end - out_ptr) + goto out; +@@ -1360,7 +1401,7 @@ static void format_corename(char *corena + if (!pid_in_pattern + && (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) { + rc = snprintf(out_ptr, out_end - out_ptr, +- ".%d", 
current->tgid); ++ ".%d", virt_tgid(current)); + if (rc > out_end - out_ptr) + goto out; + out_ptr += rc; +@@ -1386,7 +1427,7 @@ static void zap_threads (struct mm_struc + } + + read_lock(&tasklist_lock); +- do_each_thread(g,p) ++ do_each_thread_ve(g,p) + if (mm == p->mm && p != tsk) { + force_sig_specific(SIGKILL, p); + mm->core_waiters++; +@@ -1394,7 +1435,7 @@ static void zap_threads (struct mm_struc + unlikely(p->parent->mm == mm)) + traced = 1; + } +- while_each_thread(g,p); ++ while_each_thread_ve(g,p); + + read_unlock(&tasklist_lock); + +@@ -1406,12 +1447,12 @@ static void zap_threads (struct mm_struc + * coredump to finish. Detach them so they can both die. + */ + write_lock_irq(&tasklist_lock); +- do_each_thread(g,p) { ++ do_each_thread_ve(g,p) { + if (mm == p->mm && p != tsk && + p->ptrace && p->parent->mm == mm) { + __ptrace_detach(p, 0); + } +- } while_each_thread(g,p); ++ } while_each_thread_ve(g,p); + write_unlock_irq(&tasklist_lock); + } + } +@@ -1447,7 +1488,8 @@ int do_coredump(long signr, int exit_cod + if (!binfmt || !binfmt->core_dump) + goto fail; + down_write(&mm->mmap_sem); +- if (!mm->dumpable) { ++ if (!mm->dumpable || ++ (!mm->vps_dumpable && !ve_is_super(get_exec_env()))) { + up_write(&mm->mmap_sem); + goto fail; + } +diff -upr linux-2.6.16.orig/fs/ext2/acl.c linux-2.6.16-026test015/fs/ext2/acl.c +--- linux-2.6.16.orig/fs/ext2/acl.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext2/acl.c 2006-07-04 14:41:37.000000000 +0400 +@@ -294,9 +294,10 @@ ext2_check_acl(struct inode *inode, int + } + + int +-ext2_permission(struct inode *inode, int mask, struct nameidata *nd) ++ext2_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { +- return generic_permission(inode, mask, ext2_check_acl); ++ return generic_permission(inode, mask, ext2_check_acl, perm); + } + + /* +diff -upr linux-2.6.16.orig/fs/ext2/acl.h linux-2.6.16-026test015/fs/ext2/acl.h +--- linux-2.6.16.orig/fs/ext2/acl.h 
2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext2/acl.h 2006-07-04 14:41:37.000000000 +0400 +@@ -58,7 +58,8 @@ static inline int ext2_acl_count(size_t + #define EXT2_ACL_NOT_CACHED ((void *)-1) + + /* acl.c */ +-extern int ext2_permission (struct inode *, int, struct nameidata *); ++extern int ext2_permission (struct inode *, int, struct nameidata *, ++ struct exec_perm *); + extern int ext2_acl_chmod (struct inode *); + extern int ext2_init_acl (struct inode *, struct inode *); + +diff -upr linux-2.6.16.orig/fs/ext2/namei.c linux-2.6.16-026test015/fs/ext2/namei.c +--- linux-2.6.16.orig/fs/ext2/namei.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext2/namei.c 2006-07-04 14:41:39.000000000 +0400 +@@ -31,6 +31,7 @@ + */ + + #include <linux/pagemap.h> ++#include <linux/quotaops.h> + #include "ext2.h" + #include "xattr.h" + #include "acl.h" +@@ -273,6 +274,8 @@ static int ext2_unlink(struct inode * di + struct page * page; + int err = -ENOENT; + ++ DQUOT_INIT(inode); ++ + de = ext2_find_entry (dir, dentry, &page); + if (!de) + goto out; +@@ -315,6 +318,9 @@ static int ext2_rename (struct inode * o + struct ext2_dir_entry_2 * old_de; + int err = -ENOENT; + ++ if (new_inode) ++ DQUOT_INIT(new_inode); ++ + old_de = ext2_find_entry (old_dir, old_dentry, &old_page); + if (!old_de) + goto out; +diff -upr linux-2.6.16.orig/fs/ext2/super.c linux-2.6.16-026test015/fs/ext2/super.c +--- linux-2.6.16.orig/fs/ext2/super.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext2/super.c 2006-07-04 14:41:38.000000000 +0400 +@@ -996,7 +996,7 @@ static int ext2_remount (struct super_bl + es = sbi->s_es; + if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != + (old_mount_opt & EXT2_MOUNT_XIP)) && +- invalidate_inodes(sb)) ++ invalidate_inodes(sb, 0)) + ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\ + "xip remain in cache (no functional problem)"); + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) +@@ -1205,7 
+1205,7 @@ static struct file_system_type ext2_fs_t + .name = "ext2", + .get_sb = ext2_get_sb, + .kill_sb = kill_block_super, +- .fs_flags = FS_REQUIRES_DEV, ++ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED, + }; + + static int __init init_ext2_fs(void) +diff -upr linux-2.6.16.orig/fs/ext3/acl.c linux-2.6.16-026test015/fs/ext3/acl.c +--- linux-2.6.16.orig/fs/ext3/acl.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext3/acl.c 2006-07-04 14:41:37.000000000 +0400 +@@ -299,9 +299,10 @@ ext3_check_acl(struct inode *inode, int + } + + int +-ext3_permission(struct inode *inode, int mask, struct nameidata *nd) ++ext3_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { +- return generic_permission(inode, mask, ext3_check_acl); ++ return generic_permission(inode, mask, ext3_check_acl, perm); + } + + /* +diff -upr linux-2.6.16.orig/fs/ext3/acl.h linux-2.6.16-026test015/fs/ext3/acl.h +--- linux-2.6.16.orig/fs/ext3/acl.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext3/acl.h 2006-07-04 14:41:37.000000000 +0400 +@@ -58,7 +58,8 @@ static inline int ext3_acl_count(size_t + #define EXT3_ACL_NOT_CACHED ((void *)-1) + + /* acl.c */ +-extern int ext3_permission (struct inode *, int, struct nameidata *); ++extern int ext3_permission (struct inode *, int, struct nameidata *, ++ struct exec_perm *); + extern int ext3_acl_chmod (struct inode *); + extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); + +diff -upr linux-2.6.16.orig/fs/ext3/inode.c linux-2.6.16-026test015/fs/ext3/inode.c +--- linux-2.6.16.orig/fs/ext3/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext3/inode.c 2006-07-04 14:41:37.000000000 +0400 +@@ -771,6 +771,7 @@ ext3_get_block_handle(handle_t *handle, + + set_buffer_new(bh_result); + got_it: ++ clear_buffer_delay(bh_result); + map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + if (boundary) + 
set_buffer_boundary(bh_result); +@@ -964,11 +965,13 @@ static int walk_page_buffers( handle_t * + * and the commit_write(). So doing the journal_start at the start of + * prepare_write() is the right place. + * +- * Also, this function can nest inside ext3_writepage() -> +- * block_write_full_page(). In that case, we *know* that ext3_writepage() +- * has generated enough buffer credits to do the whole page. So we won't +- * block on the journal in that case, which is good, because the caller may +- * be PF_MEMALLOC. ++ * [2004/09/04 SAW] journal_start() in prepare_write() causes different ranking ++ * violations if copy_from_user() triggers a page fault (mmap_sem, may be page ++ * lock, plus __GFP_FS allocations). ++ * Now we read in not up-to-date buffers in prepare_write(), and do the rest ++ * including hole instantiation and inode extension in commit_write(). ++ * ++ * Other notes. + * + * By accident, ext3 can be reentered when a transaction is open via + * quota file writes. If we were to commit the transaction while thus +@@ -983,6 +986,27 @@ static int walk_page_buffers( handle_t * + * write. 
+ */ + ++static int ext3_get_block_delay(struct inode *inode, sector_t iblock, ++ struct buffer_head *bh, int create) ++{ ++ int ret; ++ ++ ret = ext3_get_block_handle(NULL, inode, iblock, bh, 0, 0); ++ if (ret) ++ return ret; ++ if (!buffer_mapped(bh)) { ++ set_buffer_delay(bh); ++ set_buffer_new(bh); ++ } ++ return ret; ++} ++ ++static int ext3_prepare_write(struct file *file, struct page *page, ++ unsigned from, unsigned to) ++{ ++ return block_prepare_write(page, from, to, ext3_get_block_delay); ++} ++ + static int do_journal_get_write_access(handle_t *handle, + struct buffer_head *bh) + { +@@ -991,8 +1015,52 @@ static int do_journal_get_write_access(h + return ext3_journal_get_write_access(handle, bh); + } + +-static int ext3_prepare_write(struct file *file, struct page *page, +- unsigned from, unsigned to) ++/* ++ * This function zeroes buffers not mapped to disk. ++ * We do it similarly to the error path in __block_prepare_write() to avoid ++ * keeping garbage in the page cache. ++ * Here we check BH_delay state. We know that if the buffer appears ++ * !buffer_mapped then ++ * - it was !buffer_mapped at the moment of ext3_prepare_write, and ++ * - ext3_get_block failed to map this buffer (e.g., ENOSPC). ++ * If this !mapped buffer is not up to date (it can be up to date if ++ * PageUptodate), then we zero its content. 
++ */ ++static void ext3_clear_delayed_buffers(struct page *page, ++ unsigned from, unsigned to) ++{ ++ struct buffer_head *bh, *head, *next; ++ unsigned block_start, block_end; ++ unsigned blocksize; ++ void *kaddr; ++ ++ head = page_buffers(page); ++ blocksize = head->b_size; ++ for ( bh = head, block_start = 0; ++ bh != head || !block_start; ++ block_start = block_end, bh = next) ++ { ++ next = bh->b_this_page; ++ block_end = block_start + blocksize; ++ if (block_end <= from || block_start >= to) ++ continue; ++ if (!buffer_delay(bh)) ++ continue; ++ J_ASSERT_BH(bh, !buffer_mapped(bh)); ++ clear_buffer_new(bh); ++ clear_buffer_delay(bh); ++ if (!buffer_uptodate(bh)) { ++ kaddr = kmap_atomic(page, KM_USER0); ++ memset(kaddr + block_start, 0, bh->b_size); ++ kunmap_atomic(kaddr, KM_USER0); ++ set_buffer_uptodate(bh); ++ mark_buffer_dirty(bh); ++ } ++ } ++} ++ ++static int ext3_map_write(struct file *file, struct page *page, ++ unsigned from, unsigned to) + { + struct inode *inode = page->mapping->host; + int ret, needed_blocks = ext3_writepage_trans_blocks(inode); +@@ -1009,18 +1077,17 @@ retry: + ret = nobh_prepare_write(page, from, to, ext3_get_block); + else + ret = block_prepare_write(page, from, to, ext3_get_block); +- if (ret) +- goto prepare_write_failed; +- +- if (ext3_should_journal_data(inode)) { ++ if (!ret && ext3_should_journal_data(inode)) { + ret = walk_page_buffers(handle, page_buffers(page), + from, to, NULL, do_journal_get_write_access); + } +-prepare_write_failed: +- if (ret) +- ext3_journal_stop(handle); ++ if (!ret) ++ goto out; ++ ++ ext3_journal_stop(handle); + if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; ++ ext3_clear_delayed_buffers(page, from, to); + out: + return ret; + } +@@ -1055,10 +1122,15 @@ static int commit_write_fn(handle_t *han + static int ext3_ordered_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) + { +- handle_t *handle = ext3_journal_current_handle(); 
++ handle_t *handle; + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + ++ ret = ext3_map_write(file, page, from, to); ++ if (ret) ++ return ret; ++ handle = ext3_journal_current_handle(); ++ + ret = walk_page_buffers(handle, page_buffers(page), + from, to, NULL, ext3_journal_dirty_data); + +@@ -1084,11 +1156,15 @@ static int ext3_ordered_commit_write(str + static int ext3_writeback_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) + { +- handle_t *handle = ext3_journal_current_handle(); ++ handle_t *handle; + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + loff_t new_i_size; + ++ ret = ext3_map_write(file, page, from, to); ++ if (ret) ++ return ret; ++ handle = ext3_journal_current_handle(); + new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + if (new_i_size > EXT3_I(inode)->i_disksize) + EXT3_I(inode)->i_disksize = new_i_size; +@@ -1107,12 +1183,17 @@ static int ext3_writeback_commit_write(s + static int ext3_journalled_commit_write(struct file *file, + struct page *page, unsigned from, unsigned to) + { +- handle_t *handle = ext3_journal_current_handle(); ++ handle_t *handle; + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + int partial = 0; + loff_t pos; + ++ ret = ext3_map_write(file, page, from, to); ++ if (ret) ++ return ret; ++ handle = ext3_journal_current_handle(); ++ + /* + * Here we duplicate the generic_commit_write() functionality + */ +diff -upr linux-2.6.16.orig/fs/ext3/ioctl.c linux-2.6.16-026test015/fs/ext3/ioctl.c +--- linux-2.6.16.orig/fs/ext3/ioctl.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext3/ioctl.c 2006-07-04 14:41:37.000000000 +0400 +@@ -69,7 +69,7 @@ int ext3_ioctl (struct inode * inode, st + * the relevant capability. 
+ */ + if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { +- if (!capable(CAP_SYS_RESOURCE)) ++ if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + } + +diff -upr linux-2.6.16.orig/fs/ext3/resize.c linux-2.6.16-026test015/fs/ext3/resize.c +--- linux-2.6.16.orig/fs/ext3/resize.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext3/resize.c 2006-07-04 14:41:36.000000000 +0400 +@@ -974,6 +974,7 @@ int ext3_group_extend(struct super_block + if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { + ext3_warning(sb, __FUNCTION__, + "multiple resizers run on filesystem!"); ++ unlock_super(sb); + err = -EBUSY; + goto exit_put; + } +diff -upr linux-2.6.16.orig/fs/ext3/super.c linux-2.6.16-026test015/fs/ext3/super.c +--- linux-2.6.16.orig/fs/ext3/super.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ext3/super.c 2006-07-04 14:41:38.000000000 +0400 +@@ -2661,7 +2661,7 @@ static struct file_system_type ext3_fs_t + .name = "ext3", + .get_sb = ext3_get_sb, + .kill_sb = kill_block_super, +- .fs_flags = FS_REQUIRES_DEV, ++ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED, + }; + + static int __init init_ext3_fs(void) +diff -upr linux-2.6.16.orig/fs/fcntl.c linux-2.6.16-026test015/fs/fcntl.c +--- linux-2.6.16.orig/fs/fcntl.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/fcntl.c 2006-07-04 14:41:39.000000000 +0400 +@@ -18,6 +18,7 @@ + #include <linux/ptrace.h> + #include <linux/signal.h> + #include <linux/rcupdate.h> ++#include <linux/ve_owner.h> + + #include <asm/poll.h> + #include <asm/siginfo.h> +@@ -190,6 +191,7 @@ out_fput: + fput(file); + goto out; + } ++EXPORT_SYMBOL_GPL(sys_dup2); + + asmlinkage long sys_dup(unsigned int fildes) + { +@@ -254,6 +256,7 @@ static int setfl(int fd, struct file * f + static void f_modown(struct file *filp, unsigned long pid, + uid_t uid, uid_t euid, int force) + { ++ pid = comb_vpid_to_pid(pid); + write_lock_irq(&filp->f_owner.lock); + if (force || !filp->f_owner.pid) { + filp->f_owner.pid 
= pid; +@@ -320,7 +323,7 @@ static long do_fcntl(int fd, unsigned in + * current syscall conventions, the only way + * to fix this will be in libc. + */ +- err = filp->f_owner.pid; ++ err = comb_pid_to_vpid(filp->f_owner.pid); + force_successful_syscall_return(); + break; + case F_SETOWN: +@@ -472,23 +475,29 @@ static void send_sigio_to_task(struct ta + void send_sigio(struct fown_struct *fown, int fd, int band) + { + struct task_struct *p; ++ struct file *f; ++ struct ve_struct *ve; + int pid; + + read_lock(&fown->lock); + pid = fown->pid; + if (!pid) + goto out_unlock_fown; ++ ++ /* hack: fown's are always embedded in struct file */ ++ f = container_of(fown, struct file, f_owner); ++ ve = VE_OWNER_FILP(f); + + read_lock(&tasklist_lock); + if (pid > 0) { +- p = find_task_by_pid(pid); +- if (p) { ++ p = find_task_by_pid_all(pid); ++ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) { + send_sigio_to_task(p, fown, fd, band); + } + } else { +- do_each_task_pid(-pid, PIDTYPE_PGID, p) { ++ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) { + send_sigio_to_task(p, fown, fd, band); +- } while_each_task_pid(-pid, PIDTYPE_PGID, p); ++ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve); + } + read_unlock(&tasklist_lock); + out_unlock_fown: +@@ -505,6 +514,8 @@ static void send_sigurg_to_task(struct t + int send_sigurg(struct fown_struct *fown) + { + struct task_struct *p; ++ struct file *f; ++ struct ve_struct *ve; + int pid, ret = 0; + + read_lock(&fown->lock); +@@ -513,17 +524,19 @@ int send_sigurg(struct fown_struct *fown + goto out_unlock_fown; + + ret = 1; ++ f = container_of(fown, struct file, f_owner); ++ ve = VE_OWNER_FILP(f); + + read_lock(&tasklist_lock); + if (pid > 0) { +- p = find_task_by_pid(pid); +- if (p) { ++ p = find_task_by_pid_all(pid); ++ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) { + send_sigurg_to_task(p, fown); + } + } else { +- do_each_task_pid(-pid, PIDTYPE_PGID, p) { ++ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) { + 
send_sigurg_to_task(p, fown); +- } while_each_task_pid(-pid, PIDTYPE_PGID, p); ++ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve); + } + read_unlock(&tasklist_lock); + out_unlock_fown: +diff -upr linux-2.6.16.orig/fs/file.c linux-2.6.16-026test015/fs/file.c +--- linux-2.6.16.orig/fs/file.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/file.c 2006-07-04 14:41:39.000000000 +0400 +@@ -8,6 +8,7 @@ + + #include <linux/fs.h> + #include <linux/mm.h> ++#include <linux/module.h> + #include <linux/time.h> + #include <linux/slab.h> + #include <linux/vmalloc.h> +@@ -18,6 +19,8 @@ + #include <linux/rcupdate.h> + #include <linux/workqueue.h> + ++#include <ub/ub_mem.h> ++ + struct fdtable_defer { + spinlock_t lock; + struct work_struct wq; +@@ -44,9 +47,9 @@ struct file ** alloc_fd_array(int num) + int size = num * sizeof(struct file *); + + if (size <= PAGE_SIZE) +- new_fds = (struct file **) kmalloc(size, GFP_KERNEL); ++ new_fds = (struct file **) ub_kmalloc(size, GFP_KERNEL); + else +- new_fds = (struct file **) vmalloc(size); ++ new_fds = (struct file **) ub_vmalloc(size); + return new_fds; + } + +@@ -212,9 +215,9 @@ fd_set * alloc_fdset(int num) + int size = num / 8; + + if (size <= PAGE_SIZE) +- new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL); ++ new_fdset = (fd_set *) ub_kmalloc(size, GFP_KERNEL); + else +- new_fdset = (fd_set *) vmalloc(size); ++ new_fdset = (fd_set *) ub_vmalloc(size); + return new_fdset; + } + +@@ -302,7 +305,7 @@ out: + * both fd array and fdset. It is expected to be called with the + * files_lock held. + */ +-static int expand_fdtable(struct files_struct *files, int nr) ++int expand_fdtable(struct files_struct *files, int nr) + __releases(files->file_lock) + __acquires(files->file_lock) + { +@@ -338,6 +341,7 @@ static int expand_fdtable(struct files_s + out: + return error; + } ++EXPORT_SYMBOL_GPL(expand_fdtable); + + /* + * Expand files. 
+diff -upr linux-2.6.16.orig/fs/file_table.c linux-2.6.16-026test015/fs/file_table.c +--- linux-2.6.16.orig/fs/file_table.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/file_table.c 2006-07-04 14:41:38.000000000 +0400 +@@ -9,6 +9,7 @@ + #include <linux/string.h> + #include <linux/slab.h> + #include <linux/file.h> ++#include <linux/ve_owner.h> + #include <linux/init.h> + #include <linux/module.h> + #include <linux/smp_lock.h> +@@ -25,6 +26,8 @@ + + #include <asm/atomic.h> + ++#include <ub/ub_misc.h> ++ + /* sysctl tunables... */ + struct files_stat_struct files_stat = { + .max_files = NR_FILE +@@ -38,6 +41,8 @@ static struct percpu_counter nr_files __ + static inline void file_free_rcu(struct rcu_head *head) + { + struct file *f = container_of(head, struct file, f_u.fu_rcuhead); ++ ub_file_uncharge(f); ++ put_ve(VE_OWNER_FILP(f)); + kmem_cache_free(filp_cachep, f); + } + +@@ -109,6 +114,12 @@ struct file *get_empty_filp(void) + + percpu_counter_inc(&nr_files); + memset(f, 0, sizeof(*f)); ++ ++ if (ub_file_charge(f)) ++ goto fail_ch; ++ ++ SET_VE_OWNER_FILP(f, get_ve(get_exec_env())); ++ + if (security_file_alloc(f)) + goto fail_sec; + +@@ -134,6 +145,10 @@ fail_sec: + file_free(f); + fail: + return NULL; ++ ++fail_ch: ++ kmem_cache_free(filp_cachep, f); ++ return NULL; + } + + EXPORT_SYMBOL(get_empty_filp); +diff -upr linux-2.6.16.orig/fs/filesystems.c linux-2.6.16-026test015/fs/filesystems.c +--- linux-2.6.16.orig/fs/filesystems.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/filesystems.c 2006-07-04 14:41:38.000000000 +0400 +@@ -13,6 +13,7 @@ + #include <linux/init.h> + #include <linux/module.h> + #include <linux/sched.h> /* for 'current' */ ++#include <linux/ve_owner.h> + #include <asm/uaccess.h> + + /* +@@ -22,8 +23,8 @@ + * During the unload module must call unregister_filesystem(). + * We can access the fields of list element if: + * 1) spinlock is held or +- * 2) we hold the reference to the module. 
+- * The latter can be guaranteed by call of try_module_get(); if it ++ * 2) we hold the reference to the element. ++ * The latter can be guaranteed by call of try_filesystem(); if it + * returned 0 we must skip the element, otherwise we got the reference. + * Once the reference is obtained we can drop the spinlock. + */ +@@ -31,23 +32,51 @@ + static struct file_system_type *file_systems; + static DEFINE_RWLOCK(file_systems_lock); + ++int try_get_filesystem(struct file_system_type *fs) ++{ ++ if (try_module_get(fs->owner)) { ++#ifdef CONFIG_VE ++ get_ve(VE_OWNER_FSTYPE(fs)); ++#endif ++ return 1; ++ } ++ return 0; ++} ++ + /* WARNING: This can be used only if we _already_ own a reference */ + void get_filesystem(struct file_system_type *fs) + { ++#ifdef CONFIG_VE ++ get_ve(VE_OWNER_FSTYPE(fs)); ++#endif + __module_get(fs->owner); + } + + void put_filesystem(struct file_system_type *fs) + { + module_put(fs->owner); ++#ifdef CONFIG_VE ++ put_ve(VE_OWNER_FSTYPE(fs)); ++#endif ++} ++ ++static inline int check_ve_fstype(struct file_system_type *p, ++ struct ve_struct *env) ++{ ++ return ((p->fs_flags & FS_VIRTUALIZED) || ++ ve_accessible_strict(VE_OWNER_FSTYPE(p), env)); + } + +-static struct file_system_type **find_filesystem(const char *name) ++static struct file_system_type **find_filesystem(const char *name, ++ struct ve_struct *env) + { + struct file_system_type **p; +- for (p=&file_systems; *p; p=&(*p)->next) ++ for (p=&file_systems; *p; p=&(*p)->next) { ++ if (!check_ve_fstype(*p, env)) ++ continue; + if (strcmp((*p)->name,name) == 0) + break; ++ } + return p; + } + +@@ -74,8 +103,10 @@ int register_filesystem(struct file_syst + if (fs->next) + return -EBUSY; + INIT_LIST_HEAD(&fs->fs_supers); ++ if (VE_OWNER_FSTYPE(fs) == NULL) ++ SET_VE_OWNER_FSTYPE(fs, get_ve0()); + write_lock(&file_systems_lock); +- p = find_filesystem(fs->name); ++ p = find_filesystem(fs->name, VE_OWNER_FSTYPE(fs)); + if (*p) + res = -EBUSY; + else +@@ -132,11 +163,14 @@ static int 
fs_index(const char __user * + + err = -EINVAL; + read_lock(&file_systems_lock); +- for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) { ++ for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next) { ++ if (!check_ve_fstype(tmp, get_exec_env())) ++ continue; + if (strcmp(tmp->name,name) == 0) { + err = index; + break; + } ++ index++; + } + read_unlock(&file_systems_lock); + putname(name); +@@ -149,9 +183,15 @@ static int fs_name(unsigned int index, c + int len, res; + + read_lock(&file_systems_lock); +- for (tmp = file_systems; tmp; tmp = tmp->next, index--) +- if (index <= 0 && try_module_get(tmp->owner)) +- break; ++ for (tmp = file_systems; tmp; tmp = tmp->next) { ++ if (!check_ve_fstype(tmp, get_exec_env())) ++ continue; ++ if (!index) { ++ if (try_get_filesystem(tmp)) ++ break; ++ } else ++ index--; ++ } + read_unlock(&file_systems_lock); + if (!tmp) + return -EINVAL; +@@ -169,8 +209,9 @@ static int fs_maxindex(void) + int index; + + read_lock(&file_systems_lock); +- for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++) +- ; ++ for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next) ++ if (check_ve_fstype(tmp, get_exec_env())) ++ index++; + read_unlock(&file_systems_lock); + return index; + } +@@ -206,9 +247,10 @@ int get_filesystem_list(char * buf) + read_lock(&file_systems_lock); + tmp = file_systems; + while (tmp && len < PAGE_SIZE - 80) { +- len += sprintf(buf+len, "%s\t%s\n", +- (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev", +- tmp->name); ++ if (check_ve_fstype(tmp, get_exec_env())) ++ len += sprintf(buf+len, "%s\t%s\n", ++ (tmp->fs_flags & FS_REQUIRES_DEV) ? 
"" : "nodev", ++ tmp->name); + tmp = tmp->next; + } + read_unlock(&file_systems_lock); +@@ -220,14 +262,14 @@ struct file_system_type *get_fs_type(con + struct file_system_type *fs; + + read_lock(&file_systems_lock); +- fs = *(find_filesystem(name)); +- if (fs && !try_module_get(fs->owner)) ++ fs = *(find_filesystem(name, get_exec_env())); ++ if (fs && !try_get_filesystem(fs)) + fs = NULL; + read_unlock(&file_systems_lock); + if (!fs && (request_module("%s", name) == 0)) { + read_lock(&file_systems_lock); +- fs = *(find_filesystem(name)); +- if (fs && !try_module_get(fs->owner)) ++ fs = *(find_filesystem(name, get_exec_env())); ++ if (fs && !try_get_filesystem(fs)) + fs = NULL; + read_unlock(&file_systems_lock); + } +@@ -235,3 +277,5 @@ struct file_system_type *get_fs_type(con + } + + EXPORT_SYMBOL(get_fs_type); ++EXPORT_SYMBOL(get_filesystem); ++EXPORT_SYMBOL(put_filesystem); +diff -upr linux-2.6.16.orig/fs/fuse/dir.c linux-2.6.16-026test015/fs/fuse/dir.c +--- linux-2.6.16.orig/fs/fuse/dir.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/fuse/dir.c 2006-07-04 14:41:37.000000000 +0400 +@@ -708,14 +708,15 @@ static int fuse_access(struct inode *ino + * access request is sent. Execute permission is still checked + * locally based on file mode. + */ +-static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) ++static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fuse_allow_task(fc, current)) + return -EACCES; + else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { +- int err = generic_permission(inode, mask, NULL); ++ int err = generic_permission(inode, mask, NULL, perm); + + /* If permission is denied, try to refresh file + attributes. 
This is also needed, because the root +@@ -723,7 +724,7 @@ static int fuse_permission(struct inode + if (err == -EACCES) { + err = fuse_do_getattr(inode); + if (!err) +- err = generic_permission(inode, mask, NULL); ++ err = generic_permission(inode, mask, NULL, perm); + } + + /* Note: the opposite of the above test does not +diff -upr linux-2.6.16.orig/fs/fuse/file.c linux-2.6.16-026test015/fs/fuse/file.c +--- linux-2.6.16.orig/fs/fuse/file.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/fuse/file.c 2006-07-04 14:41:36.000000000 +0400 +@@ -397,8 +397,12 @@ static int fuse_readpages(struct file *f + return -EINTR; + + err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); +- if (!err) +- fuse_send_readpages(data.req, file, inode); ++ if (!err) { ++ if (data.req->num_pages) ++ fuse_send_readpages(data.req, file, inode); ++ else ++ fuse_put_request(fc, data.req); ++ } + return err; + } + +diff -upr linux-2.6.16.orig/fs/hfs/inode.c linux-2.6.16-026test015/fs/hfs/inode.c +--- linux-2.6.16.orig/fs/hfs/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/hfs/inode.c 2006-07-04 14:41:37.000000000 +0400 +@@ -520,11 +520,11 @@ void hfs_clear_inode(struct inode *inode + } + + static int hfs_permission(struct inode *inode, int mask, +- struct nameidata *nd) ++ struct nameidata *nd, struct exec_perm *perm) + { + if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) + return 0; +- return generic_permission(inode, mask, NULL); ++ return generic_permission(inode, mask, NULL, perm); + } + + static int hfs_file_open(struct inode *inode, struct file *file) +diff -upr linux-2.6.16.orig/fs/hfsplus/inode.c linux-2.6.16-026test015/fs/hfsplus/inode.c +--- linux-2.6.16.orig/fs/hfsplus/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/hfsplus/inode.c 2006-07-04 14:41:37.000000000 +0400 +@@ -237,7 +237,8 @@ static void hfsplus_set_perms(struct ino + perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); + } + +-static int 
hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd) ++static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, + * open_exec has the same test, so it's still not executable, if a x bit +@@ -245,7 +246,7 @@ static int hfsplus_permission(struct ino + */ + if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111)) + return 0; +- return generic_permission(inode, mask, NULL); ++ return generic_permission(inode, mask, NULL, perm); + } + + +diff -upr linux-2.6.16.orig/fs/hostfs/hostfs_kern.c linux-2.6.16-026test015/fs/hostfs/hostfs_kern.c +--- linux-2.6.16.orig/fs/hostfs/hostfs_kern.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/hostfs/hostfs_kern.c 2006-07-04 14:41:37.000000000 +0400 +@@ -796,7 +796,8 @@ int hostfs_rename(struct inode *from_ino + return(err); + } + +-int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) ++int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd, ++ struct exec_perm *perm) + { + char *name; + int r = 0, w = 0, x = 0, err; +@@ -814,7 +815,7 @@ int hostfs_permission(struct inode *ino, + err = access_file(name, r, w, x); + kfree(name); + if(!err) +- err = generic_permission(ino, desired, NULL); ++ err = generic_permission(ino, desired, NULL, perm); + return err; + } + +diff -upr linux-2.6.16.orig/fs/hpfs/namei.c linux-2.6.16-026test015/fs/hpfs/namei.c +--- linux-2.6.16.orig/fs/hpfs/namei.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/hpfs/namei.c 2006-07-04 14:41:37.000000000 +0400 +@@ -415,7 +415,7 @@ again: + d_drop(dentry); + spin_lock(&dentry->d_lock); + if (atomic_read(&dentry->d_count) > 1 || +- permission(inode, MAY_WRITE, NULL) || ++ permission(inode, MAY_WRITE, NULL, NULL) || + !S_ISREG(inode->i_mode) || + get_write_access(inode)) { + spin_unlock(&dentry->d_lock); +diff -upr 
linux-2.6.16.orig/fs/hugetlbfs/inode.c linux-2.6.16-026test015/fs/hugetlbfs/inode.c +--- linux-2.6.16.orig/fs/hugetlbfs/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/hugetlbfs/inode.c 2006-07-04 14:41:39.000000000 +0400 +@@ -800,7 +800,7 @@ struct file *hugetlb_zero_setup(size_t s + struct inode *inode; + struct dentry *dentry, *root; + struct qstr quick_string; +- char buf[16]; ++ char buf[64]; + + if (!can_do_hugetlb_shm()) + return ERR_PTR(-EPERM); +@@ -812,7 +812,8 @@ struct file *hugetlb_zero_setup(size_t s + return ERR_PTR(-ENOMEM); + + root = hugetlbfs_vfsmount->mnt_root; +- snprintf(buf, 16, "%lu", hugetlbfs_counter()); ++ snprintf(buf, sizeof(buf), "VE%d-%lu", ++ VEID(get_exec_env()), hugetlbfs_counter()); + quick_string.name = buf; + quick_string.len = strlen(quick_string.name); + quick_string.hash = 0; +diff -upr linux-2.6.16.orig/fs/inode.c linux-2.6.16-026test015/fs/inode.c +--- linux-2.6.16.orig/fs/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/inode.c 2006-07-04 14:41:39.000000000 +0400 +@@ -9,6 +9,7 @@ + #include <linux/mm.h> + #include <linux/dcache.h> + #include <linux/init.h> ++#include <linux/kernel_stat.h> + #include <linux/quotaops.h> + #include <linux/slab.h> + #include <linux/writeback.h> +@@ -98,13 +99,15 @@ DECLARE_MUTEX(iprune_sem); + */ + struct inodes_stat_t inodes_stat; + +-static kmem_cache_t * inode_cachep; ++kmem_cache_t *inode_cachep; ++ ++static struct address_space_operations vfs_empty_aops; ++struct inode_operations vfs_empty_iops; ++static struct file_operations vfs_empty_fops; ++EXPORT_SYMBOL(vfs_empty_iops); + + static struct inode *alloc_inode(struct super_block *sb) + { +- static struct address_space_operations empty_aops; +- static struct inode_operations empty_iops; +- static struct file_operations empty_fops; + struct inode *inode; + + if (sb->s_op->alloc_inode) +@@ -119,8 +122,8 @@ static struct inode *alloc_inode(struct + inode->i_blkbits = 
sb->s_blocksize_bits; + inode->i_flags = 0; + atomic_set(&inode->i_count, 1); +- inode->i_op = &empty_iops; +- inode->i_fop = &empty_fops; ++ inode->i_op = &vfs_empty_iops; ++ inode->i_fop = &vfs_empty_fops; + inode->i_nlink = 1; + atomic_set(&inode->i_writecount, 0); + inode->i_size = 0; +@@ -144,7 +147,7 @@ static struct inode *alloc_inode(struct + return NULL; + } + +- mapping->a_ops = &empty_aops; ++ mapping->a_ops = &vfs_empty_aops; + mapping->host = inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_HIGHUSER); +@@ -303,13 +306,57 @@ static void dispose_list(struct list_hea + spin_unlock(&inode_lock); + } + ++static void show_header(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ ++ printk("VFS: Busy inodes after unmount. " ++ "sb = %p, fs type = %s, sb count = %d, " ++ "sb->s_root = %s\n", sb, ++ (sb->s_type != NULL) ? sb->s_type->name : "", ++ sb->s_count, ++ (sb->s_root != NULL) ? ++ (char *)sb->s_root->d_name.name : ""); ++} ++ ++static void show_inode(struct list_head *tmp, struct inode *inode) ++{ ++ struct dentry *d; ++ int i; ++ ++ printk("inode = %p, inode->i_count = %d, " ++ "inode->i_nlink = %d, " ++ "inode->i_mode = %d, " ++ "inode->i_state = %ld, " ++ "inode->i_flags = %d, " ++ "inode->i_devices.next = %p, " ++ "inode->i_devices.prev = %p, " ++ "inode->i_ino = %ld\n", ++ tmp, ++ atomic_read(&inode->i_count), ++ inode->i_nlink, ++ inode->i_mode, ++ inode->i_state, ++ inode->i_flags, ++ inode->i_devices.next, ++ inode->i_devices.prev, ++ inode->i_ino); ++ printk("inode dump: "); ++ for (i = 0; i < sizeof(*tmp); i++) ++ printk("%2.2x ", *((u_char *)tmp + i)); ++ printk("\n"); ++ list_for_each_entry(d, &inode->i_dentry, d_alias) ++ printk(" d_alias %s\n", ++ d->d_name.name); ++} ++ + /* + * Invalidate all inodes for a device. 
+ */ +-static int invalidate_list(struct list_head *head, struct list_head *dispose) ++static int invalidate_list(struct list_head *head, struct list_head *dispose, int check) + { + struct list_head *next; +- int busy = 0, count = 0; ++ int busy = 0, count = 0, once = 1; + + next = head->next; + for (;;) { +@@ -336,6 +383,14 @@ static int invalidate_list(struct list_h + continue; + } + busy = 1; ++ ++ if (check) { ++ if (once) { ++ once = 0; ++ show_header(inode); ++ } ++ show_inode(tmp, inode); ++ } + } + /* only unused inodes may be cached with i_count zero */ + inodes_stat.nr_unused -= count; +@@ -350,7 +405,7 @@ static int invalidate_list(struct list_h + * fails because there are busy inodes then a non zero value is returned. + * If the discard is successful all the inodes have been discarded. + */ +-int invalidate_inodes(struct super_block * sb) ++int invalidate_inodes(struct super_block * sb, int check) + { + int busy; + LIST_HEAD(throw_away); +@@ -358,7 +413,7 @@ int invalidate_inodes(struct super_block + down(&iprune_sem); + spin_lock(&inode_lock); + inotify_unmount_inodes(&sb->s_inodes); +- busy = invalidate_list(&sb->s_inodes, &throw_away); ++ busy = invalidate_list(&sb->s_inodes, &throw_away, check); + spin_unlock(&inode_lock); + + dispose_list(&throw_away); +@@ -382,7 +437,7 @@ int __invalidate_device(struct block_dev + * hold). + */ + shrink_dcache_sb(sb); +- res = invalidate_inodes(sb); ++ res = invalidate_inodes(sb, 0); + drop_super(sb); + } + invalidate_bdev(bdev, 0); +@@ -478,6 +533,7 @@ static void prune_icache(int nr_to_scan) + */ + static int shrink_icache_memory(int nr, gfp_t gfp_mask) + { ++ KSTAT_PERF_ENTER(shrink_icache) + if (nr) { + /* + * Nasty deadlock avoidance. 
We may hold various FS locks, +@@ -488,6 +544,7 @@ static int shrink_icache_memory(int nr, + return -1; + prune_icache(nr); + } ++ KSTAT_PERF_LEAVE(shrink_icache) + return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; + } + +@@ -737,7 +794,7 @@ EXPORT_SYMBOL(iunique); + struct inode *igrab(struct inode *inode) + { + spin_lock(&inode_lock); +- if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) ++ if (inode && !(inode->i_state & (I_FREEING|I_WILL_FREE))) + __iget(inode); + else + /* +diff -upr linux-2.6.16.orig/fs/inotify.c linux-2.6.16-026test015/fs/inotify.c +--- linux-2.6.16.orig/fs/inotify.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/inotify.c 2006-07-04 14:41:37.000000000 +0400 +@@ -374,7 +374,7 @@ static int find_inode(const char __user + if (error) + return error; + /* you can only watch an inode if you have read permissions on it */ +- error = vfs_permission(nd, MAY_READ); ++ error = vfs_permission(nd, MAY_READ, NULL); + if (error) + path_release(nd); + return error; +diff -upr linux-2.6.16.orig/fs/ioprio.c linux-2.6.16-026test015/fs/ioprio.c +--- linux-2.6.16.orig/fs/ioprio.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ioprio.c 2006-07-04 14:41:38.000000000 +0400 +@@ -53,6 +53,9 @@ asmlinkage long sys_ioprio_set(int which + struct user_struct *user; + int ret; + ++ if (!ve_is_super(get_exec_env())) ++ return -EPERM; ++ + switch (class) { + case IOPRIO_CLASS_RT: + if (!capable(CAP_SYS_ADMIN)) +@@ -78,18 +81,18 @@ asmlinkage long sys_ioprio_set(int which + if (!who) + p = current; + else +- p = find_task_by_pid(who); ++ p = find_task_by_pid_all(who); + if (p) + ret = set_task_ioprio(p, ioprio); + break; + case IOPRIO_WHO_PGRP: + if (!who) + who = process_group(current); +- do_each_task_pid(who, PIDTYPE_PGID, p) { ++ do_each_task_pid_all(who, PIDTYPE_PGID, p) { + ret = set_task_ioprio(p, ioprio); + if (ret) + break; +- } while_each_task_pid(who, PIDTYPE_PGID, p); ++ } while_each_task_pid_all(who, 
PIDTYPE_PGID, p); + break; + case IOPRIO_WHO_USER: + if (!who) +@@ -100,13 +103,13 @@ asmlinkage long sys_ioprio_set(int which + if (!user) + break; + +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + if (p->uid != who) + continue; + ret = set_task_ioprio(p, ioprio); + if (ret) + break; +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + + if (who) + free_uid(user); +@@ -131,19 +134,19 @@ asmlinkage long sys_ioprio_get(int which + if (!who) + p = current; + else +- p = find_task_by_pid(who); ++ p = find_task_by_pid_ve(who); + if (p) + ret = p->ioprio; + break; + case IOPRIO_WHO_PGRP: + if (!who) + who = process_group(current); +- do_each_task_pid(who, PIDTYPE_PGID, p) { ++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) { + if (ret == -ESRCH) + ret = p->ioprio; + else + ret = ioprio_best(ret, p->ioprio); +- } while_each_task_pid(who, PIDTYPE_PGID, p); ++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p); + break; + case IOPRIO_WHO_USER: + if (!who) +@@ -154,14 +157,14 @@ asmlinkage long sys_ioprio_get(int which + if (!user) + break; + +- do_each_thread(g, p) { ++ do_each_thread_ve(g, p) { + if (p->uid != user->uid) + continue; + if (ret == -ESRCH) + ret = p->ioprio; + else + ret = ioprio_best(ret, p->ioprio); +- } while_each_thread(g, p); ++ } while_each_thread_ve(g, p); + + if (who) + free_uid(user); +diff -upr linux-2.6.16.orig/fs/jbd/journal.c linux-2.6.16-026test015/fs/jbd/journal.c +--- linux-2.6.16.orig/fs/jbd/journal.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/jbd/journal.c 2006-07-04 14:41:37.000000000 +0400 +@@ -210,10 +210,16 @@ end_loop: + return 0; + } + +-static void journal_start_thread(journal_t *journal) ++static int journal_start_thread(journal_t *journal) + { +- kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES); ++ int err; ++ ++ err = kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES); ++ if (err < 0) ++ return err; ++ + wait_event(journal->j_wait_done_commit, journal->j_task != 
0); ++ return 0; + } + + static void journal_kill_thread(journal_t *journal) +@@ -839,8 +845,7 @@ static int journal_reset(journal_t *jour + + /* Add the dynamic fields and write it to disk. */ + journal_update_superblock(journal, 1); +- journal_start_thread(journal); +- return 0; ++ return journal_start_thread(journal); + } + + /** +diff -upr linux-2.6.16.orig/fs/jbd/transaction.c linux-2.6.16-026test015/fs/jbd/transaction.c +--- linux-2.6.16.orig/fs/jbd/transaction.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/jbd/transaction.c 2006-07-04 14:41:37.000000000 +0400 +@@ -1868,6 +1868,7 @@ zap_buffer_unlocked: + clear_buffer_mapped(bh); + clear_buffer_req(bh); + clear_buffer_new(bh); ++ clear_buffer_delay(bh); + bh->b_bdev = NULL; + return may_free; + } +diff -upr linux-2.6.16.orig/fs/jfs/acl.c linux-2.6.16-026test015/fs/jfs/acl.c +--- linux-2.6.16.orig/fs/jfs/acl.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/jfs/acl.c 2006-07-04 14:41:37.000000000 +0400 +@@ -140,9 +140,10 @@ static int jfs_check_acl(struct inode *i + return -EAGAIN; + } + +-int jfs_permission(struct inode *inode, int mask, struct nameidata *nd) ++int jfs_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { +- return generic_permission(inode, mask, jfs_check_acl); ++ return generic_permission(inode, mask, jfs_check_acl, perm); + } + + int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) +diff -upr linux-2.6.16.orig/fs/jfs/jfs_acl.h linux-2.6.16-026test015/fs/jfs/jfs_acl.h +--- linux-2.6.16.orig/fs/jfs/jfs_acl.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/jfs/jfs_acl.h 2006-07-04 14:41:37.000000000 +0400 +@@ -20,7 +20,7 @@ + + #ifdef CONFIG_JFS_POSIX_ACL + +-int jfs_permission(struct inode *, int, struct nameidata *); ++int jfs_permission(struct inode *, int, struct nameidata *, struct exec_perm *); + int jfs_init_acl(tid_t, struct inode *, struct inode *); + int 
jfs_setattr(struct dentry *, struct iattr *); + +diff -upr linux-2.6.16.orig/fs/jfs/jfs_metapage.c linux-2.6.16-026test015/fs/jfs/jfs_metapage.c +--- linux-2.6.16.orig/fs/jfs/jfs_metapage.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/jfs/jfs_metapage.c 2006-07-04 14:41:36.000000000 +0400 +@@ -543,7 +543,7 @@ add_failed: + static int metapage_releasepage(struct page *page, gfp_t gfp_mask) + { + struct metapage *mp; +- int busy = 0; ++ int ret = 1; + unsigned int offset; + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { +@@ -553,30 +553,20 @@ static int metapage_releasepage(struct p + continue; + + jfs_info("metapage_releasepage: mp = 0x%p", mp); +- if (mp->count || mp->nohomeok) { ++ if (mp->count || mp->nohomeok || ++ test_bit(META_dirty, &mp->flag)) { + jfs_info("count = %ld, nohomeok = %d", mp->count, + mp->nohomeok); +- busy = 1; ++ ret = 0; + continue; + } +- wait_on_page_writeback(page); +- //WARN_ON(test_bit(META_dirty, &mp->flag)); +- if (test_bit(META_dirty, &mp->flag)) { +- dump_mem("dirty mp in metapage_releasepage", mp, +- sizeof(struct metapage)); +- dump_mem("page", page, sizeof(struct page)); +- dump_stack(); +- } + if (mp->lsn) + remove_from_logsync(mp); + remove_metapage(page, mp); + INCREMENT(mpStat.pagefree); + free_metapage(mp); + } +- if (busy) +- return -1; +- +- return 0; ++ return ret; + } + + static int metapage_invalidatepage(struct page *page, unsigned long offset) +diff -upr linux-2.6.16.orig/fs/lockd/clntproc.c linux-2.6.16-026test015/fs/lockd/clntproc.c +--- linux-2.6.16.orig/fs/lockd/clntproc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/lockd/clntproc.c 2006-07-04 14:41:38.000000000 +0400 +@@ -130,10 +130,10 @@ static void nlmclnt_setlockargs(struct n + nlmclnt_next_cookie(&argp->cookie); + argp->state = nsm_local_state; + memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh)); +- lock->caller = system_utsname.nodename; ++ lock->caller = 
ve_utsname.nodename; + lock->oh.data = req->a_owner; + lock->oh.len = sprintf(req->a_owner, "%d@%s", +- current->pid, system_utsname.nodename); ++ current->pid, ve_utsname.nodename); + locks_copy_lock(&lock->fl, fl); + } + +@@ -154,7 +154,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca + { + locks_copy_lock(&call->a_args.lock.fl, &lock->fl); + memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh)); +- call->a_args.lock.caller = system_utsname.nodename; ++ call->a_args.lock.caller = ve_utsname.nodename; + call->a_args.lock.oh.len = lock->oh.len; + + /* set default data area */ +diff -upr linux-2.6.16.orig/fs/lockd/mon.c linux-2.6.16-026test015/fs/lockd/mon.c +--- linux-2.6.16.orig/fs/lockd/mon.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/lockd/mon.c 2006-07-04 14:41:38.000000000 +0400 +@@ -147,7 +147,7 @@ xdr_encode_common(struct rpc_rqst *rqstp + */ + sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); + if (!(p = xdr_encode_string(p, buffer)) +- || !(p = xdr_encode_string(p, system_utsname.nodename))) ++ || !(p = xdr_encode_string(p, ve_utsname.nodename))) + return ERR_PTR(-EIO); + *p++ = htonl(argp->prog); + *p++ = htonl(argp->vers); +diff -upr linux-2.6.16.orig/fs/locks.c linux-2.6.16-026test015/fs/locks.c +--- linux-2.6.16.orig/fs/locks.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/locks.c 2006-07-04 14:41:39.000000000 +0400 +@@ -129,6 +129,8 @@ + #include <asm/semaphore.h> + #include <asm/uaccess.h> + ++#include <ub/ub_misc.h> ++ + #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) + #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) + #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) +@@ -148,11 +150,28 @@ static LIST_HEAD(blocked_list); + static kmem_cache_t *filelock_cache; + + /* Allocate an empty lock structure. 
*/ +-static struct file_lock *locks_alloc_lock(void) ++static struct file_lock *locks_alloc_lock(int charge) + { +- return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); ++ struct file_lock *fl; ++ ++ fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL); ++#ifdef CONFIG_USER_RESOURCE ++ if (fl == NULL) ++ goto out; ++ fl->fl_charged = 0; ++ if (!charge) ++ goto out; ++ if (!ub_flock_charge(fl, 1)) ++ goto out; ++ ++ kmem_cache_free(filelock_cache, fl); ++ fl = NULL; ++out: ++#endif ++ return fl; + } + ++ + /* Free a lock which is not in use. */ + static void locks_free_lock(struct file_lock *fl) + { +@@ -181,6 +200,7 @@ static void locks_free_lock(struct file_ + fl->fl_lmops = NULL; + } + ++ ub_flock_uncharge(fl); + kmem_cache_free(filelock_cache, fl); + } + +@@ -263,7 +283,7 @@ static int flock_make_lock(struct file * + if (type < 0) + return type; + +- fl = locks_alloc_lock(); ++ fl = locks_alloc_lock(type != F_UNLCK); + if (fl == NULL) + return -ENOMEM; + +@@ -432,15 +452,14 @@ static struct lock_manager_operations le + */ + static int lease_init(struct file *filp, int type, struct file_lock *fl) + { ++ if (assign_type(fl, type) != 0) ++ return -EINVAL; ++ + fl->fl_owner = current->files; + fl->fl_pid = current->tgid; + + fl->fl_file = filp; + fl->fl_flags = FL_LEASE; +- if (assign_type(fl, type) != 0) { +- locks_free_lock(fl); +- return -EINVAL; +- } + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + fl->fl_ops = NULL; +@@ -451,17 +470,20 @@ static int lease_init(struct file *filp, + /* Allocate a file_lock initialised to this type of lease */ + static int lease_alloc(struct file *filp, int type, struct file_lock **flp) + { +- struct file_lock *fl = locks_alloc_lock(); +- int error; ++ struct file_lock *fl = locks_alloc_lock(1); ++ int error = -ENOMEM; + + if (fl == NULL) +- return -ENOMEM; ++ goto out; + + error = lease_init(filp, type, fl); +- if (error) +- return error; ++ if (error) { ++ locks_free_lock(fl); ++ fl = NULL; ++ } ++out: + *flp = fl; +- return 0; 
++ return error; + } + + /* Check if two locks overlap each other. +@@ -712,8 +734,9 @@ EXPORT_SYMBOL(posix_locks_deadlock); + * at the head of the list, but that's secret knowledge known only to + * flock_lock_file and posix_lock_file. + */ +-static int flock_lock_file(struct file *filp, struct file_lock *new_fl) ++static int flock_lock_file(struct file *filp, struct file_lock *request) + { ++ struct file_lock *new_fl = NULL; + struct file_lock **before; + struct inode * inode = filp->f_dentry->d_inode; + int error = 0; +@@ -728,44 +751,60 @@ static int flock_lock_file(struct file * + continue; + if (filp != fl->fl_file) + continue; +- if (new_fl->fl_type == fl->fl_type) ++ if (request->fl_type == fl->fl_type) + goto out; + found = 1; + locks_delete_lock(before); + break; + } +- unlock_kernel(); + +- if (new_fl->fl_type == F_UNLCK) +- return 0; ++ if (request->fl_type == F_UNLCK) ++ goto out; + + /* ++ * Nont F_UNLCK request must be already charged in ++ * flock_make_lock(). ++ * ++ * actually new_fl must be charged not the request, ++ * but we try to fail earlier ++ */ ++ error = -ENOMEM; ++ new_fl = locks_alloc_lock(0); ++ if (new_fl == NULL) ++ goto out; ++ /* + * If a higher-priority process was blocked on the old file lock, + * give it the opportunity to lock the file. 
+ */ + if (found) + cond_resched(); + +- lock_kernel(); + for_each_lock(inode, before) { + struct file_lock *fl = *before; + if (IS_POSIX(fl)) + break; + if (IS_LEASE(fl)) + continue; +- if (!flock_locks_conflict(new_fl, fl)) ++ if (!flock_locks_conflict(request, fl)) + continue; + error = -EAGAIN; +- if (new_fl->fl_flags & FL_SLEEP) { +- locks_insert_block(fl, new_fl); +- } ++ if (request->fl_flags & FL_SLEEP) ++ locks_insert_block(fl, request); + goto out; + } ++ ++ set_flock_charged(new_fl); ++ unset_flock_charged(request); ++ ++ locks_copy_lock(new_fl, request); + locks_insert_lock(&inode->i_flock, new_fl); ++ new_fl = NULL; + error = 0; + + out: + unlock_kernel(); ++ if (new_fl) ++ locks_free_lock(new_fl); + return error; + } + +@@ -784,8 +823,11 @@ static int __posix_lock_file(struct inod + * We may need two file_lock structures for this operation, + * so we get them in advance to avoid races. + */ +- new_fl = locks_alloc_lock(); +- new_fl2 = locks_alloc_lock(); ++ if (request->fl_type != F_UNLCK) ++ new_fl = locks_alloc_lock(1); ++ else ++ new_fl = NULL; ++ new_fl2 = locks_alloc_lock(0); + + lock_kernel(); + if (request->fl_type != F_UNLCK) { +@@ -813,7 +855,7 @@ static int __posix_lock_file(struct inod + goto out; + + error = -ENOLCK; /* "no luck" */ +- if (!(new_fl && new_fl2)) ++ if (!((request->fl_type == F_UNLCK || new_fl) && new_fl2)) + goto out; + + /* +@@ -919,19 +961,30 @@ static int __posix_lock_file(struct inod + if (!added) { + if (request->fl_type == F_UNLCK) + goto out; ++ error = -ENOLCK; ++ if (right && (left == right) && ub_flock_charge(new_fl, 1)) ++ goto out; + locks_copy_lock(new_fl, request); + locks_insert_lock(before, new_fl); + new_fl = NULL; ++ error = 0; + } + if (right) { + if (left == right) { + /* The new lock breaks the old one in two pieces, + * so we have to use the second new lock. 
+ */ ++ error = -ENOLCK; ++ if (added && ub_flock_charge(new_fl2, ++ request->fl_type != F_UNLCK)) ++ goto out; ++ /* FIXME move all fl_charged manipulations in ub code */ ++ set_flock_charged(new_fl2); + left = new_fl2; + new_fl2 = NULL; + locks_copy_lock(left, right); + locks_insert_lock(before, left); ++ error = 0; + } + right->fl_start = request->fl_end + 1; + locks_wake_up_blocks(right); +@@ -1337,6 +1390,7 @@ static int __setlease(struct file *filp, + goto out; + + if (my_before != NULL) { ++ *flp = *my_before; + error = lease->fl_lmops->fl_change(my_before, arg); + goto out; + } +@@ -1529,15 +1583,14 @@ asmlinkage long sys_flock(unsigned int f + error = flock_lock_file_wait(filp, lock); + + out_free: +- if (list_empty(&lock->fl_link)) { +- locks_free_lock(lock); +- } ++ locks_free_lock(lock); + + out_putf: + fput(filp); + out: + return error; + } ++EXPORT_SYMBOL_GPL(sys_flock); + + /* Report the first existing lock that would conflict with l. + * This implements the F_GETLK command of fcntl(). +@@ -1573,7 +1626,7 @@ int fcntl_getlk(struct file *filp, struc + + flock.l_type = F_UNLCK; + if (fl != NULL) { +- flock.l_pid = fl->fl_pid; ++ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid); + #if BITS_PER_LONG == 32 + /* + * Make sure we can represent the posix lock via +@@ -1605,7 +1658,7 @@ out: + int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, + struct flock __user *l) + { +- struct file_lock *file_lock = locks_alloc_lock(); ++ struct file_lock *file_lock = locks_alloc_lock(0); + struct flock flock; + struct inode *inode; + int error; +@@ -1727,7 +1780,7 @@ int fcntl_getlk64(struct file *filp, str + + flock.l_type = F_UNLCK; + if (fl != NULL) { +- flock.l_pid = fl->fl_pid; ++ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid); + flock.l_start = fl->fl_start; + flock.l_len = fl->fl_end == OFFSET_MAX ? 
0 : + fl->fl_end - fl->fl_start + 1; +@@ -1748,7 +1801,7 @@ out: + int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, + struct flock64 __user *l) + { +- struct file_lock *file_lock = locks_alloc_lock(); ++ struct file_lock *file_lock = locks_alloc_lock(0); + struct flock64 flock; + struct inode *inode; + int error; +@@ -1976,7 +2029,9 @@ EXPORT_SYMBOL(posix_unblock_lock); + static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) + { + struct inode *inode = NULL; ++ unsigned int fl_pid; + ++ fl_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid); + if (fl->fl_file != NULL) + inode = fl->fl_file->f_dentry->d_inode; + +@@ -2018,16 +2073,16 @@ static void lock_get_status(char* out, s + } + if (inode) { + #ifdef WE_CAN_BREAK_LSLK_NOW +- out += sprintf(out, "%d %s:%ld ", fl->fl_pid, ++ out += sprintf(out, "%d %s:%ld ", fl_pid, + inode->i_sb->s_id, inode->i_ino); + #else + /* userspace relies on this representation of dev_t ;-( */ +- out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid, ++ out += sprintf(out, "%d %02x:%02x:%ld ", fl_pid, + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), inode->i_ino); + #endif + } else { +- out += sprintf(out, "%d <none>:0 ", fl->fl_pid); ++ out += sprintf(out, "%d <none>:0 ", fl_pid); + } + if (IS_POSIX(fl)) { + if (fl->fl_end == OFFSET_MAX) +@@ -2076,11 +2131,17 @@ int get_locks_status(char *buffer, char + char *q = buffer; + off_t pos = 0; + int i = 0; ++ struct ve_struct *env; + + lock_kernel(); ++ env = get_exec_env(); + list_for_each(tmp, &file_lock_list) { + struct list_head *btmp; + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); ++ ++ if (!ve_accessible(VE_OWNER_FILP(fl->fl_file), env)) ++ continue; ++ + lock_get_status(q, fl, ++i, ""); + move_lock_status(&q, &pos, offset); + +@@ -2212,7 +2273,12 @@ void steal_locks(fl_owner_t from) + + lock_kernel(); + j = 0; +- rcu_read_lock(); ++ ++ /* ++ * We are not taking a ref to the file structures, so ++ * we need to 
acquire ->file_lock. ++ */ ++ spin_lock(&files->file_lock); + fdt = files_fdtable(files); + for (;;) { + unsigned long set; +@@ -2230,7 +2296,7 @@ void steal_locks(fl_owner_t from) + set >>= 1; + } + } +- rcu_read_unlock(); ++ spin_unlock(&files->file_lock); + unlock_kernel(); + } + EXPORT_SYMBOL(steal_locks); +@@ -2238,7 +2304,7 @@ EXPORT_SYMBOL(steal_locks); + static int __init filelock_init(void) + { + filelock_cache = kmem_cache_create("file_lock_cache", +- sizeof(struct file_lock), 0, SLAB_PANIC, ++ sizeof(struct file_lock), 0, SLAB_PANIC | SLAB_UBC, + init_once, NULL); + return 0; + } +diff -upr linux-2.6.16.orig/fs/namei.c linux-2.6.16-026test015/fs/namei.c +--- linux-2.6.16.orig/fs/namei.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/namei.c 2006-07-04 14:41:39.000000000 +0400 +@@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname); + * for filesystem access without changing the "normal" uids which + * are used for other things.. + */ +-int generic_permission(struct inode *inode, int mask, ++static int __generic_permission(struct inode *inode, int mask, + int (*check_acl)(struct inode *inode, int mask)) + { + umode_t mode = inode->i_mode; +@@ -225,7 +225,26 @@ int generic_permission(struct inode *ino + return -EACCES; + } + +-int permission(struct inode *inode, int mask, struct nameidata *nd) ++int generic_permission(struct inode *inode, int mask, ++ int (*check_acl)(struct inode *inode, int mask), ++ struct exec_perm *perm) ++{ ++ int ret; ++ ++ if (perm == NULL) ++ return __generic_permission(inode, mask, check_acl); ++ ++ mutex_lock(&inode->i_mutex); ++ ret = __generic_permission(inode, mask, check_acl); ++ if (!ret) ++ set_exec_perm(perm, inode); ++ mutex_unlock(&inode->i_mutex); ++ return ret; ++} ++ ++ ++int permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + int retval, submask; + +@@ -250,9 +269,9 @@ int permission(struct inode *inode, int + /* Ordinary permission routines do not understand 
MAY_APPEND. */ + submask = mask & ~MAY_APPEND; + if (inode->i_op && inode->i_op->permission) +- retval = inode->i_op->permission(inode, submask, nd); ++ retval = inode->i_op->permission(inode, submask, nd, perm); + else +- retval = generic_permission(inode, submask, NULL); ++ retval = generic_permission(inode, submask, NULL, perm); + if (retval) + return retval; + +@@ -269,9 +288,9 @@ int permission(struct inode *inode, int + * for filesystem access without changing the "normal" uids which + * are used for other things. + */ +-int vfs_permission(struct nameidata *nd, int mask) ++int vfs_permission(struct nameidata *nd, int mask, struct exec_perm *perm) + { +- return permission(nd->dentry->d_inode, mask, nd); ++ return permission(nd->dentry->d_inode, mask, nd, perm); + } + + /** +@@ -288,7 +307,7 @@ int vfs_permission(struct nameidata *nd, + */ + int file_permission(struct file *file, int mask) + { +- return permission(file->f_dentry->d_inode, mask, NULL); ++ return permission(file->f_dentry->d_inode, mask, NULL, NULL); + } + + /* +@@ -379,6 +398,21 @@ static struct dentry * cached_lookup(str + if (!dentry) + dentry = d_lookup(parent, name); + ++ /* ++ * The revalidation rules are simple: ++ * d_revalidate operation is called when we're about to use a cached ++ * dentry rather than call d_lookup. ++ * d_revalidate method may unhash the dentry itself or return FALSE, in ++ * which case if the dentry can be released d_lookup will be called. ++ * ++ * Additionally, by request of NFS people ++ * (http://linux.bkbits.net:8080/linux-2.4/cset@1.181?nav=index.html|src/|src/fs|related/fs/namei.c) ++ * d_revalidate is called when `/', `.' or `..' are looked up. ++ * Since re-lookup is impossible on them, we introduce a hack and ++ * return an error in this case. 
++ * ++ * 2003/02/19 SAW ++ */ + if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { + if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) { + dput(dentry); +@@ -441,6 +475,7 @@ static struct dentry * real_lookup(struc + struct dentry * result; + struct inode *dir = parent->d_inode; + ++repeat: + mutex_lock(&dir->i_mutex); + /* + * First re-do the cached lookup just in case it was created +@@ -479,7 +514,7 @@ static struct dentry * real_lookup(struc + if (result->d_op && result->d_op->d_revalidate) { + if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { + dput(result); +- result = ERR_PTR(-ENOENT); ++ goto repeat; + } + } + return result; +@@ -704,7 +739,14 @@ static __always_inline void follow_dotdo + read_unlock(¤t->fs->lock); + break; + } +- read_unlock(¤t->fs->lock); ++#ifdef CONFIG_VE ++ if (nd->dentry == get_exec_env()->fs_root && ++ nd->mnt == get_exec_env()->fs_rootmnt) { ++ read_unlock(¤t->fs->lock); ++ break; ++ } ++#endif ++ read_unlock(¤t->fs->lock); + spin_lock(&dcache_lock); + if (nd->dentry != nd->mnt->mnt_root) { + nd->dentry = dget(nd->dentry->d_parent); +@@ -745,6 +787,10 @@ static int do_lookup(struct nameidata *n + if (dentry->d_op && dentry->d_op->d_revalidate) + goto need_revalidate; + done: ++ if ((nd->flags & LOOKUP_STRICT) && d_mountpoint(dentry)) { ++ dput(dentry); ++ return -ENOENT; ++ } + path->mnt = mnt; + path->dentry = dentry; + __follow_mount(path); +@@ -780,6 +826,7 @@ static fastcall int __link_path_walk(con + { + struct path next; + struct inode *inode; ++ int real_components = 0; + int err; + unsigned int lookup_flags = nd->flags; + +@@ -801,7 +848,7 @@ static fastcall int __link_path_walk(con + nd->flags |= LOOKUP_CONTINUE; + err = exec_permission_lite(inode, nd); + if (err == -EAGAIN) +- err = vfs_permission(nd, MAY_EXEC); ++ err = vfs_permission(nd, MAY_EXEC, NULL); + if (err) + break; + +@@ -851,6 +898,7 @@ static fastcall int __link_path_walk(con + break; + } + /* This does the 
actual lookups.. */ ++ real_components++; + err = do_lookup(nd, &this, &next); + if (err) + break; +@@ -864,6 +912,9 @@ static fastcall int __link_path_walk(con + goto out_dput; + + if (inode->i_op->follow_link) { ++ err = -ENOENT; ++ if (lookup_flags & LOOKUP_STRICT) ++ goto out_dput; + err = do_follow_link(&next, nd); + if (err) + goto return_err; +@@ -911,6 +962,7 @@ last_component: + break; + inode = next.dentry->d_inode; + if ((lookup_flags & LOOKUP_FOLLOW) ++ && !(lookup_flags & LOOKUP_STRICT) + && inode && inode->i_op && inode->i_op->follow_link) { + err = do_follow_link(&next, nd); + if (err) +@@ -932,26 +984,40 @@ lookup_parent: + nd->last_type = LAST_NORM; + if (this.name[0] != '.') + goto return_base; +- if (this.len == 1) ++ if (this.len == 1) { + nd->last_type = LAST_DOT; +- else if (this.len == 2 && this.name[1] == '.') ++ goto return_reval; ++ } else if (this.len == 2 && this.name[1] == '.') { + nd->last_type = LAST_DOTDOT; +- else +- goto return_base; ++ goto return_reval; ++ } ++return_base: ++ if (!(nd->flags & LOOKUP_NOAREACHECK)) { ++ err = check_area_access_ve(nd->dentry, nd->mnt); ++ if (err) ++ break; ++ } ++ return 0; + return_reval: + /* + * We bypassed the ordinary revalidation routines. + * We may need to check the cached dentry for staleness. + */ +- if (nd->dentry && nd->dentry->d_sb && ++ if (!real_components && nd->dentry && nd->dentry->d_sb && + (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { + err = -ESTALE; + /* Note: we do not d_invalidate() */ + if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) ++ /* ++ * This lookup is for `/' or `.' or `..'. ++ * The filesystem unhashed the dentry itself ++ * inside d_revalidate (otherwise, d_invalidate ++ * wouldn't succeed). As a special courtesy to ++ * NFS we return an error. 
2003/02/19 SAW ++ */ + break; + } +-return_base: +- return 0; ++ goto return_base; + out_dput: + dput_path(&next, nd); + break; +@@ -1077,8 +1143,8 @@ static int fastcall do_path_lookup(int d + nd->flags = flags; + nd->depth = 0; + +- read_lock(¤t->fs->lock); + if (*name=='/') { ++ read_lock(¤t->fs->lock); + if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { + nd->mnt = mntget(current->fs->altrootmnt); + nd->dentry = dget(current->fs->altroot); +@@ -1089,33 +1155,35 @@ static int fastcall do_path_lookup(int d + } + nd->mnt = mntget(current->fs->rootmnt); + nd->dentry = dget(current->fs->root); ++ read_unlock(¤t->fs->lock); + } else if (dfd == AT_FDCWD) { ++ read_lock(¤t->fs->lock); + nd->mnt = mntget(current->fs->pwdmnt); + nd->dentry = dget(current->fs->pwd); ++ read_unlock(¤t->fs->lock); + } else { + struct dentry *dentry; + + file = fget_light(dfd, &fput_needed); + retval = -EBADF; + if (!file) +- goto unlock_fail; ++ goto out_fail; + + dentry = file->f_dentry; + + retval = -ENOTDIR; + if (!S_ISDIR(dentry->d_inode->i_mode)) +- goto fput_unlock_fail; ++ goto fput_fail; + + retval = file_permission(file, MAY_EXEC); + if (retval) +- goto fput_unlock_fail; ++ goto fput_fail; + + nd->mnt = mntget(file->f_vfsmnt); + nd->dentry = dget(dentry); + + fput_light(file, fput_needed); + } +- read_unlock(¤t->fs->lock); + current->total_link_count = 0; + retval = link_path_walk(name, nd); + out: +@@ -1124,13 +1192,12 @@ out: + nd->dentry->d_inode)) + audit_inode(name, nd->dentry->d_inode, flags); + } ++out_fail: + return retval; + +-fput_unlock_fail: ++fput_fail: + fput_light(file, fput_needed); +-unlock_fail: +- read_unlock(¤t->fs->lock); +- return retval; ++ goto out_fail; + } + + int fastcall path_lookup(const char *name, unsigned int flags, +@@ -1219,7 +1286,7 @@ static struct dentry * __lookup_hash(str + int err; + + inode = base->d_inode; +- err = permission(inode, MAY_EXEC, nd); ++ err = permission(inode, MAY_EXEC, nd, NULL); + dentry = ERR_PTR(err); + if (err) + 
goto out; +@@ -1354,7 +1421,7 @@ static int may_delete(struct inode *dir, + + BUG_ON(victim->d_parent->d_inode != dir); + +- error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); ++ error = permission(dir,MAY_WRITE | MAY_EXEC, NULL, NULL); + if (error) + return error; + if (IS_APPEND(dir)) +@@ -1391,7 +1458,7 @@ static inline int may_create(struct inod + return -EEXIST; + if (IS_DEADDIR(dir)) + return -ENOENT; +- return permission(dir,MAY_WRITE | MAY_EXEC, nd); ++ return permission(dir,MAY_WRITE | MAY_EXEC, nd, NULL); + } + + /* +@@ -1491,7 +1558,7 @@ int may_open(struct nameidata *nd, int a + if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) + return -EISDIR; + +- error = vfs_permission(nd, acc_mode); ++ error = vfs_permission(nd, acc_mode, NULL); + if (error) + return error; + +@@ -1628,6 +1695,12 @@ do_last: + goto exit; + } + ++ if (IS_ERR(nd->intent.open.file)) { ++ mutex_unlock(&dir->d_inode->i_mutex); ++ error = PTR_ERR(nd->intent.open.file); ++ goto exit_dput; ++ } ++ + /* Negative dentry, just create the file */ + if (!path.dentry->d_inode) { + if (!IS_POSIXACL(dir->d_inode)) +@@ -1851,6 +1924,7 @@ asmlinkage long sys_mknod(const char __u + { + return sys_mknodat(AT_FDCWD, filename, mode, dev); + } ++EXPORT_SYMBOL_GPL(sys_mknod); + + int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) + { +@@ -1909,6 +1983,7 @@ asmlinkage long sys_mkdir(const char __u + { + return sys_mkdirat(AT_FDCWD, pathname, mode); + } ++EXPORT_SYMBOL_GPL(sys_mkdir); + + /* + * We try to drop the dentry early: we should have +@@ -2016,6 +2091,7 @@ asmlinkage long sys_rmdir(const char __u + { + return do_rmdir(AT_FDCWD, pathname); + } ++EXPORT_SYMBOL_GPL(sys_rmdir); + + int vfs_unlink(struct inode *dir, struct dentry *dentry) + { +@@ -2115,6 +2191,7 @@ asmlinkage long sys_unlink(const char __ + { + return do_unlinkat(AT_FDCWD, pathname); + } ++EXPORT_SYMBOL_GPL(sys_unlink); + + int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) + { +@@ 
-2313,7 +2390,7 @@ static int vfs_rename_dir(struct inode * + * we'll need to flip '..'. + */ + if (new_dir != old_dir) { +- error = permission(old_dentry->d_inode, MAY_WRITE, NULL); ++ error = permission(old_dentry->d_inode, MAY_WRITE, NULL, NULL); + if (error) + return error; + } +@@ -2380,6 +2457,9 @@ int vfs_rename(struct inode *old_dir, st + int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); + const char *old_name; + ++ if (DQUOT_RENAME(old_dentry->d_inode, old_dir, new_dir)) ++ return -EXDEV; ++ + if (old_dentry->d_inode == new_dentry->d_inode) + return 0; + +diff -upr linux-2.6.16.orig/fs/namespace.c linux-2.6.16-026test015/fs/namespace.c +--- linux-2.6.16.orig/fs/namespace.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/namespace.c 2006-07-04 14:41:39.000000000 +0400 +@@ -40,13 +40,15 @@ static inline int sysfs_init(void) + + /* spinlock for vfsmount related operations, inplace of dcache_lock */ + __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); ++EXPORT_SYMBOL(vfsmount_lock); + + static int event; + + static struct list_head *mount_hashtable; + static int hash_mask __read_mostly, hash_bits __read_mostly; + static kmem_cache_t *mnt_cache; +-static struct rw_semaphore namespace_sem; ++struct rw_semaphore namespace_sem; ++EXPORT_SYMBOL(namespace_sem); + + /* /sys/fs */ + decl_subsys(fs, NULL, NULL); +@@ -65,6 +67,7 @@ struct vfsmount *alloc_vfsmnt(const char + struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL); + if (mnt) { + memset(mnt, 0, sizeof(struct vfsmount)); ++ mnt->owner = VEID(get_exec_env()); + atomic_set(&mnt->mnt_count, 1); + INIT_LIST_HEAD(&mnt->mnt_hash); + INIT_LIST_HEAD(&mnt->mnt_child); +@@ -371,10 +374,32 @@ static int show_vfsmnt(struct seq_file * + { 0, NULL } + }; + struct proc_fs_info *fs_infop; ++ char *path_buf, *path; + +- mangle(m, mnt->mnt_devname ? 
mnt->mnt_devname : "none"); ++ /* skip FS_NOMOUNT mounts (rootfs) */ ++ if (mnt->mnt_sb->s_flags & MS_NOUSER) ++ return 0; ++ ++ path_buf = (char *) __get_free_page(GFP_KERNEL); ++ if (!path_buf) ++ return -ENOMEM; ++ path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE); ++ if (IS_ERR(path)) { ++ free_page((unsigned long) path_buf); ++ /* ++ * This means that the file position will be incremented, i.e. ++ * the total number of "invisible" vfsmnt will leak. ++ */ ++ return 0; ++ } ++ ++ if (ve_is_super(get_exec_env())) ++ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); ++ else ++ mangle(m, mnt->mnt_sb->s_type->name); + seq_putc(m, ' '); +- seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); ++ mangle(m, path); ++ free_page((unsigned long) path_buf); + seq_putc(m, ' '); + mangle(m, mnt->mnt_sb->s_type->name); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); +@@ -474,6 +499,7 @@ void release_mounts(struct list_head *he + mntput(mnt); + } + } ++EXPORT_SYMBOL(release_mounts); + + void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) + { +@@ -498,6 +524,7 @@ void umount_tree(struct vfsmount *mnt, i + change_mnt_propagation(p, MS_PRIVATE); + } + } ++EXPORT_SYMBOL(umount_tree); + + static int do_umount(struct vfsmount *mnt, int flags) + { +@@ -608,7 +635,7 @@ asmlinkage long sys_umount(char __user * + goto dput_and_out; + + retval = -EPERM; +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + goto dput_and_out; + + retval = do_umount(nd.mnt, flags); +@@ -632,7 +659,7 @@ asmlinkage long sys_oldumount(char __use + + static int mount_is_safe(struct nameidata *nd) + { +- if (capable(CAP_SYS_ADMIN)) ++ if (capable(CAP_VE_SYS_ADMIN)) + return 0; + return -EPERM; + #ifdef notyet +@@ -642,7 +669,7 @@ static int mount_is_safe(struct nameidat + if (current->uid != nd->dentry->d_inode->i_uid) + return -EPERM; + } +- if (vfs_permission(nd, MAY_WRITE)) ++ if (vfs_permission(nd, MAY_WRITE, NULL)) + return -EPERM; + return 0; + 
#endif +@@ -848,6 +875,8 @@ static int do_change_type(struct nameida + + if (nd->dentry != nd->mnt->mnt_root) + return -EINVAL; ++ if (!ve_accessible_veid(nd->mnt->owner, get_exec_env()->veid)) ++ return -EPERM; + + down_write(&namespace_sem); + spin_lock(&vfsmount_lock); +@@ -917,7 +946,7 @@ static int do_remount(struct nameidata * + int err; + struct super_block *sb = nd->mnt->mnt_sb; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + + if (!check_mnt(nd->mnt)) +@@ -926,6 +955,9 @@ static int do_remount(struct nameidata * + if (nd->dentry != nd->mnt->mnt_root) + return -EINVAL; + ++ if (!ve_accessible_veid(nd->mnt->owner, get_exec_env()->veid)) ++ return -EPERM; ++ + down_write(&sb->s_umount); + err = do_remount_sb(sb, flags, data, 0); + if (!err) +@@ -951,7 +983,7 @@ static int do_move_mount(struct nameidat + struct nameidata old_nd, parent_nd; + struct vfsmount *p; + int err = 0; +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + if (!old_name || !*old_name) + return -EINVAL; +@@ -959,6 +991,10 @@ static int do_move_mount(struct nameidat + if (err) + return err; + ++ err = -EPERM; ++ if (!ve_accessible_veid(old_nd.mnt->owner, get_exec_env()->veid)) ++ goto out_nosem; ++ + down_write(&namespace_sem); + while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) + ; +@@ -1014,6 +1050,7 @@ out: + up_write(&namespace_sem); + if (!err) + path_release(&parent_nd); ++out_nosem: + path_release(&old_nd); + return err; + } +@@ -1031,7 +1068,7 @@ static int do_new_mount(struct nameidata + return -EINVAL; + + /* we need capabilities... 
*/ +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + + mnt = do_kern_mount(type, flags, name, data); +@@ -1072,6 +1109,10 @@ int do_add_mount(struct vfsmount *newmnt + if ((err = graft_tree(newmnt, nd))) + goto unlock; + ++ if (newmnt->mnt_mountpoint->d_flags & DCACHE_VIRTUAL) ++ /* unaccessible yet - no lock */ ++ newmnt->mnt_root->d_flags |= DCACHE_VIRTUAL; ++ + if (fslist) { + /* add to the specified expiration list */ + spin_lock(&vfsmount_lock); +@@ -1469,6 +1510,7 @@ out1: + free_page(type_page); + return retval; + } ++EXPORT_SYMBOL_GPL(sys_mount); + + /* + * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. +@@ -1520,7 +1562,7 @@ static void chroot_fs_refs(struct nameid + struct fs_struct *fs; + + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_ve(g, p) { + task_lock(p); + fs = p->fs; + if (fs) { +@@ -1535,7 +1577,7 @@ static void chroot_fs_refs(struct nameid + put_fs_struct(fs); + } else + task_unlock(p); +- } while_each_thread(g, p); ++ } while_each_thread_ve(g, p); + read_unlock(&tasklist_lock); + } + +@@ -1688,10 +1730,10 @@ static void __init init_mount_tree(void) + + init_task.namespace = namespace; + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + get_namespace(namespace); + p->namespace = namespace; +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + read_unlock(&tasklist_lock); + + set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root); +@@ -1707,7 +1749,8 @@ void __init mnt_init(unsigned long mempa + init_rwsem(&namespace_sem); + + mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount), +- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL); ++ 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC, ++ NULL, NULL); + + mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); + +@@ -1763,3 +1806,4 @@ void __put_namespace(struct namespace *n + release_mounts(&umount_list); + kfree(namespace); + } 
++EXPORT_SYMBOL_GPL(__put_namespace); +diff -upr linux-2.6.16.orig/fs/nfs/dir.c linux-2.6.16-026test015/fs/nfs/dir.c +--- linux-2.6.16.orig/fs/nfs/dir.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/nfs/dir.c 2006-07-04 14:41:37.000000000 +0400 +@@ -1635,7 +1635,8 @@ out: + return -EACCES; + } + +-int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) ++int nfs_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + struct rpc_cred *cred; + int res = 0; +@@ -1683,7 +1684,7 @@ out: + out_notsup: + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (res == 0) +- res = generic_permission(inode, mask, NULL); ++ res = generic_permission(inode, mask, NULL, perm); + unlock_kernel(); + return res; + } +diff -upr linux-2.6.16.orig/fs/nfs/nfsroot.c linux-2.6.16-026test015/fs/nfs/nfsroot.c +--- linux-2.6.16.orig/fs/nfs/nfsroot.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/nfs/nfsroot.c 2006-07-04 14:41:38.000000000 +0400 +@@ -312,7 +312,7 @@ static int __init root_nfs_name(char *na + /* Override them by options set on kernel command-line */ + root_nfs_parse(name, buf); + +- cp = system_utsname.nodename; ++ cp = ve_utsname.nodename; + if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { + printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); + return -1; +diff -upr linux-2.6.16.orig/fs/nfsd/nfs3proc.c linux-2.6.16-026test015/fs/nfsd/nfs3proc.c +--- linux-2.6.16.orig/fs/nfsd/nfs3proc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/nfsd/nfs3proc.c 2006-07-04 14:41:36.000000000 +0400 +@@ -682,7 +682,7 @@ static struct svc_procedure nfsd_proced + PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), + PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), + PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), +- PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE), ++ 
PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4), + PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), + PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), + PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), +diff -upr linux-2.6.16.orig/fs/nfsd/nfs4proc.c linux-2.6.16-026test015/fs/nfsd/nfs4proc.c +--- linux-2.6.16.orig/fs/nfsd/nfs4proc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/nfsd/nfs4proc.c 2006-07-04 14:41:36.000000000 +0400 +@@ -975,7 +975,7 @@ struct nfsd4_voidargs { int dummy; }; + */ + static struct svc_procedure nfsd_procedures4[2] = { + PROC(null, void, void, void, RC_NOCACHE, 1), +- PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE) ++ PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4) + }; + + struct svc_version nfsd_version4 = { +diff -upr linux-2.6.16.orig/fs/nfsd/nfsfh.c linux-2.6.16-026test015/fs/nfsd/nfsfh.c +--- linux-2.6.16.orig/fs/nfsd/nfsfh.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/nfsd/nfsfh.c 2006-07-04 14:41:37.000000000 +0400 +@@ -56,7 +56,7 @@ static int nfsd_acceptable(void *expv, s + /* make sure parents give x permission to user */ + int err; + parent = dget_parent(tdentry); +- err = permission(parent->d_inode, MAY_EXEC, NULL); ++ err = permission(parent->d_inode, MAY_EXEC, NULL, NULL); + if (err < 0) { + dput(parent); + break; +diff -upr linux-2.6.16.orig/fs/nfsd/nfsproc.c linux-2.6.16-026test015/fs/nfsd/nfsproc.c +--- linux-2.6.16.orig/fs/nfsd/nfsproc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/nfsd/nfsproc.c 2006-07-04 14:41:36.000000000 +0400 +@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_proced + PROC(none, void, void, none, RC_NOCACHE, ST), + PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), + PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), +- PROC(read, readargs, readres, fhandle, 
RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE), ++ PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4), + PROC(none, void, void, none, RC_NOCACHE, ST), + PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), + PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), +diff -upr linux-2.6.16.orig/fs/nfsd/vfs.c linux-2.6.16-026test015/fs/nfsd/vfs.c +--- linux-2.6.16.orig/fs/nfsd/vfs.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/nfsd/vfs.c 2006-07-04 14:41:37.000000000 +0400 +@@ -1817,12 +1817,13 @@ nfsd_permission(struct svc_export *exp, + inode->i_uid == current->fsuid) + return 0; + +- err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); ++ err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), ++ NULL, NULL); + + /* Allow read access to binaries even when mode 111 */ + if (err == -EACCES && S_ISREG(inode->i_mode) && + acc == (MAY_READ | MAY_OWNER_OVERRIDE)) +- err = permission(inode, MAY_EXEC, NULL); ++ err = permission(inode, MAY_EXEC, NULL, NULL); + + return err? nfserrno(err) : 0; + } +diff -upr linux-2.6.16.orig/fs/ntfs/file.c linux-2.6.16-026test015/fs/ntfs/file.c +--- linux-2.6.16.orig/fs/ntfs/file.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ntfs/file.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1489,14 +1489,15 @@ static inline void ntfs_flush_dcache_pag + unsigned nr_pages) + { + BUG_ON(!nr_pages); ++ /* ++ * Warning: Do not do the decrement at the same time as the call to ++ * flush_dcache_page() because it is a NULL macro on i386 and hence the ++ * decrement never happens so the loop never terminates. ++ */ + do { +- /* +- * Warning: Do not do the decrement at the same time as the +- * call because flush_dcache_page() is a NULL macro on i386 +- * and hence the decrement never happens. 
+- */ ++ --nr_pages; + flush_dcache_page(pages[nr_pages]); +- } while (--nr_pages > 0); ++ } while (nr_pages > 0); + } + + /** +diff -upr linux-2.6.16.orig/fs/ntfs/super.c linux-2.6.16-026test015/fs/ntfs/super.c +--- linux-2.6.16.orig/fs/ntfs/super.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/ntfs/super.c 2006-07-04 14:41:37.000000000 +0400 +@@ -3033,7 +3033,7 @@ iput_tmp_ino_err_out_now: + * method again... FIXME: Do we need to do this twice now because of + * attribute inodes? I think not, so leave as is for now... (AIA) + */ +- if (invalidate_inodes(sb)) { ++ if (invalidate_inodes(sb, 0)) { + ntfs_error(sb, "Busy inodes left. This is most likely a NTFS " + "driver bug."); + /* Copied from fs/super.c. I just love this message. (-; */ +diff -upr linux-2.6.16.orig/fs/open.c linux-2.6.16-026test015/fs/open.c +--- linux-2.6.16.orig/fs/open.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/open.c 2006-07-04 14:41:39.000000000 +0400 +@@ -25,6 +25,7 @@ + #include <linux/fs.h> + #include <linux/personality.h> + #include <linux/pagemap.h> ++#include <linux/faudit.h> + #include <linux/syscalls.h> + #include <linux/rcupdate.h> + +@@ -51,7 +52,21 @@ int vfs_statfs(struct super_block *sb, s + + EXPORT_SYMBOL(vfs_statfs); + +-static int vfs_statfs_native(struct super_block *sb, struct statfs *buf) ++int faudit_statfs(struct super_block *sb, struct kstatfs *buf) ++{ ++ struct faudit_statfs_arg arg; ++ ++ arg.sb = sb; ++ arg.stat = buf; ++ ++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS, &arg) ++ != NOTIFY_DONE) ++ return arg.err; ++ return 0; ++} ++ ++static int vfs_statfs_native(struct super_block *sb, struct vfsmount *mnt, ++ struct statfs *buf) + { + struct kstatfs st; + int retval; +@@ -60,6 +75,10 @@ static int vfs_statfs_native(struct supe + if (retval) + return retval; + ++ retval = faudit_statfs(mnt->mnt_sb, &st); ++ if (retval) ++ return retval; ++ + if (sizeof(*buf) == sizeof(st)) + memcpy(buf, &st, 
sizeof(st)); + else { +@@ -94,7 +113,8 @@ static int vfs_statfs_native(struct supe + return 0; + } + +-static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf) ++static int vfs_statfs64(struct super_block *sb, struct vfsmount *mnt, ++ struct statfs64 *buf) + { + struct kstatfs st; + int retval; +@@ -103,6 +123,10 @@ static int vfs_statfs64(struct super_blo + if (retval) + return retval; + ++ retval = faudit_statfs(mnt->mnt_sb, &st); ++ if (retval) ++ return retval; ++ + if (sizeof(*buf) == sizeof(st)) + memcpy(buf, &st, sizeof(st)); + else { +@@ -129,7 +153,8 @@ asmlinkage long sys_statfs(const char __ + error = user_path_walk(path, &nd); + if (!error) { + struct statfs tmp; +- error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp); ++ error = vfs_statfs_native(nd.dentry->d_inode->i_sb, ++ nd.mnt, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) + error = -EFAULT; + path_release(&nd); +@@ -148,7 +173,8 @@ asmlinkage long sys_statfs64(const char + error = user_path_walk(path, &nd); + if (!error) { + struct statfs64 tmp; +- error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp); ++ error = vfs_statfs64(nd.dentry->d_inode->i_sb, ++ nd.mnt, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) + error = -EFAULT; + path_release(&nd); +@@ -167,7 +193,8 @@ asmlinkage long sys_fstatfs(unsigned int + file = fget(fd); + if (!file) + goto out; +- error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp); ++ error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, ++ file->f_vfsmnt, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) + error = -EFAULT; + fput(file); +@@ -188,7 +215,8 @@ asmlinkage long sys_fstatfs64(unsigned i + file = fget(fd); + if (!file) + goto out; +- error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp); ++ error = vfs_statfs64(file->f_dentry->d_inode->i_sb, ++ file->f_vfsmnt, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) + error = -EFAULT; + fput(file); +@@ -243,7 +271,7 @@ static long 
do_sys_truncate(const char _ + if (!S_ISREG(inode->i_mode)) + goto dput_and_out; + +- error = vfs_permission(&nd, MAY_WRITE); ++ error = vfs_permission(&nd, MAY_WRITE, NULL); + if (error) + goto dput_and_out; + +@@ -330,7 +358,10 @@ out: + + asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) + { +- return do_sys_ftruncate(fd, length, 1); ++ long ret = do_sys_ftruncate(fd, length, 1); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + /* LFS versions of truncate are only needed on 32 bit machines */ +@@ -342,7 +373,10 @@ asmlinkage long sys_truncate64(const cha + + asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) + { +- return do_sys_ftruncate(fd, length, 0); ++ long ret = do_sys_ftruncate(fd, length, 0); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + #endif + +@@ -397,7 +431,7 @@ asmlinkage long sys_utime(char __user * + goto dput_and_out; + + if (current->fsuid != inode->i_uid && +- (error = vfs_permission(&nd, MAY_WRITE)) != 0) ++ (error = vfs_permission(&nd, MAY_WRITE, NULL)) != 0) + goto dput_and_out; + } + mutex_lock(&inode->i_mutex); +@@ -450,7 +484,7 @@ long do_utimes(int dfd, char __user *fil + goto dput_and_out; + + if (current->fsuid != inode->i_uid && +- (error = vfs_permission(&nd, MAY_WRITE)) != 0) ++ (error = vfs_permission(&nd, MAY_WRITE, NULL)) != 0) + goto dput_and_out; + } + mutex_lock(&inode->i_mutex); +@@ -514,7 +548,7 @@ asmlinkage long sys_faccessat(int dfd, c + + res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); + if (!res) { +- res = vfs_permission(&nd, mode); ++ res = vfs_permission(&nd, mode, NULL); + /* SuS v2 requires we report a read only fs too */ + if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) + && !special_file(nd.dentry->d_inode->i_mode)) +@@ -543,7 +577,7 @@ asmlinkage long sys_chdir(const char __u + if (error) + goto out; + +- error = vfs_permission(&nd, MAY_EXEC); ++ error = 
vfs_permission(&nd, MAY_EXEC, NULL); + if (error) + goto dput_and_out; + +@@ -594,7 +628,7 @@ asmlinkage long sys_chroot(const char __ + if (error) + goto out; + +- error = vfs_permission(&nd, MAY_EXEC); ++ error = vfs_permission(&nd, MAY_EXEC, NULL); + if (error) + goto dput_and_out; + +@@ -733,6 +767,7 @@ asmlinkage long sys_chown(const char __u + } + return error; + } ++EXPORT_SYMBOL_GPL(sys_chown); + + asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, + gid_t group, int flag) +@@ -1083,20 +1118,30 @@ long do_sys_open(int dfd, const char __u + + asmlinkage long sys_open(const char __user *filename, int flags, int mode) + { ++ long ret; ++ + if (force_o_largefile()) + flags |= O_LARGEFILE; + +- return do_sys_open(AT_FDCWD, filename, flags, mode); ++ ret = do_sys_open(AT_FDCWD, filename, flags, mode); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + EXPORT_SYMBOL_GPL(sys_open); + + asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, + int mode) + { ++ long ret; ++ + if (force_o_largefile()) + flags |= O_LARGEFILE; + +- return do_sys_open(dfd, filename, flags, mode); ++ ret = do_sys_open(dfd, filename, flags, mode); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + EXPORT_SYMBOL_GPL(sys_openat); + +diff -upr linux-2.6.16.orig/fs/partitions/check.c linux-2.6.16-026test015/fs/partitions/check.c +--- linux-2.6.16.orig/fs/partitions/check.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/partitions/check.c 2006-07-04 14:41:38.000000000 +0400 +@@ -128,6 +128,7 @@ char *disk_name(struct gendisk *hd, int + + return buf; + } ++EXPORT_SYMBOL(disk_name); + + const char *bdevname(struct block_device *bdev, char *buf) + { +@@ -345,6 +346,7 @@ static char *make_block_name(struct gend + char *name; + static char *block_str = "block:"; + int size; ++ char *s; + + size = strlen(block_str) + strlen(disk->disk_name) + 1; + name = 
kmalloc(size, GFP_KERNEL); +@@ -352,6 +354,10 @@ static char *make_block_name(struct gend + return NULL; + strcpy(name, block_str); + strcat(name, disk->disk_name); ++ /* ewww... some of these buggers have / in name... */ ++ s = strchr(name, '/'); ++ if (s) ++ *s = '!'; + return name; + } + +diff -upr linux-2.6.16.orig/fs/pipe.c linux-2.6.16-026test015/fs/pipe.c +--- linux-2.6.16.orig/fs/pipe.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/pipe.c 2006-07-04 14:41:39.000000000 +0400 +@@ -19,6 +19,8 @@ + #include <asm/uaccess.h> + #include <asm/ioctls.h> + ++#include <ub/ub_mem.h> ++ + /* + * We use a start+len construction, which provides full use of the + * allocated memory. +@@ -284,7 +286,7 @@ pipe_writev(struct file *filp, const str + int error; + + if (!page) { +- page = alloc_page(GFP_HIGHUSER); ++ page = alloc_page(GFP_HIGHUSER | __GFP_UBC); + if (unlikely(!page)) { + ret = ret ? : -ENOMEM; + break; +@@ -662,7 +664,7 @@ struct inode* pipe_new(struct inode* ino + { + struct pipe_inode_info *info; + +- info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); ++ info = ub_kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); + if (!info) + goto fail_page; + memset(info, 0, sizeof(*info)); +@@ -797,6 +799,7 @@ close_f1: + no_files: + return error; + } ++EXPORT_SYMBOL_GPL(do_pipe); + + /* + * pipefs should _never_ be mounted by userland - too much of security hassle, +diff -upr linux-2.6.16.orig/fs/proc/array.c linux-2.6.16-026test015/fs/proc/array.c +--- linux-2.6.16.orig/fs/proc/array.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/array.c 2006-07-04 14:41:39.000000000 +0400 +@@ -75,6 +75,9 @@ + #include <linux/times.h> + #include <linux/cpuset.h> + #include <linux/rcupdate.h> ++#include <linux/fairsched.h> ++ ++#include <ub/beancounter.h> + + #include <asm/uaccess.h> + #include <asm/pgtable.h> +@@ -161,8 +164,13 @@ static inline char * task_state(struct t + struct group_info *group_info; + int g; + struct 
fdtable *fdt = NULL; ++ pid_t pid, ppid, tgid; ++ ++ pid = get_task_pid(p); ++ tgid = get_task_tgid(p); + + read_lock(&tasklist_lock); ++ ppid = get_task_ppid(p); + buffer += sprintf(buffer, + "State:\t%s\n" + "SleepAVG:\t%lu%%\n" +@@ -170,13 +178,19 @@ static inline char * task_state(struct t + "Pid:\t%d\n" + "PPid:\t%d\n" + "TracerPid:\t%d\n" ++#ifdef CONFIG_FAIRSCHED ++ "FNid:\t%d\n" ++#endif + "Uid:\t%d\t%d\t%d\t%d\n" + "Gid:\t%d\t%d\t%d\t%d\n", + get_task_state(p), + (p->sleep_avg/1024)*100/(1020000000/1024), +- p->tgid, +- p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, +- pid_alive(p) && p->ptrace ? p->parent->pid : 0, ++ tgid, ++ pid, ppid, ++ pid_alive(p) && p->ptrace ? get_task_pid(p->parent) : 0, ++#ifdef CONFIG_FAIRSCHED ++ task_fairsched_node_id(p), ++#endif + p->uid, p->euid, p->suid, p->fsuid, + p->gid, p->egid, p->sgid, p->fsgid); + read_unlock(&tasklist_lock); +@@ -199,6 +213,18 @@ static inline char * task_state(struct t + put_group_info(group_info); + + buffer += sprintf(buffer, "\n"); ++ ++#ifdef CONFIG_VE ++ buffer += sprintf(buffer, ++ "envID:\t%d\n" ++ "VPid:\t%d\n" ++ "PNState:\t%u\n" ++ "StopState:\t%u\n", ++ VE_TASK_INFO(p)->owner_env->veid, ++ virt_pid(p), ++ p->pn_state, ++ p->stopped_state); ++#endif + return buffer; + } + +@@ -244,7 +270,7 @@ static void collect_sigign_sigcatch(stru + + static inline char * task_sig(struct task_struct *p, char *buffer) + { +- sigset_t pending, shpending, blocked, ignored, caught; ++ sigset_t pending, shpending, blocked, ignored, caught, saved; + int num_threads = 0; + unsigned long qsize = 0; + unsigned long qlim = 0; +@@ -254,6 +280,7 @@ static inline char * task_sig(struct tas + sigemptyset(&blocked); + sigemptyset(&ignored); + sigemptyset(&caught); ++ sigemptyset(&saved); + + /* Gather all the data with the appropriate locks held */ + read_lock(&tasklist_lock); +@@ -262,6 +289,7 @@ static inline char * task_sig(struct tas + pending = p->pending.signal; + shpending = 
p->signal->shared_pending.signal; + blocked = p->blocked; ++ saved = p->saved_sigmask; + collect_sigign_sigcatch(p, &ignored, &caught); + num_threads = atomic_read(&p->signal->count); + qsize = atomic_read(&p->user->sigpending); +@@ -279,6 +307,7 @@ static inline char * task_sig(struct tas + buffer = render_sigset_t("SigBlk:\t", &blocked, buffer); + buffer = render_sigset_t("SigIgn:\t", &ignored, buffer); + buffer = render_sigset_t("SigCgt:\t", &caught, buffer); ++ buffer = render_sigset_t("SigSvd:\t", &saved, buffer); + + return buffer; + } +@@ -293,10 +322,27 @@ static inline char *task_cap(struct task + cap_t(p->cap_effective)); + } + ++#ifdef CONFIG_USER_RESOURCE ++static inline void ub_dump_task_info(struct task_struct *tsk, ++ char *stsk, int ltsk, char *smm, int lmm) ++{ ++ print_ub_uid(tsk->task_bc.task_ub, stsk, ltsk); ++ task_lock(tsk); ++ if (tsk->mm) ++ print_ub_uid(tsk->mm->mm_ub, smm, lmm); ++ else ++ strncpy(smm, "N/A", lmm); ++ task_unlock(tsk); ++} ++#endif ++ + int proc_pid_status(struct task_struct *task, char * buffer) + { + char * orig = buffer; + struct mm_struct *mm = get_task_mm(task); ++#ifdef CONFIG_USER_RESOURCE ++ char tsk_ub_info[64], mm_ub_info[64]; ++#endif + + buffer = task_name(task, buffer); + buffer = task_state(task, buffer); +@@ -311,6 +357,14 @@ int proc_pid_status(struct task_struct * + #if defined(CONFIG_S390) + buffer = task_show_regs(task, buffer); + #endif ++#ifdef CONFIG_USER_RESOURCE ++ ub_dump_task_info(task, ++ tsk_ub_info, sizeof(tsk_ub_info), ++ mm_ub_info, sizeof(mm_ub_info)); ++ ++ buffer += sprintf(buffer, "TaskUB:\t%s\n", tsk_ub_info); ++ buffer += sprintf(buffer, "MMUB:\t%s\n", mm_ub_info); ++#endif + return buffer - orig; + } + +@@ -333,6 +387,10 @@ static int do_task_stat(struct task_stru + DEFINE_KTIME(it_real_value); + struct task_struct *t; + char tcomm[sizeof(task->comm)]; ++#ifdef CONFIG_USER_RESOURCE ++ char ub_task_info[64]; ++ char ub_mm_info[64]; ++#endif + + state = *get_task_state(task); + vsize = 
eip = esp = 0; +@@ -370,11 +428,12 @@ static int do_task_stat(struct task_stru + } + if (task->signal) { + if (task->signal->tty) { +- tty_pgrp = task->signal->tty->pgrp; ++ tty_pgrp = pid_type_to_vpid(PIDTYPE_PGID, ++ task->signal->tty->pgrp); + tty_nr = new_encode_dev(tty_devnum(task->signal->tty)); + } +- pgid = process_group(task); +- sid = task->signal->session; ++ pgid = get_task_pgid(task); ++ sid = get_task_sid(task); + cmin_flt = task->signal->cmin_flt; + cmaj_flt = task->signal->cmaj_flt; + cutime = task->signal->cutime; +@@ -388,7 +447,7 @@ static int do_task_stat(struct task_stru + } + it_real_value = task->signal->real_timer.expires; + } +- ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; ++ ppid = get_task_ppid(task); + read_unlock(&tasklist_lock); + + if (!whole || num_threads<2) +@@ -407,14 +466,34 @@ static int do_task_stat(struct task_stru + + /* Temporary variable needed for gcc-2.96 */ + /* convert timespec -> nsec*/ ++#ifndef CONFIG_VE + start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC + + task->start_time.tv_nsec; ++#else ++ start_time = (unsigned long long)(task->start_time.tv_sec - ++ get_exec_env()->init_entry->start_time.tv_sec) * ++ NSEC_PER_SEC + task->start_time.tv_nsec - ++ get_exec_env()->init_entry->start_time.tv_nsec; ++#endif + /* convert nsec -> ticks */ + start_time = nsec_to_clock_t(start_time); + ++#ifdef CONFIG_USER_RESOURCE ++ ub_dump_task_info(task, ++ ub_task_info, sizeof(ub_task_info), ++ ub_mm_info, sizeof(ub_mm_info)); ++#endif ++ + res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ + %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ +-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", ++%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu" ++#ifdef CONFIG_VE ++"0 0 0 0 0 0 0 0 %d %u" ++#endif ++#ifdef CONFIG_USER_RESOURCE ++ " %s %s" ++#endif ++ "\n", + task->pid, + tcomm, + state, +@@ -459,7 +538,16 @@ static int do_task_stat(struct task_stru + 
task->exit_signal, + task_cpu(task), + task->rt_priority, +- task->policy); ++ task->policy ++#ifdef CONFIG_VE ++ , virt_pid(task), ++ VEID(VE_TASK_INFO(task)->owner_env) ++#endif ++#ifdef CONFIG_USER_RESOURCE ++ , ub_task_info, ++ ub_mm_info ++#endif ++ ); + if(mm) + mmput(mm); + return res; +diff -upr linux-2.6.16.orig/fs/proc/base.c linux-2.6.16-026test015/fs/proc/base.c +--- linux-2.6.16.orig/fs/proc/base.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/base.c 2006-07-04 14:41:38.000000000 +0400 +@@ -291,22 +291,29 @@ static int proc_fd_link(struct inode *in + struct files_struct *files; + struct file *file; + int fd = proc_type(inode) - PROC_TID_FD_DIR; ++ int err = -ENOENT; + + files = get_files_struct(task); + if (files) { +- rcu_read_lock(); ++ /* ++ * We are not taking a ref to the file structure, so we must ++ * hold ->file_lock. ++ */ ++ spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (file) { +- *mnt = mntget(file->f_vfsmnt); +- *dentry = dget(file->f_dentry); +- rcu_read_unlock(); +- put_files_struct(files); +- return 0; ++ if (d_root_check(file->f_dentry, file->f_vfsmnt)) { ++ err = -EACCES; ++ } else { ++ *mnt = mntget(file->f_vfsmnt); ++ *dentry = dget(file->f_dentry); ++ err = 0; ++ } + } +- rcu_read_unlock(); ++ spin_unlock(&files->file_lock); + put_files_struct(files); + } +- return -ENOENT; ++ return err; + } + + static struct fs_struct *get_fs_struct(struct task_struct *task) +@@ -326,10 +333,12 @@ static int proc_cwd_link(struct inode *i + int result = -ENOENT; + if (fs) { + read_lock(&fs->lock); +- *mnt = mntget(fs->pwdmnt); +- *dentry = dget(fs->pwd); ++ result = d_root_check(fs->pwd, fs->pwdmnt); ++ if (!result) { ++ *mnt = mntget(fs->pwdmnt); ++ *dentry = dget(fs->pwd); ++ } + read_unlock(&fs->lock); +- result = 0; + put_fs_struct(fs); + } + return result; +@@ -579,19 +588,21 @@ static int proc_check_root(struct inode + return proc_check_chroot(root, vfsmnt); + } + +-static int 
proc_permission(struct inode *inode, int mask, struct nameidata *nd) ++static int proc_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { +- if (generic_permission(inode, mask, NULL) != 0) ++ if (generic_permission(inode, mask, NULL, perm) != 0) + return -EACCES; + return proc_check_root(inode); + } + +-static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) ++static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + struct dentry *root; + struct vfsmount *vfsmnt; + +- if (generic_permission(inode, mask, NULL) != 0) ++ if (generic_permission(inode, mask, NULL, perm) != 0) + return -EACCES; + + if (proc_task_root_link(inode, &root, &vfsmnt)) +@@ -1303,6 +1314,10 @@ static struct inode *proc_pid_make_inode + struct inode * inode; + struct proc_inode *ei; + ++ if (!ve_accessible(VE_TASK_INFO(task)->owner_env, ++ VE_OWNER_FSTYPE(sb->s_type))) ++ return NULL; ++ + /* We need a new inode */ + + inode = new_inode(sb); +@@ -1406,6 +1421,10 @@ static void pid_base_iput(struct dentry + spin_lock(&task->proc_lock); + if (task->proc_dentry == dentry) + task->proc_dentry = NULL; ++#ifdef CONFIG_VE ++ if (VE_TASK_INFO(task)->glob_proc_dentry == dentry) ++ VE_TASK_INFO(task)->glob_proc_dentry = NULL; ++#endif + spin_unlock(&task->proc_lock); + iput(inode); + } +@@ -1485,7 +1504,12 @@ static struct dentry *proc_lookupfd(stru + if (!files) + goto out_unlock; + inode->i_mode = S_IFLNK; +- rcu_read_lock(); ++ ++ /* ++ * We are not taking a ref to the file structure, so we must ++ * hold ->file_lock. 
++ */ ++ spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (!file) + goto out_unlock2; +@@ -1493,7 +1517,7 @@ static struct dentry *proc_lookupfd(stru + inode->i_mode |= S_IRUSR | S_IXUSR; + if (file->f_mode & 2) + inode->i_mode |= S_IWUSR | S_IXUSR; +- rcu_read_unlock(); ++ spin_unlock(&files->file_lock); + put_files_struct(files); + inode->i_op = &proc_pid_link_inode_operations; + inode->i_size = 64; +@@ -1503,7 +1527,7 @@ static struct dentry *proc_lookupfd(stru + return NULL; + + out_unlock2: +- rcu_read_unlock(); ++ spin_unlock(&files->file_lock); + put_files_struct(files); + out_unlock: + iput(inode); +@@ -1879,14 +1903,14 @@ static int proc_self_readlink(struct den + int buflen) + { + char tmp[30]; +- sprintf(tmp, "%d", current->tgid); ++ sprintf(tmp, "%d", get_task_tgid(current)); + return vfs_readlink(dentry,buffer,buflen,tmp); + } + + static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) + { + char tmp[30]; +- sprintf(tmp, "%d", current->tgid); ++ sprintf(tmp, "%d", get_task_tgid(current)); + return ERR_PTR(vfs_follow_link(nd,tmp)); + } + +@@ -1911,11 +1935,8 @@ static struct inode_operations proc_self + * of PIDTYPE_PID. + */ + +-struct dentry *proc_pid_unhash(struct task_struct *p) ++struct dentry *__proc_pid_unhash(struct task_struct *p, struct dentry *proc_dentry) + { +- struct dentry *proc_dentry; +- +- proc_dentry = p->proc_dentry; + if (proc_dentry != NULL) { + + spin_lock(&dcache_lock); +@@ -1933,6 +1954,14 @@ struct dentry *proc_pid_unhash(struct ta + return proc_dentry; + } + ++void proc_pid_unhash(struct task_struct *p, struct dentry *pd[2]) ++{ ++ pd[0] = __proc_pid_unhash(p, p->proc_dentry); ++#ifdef CONFIG_VE ++ pd[1] = __proc_pid_unhash(p, VE_TASK_INFO(p)->glob_proc_dentry); ++#endif ++} ++ + /** + * proc_pid_flush - recover memory used by stale /proc/@pid/x entries + * @proc_dentry: directoy to prune. 
+@@ -1940,7 +1969,7 @@ struct dentry *proc_pid_unhash(struct ta + * Shrink the /proc directory that was used by the just killed thread. + */ + +-void proc_pid_flush(struct dentry *proc_dentry) ++void __proc_pid_flush(struct dentry *proc_dentry) + { + might_sleep(); + if(proc_dentry != NULL) { +@@ -1949,12 +1978,21 @@ void proc_pid_flush(struct dentry *proc_ + } + } + ++void proc_pid_flush(struct dentry *proc_dentry[2]) ++{ ++ __proc_pid_flush(proc_dentry[0]); ++#ifdef CONFIG_VE ++ __proc_pid_flush(proc_dentry[1]); ++#endif ++} ++ + /* SMP-safe */ + struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) + { + struct task_struct *task; + struct inode *inode; + struct proc_inode *ei; ++ struct dentry *pd[2]; + unsigned tgid; + int died; + +@@ -1978,7 +2016,19 @@ struct dentry *proc_pid_lookup(struct in + goto out; + + read_lock(&tasklist_lock); +- task = find_task_by_pid(tgid); ++ task = find_task_by_pid_ve(tgid); ++ /* In theory we are allowed to lookup both /proc/VIRT_PID and ++ * /proc/GLOBAL_PID inside VE. However, current /proc implementation ++ * cannot maintain two references to one task, so that we have ++ * to prohibit /proc/GLOBAL_PID. ++ */ ++ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tgid)) { ++ /* However, VE_ENTERed tasks are exception, they use global ++ * pids. 
++ */ ++ if (virt_pid(task) != tgid) ++ task = NULL; ++ } + if (task) + get_task_struct(task); + read_unlock(&tasklist_lock); +@@ -2007,16 +2057,23 @@ struct dentry *proc_pid_lookup(struct in + died = 0; + d_add(dentry, inode); + spin_lock(&task->proc_lock); ++#ifdef CONFIG_VE ++ if (ve_is_super(VE_OWNER_FSTYPE(inode->i_sb->s_type))) ++ VE_TASK_INFO(task)->glob_proc_dentry = dentry; ++ else ++ task->proc_dentry = dentry; ++#else + task->proc_dentry = dentry; ++#endif + if (!pid_alive(task)) { +- dentry = proc_pid_unhash(task); ++ proc_pid_unhash(task, pd); + died = 1; + } + spin_unlock(&task->proc_lock); + + put_task_struct(task); + if (died) { +- proc_pid_flush(dentry); ++ proc_pid_flush(pd); + goto out; + } + return NULL; +@@ -2037,7 +2094,12 @@ static struct dentry *proc_task_lookup(s + goto out; + + read_lock(&tasklist_lock); +- task = find_task_by_pid(tid); ++ task = find_task_by_pid_ve(tid); ++ /* See comment above in similar place. */ ++ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tid)) { ++ if (virt_pid(task) != tid) ++ task = NULL; ++ } + if (task) + get_task_struct(task); + read_unlock(&tasklist_lock); +@@ -2081,16 +2143,23 @@ out: + * tasklist lock while doing this, and we must release it before + * we actually do the filldir itself, so we use a temp buffer.. 
+ */ +-static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) ++static int get_tgid_list(int index, unsigned long version, unsigned int *tgids, ++ struct ve_struct *ve) + { + struct task_struct *p; + int nr_tgids = 0; + + index--; + read_lock(&tasklist_lock); ++ if (list_empty(&ve->vetask_lh)) ++ goto out; + p = NULL; + if (version) { +- p = find_task_by_pid(version); ++ struct ve_struct *oldve; ++ ++ oldve = set_exec_env(ve); ++ p = find_task_by_pid_ve(version); ++ (void)set_exec_env(oldve); + if (p && !thread_group_leader(p)) + p = NULL; + } +@@ -2098,10 +2167,10 @@ static int get_tgid_list(int index, unsi + if (p) + index = 0; + else +- p = next_task(&init_task); ++ p = __first_task_ve(ve); + +- for ( ; p != &init_task; p = next_task(p)) { +- int tgid = p->pid; ++ for ( ; p != NULL; p = __next_task_ve(ve, p)) { ++ int tgid = get_task_pid_ve(p, ve); + if (!pid_alive(p)) + continue; + if (--index >= 0) +@@ -2111,6 +2180,7 @@ static int get_tgid_list(int index, unsi + if (nr_tgids >= PROC_MAXPIDS) + break; + } ++out: + read_unlock(&tasklist_lock); + return nr_tgids; + } +@@ -2134,7 +2204,7 @@ static int get_tid_list(int index, unsig + * via next_thread(). + */ + if (pid_alive(task)) do { +- int tid = task->pid; ++ int tid = get_task_pid(task); + + if (--index >= 0) + continue; +@@ -2171,7 +2241,8 @@ int proc_pid_readdir(struct file * filp, + next_tgid = filp->f_version; + filp->f_version = 0; + for (;;) { +- nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); ++ nr_tgids = get_tgid_list(nr, next_tgid, tgid_array, ++ filp->f_dentry->d_sb->s_type->owner_env); + if (!nr_tgids) { + /* no more entries ! 
*/ + break; +diff -upr linux-2.6.16.orig/fs/proc/generic.c linux-2.6.16-026test015/fs/proc/generic.c +--- linux-2.6.16.orig/fs/proc/generic.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/generic.c 2006-07-04 14:41:38.000000000 +0400 +@@ -10,7 +10,9 @@ + + #include <linux/errno.h> + #include <linux/time.h> ++#include <linux/fs.h> + #include <linux/proc_fs.h> ++#include <linux/ve_owner.h> + #include <linux/stat.h> + #include <linux/module.h> + #include <linux/mount.h> +@@ -29,6 +31,8 @@ static ssize_t proc_file_write(struct fi + size_t count, loff_t *ppos); + static loff_t proc_file_lseek(struct file *, loff_t, int); + ++static DEFINE_RWLOCK(proc_tree_lock); ++ + int proc_match(int len, const char *name, struct proc_dir_entry *de) + { + if (de->namelen != len) +@@ -229,6 +233,7 @@ proc_file_lseek(struct file *file, loff_ + return retval; + } + ++#ifndef CONFIG_VE + static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) + { + struct inode *inode = dentry->d_inode; +@@ -261,9 +266,12 @@ static int proc_getattr(struct vfsmount + generic_fillattr(inode, stat); + return 0; + } ++#endif + + static struct inode_operations proc_file_inode_operations = { ++#ifndef CONFIG_VE + .setattr = proc_notify_change, ++#endif + }; + + /* +@@ -271,14 +279,20 @@ static struct inode_operations proc_file + * returns the struct proc_dir_entry for "/proc/tty/driver", and + * returns "serial" in residual. 
+ */ +-static int xlate_proc_name(const char *name, ++static int __xlate_proc_name(struct proc_dir_entry *root, const char *name, + struct proc_dir_entry **ret, const char **residual) + { + const char *cp = name, *next; + struct proc_dir_entry *de; + int len; + +- de = &proc_root; ++ if (*ret) { ++ de_get(*ret); ++ return 0; ++ } ++ ++ read_lock(&proc_tree_lock); ++ de = root; + while (1) { + next = strchr(cp, '/'); + if (!next) +@@ -289,15 +303,35 @@ static int xlate_proc_name(const char *n + if (proc_match(len, cp, de)) + break; + } +- if (!de) ++ if (!de) { ++ read_unlock(&proc_tree_lock); + return -ENOENT; ++ } + cp += len + 1; + } + *residual = cp; +- *ret = de; ++ *ret = de_get(de); ++ read_unlock(&proc_tree_lock); + return 0; + } + ++#ifndef CONFIG_VE ++#define xlate_proc_loc_name xlate_proc_name ++#else ++static int xlate_proc_loc_name(const char *name, ++ struct proc_dir_entry **ret, const char **residual) ++{ ++ return __xlate_proc_name(get_exec_env()->proc_root, ++ name, ret, residual); ++} ++#endif ++ ++static int xlate_proc_name(const char *name, ++ struct proc_dir_entry **ret, const char **residual) ++{ ++ return __xlate_proc_name(&proc_root, name, ret, residual); ++} ++ + static DEFINE_IDR(proc_inum_idr); + static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ + +@@ -369,6 +403,20 @@ static struct dentry_operations proc_den + .d_delete = proc_delete_dentry, + }; + ++static struct proc_dir_entry *__proc_lookup(struct proc_dir_entry *dir, ++ struct dentry *d) ++{ ++ struct proc_dir_entry *de; ++ ++ for (de = dir->subdir; de; de = de->next) { ++ if (de->namelen != d->d_name.len) ++ continue; ++ if (!memcmp(d->d_name.name, de->name, de->namelen)) ++ break; ++ } ++ return de_get(de); ++} ++ + /* + * Don't create negative dentries here, return -ENOENT by hand + * instead. 
+@@ -376,34 +424,147 @@ static struct dentry_operations proc_den + struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) + { + struct inode *inode = NULL; +- struct proc_dir_entry * de; ++ struct proc_dir_entry *lde, *gde; + int error = -ENOENT; + + lock_kernel(); +- de = PDE(dir); +- if (de) { +- for (de = de->subdir; de ; de = de->next) { +- if (de->namelen != dentry->d_name.len) +- continue; +- if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { +- unsigned int ino = de->low_ino; ++ lde = LPDE(dir); + +- error = -EINVAL; +- inode = proc_get_inode(dir->i_sb, ino, de); +- break; +- } +- } +- } ++ if (!lde) ++ goto out; ++ ++ read_lock(&proc_tree_lock); ++ lde = __proc_lookup(lde, dentry); ++#ifdef CONFIG_VE ++ gde = GPDE(dir); ++ if (gde) ++ gde = __proc_lookup(gde, dentry); ++#else ++ gde = NULL; ++#endif ++ read_unlock(&proc_tree_lock); ++ ++ /* ++ * There are following possible cases after lookup: ++ * ++ * lde gde ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * NULL NULL ENOENT ++ * loc NULL found in local tree ++ * loc glob found in both trees ++ * NULL glob found in global tree ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * We initialized inode as follows after lookup: ++ * ++ * inode->lde inode->gde ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * loc NULL in local tree ++ * loc glob both trees ++ * glob glob global tree ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * i.e. inode->lde is always initialized ++ */ ++ ++ if (lde == NULL && gde == NULL) ++ goto out; ++ ++ if (lde != NULL) ++ inode = proc_get_inode(dir->i_sb, lde->low_ino, lde); ++ else ++ inode = proc_get_inode(dir->i_sb, gde->low_ino, gde); ++ ++ /* ++ * We can sleep in proc_get_inode(), but since we have i_sem ++ * being taken, no one can setup GPDE/LPDE on this inode. 
++ */ ++ if (!inode) ++ goto out_put; ++ ++#ifdef CONFIG_VE ++ GPDE(inode) = de_get(gde); ++ if (gde) ++ __module_get(gde->owner); ++ ++ /* if dentry is found in both trees and it is a directory ++ * then inode's nlink count must be altered, because local ++ * and global subtrees may differ. ++ * on the other hand, they may intersect, so actual nlink ++ * value is difficult to calculate - upper estimate is used ++ * instead of it. ++ * dentry found in global tree only must not be writable ++ * in non-super ve. ++ */ ++ if (lde && gde && lde != gde && gde->nlink > 1) ++ inode->i_nlink += gde->nlink - 2; ++ if (lde == NULL && !ve_is_super( ++ VE_OWNER_FSTYPE(dir->i_sb->s_type))) ++ inode->i_mode &= ~S_IWUGO; ++#endif + unlock_kernel(); ++ dentry->d_op = &proc_dentry_operations; ++ d_add(dentry, inode); ++ de_put(lde); ++ de_put(gde); ++ return NULL; + +- if (inode) { +- dentry->d_op = &proc_dentry_operations; +- d_add(dentry, inode); +- return NULL; +- } ++out_put: ++ de_put(lde); ++ de_put(gde); ++out: ++ unlock_kernel(); + return ERR_PTR(error); + } + ++struct proc_dir_reader { ++ struct list_head list; ++ struct proc_dir_entry *next; ++}; ++ ++static LIST_HEAD(proc_dir_readers); ++static DEFINE_SPINLOCK(proc_dir_readers_lock); ++ ++static inline void add_reader(struct proc_dir_reader *r, ++ struct proc_dir_entry *cur) ++{ ++ r->next = cur->next; ++ spin_lock(&proc_dir_readers_lock); ++ list_add(&r->list, &proc_dir_readers); ++ spin_unlock(&proc_dir_readers_lock); ++} ++ ++static inline struct proc_dir_entry *del_reader(struct proc_dir_reader *r) ++{ ++ spin_lock(&proc_dir_readers_lock); ++ list_del(&r->list); ++ spin_unlock(&proc_dir_readers_lock); ++ return r->next; ++} ++ ++static void notify_readers(struct proc_dir_entry *de) ++{ ++ struct proc_dir_reader *r; ++ ++ /* lockless since proc_tree_lock is taken for writing */ ++ list_for_each_entry(r, &proc_dir_readers, list) ++ if (r->next == de) ++ r->next = de->next; ++} ++ ++static inline int in_tree(struct 
proc_dir_entry *de, struct proc_dir_entry *dir) ++{ ++ struct proc_dir_entry *gde; ++ ++ for (gde = dir->subdir; gde; gde = gde->next) { ++ if (de->namelen != gde->namelen) ++ continue; ++ if (memcmp(de->name, gde->name, gde->namelen)) ++ continue; ++ return 1; ++ } ++ return 0; ++} ++ + /* + * This returns non-zero if at EOF, so that the /proc + * root directory can use this and check if it should +@@ -421,6 +582,7 @@ int proc_readdir(struct file * filp, + int i; + struct inode *inode = filp->f_dentry->d_inode; + int ret = 0; ++ struct proc_dir_reader this; + + lock_kernel(); + +@@ -447,13 +609,12 @@ int proc_readdir(struct file * filp, + filp->f_pos++; + /* fall through */ + default: ++ read_lock(&proc_tree_lock); + de = de->subdir; + i -= 2; + for (;;) { +- if (!de) { +- ret = 1; +- goto out; +- } ++ if (!de) ++ goto chk_global; + if (!i) + break; + de = de->next; +@@ -461,12 +622,60 @@ int proc_readdir(struct file * filp, + } + + do { +- if (filldir(dirent, de->name, de->namelen, filp->f_pos, +- de->low_ino, de->mode >> 12) < 0) ++ de_get(de); ++ add_reader(&this, de); ++ read_unlock(&proc_tree_lock); ++ ret = filldir(dirent, de->name, de->namelen, ++ filp->f_pos, de->low_ino, ++ de->mode >> 12); ++ read_lock(&proc_tree_lock); ++ de_put(de); ++ de = del_reader(&this); ++ if (ret < 0) { ++ read_unlock(&proc_tree_lock); ++ ret = 0; + goto out; ++ } + filp->f_pos++; +- de = de->next; + } while (de); ++chk_global: ++#ifdef CONFIG_VE ++ de = GPDE(inode); ++ if (de == NULL) ++ goto done; ++ ++ de = de->subdir; ++ while (de) { ++ if (in_tree(de, LPDE(inode))) { ++ de = de->next; ++ continue; ++ } ++ ++ if (i > 0) { ++ i--; ++ de = de->next; ++ continue; ++ } ++ ++ de_get(de); ++ add_reader(&this, de); ++ read_unlock(&proc_tree_lock); ++ ret = filldir(dirent, de->name, de->namelen, ++ filp->f_pos, de->low_ino, ++ de->mode >> 12); ++ read_lock(&proc_tree_lock); ++ de_put(de); ++ de = del_reader(&this); ++ if (ret < 0) { ++ read_unlock(&proc_tree_lock); ++ ret = 0; ++ 
goto out; ++ } ++ filp->f_pos++; ++ } ++done: ++#endif ++ read_unlock(&proc_tree_lock); + } + ret = 1; + out: unlock_kernel(); +@@ -488,8 +697,10 @@ static struct file_operations proc_dir_o + */ + static struct inode_operations proc_dir_inode_operations = { + .lookup = proc_lookup, ++#ifndef CONFIG_VE + .getattr = proc_getattr, + .setattr = proc_notify_change, ++#endif + }; + + static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) +@@ -499,10 +710,20 @@ static int proc_register(struct proc_dir + i = get_inode_number(); + if (i == 0) + return -EAGAIN; ++ ++ write_lock(&proc_tree_lock); ++ if (dir->deleted) { ++ write_unlock(&proc_tree_lock); ++ release_inode_number(i); ++ return -ENOENT; ++ } ++ + dp->low_ino = i; + dp->next = dir->subdir; +- dp->parent = dir; ++ dp->parent = de_get(dir); + dir->subdir = dp; ++ write_unlock(&proc_tree_lock); ++ + if (S_ISDIR(dp->mode)) { + if (dp->proc_iops == NULL) { + dp->proc_fops = &proc_dir_operations; +@@ -556,24 +777,26 @@ static struct proc_dir_entry *proc_creat + mode_t mode, + nlink_t nlink) + { +- struct proc_dir_entry *ent = NULL; ++ struct proc_dir_entry *ent; + const char *fn = name; + int len; + + /* make sure name is valid */ +- if (!name || !strlen(name)) goto out; ++ if (!name || !strlen(name)) ++ goto out; + +- if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) ++ if (xlate_proc_loc_name(name, parent, &fn) != 0) + goto out; + + /* At this point there must not be any '/' characters beyond *fn */ + if (strchr(fn, '/')) +- goto out; ++ goto out_put; + + len = strlen(fn); + + ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); +- if (!ent) goto out; ++ if (!ent) ++ goto out_put; + + memset(ent, 0, sizeof(struct proc_dir_entry)); + memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); +@@ -581,8 +804,13 @@ static struct proc_dir_entry *proc_creat + ent->namelen = len; + ent->mode = mode; + ent->nlink = nlink; +- out: ++ atomic_set(&ent->count, 1); + 
return ent; ++ ++out_put: ++ de_put(*parent); ++out: ++ return NULL; + } + + struct proc_dir_entry *proc_symlink(const char *name, +@@ -606,6 +834,7 @@ struct proc_dir_entry *proc_symlink(cons + kfree(ent); + ent = NULL; + } ++ de_put(parent); + } + return ent; + } +@@ -624,6 +853,7 @@ struct proc_dir_entry *proc_mkdir_mode(c + kfree(ent); + ent = NULL; + } ++ de_put(parent); + } + return ent; + } +@@ -662,9 +892,28 @@ struct proc_dir_entry *create_proc_entry + kfree(ent); + ent = NULL; + } ++ de_put(parent); + } + return ent; + } ++EXPORT_SYMBOL(remove_proc_glob_entry); ++ ++struct proc_dir_entry *create_proc_glob_entry(const char *name, mode_t mode, ++ struct proc_dir_entry *parent) ++{ ++ const char *path; ++ struct proc_dir_entry *ent; ++ ++ path = name; ++ if (xlate_proc_name(path, &parent, &name) != 0) ++ return NULL; ++ ++ ent = create_proc_entry(name, mode, parent); ++ de_put(parent); ++ return ent; ++} ++ ++EXPORT_SYMBOL(create_proc_glob_entry); + + void free_proc_entry(struct proc_dir_entry *de) + { +@@ -684,20 +933,21 @@ void free_proc_entry(struct proc_dir_ent + * Remove a /proc entry and free it if it's not currently in use. + * If it is in use, we set the 'deleted' flag. 
+ */ +-void remove_proc_entry(const char *name, struct proc_dir_entry *parent) ++static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent) + { + struct proc_dir_entry **p; + struct proc_dir_entry *de; + const char *fn = name; + int len; + +- if (!parent && xlate_proc_name(name, &parent, &fn) != 0) +- goto out; + len = strlen(fn); ++ write_lock(&proc_tree_lock); + for (p = &parent->subdir; *p; p=&(*p)->next ) { + if (!proc_match(len, fn, *p)) + continue; ++ + de = *p; ++ notify_readers(de); + *p = de->next; + de->next = NULL; + if (S_ISDIR(de->mode)) +@@ -705,15 +955,43 @@ void remove_proc_entry(const char *name, + proc_kill_inodes(de); + de->nlink = 0; + WARN_ON(de->subdir); +- if (!atomic_read(&de->count)) +- free_proc_entry(de); +- else { +- de->deleted = 1; +- printk("remove_proc_entry: %s/%s busy, count=%d\n", +- parent->name, de->name, atomic_read(&de->count)); +- } ++ de->deleted = 1; ++ de_put(de); ++ de_put(parent); + break; + } +-out: +- return; ++ write_unlock(&proc_tree_lock); ++} ++ ++void remove_proc_loc_entry(const char *name, struct proc_dir_entry *parent) ++{ ++ const char *path; ++ ++ path = name; ++ if (xlate_proc_loc_name(path, &parent, &name) != 0) ++ return; ++ ++ __remove_proc_entry(name, parent); ++ de_put(parent); ++} ++ ++void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent) ++{ ++ const char *path; ++ ++ path = name; ++ if (xlate_proc_name(path, &parent, &name) != 0) ++ return; ++ ++ __remove_proc_entry(name, parent); ++ de_put(parent); ++} ++ ++void remove_proc_entry(const char *name, struct proc_dir_entry *parent) ++{ ++ remove_proc_loc_entry(name, parent); ++#ifdef CONFIG_VE ++ if (ve_is_super(get_exec_env())) ++ remove_proc_glob_entry(name, parent); ++#endif + } +diff -upr linux-2.6.16.orig/fs/proc/inode.c linux-2.6.16-026test015/fs/proc/inode.c +--- linux-2.6.16.orig/fs/proc/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/inode.c 2006-07-04 
14:41:38.000000000 +0400 +@@ -8,6 +8,7 @@ + #include <linux/proc_fs.h> + #include <linux/kernel.h> + #include <linux/mm.h> ++#include <linux/ve_owner.h> + #include <linux/string.h> + #include <linux/stat.h> + #include <linux/file.h> +@@ -21,34 +22,25 @@ + + #include "internal.h" + +-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) +-{ +- if (de) +- atomic_inc(&de->count); +- return de; +-} +- + /* + * Decrements the use count and checks for deferred deletion. + */ +-static void de_put(struct proc_dir_entry *de) ++void de_put(struct proc_dir_entry *de) + { + if (de) { +- lock_kernel(); + if (!atomic_read(&de->count)) { + printk("de_put: entry %s already free!\n", de->name); +- unlock_kernel(); + return; + } + + if (atomic_dec_and_test(&de->count)) { +- if (de->deleted) { +- printk("de_put: deferred delete of %s\n", ++ if (unlikely(!de->deleted)) { ++ printk("de_put: early delete of %s\n", + de->name); +- free_proc_entry(de); ++ return; + } ++ free_proc_entry(de); + } +- unlock_kernel(); + } + } + +@@ -68,12 +60,19 @@ static void proc_delete_inode(struct ino + put_task_struct(tsk); + + /* Let go of any associated proc directory entry */ +- de = PROC_I(inode)->pde; ++ de = LPDE(inode); + if (de) { + if (de->owner) + module_put(de->owner); + de_put(de); + } ++#ifdef CONFIG_VE ++ de = GPDE(inode); ++ if (de) { ++ module_put(de->owner); ++ de_put(de); ++ } ++#endif + clear_inode(inode); + } + +@@ -100,6 +99,9 @@ static struct inode *proc_alloc_inode(st + ei->pde = NULL; + inode = &ei->vfs_inode; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++#ifdef CONFIG_VE ++ GPDE(inode) = NULL; ++#endif + return inode; + } + +@@ -209,6 +211,12 @@ int proc_fill_super(struct super_block * + s->s_root = d_alloc_root(root_inode); + if (!s->s_root) + goto out_no_root; ++#ifdef CONFIG_VE ++ LPDE(root_inode) = de_get(get_exec_env()->proc_root); ++ GPDE(root_inode) = &proc_root; ++#else ++ LPDE(root_inode) = &proc_root; ++#endif + return 0; + + 
out_no_root: +diff -upr linux-2.6.16.orig/fs/proc/kmsg.c linux-2.6.16-026test015/fs/proc/kmsg.c +--- linux-2.6.16.orig/fs/proc/kmsg.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/kmsg.c 2006-07-04 14:41:38.000000000 +0400 +@@ -11,6 +11,7 @@ + #include <linux/kernel.h> + #include <linux/poll.h> + #include <linux/fs.h> ++#include <linux/veprintk.h> + + #include <asm/uaccess.h> + #include <asm/io.h> +@@ -40,7 +41,7 @@ static ssize_t kmsg_read(struct file *fi + + static unsigned int kmsg_poll(struct file *file, poll_table *wait) + { +- poll_wait(file, &log_wait, wait); ++ poll_wait(file, &ve_log_wait, wait); + if (do_syslog(9, NULL, 0)) + return POLLIN | POLLRDNORM; + return 0; +diff -upr linux-2.6.16.orig/fs/proc/proc_misc.c linux-2.6.16-026test015/fs/proc/proc_misc.c +--- linux-2.6.16.orig/fs/proc/proc_misc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/proc_misc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -32,6 +32,7 @@ + #include <linux/pagemap.h> + #include <linux/swap.h> + #include <linux/slab.h> ++#include <linux/virtinfo.h> + #include <linux/smp.h> + #include <linux/signal.h> + #include <linux/module.h> +@@ -45,6 +46,8 @@ + #include <linux/jiffies.h> + #include <linux/sysrq.h> + #include <linux/vmalloc.h> ++#include <linux/version.h> ++#include <linux/compile.h> + #include <linux/crash_dump.h> + #include <asm/uaccess.h> + #include <asm/pgtable.h> +@@ -53,8 +56,10 @@ + #include <asm/div64.h> + #include "internal.h" + +-#define LOAD_INT(x) ((x) >> FSHIFT) +-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) ++#ifdef CONFIG_FAIRSCHED ++#include <linux/fairsched.h> ++#endif ++ + /* + * Warning: stuff below (imported functions) assumes that its output will fit + * into one page. For some of those functions it may be wrong. 
Moreover, we +@@ -84,15 +89,33 @@ static int loadavg_read_proc(char *page, + { + int a, b, c; + int len; +- +- a = avenrun[0] + (FIXED_1/200); +- b = avenrun[1] + (FIXED_1/200); +- c = avenrun[2] + (FIXED_1/200); ++ unsigned long __nr_running; ++ int __nr_threads; ++ unsigned long *__avenrun; ++ struct ve_struct *ve; ++ ++ ve = get_exec_env(); ++ ++ if (ve_is_super(ve)) { ++ __avenrun = &avenrun[0]; ++ __nr_running = nr_running(); ++ __nr_threads = nr_threads; ++ } ++#ifdef CONFIG_VE ++ else { ++ __avenrun = &ve->avenrun[0]; ++ __nr_running = nr_running_ve(ve); ++ __nr_threads = atomic_read(&ve->pcounter); ++ } ++#endif ++ a = __avenrun[0] + (FIXED_1/200); ++ b = __avenrun[1] + (FIXED_1/200); ++ c = __avenrun[2] + (FIXED_1/200); + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), +- nr_running(), nr_threads, last_pid); ++ __nr_running, __nr_threads, last_pid); + return proc_calc_metrics(page, start, off, count, eof, len); + } + +@@ -105,6 +128,13 @@ static int uptime_read_proc(char *page, + cputime_t idletime = cputime_add(init_task.utime, init_task.stime); + + do_posix_clock_monotonic_gettime(&uptime); ++#ifdef CONFIG_VE ++ if (!ve_is_super(get_exec_env())) { ++ set_normalized_timespec(&uptime, ++ uptime.tv_sec - get_exec_env()->start_timespec.tv_sec, ++ uptime.tv_nsec - get_exec_env()->start_timespec.tv_nsec); ++ } ++#endif + cputime_to_timespec(idletime, &idle); + len = sprintf(page,"%lu.%02lu %lu.%02lu\n", + (unsigned long) uptime.tv_sec, +@@ -118,35 +148,37 @@ static int uptime_read_proc(char *page, + static int meminfo_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) + { +- struct sysinfo i; ++ struct meminfo mi; + int len; +- struct page_state ps; +- unsigned long inactive; +- unsigned long active; +- unsigned long free; +- unsigned long committed; +- unsigned long allowed; ++ unsigned long dummy; + struct vmalloc_info vmi; +- long 
cached; + +- get_page_state(&ps); +- get_zone_counts(&active, &inactive, &free); ++ get_page_state(&mi.ps); ++ get_zone_counts(&mi.active, &mi.inactive, &dummy); + + /* + * display in kilobytes. + */ + #define K(x) ((x) << (PAGE_SHIFT - 10)) +- si_meminfo(&i); +- si_swapinfo(&i); +- committed = atomic_read(&vm_committed_space); +- allowed = ((totalram_pages - hugetlb_total_pages()) +- * sysctl_overcommit_ratio / 100) + total_swap_pages; ++ si_meminfo(&mi.si); ++ si_swapinfo(&mi.si); ++ mi.committed_space = atomic_read(&vm_committed_space); ++ mi.swapcache = total_swapcache_pages; ++ mi.cache = get_page_cache_size() - mi.swapcache - mi.si.bufferram; ++ if (mi.cache < 0) ++ mi.cache = 0; + +- cached = get_page_cache_size() - total_swapcache_pages - i.bufferram; +- if (cached < 0) +- cached = 0; ++ mi.vmalloc_total = (VMALLOC_END - VMALLOC_START) >> PAGE_SHIFT; ++ mi.allowed = ((totalram_pages - hugetlb_total_pages()) ++ * sysctl_overcommit_ratio / 100) + total_swap_pages; + + get_vmalloc_info(&vmi); ++ mi.vmalloc_used = vmi.used >> PAGE_SHIFT; ++ mi.vmalloc_largest = vmi.largest_chunk >> PAGE_SHIFT; ++ ++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi) ++ & NOTIFY_FAIL) ++ return -ENOMSG; + + /* + * Tagged format, for easy grepping and expansion. 
+@@ -175,29 +207,29 @@ static int meminfo_read_proc(char *page, + "VmallocTotal: %8lu kB\n" + "VmallocUsed: %8lu kB\n" + "VmallocChunk: %8lu kB\n", +- K(i.totalram), +- K(i.freeram), +- K(i.bufferram), +- K(cached), +- K(total_swapcache_pages), +- K(active), +- K(inactive), +- K(i.totalhigh), +- K(i.freehigh), +- K(i.totalram-i.totalhigh), +- K(i.freeram-i.freehigh), +- K(i.totalswap), +- K(i.freeswap), +- K(ps.nr_dirty), +- K(ps.nr_writeback), +- K(ps.nr_mapped), +- K(ps.nr_slab), +- K(allowed), +- K(committed), +- K(ps.nr_page_table_pages), +- (unsigned long)VMALLOC_TOTAL >> 10, +- vmi.used >> 10, +- vmi.largest_chunk >> 10 ++ K(mi.si.totalram), ++ K(mi.si.freeram), ++ K(mi.si.bufferram), ++ K(mi.cache), ++ K(mi.swapcache), ++ K(mi.active), ++ K(mi.inactive), ++ K(mi.si.totalhigh), ++ K(mi.si.freehigh), ++ K(mi.si.totalram-mi.si.totalhigh), ++ K(mi.si.freeram-mi.si.freehigh), ++ K(mi.si.totalswap), ++ K(mi.si.freeswap), ++ K(mi.ps.nr_dirty), ++ K(mi.ps.nr_writeback), ++ K(mi.ps.nr_mapped), ++ K(mi.ps.nr_slab), ++ K(mi.allowed), ++ K(mi.committed_space), ++ K(mi.ps.nr_page_table_pages), ++ K(mi.vmalloc_total), ++ K(mi.vmalloc_used), ++ K(mi.vmalloc_largest) + ); + + len += hugetlb_report_meminfo(page + len); +@@ -237,8 +269,15 @@ static int version_read_proc(char *page, + int count, int *eof, void *data) + { + int len; ++ struct new_utsname *utsname = &ve_utsname; + +- strcpy(page, linux_banner); ++ if (ve_is_super(get_exec_env())) ++ strcpy(page, linux_banner); ++ else ++ sprintf(page, "Linux version %s (" ++ LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" ++ LINUX_COMPILER ") %s\n", ++ utsname->release, utsname->version); + len = strlen(page); + return proc_calc_metrics(page, start, off, count, eof, len); + } +@@ -249,144 +288,60 @@ static int cpuinfo_open(struct inode *in + return seq_open(file, &cpuinfo_op); + } + +-enum devinfo_states { +- CHR_HDR, +- CHR_LIST, +- BLK_HDR, +- BLK_LIST, +- DEVINFO_DONE +-}; +- +-struct devinfo_state { +- void *chrdev; +- void 
*blkdev; +- unsigned int num_records; +- unsigned int cur_record; +- enum devinfo_states state; ++static struct file_operations proc_cpuinfo_operations = { ++ .open = cpuinfo_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, + }; + +-static void *devinfo_start(struct seq_file *f, loff_t *pos) ++static int devinfo_show(struct seq_file *f, void *v) + { +- struct devinfo_state *info = f->private; ++ int i = *(loff_t *) v; + +- if (*pos) { +- if ((info) && (*pos <= info->num_records)) +- return info; +- return NULL; ++ if (i < CHRDEV_MAJOR_HASH_SIZE) { ++ if (i == 0) ++ seq_printf(f, "Character devices:\n"); ++ chrdev_show(f, i); ++ } else { ++ i -= CHRDEV_MAJOR_HASH_SIZE; ++ if (i == 0) ++ seq_printf(f, "\nBlock devices:\n"); ++ blkdev_show(f, i); + } +- info = kmalloc(sizeof(*info), GFP_KERNEL); +- f->private = info; +- info->chrdev = acquire_chrdev_list(); +- info->blkdev = acquire_blkdev_list(); +- info->state = CHR_HDR; +- info->num_records = count_chrdev_list(); +- info->num_records += count_blkdev_list(); +- info->num_records += 2; /* Character and Block headers */ +- *pos = 1; +- info->cur_record = *pos; +- return info; ++ return 0; + } + +-static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) ++static void *devinfo_start(struct seq_file *f, loff_t *pos) + { +- int idummy; +- char *ndummy; +- struct devinfo_state *info = f->private; +- +- switch (info->state) { +- case CHR_HDR: +- info->state = CHR_LIST; +- (*pos)++; +- /*fallthrough*/ +- case CHR_LIST: +- if (get_chrdev_info(info->chrdev,&idummy,&ndummy)) { +- /* +- * The character dev list is complete +- */ +- info->state = BLK_HDR; +- } else { +- info->chrdev = get_next_chrdev(info->chrdev); +- } +- (*pos)++; +- break; +- case BLK_HDR: +- info->state = BLK_LIST; +- (*pos)++; +- break; +- case BLK_LIST: +- if (get_blkdev_info(info->blkdev,&idummy,&ndummy)) { +- /* +- * The block dev list is complete +- */ +- info->state = DEVINFO_DONE; +- } else { +- info->blkdev = 
get_next_blkdev(info->blkdev); +- } +- (*pos)++; +- break; +- case DEVINFO_DONE: +- (*pos)++; +- info->cur_record = *pos; +- info = NULL; +- break; +- default: +- break; +- } +- if (info) +- info->cur_record = *pos; +- return info; ++ if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) ++ return pos; ++ return NULL; + } + +-static void devinfo_stop(struct seq_file *f, void *v) ++static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) + { +- struct devinfo_state *info = f->private; +- +- if (info) { +- release_chrdev_list(info->chrdev); +- release_blkdev_list(info->blkdev); +- f->private = NULL; +- kfree(info); +- } ++ (*pos)++; ++ if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) ++ return NULL; ++ return pos; + } + +-static int devinfo_show(struct seq_file *f, void *arg) +-{ +- int major; +- char *name; +- struct devinfo_state *info = f->private; +- +- switch(info->state) { +- case CHR_HDR: +- seq_printf(f,"Character devices:\n"); +- /* fallthrough */ +- case CHR_LIST: +- if (!get_chrdev_info(info->chrdev,&major,&name)) +- seq_printf(f,"%3d %s\n",major,name); +- break; +- case BLK_HDR: +- seq_printf(f,"\nBlock devices:\n"); +- /* fallthrough */ +- case BLK_LIST: +- if (!get_blkdev_info(info->blkdev,&major,&name)) +- seq_printf(f,"%3d %s\n",major,name); +- break; +- default: +- break; +- } +- +- return 0; ++static void devinfo_stop(struct seq_file *f, void *v) ++{ ++ /* Nothing to do */ + } + +-static struct seq_operations devinfo_op = { +- .start = devinfo_start, +- .next = devinfo_next, +- .stop = devinfo_stop, +- .show = devinfo_show, ++static struct seq_operations devinfo_ops = { ++ .start = devinfo_start, ++ .next = devinfo_next, ++ .stop = devinfo_stop, ++ .show = devinfo_show + }; + +-static int devinfo_open(struct inode *inode, struct file *file) ++static int devinfo_open(struct inode *inode, struct file *filp) + { +- return seq_open(file, &devinfo_op); ++ return seq_open(filp, &devinfo_ops); + } + + static struct 
file_operations proc_devinfo_operations = { +@@ -396,13 +351,6 @@ static struct file_operations proc_devin + .release = seq_release, + }; + +-static struct file_operations proc_cpuinfo_operations = { +- .open = cpuinfo_open, +- .read = seq_read, +- .llseek = seq_lseek, +- .release = seq_release, +-}; +- + extern struct seq_operations vmstat_op; + static int vmstat_open(struct inode *inode, struct file *file) + { +@@ -487,18 +435,15 @@ static struct file_operations proc_slabi + }; + #endif + +-static int show_stat(struct seq_file *p, void *v) ++static void show_stat_ve0(struct seq_file *p) + { + int i; +- unsigned long jif; ++ struct page_state page_state; + cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; + u64 sum = 0; + + user = nice = system = idle = iowait = + irq = softirq = steal = cputime64_zero; +- jif = - wall_to_monotonic.tv_sec; +- if (wall_to_monotonic.tv_nsec) +- --jif; + + for_each_cpu(i) { + int j; +@@ -552,9 +497,84 @@ static int show_stat(struct seq_file *p, + for (i = 0; i < NR_IRQS; i++) + seq_printf(p, " %u", kstat_irqs(i)); + #endif ++ get_full_page_state(&page_state); ++ seq_printf(p, "\nswap %lu %lu\n", page_state.pswpin, page_state.pswpout); ++} ++ ++#ifdef CONFIG_VE ++static void show_stat_ve(struct seq_file *p, struct ve_struct *env) ++{ ++ int i; ++ u64 user, nice, system; ++ cycles_t idle, iowait; ++ cpumask_t ve_cpus; ++ ++ ve_cpu_online_map(env, &ve_cpus); ++ ++ user = nice = system = idle = iowait = 0; ++ for_each_cpu_mask(i, ve_cpus) { ++ user += VE_CPU_STATS(env, i)->user; ++ nice += VE_CPU_STATS(env, i)->nice; ++ system += VE_CPU_STATS(env, i)->system; ++ idle += ve_sched_get_idle_time(env, i); ++ iowait += ve_sched_get_iowait_time(env, i); ++ } ++ ++ seq_printf(p, "cpu %llu %llu %llu %llu %llu 0 0 0\n", ++ (unsigned long long)cputime64_to_clock_t(user), ++ (unsigned long long)cputime64_to_clock_t(nice), ++ (unsigned long long)cputime64_to_clock_t(system), ++ (unsigned long long)cycles_to_clocks(idle), ++ 
(unsigned long long)cycles_to_clocks(iowait)); ++ ++ for_each_cpu_mask(i, ve_cpus) { ++ user = VE_CPU_STATS(env, i)->user; ++ nice = VE_CPU_STATS(env, i)->nice; ++ system = VE_CPU_STATS(env, i)->system; ++ idle = ve_sched_get_idle_time(env, i); ++ iowait = ve_sched_get_iowait_time(env, i); ++ seq_printf(p, "cpu%d %llu %llu %llu %llu %llu 0 0 0\n", ++ i, ++ (unsigned long long)cputime64_to_clock_t(user), ++ (unsigned long long)cputime64_to_clock_t(nice), ++ (unsigned long long)cputime64_to_clock_t(system), ++ (unsigned long long)cycles_to_clocks(idle), ++ (unsigned long long)cycles_to_clocks(iowait)); ++ } ++ seq_printf(p, "intr 0\nswap 0 0\n"); ++} ++#endif ++ ++int show_stat(struct seq_file *p, void *v) ++{ ++ extern unsigned long total_forks; ++ unsigned long seq, jif; ++ struct ve_struct *env; ++ unsigned long __nr_running, __nr_iowait; ++ ++ do { ++ seq = read_seqbegin(&xtime_lock); ++ jif = - wall_to_monotonic.tv_sec; ++ if (wall_to_monotonic.tv_nsec) ++ --jif; ++ } while (read_seqretry(&xtime_lock, seq)); ++ ++ env = get_exec_env(); ++ if (ve_is_super(env)) { ++ show_stat_ve0(p); ++ __nr_running = nr_running(); ++ __nr_iowait = nr_iowait(); ++ } ++#ifdef CONFIG_VE ++ else { ++ show_stat_ve(p, env); ++ __nr_running = nr_running_ve(env); ++ __nr_iowait = nr_iowait_ve(env); ++ } ++#endif + + seq_printf(p, +- "\nctxt %llu\n" ++ "ctxt %llu\n" + "btime %lu\n" + "processes %lu\n" + "procs_running %lu\n" +@@ -562,8 +582,8 @@ static int show_stat(struct seq_file *p, + nr_context_switches(), + (unsigned long)jif, + total_forks, +- nr_running(), +- nr_iowait()); ++ __nr_running, ++ __nr_iowait); + + return 0; + } +@@ -652,7 +672,8 @@ static int cmdline_read_proc(char *page, + { + int len; + +- len = sprintf(page, "%s\n", saved_command_line); ++ len = sprintf(page, "%s\n", ++ ve_is_super(get_exec_env()) ? 
saved_command_line : ""); + return proc_calc_metrics(page, start, off, count, eof, len); + } + +diff -upr linux-2.6.16.orig/fs/proc/proc_tty.c linux-2.6.16-026test015/fs/proc/proc_tty.c +--- linux-2.6.16.orig/fs/proc/proc_tty.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/proc_tty.c 2006-07-04 14:41:38.000000000 +0400 +@@ -6,6 +6,7 @@ + + #include <asm/uaccess.h> + ++#include <linux/ve_owner.h> + #include <linux/init.h> + #include <linux/errno.h> + #include <linux/time.h> +@@ -106,24 +107,35 @@ static int show_tty_driver(struct seq_fi + /* iterator */ + static void *t_start(struct seq_file *m, loff_t *pos) + { +- struct list_head *p; ++ struct tty_driver *drv; ++ + loff_t l = *pos; +- list_for_each(p, &tty_drivers) ++ read_lock(&tty_driver_guard); ++ list_for_each_entry(drv, &tty_drivers, tty_drivers) { ++ if (!ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env())) ++ continue; + if (!l--) +- return list_entry(p, struct tty_driver, tty_drivers); ++ return drv; ++ } + return NULL; + } + + static void *t_next(struct seq_file *m, void *v, loff_t *pos) + { +- struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next; ++ struct tty_driver *drv; ++ + (*pos)++; +- return p==&tty_drivers ? 
NULL : +- list_entry(p, struct tty_driver, tty_drivers); ++ drv = (struct tty_driver *)v; ++ list_for_each_entry_continue(drv, &tty_drivers, tty_drivers) { ++ if (ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env())) ++ return drv; ++ } ++ return NULL; + } + + static void t_stop(struct seq_file *m, void *v) + { ++ read_unlock(&tty_driver_guard); + } + + static struct seq_operations tty_drivers_op = { +diff -upr linux-2.6.16.orig/fs/proc/root.c linux-2.6.16-026test015/fs/proc/root.c +--- linux-2.6.16.orig/fs/proc/root.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/root.c 2006-07-04 14:41:38.000000000 +0400 +@@ -20,7 +20,10 @@ + + #include "internal.h" + +-struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; ++#ifndef CONFIG_VE ++struct proc_dir_entry *proc_net, *proc_net_stat; ++#endif ++struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; + + #ifdef CONFIG_SYSCTL + struct proc_dir_entry *proc_sys_root; +@@ -32,12 +35,14 @@ static struct super_block *proc_get_sb(s + return get_sb_single(fs_type, flags, data, proc_fill_super); + } + +-static struct file_system_type proc_fs_type = { ++struct file_system_type proc_fs_type = { + .name = "proc", + .get_sb = proc_get_sb, + .kill_sb = kill_anon_super, + }; + ++EXPORT_SYMBOL(proc_fs_type); ++ + void __init proc_root_init(void) + { + int err = proc_init_inodecache(); +@@ -157,7 +162,9 @@ EXPORT_SYMBOL(create_proc_entry); + EXPORT_SYMBOL(remove_proc_entry); + EXPORT_SYMBOL(proc_root); + EXPORT_SYMBOL(proc_root_fs); ++#ifndef CONFIG_VE + EXPORT_SYMBOL(proc_net); + EXPORT_SYMBOL(proc_net_stat); ++#endif + EXPORT_SYMBOL(proc_bus); + EXPORT_SYMBOL(proc_root_driver); +diff -upr linux-2.6.16.orig/fs/proc/task_mmu.c linux-2.6.16-026test015/fs/proc/task_mmu.c +--- linux-2.6.16.orig/fs/proc/task_mmu.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/task_mmu.c 2006-07-04 14:41:38.000000000 +0400 +@@ -90,9 +90,12 @@ int 
proc_exe_link(struct inode *inode, s + } + + if (vma) { +- *mnt = mntget(vma->vm_file->f_vfsmnt); +- *dentry = dget(vma->vm_file->f_dentry); +- result = 0; ++ result = d_root_check(vma->vm_file->f_dentry, ++ vma->vm_file->f_vfsmnt); ++ if (!result) { ++ *mnt = mntget(vma->vm_file->f_vfsmnt); ++ *dentry = dget(vma->vm_file->f_dentry); ++ } + } + + up_read(&mm->mmap_sem); +diff -upr linux-2.6.16.orig/fs/proc/task_nommu.c linux-2.6.16-026test015/fs/proc/task_nommu.c +--- linux-2.6.16.orig/fs/proc/task_nommu.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/task_nommu.c 2006-07-04 14:41:38.000000000 +0400 +@@ -126,9 +126,12 @@ int proc_exe_link(struct inode *inode, s + } + + if (vma) { +- *mnt = mntget(vma->vm_file->f_vfsmnt); +- *dentry = dget(vma->vm_file->f_dentry); +- result = 0; ++ result = d_root_check(vma->vm_file->f_dentry, ++ vma->vm_file->f_vfsmnt); ++ if (!result) { ++ *mnt = mntget(vma->vm_file->f_vfsmnt); ++ *dentry = dget(vma->vm_file->f_dentry); ++ } + } + + up_read(&mm->mmap_sem); +diff -upr linux-2.6.16.orig/fs/proc/vmcore.c linux-2.6.16-026test015/fs/proc/vmcore.c +--- linux-2.6.16.orig/fs/proc/vmcore.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/proc/vmcore.c 2006-07-04 14:41:36.000000000 +0400 +@@ -103,8 +103,8 @@ static ssize_t read_vmcore(struct file * + size_t buflen, loff_t *fpos) + { + ssize_t acc = 0, tmp; +- size_t tsz, nr_bytes; +- u64 start; ++ size_t tsz; ++ u64 start, nr_bytes; + struct vmcore *curr_m = NULL; + + if (buflen == 0 || *fpos >= vmcore_size) +diff -upr linux-2.6.16.orig/fs/quota.c linux-2.6.16-026test015/fs/quota.c +--- linux-2.6.16.orig/fs/quota.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/quota.c 2006-07-04 14:41:39.000000000 +0400 +@@ -81,11 +81,11 @@ static int generic_quotactl_valid(struct + if (cmd == Q_GETQUOTA) { + if (((type == USRQUOTA && current->euid != id) || + (type == GRPQUOTA && !in_egroup_p(id))) && +- !capable(CAP_SYS_ADMIN)) ++ 
!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + } + else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO) +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + + return 0; +@@ -132,10 +132,10 @@ static int xqm_quotactl_valid(struct sup + if (cmd == Q_XGETQUOTA) { + if (((type == XQM_USRQUOTA && current->euid != id) || + (type == XQM_GRPQUOTA && !in_egroup_p(id))) && +- !capable(CAP_SYS_ADMIN)) ++ !capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) { +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + } + +@@ -216,7 +216,7 @@ restart: + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); +- if (sb->s_root && sb->s_qcop->quota_sync) ++ if (sb->s_root && sb->s_qcop && sb->s_qcop->quota_sync) + quota_sync_sb(sb, type); + up_read(&sb->s_umount); + spin_lock(&sb_lock); +@@ -337,6 +337,235 @@ static int do_quotactl(struct super_bloc + return 0; + } + ++static struct super_block *quota_get_sb(const char __user *special) ++{ ++ struct super_block *sb; ++ struct block_device *bdev; ++ char *tmp; ++ ++ tmp = getname(special); ++ if (IS_ERR(tmp)) ++ return (struct super_block *)tmp; ++ bdev = lookup_bdev(tmp, FMODE_QUOTACTL); ++ putname(tmp); ++ if (IS_ERR(bdev)) ++ return (struct super_block *)bdev; ++ sb = get_super(bdev); ++ bdput(bdev); ++ if (!sb) ++ return ERR_PTR(-ENODEV); ++ return sb; ++} ++ ++#ifdef CONFIG_QUOTA_COMPAT ++ ++#define QC_QUOTAON 0x0100 /* enable quotas */ ++#define QC_QUOTAOFF 0x0200 /* disable quotas */ ++/* GETQUOTA, SETQUOTA and SETUSE which were at 0x0300-0x0500 has now other parameteres */ ++#define QC_SYNC 0x0600 /* sync disk copy of a filesystems quotas */ ++#define QC_SETQLIM 0x0700 /* set limits */ ++/* GETSTATS at 0x0800 is now longer... */ ++#define QC_GETINFO 0x0900 /* get info about quotas - graces, flags... 
*/ ++#define QC_SETINFO 0x0A00 /* set info about quotas */ ++#define QC_SETGRACE 0x0B00 /* set inode and block grace */ ++#define QC_SETFLAGS 0x0C00 /* set flags for quota */ ++#define QC_GETQUOTA 0x0D00 /* get limits and usage */ ++#define QC_SETQUOTA 0x0E00 /* set limits and usage */ ++#define QC_SETUSE 0x0F00 /* set usage */ ++/* 0x1000 used by old RSQUASH */ ++#define QC_GETSTATS 0x1100 /* get collected stats */ ++ ++struct compat_dqblk { ++ unsigned int dqb_ihardlimit; ++ unsigned int dqb_isoftlimit; ++ unsigned int dqb_curinodes; ++ unsigned int dqb_bhardlimit; ++ unsigned int dqb_bsoftlimit; ++ qsize_t dqb_curspace; ++ __kernel_time_t dqb_btime; ++ __kernel_time_t dqb_itime; ++}; ++ ++struct compat_dqinfo { ++ unsigned int dqi_bgrace; ++ unsigned int dqi_igrace; ++ unsigned int dqi_flags; ++ unsigned int dqi_blocks; ++ unsigned int dqi_free_blk; ++ unsigned int dqi_free_entry; ++}; ++ ++struct compat_dqstats { ++ __u32 lookups; ++ __u32 drops; ++ __u32 reads; ++ __u32 writes; ++ __u32 cache_hits; ++ __u32 allocated_dquots; ++ __u32 free_dquots; ++ __u32 syncs; ++ __u32 version; ++}; ++ ++asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr); ++static long compat_quotactl(unsigned int cmds, unsigned int type, ++ const char __user *special, qid_t id, ++ void __user *addr) ++{ ++ struct super_block *sb; ++ long ret; ++ ++ sb = NULL; ++ switch (cmds) { ++ case QC_QUOTAON: ++ return sys_quotactl(QCMD(Q_QUOTAON, type), ++ special, id, addr); ++ ++ case QC_QUOTAOFF: ++ return sys_quotactl(QCMD(Q_QUOTAOFF, type), ++ special, id, addr); ++ ++ case QC_SYNC: ++ return sys_quotactl(QCMD(Q_SYNC, type), ++ special, id, addr); ++ ++ case QC_GETQUOTA: { ++ struct if_dqblk idq; ++ struct compat_dqblk cdq; ++ ++ sb = quota_get_sb(special); ++ ret = PTR_ERR(sb); ++ if (IS_ERR(sb)) ++ break; ++ ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id); ++ if (ret) ++ break; ++ ret = sb->s_qcop->get_dqblk(sb, type, id, &idq); ++ if 
(ret) ++ break; ++ cdq.dqb_ihardlimit = idq.dqb_ihardlimit; ++ cdq.dqb_isoftlimit = idq.dqb_isoftlimit; ++ cdq.dqb_curinodes = idq.dqb_curinodes; ++ cdq.dqb_bhardlimit = idq.dqb_bhardlimit; ++ cdq.dqb_bsoftlimit = idq.dqb_bsoftlimit; ++ cdq.dqb_curspace = idq.dqb_curspace; ++ cdq.dqb_btime = idq.dqb_btime; ++ cdq.dqb_itime = idq.dqb_itime; ++ ret = 0; ++ if (copy_to_user(addr, &cdq, sizeof(cdq))) ++ ret = -EFAULT; ++ break; ++ } ++ ++ case QC_SETQUOTA: ++ case QC_SETUSE: ++ case QC_SETQLIM: { ++ struct if_dqblk idq; ++ struct compat_dqblk cdq; ++ ++ sb = quota_get_sb(special); ++ ret = PTR_ERR(sb); ++ if (IS_ERR(sb)) ++ break; ++ ret = check_quotactl_valid(sb, type, Q_SETQUOTA, id); ++ if (ret) ++ break; ++ ret = -EFAULT; ++ if (copy_from_user(&cdq, addr, sizeof(cdq))) ++ break; ++ idq.dqb_ihardlimit = cdq.dqb_ihardlimit; ++ idq.dqb_isoftlimit = cdq.dqb_isoftlimit; ++ idq.dqb_curinodes = cdq.dqb_curinodes; ++ idq.dqb_bhardlimit = cdq.dqb_bhardlimit; ++ idq.dqb_bsoftlimit = cdq.dqb_bsoftlimit; ++ idq.dqb_curspace = cdq.dqb_curspace; ++ idq.dqb_valid = 0; ++ if (cmds == QC_SETQUOTA || cmds == QC_SETQLIM) ++ idq.dqb_valid |= QIF_LIMITS; ++ if (cmds == QC_SETQUOTA || cmds == QC_SETUSE) ++ idq.dqb_valid |= QIF_USAGE; ++ ret = sb->s_qcop->set_dqblk(sb, type, id, &idq); ++ break; ++ } ++ ++ case QC_GETINFO: { ++ struct if_dqinfo iinf; ++ struct compat_dqinfo cinf; ++ ++ sb = quota_get_sb(special); ++ ret = PTR_ERR(sb); ++ if (IS_ERR(sb)) ++ break; ++ ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id); ++ if (ret) ++ break; ++ ret = sb->s_qcop->get_info(sb, type, &iinf); ++ if (ret) ++ break; ++ cinf.dqi_bgrace = iinf.dqi_bgrace; ++ cinf.dqi_igrace = iinf.dqi_igrace; ++ cinf.dqi_flags = 0; ++ if (iinf.dqi_flags & DQF_INFO_DIRTY) ++ cinf.dqi_flags |= 0x0010; ++ cinf.dqi_blocks = 0; ++ cinf.dqi_free_blk = 0; ++ cinf.dqi_free_entry = 0; ++ ret = 0; ++ if (copy_to_user(addr, &cinf, sizeof(cinf))) ++ ret = -EFAULT; ++ break; ++ } ++ ++ case QC_SETINFO: ++ case QC_SETGRACE: ++ 
case QC_SETFLAGS: { ++ struct if_dqinfo iinf; ++ struct compat_dqinfo cinf; ++ ++ sb = quota_get_sb(special); ++ ret = PTR_ERR(sb); ++ if (IS_ERR(sb)) ++ break; ++ ret = check_quotactl_valid(sb, type, Q_SETINFO, id); ++ if (ret) ++ break; ++ ret = -EFAULT; ++ if (copy_from_user(&cinf, addr, sizeof(cinf))) ++ break; ++ iinf.dqi_bgrace = cinf.dqi_bgrace; ++ iinf.dqi_igrace = cinf.dqi_igrace; ++ iinf.dqi_flags = cinf.dqi_flags; ++ iinf.dqi_valid = 0; ++ if (cmds == QC_SETINFO || cmds == QC_SETGRACE) ++ iinf.dqi_valid |= IIF_BGRACE | IIF_IGRACE; ++ if (cmds == QC_SETINFO || cmds == QC_SETFLAGS) ++ iinf.dqi_valid |= IIF_FLAGS; ++ ret = sb->s_qcop->set_info(sb, type, &iinf); ++ break; ++ } ++ ++ case QC_GETSTATS: { ++ struct compat_dqstats stat; ++ ++ memset(&stat, 0, sizeof(stat)); ++ stat.version = 6*10000+5*100+0; ++ ret = 0; ++ if (copy_to_user(addr, &stat, sizeof(stat))) ++ ret = -EFAULT; ++ break; ++ } ++ ++ default: ++ ret = -ENOSYS; ++ break; ++ } ++ if (sb && !IS_ERR(sb)) ++ drop_super(sb); ++ return ret; ++} ++ ++#endif ++ + /* + * This is the system call interface. This communicates with + * the user-level programs. 
Currently this only supports diskquota +@@ -347,25 +576,20 @@ asmlinkage long sys_quotactl(unsigned in + { + uint cmds, type; + struct super_block *sb = NULL; +- struct block_device *bdev; +- char *tmp; + int ret; + + cmds = cmd >> SUBCMDSHIFT; + type = cmd & SUBCMDMASK; + ++#ifdef CONFIG_QUOTA_COMPAT ++ if (cmds >= 0x0100 && cmds < 0x3000) ++ return compat_quotactl(cmds, type, special, id, addr); ++#endif ++ + if (cmds != Q_SYNC || special) { +- tmp = getname(special); +- if (IS_ERR(tmp)) +- return PTR_ERR(tmp); +- bdev = lookup_bdev(tmp); +- putname(tmp); +- if (IS_ERR(bdev)) +- return PTR_ERR(bdev); +- sb = get_super(bdev); +- bdput(bdev); +- if (!sb) +- return -ENODEV; ++ sb = quota_get_sb(special); ++ if (IS_ERR(sb)) ++ return PTR_ERR(sb); + } + + ret = check_quotactl_valid(sb, type, cmds, id); +diff -upr linux-2.6.16.orig/fs/reiserfs/namei.c linux-2.6.16-026test015/fs/reiserfs/namei.c +--- linux-2.6.16.orig/fs/reiserfs/namei.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/reiserfs/namei.c 2006-07-04 14:41:39.000000000 +0400 +@@ -864,6 +864,9 @@ static int reiserfs_rmdir(struct inode * + INITIALIZE_PATH(path); + struct reiserfs_dir_entry de; + ++ inode = dentry->d_inode; ++ DQUOT_INIT(inode); ++ + /* we will be doing 2 balancings and update 2 stat data, we change quotas + * of the owner of the directory and of the owner of the parent directory. 
+ * The quota structure is possibly deleted only on last iput => outside +@@ -888,8 +891,6 @@ static int reiserfs_rmdir(struct inode * + goto end_rmdir; + } + +- inode = dentry->d_inode; +- + reiserfs_update_inode_transaction(inode); + reiserfs_update_inode_transaction(dir); + +@@ -952,6 +953,7 @@ static int reiserfs_unlink(struct inode + unsigned long savelink; + + inode = dentry->d_inode; ++ DQUOT_INIT(inode); + + /* in this transaction we can be doing at max two balancings and update + * two stat datas, we change quotas of the owner of the directory and of +@@ -1259,6 +1261,8 @@ static int reiserfs_rename(struct inode + + old_inode = old_dentry->d_inode; + new_dentry_inode = new_dentry->d_inode; ++ if (new_dentry_inode) ++ DQUOT_INIT(new_dentry_inode); + + // make sure, that oldname still exists and points to an object we + // are going to rename +diff -upr linux-2.6.16.orig/fs/reiserfs/xattr.c linux-2.6.16-026test015/fs/reiserfs/xattr.c +--- linux-2.6.16.orig/fs/reiserfs/xattr.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/reiserfs/xattr.c 2006-07-04 14:41:37.000000000 +0400 +@@ -1343,7 +1343,8 @@ static int reiserfs_check_acl(struct ino + return error; + } + +-int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd) ++int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + /* + * We don't do permission checks on the internal objects. +@@ -1356,7 +1357,7 @@ int reiserfs_permission(struct inode *in + * Stat data v1 doesn't support ACLs. 
+ */ + if (get_inode_sd_version(inode) == STAT_DATA_V1) +- return generic_permission(inode, mask, NULL); ++ return generic_permission(inode, mask, NULL, perm); + else +- return generic_permission(inode, mask, reiserfs_check_acl); ++ return generic_permission(inode, mask, reiserfs_check_acl, perm); + } +diff -upr linux-2.6.16.orig/fs/reiserfs/xattr_acl.c linux-2.6.16-026test015/fs/reiserfs/xattr_acl.c +--- linux-2.6.16.orig/fs/reiserfs/xattr_acl.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/reiserfs/xattr_acl.c 2006-07-04 14:41:36.000000000 +0400 +@@ -408,8 +408,9 @@ int reiserfs_cache_default_acl(struct in + acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); + reiserfs_read_unlock_xattrs(inode->i_sb); + reiserfs_read_unlock_xattr_i(inode); +- ret = acl ? 1 : 0; +- posix_acl_release(acl); ++ ret = (acl && !IS_ERR(acl)); ++ if (ret) ++ posix_acl_release(acl); + } + + return ret; +diff -upr linux-2.6.16.orig/fs/select.c linux-2.6.16-026test015/fs/select.c +--- linux-2.6.16.orig/fs/select.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/select.c 2006-07-04 14:41:37.000000000 +0400 +@@ -24,6 +24,8 @@ + #include <linux/fs.h> + #include <linux/rcupdate.h> + ++#include <ub/ub_mem.h> ++ + #include <asm/uaccess.h> + + #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) +@@ -286,7 +288,7 @@ int do_select(int n, fd_set_bits *fds, s + + static void *select_bits_alloc(int size) + { +- return kmalloc(6 * size, GFP_KERNEL); ++ return ub_kmalloc(6 * size, GFP_KERNEL); + } + + static void select_bits_free(void *bits, int size) +@@ -645,7 +647,7 @@ int do_sys_poll(struct pollfd __user *uf + err = -ENOMEM; + while(i!=0) { + struct poll_list *pp; +- pp = kmalloc(sizeof(struct poll_list)+ ++ pp = ub_kmalloc(sizeof(struct poll_list)+ + sizeof(struct pollfd)* + (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i), + GFP_KERNEL); +diff -upr linux-2.6.16.orig/fs/seq_file.c linux-2.6.16-026test015/fs/seq_file.c +--- linux-2.6.16.orig/fs/seq_file.c 2006-03-20 
08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/seq_file.c 2006-07-04 14:41:38.000000000 +0400 +@@ -345,6 +345,8 @@ int seq_path(struct seq_file *m, + if (m->count < m->size) { + char *s = m->buf + m->count; + char *p = d_path(dentry, mnt, s, m->size - m->count); ++ if (IS_ERR(p) && PTR_ERR(p) != -ENAMETOOLONG) ++ return 0; + if (!IS_ERR(p)) { + while (s <= p) { + char c = *p++; +diff -upr linux-2.6.16.orig/fs/simfs.c linux-2.6.16-026test015/fs/simfs.c +--- linux-2.6.16.orig/fs/simfs.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/fs/simfs.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,290 @@ ++/* ++ * fs/simfs.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/fs.h> ++#include <linux/file.h> ++#include <linux/init.h> ++#include <linux/namei.h> ++#include <linux/err.h> ++#include <linux/module.h> ++#include <linux/mount.h> ++#include <linux/vzquota.h> ++#include <linux/statfs.h> ++#include <linux/virtinfo.h> ++#include <linux/faudit.h> ++#include <linux/genhd.h> ++ ++#include <asm/unistd.h> ++#include <asm/uaccess.h> ++ ++#define SIMFS_GET_LOWER_FS_SB(sb) sb->s_root->d_sb ++ ++static struct super_operations sim_super_ops; ++ ++static int sim_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ struct super_block *sb; ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ if (!inode->i_op->getattr) { ++ generic_fillattr(inode, stat); ++ if (!stat->blksize) { ++ unsigned blocks; ++ ++ sb = inode->i_sb; ++ blocks = (stat->size + sb->s_blocksize-1) >> ++ sb->s_blocksize_bits; ++ stat->blocks = (sb->s_blocksize / 512) * blocks; ++ stat->blksize = sb->s_blocksize; ++ } ++ } else { ++ int err; ++ ++ err = inode->i_op->getattr(mnt, dentry, stat); ++ if (err) ++ return err; ++ } ++ ++ sb = mnt->mnt_sb; ++ if (sb->s_op == &sim_super_ops) ++ stat->dev = sb->s_dev; ++ return 0; ++} 
++ ++static void quota_get_stat(struct super_block *sb, struct kstatfs *buf) ++{ ++ int err; ++ struct dq_stat qstat; ++ struct virt_info_quota q; ++ long free_file, adj_file; ++ s64 blk, free_blk, adj_blk; ++ int bsize_bits; ++ ++ q.super = sb; ++ q.qstat = &qstat; ++ err = virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_GETSTAT, &q); ++ if (err != NOTIFY_OK) ++ return; ++ ++ bsize_bits = ffs(buf->f_bsize) - 1; ++ free_blk = (s64)(qstat.bsoftlimit - qstat.bcurrent) >> bsize_bits; ++ if (free_blk < 0) ++ free_blk = 0; ++ /* ++ * In the regular case, we always set buf->f_bfree and buf->f_blocks to ++ * the values reported by quota. In case of real disk space shortage, ++ * we adjust the values. We want this adjustment to look as if the ++ * total disk space were reduced, not as if the usage were increased. ++ * -- SAW ++ */ ++ adj_blk = 0; ++ if (buf->f_bfree < free_blk) ++ adj_blk = free_blk - buf->f_bfree; ++ buf->f_bfree = (long)(free_blk - adj_blk); ++ ++ if (free_blk < buf->f_bavail) ++ buf->f_bavail = (long)free_blk; /* min(f_bavail, free_blk) */ ++ ++ blk = (qstat.bsoftlimit >> bsize_bits) - adj_blk; ++ buf->f_blocks = blk > LONG_MAX ? LONG_MAX : blk; ++ ++ free_file = qstat.isoftlimit - qstat.icurrent; ++ if (free_file < 0) ++ free_file = 0; ++ if (buf->f_ffree == -1) ++ /* ++ * One filesystem uses -1 to represent the fact that it doesn't ++ * have a detached limit for inode number. ++ * May be, because -1 is a good pretendent for the maximum value ++ * of signed long type, may be, because it's just nice to have ++ * an exceptional case... 
Guess what that filesystem is :-) ++ * -- SAW ++ */ ++ buf->f_ffree = free_file; ++ adj_file = 0; ++ if (buf->f_ffree < free_file) ++ adj_file = free_file - buf->f_ffree; ++ buf->f_ffree = free_file - adj_file; ++ buf->f_files = qstat.isoftlimit - adj_file; ++} ++ ++static int sim_statfs(struct super_block *sb, struct kstatfs *buf) ++{ ++ int err; ++ struct super_block *lsb; ++ struct kstatfs statbuf; ++ ++ err = 0; ++ if (sb->s_op != &sim_super_ops) ++ return 0; ++ ++ lsb = SIMFS_GET_LOWER_FS_SB(sb); ++ ++ err = -ENOSYS; ++ if (lsb && lsb->s_op && lsb->s_op->statfs) ++ err = lsb->s_op->statfs(lsb, &statbuf); ++ if (err) ++ return err; ++ ++ quota_get_stat(sb, &statbuf); ++ ++ buf->f_files = statbuf.f_files; ++ buf->f_ffree = statbuf.f_ffree; ++ buf->f_blocks = statbuf.f_blocks; ++ buf->f_bfree = statbuf.f_bfree; ++ buf->f_bavail = statbuf.f_bavail; ++ return 0; ++} ++ ++static int sim_systemcall(struct vnotifier_block *me, unsigned long n, ++ void *d, int old_ret) ++{ ++ int err; ++ ++ switch (n) { ++ case VIRTINFO_FAUDIT_STAT: { ++ struct faudit_stat_arg *arg; ++ ++ arg = (struct faudit_stat_arg *)d; ++ err = sim_getattr(arg->mnt, arg->dentry, arg->stat); ++ arg->err = err; ++ } ++ break; ++ case VIRTINFO_FAUDIT_STATFS: { ++ struct faudit_statfs_arg *arg; ++ ++ arg = (struct faudit_statfs_arg *)d; ++ err = sim_statfs(arg->sb, arg->stat); ++ arg->err = err; ++ } ++ break; ++ default: ++ return old_ret; ++ } ++ return (err ? 
NOTIFY_BAD : NOTIFY_OK); ++} ++ ++static struct inode *sim_quota_root(struct super_block *sb) ++{ ++ return sb->s_root->d_inode; ++} ++ ++void sim_put_super(struct super_block *sb) ++{ ++ struct virt_info_quota viq; ++ ++ viq.super = sb; ++ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_OFF, &viq); ++ bdput(sb->s_bdev); ++} ++ ++static struct super_operations sim_super_ops = { ++ .get_quota_root = sim_quota_root, ++ .put_super = sim_put_super, ++}; ++ ++static int sim_fill_super(struct super_block *s, void *data) ++{ ++ int err; ++ struct nameidata *nd; ++ ++ err = set_anon_super(s, NULL); ++ if (err) ++ goto out; ++ ++ err = 0; ++ nd = (struct nameidata *)data; ++ s->s_root = dget(nd->dentry); ++ s->s_op = &sim_super_ops; ++out: ++ return err; ++} ++ ++struct super_block *sim_get_sb(struct file_system_type *type, ++ int flags, const char *dev_name, void *opt) ++{ ++ int err; ++ struct nameidata nd; ++ struct super_block *sb; ++ struct block_device *bd; ++ struct virt_info_quota viq; ++ static struct hd_struct fake_hds; ++ ++ sb = ERR_PTR(-EINVAL); ++ if (opt == NULL) ++ goto out; ++ ++ err = path_lookup(opt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); ++ sb = ERR_PTR(err); ++ if (err) ++ goto out; ++ ++ sb = sget(type, NULL, sim_fill_super, &nd); ++ if (IS_ERR(sb)) ++ goto out_path; ++ ++ bd = bdget(sb->s_dev); ++ if (!bd) ++ goto out_killsb; ++ ++ sb->s_bdev = bd; ++ bd->bd_part = &fake_hds; ++ viq.super = sb; ++ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_ON, &viq); ++out_path: ++ path_release(&nd); ++out: ++ return sb; ++ ++out_killsb: ++ up_write(&sb->s_umount); ++ deactivate_super(sb); ++ sb = ERR_PTR(-ENODEV); ++ goto out_path; ++} ++ ++static struct file_system_type sim_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "simfs", ++ .get_sb = sim_get_sb, ++ .kill_sb = kill_anon_super, ++}; ++ ++static struct vnotifier_block sim_syscalls = { ++ .notifier_call = sim_systemcall, ++}; ++ ++static int __init init_simfs(void) ++{ ++ int err; ++ ++ err = 
register_filesystem(&sim_fs_type); ++ if (err) ++ return err; ++ ++ virtinfo_notifier_register(VITYPE_FAUDIT, &sim_syscalls); ++ return 0; ++} ++ ++static void __exit exit_simfs(void) ++{ ++ virtinfo_notifier_unregister(VITYPE_FAUDIT, &sim_syscalls); ++ unregister_filesystem(&sim_fs_type); ++} ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Open Virtuozzo Simulation of File System"); ++MODULE_LICENSE("GPL v2"); ++ ++module_init(init_simfs); ++module_exit(exit_simfs); +diff -upr linux-2.6.16.orig/fs/smbfs/dir.c linux-2.6.16-026test015/fs/smbfs/dir.c +--- linux-2.6.16.orig/fs/smbfs/dir.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/smbfs/dir.c 2006-07-04 14:41:36.000000000 +0400 +@@ -434,6 +434,11 @@ smb_lookup(struct inode *dir, struct den + if (dentry->d_name.len > SMB_MAXNAMELEN) + goto out; + ++ /* Do not allow lookup of names with backslashes in */ ++ error = -EINVAL; ++ if (memchr(dentry->d_name.name, '\\', dentry->d_name.len)) ++ goto out; ++ + lock_kernel(); + error = smb_proc_getattr(dentry, &finfo); + #ifdef SMBFS_PARANOIA +diff -upr linux-2.6.16.orig/fs/smbfs/file.c linux-2.6.16-026test015/fs/smbfs/file.c +--- linux-2.6.16.orig/fs/smbfs/file.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/smbfs/file.c 2006-07-04 14:41:37.000000000 +0400 +@@ -387,7 +387,8 @@ smb_file_release(struct inode *inode, st + * privileges, so we need our own check for this. 
+ */ + static int +-smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) ++smb_file_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + int mode = inode->i_mode; + int error = 0; +diff -upr linux-2.6.16.orig/fs/smbfs/inode.c linux-2.6.16-026test015/fs/smbfs/inode.c +--- linux-2.6.16.orig/fs/smbfs/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/smbfs/inode.c 2006-07-04 14:41:37.000000000 +0400 +@@ -233,7 +233,7 @@ smb_invalidate_inodes(struct smb_sb_info + { + VERBOSE("\n"); + shrink_dcache_sb(SB_of(server)); +- invalidate_inodes(SB_of(server)); ++ invalidate_inodes(SB_of(server), 0); + } + + /* +diff -upr linux-2.6.16.orig/fs/smbfs/request.c linux-2.6.16-026test015/fs/smbfs/request.c +--- linux-2.6.16.orig/fs/smbfs/request.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/smbfs/request.c 2006-07-04 14:41:36.000000000 +0400 +@@ -339,9 +339,11 @@ int smb_add_request(struct smb_request * + /* + * On timeout or on interrupt we want to try and remove the + * request from the recvq/xmitq. ++ * First check if the request is still part of a queue. 
(May ++ * have been removed by some error condition) + */ + smb_lock_server(server); +- if (!(req->rq_flags & SMB_REQ_RECEIVED)) { ++ if (!list_empty(&req->rq_queue)) { + list_del_init(&req->rq_queue); + smb_rput(req); + } +diff -upr linux-2.6.16.orig/fs/stat.c linux-2.6.16-026test015/fs/stat.c +--- linux-2.6.16.orig/fs/stat.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/stat.c 2006-07-04 14:41:39.000000000 +0400 +@@ -15,6 +15,7 @@ + #include <linux/namei.h> + #include <linux/security.h> + #include <linux/syscalls.h> ++#include <linux/faudit.h> + + #include <asm/uaccess.h> + #include <asm/unistd.h> +@@ -42,11 +43,19 @@ int vfs_getattr(struct vfsmount *mnt, st + { + struct inode *inode = dentry->d_inode; + int retval; ++ struct faudit_stat_arg arg; + + retval = security_inode_getattr(mnt, dentry); + if (retval) + return retval; + ++ arg.mnt = mnt; ++ arg.dentry = dentry; ++ arg.stat = stat; ++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT, &arg) ++ != NOTIFY_DONE) ++ return arg.err; ++ + if (inode->i_op->getattr) + return inode->i_op->getattr(mnt, dentry, stat); + +diff -upr linux-2.6.16.orig/fs/super.c linux-2.6.16-026test015/fs/super.c +--- linux-2.6.16.orig/fs/super.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/super.c 2006-07-04 14:41:38.000000000 +0400 +@@ -23,6 +23,7 @@ + #include <linux/config.h> + #include <linux/module.h> + #include <linux/slab.h> ++#include <linux/ve_owner.h> + #include <linux/init.h> + #include <linux/smp_lock.h> + #include <linux/acct.h> +@@ -231,13 +232,13 @@ void generic_shutdown_super(struct super + if (root) { + sb->s_root = NULL; + shrink_dcache_parent(root); +- shrink_dcache_anon(&sb->s_anon); ++ shrink_dcache_anon(sb); + dput(root); + fsync_super(sb); + lock_super(sb); + sb->s_flags &= ~MS_ACTIVE; + /* bad name - it should be evict_inodes() */ +- invalidate_inodes(sb); ++ invalidate_inodes(sb, 0); + lock_kernel(); + + if (sop->write_super && sb->s_dirt) +@@ -246,7 
+247,7 @@ void generic_shutdown_super(struct super + sop->put_super(sb); + + /* Forget any remaining inodes */ +- if (invalidate_inodes(sb)) { ++ if (invalidate_inodes(sb, 1)) { + printk("VFS: Busy inodes after unmount of %s. " + "Self-destruct in 5 seconds. Have a nice day...\n", + sb->s_id); +@@ -481,11 +482,20 @@ asmlinkage long sys_ustat(unsigned dev, + struct super_block *s; + struct ustat tmp; + struct kstatfs sbuf; +- int err = -EINVAL; ++ dev_t kdev; ++ int err; ++ ++ kdev = new_decode_dev(dev); ++#ifdef CONFIG_VE ++ err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ); ++ if (err) ++ goto out; ++#endif + +- s = user_get_super(new_decode_dev(dev)); +- if (s == NULL) +- goto out; ++ err = -EINVAL; ++ s = user_get_super(kdev); ++ if (s == NULL) ++ goto out; + err = vfs_statfs(s, &sbuf); + drop_super(s); + if (err) +@@ -599,6 +609,13 @@ void emergency_remount(void) + static struct idr unnamed_dev_idr; + static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ + ++/* for compatibility with coreutils still unaware of new minor sizes */ ++int unnamed_dev_majors[] = { ++ 0, 144, 145, 146, 242, 243, 244, 245, ++ 246, 247, 248, 249, 250, 251, 252, 253 ++}; ++EXPORT_SYMBOL(unnamed_dev_majors); ++ + int set_anon_super(struct super_block *s, void *data) + { + int dev; +@@ -616,13 +633,13 @@ int set_anon_super(struct super_block *s + else if (error) + return -EAGAIN; + +- if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { ++ if ((dev & MAX_ID_MASK) >= (1 << MINORBITS)) { + spin_lock(&unnamed_dev_lock); + idr_remove(&unnamed_dev_idr, dev); + spin_unlock(&unnamed_dev_lock); + return -EMFILE; + } +- s->s_dev = MKDEV(0, dev & MINORMASK); ++ s->s_dev = make_unnamed_dev(dev); + return 0; + } + +@@ -630,8 +647,9 @@ EXPORT_SYMBOL(set_anon_super); + + void kill_anon_super(struct super_block *sb) + { +- int slot = MINOR(sb->s_dev); ++ int slot; + ++ slot = unnamed_dev_idx(sb->s_dev); + generic_shutdown_super(sb); + spin_lock(&unnamed_dev_lock); + idr_remove(&unnamed_dev_idr, 
slot); +diff -upr linux-2.6.16.orig/fs/sysfs/bin.c linux-2.6.16-026test015/fs/sysfs/bin.c +--- linux-2.6.16.orig/fs/sysfs/bin.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/sysfs/bin.c 2006-07-04 14:41:37.000000000 +0400 +@@ -120,6 +120,9 @@ static int open(struct inode * inode, st + struct bin_attribute * attr = to_bin_attr(file->f_dentry); + int error = -EINVAL; + ++ if (!ve_sysfs_alowed()) ++ return 0; ++ + if (!kobj || !attr) + goto Done; + +@@ -196,6 +199,9 @@ int sysfs_create_bin_file(struct kobject + + int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) + { ++ if (!ve_sysfs_alowed()) ++ return 0; ++ + sysfs_hash_and_remove(kobj->dentry,attr->attr.name); + return 0; + } +diff -upr linux-2.6.16.orig/fs/sysfs/dir.c linux-2.6.16-026test015/fs/sysfs/dir.c +--- linux-2.6.16.orig/fs/sysfs/dir.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/sysfs/dir.c 2006-07-04 14:41:37.000000000 +0400 +@@ -144,6 +144,9 @@ int sysfs_create_dir(struct kobject * ko + struct dentry * parent; + int error = 0; + ++ if (!ve_sysfs_alowed()) ++ return 0; ++ + BUG_ON(!kobj); + + if (kobj->parent) +@@ -278,10 +281,14 @@ void sysfs_remove_subdir(struct dentry * + + void sysfs_remove_dir(struct kobject * kobj) + { +- struct dentry * dentry = dget(kobj->dentry); ++ struct dentry * dentry; + struct sysfs_dirent * parent_sd; + struct sysfs_dirent * sd, * tmp; + ++ if (!ve_sysfs_alowed()) ++ return; ++ ++ dentry = dget(kobj->dentry); + if (!dentry) + return; + +@@ -302,6 +309,7 @@ void sysfs_remove_dir(struct kobject * k + * Drop reference from dget() on entrance. 
+ */ + dput(dentry); ++ kobj->dentry = NULL; + } + + int sysfs_rename_dir(struct kobject * kobj, const char *new_name) +@@ -309,6 +317,9 @@ int sysfs_rename_dir(struct kobject * ko + int error = 0; + struct dentry * new_dentry, * parent; + ++ if (!ve_sysfs_alowed()) ++ return 0; ++ + if (!strcmp(kobject_name(kobj), new_name)) + return -EINVAL; + +diff -upr linux-2.6.16.orig/fs/sysfs/file.c linux-2.6.16-026test015/fs/sysfs/file.c +--- linux-2.6.16.orig/fs/sysfs/file.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/sysfs/file.c 2006-07-04 14:41:37.000000000 +0400 +@@ -183,7 +183,7 @@ fill_write_buffer(struct sysfs_buffer * + return -ENOMEM; + + if (count >= PAGE_SIZE) +- count = PAGE_SIZE; ++ count = PAGE_SIZE - 1; + error = copy_from_user(buffer->page,buf,count); + buffer->needs_read_fill = 1; + return error ? -EFAULT : count; +@@ -380,6 +380,9 @@ int sysfs_add_file(struct dentry * dir, + + int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) + { ++ if (!ve_sysfs_alowed()) ++ return 0; ++ + BUG_ON(!kobj || !kobj->dentry || !attr); + + return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR); +@@ -398,6 +401,9 @@ int sysfs_update_file(struct kobject * k + struct dentry * victim; + int res = -ENOENT; + ++ if (!ve_sysfs_alowed()) ++ return 0; ++ + mutex_lock(&dir->d_inode->i_mutex); + victim = lookup_one_len(attr->name, dir, strlen(attr->name)); + if (!IS_ERR(victim)) { +@@ -473,6 +479,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file); + + void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) + { ++ if (!ve_sysfs_alowed()) ++ return; ++ + sysfs_hash_and_remove(kobj->dentry,attr->name); + } + +diff -upr linux-2.6.16.orig/fs/sysfs/group.c linux-2.6.16-026test015/fs/sysfs/group.c +--- linux-2.6.16.orig/fs/sysfs/group.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/sysfs/group.c 2006-07-04 14:41:37.000000000 +0400 +@@ -46,6 +46,9 @@ int sysfs_create_group(struct kobject * + struct dentry * 
dir; + int error; + ++ if (!ve_sysfs_alowed()) ++ return 0; ++ + BUG_ON(!kobj || !kobj->dentry); + + if (grp->name) { +@@ -68,6 +71,9 @@ void sysfs_remove_group(struct kobject * + { + struct dentry * dir; + ++ if (!ve_sysfs_alowed()) ++ return; ++ + if (grp->name) + dir = lookup_one_len(grp->name, kobj->dentry, + strlen(grp->name)); +diff -upr linux-2.6.16.orig/fs/sysfs/inode.c linux-2.6.16-026test015/fs/sysfs/inode.c +--- linux-2.6.16.orig/fs/sysfs/inode.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/sysfs/inode.c 2006-07-04 14:41:37.000000000 +0400 +@@ -8,14 +8,13 @@ + + #undef DEBUG + ++#include <linux/config.h> + #include <linux/pagemap.h> + #include <linux/namei.h> + #include <linux/backing-dev.h> + #include <linux/capability.h> + #include "sysfs.h" + +-extern struct super_block * sysfs_sb; +- + static struct address_space_operations sysfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, +@@ -227,12 +226,16 @@ void sysfs_drop_dentry(struct sysfs_dire + void sysfs_hash_and_remove(struct dentry * dir, const char * name) + { + struct sysfs_dirent * sd; +- struct sysfs_dirent * parent_sd = dir->d_fsdata; ++ struct sysfs_dirent * parent_sd; ++ ++ if (!dir) ++ return; + + if (dir->d_inode == NULL) + /* no inode means this hasn't been made visible yet */ + return; + ++ parent_sd = dir->d_fsdata; + mutex_lock(&dir->d_inode->i_mutex); + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { + if (!sd->s_element) +diff -upr linux-2.6.16.orig/fs/sysfs/mount.c linux-2.6.16-026test015/fs/sysfs/mount.c +--- linux-2.6.16.orig/fs/sysfs/mount.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/sysfs/mount.c 2006-07-04 14:41:38.000000000 +0400 +@@ -7,6 +7,7 @@ + #include <linux/fs.h> + #include <linux/mount.h> + #include <linux/pagemap.h> ++#include <linux/module.h> + #include <linux/init.h> + + #include "sysfs.h" +@@ -14,8 +15,11 @@ + /* Random magic number */ + #define SYSFS_MAGIC 0x62656572 + 
++#ifndef CONFIG_VE + struct vfsmount *sysfs_mount; + struct super_block * sysfs_sb = NULL; ++#endif ++ + kmem_cache_t *sysfs_dir_cachep; + + static struct super_operations sysfs_ops = { +@@ -31,6 +35,15 @@ static struct sysfs_dirent sysfs_root = + .s_iattr = NULL, + }; + ++#ifdef CONFIG_VE ++static void init_ve0_sysfs_root(void) ++{ ++ get_ve0()->sysfs_root = &sysfs_root; ++} ++ ++#define sysfs_root (*(get_exec_env()->sysfs_root)) ++#endif ++ + static int sysfs_fill_super(struct super_block *sb, void *data, int silent) + { + struct inode *inode; +@@ -72,16 +85,21 @@ static struct super_block *sysfs_get_sb( + return get_sb_single(fs_type, flags, data, sysfs_fill_super); + } + +-static struct file_system_type sysfs_fs_type = { ++struct file_system_type sysfs_fs_type = { + .name = "sysfs", + .get_sb = sysfs_get_sb, + .kill_sb = kill_litter_super, + }; + ++EXPORT_SYMBOL(sysfs_fs_type); ++ + int __init sysfs_init(void) + { + int err = -ENOMEM; + ++#ifdef CONFIG_VE ++ init_ve0_sysfs_root(); ++#endif + sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache", + sizeof(struct sysfs_dirent), + 0, 0, NULL, NULL); +diff -upr linux-2.6.16.orig/fs/sysfs/symlink.c linux-2.6.16-026test015/fs/sysfs/symlink.c +--- linux-2.6.16.orig/fs/sysfs/symlink.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/sysfs/symlink.c 2006-07-04 14:41:37.000000000 +0400 +@@ -66,6 +66,7 @@ static int sysfs_add_link(struct dentry + if (!error) + return 0; + ++ kobject_put(target); + kfree(sl->link_name); + exit2: + kfree(sl); +@@ -86,6 +87,9 @@ int sysfs_create_link(struct kobject * k + + BUG_ON(!kobj || !kobj->dentry || !name); + ++ if (!ve_sysfs_alowed()) ++ return 0; ++ + mutex_lock(&dentry->d_inode->i_mutex); + error = sysfs_add_link(dentry, name, target); + mutex_unlock(&dentry->d_inode->i_mutex); +@@ -101,6 +105,9 @@ int sysfs_create_link(struct kobject * k + + void sysfs_remove_link(struct kobject * kobj, const char * name) + { ++ if(!ve_sysfs_alowed()) ++ return; ++ + 
sysfs_hash_and_remove(kobj->dentry,name); + } + +diff -upr linux-2.6.16.orig/fs/sysfs/sysfs.h linux-2.6.16-026test015/fs/sysfs/sysfs.h +--- linux-2.6.16.orig/fs/sysfs/sysfs.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/sysfs/sysfs.h 2006-07-04 14:41:38.000000000 +0400 +@@ -1,5 +1,14 @@ + +-extern struct vfsmount * sysfs_mount; ++#ifndef CONFIG_VE ++extern struct vfsmount *sysfs_mount; ++extern struct super_block *sysfs_sb; ++#define ve_sysfs_alowed() (1) ++#else ++#define sysfs_mount (get_exec_env()->sysfs_mnt) ++#define sysfs_sb (get_exec_env()->sysfs_sb) ++#define ve_sysfs_alowed() (sysfs_sb != NULL) ++#endif ++ + extern kmem_cache_t *sysfs_dir_cachep; + + extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); +@@ -19,7 +28,6 @@ extern void sysfs_drop_dentry(struct sys + extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); + + extern struct rw_semaphore sysfs_rename_sem; +-extern struct super_block * sysfs_sb; + extern struct file_operations sysfs_dir_operations; + extern struct file_operations sysfs_file_operations; + extern struct file_operations bin_fops; +diff -upr linux-2.6.16.orig/fs/vzdq_file.c linux-2.6.16-026test015/fs/vzdq_file.c +--- linux-2.6.16.orig/fs/vzdq_file.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/fs/vzdq_file.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,851 @@ ++/* ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * This file contains Virtuozzo quota files as proc entry implementation. ++ * It is required for std quota tools to work correctly as they are expecting ++ * aquota.user and aquota.group files. 
++ */ ++ ++#include <linux/ctype.h> ++#include <linux/slab.h> ++#include <linux/list.h> ++#include <linux/module.h> ++#include <linux/proc_fs.h> ++#include <linux/sysctl.h> ++#include <linux/mount.h> ++#include <linux/namespace.h> ++#include <linux/quotaio_v2.h> ++#include <asm/uaccess.h> ++ ++#include <linux/ve.h> ++#include <linux/ve_proto.h> ++#include <linux/vzdq_tree.h> ++#include <linux/vzquota.h> ++ ++/* ---------------------------------------------------------------------- ++ * ++ * File read operation ++ * ++ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c, ++ * perhaps) abuse vz_quota_sem. ++ * Taking a global semaphore for lengthy and user-controlled operations inside ++ * VPSs is not a good idea in general. ++ * In this case, the reasons for taking this semaphore are completely unclear, ++ * especially taking into account that the only function that has comments ++ * about the necessity to be called under this semaphore ++ * (create_proc_quotafile) is actually called OUTSIDE it. 
++ * ++ * --------------------------------------------------------------------- */ ++ ++#define DQBLOCK_SIZE 1024 ++#define DQUOTBLKNUM 21U ++#define DQTREE_DEPTH 4 ++#define TREENUM_2_BLKNUM(num) (((num) + 1) << 1) ++#define ISINDBLOCK(num) ((num)%2 != 0) ++#define FIRST_DATABLK 2 /* first even number */ ++#define LAST_IND_LEVEL (DQTREE_DEPTH - 1) ++#define CONVERT_LEVEL(level) ((level) * (QUOTAID_EBITS/QUOTAID_BBITS)) ++#define GETLEVINDX(ind, lev) (((ind) >> QUOTAID_BBITS*(lev)) \ ++ & QUOTATREE_BMASK) ++ ++#if (QUOTAID_EBITS / QUOTAID_BBITS) != (QUOTATREE_DEPTH / DQTREE_DEPTH) ++#error xBITS and DQTREE_DEPTH does not correspond ++#endif ++ ++#define BLOCK_NOT_FOUND 1 ++ ++/* data for quota file -- one per proc entry */ ++struct quotatree_data { ++ struct list_head list; ++ struct vz_quota_master *qmblk; ++ int type; /* type of the tree */ ++}; ++ ++/* serialized by vz_quota_sem */ ++static LIST_HEAD(qf_data_head); ++ ++static const u_int32_t vzquota_magics[] = V2_INITQMAGICS; ++static const u_int32_t vzquota_versions[] = V2_INITQVERSIONS; ++ ++static inline loff_t get_depoff(int depth) ++{ ++ loff_t res = 1; ++ while (depth) { ++ res += (1 << ((depth - 1)*QUOTAID_EBITS + 1)); ++ depth--; ++ } ++ return res; ++} ++ ++static inline loff_t get_blknum(loff_t num, int depth) ++{ ++ loff_t res; ++ res = (num << 1) + get_depoff(depth); ++ return res; ++} ++ ++static int get_depth(loff_t num) ++{ ++ int i; ++ for (i = 0; i < DQTREE_DEPTH; i++) { ++ if (num >= get_depoff(i) && (i == DQTREE_DEPTH - 1 ++ || num < get_depoff(i + 1))) ++ return i; ++ } ++ return -1; ++} ++ ++static inline loff_t get_offset(loff_t num) ++{ ++ loff_t res, tmp; ++ ++ tmp = get_depth(num); ++ if (tmp < 0) ++ return -1; ++ num -= get_depoff(tmp); ++ BUG_ON(num < 0); ++ res = num >> 1; ++ ++ return res; ++} ++ ++static inline loff_t get_quot_blk_num(struct quotatree_tree *tree, int level) ++{ ++ /* return maximum available block num */ ++ return tree->levels[level].freenum; ++} ++ ++static inline 
loff_t get_block_num(struct quotatree_tree *tree) ++{ ++ loff_t ind_blk_num, quot_blk_num, max_ind, max_quot; ++ ++ quot_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH) - 1); ++ max_quot = TREENUM_2_BLKNUM(quot_blk_num); ++ ind_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH - 1)); ++ max_ind = (quot_blk_num) ? get_blknum(ind_blk_num, LAST_IND_LEVEL) ++ : get_blknum(ind_blk_num, 0); ++ ++ return (max_ind > max_quot) ? max_ind + 1 : max_quot + 1; ++} ++ ++/* Write quota file header */ ++static int read_header(void *buf, struct quotatree_tree *tree, ++ struct dq_info *dq_ugid_info, int type) ++{ ++ struct v2_disk_dqheader *dqh; ++ struct v2_disk_dqinfo *dq_disk_info; ++ ++ dqh = buf; ++ dq_disk_info = buf + sizeof(struct v2_disk_dqheader); ++ ++ dqh->dqh_magic = vzquota_magics[type]; ++ dqh->dqh_version = vzquota_versions[type]; ++ ++ dq_disk_info->dqi_bgrace = dq_ugid_info[type].bexpire; ++ dq_disk_info->dqi_igrace = dq_ugid_info[type].iexpire; ++ dq_disk_info->dqi_flags = 0; /* no flags */ ++ dq_disk_info->dqi_blocks = get_block_num(tree); ++ dq_disk_info->dqi_free_blk = 0; /* first block in the file */ ++ dq_disk_info->dqi_free_entry = FIRST_DATABLK; ++ ++ return 0; ++} ++ ++static int get_block_child(int depth, struct quotatree_node *p, u_int32_t *buf) ++{ ++ int i, j, lev_num; ++ ++ lev_num = QUOTATREE_DEPTH/DQTREE_DEPTH - 1; ++ for (i = 0; i < BLOCK_SIZE/sizeof(u_int32_t); i++) { ++ struct quotatree_node *next, *parent; ++ ++ parent = p; ++ next = p; ++ for (j = lev_num; j >= 0; j--) { ++ if (!next->blocks[GETLEVINDX(i,j)]) { ++ buf[i] = 0; ++ goto bad_branch; ++ } ++ parent = next; ++ next = next->blocks[GETLEVINDX(i,j)]; ++ } ++ buf[i] = (depth == DQTREE_DEPTH - 1) ? 
++ TREENUM_2_BLKNUM(parent->num) ++ : get_blknum(next->num, depth + 1); ++ ++ bad_branch: ++ ; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Write index block to disk (or buffer) ++ * @buf has length 256*sizeof(u_int32_t) bytes ++ */ ++static int read_index_block(int num, u_int32_t *buf, ++ struct quotatree_tree *tree) ++{ ++ struct quotatree_node *p; ++ u_int32_t index; ++ loff_t off; ++ int depth, res; ++ ++ res = BLOCK_NOT_FOUND; ++ index = 0; ++ depth = get_depth(num); ++ off = get_offset(num); ++ if (depth < 0 || off < 0) ++ return -EINVAL; ++ ++ list_for_each_entry(p, &tree->levels[CONVERT_LEVEL(depth)].usedlh, ++ list) { ++ if (p->num >= off) ++ res = 0; ++ if (p->num != off) ++ continue; ++ get_block_child(depth, p, buf); ++ break; ++ } ++ ++ return res; ++} ++ ++static inline void convert_quot_format(struct v2_disk_dqblk *dq, ++ struct vz_quota_ugid *vzq) ++{ ++ dq->dqb_id = vzq->qugid_id; ++ dq->dqb_ihardlimit = vzq->qugid_stat.ihardlimit; ++ dq->dqb_isoftlimit = vzq->qugid_stat.isoftlimit; ++ dq->dqb_curinodes = vzq->qugid_stat.icurrent; ++ dq->dqb_bhardlimit = vzq->qugid_stat.bhardlimit / QUOTABLOCK_SIZE; ++ dq->dqb_bsoftlimit = vzq->qugid_stat.bsoftlimit / QUOTABLOCK_SIZE; ++ dq->dqb_curspace = vzq->qugid_stat.bcurrent; ++ dq->dqb_btime = vzq->qugid_stat.btime; ++ dq->dqb_itime = vzq->qugid_stat.itime; ++} ++ ++static int read_dquot(loff_t num, void *buf, struct quotatree_tree *tree) ++{ ++ int res, i, entries = 0; ++ struct v2_disk_dqdbheader *dq_header; ++ struct quotatree_node *p; ++ struct v2_disk_dqblk *blk = buf + sizeof(struct v2_disk_dqdbheader); ++ ++ res = BLOCK_NOT_FOUND; ++ dq_header = buf; ++ memset(dq_header, 0, sizeof(*dq_header)); ++ ++ list_for_each_entry(p, &(tree->levels[QUOTATREE_DEPTH - 1].usedlh), ++ list) { ++ if (TREENUM_2_BLKNUM(p->num) >= num) ++ res = 0; ++ if (TREENUM_2_BLKNUM(p->num) != num) ++ continue; ++ ++ for (i = 0; i < QUOTATREE_BSIZE; i++) { ++ if (!p->blocks[i]) ++ continue; ++ convert_quot_format(blk + entries, ++ (struct 
vz_quota_ugid *)p->blocks[i]); ++ entries++; ++ res = 0; ++ } ++ break; ++ } ++ dq_header->dqdh_entries = entries; ++ ++ return res; ++} ++ ++static int read_block(int num, void *buf, struct quotatree_tree *tree, ++ struct dq_info *dq_ugid_info, int magic) ++{ ++ int res; ++ ++ memset(buf, 0, DQBLOCK_SIZE); ++ if (!num) ++ res = read_header(buf, tree, dq_ugid_info, magic); ++ else if (ISINDBLOCK(num)) ++ res = read_index_block(num, (u_int32_t*)buf, tree); ++ else ++ res = read_dquot(num, buf, tree); ++ ++ return res; ++} ++ ++/* ++ * FIXME: this function can handle quota files up to 2GB only. ++ */ ++static int read_proc_quotafile(char *page, char **start, off_t off, int count, ++ int *eof, void *data) ++{ ++ off_t blk_num, blk_off, buf_off; ++ char *tmp; ++ size_t buf_size; ++ struct quotatree_data *qtd; ++ struct quotatree_tree *tree; ++ struct dq_info *dqi; ++ int res; ++ ++ tmp = kmalloc(DQBLOCK_SIZE, GFP_KERNEL); ++ if (!tmp) ++ return -ENOMEM; ++ ++ qtd = data; ++ down(&vz_quota_sem); ++ down(&qtd->qmblk->dq_sem); ++ ++ res = 0; ++ tree = QUGID_TREE(qtd->qmblk, qtd->type); ++ if (!tree) { ++ *eof = 1; ++ goto out_dq; ++ } ++ ++ dqi = &qtd->qmblk->dq_ugid_info[qtd->type]; ++ ++ buf_off = 0; ++ buf_size = count; ++ blk_num = off / DQBLOCK_SIZE; ++ blk_off = off % DQBLOCK_SIZE; ++ ++ while (buf_size > 0) { ++ off_t len; ++ ++ len = min((size_t)(DQBLOCK_SIZE-blk_off), buf_size); ++ res = read_block(blk_num, tmp, tree, dqi, qtd->type); ++ if (res < 0) ++ goto out_err; ++ if (res == BLOCK_NOT_FOUND) { ++ *eof = 1; ++ break; ++ } ++ memcpy(page + buf_off, tmp + blk_off, len); ++ ++ blk_num++; ++ buf_size -= len; ++ blk_off = 0; ++ buf_off += len; ++ } ++ res = buf_off; ++ ++out_err: ++ *start = NULL + count; ++out_dq: ++ up(&qtd->qmblk->dq_sem); ++ up(&vz_quota_sem); ++ kfree(tmp); ++ ++ return res; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * /proc/vz/vzaquota/QID/aquota.* files ++ * ++ * FIXME: this code lacks 
serialization of read/readdir/lseek. ++ * However, this problem should be fixed after the mainstream issue of what ++ * appears to be non-atomic read and update of file position in sys_read. ++ * ++ * --------------------------------------------------------------------- */ ++ ++static inline unsigned long vzdq_aquot_getino(dev_t dev) ++{ ++ return 0xec000000UL + dev; ++} ++ ++static inline dev_t vzdq_aquot_getidev(struct inode *inode) ++{ ++ return (dev_t)(unsigned long)PROC_I(inode)->op.proc_get_link; ++} ++ ++static inline void vzdq_aquot_setidev(struct inode *inode, dev_t dev) ++{ ++ PROC_I(inode)->op.proc_get_link = (void *)(unsigned long)dev; ++} ++ ++static ssize_t vzdq_aquotf_read(struct file *file, ++ char __user *buf, size_t size, loff_t *ppos) ++{ ++ char *page; ++ size_t bufsize; ++ ssize_t l, l2, copied; ++ char *start; ++ struct inode *inode; ++ struct block_device *bdev; ++ struct super_block *sb; ++ struct quotatree_data data; ++ int eof, err; ++ ++ err = -ENOMEM; ++ page = (char *)__get_free_page(GFP_KERNEL); ++ if (page == NULL) ++ goto out_err; ++ ++ err = -ENODEV; ++ inode = file->f_dentry->d_inode; ++ bdev = bdget(vzdq_aquot_getidev(inode)); ++ if (bdev == NULL) ++ goto out_err; ++ sb = get_super(bdev); ++ bdput(bdev); ++ if (sb == NULL) ++ goto out_err; ++ data.qmblk = vzquota_find_qmblk(sb); ++ data.type = PROC_I(inode)->type - 1; ++ drop_super(sb); ++ if (data.qmblk == NULL || data.qmblk == VZ_QUOTA_BAD) ++ goto out_err; ++ ++ copied = 0; ++ l = l2 = 0; ++ while (1) { ++ bufsize = min(size, (size_t)PAGE_SIZE); ++ if (bufsize <= 0) ++ break; ++ ++ l = read_proc_quotafile(page, &start, *ppos, bufsize, ++ &eof, &data); ++ if (l <= 0) ++ break; ++ ++ l2 = copy_to_user(buf, page, l); ++ copied += l - l2; ++ if (l2) ++ break; ++ ++ buf += l; ++ size -= l; ++ *ppos += (unsigned long)start; ++ l = l2 = 0; ++ } ++ ++ qmblk_put(data.qmblk); ++ free_page((unsigned long)page); ++ if (copied) ++ return copied; ++ else if (l2) /* last copy_to_user failed 
*/ ++ return -EFAULT; ++ else /* read error or EOF */ ++ return l; ++ ++out_err: ++ if (page != NULL) ++ free_page((unsigned long)page); ++ return err; ++} ++ ++static struct file_operations vzdq_aquotf_file_operations = { ++ .read = &vzdq_aquotf_read, ++}; ++ ++static struct inode_operations vzdq_aquotf_inode_operations = { ++}; ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * /proc/vz/vzaquota/QID directory ++ * ++ * --------------------------------------------------------------------- */ ++ ++static int vzdq_aquotq_readdir(struct file *file, void *data, filldir_t filler) ++{ ++ loff_t n; ++ int err; ++ ++ n = file->f_pos; ++ for (err = 0; !err; n++) { ++ switch (n) { ++ case 0: ++ err = (*filler)(data, ".", 1, n, ++ file->f_dentry->d_inode->i_ino, ++ DT_DIR); ++ break; ++ case 1: ++ err = (*filler)(data, "..", 2, n, ++ parent_ino(file->f_dentry), DT_DIR); ++ break; ++ case 2: ++ err = (*filler)(data, "aquota.user", 11, n, ++ file->f_dentry->d_inode->i_ino ++ + USRQUOTA + 1, ++ DT_REG); ++ break; ++ case 3: ++ err = (*filler)(data, "aquota.group", 12, n, ++ file->f_dentry->d_inode->i_ino ++ + GRPQUOTA + 1, ++ DT_REG); ++ break; ++ default: ++ goto out; ++ } ++ } ++out: ++ file->f_pos = n; ++ return err; ++} ++ ++struct vzdq_aquotq_lookdata { ++ dev_t dev; ++ int type; ++}; ++ ++static int vzdq_aquotq_looktest(struct inode *inode, void *data) ++{ ++ struct vzdq_aquotq_lookdata *d; ++ ++ d = data; ++ return inode->i_op == &vzdq_aquotf_inode_operations && ++ vzdq_aquot_getidev(inode) == d->dev && ++ PROC_I(inode)->type == d->type + 1; ++} ++ ++static int vzdq_aquotq_lookset(struct inode *inode, void *data) ++{ ++ struct vzdq_aquotq_lookdata *d; ++ ++ d = data; ++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++ inode->i_ino = vzdq_aquot_getino(d->dev) + d->type + 1; ++ inode->i_mode = S_IFREG | S_IRUSR; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 1; ++ inode->i_op = 
&vzdq_aquotf_inode_operations; ++ inode->i_fop = &vzdq_aquotf_file_operations; ++ PROC_I(inode)->type = d->type + 1; ++ vzdq_aquot_setidev(inode, d->dev); ++ return 0; ++} ++ ++static struct dentry *vzdq_aquotq_lookup(struct inode *dir, ++ struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct inode *inode; ++ struct vzdq_aquotq_lookdata d; ++ int k; ++ ++ if (dentry->d_name.len == 11) { ++ if (memcmp(dentry->d_name.name, "aquota.user", 11)) ++ goto out; ++ k = USRQUOTA; ++ } else if (dentry->d_name.len == 12) { ++ if (memcmp(dentry->d_name.name, "aquota.group", 11)) ++ goto out; ++ k = GRPQUOTA; ++ } else ++ goto out; ++ d.dev = vzdq_aquot_getidev(dir); ++ d.type = k; ++ inode = iget5_locked(dir->i_sb, dir->i_ino + k + 1, ++ vzdq_aquotq_looktest, vzdq_aquotq_lookset, &d); ++ if (inode == NULL) ++ goto out; ++ unlock_new_inode(inode); ++ d_add(dentry, inode); ++ return NULL; ++ ++out: ++ return ERR_PTR(-ENOENT); ++} ++ ++static struct file_operations vzdq_aquotq_file_operations = { ++ .read = &generic_read_dir, ++ .readdir = &vzdq_aquotq_readdir, ++}; ++ ++static struct inode_operations vzdq_aquotq_inode_operations = { ++ .lookup = &vzdq_aquotq_lookup, ++}; ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * /proc/vz/vzaquota directory ++ * ++ * --------------------------------------------------------------------- */ ++ ++struct vzdq_aquot_de { ++ struct list_head list; ++ struct vfsmount *mnt; ++}; ++ ++static int vzdq_aquot_buildmntlist(struct ve_struct *ve, ++ struct list_head *head) ++{ ++ struct vfsmount *rmnt, *mnt; ++ struct vzdq_aquot_de *p; ++ int err; ++ ++#ifdef CONFIG_VE ++ rmnt = mntget(ve->fs_rootmnt); ++#else ++ read_lock(¤t->fs->lock); ++ rmnt = mntget(current->fs->rootmnt); ++ read_unlock(¤t->fs->lock); ++#endif ++ mnt = rmnt; ++ spin_lock(&vfsmount_lock); ++ while (1) { ++ list_for_each_entry(p, head, list) { ++ if (p->mnt->mnt_sb == mnt->mnt_sb) ++ goto skip; ++ } ++ ++ err = -ENOMEM; ++ p = 
kmalloc(sizeof(*p), GFP_KERNEL); ++ if (p == NULL) ++ goto out; ++ p->mnt = mntget(mnt); ++ list_add_tail(&p->list, head); ++ ++skip: ++ err = 0; ++ if (list_empty(&mnt->mnt_mounts)) { ++ while (1) { ++ if (mnt == rmnt) ++ goto out; ++ if (mnt->mnt_child.next != ++ &mnt->mnt_parent->mnt_mounts) ++ break; ++ mnt = mnt->mnt_parent; ++ } ++ mnt = list_entry(mnt->mnt_child.next, ++ struct vfsmount, mnt_child); ++ } else ++ mnt = list_entry(mnt->mnt_mounts.next, ++ struct vfsmount, mnt_child); ++ } ++out: ++ spin_unlock(&vfsmount_lock); ++ mntput(rmnt); ++ return err; ++} ++ ++static void vzdq_aquot_releasemntlist(struct ve_struct *ve, ++ struct list_head *head) ++{ ++ struct vzdq_aquot_de *p; ++ ++ while (!list_empty(head)) { ++ p = list_entry(head->next, typeof(*p), list); ++ mntput(p->mnt); ++ list_del(&p->list); ++ kfree(p); ++ } ++} ++ ++static int vzdq_aquotd_readdir(struct file *file, void *data, filldir_t filler) ++{ ++ struct ve_struct *ve, *old_ve; ++ struct list_head mntlist; ++ struct vzdq_aquot_de *de; ++ struct super_block *sb; ++ struct vz_quota_master *qmblk; ++ loff_t i, n; ++ char buf[24]; ++ int l, err; ++ ++ i = 0; ++ n = file->f_pos; ++ ve = VE_OWNER_FSTYPE(file->f_dentry->d_sb->s_type); ++ old_ve = set_exec_env(ve); ++ ++ INIT_LIST_HEAD(&mntlist); ++#ifdef CONFIG_VE ++ /* ++ * The only reason of disabling readdir for the host system is that ++ * this readdir can be slow and CPU consuming with large number of VPSs ++ * (or just mount points). 
++ */ ++ err = ve_is_super(ve); ++#else ++ err = 0; ++#endif ++ if (!err) { ++ err = vzdq_aquot_buildmntlist(ve, &mntlist); ++ if (err) ++ goto out_err; ++ } ++ ++ if (i >= n) { ++ if ((*filler)(data, ".", 1, i, ++ file->f_dentry->d_inode->i_ino, DT_DIR)) ++ goto out_fill; ++ } ++ i++; ++ ++ if (i >= n) { ++ if ((*filler)(data, "..", 2, i, ++ parent_ino(file->f_dentry), DT_DIR)) ++ goto out_fill; ++ } ++ i++; ++ ++ list_for_each_entry (de, &mntlist, list) { ++ sb = de->mnt->mnt_sb; ++#ifdef CONFIG_VE ++ if (get_device_perms_ve(S_IFBLK, sb->s_dev, FMODE_QUOTACTL)) ++ continue; ++#endif ++ qmblk = vzquota_find_qmblk(sb); ++ if (qmblk == NULL || qmblk == VZ_QUOTA_BAD) ++ continue; ++ ++ qmblk_put(qmblk); ++ i++; ++ if (i <= n) ++ continue; ++ ++ l = sprintf(buf, "%08x", new_encode_dev(sb->s_dev)); ++ if ((*filler)(data, buf, l, i - 1, ++ vzdq_aquot_getino(sb->s_dev), DT_DIR)) ++ break; ++ } ++ ++out_fill: ++ err = 0; ++ file->f_pos = i; ++out_err: ++ vzdq_aquot_releasemntlist(ve, &mntlist); ++ (void)set_exec_env(old_ve); ++ return err; ++} ++ ++static int vzdq_aquotd_looktest(struct inode *inode, void *data) ++{ ++ return inode->i_op == &vzdq_aquotq_inode_operations && ++ vzdq_aquot_getidev(inode) == (dev_t)(unsigned long)data; ++} ++ ++static int vzdq_aquotd_lookset(struct inode *inode, void *data) ++{ ++ dev_t dev; ++ ++ dev = (dev_t)(unsigned long)data; ++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++ inode->i_ino = vzdq_aquot_getino(dev); ++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 2; ++ inode->i_op = &vzdq_aquotq_inode_operations; ++ inode->i_fop = &vzdq_aquotq_file_operations; ++ vzdq_aquot_setidev(inode, dev); ++ return 0; ++} ++ ++static struct dentry *vzdq_aquotd_lookup(struct inode *dir, ++ struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct ve_struct *ve, *old_ve; ++ const unsigned char *s; ++ int l; ++ dev_t dev; ++ struct inode *inode; ++ ++ ve = 
VE_OWNER_FSTYPE(dir->i_sb->s_type); ++ old_ve = set_exec_env(ve); ++#ifdef CONFIG_VE ++ /* ++ * Lookup is much lighter than readdir, so it can be allowed for the ++ * host system. But it would be strange to be able to do lookup only ++ * without readdir... ++ */ ++ if (ve_is_super(ve)) ++ goto out; ++#endif ++ ++ dev = 0; ++ l = dentry->d_name.len; ++ if (l <= 0) ++ goto out; ++ for (s = dentry->d_name.name; l > 0; s++, l--) { ++ if (!isxdigit(*s)) ++ goto out; ++ if (dev & ~(~0UL >> 4)) ++ goto out; ++ dev <<= 4; ++ if (isdigit(*s)) ++ dev += *s - '0'; ++ else if (islower(*s)) ++ dev += *s - 'a' + 10; ++ else ++ dev += *s - 'A' + 10; ++ } ++ dev = new_decode_dev(dev); ++ ++#ifdef CONFIG_VE ++ if (get_device_perms_ve(S_IFBLK, dev, FMODE_QUOTACTL)) ++ goto out; ++#endif ++ ++ inode = iget5_locked(dir->i_sb, vzdq_aquot_getino(dev), ++ vzdq_aquotd_looktest, vzdq_aquotd_lookset, ++ (void *)(unsigned long)dev); ++ if (inode == NULL) ++ goto out; ++ unlock_new_inode(inode); ++ ++ d_add(dentry, inode); ++ (void)set_exec_env(old_ve); ++ return NULL; ++ ++out: ++ (void)set_exec_env(old_ve); ++ return ERR_PTR(-ENOENT); ++} ++ ++static struct file_operations vzdq_aquotd_file_operations = { ++ .read = &generic_read_dir, ++ .readdir = &vzdq_aquotd_readdir, ++}; ++ ++static struct inode_operations vzdq_aquotd_inode_operations = { ++ .lookup = &vzdq_aquotd_lookup, ++}; ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Initialization and deinitialization ++ * ++ * --------------------------------------------------------------------- */ ++ ++/* ++ * FIXME: creation of proc entries here is unsafe with respect to module ++ * unloading. 
++ */ ++void vzaquota_init(void) ++{ ++ struct proc_dir_entry *de; ++ ++ de = create_proc_glob_entry("vz/vzaquota", ++ S_IFDIR | S_IRUSR | S_IXUSR, NULL); ++ if (de != NULL) { ++ de->proc_iops = &vzdq_aquotd_inode_operations; ++ de->proc_fops = &vzdq_aquotd_file_operations; ++ } else ++ printk("VZDQ: vz/vzaquota creation failed\n"); ++#if defined(CONFIG_SYSCTL) ++ de = create_proc_glob_entry("sys/fs/quota", ++ S_IFDIR | S_IRUSR | S_IXUSR, NULL); ++ if (de == NULL) ++ printk("VZDQ: sys/fs/quota creation failed\n"); ++#endif ++} ++ ++void vzaquota_fini(void) ++{ ++} +diff -upr linux-2.6.16.orig/fs/vzdq_mgmt.c linux-2.6.16-026test015/fs/vzdq_mgmt.c +--- linux-2.6.16.orig/fs/vzdq_mgmt.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/fs/vzdq_mgmt.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,735 @@ ++/* ++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ */ ++ ++#include <linux/config.h> ++#include <linux/kernel.h> ++#include <linux/string.h> ++#include <linux/list.h> ++#include <asm/semaphore.h> ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/dcache.h> ++#include <linux/mount.h> ++#include <linux/namei.h> ++#include <linux/writeback.h> ++#include <linux/gfp.h> ++#include <asm/uaccess.h> ++#include <linux/proc_fs.h> ++#include <linux/quota.h> ++#include <linux/vzctl_quota.h> ++#include <linux/vzquota.h> ++ ++ ++/* ---------------------------------------------------------------------- ++ * Switching quota on. 
++ * --------------------------------------------------------------------- */ ++ ++/* ++ * check limits copied from user ++ */ ++int vzquota_check_sane_limits(struct dq_stat *qstat) ++{ ++ int err; ++ ++ err = -EINVAL; ++ ++ /* softlimit must be less then hardlimit */ ++ if (qstat->bsoftlimit > qstat->bhardlimit) ++ goto out; ++ ++ if (qstat->isoftlimit > qstat->ihardlimit) ++ goto out; ++ ++ err = 0; ++out: ++ return err; ++} ++ ++/* ++ * check usage values copied from user ++ */ ++int vzquota_check_sane_values(struct dq_stat *qstat) ++{ ++ int err; ++ ++ err = -EINVAL; ++ ++ /* expiration time must not be set if softlimit was not exceeded */ ++ if (qstat->bcurrent < qstat->bsoftlimit && qstat->btime != (time_t)0) ++ goto out; ++ ++ if (qstat->icurrent < qstat->isoftlimit && qstat->itime != (time_t)0) ++ goto out; ++ ++ err = vzquota_check_sane_limits(qstat); ++out: ++ return err; ++} ++ ++/* ++ * create new quota master block ++ * this function should: ++ * - copy limits and usage parameters from user buffer; ++ * - allock, initialize quota block and insert it to hash; ++ */ ++static int vzquota_create(unsigned int quota_id, struct vz_quota_stat *u_qstat) ++{ ++ int err; ++ struct vz_quota_stat qstat; ++ struct vz_quota_master *qmblk; ++ ++ down(&vz_quota_sem); ++ ++ err = -EFAULT; ++ if (copy_from_user(&qstat, u_qstat, sizeof(qstat))) ++ goto out; ++ ++ err = -EINVAL; ++ if (quota_id == 0) ++ goto out; ++ ++ if (vzquota_check_sane_values(&qstat.dq_stat)) ++ goto out; ++ err = 0; ++ qmblk = vzquota_alloc_master(quota_id, &qstat); ++ ++ if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */ ++ err = PTR_ERR(qmblk); ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++/** ++ * vzquota_on - turn quota on ++ * ++ * This function should: ++ * - find and get refcnt of directory entry for quota root and corresponding ++ * mountpoint; ++ * - find corresponding quota block and mark it with given path; ++ * - check quota tree; ++ * - initialize quota for the tree root. 
++ */ ++static int vzquota_on(unsigned int quota_id, const char *quota_root) ++{ ++ int err; ++ struct nameidata nd; ++ struct vz_quota_master *qmblk; ++ struct super_block *dqsb; ++ ++ dqsb = NULL; ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EBUSY; ++ if (qmblk->dq_state != VZDQ_STARTING) ++ goto out; ++ ++ err = user_path_walk(quota_root, &nd); ++ if (err) ++ goto out; ++ /* init path must be a directory */ ++ err = -ENOTDIR; ++ if (!S_ISDIR(nd.dentry->d_inode->i_mode)) ++ goto out_path; ++ ++ qmblk->dq_root_dentry = nd.dentry; ++ qmblk->dq_root_mnt = nd.mnt; ++ qmblk->dq_sb = nd.dentry->d_inode->i_sb; ++ err = vzquota_get_super(qmblk->dq_sb); ++ if (err) ++ goto out_super; ++ ++ /* ++ * Serialization with quota initialization and operations is performed ++ * through generation check: generation is memorized before qmblk is ++ * found and compared under inode_qmblk_lock with assignment. ++ * ++ * Note that the dentry tree is shrunk only for high-level logical ++ * serialization, purely as a courtesy to the user: to have consistent ++ * quota statistics, files should be closed etc. on quota on. ++ */ ++ err = vzquota_on_qmblk(qmblk->dq_sb, qmblk->dq_root_dentry->d_inode, ++ qmblk); ++ if (err) ++ goto out_init; ++ qmblk->dq_state = VZDQ_WORKING; ++ ++ up(&vz_quota_sem); ++ return 0; ++ ++out_init: ++ dqsb = qmblk->dq_sb; ++out_super: ++ /* clear for qmblk_put/quota_free_master */ ++ qmblk->dq_sb = NULL; ++ qmblk->dq_root_dentry = NULL; ++ qmblk->dq_root_mnt = NULL; ++out_path: ++ path_release(&nd); ++out: ++ if (dqsb) ++ vzquota_put_super(dqsb); ++ up(&vz_quota_sem); ++ return err; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * Switching quota off. 
++ * --------------------------------------------------------------------- */ ++ ++/* ++ * destroy quota block by ID ++ */ ++static int vzquota_destroy(unsigned int quota_id) ++{ ++ int err; ++ struct vz_quota_master *qmblk; ++ struct dentry *dentry; ++ struct vfsmount *mnt; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EBUSY; ++ if (qmblk->dq_state == VZDQ_WORKING) ++ goto out; /* quota_off first */ ++ ++ list_del_init(&qmblk->dq_hash); ++ dentry = qmblk->dq_root_dentry; ++ qmblk->dq_root_dentry = NULL; ++ mnt = qmblk->dq_root_mnt; ++ qmblk->dq_root_mnt = NULL; ++ ++ if (qmblk->dq_sb) ++ vzquota_put_super(qmblk->dq_sb); ++ up(&vz_quota_sem); ++ ++ qmblk_put(qmblk); ++ dput(dentry); ++ mntput(mnt); ++ return 0; ++ ++out: ++ up(&vz_quota_sem); ++ return err; ++} ++ ++/** ++ * vzquota_off - turn quota off ++ */ ++ ++static int __vzquota_sync_list(struct list_head *lh, ++ struct vz_quota_master *qmblk, ++ enum writeback_sync_modes sync_mode) ++{ ++ struct writeback_control wbc; ++ LIST_HEAD(list); ++ struct vz_quota_ilink *qlnk; ++ struct inode *inode; ++ int err; ++ ++ memset(&wbc, 0, sizeof(wbc)); ++ wbc.sync_mode = sync_mode; ++ ++ err = 0; ++ while (!list_empty(lh) && !err) { ++ if (need_resched()) { ++ inode_qmblk_unlock(qmblk->dq_sb); ++ schedule(); ++ inode_qmblk_lock(qmblk->dq_sb); ++ } ++ ++ qlnk = list_first_entry(lh, struct vz_quota_ilink, list); ++ list_move(&qlnk->list, &list); ++ ++ inode = igrab(QLNK_INODE(qlnk)); ++ if (!inode) ++ continue; ++ ++ inode_qmblk_unlock(qmblk->dq_sb); ++ ++ wbc.nr_to_write = LONG_MAX; ++ err = sync_inode(inode, &wbc); ++ iput(inode); ++ ++ inode_qmblk_lock(qmblk->dq_sb); ++ } ++ ++ list_splice(&list, lh); ++ return err; ++} ++ ++static int vzquota_sync_list(struct list_head *lh, ++ struct vz_quota_master *qmblk) ++{ ++ int err; ++ ++ err = __vzquota_sync_list(lh, qmblk, WB_SYNC_NONE); ++ if (err) ++ return err; ++ ++ err = 
__vzquota_sync_list(lh, qmblk, WB_SYNC_ALL); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++static int vzquota_sync_inodes(struct vz_quota_master *qmblk) ++{ ++ int err; ++ LIST_HEAD(qlnk_list); ++ ++ list_splice_init(&qmblk->dq_ilink_list, &qlnk_list); ++ err = vzquota_sync_list(&qlnk_list, qmblk); ++ if (!err && !list_empty(&qmblk->dq_ilink_list)) ++ err = -EBUSY; ++ list_splice(&qlnk_list, &qmblk->dq_ilink_list); ++ ++ return err; ++} ++ ++static int vzquota_off(unsigned int quota_id) ++{ ++ int err; ++ struct vz_quota_master *qmblk; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EALREADY; ++ if (qmblk->dq_state != VZDQ_WORKING) ++ goto out; ++ ++ inode_qmblk_lock(qmblk->dq_sb); /* protects dq_ilink_list also */ ++ err = vzquota_sync_inodes(qmblk); ++ if (err) ++ goto out_unlock; ++ inode_qmblk_unlock(qmblk->dq_sb); ++ ++ err = vzquota_off_qmblk(qmblk->dq_sb, qmblk); ++ if (err) ++ goto out; ++ ++ /* vzquota_destroy will free resources */ ++ qmblk->dq_state = VZDQ_STOPING; ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++ ++out_unlock: ++ inode_qmblk_unlock(qmblk->dq_sb); ++ goto out; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * Other VZQUOTA ioctl's. 
++ * --------------------------------------------------------------------- */ ++ ++/* ++ * this function should: ++ * - set new limits/buffer under quota master block lock ++ * - if new softlimit less then usage, then set expiration time ++ * - no need to alloc ugid hash table - we'll do that on demand ++ */ ++int vzquota_update_limit(struct dq_stat *_qstat, ++ struct dq_stat *qstat) ++{ ++ int err; ++ ++ err = -EINVAL; ++ if (vzquota_check_sane_limits(qstat)) ++ goto out; ++ ++ err = 0; ++ ++ /* limits */ ++ _qstat->bsoftlimit = qstat->bsoftlimit; ++ _qstat->bhardlimit = qstat->bhardlimit; ++ /* ++ * If the soft limit is exceeded, administrator can override the moment ++ * when the grace period for limit exceeding ends. ++ * Specifying the moment may be useful if the soft limit is set to be ++ * lower than the current usage. In the latter case, if the grace ++ * period end isn't specified, the grace period will start from the ++ * moment of the first write operation. ++ * There is a race with the user level. Soft limit may be already ++ * exceeded before the limit change, and grace period end calculated by ++ * the kernel will be overriden. User level may check if the limit is ++ * already exceeded, but check and set calls are not atomic. ++ * This race isn't dangerous. Under normal cicrumstances, the ++ * difference between the grace period end calculated by the kernel and ++ * the user level should be not greater than as the difference between ++ * the moments of check and set calls, i.e. not bigger than the quota ++ * timer resolution - 1 sec. ++ */ ++ if (qstat->btime != (time_t)0 && ++ _qstat->bcurrent >= _qstat->bsoftlimit) ++ _qstat->btime = qstat->btime; ++ ++ _qstat->isoftlimit = qstat->isoftlimit; ++ _qstat->ihardlimit = qstat->ihardlimit; ++ if (qstat->itime != (time_t)0 && ++ _qstat->icurrent >= _qstat->isoftlimit) ++ _qstat->itime = qstat->itime; ++ ++out: ++ return err; ++} ++ ++/* ++ * set new quota limits. 
++ * this function should: ++ * copy new limits from user level ++ * - find quota block ++ * - set new limits and flags. ++ */ ++static int vzquota_setlimit(unsigned int quota_id, ++ struct vz_quota_stat *u_qstat) ++{ ++ int err; ++ struct vz_quota_stat qstat; ++ struct vz_quota_master *qmblk; ++ ++ down(&vz_quota_sem); /* for hash list protection */ ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EFAULT; ++ if (copy_from_user(&qstat, u_qstat, sizeof(qstat))) ++ goto out; ++ ++ qmblk_data_write_lock(qmblk); ++ err = vzquota_update_limit(&qmblk->dq_stat, &qstat.dq_stat); ++ if (err == 0) ++ qmblk->dq_info = qstat.dq_info; ++ qmblk_data_write_unlock(qmblk); ++ ++out: ++ up(&vz_quota_sem); ++ return err; ++} ++ ++/* ++ * get quota limits. ++ * very simple - just return stat buffer to user ++ */ ++static int vzquota_getstat(unsigned int quota_id, ++ struct vz_quota_stat *u_qstat) ++{ ++ int err; ++ struct vz_quota_stat qstat; ++ struct vz_quota_master *qmblk; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ qmblk_data_read_lock(qmblk); ++ /* copy whole buffer under lock */ ++ memcpy(&qstat.dq_stat, &qmblk->dq_stat, sizeof(qstat.dq_stat)); ++ memcpy(&qstat.dq_info, &qmblk->dq_info, sizeof(qstat.dq_info)); ++ qmblk_data_read_unlock(qmblk); ++ ++ err = copy_to_user(u_qstat, &qstat, sizeof(qstat)); ++ if (err) ++ err = -EFAULT; ++ ++out: ++ up(&vz_quota_sem); ++ return err; ++} ++ ++/* ++ * This is a system call to turn per-VE disk quota on. 
++ * Note this call is allowed to run ONLY from VE0 ++ */ ++long do_vzquotactl(int cmd, unsigned int quota_id, ++ struct vz_quota_stat *qstat, const char *ve_root) ++{ ++ int ret; ++ ++ ret = -EPERM; ++ /* access allowed only from root of VE0 */ ++ if (!capable(CAP_SYS_RESOURCE) || ++ !capable(CAP_SYS_ADMIN)) ++ goto out; ++ ++ switch (cmd) { ++ case VZ_DQ_CREATE: ++ ret = vzquota_create(quota_id, qstat); ++ break; ++ case VZ_DQ_DESTROY: ++ ret = vzquota_destroy(quota_id); ++ break; ++ case VZ_DQ_ON: ++ ret = vzquota_on(quota_id, ve_root); ++ break; ++ case VZ_DQ_OFF: ++ ret = vzquota_off(quota_id); ++ break; ++ case VZ_DQ_SETLIMIT: ++ ret = vzquota_setlimit(quota_id, qstat); ++ break; ++ case VZ_DQ_GETSTAT: ++ ret = vzquota_getstat(quota_id, qstat); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ goto out; ++ } ++ ++out: ++ return ret; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * Proc filesystem routines ++ * ---------------------------------------------------------------------*/ ++ ++#if defined(CONFIG_PROC_FS) ++ ++#define QUOTA_UINT_LEN 15 ++#define QUOTA_TIME_LEN_FMT_UINT "%11u" ++#define QUOTA_NUM_LEN_FMT_UINT "%15u" ++#define QUOTA_NUM_LEN_FMT_ULL "%15Lu" ++#define QUOTA_TIME_LEN_FMT_STR "%11s" ++#define QUOTA_NUM_LEN_FMT_STR "%15s" ++#define QUOTA_PROC_MAX_LINE_LEN 2048 ++ ++/* ++ * prints /proc/ve_dq header line ++ */ ++static int print_proc_header(char * buffer) ++{ ++ return sprintf(buffer, ++ "%-11s" ++ QUOTA_NUM_LEN_FMT_STR ++ QUOTA_NUM_LEN_FMT_STR ++ QUOTA_NUM_LEN_FMT_STR ++ QUOTA_TIME_LEN_FMT_STR ++ QUOTA_TIME_LEN_FMT_STR ++ "\n", ++ "qid: path", ++ "usage", "softlimit", "hardlimit", "time", "expire"); ++} ++ ++/* ++ * prints proc master record id, dentry path ++ */ ++static int print_proc_master_id(char * buffer, char * path_buf, ++ struct vz_quota_master * qp) ++{ ++ char *path; ++ int over; ++ ++ path = NULL; ++ switch (qp->dq_state) { ++ case VZDQ_WORKING: ++ if (!path_buf) { ++ path = ""; ++ break; ++ 
} ++ path = d_path(qp->dq_root_dentry, ++ qp->dq_root_mnt, path_buf, PAGE_SIZE); ++ if (IS_ERR(path)) { ++ path = ""; ++ break; ++ } ++ /* do not print large path, truncate it */ ++ over = strlen(path) - ++ (QUOTA_PROC_MAX_LINE_LEN - 3 - 3 - ++ QUOTA_UINT_LEN); ++ if (over > 0) { ++ path += over - 3; ++ path[0] = path[1] = path[3] = '.'; ++ } ++ break; ++ case VZDQ_STARTING: ++ path = "-- started --"; ++ break; ++ case VZDQ_STOPING: ++ path = "-- stopped --"; ++ break; ++ } ++ ++ return sprintf(buffer, "%u: %s\n", qp->dq_id, path); ++} ++ ++/* ++ * prints struct vz_quota_stat data ++ */ ++static int print_proc_stat(char * buffer, struct dq_stat *qs, ++ struct dq_info *qi) ++{ ++ return sprintf(buffer, ++ "%11s" ++ QUOTA_NUM_LEN_FMT_ULL ++ QUOTA_NUM_LEN_FMT_ULL ++ QUOTA_NUM_LEN_FMT_ULL ++ QUOTA_TIME_LEN_FMT_UINT ++ QUOTA_TIME_LEN_FMT_UINT ++ "\n" ++ "%11s" ++ QUOTA_NUM_LEN_FMT_UINT ++ QUOTA_NUM_LEN_FMT_UINT ++ QUOTA_NUM_LEN_FMT_UINT ++ QUOTA_TIME_LEN_FMT_UINT ++ QUOTA_TIME_LEN_FMT_UINT ++ "\n", ++ "1k-blocks", ++ qs->bcurrent >> 10, ++ qs->bsoftlimit >> 10, ++ qs->bhardlimit >> 10, ++ (unsigned int)qs->btime, ++ (unsigned int)qi->bexpire, ++ "inodes", ++ qs->icurrent, ++ qs->isoftlimit, ++ qs->ihardlimit, ++ (unsigned int)qs->itime, ++ (unsigned int)qi->iexpire); ++} ++ ++ ++/* ++ * for /proc filesystem output ++ */ ++static int vzquota_read_proc(char *page, char **start, off_t off, int count, ++ int *eof, void *data) ++{ ++ int len, i; ++ off_t printed = 0; ++ char *p = page; ++ struct vz_quota_master *qp; ++ struct vz_quota_ilink *ql2; ++ struct list_head *listp; ++ char *path_buf; ++ ++ path_buf = (char*)__get_free_page(GFP_KERNEL); ++ if (path_buf == NULL) ++ return -ENOMEM; ++ ++ len = print_proc_header(p); ++ printed += len; ++ if (off < printed) /* keep header in output */ { ++ *start = p + off; ++ p += len; ++ } ++ ++ down(&vz_quota_sem); ++ ++ /* traverse master hash table for all records */ ++ for (i = 0; i < vzquota_hash_size; i++) { ++ 
list_for_each(listp, &vzquota_hash_table[i]) { ++ qp = list_entry(listp, ++ struct vz_quota_master, dq_hash); ++ ++ /* Skip other VE's information if not root of VE0 */ ++ if ((!capable(CAP_SYS_ADMIN) || ++ !capable(CAP_SYS_RESOURCE))) { ++ ql2 = INODE_QLNK(current->fs->root->d_inode); ++ if (ql2 == NULL || qp != ql2->qmblk) ++ continue; ++ } ++ /* ++ * Now print the next record ++ */ ++ len = 0; ++ /* we print quotaid and path only in VE0 */ ++ if (capable(CAP_SYS_ADMIN)) ++ len += print_proc_master_id(p+len,path_buf, qp); ++ len += print_proc_stat(p+len, &qp->dq_stat, ++ &qp->dq_info); ++ printed += len; ++ /* skip unnecessary lines */ ++ if (printed <= off) ++ continue; ++ p += len; ++ /* provide start offset */ ++ if (*start == NULL) ++ *start = p + (off - printed); ++ /* have we printed all requested size? */ ++ if (PAGE_SIZE - (p - page) < QUOTA_PROC_MAX_LINE_LEN || ++ (p - *start) >= count) ++ goto out; ++ } ++ } ++ ++ *eof = 1; /* checked all hash */ ++out: ++ up(&vz_quota_sem); ++ ++ len = 0; ++ if (*start != NULL) { ++ len = (p - *start); ++ if (len > count) ++ len = count; ++ } ++ ++ if (path_buf) ++ free_page((unsigned long) path_buf); ++ ++ return len; ++} ++ ++/* ++ * Register procfs read callback ++ */ ++int vzquota_proc_init(void) ++{ ++ struct proc_dir_entry *de; ++ ++ de = create_proc_entry("vz/vzquota", S_IFREG|S_IRUSR, NULL); ++ if (de == NULL) { ++ /* create "vz" subdirectory, if not exist */ ++ de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL); ++ if (de == NULL) ++ goto out_err; ++ de = create_proc_entry("vzquota", S_IFREG|S_IRUSR, de); ++ if (de == NULL) ++ goto out_err; ++ } ++ de->read_proc = vzquota_read_proc; ++ de->data = NULL; ++ return 0; ++out_err: ++ return -EBUSY; ++} ++ ++void vzquota_proc_release(void) ++{ ++ /* Unregister procfs read callback */ ++ remove_proc_entry("vz/vzquota", NULL); ++} ++ ++#endif +diff -upr linux-2.6.16.orig/fs/vzdq_ops.c linux-2.6.16-026test015/fs/vzdq_ops.c +--- linux-2.6.16.orig/fs/vzdq_ops.c 
2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/fs/vzdq_ops.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,565 @@ ++/* ++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ */ ++ ++#include <linux/config.h> ++#include <linux/kernel.h> ++#include <linux/types.h> ++#include <asm/semaphore.h> ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/quota.h> ++#include <linux/vzquota.h> ++ ++ ++/* ---------------------------------------------------------------------- ++ * Quota superblock operations - helper functions. ++ * --------------------------------------------------------------------- */ ++ ++static inline void vzquota_incr_inodes(struct dq_stat *dqstat, ++ unsigned long number) ++{ ++ dqstat->icurrent += number; ++} ++ ++static inline void vzquota_incr_space(struct dq_stat *dqstat, ++ __u64 number) ++{ ++ dqstat->bcurrent += number; ++} ++ ++static inline void vzquota_decr_inodes(struct dq_stat *dqstat, ++ unsigned long number) ++{ ++ if (dqstat->icurrent > number) ++ dqstat->icurrent -= number; ++ else ++ dqstat->icurrent = 0; ++ if (dqstat->icurrent < dqstat->isoftlimit) ++ dqstat->itime = (time_t) 0; ++} ++ ++static inline void vzquota_decr_space(struct dq_stat *dqstat, ++ __u64 number) ++{ ++ if (dqstat->bcurrent > number) ++ dqstat->bcurrent -= number; ++ else ++ dqstat->bcurrent = 0; ++ if (dqstat->bcurrent < dqstat->bsoftlimit) ++ dqstat->btime = (time_t) 0; ++} ++ ++/* ++ * better printk() message or use /proc/vzquotamsg interface ++ * similar to /proc/kmsg ++ */ ++static inline void vzquota_warn(struct dq_info *dq_info, int dq_id, int flag, ++ const char *fmt) ++{ ++ if (dq_info->flags & flag) /* warning already printed for this ++ masterblock */ ++ return; ++ printk(fmt, dq_id); ++ dq_info->flags |= flag; ++} ++ ++/* ++ * ignore_hardlimit - ++ * ++ * Intended to allow superuser of VE0 to overwrite hardlimits. 
++ * ++ * ignore_hardlimit() has a very bad feature: ++ * ++ * writepage() operation for writable mapping of a file with holes ++ * may trigger get_block() with wrong current and as a consequence, ++ * opens a possibility to overcommit hardlimits ++ */ ++/* for the reason above, it is disabled now */ ++static inline int ignore_hardlimit(struct dq_info *dqstat) ++{ ++#if 0 ++ return ve_is_super(get_exec_env()) && ++ capable(CAP_SYS_RESOURCE) && ++ (dqstat->options & VZ_QUOTA_OPT_RSQUASH); ++#else ++ return 0; ++#endif ++} ++ ++static int vzquota_check_inodes(struct dq_info *dq_info, ++ struct dq_stat *dqstat, ++ unsigned long number, int dq_id) ++{ ++ if (number == 0) ++ return QUOTA_OK; ++ ++ if (dqstat->icurrent + number > dqstat->ihardlimit && ++ !ignore_hardlimit(dq_info)) { ++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES, ++ "VZ QUOTA: file hardlimit reached for id=%d\n"); ++ return NO_QUOTA; ++ } ++ ++ if (dqstat->icurrent + number > dqstat->isoftlimit) { ++ if (dqstat->itime == (time_t)0) { ++ vzquota_warn(dq_info, dq_id, 0, ++ "VZ QUOTA: file softlimit exceeded " ++ "for id=%d\n"); ++ dqstat->itime = CURRENT_TIME_SECONDS + ++ dq_info->iexpire; ++ } else if (CURRENT_TIME_SECONDS >= dqstat->itime && ++ !ignore_hardlimit(dq_info)) { ++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES, ++ "VZ QUOTA: file softlimit expired " ++ "for id=%d\n"); ++ return NO_QUOTA; ++ } ++ } ++ ++ return QUOTA_OK; ++} ++ ++static int vzquota_check_space(struct dq_info *dq_info, ++ struct dq_stat *dqstat, ++ __u64 number, int dq_id, char prealloc) ++{ ++ if (number == 0) ++ return QUOTA_OK; ++ ++ if (dqstat->bcurrent + number > dqstat->bhardlimit && ++ !ignore_hardlimit(dq_info)) { ++ if (!prealloc) ++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE, ++ "VZ QUOTA: disk hardlimit reached " ++ "for id=%d\n"); ++ return NO_QUOTA; ++ } ++ ++ if (dqstat->bcurrent + number > dqstat->bsoftlimit) { ++ if (dqstat->btime == (time_t)0) { ++ if (!prealloc) { ++ vzquota_warn(dq_info, dq_id, 0, ++ "VZ 
QUOTA: disk softlimit exceeded " ++ "for id=%d\n"); ++ dqstat->btime = CURRENT_TIME_SECONDS ++ + dq_info->bexpire; ++ } else { ++ /* ++ * Original Linux quota doesn't allow ++ * preallocation to exceed softlimit so ++ * exceeding will be always printed ++ */ ++ return NO_QUOTA; ++ } ++ } else if (CURRENT_TIME_SECONDS >= dqstat->btime && ++ !ignore_hardlimit(dq_info)) { ++ if (!prealloc) ++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE, ++ "VZ QUOTA: disk quota " ++ "softlimit expired " ++ "for id=%d\n"); ++ return NO_QUOTA; ++ } ++ } ++ ++ return QUOTA_OK; ++} ++ ++static int vzquota_check_ugid_inodes(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid[], ++ int type, unsigned long number) ++{ ++ struct dq_info *dqinfo; ++ struct dq_stat *dqstat; ++ ++ if (qugid[type] == NULL) ++ return QUOTA_OK; ++ if (qugid[type] == VZ_QUOTA_UGBAD) ++ return NO_QUOTA; ++ ++ if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ++ return QUOTA_OK; ++ if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA)) ++ return QUOTA_OK; ++ if (number == 0) ++ return QUOTA_OK; ++ ++ dqinfo = &qmblk->dq_ugid_info[type]; ++ dqstat = &qugid[type]->qugid_stat; ++ ++ if (dqstat->ihardlimit != 0 && ++ dqstat->icurrent + number > dqstat->ihardlimit) ++ return NO_QUOTA; ++ ++ if (dqstat->isoftlimit != 0 && ++ dqstat->icurrent + number > dqstat->isoftlimit) { ++ if (dqstat->itime == (time_t)0) ++ dqstat->itime = CURRENT_TIME_SECONDS + ++ dqinfo->iexpire; ++ else if (CURRENT_TIME_SECONDS >= dqstat->itime) ++ return NO_QUOTA; ++ } ++ ++ return QUOTA_OK; ++} ++ ++static int vzquota_check_ugid_space(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid[], ++ int type, __u64 number, char prealloc) ++{ ++ struct dq_info *dqinfo; ++ struct dq_stat *dqstat; ++ ++ if (qugid[type] == NULL) ++ return QUOTA_OK; ++ if (qugid[type] == VZ_QUOTA_UGBAD) ++ return NO_QUOTA; ++ ++ if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ++ return QUOTA_OK; ++ if (type == GRPQUOTA && 
!(qmblk->dq_flags & VZDQ_GRPQUOTA)) ++ return QUOTA_OK; ++ if (number == 0) ++ return QUOTA_OK; ++ ++ dqinfo = &qmblk->dq_ugid_info[type]; ++ dqstat = &qugid[type]->qugid_stat; ++ ++ if (dqstat->bhardlimit != 0 && ++ dqstat->bcurrent + number > dqstat->bhardlimit) ++ return NO_QUOTA; ++ ++ if (dqstat->bsoftlimit != 0 && ++ dqstat->bcurrent + number > dqstat->bsoftlimit) { ++ if (dqstat->btime == (time_t)0) { ++ if (!prealloc) ++ dqstat->btime = CURRENT_TIME_SECONDS ++ + dqinfo->bexpire; ++ else ++ /* ++ * Original Linux quota doesn't allow ++ * preallocation to exceed softlimit so ++ * exceeding will be always printed ++ */ ++ return NO_QUOTA; ++ } else if (CURRENT_TIME_SECONDS >= dqstat->btime) ++ return NO_QUOTA; ++ } ++ ++ return QUOTA_OK; ++} ++ ++/* ---------------------------------------------------------------------- ++ * Quota superblock operations ++ * --------------------------------------------------------------------- */ ++ ++/* ++ * S_NOQUOTA note. ++ * In the current kernel (2.6.8.1), S_NOQUOTA flag is set only for ++ * - quota file (absent in our case) ++ * - after explicit DQUOT_DROP (earlier than clear_inode) in functions like ++ * filesystem-specific new_inode, before the inode gets outside links. ++ * For the latter case, the only quota operation where care about S_NOQUOTA ++ * might be required is vzquota_drop, but there S_NOQUOTA has already been ++ * checked in DQUOT_DROP(). ++ * So, S_NOQUOTA may be ignored for now in the VZDQ code. ++ * ++ * The above note is not entirely correct. ++ * Both for ext2 and ext3 filesystems, DQUOT_FREE_INODE is called from ++ * delete_inode if new_inode fails (for example, because of inode quota ++ * limits), so S_NOQUOTA check is needed in free_inode. ++ * This seems to be the dark corner of the current quota API. ++ */ ++ ++/* ++ * Initialize quota operations for the specified inode. 
++ */ ++static int vzquota_initialize(struct inode *inode, int type) ++{ ++ vzquota_inode_init_call(inode); ++ return 0; /* ignored by caller */ ++} ++ ++/* ++ * Release quota for the specified inode. ++ */ ++static int vzquota_drop(struct inode *inode) ++{ ++ vzquota_inode_drop_call(inode); ++ return 0; /* ignored by caller */ ++} ++ ++/* ++ * Allocate block callback. ++ * ++ * If (prealloc) disk quota exceeding warning is not printed. ++ * See Linux quota to know why. ++ * ++ * Return: ++ * QUOTA_OK == 0 on SUCCESS ++ * NO_QUOTA == 1 if allocation should fail ++ */ ++static int vzquota_alloc_space(struct inode *inode, ++ qsize_t number, int prealloc) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ int ret = QUOTA_OK; ++ ++ qmblk = vzquota_inode_data(inode, &data); ++ if (qmblk == VZ_QUOTA_BAD) ++ return NO_QUOTA; ++ if (qmblk != NULL) { ++#ifdef CONFIG_VZ_QUOTA_UGID ++ int cnt; ++ struct vz_quota_ugid * qugid[MAXQUOTAS]; ++#endif ++ ++ /* checking first */ ++ ret = vzquota_check_space(&qmblk->dq_info, &qmblk->dq_stat, ++ number, qmblk->dq_id, prealloc); ++ if (ret == NO_QUOTA) ++ goto no_quota; ++#ifdef CONFIG_VZ_QUOTA_UGID ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt]; ++ ret = vzquota_check_ugid_space(qmblk, qugid, ++ cnt, number, prealloc); ++ if (ret == NO_QUOTA) ++ goto no_quota; ++ } ++ /* check ok, may increment */ ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ if (qugid[cnt] == NULL) ++ continue; ++ vzquota_incr_space(&qugid[cnt]->qugid_stat, number); ++ } ++#endif ++ vzquota_incr_space(&qmblk->dq_stat, number); ++ vzquota_data_unlock(inode, &data); ++ } ++ ++ inode_add_bytes(inode, number); ++ might_sleep(); ++ return QUOTA_OK; ++ ++no_quota: ++ vzquota_data_unlock(inode, &data); ++ return NO_QUOTA; ++} ++ ++/* ++ * Allocate inodes callback. 
++ * ++ * Return: ++ * QUOTA_OK == 0 on SUCCESS ++ * NO_QUOTA == 1 if allocation should fail ++ */ ++static int vzquota_alloc_inode(const struct inode *inode, unsigned long number) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ int ret = QUOTA_OK; ++ ++ qmblk = vzquota_inode_data((struct inode *)inode, &data); ++ if (qmblk == VZ_QUOTA_BAD) ++ return NO_QUOTA; ++ if (qmblk != NULL) { ++#ifdef CONFIG_VZ_QUOTA_UGID ++ int cnt; ++ struct vz_quota_ugid *qugid[MAXQUOTAS]; ++#endif ++ ++ /* checking first */ ++ ret = vzquota_check_inodes(&qmblk->dq_info, &qmblk->dq_stat, ++ number, qmblk->dq_id); ++ if (ret == NO_QUOTA) ++ goto no_quota; ++#ifdef CONFIG_VZ_QUOTA_UGID ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt]; ++ ret = vzquota_check_ugid_inodes(qmblk, qugid, ++ cnt, number); ++ if (ret == NO_QUOTA) ++ goto no_quota; ++ } ++ /* check ok, may increment */ ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ if (qugid[cnt] == NULL) ++ continue; ++ vzquota_incr_inodes(&qugid[cnt]->qugid_stat, number); ++ } ++#endif ++ vzquota_incr_inodes(&qmblk->dq_stat, number); ++ vzquota_data_unlock((struct inode *)inode, &data); ++ } ++ ++ might_sleep(); ++ return QUOTA_OK; ++ ++no_quota: ++ vzquota_data_unlock((struct inode *)inode, &data); ++ return NO_QUOTA; ++} ++ ++/* ++ * Free space callback. 
++ */ ++static int vzquota_free_space(struct inode *inode, qsize_t number) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ ++ qmblk = vzquota_inode_data(inode, &data); ++ if (qmblk == VZ_QUOTA_BAD) ++ return NO_QUOTA; /* isn't checked by the caller */ ++ if (qmblk != NULL) { ++#ifdef CONFIG_VZ_QUOTA_UGID ++ int cnt; ++ struct vz_quota_ugid * qugid; ++#endif ++ ++ vzquota_decr_space(&qmblk->dq_stat, number); ++#ifdef CONFIG_VZ_QUOTA_UGID ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ qugid = INODE_QLNK(inode)->qugid[cnt]; ++ if (qugid == NULL || qugid == VZ_QUOTA_UGBAD) ++ continue; ++ vzquota_decr_space(&qugid->qugid_stat, number); ++ } ++#endif ++ vzquota_data_unlock(inode, &data); ++ } ++ inode_sub_bytes(inode, number); ++ might_sleep(); ++ return QUOTA_OK; ++} ++ ++/* ++ * Free inodes callback. ++ */ ++static int vzquota_free_inode(const struct inode *inode, unsigned long number) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ ++ if (IS_NOQUOTA(inode)) ++ return QUOTA_OK; ++ ++ qmblk = vzquota_inode_data((struct inode *)inode, &data); ++ if (qmblk == VZ_QUOTA_BAD) ++ return NO_QUOTA; ++ if (qmblk != NULL) { ++#ifdef CONFIG_VZ_QUOTA_UGID ++ int cnt; ++ struct vz_quota_ugid * qugid; ++#endif ++ ++ vzquota_decr_inodes(&qmblk->dq_stat, number); ++#ifdef CONFIG_VZ_QUOTA_UGID ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ qugid = INODE_QLNK(inode)->qugid[cnt]; ++ if (qugid == NULL || qugid == VZ_QUOTA_UGBAD) ++ continue; ++ vzquota_decr_inodes(&qugid->qugid_stat, number); ++ } ++#endif ++ vzquota_data_unlock((struct inode *)inode, &data); ++ } ++ might_sleep(); ++ return QUOTA_OK; ++} ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++ ++/* ++ * helper function for quota_transfer ++ * check that we can add inode to this quota_id ++ */ ++static int vzquota_transfer_check(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid[], ++ unsigned int type, __u64 size) ++{ ++ if (vzquota_check_ugid_space(qmblk, qugid, type, size, 0) 
!= QUOTA_OK || ++ vzquota_check_ugid_inodes(qmblk, qugid, type, 1) != QUOTA_OK) ++ return -1; ++ return 0; ++} ++ ++int vzquota_transfer_usage(struct inode *inode, ++ int mask, ++ struct vz_quota_ilink *qlnk) ++{ ++ struct vz_quota_ugid *qugid_old; ++ __u64 space; ++ int i; ++ ++ space = inode_get_bytes(inode); ++ for (i = 0; i < MAXQUOTAS; i++) { ++ if (!(mask & (1 << i))) ++ continue; ++ if (vzquota_transfer_check(qlnk->qmblk, qlnk->qugid, i, space)) ++ return -1; ++ } ++ ++ for (i = 0; i < MAXQUOTAS; i++) { ++ if (!(mask & (1 << i))) ++ continue; ++ qugid_old = INODE_QLNK(inode)->qugid[i]; ++ vzquota_decr_space(&qugid_old->qugid_stat, space); ++ vzquota_decr_inodes(&qugid_old->qugid_stat, 1); ++ vzquota_incr_space(&qlnk->qugid[i]->qugid_stat, space); ++ vzquota_incr_inodes(&qlnk->qugid[i]->qugid_stat, 1); ++ } ++ return 0; ++} ++ ++/* ++ * Transfer the inode between diffent user/group quotas. ++ */ ++static int vzquota_transfer(struct inode *inode, struct iattr *iattr) ++{ ++ return vzquota_inode_transfer_call(inode, iattr) ? ++ NO_QUOTA : QUOTA_OK; ++} ++ ++#else /* CONFIG_VZ_QUOTA_UGID */ ++ ++static int vzquota_transfer(struct inode *inode, struct iattr *iattr) ++{ ++ return QUOTA_OK; ++} ++ ++#endif ++ ++/* ++ * Called under following semaphores: ++ * old_d->d_inode->i_sb->s_vfs_rename_sem ++ * old_d->d_inode->i_sem ++ * new_d->d_inode->i_sem ++ * [not verified --SAW] ++ */ ++static int vzquota_rename(struct inode *inode, ++ struct inode *old_dir, struct inode *new_dir) ++{ ++ return vzquota_rename_check(inode, old_dir, new_dir) ? ++ NO_QUOTA : QUOTA_OK; ++} ++ ++/* ++ * Structure of superblock diskquota operations. 
++ */ ++struct dquot_operations vz_quota_operations = { ++ initialize: vzquota_initialize, ++ drop: vzquota_drop, ++ alloc_space: vzquota_alloc_space, ++ alloc_inode: vzquota_alloc_inode, ++ free_space: vzquota_free_space, ++ free_inode: vzquota_free_inode, ++ transfer: vzquota_transfer, ++ rename: vzquota_rename ++}; +diff -upr linux-2.6.16.orig/fs/vzdq_tree.c linux-2.6.16-026test015/fs/vzdq_tree.c +--- linux-2.6.16.orig/fs/vzdq_tree.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/fs/vzdq_tree.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,286 @@ ++/* ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * This file contains Virtuozzo quota tree implementation ++ */ ++ ++#include <linux/errno.h> ++#include <linux/slab.h> ++#include <linux/vzdq_tree.h> ++ ++struct quotatree_tree *quotatree_alloc(void) ++{ ++ int l; ++ struct quotatree_tree *tree; ++ ++ tree = kmalloc(sizeof(struct quotatree_tree), GFP_KERNEL); ++ if (tree == NULL) ++ goto out; ++ ++ for (l = 0; l < QUOTATREE_DEPTH; l++) { ++ INIT_LIST_HEAD(&tree->levels[l].usedlh); ++ INIT_LIST_HEAD(&tree->levels[l].freelh); ++ tree->levels[l].freenum = 0; ++ } ++ tree->root = NULL; ++ tree->leaf_num = 0; ++out: ++ return tree; ++} ++ ++static struct quotatree_node * ++quotatree_follow(struct quotatree_tree *tree, quotaid_t id, int level, ++ struct quotatree_find_state *st) ++{ ++ void **block; ++ struct quotatree_node *parent; ++ int l, index; ++ ++ parent = NULL; ++ block = (void **)&tree->root; ++ l = 0; ++ while (l < level && *block != NULL) { ++ index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK; ++ parent = *block; ++ block = parent->blocks + index; ++ l++; ++ } ++ if (st != NULL) { ++ st->block = block; ++ st->level = l; ++ } ++ ++ return parent; ++} ++ ++void *quotatree_find(struct quotatree_tree *tree, quotaid_t id, ++ struct quotatree_find_state *st) ++{ ++ quotatree_follow(tree, id, QUOTATREE_DEPTH, st); 
++ if (st->level == QUOTATREE_DEPTH) ++ return *st->block; ++ else ++ return NULL; ++} ++ ++void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index) ++{ ++ int i, count; ++ struct quotatree_node *p; ++ void *leaf; ++ ++ if (QTREE_LEAFNUM(tree) <= index) ++ return NULL; ++ ++ count = 0; ++ list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) { ++ for (i = 0; i < QUOTATREE_BSIZE; i++) { ++ leaf = p->blocks[i]; ++ if (leaf == NULL) ++ continue; ++ if (count == index) ++ return leaf; ++ count++; ++ } ++ } ++ return NULL; ++} ++ ++/* returns data leaf (vz_quota_ugid) after _existent_ ugid (@id) ++ * in the tree... */ ++void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id) ++{ ++ int off; ++ struct quotatree_node *parent, *p; ++ struct list_head *lh; ++ ++ /* get parent refering correct quota tree node of the last level */ ++ parent = quotatree_follow(tree, id, QUOTATREE_DEPTH, NULL); ++ if (!parent) ++ return NULL; ++ ++ off = (id & QUOTATREE_BMASK) + 1; /* next ugid */ ++ lh = &parent->list; ++ do { ++ p = list_entry(lh, struct quotatree_node, list); ++ for ( ; off < QUOTATREE_BSIZE; off++) ++ if (p->blocks[off]) ++ return p->blocks[off]; ++ off = 0; ++ lh = lh->next; ++ } while (lh != &QTREE_LEAFLVL(tree)->usedlh); ++ ++ return NULL; ++} ++ ++int quotatree_insert(struct quotatree_tree *tree, quotaid_t id, ++ struct quotatree_find_state *st, void *data) ++{ ++ struct quotatree_node *p; ++ int l, index; ++ ++ while (st->level < QUOTATREE_DEPTH) { ++ l = st->level; ++ if (!list_empty(&tree->levels[l].freelh)) { ++ p = list_entry(tree->levels[l].freelh.next, ++ struct quotatree_node, list); ++ list_del(&p->list); ++ } else { ++ p = kmalloc(sizeof(struct quotatree_node), GFP_NOFS | __GFP_NOFAIL); ++ if (p == NULL) ++ return -ENOMEM; ++ /* save block number in the l-level ++ * it uses for quota file generation */ ++ p->num = tree->levels[l].freenum++; ++ } ++ list_add(&p->list, &tree->levels[l].usedlh); ++ memset(p->blocks, 0, 
sizeof(p->blocks)); ++ *st->block = p; ++ ++ index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK; ++ st->block = p->blocks + index; ++ st->level++; ++ } ++ tree->leaf_num++; ++ *st->block = data; ++ ++ return 0; ++} ++ ++static struct quotatree_node * ++quotatree_remove_ptr(struct quotatree_tree *tree, quotaid_t id, ++ int level) ++{ ++ struct quotatree_node *parent; ++ struct quotatree_find_state st; ++ ++ parent = quotatree_follow(tree, id, level, &st); ++ if (st.level == QUOTATREE_DEPTH) ++ tree->leaf_num--; ++ *st.block = NULL; ++ return parent; ++} ++ ++void quotatree_remove(struct quotatree_tree *tree, quotaid_t id) ++{ ++ struct quotatree_node *p; ++ int level, i; ++ ++ p = quotatree_remove_ptr(tree, id, QUOTATREE_DEPTH); ++ for (level = QUOTATREE_DEPTH - 1; level >= QUOTATREE_CDEPTH; level--) { ++ for (i = 0; i < QUOTATREE_BSIZE; i++) ++ if (p->blocks[i] != NULL) ++ return; ++ list_move(&p->list, &tree->levels[level].freelh); ++ p = quotatree_remove_ptr(tree, id, level); ++ } ++} ++ ++#if 0 ++static void quotatree_walk(struct quotatree_tree *tree, ++ struct quotatree_node *node_start, ++ quotaid_t id_start, ++ int level_start, int level_end, ++ int (*callback)(struct quotatree_tree *, ++ quotaid_t id, ++ int level, ++ void *ptr, ++ void *data), ++ void *data) ++{ ++ struct quotatree_node *p; ++ int l, shift, index; ++ quotaid_t id; ++ struct quotatree_find_state st; ++ ++ p = node_start; ++ l = level_start; ++ shift = (QUOTATREE_DEPTH - l) * QUOTAID_BBITS; ++ id = id_start; ++ index = 0; ++ ++ /* ++ * Invariants: ++ * shift == (QUOTATREE_DEPTH - l) * QUOTAID_BBITS; ++ * id & ((1 << shift) - 1) == 0 ++ * p is l-level node corresponding to id ++ */ ++ do { ++ if (!p) ++ break; ++ ++ if (l < level_end) { ++ for (; index < QUOTATREE_BSIZE; index++) ++ if (p->blocks[index] != NULL) ++ break; ++ if (index < QUOTATREE_BSIZE) { ++ /* descend */ ++ p = p->blocks[index]; ++ l++; ++ shift -= QUOTAID_BBITS; ++ id += (quotaid_t)index << shift; ++ index = 0; ++ 
continue; ++ } ++ } ++ ++ if ((*callback)(tree, id, l, p, data)) ++ break; ++ ++ /* ascend and to the next node */ ++ p = quotatree_follow(tree, id, l, &st); ++ ++ index = ((id >> shift) & QUOTATREE_BMASK) + 1; ++ l--; ++ shift += QUOTAID_BBITS; ++ id &= ~(((quotaid_t)1 << shift) - 1); ++ } while (l >= level_start); ++} ++#endif ++ ++static void free_list(struct list_head *node_list) ++{ ++ struct quotatree_node *p, *tmp; ++ ++ list_for_each_entry_safe(p, tmp, node_list, list) { ++ list_del(&p->list); ++ kfree(p); ++ } ++} ++ ++static inline void quotatree_free_nodes(struct quotatree_tree *tree) ++{ ++ int i; ++ ++ for (i = 0; i < QUOTATREE_DEPTH; i++) { ++ free_list(&tree->levels[i].usedlh); ++ free_list(&tree->levels[i].freelh); ++ } ++} ++ ++static void quotatree_free_leafs(struct quotatree_tree *tree, ++ void (*dtor)(void *)) ++{ ++ int i; ++ struct quotatree_node *p; ++ ++ list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) { ++ for (i = 0; i < QUOTATREE_BSIZE; i++) { ++ if (p->blocks[i] == NULL) ++ continue; ++ ++ dtor(p->blocks[i]); ++ } ++ } ++} ++ ++void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *)) ++{ ++ quotatree_free_leafs(tree, dtor); ++ quotatree_free_nodes(tree); ++ kfree(tree); ++} +diff -upr linux-2.6.16.orig/fs/vzdq_ugid.c linux-2.6.16-026test015/fs/vzdq_ugid.c +--- linux-2.6.16.orig/fs/vzdq_ugid.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/fs/vzdq_ugid.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,1130 @@ ++/* ++ * Copyright (C) 2002 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ * This file contains Virtuozzo UID/GID disk quota implementation ++ */ ++ ++#include <linux/config.h> ++#include <linux/string.h> ++#include <linux/slab.h> ++#include <linux/list.h> ++#include <linux/smp_lock.h> ++#include <linux/rcupdate.h> ++#include <asm/uaccess.h> ++#include <linux/proc_fs.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/quota.h> ++#include <linux/quotaio_v2.h> ++#include <linux/virtinfo.h> ++ ++#include <linux/vzctl.h> ++#include <linux/vzctl_quota.h> ++#include <linux/vzquota.h> ++ ++/* ++ * XXX ++ * may be something is needed for sb->s_dquot->info[]? ++ */ ++ ++#define USRQUOTA_MASK (1 << USRQUOTA) ++#define GRPQUOTA_MASK (1 << GRPQUOTA) ++#define QTYPE2MASK(type) (1 << (type)) ++ ++static kmem_cache_t *vz_quota_ugid_cachep; ++ ++/* guard to protect vz_quota_master from destroy in quota_on/off. Also protects ++ * list on the hash table */ ++extern struct semaphore vz_quota_sem; ++ ++inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid) ++{ ++ if (qugid != VZ_QUOTA_UGBAD) ++ atomic_inc(&qugid->qugid_count); ++ return qugid; ++} ++ ++/* we don't limit users with zero limits */ ++static inline int vzquota_fake_stat(struct dq_stat *stat) ++{ ++ return stat->bhardlimit == 0 && stat->bsoftlimit == 0 && ++ stat->ihardlimit == 0 && stat->isoftlimit == 0; ++} ++ ++/* callback function for quotatree_free() */ ++static inline void vzquota_free_qugid(void *ptr) ++{ ++ kmem_cache_free(vz_quota_ugid_cachep, ptr); ++} ++ ++/* ++ * destroy ugid, if it have zero refcount, limits and usage ++ * must be called under qmblk->dq_sem ++ */ ++void vzquota_put_ugid(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid) ++{ ++ if (qugid == VZ_QUOTA_UGBAD) ++ return; ++ qmblk_data_read_lock(qmblk); ++ if (atomic_dec_and_test(&qugid->qugid_count) && ++ (qmblk->dq_flags & VZDQUG_FIXED_SET) == 0 && ++ vzquota_fake_stat(&qugid->qugid_stat) && ++ qugid->qugid_stat.bcurrent == 0 && ++ qugid->qugid_stat.icurrent 
== 0) { ++ quotatree_remove(QUGID_TREE(qmblk, qugid->qugid_type), ++ qugid->qugid_id); ++ qmblk->dq_ugid_count--; ++ vzquota_free_qugid(qugid); ++ } ++ qmblk_data_read_unlock(qmblk); ++} ++ ++/* ++ * Get ugid block by its index, like it would present in array. ++ * In reality, this is not array - this is leafs chain of the tree. ++ * NULL if index is out of range. ++ * qmblk semaphore is required to protect the tree. ++ */ ++static inline struct vz_quota_ugid * ++vzquota_get_byindex(struct vz_quota_master *qmblk, unsigned int index, int type) ++{ ++ return quotatree_leaf_byindex(QUGID_TREE(qmblk, type), index); ++} ++ ++/* ++ * get next element from ugid "virtual array" ++ * ugid must be in current array and this array may not be changed between ++ * two accesses (quaranteed by "stopped" quota state and quota semaphore) ++ * qmblk semaphore is required to protect the tree ++ */ ++static inline struct vz_quota_ugid * ++vzquota_get_next(struct vz_quota_master *qmblk, struct vz_quota_ugid *qugid) ++{ ++ return quotatree_get_next(QUGID_TREE(qmblk, qugid->qugid_type), ++ qugid->qugid_id); ++} ++ ++/* ++ * requires dq_sem ++ */ ++struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk, ++ unsigned int quota_id, int type, int flags) ++{ ++ struct vz_quota_ugid *qugid; ++ struct quotatree_tree *tree; ++ struct quotatree_find_state st; ++ ++ tree = QUGID_TREE(qmblk, type); ++ qugid = quotatree_find(tree, quota_id, &st); ++ if (qugid) ++ goto success; ++ ++ /* caller does not want alloc */ ++ if (flags & VZDQUG_FIND_DONT_ALLOC) ++ goto fail; ++ ++ if (flags & VZDQUG_FIND_FAKE) ++ goto doit; ++ ++ /* check limit */ ++ if (qmblk->dq_ugid_count >= qmblk->dq_ugid_max) ++ goto fail; ++ ++ /* see comment at VZDQUG_FIXED_SET define */ ++ if (qmblk->dq_flags & VZDQUG_FIXED_SET) ++ goto fail; ++ ++doit: ++ /* alloc new structure */ ++ qugid = kmem_cache_alloc(vz_quota_ugid_cachep, ++ SLAB_NOFS | __GFP_NOFAIL); ++ if (qugid == NULL) ++ goto fail; ++ ++ /* initialize 
new structure */ ++ qugid->qugid_id = quota_id; ++ memset(&qugid->qugid_stat, 0, sizeof(qugid->qugid_stat)); ++ qugid->qugid_type = type; ++ atomic_set(&qugid->qugid_count, 0); ++ ++ /* insert in tree */ ++ if (quotatree_insert(tree, quota_id, &st, qugid) < 0) ++ goto fail_insert; ++ qmblk->dq_ugid_count++; ++ ++success: ++ vzquota_get_ugid(qugid); ++ return qugid; ++ ++fail_insert: ++ vzquota_free_qugid(qugid); ++fail: ++ return VZ_QUOTA_UGBAD; ++} ++ ++/* ++ * takes dq_sem, may schedule ++ */ ++struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk, ++ unsigned int quota_id, int type, int flags) ++{ ++ struct vz_quota_ugid *qugid; ++ ++ down(&qmblk->dq_sem); ++ qugid = __vzquota_find_ugid(qmblk, quota_id, type, flags); ++ up(&qmblk->dq_sem); ++ ++ return qugid; ++} ++ ++/* ++ * destroy all ugid records on given quota master ++ */ ++void vzquota_kill_ugid(struct vz_quota_master *qmblk) ++{ ++ BUG_ON((qmblk->dq_gid_tree == NULL && qmblk->dq_uid_tree != NULL) || ++ (qmblk->dq_uid_tree == NULL && qmblk->dq_gid_tree != NULL)); ++ ++ if (qmblk->dq_uid_tree != NULL) { ++ quotatree_free(qmblk->dq_uid_tree, vzquota_free_qugid); ++ quotatree_free(qmblk->dq_gid_tree, vzquota_free_qugid); ++ } ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * Management interface to ugid quota for (super)users. ++ * --------------------------------------------------------------------- */ ++ ++/** ++ * vzquota_find_qmblk - helper to emulate quota on virtual filesystems ++ * ++ * This function finds a quota master block corresponding to the root of ++ * a virtual filesystem. ++ * Returns a quota master block with reference taken, or %NULL if not under ++ * quota, or %VZ_QUOTA_BAD if quota inconsistency is found (and all allocation ++ * operations will fail). ++ * ++ * Note: this function uses vzquota_inode_qmblk(). 
++ * The latter is a rather confusing function: it returns qmblk that used to be ++ * on the inode some time ago (without guarantee that it still has any ++ * relations to the inode). So, vzquota_find_qmblk() leaves it up to the ++ * caller to think whether the inode could have changed its qmblk and what to ++ * do in that case. ++ * Currently, the callers appear to not care :( ++ */ ++struct vz_quota_master *vzquota_find_qmblk(struct super_block *sb) ++{ ++ struct inode *qrinode; ++ struct vz_quota_master *qmblk; ++ ++ qmblk = NULL; ++ qrinode = NULL; ++ if (sb->s_op->get_quota_root != NULL) ++ qrinode = sb->s_op->get_quota_root(sb); ++ if (qrinode != NULL) ++ qmblk = vzquota_inode_qmblk(qrinode); ++ return qmblk; ++} ++ ++static int vzquota_initialize2(struct inode *inode, int type) ++{ ++ return QUOTA_OK; ++} ++ ++static int vzquota_drop2(struct inode *inode) ++{ ++ return QUOTA_OK; ++} ++ ++static int vzquota_alloc_space2(struct inode *inode, ++ qsize_t number, int prealloc) ++{ ++ inode_add_bytes(inode, number); ++ return QUOTA_OK; ++} ++ ++static int vzquota_alloc_inode2(const struct inode *inode, unsigned long number) ++{ ++ return QUOTA_OK; ++} ++ ++static int vzquota_free_space2(struct inode *inode, qsize_t number) ++{ ++ inode_sub_bytes(inode, number); ++ return QUOTA_OK; ++} ++ ++static int vzquota_free_inode2(const struct inode *inode, unsigned long number) ++{ ++ return QUOTA_OK; ++} ++ ++static int vzquota_transfer2(struct inode *inode, struct iattr *iattr) ++{ ++ return QUOTA_OK; ++} ++ ++struct dquot_operations vz_quota_operations2 = { ++ initialize: vzquota_initialize2, ++ drop: vzquota_drop2, ++ alloc_space: vzquota_alloc_space2, ++ alloc_inode: vzquota_alloc_inode2, ++ free_space: vzquota_free_space2, ++ free_inode: vzquota_free_inode2, ++ transfer: vzquota_transfer2 ++}; ++ ++static int vz_quota_on(struct super_block *sb, int type, ++ int format_id, char *path) ++{ ++ struct vz_quota_master *qmblk; ++ int mask, mask2; ++ int err; ++ ++ qmblk = 
vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ mask = 0; ++ mask2 = 0; ++ sb->dq_op = &vz_quota_operations2; ++ sb->s_qcop = &vz_quotactl_operations; ++ if (type == USRQUOTA) { ++ mask = DQUOT_USR_ENABLED; ++ mask2 = VZDQ_USRQUOTA; ++ } ++ if (type == GRPQUOTA) { ++ mask = DQUOT_GRP_ENABLED; ++ mask2 = VZDQ_GRPQUOTA; ++ } ++ err = -EBUSY; ++ if (qmblk->dq_flags & mask2) ++ goto out; ++ ++ err = 0; ++ qmblk->dq_flags |= mask2; ++ sb->s_dquot.flags |= mask; ++ ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++static int vz_quota_off(struct super_block *sb, int type) ++{ ++ struct vz_quota_master *qmblk; ++ int mask2; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ mask2 = 0; ++ if (type == USRQUOTA) ++ mask2 = VZDQ_USRQUOTA; ++ if (type == GRPQUOTA) ++ mask2 = VZDQ_GRPQUOTA; ++ err = -EINVAL; ++ if (!(qmblk->dq_flags & mask2)) ++ goto out; ++ ++ qmblk->dq_flags &= ~mask2; ++ err = 0; ++ ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++static int vz_quota_sync(struct super_block *sb, int type) ++{ ++ return 0; /* vz quota is always uptodate */ ++} ++ ++static int vz_get_dqblk(struct super_block *sb, int type, ++ qid_t id, struct if_dqblk *di) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid *ugid; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ err = 0; ++ ugid = vzquota_find_ugid(qmblk, id, type, VZDQUG_FIND_DONT_ALLOC); ++ if (ugid != VZ_QUOTA_UGBAD) { ++ qmblk_data_read_lock(qmblk); ++ di->dqb_bhardlimit = 
ugid->qugid_stat.bhardlimit >> 10; ++ di->dqb_bsoftlimit = ugid->qugid_stat.bsoftlimit >> 10; ++ di->dqb_curspace = ugid->qugid_stat.bcurrent; ++ di->dqb_ihardlimit = ugid->qugid_stat.ihardlimit; ++ di->dqb_isoftlimit = ugid->qugid_stat.isoftlimit; ++ di->dqb_curinodes = ugid->qugid_stat.icurrent; ++ di->dqb_btime = ugid->qugid_stat.btime; ++ di->dqb_itime = ugid->qugid_stat.itime; ++ qmblk_data_read_unlock(qmblk); ++ di->dqb_valid = QIF_ALL; ++ vzquota_put_ugid(qmblk, ugid); ++ } else { ++ memset(di, 0, sizeof(*di)); ++ di->dqb_valid = QIF_ALL; ++ } ++ ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++/* must be called under vz_quota_sem */ ++static int __vz_set_dqblk(struct vz_quota_master *qmblk, ++ int type, qid_t id, struct if_dqblk *di) ++{ ++ struct vz_quota_ugid *ugid; ++ ++ ugid = vzquota_find_ugid(qmblk, id, type, 0); ++ if (ugid == VZ_QUOTA_UGBAD) ++ return -ESRCH; ++ ++ qmblk_data_write_lock(qmblk); ++ /* ++ * Subtle compatibility breakage. ++ * ++ * Some old non-vz kernel quota didn't start grace period ++ * if the new soft limit happens to be below the usage. ++ * Non-vz kernel quota in 2.4.20 starts the grace period ++ * (if it hasn't been started). ++ * Current non-vz kernel performs even more complicated ++ * manipulations... ++ * ++ * Also, current non-vz kernels have inconsistency related to ++ * the grace time start. In regular operations the grace period ++ * is started if the usage is greater than the soft limit (and, ++ * strangely, is cancelled if the usage is less). ++ * However, set_dqblk starts the grace period if the usage is greater ++ * or equal to the soft limit. ++ * ++ * Here we try to mimic the behavior of the current non-vz kernel. 
++ */ ++ if (di->dqb_valid & QIF_BLIMITS) { ++ ugid->qugid_stat.bhardlimit = ++ (__u64)di->dqb_bhardlimit << 10; ++ ugid->qugid_stat.bsoftlimit = ++ (__u64)di->dqb_bsoftlimit << 10; ++ if (di->dqb_bsoftlimit == 0 || ++ ugid->qugid_stat.bcurrent < ugid->qugid_stat.bsoftlimit) ++ ugid->qugid_stat.btime = 0; ++ else if (!(di->dqb_valid & QIF_BTIME)) ++ ugid->qugid_stat.btime = CURRENT_TIME_SECONDS ++ + qmblk->dq_ugid_info[type].bexpire; ++ else ++ ugid->qugid_stat.btime = di->dqb_btime; ++ } ++ if (di->dqb_valid & QIF_ILIMITS) { ++ ugid->qugid_stat.ihardlimit = di->dqb_ihardlimit; ++ ugid->qugid_stat.isoftlimit = di->dqb_isoftlimit; ++ if (di->dqb_isoftlimit == 0 || ++ ugid->qugid_stat.icurrent < ugid->qugid_stat.isoftlimit) ++ ugid->qugid_stat.itime = 0; ++ else if (!(di->dqb_valid & QIF_ITIME)) ++ ugid->qugid_stat.itime = CURRENT_TIME_SECONDS ++ + qmblk->dq_ugid_info[type].iexpire; ++ else ++ ugid->qugid_stat.itime = di->dqb_itime; ++ } ++ qmblk_data_write_unlock(qmblk); ++ vzquota_put_ugid(qmblk, ugid); ++ ++ return 0; ++} ++ ++static int vz_set_dqblk(struct super_block *sb, int type, ++ qid_t id, struct if_dqblk *di) ++{ ++ struct vz_quota_master *qmblk; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ err = __vz_set_dqblk(qmblk, type, id, di); ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++static int vz_get_dqinfo(struct super_block *sb, int type, ++ struct if_dqinfo *ii) ++{ ++ struct vz_quota_master *qmblk; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ err = 0; ++ ii->dqi_bgrace = qmblk->dq_ugid_info[type].bexpire; ++ ii->dqi_igrace = qmblk->dq_ugid_info[type].iexpire; ++ ii->dqi_flags = 0; ++ ii->dqi_valid = 
IIF_ALL; ++ ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++/* must be called under vz_quota_sem */ ++static int __vz_set_dqinfo(struct vz_quota_master *qmblk, ++ int type, struct if_dqinfo *ii) ++{ ++ if (ii->dqi_valid & IIF_FLAGS) ++ if (ii->dqi_flags & DQF_MASK) ++ return -EINVAL; ++ ++ if (ii->dqi_valid & IIF_BGRACE) ++ qmblk->dq_ugid_info[type].bexpire = ii->dqi_bgrace; ++ if (ii->dqi_valid & IIF_IGRACE) ++ qmblk->dq_ugid_info[type].iexpire = ii->dqi_igrace; ++ return 0; ++} ++ ++static int vz_set_dqinfo(struct super_block *sb, int type, ++ struct if_dqinfo *ii) ++{ ++ struct vz_quota_master *qmblk; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ err = __vz_set_dqinfo(qmblk, type, ii); ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++#ifdef CONFIG_QUOTA_COMPAT ++ ++#define Q_GETQUOTI_SIZE 1024 ++ ++#define UGID2DQBLK(dst, src) \ ++ do { \ ++ (dst)->dqb_ihardlimit = (src)->qugid_stat.ihardlimit; \ ++ (dst)->dqb_isoftlimit = (src)->qugid_stat.isoftlimit; \ ++ (dst)->dqb_curinodes = (src)->qugid_stat.icurrent; \ ++ /* in 1K blocks */ \ ++ (dst)->dqb_bhardlimit = (src)->qugid_stat.bhardlimit >> 10; \ ++ /* in 1K blocks */ \ ++ (dst)->dqb_bsoftlimit = (src)->qugid_stat.bsoftlimit >> 10; \ ++ /* in bytes, 64 bit */ \ ++ (dst)->dqb_curspace = (src)->qugid_stat.bcurrent; \ ++ (dst)->dqb_btime = (src)->qugid_stat.btime; \ ++ (dst)->dqb_itime = (src)->qugid_stat.itime; \ ++ } while (0) ++ ++static int vz_get_quoti(struct super_block *sb, int type, qid_t idx, ++ struct v2_disk_dqblk *dqblk) ++{ ++ struct vz_quota_master *qmblk; ++ struct v2_disk_dqblk *data, *kbuf; ++ struct vz_quota_ugid *ugid; ++ int count; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ err = -ESRCH; ++ if (qmblk == 
NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ err = -ENOMEM; ++ kbuf = vmalloc(Q_GETQUOTI_SIZE * sizeof(*kbuf)); ++ if (!kbuf) ++ goto out; ++ ++ down(&vz_quota_sem); ++ down(&qmblk->dq_sem); ++ for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0; ++ ugid != NULL && count < Q_GETQUOTI_SIZE; ++ count++) ++ { ++ data = kbuf + count; ++ qmblk_data_read_lock(qmblk); ++ UGID2DQBLK(data, ugid); ++ qmblk_data_read_unlock(qmblk); ++ data->dqb_id = ugid->qugid_id; ++ ++ /* Find next entry */ ++ ugid = vzquota_get_next(qmblk, ugid); ++ BUG_ON(ugid != NULL && ugid->qugid_type != type); ++ } ++ up(&qmblk->dq_sem); ++ up(&vz_quota_sem); ++ ++ err = count; ++ if (copy_to_user(dqblk, kbuf, count * sizeof(*kbuf))) ++ err = -EFAULT; ++ ++ vfree(kbuf); ++out: ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ ++ return err; ++} ++ ++#endif ++ ++struct quotactl_ops vz_quotactl_operations = { ++ quota_on: vz_quota_on, ++ quota_off: vz_quota_off, ++ quota_sync: vz_quota_sync, ++ get_info: vz_get_dqinfo, ++ set_info: vz_set_dqinfo, ++ get_dqblk: vz_get_dqblk, ++ set_dqblk: vz_set_dqblk, ++#ifdef CONFIG_QUOTA_COMPAT ++ get_quoti: vz_get_quoti ++#endif ++}; ++ ++ ++/* ---------------------------------------------------------------------- ++ * Management interface for host system admins. 
++ * --------------------------------------------------------------------- */ ++ ++static int quota_ugid_addstat(unsigned int quota_id, unsigned int ugid_size, ++ struct vz_quota_iface *u_ugid_buf) ++{ ++ struct vz_quota_master *qmblk; ++ int ret; ++ ++ down(&vz_quota_sem); ++ ++ ret = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ ret = -EBUSY; ++ if (qmblk->dq_state != VZDQ_STARTING) ++ goto out; /* working quota doesn't accept new ugids */ ++ ++ ret = 0; ++ /* start to add ugids */ ++ for (ret = 0; ret < ugid_size; ret++) { ++ struct vz_quota_iface ugid_buf; ++ struct vz_quota_ugid *ugid; ++ ++ if (copy_from_user(&ugid_buf, u_ugid_buf, sizeof(ugid_buf))) ++ break; ++ ++ if (ugid_buf.qi_type >= MAXQUOTAS) ++ break; /* bad quota type - this is the only check */ ++ ++ ugid = vzquota_find_ugid(qmblk, ++ ugid_buf.qi_id, ugid_buf.qi_type, 0); ++ if (ugid == VZ_QUOTA_UGBAD) { ++ qmblk->dq_flags |= VZDQUG_FIXED_SET; ++ break; /* limit reached */ ++ } ++ ++ /* update usage/limits ++ * we can copy the data without the lock, because the data ++ * cannot be modified in VZDQ_STARTING state */ ++ ugid->qugid_stat = ugid_buf.qi_stat; ++ ++ vzquota_put_ugid(qmblk, ugid); ++ ++ u_ugid_buf++; /* next user buffer */ ++ } ++out: ++ up(&vz_quota_sem); ++ ++ return ret; ++} ++ ++static int quota_ugid_setgrace(unsigned int quota_id, ++ struct dq_info u_dq_info[]) ++{ ++ struct vz_quota_master *qmblk; ++ struct dq_info dq_info[MAXQUOTAS]; ++ struct dq_info *target; ++ int err, type; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EBUSY; ++ if (qmblk->dq_state != VZDQ_STARTING) ++ goto out; /* working quota doesn't accept changing options */ ++ ++ err = -EFAULT; ++ if (copy_from_user(dq_info, u_dq_info, sizeof(dq_info))) ++ goto out; ++ ++ err = 0; ++ ++ /* update in qmblk */ ++ for (type = 0; type < MAXQUOTAS; type ++) { ++ target = 
&qmblk->dq_ugid_info[type]; ++ target->bexpire = dq_info[type].bexpire; ++ target->iexpire = dq_info[type].iexpire; ++ } ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int do_quota_ugid_getstat(struct vz_quota_master *qmblk, int index, int size, ++ struct vz_quota_iface *u_ugid_buf) ++{ ++ int type, count; ++ struct vz_quota_ugid *ugid; ++ ++ if (QTREE_LEAFNUM(qmblk->dq_uid_tree) + ++ QTREE_LEAFNUM(qmblk->dq_gid_tree) ++ <= index) ++ return 0; ++ ++ count = 0; ++ ++ type = index < QTREE_LEAFNUM(qmblk->dq_uid_tree) ? USRQUOTA : GRPQUOTA; ++ if (type == GRPQUOTA) ++ index -= QTREE_LEAFNUM(qmblk->dq_uid_tree); ++ ++ /* loop through ugid and then qgid quota */ ++repeat: ++ for (ugid = vzquota_get_byindex(qmblk, index, type); ++ ugid != NULL && count < size; ++ ugid = vzquota_get_next(qmblk, ugid), count++) ++ { ++ struct vz_quota_iface ugid_buf; ++ ++ /* form interface buffer and send in to user-level */ ++ qmblk_data_read_lock(qmblk); ++ memcpy(&ugid_buf.qi_stat, &ugid->qugid_stat, ++ sizeof(ugid_buf.qi_stat)); ++ qmblk_data_read_unlock(qmblk); ++ ugid_buf.qi_id = ugid->qugid_id; ++ ugid_buf.qi_type = ugid->qugid_type; ++ ++ memcpy(u_ugid_buf, &ugid_buf, sizeof(ugid_buf)); ++ u_ugid_buf++; /* next portion of user buffer */ ++ } ++ ++ if (type == USRQUOTA && count < size) { ++ type = GRPQUOTA; ++ index = 0; ++ goto repeat; ++ } ++ ++ return count; ++} ++ ++static int quota_ugid_getstat(unsigned int quota_id, ++ int index, int size, struct vz_quota_iface *u_ugid_buf) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_iface *k_ugid_buf; ++ int err; ++ ++ if (index < 0 || size < 0) ++ return -EINVAL; ++ ++ if (size > INT_MAX / sizeof(struct vz_quota_iface)) ++ return -EINVAL; ++ ++ k_ugid_buf = vmalloc(size * sizeof(struct vz_quota_iface)); ++ if (k_ugid_buf == NULL) ++ return -ENOMEM; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ down(&qmblk->dq_sem); ++ err = 
do_quota_ugid_getstat(qmblk, index, size, k_ugid_buf); ++ up(&qmblk->dq_sem); ++ if (err < 0) ++ goto out; ++ ++ if (copy_to_user(u_ugid_buf, k_ugid_buf, ++ size * sizeof(struct vz_quota_iface))) ++ err = -EFAULT; ++ ++out: ++ up(&vz_quota_sem); ++ vfree(k_ugid_buf); ++ return err; ++} ++ ++static int quota_ugid_getgrace(unsigned int quota_id, ++ struct dq_info u_dq_info[]) ++{ ++ struct vz_quota_master *qmblk; ++ struct dq_info dq_info[MAXQUOTAS]; ++ struct dq_info *target; ++ int err, type; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = 0; ++ /* update from qmblk */ ++ for (type = 0; type < MAXQUOTAS; type ++) { ++ target = &qmblk->dq_ugid_info[type]; ++ dq_info[type].bexpire = target->bexpire; ++ dq_info[type].iexpire = target->iexpire; ++ dq_info[type].flags = target->flags; ++ } ++ ++ if (copy_to_user(u_dq_info, dq_info, sizeof(dq_info))) ++ err = -EFAULT; ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int quota_ugid_getconfig(unsigned int quota_id, ++ struct vz_quota_ugid_stat *info) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid_stat kinfo; ++ int err; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = 0; ++ kinfo.limit = qmblk->dq_ugid_max; ++ kinfo.count = qmblk->dq_ugid_count; ++ kinfo.flags = qmblk->dq_flags; ++ ++ if (copy_to_user(info, &kinfo, sizeof(kinfo))) ++ err = -EFAULT; ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int quota_ugid_setconfig(unsigned int quota_id, ++ struct vz_quota_ugid_stat *info) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid_stat kinfo; ++ int err; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EFAULT; ++ if (copy_from_user(&kinfo, info, sizeof(kinfo))) ++ goto out; ++ ++ err = 0; ++ qmblk->dq_ugid_max 
= kinfo.limit; ++ if (qmblk->dq_state == VZDQ_STARTING) { ++ qmblk->dq_flags = kinfo.flags; ++ if (qmblk->dq_flags & VZDQUG_ON) ++ qmblk->dq_flags |= VZDQ_USRQUOTA | VZDQ_GRPQUOTA; ++ } ++ ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int quota_ugid_setlimit(unsigned int quota_id, ++ struct vz_quota_ugid_setlimit *u_lim) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid_setlimit lim; ++ int err; ++ ++ down(&vz_quota_sem); ++ ++ err = -ESRCH; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EFAULT; ++ if (copy_from_user(&lim, u_lim, sizeof(lim))) ++ goto out; ++ ++ err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb); ++ ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int quota_ugid_setinfo(unsigned int quota_id, ++ struct vz_quota_ugid_setinfo *u_info) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid_setinfo info; ++ int err; ++ ++ down(&vz_quota_sem); ++ ++ err = -ESRCH; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EFAULT; ++ if (copy_from_user(&info, u_info, sizeof(info))) ++ goto out; ++ ++ err = __vz_set_dqinfo(qmblk, info.type, &info.dqi); ++ ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++/* ++ * This is a system call to maintain UGID quotas ++ * Note this call is allowed to run ONLY from VE0 ++ */ ++long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub) ++{ ++ int ret; ++ ++ ret = -EPERM; ++ /* access allowed only from root of VE0 */ ++ if (!capable(CAP_SYS_RESOURCE) || ++ !capable(CAP_SYS_ADMIN)) ++ goto out; ++ ++ switch (qub->cmd) { ++ case VZ_DQ_UGID_GETSTAT: ++ ret = quota_ugid_getstat(qub->quota_id, ++ qub->ugid_index, qub->ugid_size, ++ (struct vz_quota_iface *)qub->addr); ++ break; ++ case VZ_DQ_UGID_ADDSTAT: ++ ret = quota_ugid_addstat(qub->quota_id, qub->ugid_size, ++ (struct vz_quota_iface *)qub->addr); ++ break; ++ case VZ_DQ_UGID_GETGRACE: ++ ret = quota_ugid_getgrace(qub->quota_id, ++ (struct 
dq_info *)qub->addr); ++ break; ++ case VZ_DQ_UGID_SETGRACE: ++ ret = quota_ugid_setgrace(qub->quota_id, ++ (struct dq_info *)qub->addr); ++ break; ++ case VZ_DQ_UGID_GETCONFIG: ++ ret = quota_ugid_getconfig(qub->quota_id, ++ (struct vz_quota_ugid_stat *)qub->addr); ++ break; ++ case VZ_DQ_UGID_SETCONFIG: ++ ret = quota_ugid_setconfig(qub->quota_id, ++ (struct vz_quota_ugid_stat *)qub->addr); ++ break; ++ case VZ_DQ_UGID_SETLIMIT: ++ ret = quota_ugid_setlimit(qub->quota_id, ++ (struct vz_quota_ugid_setlimit *) ++ qub->addr); ++ break; ++ case VZ_DQ_UGID_SETINFO: ++ ret = quota_ugid_setinfo(qub->quota_id, ++ (struct vz_quota_ugid_setinfo *) ++ qub->addr); ++ break; ++ default: ++ ret = -EINVAL; ++ goto out; ++ } ++out: ++ return ret; ++} ++ ++static void ugid_quota_on_sb(struct super_block *sb) ++{ ++ struct super_block *real_sb; ++ struct vz_quota_master *qmblk; ++ ++ if (!sb->s_op->get_quota_root) ++ return; ++ ++ real_sb = sb->s_op->get_quota_root(sb)->i_sb; ++ if (real_sb->dq_op != &vz_quota_operations) ++ return; ++ ++ sb->dq_op = &vz_quota_operations2; ++ sb->s_qcop = &vz_quotactl_operations; ++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list); ++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list); ++ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format; ++ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD)) ++ return; ++ down(&vz_quota_sem); ++ if (qmblk->dq_flags & VZDQ_USRQUOTA) ++ sb->s_dquot.flags |= DQUOT_USR_ENABLED; ++ if (qmblk->dq_flags & VZDQ_GRPQUOTA) ++ sb->s_dquot.flags |= DQUOT_GRP_ENABLED; ++ up(&vz_quota_sem); ++ qmblk_put(qmblk); ++} ++ ++static void ugid_quota_off_sb(struct super_block *sb) ++{ ++ /* can't make quota off on mounted super block */ ++ BUG_ON(sb->s_root != NULL); ++} ++ ++static int ugid_notifier_call(struct vnotifier_block *self, ++ unsigned long n, void *data, int old_ret) ++{ ++ struct 
virt_info_quota *viq; ++ ++ viq = (struct virt_info_quota *)data; ++ ++ switch (n) { ++ case VIRTINFO_QUOTA_ON: ++ ugid_quota_on_sb(viq->super); ++ break; ++ case VIRTINFO_QUOTA_OFF: ++ ugid_quota_off_sb(viq->super); ++ break; ++ case VIRTINFO_QUOTA_GETSTAT: ++ break; ++ default: ++ return old_ret; ++ } ++ return NOTIFY_OK; ++} ++ ++static struct vnotifier_block ugid_notifier_block = { ++ .notifier_call = ugid_notifier_call, ++}; ++ ++/* ---------------------------------------------------------------------- ++ * Init/exit. ++ * --------------------------------------------------------------------- */ ++ ++struct quota_format_type vz_quota_empty_v2_format = { ++ qf_fmt_id: QFMT_VFS_V0, ++ qf_ops: NULL, ++ qf_owner: THIS_MODULE ++}; ++ ++int vzquota_ugid_init() ++{ ++ int err; ++ ++ vz_quota_ugid_cachep = kmem_cache_create("vz_quota_ugid", ++ sizeof(struct vz_quota_ugid), ++ 0, SLAB_HWCACHE_ALIGN, ++ NULL, NULL); ++ if (vz_quota_ugid_cachep == NULL) ++ goto err_slab; ++ ++ err = register_quota_format(&vz_quota_empty_v2_format); ++ if (err) ++ goto err_reg; ++ ++ virtinfo_notifier_register(VITYPE_QUOTA, &ugid_notifier_block); ++ return 0; ++ ++err_reg: ++ kmem_cache_destroy(vz_quota_ugid_cachep); ++ return err; ++ ++err_slab: ++ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n"); ++ return -ENOMEM; ++} ++ ++void vzquota_ugid_release() ++{ ++ virtinfo_notifier_unregister(VITYPE_QUOTA, &ugid_notifier_block); ++ unregister_quota_format(&vz_quota_empty_v2_format); ++ ++ if (kmem_cache_destroy(vz_quota_ugid_cachep)) ++ printk(KERN_ERR "VZQUOTA: kmem_cache_destroy failed\n"); ++} +diff -upr linux-2.6.16.orig/fs/vzdquot.c linux-2.6.16-026test015/fs/vzdquot.c +--- linux-2.6.16.orig/fs/vzdquot.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/fs/vzdquot.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,1705 @@ ++/* ++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ * This file contains the core of Virtuozzo disk quota implementation: ++ * maintenance of VZDQ information in inodes, ++ * external interfaces, ++ * module entry. ++ */ ++ ++#include <linux/config.h> ++#include <linux/kernel.h> ++#include <linux/string.h> ++#include <linux/list.h> ++#include <asm/atomic.h> ++#include <linux/spinlock.h> ++#include <asm/semaphore.h> ++#include <linux/slab.h> ++#include <linux/fs.h> ++#include <linux/dcache.h> ++#include <linux/quota.h> ++#include <linux/rcupdate.h> ++#include <linux/module.h> ++#include <asm/uaccess.h> ++#include <linux/vzctl.h> ++#include <linux/vzctl_quota.h> ++#include <linux/vzquota.h> ++#include <linux/virtinfo.h> ++#include <linux/vzdq_tree.h> ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Locking ++ * ++ * ---------------------------------------------------------------------- */ ++ ++/* ++ * Serializes on/off and all other do_vzquotactl operations. ++ * Protects qmblk hash. ++ */ ++struct semaphore vz_quota_sem; ++ ++/* ++ * Data access locks ++ * inode_qmblk ++ * protects qmblk pointers in all inodes and qlnk content in general ++ * (but not qmblk content); ++ * also protects related qmblk invalidation procedures; ++ * can't be per-inode because of vzquota_dtree_qmblk complications ++ * and problems with serialization with quota_on, ++ * but can be per-superblock; ++ * qmblk_data ++ * protects qmblk fields (such as current usage) ++ * quota_data ++ * protects charge/uncharge operations, thus, implies ++ * qmblk_data lock and, if CONFIG_VZ_QUOTA_UGID, inode_qmblk lock ++ * (to protect ugid pointers). 
++ * ++ * Lock order: ++ * inode_qmblk_lock -> dcache_lock ++ * inode_qmblk_lock -> qmblk_data ++ */ ++static spinlock_t vzdq_qmblk_lock = SPIN_LOCK_UNLOCKED; ++ ++inline void inode_qmblk_lock(struct super_block *sb) ++{ ++ spin_lock(&vzdq_qmblk_lock); ++} ++ ++inline void inode_qmblk_unlock(struct super_block *sb) ++{ ++ spin_unlock(&vzdq_qmblk_lock); ++} ++ ++inline void qmblk_data_read_lock(struct vz_quota_master *qmblk) ++{ ++ spin_lock(&qmblk->dq_data_lock); ++} ++ ++inline void qmblk_data_read_unlock(struct vz_quota_master *qmblk) ++{ ++ spin_unlock(&qmblk->dq_data_lock); ++} ++ ++inline void qmblk_data_write_lock(struct vz_quota_master *qmblk) ++{ ++ spin_lock(&qmblk->dq_data_lock); ++} ++ ++inline void qmblk_data_write_unlock(struct vz_quota_master *qmblk) ++{ ++ spin_unlock(&qmblk->dq_data_lock); ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Master hash table handling. ++ * ++ * SMP not safe, serialied by vz_quota_sem within quota syscalls ++ * ++ * --------------------------------------------------------------------- */ ++ ++static kmem_cache_t *vzquota_cachep; ++ ++/* ++ * Hash function. 
++ */ ++#define QHASH_BITS 6 ++#define VZ_QUOTA_HASH_SIZE (1 << QHASH_BITS) ++#define QHASH_MASK (VZ_QUOTA_HASH_SIZE - 1) ++ ++struct list_head vzquota_hash_table[VZ_QUOTA_HASH_SIZE]; ++int vzquota_hash_size = VZ_QUOTA_HASH_SIZE; ++ ++static inline int vzquota_hash_func(unsigned int qid) ++{ ++ return (((qid >> QHASH_BITS) ^ qid) & QHASH_MASK); ++} ++ ++/** ++ * vzquota_alloc_master - alloc and instantiate master quota record ++ * ++ * Returns: ++ * pointer to newly created record if SUCCESS ++ * -ENOMEM if out of memory ++ * -EEXIST if record with given quota_id already exist ++ */ ++struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id, ++ struct vz_quota_stat *qstat) ++{ ++ int err; ++ struct vz_quota_master *qmblk; ++ ++ err = -EEXIST; ++ if (vzquota_find_master(quota_id) != NULL) ++ goto out; ++ ++ err = -ENOMEM; ++ qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL); ++ if (qmblk == NULL) ++ goto out; ++#ifdef CONFIG_VZ_QUOTA_UGID ++ qmblk->dq_uid_tree = quotatree_alloc(); ++ if (!qmblk->dq_uid_tree) ++ goto out_free; ++ ++ qmblk->dq_gid_tree = quotatree_alloc(); ++ if (!qmblk->dq_gid_tree) ++ goto out_free_tree; ++#endif ++ ++ qmblk->dq_state = VZDQ_STARTING; ++ init_MUTEX(&qmblk->dq_sem); ++ spin_lock_init(&qmblk->dq_data_lock); ++ ++ qmblk->dq_id = quota_id; ++ qmblk->dq_stat = qstat->dq_stat; ++ qmblk->dq_info = qstat->dq_info; ++ qmblk->dq_root_dentry = NULL; ++ qmblk->dq_root_mnt = NULL; ++ qmblk->dq_sb = NULL; ++ qmblk->dq_ugid_count = 0; ++ qmblk->dq_ugid_max = 0; ++ qmblk->dq_flags = 0; ++ memset(qmblk->dq_ugid_info, 0, sizeof(qmblk->dq_ugid_info)); ++ INIT_LIST_HEAD(&qmblk->dq_ilink_list); ++ ++ atomic_set(&qmblk->dq_count, 1); ++ ++ /* insert in hash chain */ ++ list_add(&qmblk->dq_hash, ++ &vzquota_hash_table[vzquota_hash_func(quota_id)]); ++ ++ /* success */ ++ return qmblk; ++ ++out_free_tree: ++ quotatree_free(qmblk->dq_uid_tree, NULL); ++out_free: ++ kmem_cache_free(vzquota_cachep, qmblk); ++out: ++ return ERR_PTR(err); ++} ++ 
++static struct vz_quota_master *vzquota_alloc_fake(void) ++{ ++ struct vz_quota_master *qmblk; ++ ++ qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL); ++ if (qmblk == NULL) ++ return NULL; ++ memset(qmblk, 0, sizeof(*qmblk)); ++ qmblk->dq_state = VZDQ_STOPING; ++ qmblk->dq_flags = VZDQ_NOQUOT; ++ spin_lock_init(&qmblk->dq_data_lock); ++ INIT_LIST_HEAD(&qmblk->dq_ilink_list); ++ atomic_set(&qmblk->dq_count, 1); ++ return qmblk; ++} ++ ++/** ++ * vzquota_find_master - find master record with given id ++ * ++ * Returns qmblk without touching its refcounter. ++ * Called under vz_quota_sem. ++ */ ++struct vz_quota_master *vzquota_find_master(unsigned int quota_id) ++{ ++ int i; ++ struct vz_quota_master *qp; ++ ++ i = vzquota_hash_func(quota_id); ++ list_for_each_entry(qp, &vzquota_hash_table[i], dq_hash) { ++ if (qp->dq_id == quota_id) ++ return qp; ++ } ++ return NULL; ++} ++ ++/** ++ * vzquota_free_master - release resources taken by qmblk, freeing memory ++ * ++ * qmblk is assumed to be already taken out from the hash. ++ * Should be called outside vz_quota_sem. ++ */ ++void vzquota_free_master(struct vz_quota_master *qmblk) ++{ ++#ifdef CONFIG_VZ_QUOTA_UGID ++ vzquota_kill_ugid(qmblk); ++#endif ++ BUG_ON(!list_empty(&qmblk->dq_ilink_list)); ++ kmem_cache_free(vzquota_cachep, qmblk); ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Passing quota information through current ++ * ++ * Used in inode -> qmblk lookup at inode creation stage (since at that ++ * time there are no links between the inode being created and its parent ++ * directory). 
++ * ++ * --------------------------------------------------------------------- */ ++ ++#define VZDQ_CUR_MAGIC 0x57d0fee2 ++ ++static inline int vzquota_cur_qmblk_check(void) ++{ ++ return current->magic == VZDQ_CUR_MAGIC; ++} ++ ++static inline struct inode *vzquota_cur_qmblk_fetch(void) ++{ ++ return current->ino; ++} ++ ++static inline void vzquota_cur_qmblk_set(struct inode *data) ++{ ++ struct task_struct *tsk; ++ ++ tsk = current; ++ tsk->magic = VZDQ_CUR_MAGIC; ++ tsk->ino = data; ++} ++ ++#if 0 ++static inline void vzquota_cur_qmblk_reset(void) ++{ ++ current->magic = 0; ++} ++#endif ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Superblock quota operations ++ * ++ * --------------------------------------------------------------------- */ ++ ++/* ++ * Kernel structure abuse. ++ * We use files[0] pointer as an int variable: ++ * reference counter of how many quota blocks uses this superblock. ++ * files[1] is used for generations structure which helps us to track ++ * when traversing of dentries is really required. ++ */ ++#define __VZ_QUOTA_NOQUOTA(sb) sb->s_dquot.vzdq_master ++#define __VZ_QUOTA_TSTAMP(sb) ((struct timeval *)\ ++ &sb->s_dquot.dqio_sem) ++ ++#if defined(VZ_QUOTA_UNLOAD) ++ ++#define __VZ_QUOTA_SBREF(sb) sb->s_dquot.vzdq_count ++ ++struct dquot_operations *orig_dq_op; ++struct quotactl_ops *orig_dq_cop; ++ ++/** ++ * quota_get_super - account for new a quoted tree under the superblock ++ * ++ * One superblock can have multiple directory subtrees with different VZ ++ * quotas. We keep a counter of such subtrees and set VZ quota operations or ++ * reset the default ones. ++ * ++ * Called under vz_quota_sem (from quota_on). 
++ */ ++int vzquota_get_super(struct super_block *sb) ++{ ++ if (sb->dq_op != &vz_quota_operations) { ++ down(&sb->s_dquot.dqonoff_sem); ++ if (sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) { ++ up(&sb->s_dquot.dqonoff_sem); ++ return -EEXIST; ++ } ++ if (orig_dq_op == NULL && sb->dq_op != NULL) ++ orig_dq_op = sb->dq_op; ++ sb->dq_op = &vz_quota_operations; ++ if (orig_dq_cop == NULL && sb->s_qcop != NULL) ++ orig_dq_cop = sb->s_qcop; ++ /* XXX this may race with sys_quotactl */ ++#ifdef CONFIG_VZ_QUOTA_UGID ++ sb->s_qcop = &vz_quotactl_operations; ++#else ++ sb->s_qcop = NULL; ++#endif ++ do_gettimeofday(__VZ_QUOTA_TSTAMP(sb)); ++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info)); ++ ++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list); ++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list); ++ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format; ++ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format; ++ /* ++ * To get quotaops.h call us we need to mark superblock ++ * as having quota. These flags mark the moment when ++ * our dq_op start to be called. ++ * ++ * The ordering of dq_op and s_dquot.flags assignment ++ * needs to be enforced, but other CPUs do not do rmb() ++ * between s_dquot.flags and dq_op accesses. ++ */ ++ wmb(); synchronize_sched(); ++ sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED; ++ __module_get(THIS_MODULE); ++ up(&sb->s_dquot.dqonoff_sem); ++ } ++ /* protected by vz_quota_sem */ ++ __VZ_QUOTA_SBREF(sb)++; ++ return 0; ++} ++ ++/** ++ * quota_put_super - release superblock when one quota tree goes away ++ * ++ * Called under vz_quota_sem. 
++ */ ++void vzquota_put_super(struct super_block *sb) ++{ ++ int count; ++ ++ count = --__VZ_QUOTA_SBREF(sb); ++ if (count == 0) { ++ down(&sb->s_dquot.dqonoff_sem); ++ sb->s_dquot.flags = 0; ++ wmb(); synchronize_sched(); ++ sema_init(&sb->s_dquot.dqio_sem, 1); ++ sb->s_qcop = orig_dq_cop; ++ sb->dq_op = orig_dq_op; ++ inode_qmblk_lock(sb); ++ quota_gen_put(SB_QGEN(sb)); ++ SB_QGEN(sb) = NULL; ++ /* release qlnk's without qmblk */ ++ remove_inode_quota_links_list(&non_vzquota_inodes_lh, ++ sb, NULL); ++ /* ++ * Races with quota initialization: ++ * after this inode_qmblk_unlock all inode's generations are ++ * invalidated, quota_inode_qmblk checks superblock operations. ++ */ ++ inode_qmblk_unlock(sb); ++ /* ++ * Module refcounting: in theory, this is the best place ++ * to call module_put(THIS_MODULE). ++ * In reality, it can't be done because we can't be sure that ++ * other CPUs do not enter our code segment through dq_op ++ * cached long time ago. Quotaops interface isn't supposed to ++ * go into modules currently (that is, into unloadable ++ * modules). By omitting module_put, our module isn't ++ * unloadable. ++ */ ++ up(&sb->s_dquot.dqonoff_sem); ++ } ++} ++ ++#else ++ ++struct vzquota_new_sop { ++ struct super_operations new_op; ++ struct super_operations *old_op; ++}; ++ ++/** ++ * vzquota_shutdown_super - callback on umount ++ */ ++void vzquota_shutdown_super(struct super_block *sb) ++{ ++ struct vz_quota_master *qmblk; ++ struct vzquota_new_sop *sop; ++ ++ qmblk = __VZ_QUOTA_NOQUOTA(sb); ++ __VZ_QUOTA_NOQUOTA(sb) = NULL; ++ if (qmblk != NULL) ++ qmblk_put(qmblk); ++ sop = container_of(sb->s_op, struct vzquota_new_sop, new_op); ++ sb->s_op = sop->old_op; ++ kfree(sop); ++ (*sb->s_op->put_super)(sb); ++} ++ ++/** ++ * vzquota_get_super - account for new a quoted tree under the superblock ++ * ++ * One superblock can have multiple directory subtrees with different VZ ++ * quotas. ++ * ++ * Called under vz_quota_sem (from vzquota_on). 
++ */ ++int vzquota_get_super(struct super_block *sb) ++{ ++ struct vz_quota_master *qnew; ++ struct vzquota_new_sop *sop; ++ int err; ++ ++ down(&sb->s_dquot.dqonoff_sem); ++ err = -EEXIST; ++ if ((sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) && ++ sb->dq_op != &vz_quota_operations) ++ goto out_up; ++ ++ /* ++ * This allocation code should be under sb->dq_op check below, but ++ * it doesn't really matter... ++ */ ++ if (__VZ_QUOTA_NOQUOTA(sb) == NULL) { ++ qnew = vzquota_alloc_fake(); ++ if (qnew == NULL) ++ goto out_up; ++ __VZ_QUOTA_NOQUOTA(sb) = qnew; ++ } ++ ++ if (sb->dq_op != &vz_quota_operations) { ++ sop = kmalloc(sizeof(*sop), GFP_KERNEL); ++ if (sop == NULL) { ++ vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb)); ++ __VZ_QUOTA_NOQUOTA(sb) = NULL; ++ goto out_up; ++ } ++ memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op)); ++ sop->new_op.put_super = &vzquota_shutdown_super; ++ sop->old_op = sb->s_op; ++ sb->s_op = &sop->new_op; ++ ++ sb->dq_op = &vz_quota_operations; ++#ifdef CONFIG_VZ_QUOTA_UGID ++ sb->s_qcop = &vz_quotactl_operations; ++#else ++ sb->s_qcop = NULL; ++#endif ++ do_gettimeofday(__VZ_QUOTA_TSTAMP(sb)); ++ ++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info)); ++ /* these 2 list heads are checked in sync_dquots() */ ++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list); ++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list); ++ sb->s_dquot.info[USRQUOTA].dqi_format = ++ &vz_quota_empty_v2_format; ++ sb->s_dquot.info[GRPQUOTA].dqi_format = ++ &vz_quota_empty_v2_format; ++ ++ /* ++ * To get quotaops.h to call us we need to mark superblock ++ * as having quota. These flags mark the moment when ++ * our dq_op start to be called. ++ * ++ * The ordering of dq_op and s_dquot.flags assignment ++ * needs to be enforced, but other CPUs do not do rmb() ++ * between s_dquot.flags and dq_op accesses. 
++ */ ++ wmb(); synchronize_sched(); ++ sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED; ++ } ++ err = 0; ++ ++out_up: ++ up(&sb->s_dquot.dqonoff_sem); ++ return err; ++} ++ ++/** ++ * vzquota_put_super - one quota tree less on this superblock ++ * ++ * Called under vz_quota_sem. ++ */ ++void vzquota_put_super(struct super_block *sb) ++{ ++ /* ++ * Even if this put is the last one, ++ * sb->s_dquot.flags can't be cleared, because otherwise vzquota_drop ++ * won't be called and the remaining qmblk references won't be put. ++ */ ++} ++ ++#endif ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Helpers for inode -> qmblk link maintenance ++ * ++ * --------------------------------------------------------------------- */ ++ ++#define __VZ_QUOTA_EMPTY ((void *)0xbdbdbdbd) ++#define VZ_QUOTA_IS_NOQUOTA(qm, sb) ((qm)->dq_flags & VZDQ_NOQUOT) ++#define VZ_QUOTA_EMPTY_IOPS (&vfs_empty_iops) ++extern struct inode_operations vfs_empty_iops; ++ ++static int VZ_QUOTA_IS_ACTUAL(struct inode *inode) ++{ ++ struct vz_quota_master *qmblk; ++ ++ qmblk = INODE_QLNK(inode)->qmblk; ++ if (qmblk == VZ_QUOTA_BAD) ++ return 1; ++ if (qmblk == __VZ_QUOTA_EMPTY) ++ return 0; ++ if (qmblk->dq_flags & VZDQ_NOACT) ++ /* not actual (invalidated) qmblk */ ++ return 0; ++ return 1; ++} ++ ++static inline int vzquota_qlnk_is_empty(struct vz_quota_ilink *qlnk) ++{ ++ return qlnk->qmblk == __VZ_QUOTA_EMPTY; ++} ++ ++static inline void vzquota_qlnk_set_empty(struct vz_quota_ilink *qlnk) ++{ ++ qlnk->qmblk = __VZ_QUOTA_EMPTY; ++ qlnk->origin = VZ_QUOTAO_SETE; ++} ++ ++void vzquota_qlnk_init(struct vz_quota_ilink *qlnk) ++{ ++ memset(qlnk, 0, sizeof(*qlnk)); ++ INIT_LIST_HEAD(&qlnk->list); ++ vzquota_qlnk_set_empty(qlnk); ++ qlnk->origin = VZ_QUOTAO_INIT; ++} ++ ++void vzquota_qlnk_destroy(struct vz_quota_ilink *qlnk) ++{ ++ might_sleep(); ++ if (vzquota_qlnk_is_empty(qlnk)) ++ return; ++#if defined(CONFIG_VZ_QUOTA_UGID) ++ if (qlnk->qmblk != NULL && 
qlnk->qmblk != VZ_QUOTA_BAD) { ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid *quid, *qgid; ++ qmblk = qlnk->qmblk; ++ quid = qlnk->qugid[USRQUOTA]; ++ qgid = qlnk->qugid[GRPQUOTA]; ++ if (quid != NULL || qgid != NULL) { ++ down(&qmblk->dq_sem); ++ if (qgid != NULL) ++ vzquota_put_ugid(qmblk, qgid); ++ if (quid != NULL) ++ vzquota_put_ugid(qmblk, quid); ++ up(&qmblk->dq_sem); ++ } ++ } ++#endif ++ if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qlnk->qmblk); ++ qlnk->origin = VZ_QUOTAO_DESTR; ++} ++ ++/** ++ * vzquota_qlnk_swap - swap inode's and temporary vz_quota_ilink contents ++ * @qlt: temporary ++ * @qli: inode's ++ * ++ * Locking is provided by the caller (depending on the context). ++ * After swap, @qli is inserted into the corresponding dq_ilink_list, ++ * @qlt list is reinitialized. ++ */ ++static void vzquota_qlnk_swap(struct vz_quota_ilink *qlt, ++ struct vz_quota_ilink *qli) ++{ ++ struct vz_quota_master *qb; ++ struct vz_quota_ugid *qu; ++ int i; ++ ++ qb = qlt->qmblk; ++ qlt->qmblk = qli->qmblk; ++ qli->qmblk = qb; ++ list_del_init(&qli->list); ++ if (qb != __VZ_QUOTA_EMPTY && qb != VZ_QUOTA_BAD) ++ list_add(&qli->list, &qb->dq_ilink_list); ++ INIT_LIST_HEAD(&qlt->list); ++ qli->origin = VZ_QUOTAO_SWAP; ++ ++ for (i = 0; i < MAXQUOTAS; i++) { ++ qu = qlt->qugid[i]; ++ qlt->qugid[i] = qli->qugid[i]; ++ qli->qugid[i] = qu; ++ } ++} ++ ++/** ++ * vzquota_qlnk_reinit_locked - destroy qlnk content, called under locks ++ * ++ * Called under dcache_lock and inode_qmblk locks. ++ * Returns 1 if locks were dropped inside, 0 if atomic. 
++ */ ++static int vzquota_qlnk_reinit_locked(struct vz_quota_ilink *qlnk, ++ struct inode *inode) ++{ ++ if (vzquota_qlnk_is_empty(qlnk)) ++ return 0; ++ if (qlnk->qmblk == VZ_QUOTA_BAD) { ++ vzquota_qlnk_set_empty(qlnk); ++ return 0; ++ } ++ spin_unlock(&dcache_lock); ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(qlnk); ++ vzquota_qlnk_init(qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ spin_lock(&dcache_lock); ++ return 1; ++} ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++/** ++ * vzquota_qlnk_reinit_attr - destroy and reinit qlnk content ++ * ++ * Similar to vzquota_qlnk_reinit_locked, called under different locks. ++ */ ++static int vzquota_qlnk_reinit_attr(struct vz_quota_ilink *qlnk, ++ struct inode *inode, ++ struct vz_quota_master *qmblk) ++{ ++ if (vzquota_qlnk_is_empty(qlnk)) ++ return 0; ++ /* may be optimized if qlnk->qugid all NULLs */ ++ qmblk_data_write_unlock(qmblk); ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(qlnk); ++ vzquota_qlnk_init(qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ qmblk_data_write_lock(qmblk); ++ return 1; ++} ++#endif ++ ++/** ++ * vzquota_qlnk_fill - fill vz_quota_ilink content ++ * @qlnk: vz_quota_ilink to fill ++ * @inode: inode for which @qlnk is filled (i_sb, i_uid, i_gid) ++ * @qmblk: qmblk to which this @qlnk will belong ++ * ++ * Called under dcache_lock and inode_qmblk locks. ++ * Returns 1 if locks were dropped inside, 0 if atomic. ++ * @qlnk is expected to be empty. 
++ */ ++static int vzquota_qlnk_fill(struct vz_quota_ilink *qlnk, ++ struct inode *inode, ++ struct vz_quota_master *qmblk) ++{ ++ if (qmblk != VZ_QUOTA_BAD) ++ qmblk_get(qmblk); ++ qlnk->qmblk = qmblk; ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++ if (qmblk != VZ_QUOTA_BAD && ++ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) && ++ (qmblk->dq_flags & VZDQUG_ON)) { ++ struct vz_quota_ugid *quid, *qgid; ++ ++ spin_unlock(&dcache_lock); ++ inode_qmblk_unlock(inode->i_sb); ++ ++ down(&qmblk->dq_sem); ++ quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0); ++ qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0); ++ up(&qmblk->dq_sem); ++ ++ inode_qmblk_lock(inode->i_sb); ++ spin_lock(&dcache_lock); ++ qlnk->qugid[USRQUOTA] = quid; ++ qlnk->qugid[GRPQUOTA] = qgid; ++ return 1; ++ } ++#endif ++ ++ return 0; ++} ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++/** ++ * vzquota_qlnk_fill_attr - fill vz_quota_ilink content for uid, gid ++ * ++ * This function is a helper for vzquota_transfer, and differs from ++ * vzquota_qlnk_fill only by locking. 
++ */ ++static int vzquota_qlnk_fill_attr(struct vz_quota_ilink *qlnk, ++ struct inode *inode, ++ struct iattr *iattr, ++ int mask, ++ struct vz_quota_master *qmblk) ++{ ++ qmblk_get(qmblk); ++ qlnk->qmblk = qmblk; ++ ++ if (mask) { ++ struct vz_quota_ugid *quid, *qgid; ++ ++ quid = qgid = NULL; /* to make gcc happy */ ++ if (!(mask & (1 << USRQUOTA))) ++ quid = vzquota_get_ugid(INODE_QLNK(inode)-> ++ qugid[USRQUOTA]); ++ if (!(mask & (1 << GRPQUOTA))) ++ qgid = vzquota_get_ugid(INODE_QLNK(inode)-> ++ qugid[GRPQUOTA]); ++ ++ qmblk_data_write_unlock(qmblk); ++ inode_qmblk_unlock(inode->i_sb); ++ ++ down(&qmblk->dq_sem); ++ if (mask & (1 << USRQUOTA)) ++ quid = __vzquota_find_ugid(qmblk, iattr->ia_uid, ++ USRQUOTA, 0); ++ if (mask & (1 << GRPQUOTA)) ++ qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid, ++ GRPQUOTA, 0); ++ up(&qmblk->dq_sem); ++ ++ inode_qmblk_lock(inode->i_sb); ++ qmblk_data_write_lock(qmblk); ++ qlnk->qugid[USRQUOTA] = quid; ++ qlnk->qugid[GRPQUOTA] = qgid; ++ return 1; ++ } ++ ++ return 0; ++} ++#endif ++ ++/** ++ * __vzquota_inode_init - make sure inode's qlnk is initialized ++ * ++ * May be called if qlnk is already initialized, detects this situation itself. ++ * Called under inode_qmblk_lock. ++ */ ++static void __vzquota_inode_init(struct inode *inode, unsigned char origin) ++{ ++ if (inode->i_dquot[USRQUOTA] == NODQUOT) { ++ vzquota_qlnk_init(INODE_QLNK(inode)); ++ inode->i_dquot[USRQUOTA] = (void *)~(unsigned long)NODQUOT; ++ } ++ INODE_QLNK(inode)->origin = origin; ++} ++ ++/** ++ * vzquota_inode_drop - destroy VZ quota information in the inode ++ * ++ * Inode must not be externally accessible or dirty. 
++ */ ++static void vzquota_inode_drop(struct inode *inode) ++{ ++ struct vz_quota_ilink qlnk; ++ ++ vzquota_qlnk_init(&qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ vzquota_qlnk_swap(&qlnk, INODE_QLNK(inode)); ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DRCAL; ++ inode->i_dquot[USRQUOTA] = NODQUOT; ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(&qlnk); ++} ++ ++/** ++ * vzquota_inode_qmblk_set - initialize inode's qlnk ++ * @inode: inode to be initialized ++ * @qmblk: quota master block to which this inode should belong (may be BAD) ++ * @qlnk: placeholder to store data to resolve locking issues ++ * ++ * Returns 1 if locks were dropped and rechecks possibly needed, 0 otherwise. ++ * Called under dcache_lock and inode_qmblk locks. ++ * @qlnk will be destroyed in the caller chain. ++ * ++ * It is not mandatory to restart parent checks since quota on/off currently ++ * shrinks dentry tree and checks that there are not outside references. ++ * But if at some time that shink is removed, restarts will be required. ++ * Additionally, the restarts prevent inconsistencies if the dentry tree ++ * changes (inode is moved). This is not a big deal, but anyway... 
++ */ ++static int vzquota_inode_qmblk_set(struct inode *inode, ++ struct vz_quota_master *qmblk, ++ struct vz_quota_ilink *qlnk) ++{ ++ if (qmblk == NULL) { ++ printk(KERN_ERR "VZDQ: NULL in set, " ++ "orig %u, dev %s, inode %lu, fs %s\n", ++ INODE_QLNK(inode)->origin, ++ inode->i_sb->s_id, inode->i_ino, ++ inode->i_sb->s_type->name); ++ printk(KERN_ERR "current %d (%s), VE %d\n", ++ current->pid, current->comm, ++ VEID(get_exec_env())); ++ dump_stack(); ++ qmblk = VZ_QUOTA_BAD; ++ } ++ while (1) { ++ if (vzquota_qlnk_is_empty(qlnk) && ++ vzquota_qlnk_fill(qlnk, inode, qmblk)) ++ return 1; ++ if (qlnk->qmblk == qmblk) ++ break; ++ if (vzquota_qlnk_reinit_locked(qlnk, inode)) ++ return 1; ++ } ++ vzquota_qlnk_swap(qlnk, INODE_QLNK(inode)); ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_QSET; ++ return 0; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * vzquota_inode_qmblk (inode -> qmblk lookup) parts ++ * ++ * --------------------------------------------------------------------- */ ++ ++static int vzquota_dparents_check_attach(struct inode *inode) ++{ ++ if (!list_empty(&inode->i_dentry)) ++ return 0; ++ printk(KERN_ERR "VZDQ: no parent for " ++ "dev %s, inode %lu, fs %s\n", ++ inode->i_sb->s_id, ++ inode->i_ino, ++ inode->i_sb->s_type->name); ++ return -1; ++} ++ ++static struct inode *vzquota_dparents_check_actual(struct inode *inode) ++{ ++ struct dentry *de; ++ ++ list_for_each_entry(de, &inode->i_dentry, d_alias) { ++ if (de->d_parent == de) /* detached dentry, perhaps */ ++ continue; ++ /* first access to parent, make sure its qlnk initialized */ ++ __vzquota_inode_init(de->d_parent->d_inode, VZ_QUOTAO_ACT); ++ if (!VZ_QUOTA_IS_ACTUAL(de->d_parent->d_inode)) ++ return de->d_parent->d_inode; ++ } ++ return NULL; ++} ++ ++static struct vz_quota_master *vzquota_dparents_check_same(struct inode *inode) ++{ ++ struct dentry *de; ++ struct vz_quota_master *qmblk; ++ ++ qmblk = NULL; ++ list_for_each_entry(de, 
&inode->i_dentry, d_alias) { ++ if (de->d_parent == de) /* detached dentry, perhaps */ ++ continue; ++ if (qmblk == NULL) { ++ qmblk = INODE_QLNK(de->d_parent->d_inode)->qmblk; ++ continue; ++ } ++ if (INODE_QLNK(de->d_parent->d_inode)->qmblk != qmblk) { ++ printk(KERN_WARNING "VZDQ: multiple quotas for " ++ "dev %s, inode %lu, fs %s\n", ++ inode->i_sb->s_id, ++ inode->i_ino, ++ inode->i_sb->s_type->name); ++ qmblk = VZ_QUOTA_BAD; ++ break; ++ } ++ } ++ if (qmblk == NULL) { ++ printk(KERN_WARNING "VZDQ: not attached to tree, " ++ "dev %s, inode %lu, fs %s\n", ++ inode->i_sb->s_id, ++ inode->i_ino, ++ inode->i_sb->s_type->name); ++ qmblk = VZ_QUOTA_BAD; ++ } ++ return qmblk; ++} ++ ++static void vzquota_dbranch_actualize(struct inode *inode, ++ struct inode *refinode) ++{ ++ struct inode *pinode; ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ilink qlnk; ++ ++ vzquota_qlnk_init(&qlnk); ++ ++start: ++ if (inode == inode->i_sb->s_root->d_inode) { ++ /* filesystem root */ ++ atomic_inc(&inode->i_count); ++ do { ++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb); ++ } while (vzquota_inode_qmblk_set(inode, qmblk, &qlnk)); ++ goto out; ++ } ++ ++ if (!vzquota_dparents_check_attach(inode)) { ++ pinode = vzquota_dparents_check_actual(inode); ++ if (pinode != NULL) { ++ inode = pinode; ++ goto start; ++ } ++ } ++ ++ atomic_inc(&inode->i_count); ++ while (1) { ++ if (VZ_QUOTA_IS_ACTUAL(inode)) /* actualized without us */ ++ break; ++ /* ++ * Need to check parents again if we have slept inside ++ * vzquota_inode_qmblk_set() in the loop. ++ * If the state of parents is different, just return and repeat ++ * the actualizing process again from the inode passed to ++ * vzquota_inode_qmblk_recalc(). 
++ */ ++ if (!vzquota_dparents_check_attach(inode)) { ++ if (vzquota_dparents_check_actual(inode) != NULL) ++ break; ++ qmblk = vzquota_dparents_check_same(inode); ++ } else ++ qmblk = VZ_QUOTA_BAD; ++ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)){/* success */ ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_ACT; ++ break; ++ } ++ } ++ ++out: ++ spin_unlock(&dcache_lock); ++ inode_qmblk_unlock(refinode->i_sb); ++ vzquota_qlnk_destroy(&qlnk); ++ iput(inode); ++ inode_qmblk_lock(refinode->i_sb); ++ spin_lock(&dcache_lock); ++} ++ ++static void vzquota_dtree_qmblk_recalc(struct inode *inode, ++ struct vz_quota_ilink *qlnk) ++{ ++ struct inode *pinode; ++ struct vz_quota_master *qmblk; ++ ++ if (inode == inode->i_sb->s_root->d_inode) { ++ /* filesystem root */ ++ do { ++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb); ++ } while (vzquota_inode_qmblk_set(inode, qmblk, qlnk)); ++ return; ++ } ++ ++start: ++ if (VZ_QUOTA_IS_ACTUAL(inode)) ++ return; ++ /* ++ * Here qmblk is (re-)initialized for all ancestors. ++ * This is not a very efficient procedure, but it guarantees that ++ * the quota tree is consistent (that is, the inode doesn't have two ++ * ancestors with different qmblk). ++ */ ++ if (!vzquota_dparents_check_attach(inode)) { ++ pinode = vzquota_dparents_check_actual(inode); ++ if (pinode != NULL) { ++ vzquota_dbranch_actualize(pinode, inode); ++ goto start; ++ } ++ qmblk = vzquota_dparents_check_same(inode); ++ } else ++ qmblk = VZ_QUOTA_BAD; ++ ++ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk)) ++ goto start; ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DTREE; ++} ++ ++static void vzquota_det_qmblk_recalc(struct inode *inode, ++ struct vz_quota_ilink *qlnk) ++{ ++ struct inode *parent; ++ struct vz_quota_master *qmblk; ++ char *msg; ++ int cnt; ++ time_t timeout; ++ ++ cnt = 0; ++ parent = NULL; ++start: ++ /* ++ * qmblk of detached inodes shouldn't be considered as not actual. ++ * They are not in any dentry tree, so quota on/off shouldn't affect ++ * them. 
++ */ ++ if (!vzquota_qlnk_is_empty(INODE_QLNK(inode))) ++ return; ++ ++ timeout = 3; ++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb); ++ msg = "detached inode not in creation"; ++ if (inode->i_op != VZ_QUOTA_EMPTY_IOPS) ++ goto fail; ++ qmblk = VZ_QUOTA_BAD; ++ msg = "unexpected creation context"; ++ if (!vzquota_cur_qmblk_check()) ++ goto fail; ++ timeout = 0; ++ parent = vzquota_cur_qmblk_fetch(); ++ msg = "uninitialized parent"; ++ if (vzquota_qlnk_is_empty(INODE_QLNK(parent))) ++ goto fail; ++ msg = "parent not in tree"; ++ if (list_empty(&parent->i_dentry)) ++ goto fail; ++ msg = "parent has 0 refcount"; ++ if (!atomic_read(&parent->i_count)) ++ goto fail; ++ msg = "parent has different sb"; ++ if (parent->i_sb != inode->i_sb) ++ goto fail; ++ if (!VZ_QUOTA_IS_ACTUAL(parent)) { ++ vzquota_dbranch_actualize(parent, inode); ++ goto start; ++ } ++ ++ qmblk = INODE_QLNK(parent)->qmblk; ++set: ++ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk)) ++ goto start; ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DET; ++ return; ++ ++fail: ++ { ++ struct timeval tv, tvo; ++ do_gettimeofday(&tv); ++ memcpy(&tvo, __VZ_QUOTA_TSTAMP(inode->i_sb), sizeof(tvo)); ++ tv.tv_sec -= tvo.tv_sec; ++ if (tv.tv_usec < tvo.tv_usec) { ++ tv.tv_sec--; ++ tv.tv_usec += USEC_PER_SEC - tvo.tv_usec; ++ } else ++ tv.tv_usec -= tvo.tv_usec; ++ if (tv.tv_sec < timeout) ++ goto set; ++ printk(KERN_ERR "VZDQ: %s, orig %u," ++ " dev %s, inode %lu, fs %s\n", ++ msg, INODE_QLNK(inode)->origin, ++ inode->i_sb->s_id, inode->i_ino, ++ inode->i_sb->s_type->name); ++ if (!cnt++) { ++ printk(KERN_ERR "current %d (%s), VE %d," ++ " time %ld.%06ld\n", ++ current->pid, current->comm, ++ VEID(get_exec_env()), ++ tv.tv_sec, tv.tv_usec); ++ dump_stack(); ++ } ++ if (parent != NULL) ++ printk(KERN_ERR "VZDQ: parent of %lu is %lu\n", ++ inode->i_ino, parent->i_ino); ++ } ++ goto set; ++} ++ ++static void vzquota_inode_qmblk_recalc(struct inode *inode, ++ struct vz_quota_ilink *qlnk) ++{ ++ spin_lock(&dcache_lock); ++ if 
(!list_empty(&inode->i_dentry)) ++ vzquota_dtree_qmblk_recalc(inode, qlnk); ++ else ++ vzquota_det_qmblk_recalc(inode, qlnk); ++ spin_unlock(&dcache_lock); ++} ++ ++/** ++ * vzquota_inode_qmblk - obtain inode's qmblk ++ * ++ * Returns qmblk with refcounter taken, %NULL if not under ++ * VZ quota or %VZ_QUOTA_BAD. ++ * ++ * FIXME: This function should be removed when vzquota_find_qmblk / ++ * get_quota_root / vzquota_dstat code is cleaned up. ++ */ ++struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ilink qlnk; ++ ++ might_sleep(); ++ ++ if (inode->i_sb->dq_op != &vz_quota_operations) ++ return NULL; ++#if defined(VZ_QUOTA_UNLOAD) ++#error Make sure qmblk does not disappear ++#endif ++ ++ vzquota_qlnk_init(&qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL); ++ ++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) || ++ !VZ_QUOTA_IS_ACTUAL(inode)) ++ vzquota_inode_qmblk_recalc(inode, &qlnk); ++ ++ qmblk = INODE_QLNK(inode)->qmblk; ++ if (qmblk != VZ_QUOTA_BAD) { ++ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) ++ qmblk_get(qmblk); ++ else ++ qmblk = NULL; ++ } ++ ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(&qlnk); ++ return qmblk; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Calls from quota operations ++ * ++ * --------------------------------------------------------------------- */ ++ ++/** ++ * vzquota_inode_init_call - call from DQUOT_INIT ++ */ ++void vzquota_inode_init_call(struct inode *inode) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ ++ /* initializes inode's quota inside */ ++ qmblk = vzquota_inode_data(inode, &data); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ vzquota_data_unlock(inode, &data); ++ ++ /* ++ * The check is needed for repeated new_inode() calls from a single ++ * ext3 call like create or mkdir in case of -ENOSPC. 
++ */ ++ spin_lock(&dcache_lock); ++ if (!list_empty(&inode->i_dentry)) ++ vzquota_cur_qmblk_set(inode); ++ spin_unlock(&dcache_lock); ++} ++ ++/** ++ * vzquota_inode_drop_call - call from DQUOT_DROP ++ */ ++void vzquota_inode_drop_call(struct inode *inode) ++{ ++ vzquota_inode_drop(inode); ++} ++ ++/** ++ * vzquota_inode_data - initialize (if nec.) and lock inode quota ptrs ++ * @inode: the inode ++ * @data: storage space ++ * ++ * Returns: qmblk is NULL or VZ_QUOTA_BAD or actualized qmblk. ++ * On return if qmblk is neither NULL nor VZ_QUOTA_BAD: ++ * qmblk in inode's qlnk is the same as returned, ++ * ugid pointers inside inode's qlnk are valid, ++ * some locks are taken (and should be released by vzquota_data_unlock). ++ * If qmblk is NULL or VZ_QUOTA_BAD, locks are NOT taken. ++ */ ++struct vz_quota_master *vzquota_inode_data(struct inode *inode, ++ struct vz_quota_datast *data) ++{ ++ struct vz_quota_master *qmblk; ++ ++ might_sleep(); ++ ++ vzquota_qlnk_init(&data->qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL); ++ ++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) || ++ !VZ_QUOTA_IS_ACTUAL(inode)) ++ vzquota_inode_qmblk_recalc(inode, &data->qlnk); ++ ++ qmblk = INODE_QLNK(inode)->qmblk; ++ if (qmblk != VZ_QUOTA_BAD) { ++ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) { ++ /* ++ * Note that in the current implementation, ++ * inode_qmblk_lock can theoretically be dropped here. ++ * This place is serialized with quota_off because ++ * quota_off fails when there are extra dentry ++ * references and syncs inodes before removing quota ++ * information from them. ++ * However, quota usage information should stop being ++ * updated immediately after vzquota_off. 
++ */ ++ qmblk_data_write_lock(qmblk); ++ } else { ++ inode_qmblk_unlock(inode->i_sb); ++ qmblk = NULL; ++ } ++ } else { ++ inode_qmblk_unlock(inode->i_sb); ++ } ++ return qmblk; ++} ++ ++void vzquota_data_unlock(struct inode *inode, ++ struct vz_quota_datast *data) ++{ ++ qmblk_data_write_unlock(INODE_QLNK(inode)->qmblk); ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(&data->qlnk); ++} ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++/** ++ * vzquota_inode_transfer_call - call from vzquota_transfer ++ */ ++int vzquota_inode_transfer_call(struct inode *inode, struct iattr *iattr) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ struct vz_quota_ilink qlnew; ++ int mask; ++ int ret; ++ ++ might_sleep(); ++ vzquota_qlnk_init(&qlnew); ++start: ++ qmblk = vzquota_inode_data(inode, &data); ++ ret = NO_QUOTA; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out_destr; ++ ret = QUOTA_OK; ++ if (qmblk == NULL) ++ goto out_destr; ++ qmblk_get(qmblk); ++ ++ ret = QUOTA_OK; ++ if (!(qmblk->dq_flags & VZDQUG_ON)) ++ /* no ugid quotas */ ++ goto out_unlock; ++ ++ mask = 0; ++ if ((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid) ++ mask |= 1 << USRQUOTA; ++ if ((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid) ++ mask |= 1 << GRPQUOTA; ++ while (1) { ++ if (vzquota_qlnk_is_empty(&qlnew) && ++ vzquota_qlnk_fill_attr(&qlnew, inode, iattr, mask, qmblk)) ++ break; ++ if (qlnew.qmblk == INODE_QLNK(inode)->qmblk && ++ qlnew.qmblk == qmblk) ++ goto finish; ++ if (vzquota_qlnk_reinit_attr(&qlnew, inode, qmblk)) ++ break; ++ } ++ ++ /* prepare for restart */ ++ vzquota_data_unlock(inode, &data); ++ qmblk_put(qmblk); ++ goto start; ++ ++finish: ++ /* all references obtained successfully */ ++ ret = vzquota_transfer_usage(inode, mask, &qlnew); ++ if (!ret) { ++ vzquota_qlnk_swap(&qlnew, INODE_QLNK(inode)); ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_TRANS; ++ } ++out_unlock: ++ vzquota_data_unlock(inode, &data); ++ qmblk_put(qmblk); ++out_destr: ++ 
vzquota_qlnk_destroy(&qlnew); ++ return ret; ++} ++#endif ++ ++int vzquota_rename_check(struct inode *inode, ++ struct inode *old_dir, struct inode *new_dir) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ilink qlnk1, qlnk2; ++ int c, ret; ++ ++ if (inode->i_sb != old_dir->i_sb || inode->i_sb != new_dir->i_sb) ++ return -1; ++ ++ might_sleep(); ++ ++ vzquota_qlnk_init(&qlnk1); ++ vzquota_qlnk_init(&qlnk2); ++ inode_qmblk_lock(inode->i_sb); ++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL); ++ __vzquota_inode_init(old_dir, VZ_QUOTAO_INICAL); ++ __vzquota_inode_init(new_dir, VZ_QUOTAO_INICAL); ++ ++ do { ++ c = 0; ++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) || ++ !VZ_QUOTA_IS_ACTUAL(inode)) { ++ vzquota_inode_qmblk_recalc(inode, &qlnk1); ++ c++; ++ } ++ if (vzquota_qlnk_is_empty(INODE_QLNK(new_dir)) || ++ !VZ_QUOTA_IS_ACTUAL(new_dir)) { ++ vzquota_inode_qmblk_recalc(new_dir, &qlnk2); ++ c++; ++ } ++ } while (c); ++ ++ ret = 0; ++ qmblk = INODE_QLNK(inode)->qmblk; ++ if (qmblk != INODE_QLNK(new_dir)->qmblk) { ++ ret = -1; ++ if (qmblk != VZ_QUOTA_BAD && ++ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) && ++ qmblk->dq_root_dentry->d_inode == inode && ++ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(new_dir)->qmblk, ++ inode->i_sb) && ++ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(old_dir)->qmblk, ++ inode->i_sb)) ++ /* quota root rename is allowed */ ++ ret = 0; ++ } ++ ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(&qlnk2); ++ vzquota_qlnk_destroy(&qlnk1); ++ return ret; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * qmblk-related parts of on/off operations ++ * ++ * --------------------------------------------------------------------- */ ++ ++/** ++ * vzquota_check_dtree - check dentry tree if quota on/off is allowed ++ * ++ * This function doesn't allow quota to be turned on/off if some dentries in ++ * the tree have external references. 
++ * In addition to technical reasons, it enforces user-space correctness: ++ * current usage (taken from or reported to the user space) can be meaningful ++ * and accurate only if the tree is not being modified. ++ * Side effect: additional vfsmount structures referencing the tree (bind ++ * mounts of tree nodes to some other places) are not allowed at on/off time. ++ */ ++int vzquota_check_dtree(struct vz_quota_master *qmblk, int off) ++{ ++ struct dentry *dentry; ++ int err, count; ++ ++ err = -EBUSY; ++ dentry = qmblk->dq_root_dentry; ++ ++ if (d_unhashed(dentry) && dentry != dentry->d_sb->s_root) ++ goto unhashed; ++ ++ /* attempt to shrink */ ++ if (!list_empty(&dentry->d_subdirs)) { ++ spin_unlock(&dcache_lock); ++ inode_qmblk_unlock(dentry->d_sb); ++ shrink_dcache_parent(dentry); ++ inode_qmblk_lock(dentry->d_sb); ++ spin_lock(&dcache_lock); ++ if (!list_empty(&dentry->d_subdirs)) ++ goto out; ++ ++ count = 1; ++ if (dentry == dentry->d_sb->s_root) ++ count += 2; /* sb and mnt refs */ ++ if (atomic_read(&dentry->d_count) < count) { ++ printk(KERN_ERR "%s: too small count %d vs %d.\n", ++ __FUNCTION__, ++ atomic_read(&dentry->d_count), count); ++ goto out; ++ } ++ if (atomic_read(&dentry->d_count) > count) ++ goto out; ++ } ++ ++ err = 0; ++out: ++ return err; ++ ++unhashed: ++ /* ++ * Quota root is removed. ++ * Allow to turn quota off, but not on. 
++ */ ++ if (off) ++ err = 0; ++ goto out; ++} ++ ++int vzquota_on_qmblk(struct super_block *sb, struct inode *inode, ++ struct vz_quota_master *qmblk) ++{ ++ struct vz_quota_ilink qlnk; ++ struct vz_quota_master *qold, *qnew; ++ int err; ++ ++ might_sleep(); ++ ++ qold = NULL; ++ qnew = vzquota_alloc_fake(); ++ if (qnew == NULL) ++ return -ENOMEM; ++ ++ vzquota_qlnk_init(&qlnk); ++ inode_qmblk_lock(sb); ++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL); ++ ++ spin_lock(&dcache_lock); ++ while (1) { ++ err = vzquota_check_dtree(qmblk, 0); ++ if (err) ++ break; ++ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)) ++ break; ++ } ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_ON; ++ spin_unlock(&dcache_lock); ++ ++ if (!err) { ++ qold = __VZ_QUOTA_NOQUOTA(sb); ++ qold->dq_flags |= VZDQ_NOACT; ++ __VZ_QUOTA_NOQUOTA(sb) = qnew; ++ } ++ ++ inode_qmblk_unlock(sb); ++ vzquota_qlnk_destroy(&qlnk); ++ if (qold != NULL) ++ qmblk_put(qold); ++ ++ return err; ++} ++ ++int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk) ++{ ++ int ret; ++ ++ ret = 0; ++ inode_qmblk_lock(sb); ++ ++ spin_lock(&dcache_lock); ++ if (vzquota_check_dtree(qmblk, 1)) ++ ret = -EBUSY; ++ spin_unlock(&dcache_lock); ++ ++ if (!ret) ++ qmblk->dq_flags |= VZDQ_NOACT | VZDQ_NOQUOT; ++ inode_qmblk_unlock(sb); ++ return ret; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * External interfaces ++ * ++ * ---------------------------------------------------------------------*/ ++ ++static int vzquota_ioctl(struct inode *ino, struct file *file, ++ unsigned int cmd, unsigned long arg) ++{ ++ int err; ++ struct vzctl_quotactl qb; ++ struct vzctl_quotaugidctl qub; ++ ++ switch (cmd) { ++ case VZCTL_QUOTA_CTL: ++ err = -ENOTTY; ++ break; ++ case VZCTL_QUOTA_NEW_CTL: ++ err = -EFAULT; ++ if (copy_from_user(&qb, (void *)arg, sizeof(qb))) ++ break; ++ err = do_vzquotactl(qb.cmd, qb.quota_id, ++ qb.qstat, qb.ve_root); ++ break; ++#ifdef CONFIG_VZ_QUOTA_UGID 
++ case VZCTL_QUOTA_UGID_CTL: ++ err = -EFAULT; ++ if (copy_from_user(&qub, (void *)arg, sizeof(qub))) ++ break; ++ err = do_vzquotaugidctl(&qub); ++ break; ++#endif ++ default: ++ err = -ENOTTY; ++ } ++ might_sleep(); /* debug */ ++ return err; ++} ++ ++static struct vzioctlinfo vzdqcalls = { ++ .type = VZDQCTLTYPE, ++ .func = vzquota_ioctl, ++ .owner = THIS_MODULE, ++}; ++ ++/** ++ * vzquota_dstat - get quota usage info for virtual superblock ++ */ ++static int vzquota_dstat(struct super_block *super, struct dq_stat *qstat) ++{ ++ struct vz_quota_master *qmblk; ++ ++ qmblk = vzquota_find_qmblk(super); ++ if (qmblk == NULL) ++ return -ENOENT; ++ if (qmblk == VZ_QUOTA_BAD) { ++ memset(qstat, 0, sizeof(*qstat)); ++ return 0; ++ } ++ ++ qmblk_data_read_lock(qmblk); ++ memcpy(qstat, &qmblk->dq_stat, sizeof(*qstat)); ++ qmblk_data_read_unlock(qmblk); ++ qmblk_put(qmblk); ++ return 0; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Init/exit helpers ++ * ++ * ---------------------------------------------------------------------*/ ++ ++static int vzquota_cache_init(void) ++{ ++ int i; ++ ++ vzquota_cachep = kmem_cache_create("vz_quota_master", ++ sizeof(struct vz_quota_master), ++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL); ++ if (vzquota_cachep == NULL) { ++ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n"); ++ goto nomem2; ++ } ++ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++) ++ INIT_LIST_HEAD(&vzquota_hash_table[i]); ++ ++ return 0; ++ ++nomem2: ++ return -ENOMEM; ++} ++ ++static void vzquota_cache_release(void) ++{ ++ int i; ++ ++ /* sanity check */ ++ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++) ++ if (!list_empty(&vzquota_hash_table[i])) ++ BUG(); ++ ++ /* release caches */ ++ if (kmem_cache_destroy(vzquota_cachep)) ++ printk(KERN_ERR ++ "VZQUOTA: vz_quota_master kmem_cache_destroy failed\n"); ++ vzquota_cachep = NULL; ++} ++ ++static int quota_notifier_call(struct vnotifier_block *self, ++ unsigned long n, void *data, int err) 
++{ ++ struct virt_info_quota *viq; ++ struct super_block *sb; ++ ++ viq = (struct virt_info_quota *)data; ++ switch (n) { ++ case VIRTINFO_QUOTA_ON: ++ err = NOTIFY_BAD; ++ if (!try_module_get(THIS_MODULE)) ++ break; ++ sb = viq->super; ++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info)); ++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list); ++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list); ++ err = NOTIFY_OK; ++ break; ++ case VIRTINFO_QUOTA_OFF: ++ module_put(THIS_MODULE); ++ err = NOTIFY_OK; ++ break; ++ case VIRTINFO_QUOTA_GETSTAT: ++ err = NOTIFY_BAD; ++ if (vzquota_dstat(viq->super, viq->qstat)) ++ break; ++ err = NOTIFY_OK; ++ break; ++ } ++ return err; ++} ++ ++struct vnotifier_block quota_notifier_block = { ++ .notifier_call = quota_notifier_call, ++ .priority = INT_MAX, ++}; ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Init/exit procedures ++ * ++ * ---------------------------------------------------------------------*/ ++ ++static int __init vzquota_init(void) ++{ ++ int err; ++ ++ if ((err = vzquota_cache_init()) != 0) ++ goto out_cache; ++ ++ if ((err = vzquota_proc_init()) != 0) ++ goto out_proc; ++ ++#ifdef CONFIG_VZ_QUOTA_UGID ++ if ((err = vzquota_ugid_init()) != 0) ++ goto out_ugid; ++#endif ++ ++ init_MUTEX(&vz_quota_sem); ++ vzioctl_register(&vzdqcalls); ++ virtinfo_notifier_register(VITYPE_QUOTA, "a_notifier_block); ++#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS) ++ vzaquota_init(); ++#endif ++ ++ return 0; ++ ++#ifdef CONFIG_VZ_QUOTA_UGID ++out_ugid: ++ vzquota_proc_release(); ++#endif ++out_proc: ++ vzquota_cache_release(); ++out_cache: ++ return err; ++} ++ ++#if defined(VZ_QUOTA_UNLOAD) ++static void __exit vzquota_release(void) ++{ ++ virtinfo_notifier_unregister(VITYPE_QUOTA, "a_notifier_block); ++ vzioctl_unregister(&vzdqcalls); ++#ifdef CONFIG_VZ_QUOTA_UGID ++#ifdef CONFIG_PROC_FS ++ vzaquota_fini(); ++#endif ++ vzquota_ugid_release(); ++#endif ++ 
vzquota_proc_release(); ++ vzquota_cache_release(); ++} ++#endif ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo Disk Quota"); ++MODULE_LICENSE("GPL v2"); ++ ++module_init(vzquota_init) ++#if defined(VZ_QUOTA_UNLOAD) ++module_exit(vzquota_release) ++#endif +diff -upr linux-2.6.16.orig/fs/xattr.c linux-2.6.16-026test015/fs/xattr.c +--- linux-2.6.16.orig/fs/xattr.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/xattr.c 2006-07-04 14:41:37.000000000 +0400 +@@ -58,7 +58,7 @@ xattr_permission(struct inode *inode, co + return -EPERM; + } + +- return permission(inode, mask, NULL); ++ return permission(inode, mask, NULL, NULL); + } + + int +diff -upr linux-2.6.16.orig/fs/xfs/linux-2.6/xfs_aops.c linux-2.6.16-026test015/fs/xfs/linux-2.6/xfs_aops.c +--- linux-2.6.16.orig/fs/xfs/linux-2.6/xfs_aops.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/xfs/linux-2.6/xfs_aops.c 2006-07-04 14:41:36.000000000 +0400 +@@ -616,7 +616,7 @@ xfs_is_delayed_page( + acceptable = (type == IOMAP_UNWRITTEN); + else if (buffer_delay(bh)) + acceptable = (type == IOMAP_DELAY); +- else if (buffer_mapped(bh)) ++ else if (buffer_dirty(bh) && buffer_mapped(bh)) + acceptable = (type == 0); + else + break; +diff -upr linux-2.6.16.orig/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.16-026test015/fs/xfs/linux-2.6/xfs_iops.c +--- linux-2.6.16.orig/fs/xfs/linux-2.6/xfs_iops.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/fs/xfs/linux-2.6/xfs_iops.c 2006-07-04 14:41:37.000000000 +0400 +@@ -615,7 +615,8 @@ STATIC int + linvfs_permission( + struct inode *inode, + int mode, +- struct nameidata *nd) ++ struct nameidata *nd, ++ struct exec_perm *perm) + { + vnode_t *vp = LINVFS_GET_VP(inode); + int error; +@@ -673,8 +674,7 @@ linvfs_setattr( + if (ia_valid & ATTR_ATIME) { + vattr.va_mask |= XFS_AT_ATIME; + vattr.va_atime = attr->ia_atime; +- if (ia_valid & ATTR_ATIME_SET) +- inode->i_atime = attr->ia_atime; ++ inode->i_atime = 
attr->ia_atime; + } + if (ia_valid & ATTR_MTIME) { + vattr.va_mask |= XFS_AT_MTIME; +diff -upr linux-2.6.16.orig/include/asm-arm26/tlbflush.h linux-2.6.16-026test015/include/asm-arm26/tlbflush.h +--- linux-2.6.16.orig/include/asm-arm26/tlbflush.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-arm26/tlbflush.h 2006-07-04 14:41:38.000000000 +0400 +@@ -25,7 +25,7 @@ static inline void memc_update_all(void) + { + struct task_struct *p; + cpu_memc_update_all(init_mm.pgd); +- for_each_process(p) { ++ for_each_process_all(p) { + if (!p->mm) + continue; + cpu_memc_update_all(p->mm->pgd); +diff -upr linux-2.6.16.orig/include/asm-generic/atomic.h linux-2.6.16-026test015/include/asm-generic/atomic.h +--- linux-2.6.16.orig/include/asm-generic/atomic.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-generic/atomic.h 2006-07-04 14:41:37.000000000 +0400 +@@ -66,6 +66,13 @@ static inline void atomic_long_sub(long + atomic64_sub(i, v); + } + ++static inline int atomic_long_add_negative(long i, atomic_long_t *l) ++{ ++ atomic64_t *v = (atomic64_t *)l; ++ ++ return atomic64_add_negative(i, v); ++} ++ + #else + + typedef atomic_t atomic_long_t; +@@ -113,5 +120,12 @@ static inline void atomic_long_sub(long + atomic_sub(i, v); + } + ++static inline int atomic_long_add_negative(long i, atomic_long_t *l) ++{ ++ atomic_t *v = (atomic_t *)l; ++ ++ return atomic_add_negative(i, v); ++} ++ + #endif + #endif +diff -upr linux-2.6.16.orig/include/asm-generic/pgtable.h linux-2.6.16-026test015/include/asm-generic/pgtable.h +--- linux-2.6.16.orig/include/asm-generic/pgtable.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-generic/pgtable.h 2006-07-04 14:41:36.000000000 +0400 +@@ -159,17 +159,8 @@ static inline void ptep_set_wrprotect(st + #define lazy_mmu_prot_update(pte) do { } while (0) + #endif + +-#ifndef __HAVE_ARCH_MULTIPLE_ZERO_PAGE ++#ifndef __HAVE_ARCH_MOVE_PTE + #define move_pte(pte, prot, old_addr, 
new_addr) (pte) +-#else +-#define move_pte(pte, prot, old_addr, new_addr) \ +-({ \ +- pte_t newpte = (pte); \ +- if (pte_present(pte) && pfn_valid(pte_pfn(pte)) && \ +- pte_page(pte) == ZERO_PAGE(old_addr)) \ +- newpte = mk_pte(ZERO_PAGE(new_addr), (prot)); \ +- newpte; \ +-}) + #endif + + /* +diff -upr linux-2.6.16.orig/include/asm-i386/bug.h linux-2.6.16-026test015/include/asm-i386/bug.h +--- linux-2.6.16.orig/include/asm-i386/bug.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/bug.h 2006-07-04 14:41:37.000000000 +0400 +@@ -14,7 +14,10 @@ + #ifdef CONFIG_DEBUG_BUGVERBOSE + #define BUG() \ + __asm__ __volatile__( "ud2\n" \ ++ "\t.byte 0x66\n"\ ++ "\t.byte 0xb8\n" /* mov $xxx, %ax */\ + "\t.word %c0\n" \ ++ "\t.byte 0xb8\n" /* mov $xxx, %eax */\ + "\t.long %c1\n" \ + : : "i" (__LINE__), "i" (__FILE__)) + #else +diff -upr linux-2.6.16.orig/include/asm-i386/cpufeature.h linux-2.6.16-026test015/include/asm-i386/cpufeature.h +--- linux-2.6.16.orig/include/asm-i386/cpufeature.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/cpufeature.h 2006-07-04 14:41:36.000000000 +0400 +@@ -70,6 +70,7 @@ + #define X86_FEATURE_P3 (3*32+ 6) /* P3 */ + #define X86_FEATURE_P4 (3*32+ 7) /* P4 */ + #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ ++#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ + + /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ + #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ +diff -upr linux-2.6.16.orig/include/asm-i386/elf.h linux-2.6.16-026test015/include/asm-i386/elf.h +--- linux-2.6.16.orig/include/asm-i386/elf.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/elf.h 2006-07-04 14:41:39.000000000 +0400 +@@ -108,7 +108,7 @@ typedef struct user_fxsr_struct elf_fpxr + For the moment, we have only optimizations for the Intel generations, + but that could change... 
*/ + +-#define ELF_PLATFORM (system_utsname.machine) ++#define ELF_PLATFORM (ve_utsname.machine) + + #ifdef __KERNEL__ + #define SET_PERSONALITY(ex, ibcs2) do { } while (0) +@@ -136,8 +136,10 @@ extern void __kernel_vsyscall; + + #define ARCH_DLINFO \ + do { \ ++ if (sysctl_at_vsyscall) { \ + NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \ ++ } \ + } while (0) + + /* +diff -upr linux-2.6.16.orig/include/asm-i386/i387.h linux-2.6.16-026test015/include/asm-i386/i387.h +--- linux-2.6.16.orig/include/asm-i386/i387.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/i387.h 2006-07-04 14:41:36.000000000 +0400 +@@ -13,6 +13,7 @@ + + #include <linux/sched.h> + #include <linux/init.h> ++#include <linux/kernel_stat.h> + #include <asm/processor.h> + #include <asm/sigcontext.h> + #include <asm/user.h> +@@ -38,17 +39,38 @@ extern void init_fpu(struct task_struct + extern void kernel_fpu_begin(void); + #define kernel_fpu_end() do { stts(); preempt_enable(); } while(0) + ++/* We need a safe address that is cheap to find and that is already ++ in L1 during context switch. 
The best choices are unfortunately ++ different for UP and SMP */ ++#ifdef CONFIG_SMP ++#define safe_address (__per_cpu_offset[0]) ++#else ++#define safe_address (kstat_cpu(0).cpustat.user) ++#endif ++ + /* + * These must be called with preempt disabled + */ + static inline void __save_init_fpu( struct task_struct *tsk ) + { ++ /* Use more nops than strictly needed in case the compiler ++ varies code */ + alternative_input( +- "fnsave %1 ; fwait ;" GENERIC_NOP2, +- "fxsave %1 ; fnclex", ++ "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4, ++ "fxsave %[fx]\n" ++ "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", + X86_FEATURE_FXSR, +- "m" (tsk->thread.i387.fxsave) +- :"memory"); ++ [fx] "m" (tsk->thread.i387.fxsave), ++ [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory"); ++ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception ++ is pending. Clear the x87 state here by setting it to fixed ++ values. safe_address is a random variable that should be in L1 */ ++ alternative_input( ++ GENERIC_NOP8 GENERIC_NOP2, ++ "emms\n\t" /* clear stack tags */ ++ "fildl %[addr]", /* set F?P to defined value */ ++ X86_FEATURE_FXSAVE_LEAK, ++ [addr] "m" (safe_address)); + task_thread_info(tsk)->status &= ~TS_USEDFPU; + } + +diff -upr linux-2.6.16.orig/include/asm-i386/mman.h linux-2.6.16-026test015/include/asm-i386/mman.h +--- linux-2.6.16.orig/include/asm-i386/mman.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/mman.h 2006-07-04 14:41:37.000000000 +0400 +@@ -10,6 +10,7 @@ + #define MAP_NORESERVE 0x4000 /* don't check for reservations */ + #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ + #define MAP_NONBLOCK 0x10000 /* do not block on IO */ ++#define MAP_EXECPRIO 0x20000 /* do soft ubc charge */ + + #define MCL_CURRENT 1 /* lock all current mappings */ + #define MCL_FUTURE 2 /* lock all future mappings */ +diff -upr linux-2.6.16.orig/include/asm-i386/nmi.h linux-2.6.16-026test015/include/asm-i386/nmi.h +--- 
linux-2.6.16.orig/include/asm-i386/nmi.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/nmi.h 2006-07-04 14:41:37.000000000 +0400 +@@ -17,6 +17,7 @@ typedef int (*nmi_callback_t)(struct pt_ + * set. Return 1 if the NMI was handled. + */ + void set_nmi_callback(nmi_callback_t callback); ++void set_nmi_ipi_callback(nmi_callback_t callback); + + /** + * unset_nmi_callback +@@ -24,5 +25,6 @@ void set_nmi_callback(nmi_callback_t cal + * Remove the handler previously set. + */ + void unset_nmi_callback(void); ++void unset_nmi_ipi_callback(void); + + #endif /* ASM_NMI_H */ +diff -upr linux-2.6.16.orig/include/asm-i386/pgtable-2level.h linux-2.6.16-026test015/include/asm-i386/pgtable-2level.h +--- linux-2.6.16.orig/include/asm-i386/pgtable-2level.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/pgtable-2level.h 2006-07-04 14:41:36.000000000 +0400 +@@ -18,6 +18,9 @@ + #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) + #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) + ++#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) ++#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) ++ + #define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) + #define pte_same(a, b) ((a).pte_low == (b).pte_low) + #define pte_page(x) pfn_to_page(pte_pfn(x)) +diff -upr linux-2.6.16.orig/include/asm-i386/pgtable-3level.h linux-2.6.16-026test015/include/asm-i386/pgtable-3level.h +--- linux-2.6.16.orig/include/asm-i386/pgtable-3level.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/pgtable-3level.h 2006-07-04 14:41:36.000000000 +0400 +@@ -85,6 +85,26 @@ static inline void pud_clear (pud_t * pu + #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ + pmd_index(address)) + ++/* ++ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table ++ * entry, so clear the bottom half first and enforce 
ordering with a compiler ++ * barrier. ++ */ ++static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) ++{ ++ ptep->pte_low = 0; ++ smp_wmb(); ++ ptep->pte_high = 0; ++} ++ ++static inline void pmd_clear(pmd_t *pmd) ++{ ++ u32 *tmp = (u32 *)pmd; ++ *tmp = 0; ++ smp_wmb(); ++ *(tmp + 1) = 0; ++} ++ + static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) + { + pte_t res; +diff -upr linux-2.6.16.orig/include/asm-i386/pgtable.h linux-2.6.16-026test015/include/asm-i386/pgtable.h +--- linux-2.6.16.orig/include/asm-i386/pgtable.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/pgtable.h 2006-07-04 14:41:36.000000000 +0400 +@@ -204,12 +204,10 @@ extern unsigned long long __PAGE_KERNEL, + extern unsigned long pg0[]; + + #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) +-#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) + + /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ + #define pmd_none(x) (!(unsigned long)pmd_val(x)) + #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) + #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + + +@@ -269,7 +267,7 @@ static inline pte_t ptep_get_and_clear_f + pte_t pte; + if (full) { + pte = *ptep; +- *ptep = __pte(0); ++ pte_clear(mm, addr, ptep); + } else { + pte = ptep_get_and_clear(mm, addr, ptep); + } +diff -upr linux-2.6.16.orig/include/asm-i386/thread_info.h linux-2.6.16-026test015/include/asm-i386/thread_info.h +--- linux-2.6.16.orig/include/asm-i386/thread_info.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/thread_info.h 2006-07-04 14:41:39.000000000 +0400 +@@ -101,13 +101,13 @@ register unsigned long current_stack_poi + ({ \ + struct thread_info *ret; \ + \ +- ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \ ++ ret = 
kmalloc(THREAD_SIZE, GFP_KERNEL_UBC); \ + if (ret) \ + memset(ret, 0, THREAD_SIZE); \ + ret; \ + }) + #else +-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) ++#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL_UBC) + #endif + + #define free_thread_info(info) kfree(info) +@@ -142,7 +142,8 @@ register unsigned long current_stack_poi + #define TIF_SECCOMP 8 /* secure computing */ + #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ + #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ +-#define TIF_MEMDIE 17 ++#define TIF_FREEZE 17 /* Freeze request, atomic version of PF_FREEZE */ ++#define TIF_MEMDIE 18 + + #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) + #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +diff -upr linux-2.6.16.orig/include/asm-i386/timex.h linux-2.6.16-026test015/include/asm-i386/timex.h +--- linux-2.6.16.orig/include/asm-i386/timex.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/timex.h 2006-07-04 14:41:38.000000000 +0400 +@@ -36,13 +36,17 @@ static inline cycles_t get_cycles (void) + { + unsigned long long ret=0; + +-#ifndef CONFIG_X86_TSC +- if (!cpu_has_tsc) +- return 0; +-#endif +- + #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC) + rdtscll(ret); ++#elif defined(CONFIG_VE) ++ /* ++ * get_cycles is used in the following calculations: ++ * - VPS idle and iowait times in kernel/shced.h ++ * - task's sleep time to be shown with SyRq-t ++ * - kstat latencies in linux/vzstat.h ++ * - sched latency via wakeup_stamp in linux/ve_task.h ++ */ ++#warning "some of VPS statistics won't be correct without get_cycles() (kstat_lat, ve_idle, etc)" + #endif + return ret; + } +diff -upr linux-2.6.16.orig/include/asm-i386/unistd.h linux-2.6.16-026test015/include/asm-i386/unistd.h +--- linux-2.6.16.orig/include/asm-i386/unistd.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-i386/unistd.h 2006-07-04 
14:41:39.000000000 +0400 +@@ -316,8 +316,16 @@ + #define __NR_pselect6 308 + #define __NR_ppoll 309 + #define __NR_unshare 310 +- +-#define NR_syscalls 311 ++#define __NR_fairsched_mknod 500 /* FairScheduler syscalls */ ++#define __NR_fairsched_rmnod 501 ++#define __NR_fairsched_chwt 502 ++#define __NR_fairsched_mvpr 503 ++#define __NR_fairsched_rate 504 ++#define __NR_getluid 510 ++#define __NR_setluid 511 ++#define __NR_setublimit 512 ++#define __NR_ubstat 513 ++#define NR_syscalls 513 + + /* + * user-visible error numbers are in the range -1 - -128: see +diff -upr linux-2.6.16.orig/include/asm-ia64/mman.h linux-2.6.16-026test015/include/asm-ia64/mman.h +--- linux-2.6.16.orig/include/asm-ia64/mman.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-ia64/mman.h 2006-07-04 14:41:37.000000000 +0400 +@@ -18,6 +18,7 @@ + #define MAP_NORESERVE 0x04000 /* don't check for reservations */ + #define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */ + #define MAP_NONBLOCK 0x10000 /* do not block on IO */ ++#define MAP_EXECPRIO 0x20000 /* soft ubc charge */ + + #define MCL_CURRENT 1 /* lock all current mappings */ + #define MCL_FUTURE 2 /* lock all future mappings */ +diff -upr linux-2.6.16.orig/include/asm-ia64/pgalloc.h linux-2.6.16-026test015/include/asm-ia64/pgalloc.h +--- linux-2.6.16.orig/include/asm-ia64/pgalloc.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-ia64/pgalloc.h 2006-07-04 14:41:37.000000000 +0400 +@@ -20,6 +20,8 @@ + #include <linux/page-flags.h> + #include <linux/threads.h> + ++#include <ub/ub_mem.h> ++ + #include <asm/mmu_context.h> + + DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist); +@@ -38,7 +40,7 @@ static inline long pgtable_quicklist_tot + return ql_size; + } + +-static inline void *pgtable_quicklist_alloc(void) ++static inline void *pgtable_quicklist_alloc(int charge) + { + unsigned long *ret = NULL; + +@@ -46,13 +48,19 @@ static inline void *pgtable_quicklist_al + + ret = 
pgtable_quicklist; + if (likely(ret != NULL)) { ++ if (ub_page_charge(virt_to_page(ret), 0, ++ charge ? __GFP_UBC|__GFP_SOFT_UBC : 0)) ++ goto out; ++ + pgtable_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + --pgtable_quicklist_size; ++out: + preempt_enable(); + } else { + preempt_enable(); +- ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO); ++ ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO | ++ (charge ? __GFP_UBC | __GFP_SOFT_UBC : 0)); + } + + return ret; +@@ -70,6 +78,7 @@ static inline void pgtable_quicklist_fre + #endif + + preempt_disable(); ++ ub_page_uncharge(virt_to_page(pgtable_entry), 0); + *(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist; + pgtable_quicklist = (unsigned long *)pgtable_entry; + ++pgtable_quicklist_size; +@@ -78,7 +87,7 @@ static inline void pgtable_quicklist_fre + + static inline pgd_t *pgd_alloc(struct mm_struct *mm) + { +- return pgtable_quicklist_alloc(); ++ return pgtable_quicklist_alloc(1); + } + + static inline void pgd_free(pgd_t * pgd) +@@ -95,7 +104,7 @@ pgd_populate(struct mm_struct *mm, pgd_t + + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) + { +- return pgtable_quicklist_alloc(); ++ return pgtable_quicklist_alloc(1); + } + + static inline void pud_free(pud_t * pud) +@@ -113,7 +122,7 @@ pud_populate(struct mm_struct *mm, pud_t + + static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) + { +- return pgtable_quicklist_alloc(); ++ return pgtable_quicklist_alloc(1); + } + + static inline void pmd_free(pmd_t * pmd) +@@ -138,13 +147,13 @@ pmd_populate_kernel(struct mm_struct *mm + static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long addr) + { +- return virt_to_page(pgtable_quicklist_alloc()); ++ return virt_to_page(pgtable_quicklist_alloc(1)); + } + + static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long addr) + { +- return pgtable_quicklist_alloc(); ++ return 
pgtable_quicklist_alloc(0); + } + + static inline void pte_free(struct page *pte) +diff -upr linux-2.6.16.orig/include/asm-ia64/processor.h linux-2.6.16-026test015/include/asm-ia64/processor.h +--- linux-2.6.16.orig/include/asm-ia64/processor.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-ia64/processor.h 2006-07-04 14:41:38.000000000 +0400 +@@ -306,7 +306,7 @@ struct thread_struct { + regs->loadrs = 0; \ + regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */ \ + regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ +- if (unlikely(!current->mm->dumpable)) { \ ++ if (unlikely(!current->mm->dumpable || !current->mm->vps_dumpable)) { \ + /* \ + * Zap scratch regs to avoid leaking bits between processes with different \ + * uid/privileges. \ +diff -upr linux-2.6.16.orig/include/asm-ia64/thread_info.h linux-2.6.16-026test015/include/asm-ia64/thread_info.h +--- linux-2.6.16.orig/include/asm-ia64/thread_info.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-ia64/thread_info.h 2006-07-04 14:41:37.000000000 +0400 +@@ -94,6 +94,7 @@ struct thread_info { + #define TIF_MEMDIE 17 + #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ + #define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */ ++#define TIF_FREEZE 20 /* Freeze request, atomic version of PF_FREEZE */ + + #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) + #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +diff -upr linux-2.6.16.orig/include/asm-ia64/unistd.h linux-2.6.16-026test015/include/asm-ia64/unistd.h +--- linux-2.6.16.orig/include/asm-ia64/unistd.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-ia64/unistd.h 2006-07-04 14:41:39.000000000 +0400 +@@ -285,12 +285,22 @@ + #define __NR_faccessat 1293 + /* 1294, 1295 reserved for pselect/ppoll */ + #define __NR_unshare 1296 ++#define __NR_fairsched_mknod 1500 ++#define __NR_fairsched_rmnod 1501 ++#define 
__NR_fairsched_chwt 1502 ++#define __NR_fairsched_mvpr 1503 ++#define __NR_fairsched_rate 1504 ++#define __NR_getluid 1505 ++#define __NR_setluid 1506 ++#define __NR_setublimit 1507 ++#define __NR_ubstat 1508 + + #ifdef __KERNEL__ + + #include <linux/config.h> + +-#define NR_syscalls 273 /* length of syscall table */ ++/* length of syscall table */ ++#define NR_syscalls (__NR_ubstat - __NR_ni_syscall + 1) + + #define __ARCH_WANT_SYS_RT_SIGACTION + +diff -upr linux-2.6.16.orig/include/asm-m32r/smp.h linux-2.6.16-026test015/include/asm-m32r/smp.h +--- linux-2.6.16.orig/include/asm-m32r/smp.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-m32r/smp.h 2006-07-04 14:41:36.000000000 +0400 +@@ -67,7 +67,8 @@ extern volatile int cpu_2_physid[NR_CPUS + #define raw_smp_processor_id() (current_thread_info()->cpu) + + extern cpumask_t cpu_callout_map; +-#define cpu_possible_map cpu_callout_map ++extern cpumask_t cpu_possible_map; ++extern cpumask_t cpu_present_map; + + static __inline__ int hard_smp_processor_id(void) + { +diff -upr linux-2.6.16.orig/include/asm-m32r/uaccess.h linux-2.6.16-026test015/include/asm-m32r/uaccess.h +--- linux-2.6.16.orig/include/asm-m32r/uaccess.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-m32r/uaccess.h 2006-07-04 14:41:36.000000000 +0400 +@@ -5,17 +5,9 @@ + * linux/include/asm-m32r/uaccess.h + * + * M32R version. +- * Copyright (C) 2004 Hirokazu Takata <takata at linux-m32r.org> ++ * Copyright (C) 2004, 2006 Hirokazu Takata <takata at linux-m32r.org> + */ + +-#undef UACCESS_DEBUG +- +-#ifdef UACCESS_DEBUG +-#define UAPRINTK(args...) printk(args) +-#else +-#define UAPRINTK(args...) 
+-#endif /* UACCESS_DEBUG */ +- + /* + * User space memory access functions + */ +@@ -38,27 +30,29 @@ + #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + + #ifdef CONFIG_MMU ++ + #define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) + #define USER_DS MAKE_MM_SEG(PAGE_OFFSET) +-#else +-#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) +-#define USER_DS MAKE_MM_SEG(0xFFFFFFFF) +-#endif /* CONFIG_MMU */ +- + #define get_ds() (KERNEL_DS) +-#ifdef CONFIG_MMU + #define get_fs() (current_thread_info()->addr_limit) + #define set_fs(x) (current_thread_info()->addr_limit = (x)) +-#else ++ ++#else /* not CONFIG_MMU */ ++ ++#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) ++#define USER_DS MAKE_MM_SEG(0xFFFFFFFF) ++#define get_ds() (KERNEL_DS) ++ + static inline mm_segment_t get_fs(void) + { +- return USER_DS; ++ return USER_DS; + } + + static inline void set_fs(mm_segment_t s) + { + } +-#endif /* CONFIG_MMU */ ++ ++#endif /* not CONFIG_MMU */ + + #define segment_eq(a,b) ((a).seg == (b).seg) + +@@ -83,9 +77,9 @@ static inline void set_fs(mm_segment_t s + " subx %0, %0\n" \ + " cmpu %4, %1\n" \ + " subx %0, %5\n" \ +- : "=&r"(flag), "=r"(sum) \ +- : "1"(addr), "r"((int)(size)), \ +- "r"(current_thread_info()->addr_limit.seg), "r"(0) \ ++ : "=&r" (flag), "=r" (sum) \ ++ : "1" (addr), "r" ((int)(size)), \ ++ "r" (current_thread_info()->addr_limit.seg), "r" (0) \ + : "cbit" ); \ + flag; }) + +@@ -113,10 +107,10 @@ static inline void set_fs(mm_segment_t s + #else + static inline int access_ok(int type, const void *addr, unsigned long size) + { +- extern unsigned long memory_start, memory_end; +- unsigned long val = (unsigned long)addr; ++ extern unsigned long memory_start, memory_end; ++ unsigned long val = (unsigned long)addr; + +- return ((val >= memory_start) && ((val + size) < memory_end)); ++ return ((val >= memory_start) && ((val + size) < memory_end)); + } + #endif /* CONFIG_MMU */ + +@@ -155,39 +149,6 @@ extern int fixup_exception(struct pt_reg + * accesses to the same area of user memory). 
+ */ + +-extern void __get_user_1(void); +-extern void __get_user_2(void); +-extern void __get_user_4(void); +- +-#ifndef MODULE +-#define __get_user_x(size,ret,x,ptr) \ +- __asm__ __volatile__( \ +- " mv r0, %0\n" \ +- " mv r1, %1\n" \ +- " bl __get_user_" #size "\n" \ +- " mv %0, r0\n" \ +- " mv %1, r1\n" \ +- : "=r"(ret), "=r"(x) \ +- : "0"(ptr) \ +- : "r0", "r1", "r14" ) +-#else /* MODULE */ +-/* +- * Use "jl" instead of "bl" for MODULE +- */ +-#define __get_user_x(size,ret,x,ptr) \ +- __asm__ __volatile__( \ +- " mv r0, %0\n" \ +- " mv r1, %1\n" \ +- " seth lr, #high(__get_user_" #size ")\n" \ +- " or3 lr, lr, #low(__get_user_" #size ")\n" \ +- " jl lr\n" \ +- " mv %0, r0\n" \ +- " mv %1, r1\n" \ +- : "=r"(ret), "=r"(x) \ +- : "0"(ptr) \ +- : "r0", "r1", "r14" ) +-#endif +- + /* Careful: we have to cast the result to the type of the pointer for sign + reasons */ + /** +@@ -208,20 +169,7 @@ extern void __get_user_4(void); + * On error, the variable @x is set to zero. + */ + #define get_user(x,ptr) \ +-({ int __ret_gu; \ +- unsigned long __val_gu; \ +- __chk_user_ptr(ptr); \ +- switch(sizeof (*(ptr))) { \ +- case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \ +- case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \ +- case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \ +- default: __get_user_x(X,__ret_gu,__val_gu,ptr); break; \ +- } \ +- (x) = (__typeof__(*(ptr)))__val_gu; \ +- __ret_gu; \ +-}) +- +-extern void __put_user_bad(void); ++ __get_user_check((x),(ptr),sizeof(*(ptr))) + + /** + * put_user: - Write a simple value into user space. +@@ -240,8 +188,7 @@ extern void __put_user_bad(void); + * Returns zero on success, or -EFAULT on error. + */ + #define put_user(x,ptr) \ +- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) +- ++ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + + /** + * __get_user: - Get a simple variable from user space, with less checking. 
+@@ -264,8 +211,64 @@ extern void __put_user_bad(void); + * On error, the variable @x is set to zero. + */ + #define __get_user(x,ptr) \ +- __get_user_nocheck((x),(ptr),sizeof(*(ptr))) ++ __get_user_nocheck((x),(ptr),sizeof(*(ptr))) + ++#define __get_user_nocheck(x,ptr,size) \ ++({ \ ++ long __gu_err = 0; \ ++ unsigned long __gu_val; \ ++ might_sleep(); \ ++ __get_user_size(__gu_val,(ptr),(size),__gu_err); \ ++ (x) = (__typeof__(*(ptr)))__gu_val; \ ++ __gu_err; \ ++}) ++ ++#define __get_user_check(x,ptr,size) \ ++({ \ ++ long __gu_err = -EFAULT; \ ++ unsigned long __gu_val = 0; \ ++ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ ++ might_sleep(); \ ++ if (access_ok(VERIFY_READ,__gu_addr,size)) \ ++ __get_user_size(__gu_val,__gu_addr,(size),__gu_err); \ ++ (x) = (__typeof__(*(ptr)))__gu_val; \ ++ __gu_err; \ ++}) ++ ++extern long __get_user_bad(void); ++ ++#define __get_user_size(x,ptr,size,retval) \ ++do { \ ++ retval = 0; \ ++ __chk_user_ptr(ptr); \ ++ switch (size) { \ ++ case 1: __get_user_asm(x,ptr,retval,"ub"); break; \ ++ case 2: __get_user_asm(x,ptr,retval,"uh"); break; \ ++ case 4: __get_user_asm(x,ptr,retval,""); break; \ ++ default: (x) = __get_user_bad(); \ ++ } \ ++} while (0) ++ ++#define __get_user_asm(x, addr, err, itype) \ ++ __asm__ __volatile__( \ ++ " .fillinsn\n" \ ++ "1: ld"itype" %1,@%2\n" \ ++ " .fillinsn\n" \ ++ "2:\n" \ ++ ".section .fixup,\"ax\"\n" \ ++ " .balign 4\n" \ ++ "3: ldi %0,%3\n" \ ++ " seth r14,#high(2b)\n" \ ++ " or3 r14,r14,#low(2b)\n" \ ++ " jmp r14\n" \ ++ ".previous\n" \ ++ ".section __ex_table,\"a\"\n" \ ++ " .balign 4\n" \ ++ " .long 1b,3b\n" \ ++ ".previous" \ ++ : "=&r" (err), "=&r" (x) \ ++ : "r" (addr), "i" (-EFAULT), "0" (err) \ ++ : "r14", "memory") + + /** + * __put_user: - Write a simple value into user space, with less checking. +@@ -287,11 +290,13 @@ extern void __put_user_bad(void); + * Returns zero on success, or -EFAULT on error. 
+ */ + #define __put_user(x,ptr) \ +- __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) ++ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) ++ + + #define __put_user_nocheck(x,ptr,size) \ + ({ \ + long __pu_err; \ ++ might_sleep(); \ + __put_user_size((x),(ptr),(size),__pu_err); \ + __pu_err; \ + }) +@@ -308,28 +313,28 @@ extern void __put_user_bad(void); + }) + + #if defined(__LITTLE_ENDIAN__) +-#define __put_user_u64(x, addr, err) \ +- __asm__ __volatile__( \ +- " .fillinsn\n" \ +- "1: st %L1,@%2\n" \ +- " .fillinsn\n" \ +- "2: st %H1,@(4,%2)\n" \ +- " .fillinsn\n" \ +- "3:\n" \ +- ".section .fixup,\"ax\"\n" \ +- " .balign 4\n" \ +- "4: ldi %0,%3\n" \ +- " seth r14,#high(3b)\n" \ +- " or3 r14,r14,#low(3b)\n" \ +- " jmp r14\n" \ +- ".previous\n" \ +- ".section __ex_table,\"a\"\n" \ +- " .balign 4\n" \ +- " .long 1b,4b\n" \ +- " .long 2b,4b\n" \ +- ".previous" \ +- : "=&r"(err) \ +- : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err) \ ++#define __put_user_u64(x, addr, err) \ ++ __asm__ __volatile__( \ ++ " .fillinsn\n" \ ++ "1: st %L1,@%2\n" \ ++ " .fillinsn\n" \ ++ "2: st %H1,@(4,%2)\n" \ ++ " .fillinsn\n" \ ++ "3:\n" \ ++ ".section .fixup,\"ax\"\n" \ ++ " .balign 4\n" \ ++ "4: ldi %0,%3\n" \ ++ " seth r14,#high(3b)\n" \ ++ " or3 r14,r14,#low(3b)\n" \ ++ " jmp r14\n" \ ++ ".previous\n" \ ++ ".section __ex_table,\"a\"\n" \ ++ " .balign 4\n" \ ++ " .long 1b,4b\n" \ ++ " .long 2b,4b\n" \ ++ ".previous" \ ++ : "=&r" (err) \ ++ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err) \ + : "r14", "memory") + + #elif defined(__BIG_ENDIAN__) +@@ -353,13 +358,15 @@ extern void __put_user_bad(void); + " .long 1b,4b\n" \ + " .long 2b,4b\n" \ + ".previous" \ +- : "=&r"(err) \ +- : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err) \ ++ : "=&r" (err) \ ++ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err) \ + : "r14", "memory") + #else + #error no endian defined + #endif + ++extern void __put_user_bad(void); ++ + #define __put_user_size(x,ptr,size,retval) \ + do { \ + 
retval = 0; \ +@@ -398,52 +405,8 @@ struct __large_struct { unsigned long bu + " .balign 4\n" \ + " .long 1b,3b\n" \ + ".previous" \ +- : "=&r"(err) \ +- : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err) \ +- : "r14", "memory") +- +-#define __get_user_nocheck(x,ptr,size) \ +-({ \ +- long __gu_err; \ +- unsigned long __gu_val; \ +- __get_user_size(__gu_val,(ptr),(size),__gu_err); \ +- (x) = (__typeof__(*(ptr)))__gu_val; \ +- __gu_err; \ +-}) +- +-extern long __get_user_bad(void); +- +-#define __get_user_size(x,ptr,size,retval) \ +-do { \ +- retval = 0; \ +- __chk_user_ptr(ptr); \ +- switch (size) { \ +- case 1: __get_user_asm(x,ptr,retval,"ub"); break; \ +- case 2: __get_user_asm(x,ptr,retval,"uh"); break; \ +- case 4: __get_user_asm(x,ptr,retval,""); break; \ +- default: (x) = __get_user_bad(); \ +- } \ +-} while (0) +- +-#define __get_user_asm(x, addr, err, itype) \ +- __asm__ __volatile__( \ +- " .fillinsn\n" \ +- "1: ld"itype" %1,@%2\n" \ +- " .fillinsn\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- " .balign 4\n" \ +- "3: ldi %0,%3\n" \ +- " seth r14,#high(2b)\n" \ +- " or3 r14,r14,#low(2b)\n" \ +- " jmp r14\n" \ +- ".previous\n" \ +- ".section __ex_table,\"a\"\n" \ +- " .balign 4\n" \ +- " .long 1b,3b\n" \ +- ".previous" \ +- : "=&r"(err), "=&r"(x) \ +- : "r"(addr), "i"(-EFAULT), "0"(err) \ ++ : "=&r" (err) \ ++ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err) \ + : "r14", "memory") + + /* +@@ -453,7 +416,6 @@ do { \ + * anything, so this is accurate. 
+ */ + +- + /* + * Copy To/From Userspace + */ +@@ -511,8 +473,9 @@ do { \ + " .long 2b,9b\n" \ + " .long 3b,9b\n" \ + ".previous\n" \ +- : "=&r"(__dst), "=&r"(__src), "=&r"(size), "=&r"(__c) \ +- : "0"(to), "1"(from), "2"(size), "3"(size / 4) \ ++ : "=&r" (__dst), "=&r" (__src), "=&r" (size), \ ++ "=&r" (__c) \ ++ : "0" (to), "1" (from), "2" (size), "3" (size / 4) \ + : "r14", "memory"); \ + } while (0) + +@@ -573,8 +536,9 @@ do { \ + " .long 2b,7b\n" \ + " .long 3b,7b\n" \ + ".previous\n" \ +- : "=&r"(__dst), "=&r"(__src), "=&r"(size), "=&r"(__c) \ +- : "0"(to), "1"(from), "2"(size), "3"(size / 4) \ ++ : "=&r" (__dst), "=&r" (__src), "=&r" (size), \ ++ "=&r" (__c) \ ++ : "0" (to), "1" (from), "2" (size), "3" (size / 4) \ + : "r14", "memory"); \ + } while (0) + +@@ -676,7 +640,7 @@ unsigned long __generic_copy_from_user(v + #define copy_from_user(to,from,n) \ + ({ \ + might_sleep(); \ +-__generic_copy_from_user((to),(from),(n)); \ ++ __generic_copy_from_user((to),(from),(n)); \ + }) + + long __must_check strncpy_from_user(char *dst, const char __user *src, +diff -upr linux-2.6.16.orig/include/asm-mips/bitops.h linux-2.6.16-026test015/include/asm-mips/bitops.h +--- linux-2.6.16.orig/include/asm-mips/bitops.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-mips/bitops.h 2006-07-04 14:41:36.000000000 +0400 +@@ -654,7 +654,12 @@ static inline unsigned long fls(unsigned + { + #ifdef CONFIG_32BIT + #ifdef CONFIG_CPU_MIPS32 +- __asm__ ("clz %0, %1" : "=r" (word) : "r" (word)); ++ __asm__ ( ++ " .set mips32 \n" ++ " clz %0, %1 \n" ++ " .set mips0 \n" ++ : "=r" (word) ++ : "r" (word)); + + return 32 - word; + #else +@@ -678,7 +683,12 @@ static inline unsigned long fls(unsigned + #ifdef CONFIG_64BIT + #ifdef CONFIG_CPU_MIPS64 + +- __asm__ ("dclz %0, %1" : "=r" (word) : "r" (word)); ++ __asm__ ( ++ " .set mips64 \n" ++ " dclz %0, %1 \n" ++ " .set mips0 \n" ++ : "=r" (word) ++ : "r" (word)); + + return 64 - word; + #else +diff -upr 
linux-2.6.16.orig/include/asm-mips/byteorder.h linux-2.6.16-026test015/include/asm-mips/byteorder.h +--- linux-2.6.16.orig/include/asm-mips/byteorder.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-mips/byteorder.h 2006-07-04 14:41:36.000000000 +0400 +@@ -19,7 +19,9 @@ + static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 x) + { + __asm__( ++ " .set mips32r2 \n" + " wsbh %0, %1 \n" ++ " .set mips0 \n" + : "=r" (x) + : "r" (x)); + +@@ -30,8 +32,10 @@ static __inline__ __attribute_const__ __ + static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 x) + { + __asm__( ++ " .set mips32r2 \n" + " wsbh %0, %1 \n" + " rotr %0, %0, 16 \n" ++ " .set mips0 \n" + : "=r" (x) + : "r" (x)); + +diff -upr linux-2.6.16.orig/include/asm-mips/interrupt.h linux-2.6.16-026test015/include/asm-mips/interrupt.h +--- linux-2.6.16.orig/include/asm-mips/interrupt.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-mips/interrupt.h 2006-07-04 14:41:36.000000000 +0400 +@@ -20,7 +20,9 @@ __asm__ ( + " .set reorder \n" + " .set noat \n" + #ifdef CONFIG_CPU_MIPSR2 ++ " .set mips32r2 \n" + " ei \n" ++ " .set mips0 \n" + #else + " mfc0 $1,$12 \n" + " ori $1,0x1f \n" +@@ -63,7 +65,9 @@ __asm__ ( + " .set push \n" + " .set noat \n" + #ifdef CONFIG_CPU_MIPSR2 ++ " .set mips32r2 \n" + " di \n" ++ " .set mips0 \n" + #else + " mfc0 $1,$12 \n" + " ori $1,0x1f \n" +@@ -103,8 +107,10 @@ __asm__ ( + " .set reorder \n" + " .set noat \n" + #ifdef CONFIG_CPU_MIPSR2 ++ " .set mips32r2 \n" + " di \\result \n" + " andi \\result, 1 \n" ++ " .set mips0 \n" + #else + " mfc0 \\result, $12 \n" + " ori $1, \\result, 0x1f \n" +@@ -133,9 +139,11 @@ __asm__ ( + * Slow, but doesn't suffer from a relativly unlikely race + * condition we're having since days 1. 
+ */ ++ " .set mips32r2 \n" + " beqz \\flags, 1f \n" + " di \n" + " ei \n" ++ " .set mips0 \n" + "1: \n" + #elif defined(CONFIG_CPU_MIPSR2) + /* +diff -upr linux-2.6.16.orig/include/asm-mips/pgtable.h linux-2.6.16-026test015/include/asm-mips/pgtable.h +--- linux-2.6.16.orig/include/asm-mips/pgtable.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-mips/pgtable.h 2006-07-04 14:41:36.000000000 +0400 +@@ -70,7 +70,15 @@ extern unsigned long zero_page_mask; + #define ZERO_PAGE(vaddr) \ + (virt_to_page(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))) + +-#define __HAVE_ARCH_MULTIPLE_ZERO_PAGE ++#define __HAVE_ARCH_MOVE_PTE ++#define move_pte(pte, prot, old_addr, new_addr) \ ++({ \ ++ pte_t newpte = (pte); \ ++ if (pte_present(pte) && pfn_valid(pte_pfn(pte)) && \ ++ pte_page(pte) == ZERO_PAGE(old_addr)) \ ++ newpte = mk_pte(ZERO_PAGE(new_addr), (prot)); \ ++ newpte; \ ++}) + + extern void paging_init(void); + +diff -upr linux-2.6.16.orig/include/asm-mips/r4kcache.h linux-2.6.16-026test015/include/asm-mips/r4kcache.h +--- linux-2.6.16.orig/include/asm-mips/r4kcache.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-mips/r4kcache.h 2006-07-04 14:41:36.000000000 +0400 +@@ -37,7 +37,7 @@ + " cache %0, %1 \n" \ + " .set pop \n" \ + : \ +- : "i" (op), "m" (*(unsigned char *)(addr))) ++ : "i" (op), "R" (*(unsigned char *)(addr))) + + static inline void flush_icache_line_indexed(unsigned long addr) + { +diff -upr linux-2.6.16.orig/include/asm-powerpc/floppy.h linux-2.6.16-026test015/include/asm-powerpc/floppy.h +--- linux-2.6.16.orig/include/asm-powerpc/floppy.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-powerpc/floppy.h 2006-07-04 14:41:36.000000000 +0400 +@@ -35,6 +35,7 @@ + #ifdef CONFIG_PCI + + #include <linux/pci.h> ++#include <asm/ppc-pci.h> /* for ppc64_isabridge_dev */ + + #define fd_dma_setup(addr,size,mode,io) powerpc_fd_dma_setup(addr,size,mode,io) + +@@ -52,12 
+53,12 @@ static __inline__ int powerpc_fd_dma_set + if (bus_addr + && (addr != prev_addr || size != prev_size || dir != prev_dir)) { + /* different from last time -- unmap prev */ +- pci_unmap_single(NULL, bus_addr, prev_size, prev_dir); ++ pci_unmap_single(ppc64_isabridge_dev, bus_addr, prev_size, prev_dir); + bus_addr = 0; + } + + if (!bus_addr) /* need to map it */ +- bus_addr = pci_map_single(NULL, addr, size, dir); ++ bus_addr = pci_map_single(ppc64_isabridge_dev, addr, size, dir); + + /* remember this one as prev */ + prev_addr = addr; +diff -upr linux-2.6.16.orig/include/asm-powerpc/pgalloc.h linux-2.6.16-026test015/include/asm-powerpc/pgalloc.h +--- linux-2.6.16.orig/include/asm-powerpc/pgalloc.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-powerpc/pgalloc.h 2006-07-04 14:41:37.000000000 +0400 +@@ -33,7 +33,8 @@ extern kmem_cache_t *pgtable_cache[]; + + static inline pgd_t *pgd_alloc(struct mm_struct *mm) + { +- return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL); ++ return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], ++ GFP_KERNEL_UBC | __GFP_SOFT_UBC); + } + + static inline void pgd_free(pgd_t *pgd) +@@ -48,7 +49,7 @@ static inline void pgd_free(pgd_t *pgd) + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) + { + return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM], +- GFP_KERNEL|__GFP_REPEAT); ++ GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT); + } + + static inline void pud_free(pud_t *pud) +@@ -84,7 +85,7 @@ static inline void pmd_populate_kernel(s + static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) + { + return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM], +- GFP_KERNEL|__GFP_REPEAT); ++ GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT); + } + + static inline void pmd_free(pmd_t *pmd) +@@ -92,17 +93,21 @@ static inline void pmd_free(pmd_t *pmd) + kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); + } + ++static inline pte_t *__pte_alloc(gfp_t flags) ++{ ++ 
return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], flags); ++} ++ + static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) + { +- return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], +- GFP_KERNEL|__GFP_REPEAT); ++ return __pte_alloc(GFP_KERNEL | __GFP_REPEAT); + } + + static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) + { +- return virt_to_page(pte_alloc_one_kernel(mm, address)); ++ return virt_to_page(__pte_alloc(GFP_KERNEL_UBC | __GFP_SOFT_UBC)); + } + + static inline void pte_free_kernel(pte_t *pte) +diff -upr linux-2.6.16.orig/include/asm-powerpc/unistd.h linux-2.6.16-026test015/include/asm-powerpc/unistd.h +--- linux-2.6.16.orig/include/asm-powerpc/unistd.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-powerpc/unistd.h 2006-07-04 14:41:37.000000000 +0400 +@@ -301,8 +301,12 @@ + #define __NR_pselect6 280 + #define __NR_ppoll 281 + #define __NR_unshare 282 +- +-#define __NR_syscalls 283 ++#define __NR_getluid 410 ++#define __NR_setluid 411 ++#define __NR_setublimit 412 ++#define __NR_ubstat 413 ++ ++#define NR_syscalls 414 + + #ifdef __KERNEL__ + #define __NR__exit __NR_exit +diff -upr linux-2.6.16.orig/include/asm-s390/pgalloc.h linux-2.6.16-026test015/include/asm-s390/pgalloc.h +--- linux-2.6.16.orig/include/asm-s390/pgalloc.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-s390/pgalloc.h 2006-07-04 14:41:37.000000000 +0400 +@@ -34,12 +34,12 @@ static inline pgd_t *pgd_alloc(struct mm + int i; + + #ifndef __s390x__ +- pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,1); ++ pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 1); + if (pgd != NULL) + for (i = 0; i < USER_PTRS_PER_PGD; i++) + pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE)); + #else /* __s390x__ */ +- pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,2); ++ pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 2); + if (pgd != NULL) + for (i = 0; i < 
PTRS_PER_PGD; i++) + pgd_clear(pgd + i); +@@ -72,7 +72,7 @@ static inline pmd_t * pmd_alloc_one(stru + pmd_t *pmd; + int i; + +- pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, 2); ++ pmd = (pmd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 2); + if (pmd != NULL) { + for (i=0; i < PTRS_PER_PMD; i++) + pmd_clear(pmd+i); +@@ -118,16 +118,13 @@ pmd_populate(struct mm_struct *mm, pmd_t + pmd_populate_kernel(mm, pmd, (pte_t *)((page-mem_map) << PAGE_SHIFT)); + } + +-/* +- * page table entry allocation/free routines. +- */ +-static inline pte_t * +-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr) ++static inline pte_t *pte_alloc(struct mm_struct *mm, unsigned long vmaddr, ++ gfp_t mask) + { + pte_t *pte; + int i; + +- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); ++ pte = (pte_t *)__get_free_page(mask); + if (pte != NULL) { + for (i=0; i < PTRS_PER_PTE; i++) { + pte_clear(mm, vmaddr, pte+i); +@@ -137,10 +134,20 @@ pte_alloc_one_kernel(struct mm_struct *m + return pte; + } + ++/* ++ * page table entry allocation/free routines. 
++ */ ++static inline pte_t * ++pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr) ++{ ++ return pte_alloc(mm, vmaddr, GFP_KERNEL | __GFP_REPEAT); ++} ++ + static inline struct page * + pte_alloc_one(struct mm_struct *mm, unsigned long vmaddr) + { +- pte_t *pte = pte_alloc_one_kernel(mm, vmaddr); ++ pte_t *pte = pte_alloc(mm, vmaddr, GFP_KERNEL_UBC | __GFP_SOFT_UBC | ++ __GFP_REPEAT); + if (pte) + return virt_to_page(pte); + return 0; +diff -upr linux-2.6.16.orig/include/asm-sh64/pgalloc.h linux-2.6.16-026test015/include/asm-sh64/pgalloc.h +--- linux-2.6.16.orig/include/asm-sh64/pgalloc.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-sh64/pgalloc.h 2006-07-04 14:41:38.000000000 +0400 +@@ -173,7 +173,7 @@ static inline void set_pgdir(unsigned lo + pgd_t *pgd; + + read_lock(&tasklist_lock); +- for_each_process(p) { ++ for_each_process_all(p) { + if (!p->mm) + continue; + *pgd_offset(p->mm,address) = entry; +diff -upr linux-2.6.16.orig/include/asm-sparc64/dma-mapping.h linux-2.6.16-026test015/include/asm-sparc64/dma-mapping.h +--- linux-2.6.16.orig/include/asm-sparc64/dma-mapping.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-sparc64/dma-mapping.h 2006-07-04 14:41:36.000000000 +0400 +@@ -4,7 +4,146 @@ + #include <linux/config.h> + + #ifdef CONFIG_PCI +-#include <asm-generic/dma-mapping.h> ++ ++/* we implement the API below in terms of the existing PCI one, ++ * so include it */ ++#include <linux/pci.h> ++/* need struct page definitions */ ++#include <linux/mm.h> ++ ++static inline int ++dma_supported(struct device *dev, u64 mask) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ return pci_dma_supported(to_pci_dev(dev), mask); ++} ++ ++static inline int ++dma_set_mask(struct device *dev, u64 dma_mask) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ return pci_set_dma_mask(to_pci_dev(dev), dma_mask); ++} ++ ++static inline void * ++dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t 
*dma_handle, ++ gfp_t flag) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ return __pci_alloc_consistent(to_pci_dev(dev), size, dma_handle, flag); ++} ++ ++static inline void ++dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, ++ dma_addr_t dma_handle) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ pci_free_consistent(to_pci_dev(dev), size, cpu_addr, dma_handle); ++} ++ ++static inline dma_addr_t ++dma_map_single(struct device *dev, void *cpu_addr, size_t size, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ return pci_map_single(to_pci_dev(dev), cpu_addr, size, (int)direction); ++} ++ ++static inline void ++dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ pci_unmap_single(to_pci_dev(dev), dma_addr, size, (int)direction); ++} ++ ++static inline dma_addr_t ++dma_map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ return pci_map_page(to_pci_dev(dev), page, offset, size, (int)direction); ++} ++ ++static inline void ++dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ pci_unmap_page(to_pci_dev(dev), dma_address, size, (int)direction); ++} ++ ++static inline int ++dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ return pci_map_sg(to_pci_dev(dev), sg, nents, (int)direction); ++} ++ ++static inline void ++dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ pci_unmap_sg(to_pci_dev(dev), sg, nhwentries, (int)direction); ++} ++ ++static inline void ++dma_sync_single_for_cpu(struct 
device *dev, dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ pci_dma_sync_single_for_cpu(to_pci_dev(dev), dma_handle, ++ size, (int)direction); ++} ++ ++static inline void ++dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ pci_dma_sync_single_for_device(to_pci_dev(dev), dma_handle, ++ size, (int)direction); ++} ++ ++static inline void ++dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ pci_dma_sync_sg_for_cpu(to_pci_dev(dev), sg, nelems, (int)direction); ++} ++ ++static inline void ++dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(dev->bus != &pci_bus_type); ++ ++ pci_dma_sync_sg_for_device(to_pci_dev(dev), sg, nelems, (int)direction); ++} ++ ++static inline int ++dma_mapping_error(dma_addr_t dma_addr) ++{ ++ return pci_dma_mapping_error(dma_addr); ++} ++ + #else + + struct device; +diff -upr linux-2.6.16.orig/include/asm-sparc64/pci.h linux-2.6.16-026test015/include/asm-sparc64/pci.h +--- linux-2.6.16.orig/include/asm-sparc64/pci.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-sparc64/pci.h 2006-07-04 14:41:36.000000000 +0400 +@@ -44,7 +44,9 @@ struct pci_dev; + /* Allocate and map kernel buffer using consistent mode DMA for a device. + * hwdev should be valid struct pci_dev pointer for PCI devices. + */ +-extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle); ++extern void *__pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); ++#define pci_alloc_consistent(DEV,SZ,HANDLE) \ ++ __pci_alloc_consistent(DEV,SZ,HANDLE,GFP_ATOMIC) + + /* Free and unmap a consistent DMA buffer. 
+ * cpu_addr is what was returned from pci_alloc_consistent, +diff -upr linux-2.6.16.orig/include/asm-sparc64/pgtable.h linux-2.6.16-026test015/include/asm-sparc64/pgtable.h +--- linux-2.6.16.orig/include/asm-sparc64/pgtable.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-sparc64/pgtable.h 2006-07-04 14:41:36.000000000 +0400 +@@ -335,6 +335,23 @@ static inline void set_pte_at(struct mm_ + #define pte_clear(mm,addr,ptep) \ + set_pte_at((mm), (addr), (ptep), __pte(0UL)) + ++#ifdef DCACHE_ALIASING_POSSIBLE ++#define __HAVE_ARCH_MOVE_PTE ++#define move_pte(pte, prot, old_addr, new_addr) \ ++({ \ ++ pte_t newpte = (pte); \ ++ if (pte_present(pte)) { \ ++ unsigned long this_pfn = pte_pfn(pte); \ ++ \ ++ if (pfn_valid(this_pfn) && \ ++ (((old_addr) ^ (new_addr)) & (1 << 13))) \ ++ flush_dcache_page_all(current->mm, \ ++ pfn_to_page(this_pfn)); \ ++ } \ ++ newpte; \ ++}) ++#endif ++ + extern pgd_t swapper_pg_dir[2048]; + extern pmd_t swapper_low_pmd_dir[2048]; + +diff -upr linux-2.6.16.orig/include/asm-x86_64/cpufeature.h linux-2.6.16-026test015/include/asm-x86_64/cpufeature.h +--- linux-2.6.16.orig/include/asm-x86_64/cpufeature.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-x86_64/cpufeature.h 2006-07-04 14:41:36.000000000 +0400 +@@ -64,6 +64,7 @@ + #define X86_FEATURE_REP_GOOD (3*32+ 4) /* rep microcode works well on this CPU */ + #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */ + #define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */ ++#define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */ + + /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ + #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ +diff -upr linux-2.6.16.orig/include/asm-x86_64/i387.h linux-2.6.16-026test015/include/asm-x86_64/i387.h +--- linux-2.6.16.orig/include/asm-x86_64/i387.h 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16-026test015/include/asm-x86_64/i387.h 2006-07-04 14:41:36.000000000 +0400 +@@ -72,6 +72,23 @@ extern int set_fpregs(struct task_struct + #define set_fpu_swd(t,val) ((t)->thread.i387.fxsave.swd = (val)) + #define set_fpu_fxsr_twd(t,val) ((t)->thread.i387.fxsave.twd = (val)) + ++#define X87_FSW_ES (1 << 7) /* Exception Summary */ ++ ++/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception ++ is pending. Clear the x87 state here by setting it to fixed ++ values. The kernel data segment can be sometimes 0 and sometimes ++ new user value. Both should be ok. ++ Use the PDA as safe address because it should be already in L1. */ ++static inline void clear_fpu_state(struct i387_fxsave_struct *fx) ++{ ++ if (unlikely(fx->swd & X87_FSW_ES)) ++ asm volatile("fnclex"); ++ alternative_input(ASM_NOP8 ASM_NOP2, ++ " emms\n" /* clear stack tags */ ++ " fildl %%gs:0", /* load to clear state */ ++ X86_FEATURE_FXSAVE_LEAK); ++} ++ + static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) + { + int err; +@@ -119,6 +136,7 @@ static inline int save_i387_checking(str + #endif + if (unlikely(err)) + __clear_user(fx, sizeof(struct i387_fxsave_struct)); ++ /* No need to clear here because the caller clears USED_MATH */ + return err; + } + +@@ -149,7 +167,7 @@ static inline void __fxsave_clear(struct + "i" (offsetof(__typeof__(*tsk), + thread.i387.fxsave))); + #endif +- __asm__ __volatile__("fnclex"); ++ clear_fpu_state(&tsk->thread.i387.fxsave); + } + + static inline void kernel_fpu_begin(void) +diff -upr linux-2.6.16.orig/include/asm-x86_64/mman.h linux-2.6.16-026test015/include/asm-x86_64/mman.h +--- linux-2.6.16.orig/include/asm-x86_64/mman.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-x86_64/mman.h 2006-07-04 14:41:37.000000000 +0400 +@@ -12,6 +12,7 @@ + #define MAP_NORESERVE 0x4000 /* don't check for reservations */ + #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ + #define MAP_NONBLOCK 0x10000 /* do 
not block on IO */ ++#define MAP_EXECPRIO 0x20000 /* soft ubc charge */ + + #define MCL_CURRENT 1 /* lock all current mappings */ + #define MCL_FUTURE 2 /* lock all future mappings */ +diff -upr linux-2.6.16.orig/include/asm-x86_64/nmi.h linux-2.6.16-026test015/include/asm-x86_64/nmi.h +--- linux-2.6.16.orig/include/asm-x86_64/nmi.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-x86_64/nmi.h 2006-07-04 14:41:37.000000000 +0400 +@@ -24,6 +24,9 @@ void set_nmi_callback(nmi_callback_t cal + * Remove the handler previously set. + */ + void unset_nmi_callback(void); ++ ++void set_nmi_ipi_callback(nmi_callback_t callback); ++void unset_nmi_ipi_callback(void); + + #ifdef CONFIG_PM + +diff -upr linux-2.6.16.orig/include/asm-x86_64/pgalloc.h linux-2.6.16-026test015/include/asm-x86_64/pgalloc.h +--- linux-2.6.16.orig/include/asm-x86_64/pgalloc.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-x86_64/pgalloc.h 2006-07-04 14:41:37.000000000 +0400 +@@ -31,12 +31,14 @@ static inline void pmd_free(pmd_t *pmd) + + static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr) + { +- return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); ++ return (pmd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT| ++ __GFP_SOFT_UBC); + } + + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) + { +- return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); ++ return (pud_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT| ++ __GFP_SOFT_UBC); + } + + static inline void pud_free (pud_t *pud) +@@ -48,7 +50,8 @@ static inline void pud_free (pud_t *pud) + static inline pgd_t *pgd_alloc(struct mm_struct *mm) + { + unsigned boundary; +- pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); ++ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL_UBC|__GFP_REPEAT| ++ __GFP_SOFT_UBC); + if (!pgd) + return NULL; + /* +@@ -77,7 +80,8 @@ static inline pte_t *pte_alloc_one_kerne + + static inline struct 
page *pte_alloc_one(struct mm_struct *mm, unsigned long address) + { +- void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); ++ void *p = (void *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT| ++ __GFP_SOFT_UBC); + if (!p) + return NULL; + return virt_to_page(p); +diff -upr linux-2.6.16.orig/include/asm-x86_64/processor.h linux-2.6.16-026test015/include/asm-x86_64/processor.h +--- linux-2.6.16.orig/include/asm-x86_64/processor.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-x86_64/processor.h 2006-07-04 14:41:39.000000000 +0400 +@@ -167,7 +167,7 @@ static inline void clear_in_cr4 (unsigne + /* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000) ++#define IA32_PAGE_OFFSET 0xc0000000 + + #define TASK_SIZE (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64) + #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64) +diff -upr linux-2.6.16.orig/include/asm-x86_64/segment.h linux-2.6.16-026test015/include/asm-x86_64/segment.h +--- linux-2.6.16.orig/include/asm-x86_64/segment.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-x86_64/segment.h 2006-07-04 14:41:39.000000000 +0400 +@@ -3,29 +3,28 @@ + + #include <asm/cache.h> + +-#define __KERNEL_CS 0x10 +-#define __KERNEL_DS 0x18 +- +-#define __KERNEL32_CS 0x38 +- ++#define GDT_ENTRY_BOOT_CS 2 ++#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) ++#define GDT_ENTRY_BOOT_DS 3 ++#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) ++#define GDT_ENTRY_TSS 4 /* needs two entries */ + /* + * we cannot use the same code segment descriptor for user and kernel + * -- not even in the long flat mode, because of different DPL /kkeil + * The segment offset needs to contain a RPL. Grr. 
-AK + * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) + */ ++#define GDT_ENTRY_TLS_MIN 6 ++#define GDT_ENTRY_TLS_MAX 8 + +-#define __USER32_CS 0x23 /* 4*8+3 */ +-#define __USER_DS 0x2b /* 5*8+3 */ +-#define __USER_CS 0x33 /* 6*8+3 */ ++#define GDT_ENTRY_LDT 9 /* needs two entries */ ++#define __KERNEL32_CS 0x58 /* 11*8 */ ++#define __KERNEL_CS 0x60 /* 12*8 */ ++#define __KERNEL_DS 0x68 /* 13*8 */ ++#define __USER32_CS 0x73 /* 14*8+3 */ ++#define __USER_DS 0x7b /* 15*8+3 */ + #define __USER32_DS __USER_DS +- +-#define GDT_ENTRY_TLS 1 +-#define GDT_ENTRY_TSS 8 /* needs two entries */ +-#define GDT_ENTRY_LDT 10 /* needs two entries */ +-#define GDT_ENTRY_TLS_MIN 12 +-#define GDT_ENTRY_TLS_MAX 14 +-/* 15 free */ ++#define __USER_CS 0x83 /* 16*8+3 */ + + #define GDT_ENTRY_TLS_ENTRIES 3 + +@@ -37,7 +36,7 @@ + #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) + + #define IDT_ENTRIES 256 +-#define GDT_ENTRIES 16 ++#define GDT_ENTRIES 32 + #define GDT_SIZE (GDT_ENTRIES * 8) + #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) + +diff -upr linux-2.6.16.orig/include/asm-x86_64/signal.h linux-2.6.16-026test015/include/asm-x86_64/signal.h +--- linux-2.6.16.orig/include/asm-x86_64/signal.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-x86_64/signal.h 2006-07-04 14:41:39.000000000 +0400 +@@ -23,11 +23,6 @@ typedef struct { + unsigned long sig[_NSIG_WORDS]; + } sigset_t; + +- +-struct pt_regs; +-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); +- +- + #else + /* Here we must cater to libcs that poke about in kernel headers. 
*/ + +diff -upr linux-2.6.16.orig/include/asm-x86_64/thread_info.h linux-2.6.16-026test015/include/asm-x86_64/thread_info.h +--- linux-2.6.16.orig/include/asm-x86_64/thread_info.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-x86_64/thread_info.h 2006-07-04 14:41:39.000000000 +0400 +@@ -74,7 +74,7 @@ static inline struct thread_info *stack_ + + /* thread information allocation */ + #define alloc_thread_info(tsk) \ +- ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)) ++ ((struct thread_info *) __get_free_pages(GFP_KERNEL_UBC,THREAD_ORDER)) + #define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER) + + #else /* !__ASSEMBLY__ */ +@@ -101,11 +101,13 @@ static inline struct thread_info *stack_ + #define TIF_IRET 5 /* force IRET */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_SECCOMP 8 /* secure computing */ ++#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ + #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ + #define TIF_IA32 17 /* 32bit process */ + #define TIF_FORK 18 /* ret_from_fork */ + #define TIF_ABI_PENDING 19 +-#define TIF_MEMDIE 20 ++#define TIF_FREEZE 20 ++#define TIF_MEMDIE 21 + + #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) + #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +@@ -115,6 +117,7 @@ static inline struct thread_info *stack_ + #define _TIF_IRET (1<<TIF_IRET) + #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) + #define _TIF_SECCOMP (1<<TIF_SECCOMP) ++#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) + #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) + #define _TIF_IA32 (1<<TIF_IA32) + #define _TIF_FORK (1<<TIF_FORK) +diff -upr linux-2.6.16.orig/include/asm-x86_64/unistd.h linux-2.6.16-026test015/include/asm-x86_64/unistd.h +--- linux-2.6.16.orig/include/asm-x86_64/unistd.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/asm-x86_64/unistd.h 2006-07-04 
14:41:39.000000000 +0400 +@@ -605,8 +605,26 @@ __SYSCALL(__NR_pselect6, sys_ni_syscall) + __SYSCALL(__NR_ppoll, sys_ni_syscall) /* for now */ + #define __NR_unshare 272 + __SYSCALL(__NR_unshare, sys_unshare) +- +-#define __NR_syscall_max __NR_unshare ++#define __NR_getluid 500 ++__SYSCALL(__NR_getluid, sys_getluid) ++#define __NR_setluid 501 ++__SYSCALL(__NR_setluid, sys_setluid) ++#define __NR_setublimit 502 ++__SYSCALL(__NR_setublimit, sys_setublimit) ++#define __NR_ubstat 503 ++__SYSCALL(__NR_ubstat, sys_ubstat) ++#define __NR_fairsched_mknod 504 /* FairScheduler syscalls */ ++__SYSCALL(__NR_fairsched_mknod, sys_fairsched_mknod) ++#define __NR_fairsched_rmnod 505 ++__SYSCALL(__NR_fairsched_rmnod, sys_fairsched_rmnod) ++#define __NR_fairsched_chwt 506 ++__SYSCALL(__NR_fairsched_chwt, sys_fairsched_chwt) ++#define __NR_fairsched_mvpr 507 ++__SYSCALL(__NR_fairsched_mvpr, sys_fairsched_mvpr) ++#define __NR_fairsched_rate 508 ++__SYSCALL(__NR_fairsched_rate, sys_fairsched_rate) ++ ++#define __NR_syscall_max __NR_fairsched_rate + + #ifndef __NO_STUBS + +@@ -645,6 +663,7 @@ do { \ + #define __ARCH_WANT_SYS_RT_SIGACTION + #define __ARCH_WANT_SYS_TIME + #define __ARCH_WANT_COMPAT_SYS_TIME ++#define __ARCH_WANT_SYS_RT_SIGSUSPEND + #endif + + #ifndef __KERNEL_SYSCALLS__ +diff -upr linux-2.6.16.orig/include/linux/aio.h linux-2.6.16-026test015/include/linux/aio.h +--- linux-2.6.16.orig/include/linux/aio.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/aio.h 2006-07-04 14:41:39.000000000 +0400 +@@ -247,4 +247,8 @@ static inline struct kiocb *list_kiocb(s + extern unsigned long aio_nr; + extern unsigned long aio_max_nr; + ++void wait_for_all_aios(struct kioctx *ctx); ++extern kmem_cache_t *kioctx_cachep; ++extern void aio_kick_handler(void *); ++ + #endif /* __LINUX__AIO_H */ +diff -upr linux-2.6.16.orig/include/linux/binfmts.h linux-2.6.16-026test015/include/linux/binfmts.h +--- linux-2.6.16.orig/include/linux/binfmts.h 2006-03-20 
08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/binfmts.h 2006-07-04 14:41:37.000000000 +0400 +@@ -2,6 +2,7 @@ + #define _LINUX_BINFMTS_H + + #include <linux/capability.h> ++#include <linux/fs.h> + + struct pt_regs; + +@@ -28,6 +29,7 @@ struct linux_binprm{ + int sh_bang; + struct file * file; + int e_uid, e_gid; ++ struct exec_perm perm; + kernel_cap_t cap_inheritable, cap_permitted, cap_effective; + void *security; + int argc, envc; +diff -upr linux-2.6.16.orig/include/linux/capability.h linux-2.6.16-026test015/include/linux/capability.h +--- linux-2.6.16.orig/include/linux/capability.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/capability.h 2006-07-04 14:41:38.000000000 +0400 +@@ -146,12 +146,9 @@ typedef __u32 kernel_cap_t; + + #define CAP_NET_BROADCAST 11 + +-/* Allow interface configuration */ + /* Allow administration of IP firewall, masquerading and accounting */ + /* Allow setting debug option on sockets */ + /* Allow modification of routing tables */ +-/* Allow setting arbitrary process / process group ownership on +- sockets */ + /* Allow binding to any address for transparent proxying */ + /* Allow setting TOS (type of service) */ + /* Allow setting promiscuous mode */ +@@ -200,24 +197,19 @@ typedef __u32 kernel_cap_t; + + /* Allow configuration of the secure attention key */ + /* Allow administration of the random device */ +-/* Allow examination and configuration of disk quotas */ + /* Allow configuring the kernel's syslog (printk behaviour) */ + /* Allow setting the domainname */ + /* Allow setting the hostname */ + /* Allow calling bdflush() */ +-/* Allow mount() and umount(), setting up new smb connection */ ++/* Allow setting up new smb connection */ + /* Allow some autofs root ioctls */ + /* Allow nfsservctl */ + /* Allow VM86_REQUEST_IRQ */ + /* Allow to read/write pci config on alpha */ + /* Allow irix_prctl on mips (setstacksize) */ + /* Allow flushing all cache on m68k (sys_cacheflush) 
*/ +-/* Allow removing semaphores */ +-/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores +- and shared memory */ + /* Allow locking/unlocking of shared memory segment */ + /* Allow turning swap on/off */ +-/* Allow forged pids on socket credentials passing */ + /* Allow setting readahead and flushing buffers on block devices */ + /* Allow setting geometry in floppy driver */ + /* Allow turning DMA on/off in xd driver */ +@@ -235,6 +227,8 @@ typedef __u32 kernel_cap_t; + arbitrary SCSI commands */ + /* Allow setting encryption key on loopback filesystem */ + /* Allow setting zone reclaim policy */ ++/* Modify data journaling mode on ext3 filesystem (uses journaling ++ resources) */ + + #define CAP_SYS_ADMIN 21 + +@@ -254,8 +248,6 @@ typedef __u32 kernel_cap_t; + /* Override resource limits. Set resource limits. */ + /* Override quota limits. */ + /* Override reserved space on ext2 filesystem */ +-/* Modify data journaling mode on ext3 filesystem (uses journaling +- resources) */ + /* NOTE: ext2 honors fsuid when checking for resource overrides, so + you can override using fsuid too */ + /* Override size restrictions on IPC message queues */ +@@ -288,7 +280,52 @@ typedef __u32 kernel_cap_t; + + #define CAP_AUDIT_CONTROL 30 + ++/* ++ * Important note: VZ capabilities do intersect with CAP_AUDIT ++ * this is due to compatibility reasons. Nothing bad. ++ * Both VZ and Audit/SELinux caps are disabled in VPSs. ++ */ ++ ++/* Allow access to all information. In the other case some structures will be ++ hiding to ensure different Virtual Environment non-interaction on the same ++ node */ ++#define CAP_SETVEID 29 ++ ++#define CAP_VE_ADMIN 30 ++ + #ifdef __KERNEL__ ++ ++#include <linux/config.h> ++ ++#ifdef CONFIG_VE ++ ++/* Replacement for CAP_NET_ADMIN: ++ delegated rights to the Virtual environment of its network administration. 
++ For now the following rights have been delegated: ++ ++ Allow setting arbitrary process / process group ownership on sockets ++ Allow interface configuration ++ */ ++#define CAP_VE_NET_ADMIN CAP_VE_ADMIN ++ ++/* Replacement for CAP_SYS_ADMIN: ++ delegated rights to the Virtual environment of its administration. ++ For now the following rights have been delegated: ++ */ ++/* Allow mount/umount/remount */ ++/* Allow examination and configuration of disk quotas */ ++/* Allow removing semaphores */ ++/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores ++ and shared memory */ ++/* Allow locking/unlocking of shared memory segment */ ++/* Allow forged pids on socket credentials passing */ ++ ++#define CAP_VE_SYS_ADMIN CAP_VE_ADMIN ++#else ++#define CAP_VE_NET_ADMIN CAP_NET_ADMIN ++#define CAP_VE_SYS_ADMIN CAP_SYS_ADMIN ++#endif ++ + /* + * Bounding set + */ +@@ -352,9 +389,14 @@ static inline kernel_cap_t cap_invert(ke + #define cap_issubset(a,set) (!(cap_t(a) & ~cap_t(set))) + + #define cap_clear(c) do { cap_t(c) = 0; } while(0) ++#ifndef CONFIG_VE + #define cap_set_full(c) do { cap_t(c) = ~0; } while(0) ++#else ++#define cap_set_full(c) \ ++ do {cap_t(c) = ve_is_super(get_exec_env()) ? 
~0 : \ ++ get_exec_env()->cap_default; } while(0) ++#endif + #define cap_mask(c,mask) do { cap_t(c) &= cap_t(mask); } while(0) +- + #define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK) + + extern int capable(int cap); +diff -upr linux-2.6.16.orig/include/linux/coda_linux.h linux-2.6.16-026test015/include/linux/coda_linux.h +--- linux-2.6.16.orig/include/linux/coda_linux.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/coda_linux.h 2006-07-04 14:41:37.000000000 +0400 +@@ -38,7 +38,8 @@ extern struct file_operations coda_ioctl + int coda_open(struct inode *i, struct file *f); + int coda_flush(struct file *f); + int coda_release(struct inode *i, struct file *f); +-int coda_permission(struct inode *inode, int mask, struct nameidata *nd); ++int coda_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *); + int coda_revalidate_inode(struct dentry *); + int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); + int coda_setattr(struct dentry *, struct iattr *); +diff -upr linux-2.6.16.orig/include/linux/compat.h linux-2.6.16-026test015/include/linux/compat.h +--- linux-2.6.16.orig/include/linux/compat.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/compat.h 2006-07-04 14:41:39.000000000 +0400 +@@ -181,5 +181,7 @@ static inline int compat_timespec_compar + return lhs->tv_nsec - rhs->tv_nsec; + } + ++extern long compat_nanosleep_restart(struct restart_block *restart); ++ + #endif /* CONFIG_COMPAT */ + #endif /* _LINUX_COMPAT_H */ +diff -upr linux-2.6.16.orig/include/linux/cpt_image.h linux-2.6.16-026test015/include/linux/cpt_image.h +--- linux-2.6.16.orig/include/linux/cpt_image.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/cpt_image.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,1453 @@ ++/* ++ * ++ * include/linux/cpt_image.h ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. 
++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __CPT_IMAGE_H_ ++#define __CPT_IMAGE_H_ 1 ++ ++#define CPT_NULL (~0ULL) ++#define CPT_NOINDEX (~0U) ++ ++/* ++ * Image file layout. ++ * ++ * - major header ++ * - sections[] ++ * ++ * Each section is: ++ * - section header ++ * - array of objects ++ * ++ * All data records are arch independent, 64 bit aligned. ++ */ ++ ++enum _cpt_object_type ++{ ++ CPT_OBJ_TASK = 0, ++ CPT_OBJ_MM, ++ CPT_OBJ_FS, ++ CPT_OBJ_FILES, ++ CPT_OBJ_FILE, ++ CPT_OBJ_SIGHAND_STRUCT, ++ CPT_OBJ_SIGNAL_STRUCT, ++ CPT_OBJ_TTY, ++ CPT_OBJ_SOCKET, ++ CPT_OBJ_SYSVSEM_UNDO, ++ CPT_OBJ_NAMESPACE, ++ CPT_OBJ_SYSV_SHM, ++ CPT_OBJ_INODE, ++ CPT_OBJ_UBC, ++ CPT_OBJ_SLM_SGREG, ++ CPT_OBJ_SLM_REGOBJ, ++ CPT_OBJ_SLM_MM, ++ CPT_OBJ_MAX, ++ /* The objects above are stored in memory while checkpointing */ ++ ++ CPT_OBJ_VMA = 1024, ++ CPT_OBJ_FILEDESC, ++ CPT_OBJ_SIGHANDLER, ++ CPT_OBJ_SIGINFO, ++ CPT_OBJ_LASTSIGINFO, ++ CPT_OBJ_SYSV_SEM, ++ CPT_OBJ_SKB, ++ CPT_OBJ_FLOCK, ++ CPT_OBJ_OPENREQ, ++ CPT_OBJ_VFSMOUNT, ++ CPT_OBJ_TRAILER, ++ CPT_OBJ_SYSVSEM_UNDO_REC, ++ CPT_OBJ_NET_DEVICE, ++ CPT_OBJ_NET_IFADDR, ++ CPT_OBJ_NET_ROUTE, ++ CPT_OBJ_NET_CONNTRACK, ++ CPT_OBJ_NET_CONNTRACK_EXPECT, ++ CPT_OBJ_AIO_CONTEXT, ++ CPT_OBJ_VEINFO, ++ CPT_OBJ_EPOLL, ++ CPT_OBJ_EPOLL_FILE, ++ CPT_OBJ_SKFILTER, ++ CPT_OBJ_SIGALTSTACK, ++ CPT_OBJ_SOCK_MCADDR, ++ ++ CPT_OBJ_X86_REGS = 4096, ++ CPT_OBJ_X86_64_REGS, ++ CPT_OBJ_PAGES, ++ CPT_OBJ_COPYPAGES, ++ CPT_OBJ_REMAPPAGES, ++ CPT_OBJ_LAZYPAGES, ++ CPT_OBJ_NAME, ++ CPT_OBJ_BITS, ++ CPT_OBJ_REF, ++}; ++ ++#define CPT_ALIGN(n) (((n)+7)&~7) ++ ++struct cpt_major_hdr ++{ ++ __u8 cpt_signature[4]; /* Magic number */ ++ __u16 cpt_hdrlen; /* Length of this header */ ++ __u16 cpt_image_version; /* Format of this file; mbz */ ++ __u16 cpt_os_arch; /* Architecture */ ++#define CPT_OS_ARCH_I386 0 ++#define CPT_OS_ARCH_EMT64 1 ++#define CPT_OS_ARCH_IA64 2 ++ __u16 __cpt_pad1; ++ __u32 cpt_os_version; /* 
Version of kernel, where image was done */ ++ __u32 cpt_os_features; /* Kernel features: SMP etc. */ ++ __u16 cpt_pagesize; /* Page size used by OS */ ++ __u16 cpt_hz; /* HZ used by OS */ ++ __u64 cpt_start_jiffies64; /* Jiffies */ ++ __u32 cpt_start_sec; /* Seconds */ ++ __u32 cpt_start_nsec; /* Nanoseconds */ ++ __u32 cpt_cpu_caps[4]; /* CPU capabilities */ ++ __u32 cpt_kernel_config[4]; /* Kernel config */ ++ __u64 cpt_iptables_mask; /* Used netfilter modules */ ++} __attribute__ ((aligned (8))); ++ ++#define CPT_SIGNATURE0 0x79 ++#define CPT_SIGNATURE1 0x1c ++#define CPT_SIGNATURE2 0x01 ++#define CPT_SIGNATURE3 0x63 ++ ++#define CPT_CPU_X86_CMOV 0 ++#define CPT_CPU_X86_FXSR 1 ++#define CPT_CPU_X86_SSE 2 ++#define CPT_CPU_X86_SSE2 3 ++#define CPT_CPU_X86_MMX 4 ++#define CPT_CPU_X86_3DNOW 5 ++#define CPT_CPU_X86_3DNOW2 6 ++#define CPT_CPU_X86_SEP 7 ++#define CPT_CPU_X86_EMT64 8 ++#define CPT_CPU_X86_IA64 9 ++ ++#define CPT_KERNEL_CONFIG_PAE 0 ++ ++struct cpt_section_hdr ++{ ++ __u64 cpt_next; ++ __u32 cpt_section; ++ __u16 cpt_hdrlen; ++ __u16 cpt_align; ++} __attribute__ ((aligned (8))); ++ ++enum ++{ ++ CPT_SECT_ERROR, /* Error section, content is string */ ++ CPT_SECT_VEINFO, ++ CPT_SECT_FILES, /* Files. Content is array of file objects */ ++ CPT_SECT_TASKS, ++ CPT_SECT_MM, ++ CPT_SECT_FILES_STRUCT, ++ CPT_SECT_FS, ++ CPT_SECT_SIGHAND_STRUCT, ++ CPT_SECT_TTY, ++ CPT_SECT_SOCKET, ++ CPT_SECT_NAMESPACE, ++ CPT_SECT_SYSVSEM_UNDO, ++ CPT_SECT_INODE, /* Inodes with i->i_nlink==0 and ++ * deleted dentires with inodes not ++ * referenced inside dumped process. 
++ */ ++ CPT_SECT_SYSV_SHM, ++ CPT_SECT_SYSV_SEM, ++ CPT_SECT_ORPHANS, ++ CPT_SECT_NET_DEVICE, ++ CPT_SECT_NET_IFADDR, ++ CPT_SECT_NET_ROUTE, ++ CPT_SECT_NET_IPTABLES, ++ CPT_SECT_NET_CONNTRACK, ++ CPT_SECT_NET_CONNTRACK_VE0, ++ CPT_SECT_UTSNAME, ++ CPT_SECT_TRAILER, ++ CPT_SECT_UBC, ++ CPT_SECT_SLM_SGREGS, ++ CPT_SECT_SLM_REGOBJS, ++/* Due to silly mistake we cannot index sections beyond this value */ ++#define CPT_SECT_MAX_INDEX (CPT_SECT_SLM_REGOBJS+1) ++ CPT_SECT_EPOLL, ++ CPT_SECT_MAX ++}; ++ ++struct cpt_major_tail ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_lazypages; ++ __u32 cpt_64bit; ++ __u64 cpt_sections[CPT_SECT_MAX_INDEX]; ++ __u32 cpt_nsect; ++ __u8 cpt_signature[4]; /* Magic number */ ++} __attribute__ ((aligned (8))); ++ ++ ++/* Common object header. */ ++struct cpt_object_hdr ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++} __attribute__ ((aligned (8))); ++ ++enum _cpt_content_type { ++ CPT_CONTENT_VOID, ++ CPT_CONTENT_ARRAY, ++ CPT_CONTENT_DATA, ++ CPT_CONTENT_NAME, ++ ++ CPT_CONTENT_STACK, ++ CPT_CONTENT_X86_FPUSTATE_OLD, ++ CPT_CONTENT_X86_FPUSTATE, ++ CPT_CONTENT_MM_CONTEXT, ++ CPT_CONTENT_SEMARRAY, ++ CPT_CONTENT_SEMUNDO, ++ CPT_CONTENT_NLMARRAY, ++ CPT_CONTENT_MAX ++}; ++ ++/* CPT_OBJ_BITS: encode array of bytes */ ++struct cpt_obj_bits ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_size; ++ __u32 __cpt_pad1; ++} __attribute__ ((aligned (8))); ++ ++/* CPT_OBJ_REF: a reference to another object */ ++struct cpt_obj_ref ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_pos; ++} __attribute__ ((aligned (8))); ++ ++/* CPT_OBJ_VEINFO: various ve specific data */ ++struct cpt_veinfo_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ /* ipc ctls */ ++ __u32 shm_ctl_max; ++ __u32 shm_ctl_all; ++ 
__u32 shm_ctl_mni; ++ __u32 msg_ctl_max; ++ __u32 msg_ctl_mni; ++ __u32 msg_ctl_mnb; ++ __u32 sem_ctl_arr[4]; ++ ++ /* start time */ ++ __u64 start_timespec_delta; ++ __u64 start_jiffies_delta; ++} __attribute__ ((aligned (8))); ++ ++/* CPT_OBJ_FILE: one struct file */ ++struct cpt_file_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_flags; ++ __u32 cpt_mode; ++ __u64 cpt_pos; ++ __u32 cpt_uid; ++ __u32 cpt_gid; ++ ++ __u32 cpt_i_mode; ++ __u32 cpt_lflags; ++#define CPT_DENTRY_DELETED 1 ++#define CPT_DENTRY_ROOT 2 ++#define CPT_DENTRY_CLONING 4 ++#define CPT_DENTRY_PROC 8 ++#define CPT_DENTRY_EPOLL 0x10 ++ __u64 cpt_inode; ++ __u64 cpt_priv; ++ ++ __u32 cpt_fown_fd; ++ __u32 cpt_fown_pid; ++ __u32 cpt_fown_uid; ++ __u32 cpt_fown_euid; ++ __u32 cpt_fown_signo; ++ __u32 __cpt_pad1; ++} __attribute__ ((aligned (8))); ++/* Followed by file name, encoded as CPT_OBJ_NAME */ ++ ++struct cpt_epoll_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_file; ++} __attribute__ ((aligned (8))); ++/* Followed by array of struct cpt_epoll_file */ ++ ++struct cpt_epoll_file_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_file; ++ __u32 cpt_fd; ++ __u32 cpt_events; ++ __u64 cpt_data; ++ __u32 cpt_revents; ++ __u32 cpt_ready; ++} __attribute__ ((aligned (8))); ++ ++ ++/* CPT_OBJ_FILEDESC: one file descriptor */ ++struct cpt_fd_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_fd; ++ __u32 cpt_flags; ++#define CPT_FD_FLAG_CLOSEEXEC 1 ++ __u64 cpt_file; ++} __attribute__ ((aligned (8))); ++ ++/* CPT_OBJ_FILES: one files_struct */ ++struct cpt_files_struct_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_index; ++ __u32 cpt_max_fds; ++ __u32 cpt_next_fd; ++ __u32 __cpt_pad1; ++} __attribute__ 
((aligned (8))); ++/* Followed by array of cpt_fd_image */ ++ ++/* CPT_OBJ_FS: one fs_struct */ ++struct cpt_fs_struct_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_umask; ++ __u32 __cpt_pad1; ++} __attribute__ ((aligned (8))); ++/* Followed by two/three CPT_OBJ_FILENAME for root, pwd and, optionally, altroot */ ++ ++/* CPT_OBJ_INODE: one struct inode */ ++struct cpt_inode_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_dev; ++ __u64 cpt_ino; ++ __u32 cpt_mode; ++ __u32 cpt_nlink; ++ __u32 cpt_uid; ++ __u32 cpt_gid; ++ __u64 cpt_rdev; ++ __u64 cpt_size; ++ __u64 cpt_blksize; ++ __u64 cpt_atime; ++ __u64 cpt_mtime; ++ __u64 cpt_ctime; ++ __u64 cpt_blocks; ++ __u32 cpt_sb; ++ __u32 __cpt_pad1; ++} __attribute__ ((aligned (8))); ++ ++/* CPT_OBJ_VFSMOUNT: one vfsmount */ ++struct cpt_vfsmount_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_mntflags; ++ __u32 cpt_flags; ++} __attribute__ ((aligned (8))); ++ ++ ++struct cpt_flock_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_owner; ++ __u32 cpt_pid; ++ __u64 cpt_start; ++ __u64 cpt_end; ++ __u32 cpt_flags; ++ __u32 cpt_type; ++} __attribute__ ((aligned (8))); ++ ++ ++struct cpt_tty_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_flags; ++ __u32 cpt_link; ++ __u32 cpt_index; ++ __u32 cpt_drv_type; ++ __u32 cpt_drv_subtype; ++ __u32 cpt_drv_flags; ++ __u8 cpt_packet; ++ __u8 cpt_stopped; ++ __u8 cpt_hw_stopped; ++ __u8 cpt_flow_stopped; ++ ++ __u32 cpt_canon_data; ++ __u32 cpt_canon_head; ++ __u32 cpt_canon_column; ++ __u32 cpt_column; ++ __u8 cpt_ctrl_status; ++ __u8 cpt_erasing; ++ __u8 cpt_lnext; ++ __u8 cpt_icanon; ++ __u8 cpt_raw; ++ __u8 cpt_real_raw; ++ __u8 cpt_closing; ++ __u8 __cpt_pad1; ++ __u16 
cpt_minimum_to_wake; ++ __u16 __cpt_pad2; ++ __u32 cpt_pgrp; ++ __u32 cpt_session; ++ __u32 cpt_c_line; ++ __u8 cpt_name[64]; ++ __u16 cpt_ws_row; ++ __u16 cpt_ws_col; ++ __u16 cpt_ws_prow; ++ __u16 cpt_ws_pcol; ++ __u8 cpt_c_cc[32]; ++ __u32 cpt_c_iflag; ++ __u32 cpt_c_oflag; ++ __u32 cpt_c_cflag; ++ __u32 cpt_c_lflag; ++ __u32 cpt_read_flags[4096/32]; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_sock_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_file; ++ __u32 cpt_parent; ++ __u32 cpt_index; ++ ++ __u64 cpt_ssflags; ++ __u16 cpt_type; ++ __u16 cpt_family; ++ __u8 cpt_sstate; ++ __u8 cpt_passcred; ++ __u8 cpt_state; ++ __u8 cpt_reuse; ++ ++ __u8 cpt_zapped; ++ __u8 cpt_shutdown; ++ __u8 cpt_userlocks; ++ __u8 cpt_no_check; ++ __u8 cpt_debug; ++ __u8 cpt_rcvtstamp; ++ __u8 cpt_localroute; ++ __u8 cpt_protocol; ++ ++ __u32 cpt_err; ++ __u32 cpt_err_soft; ++ ++ __u16 cpt_max_ack_backlog; ++ __u16 __cpt_pad1; ++ __u32 cpt_priority; ++ ++ __u32 cpt_rcvlowat; ++ __u32 cpt_bound_dev_if; ++ ++ __u64 cpt_rcvtimeo; ++ __u64 cpt_sndtimeo; ++ __u32 cpt_rcvbuf; ++ __u32 cpt_sndbuf; ++ __u64 cpt_flags; ++ __u64 cpt_lingertime; ++ __u32 cpt_peer_pid; ++ __u32 cpt_peer_uid; ++ ++ __u32 cpt_peer_gid; ++ __u32 cpt_laddrlen; ++ __u32 cpt_laddr[128/4]; ++ __u32 cpt_raddrlen; ++ __u32 cpt_raddr[128/4]; ++ /* AF_UNIX */ ++ __u32 cpt_peer; ++ ++ __u8 cpt_socketpair; ++ __u8 cpt_deleted; ++ __u16 __cpt_pad4; ++ __u32 __cpt_pad5; ++/* ++ struct sk_filter *sk_filter; ++ */ ++ ++ __u64 cpt_stamp; ++ __u32 cpt_daddr; ++ __u16 cpt_dport; ++ __u16 cpt_sport; ++ ++ __u32 cpt_saddr; ++ __u32 cpt_rcv_saddr; ++ ++ __u32 cpt_uc_ttl; ++ __u32 cpt_tos; ++ ++ __u32 cpt_cmsg_flags; ++ __u32 cpt_mc_index; ++ ++ __u32 cpt_mc_addr; ++/* ++ struct ip_options *opt; ++ */ ++ __u8 cpt_hdrincl; ++ __u8 cpt_mc_ttl; ++ __u8 cpt_mc_loop; ++ __u8 cpt_pmtudisc; ++ ++ __u8 cpt_recverr; ++ __u8 cpt_freebind; ++ __u16 cpt_idcounter; ++ __u32 
cpt_cork_flags; ++ ++ __u32 cpt_cork_fragsize; ++ __u32 cpt_cork_length; ++ __u32 cpt_cork_addr; ++ __u32 cpt_cork_saddr; ++ __u32 cpt_cork_daddr; ++ __u32 cpt_cork_oif; ++ ++ __u32 cpt_udp_pending; ++ __u32 cpt_udp_corkflag; ++ __u16 cpt_udp_encap; ++ __u16 cpt_udp_len; ++ __u32 __cpt_pad7; ++ ++ __u64 cpt_saddr6[2]; ++ __u64 cpt_rcv_saddr6[2]; ++ __u64 cpt_daddr6[2]; ++ __u32 cpt_flow_label6; ++ __u32 cpt_frag_size6; ++ __u32 cpt_hop_limit6; ++ __u32 cpt_mcast_hops6; ++ ++ __u32 cpt_mcast_oif6; ++ __u8 cpt_rxopt6; ++ __u8 cpt_mc_loop6; ++ __u8 cpt_recverr6; ++ __u8 cpt_sndflow6; ++ ++ __u8 cpt_pmtudisc6; ++ __u8 cpt_ipv6only6; ++ __u8 cpt_mapped; ++ __u8 __cpt_pad8; ++ __u32 cpt_pred_flags; ++ ++ __u32 cpt_rcv_nxt; ++ __u32 cpt_snd_nxt; ++ ++ __u32 cpt_snd_una; ++ __u32 cpt_snd_sml; ++ ++ __u32 cpt_rcv_tstamp; ++ __u32 cpt_lsndtime; ++ ++ __u8 cpt_tcp_header_len; ++ __u8 cpt_ack_pending; ++ __u8 cpt_quick; ++ __u8 cpt_pingpong; ++ __u8 cpt_blocked; ++ __u8 __cpt_pad9; ++ __u16 __cpt_pad10; ++ ++ __u32 cpt_ato; ++ __u32 cpt_ack_timeout; ++ ++ __u32 cpt_lrcvtime; ++ __u16 cpt_last_seg_size; ++ __u16 cpt_rcv_mss; ++ ++ __u32 cpt_snd_wl1; ++ __u32 cpt_snd_wnd; ++ ++ __u32 cpt_max_window; ++ __u32 cpt_pmtu_cookie; ++ ++ __u32 cpt_mss_cache; ++ __u16 cpt_mss_cache_std; ++ __u16 cpt_mss_clamp; ++ ++ __u16 cpt_ext_header_len; ++ __u16 cpt_ext2_header_len; ++ __u8 cpt_ca_state; ++ __u8 cpt_retransmits; ++ __u8 cpt_reordering; ++ __u8 cpt_frto_counter; ++ ++ __u32 cpt_frto_highmark; ++ __u8 cpt_adv_cong; ++ __u8 cpt_defer_accept; ++ __u8 cpt_backoff; ++ __u8 __cpt_pad11; ++ ++ __u32 cpt_srtt; ++ __u32 cpt_mdev; ++ ++ __u32 cpt_mdev_max; ++ __u32 cpt_rttvar; ++ ++ __u32 cpt_rtt_seq; ++ __u32 cpt_rto; ++ ++ __u32 cpt_packets_out; ++ __u32 cpt_left_out; ++ ++ __u32 cpt_retrans_out; ++ __u32 cpt_snd_ssthresh; ++ ++ __u32 cpt_snd_cwnd; ++ __u16 cpt_snd_cwnd_cnt; ++ __u16 cpt_snd_cwnd_clamp; ++ ++ __u32 cpt_snd_cwnd_used; ++ __u32 cpt_snd_cwnd_stamp; ++ ++ __u32 cpt_timeout; ++ 
__u32 cpt_ka_timeout; ++ ++ __u32 cpt_rcv_wnd; ++ __u32 cpt_rcv_wup; ++ ++ __u32 cpt_write_seq; ++ __u32 cpt_pushed_seq; ++ ++ __u32 cpt_copied_seq; ++ __u8 cpt_tstamp_ok; ++ __u8 cpt_wscale_ok; ++ __u8 cpt_sack_ok; ++ __u8 cpt_saw_tstamp; ++ ++ __u8 cpt_snd_wscale; ++ __u8 cpt_rcv_wscale; ++ __u8 cpt_nonagle; ++ __u8 cpt_keepalive_probes; ++ __u32 cpt_rcv_tsval; ++ ++ __u32 cpt_rcv_tsecr; ++ __u32 cpt_ts_recent; ++ ++ __u64 cpt_ts_recent_stamp; ++ __u16 cpt_user_mss; ++ __u8 cpt_dsack; ++ __u8 cpt_eff_sacks; ++ __u32 cpt_sack_array[2*5]; ++ __u32 cpt_window_clamp; ++ ++ __u32 cpt_rcv_ssthresh; ++ __u8 cpt_probes_out; ++ __u8 cpt_num_sacks; ++ __u16 cpt_advmss; ++ ++ __u8 cpt_syn_retries; ++ __u8 cpt_ecn_flags; ++ __u16 cpt_prior_ssthresh; ++ __u32 cpt_lost_out; ++ ++ __u32 cpt_sacked_out; ++ __u32 cpt_fackets_out; ++ ++ __u32 cpt_high_seq; ++ __u32 cpt_retrans_stamp; ++ ++ __u32 cpt_undo_marker; ++ __u32 cpt_undo_retrans; ++ ++ __u32 cpt_urg_seq; ++ __u16 cpt_urg_data; ++ __u8 cpt_pending; ++ __u8 cpt_urg_mode; ++ ++ __u32 cpt_snd_up; ++ __u32 cpt_keepalive_time; ++ ++ __u32 cpt_keepalive_intvl; ++ __u32 cpt_linger2; ++ ++ __u32 cpt_rcvrtt_rtt; ++ __u32 cpt_rcvrtt_seq; ++ ++ __u32 cpt_rcvrtt_time; ++ __u32 __cpt_pad12; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_sockmc_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u16 cpt_family; ++ __u16 cpt_mode; ++ __u32 cpt_ifindex; ++ __u32 cpt_mcaddr[4]; ++} __attribute__ ((aligned (8))); ++/* Followed by array of source addresses, each zero padded to 16 bytes */ ++ ++struct cpt_openreq_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_rcv_isn; ++ __u32 cpt_snt_isn; ++ ++ __u16 cpt_rmt_port; ++ __u16 cpt_mss; ++ __u8 cpt_family; ++ __u8 cpt_retrans; ++ __u8 cpt_snd_wscale; ++ __u8 cpt_rcv_wscale; ++ ++ __u8 cpt_tstamp_ok; ++ __u8 cpt_sack_ok; ++ __u8 cpt_wscale_ok; ++ __u8 cpt_ecn_ok; ++ __u8 cpt_acked; ++ 
__u8 __cpt_pad1; ++ __u16 __cpt_pad2; ++ ++ __u32 cpt_window_clamp; ++ __u32 cpt_rcv_wnd; ++ __u32 cpt_ts_recent; ++ __u32 cpt_iif; ++ __u64 cpt_expires; ++ ++ __u64 cpt_loc_addr[2]; ++ __u64 cpt_rmt_addr[2]; ++/* ++ struct ip_options *opt; ++ */ ++ ++} __attribute__ ((aligned (8))); ++ ++struct cpt_skb_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_owner; ++ __u32 cpt_queue; ++#define CPT_SKB_NQ 0 ++#define CPT_SKB_RQ 1 ++#define CPT_SKB_WQ 2 ++#define CPT_SKB_OFOQ 3 ++ ++ __u64 cpt_stamp; ++ __u32 cpt_len; ++ __u32 cpt_hspace; ++ __u32 cpt_tspace; ++ __u32 cpt_h; ++ __u32 cpt_nh; ++ __u32 cpt_mac; ++ ++ __u64 cpt_cb[5]; ++ __u32 cpt_mac_len; ++ __u32 cpt_csum; ++ __u8 cpt_local_df; ++ __u8 cpt_pkt_type; ++ __u8 cpt_ip_summed; ++ __u8 __cpt_pad1; ++ __u32 cpt_priority; ++ __u16 cpt_protocol; ++ __u16 cpt_security; ++ __u16 cpt_tso_segs; ++ __u16 cpt_tso_size; ++} __attribute__ ((aligned (8))); ++ ++ ++struct cpt_sysvshm_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_key; ++ __u64 cpt_uid; ++ __u64 cpt_gid; ++ __u64 cpt_cuid; ++ __u64 cpt_cgid; ++ __u64 cpt_mode; ++ __u64 cpt_seq; ++ ++ __u32 cpt_id; ++ __u32 cpt_mlockuser; ++ __u64 cpt_segsz; ++ __u64 cpt_atime; ++ __u64 cpt_ctime; ++ __u64 cpt_dtime; ++ __u64 cpt_creator; ++ __u64 cpt_last; ++} __attribute__ ((aligned (8))); ++ ++ ++struct cpt_sysvsem_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_key; ++ __u64 cpt_uid; ++ __u64 cpt_gid; ++ __u64 cpt_cuid; ++ __u64 cpt_cgid; ++ __u64 cpt_mode; ++ __u64 cpt_seq; ++ __u32 cpt_id; ++ __u32 __cpt_pad1; ++ ++ __u64 cpt_otime; ++ __u64 cpt_ctime; ++} __attribute__ ((aligned (8))); ++/* Content is array of pairs semval/sempid */ ++ ++struct cpt_sysvsem_undo_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_id; ++ __u32 cpt_nsem; 
++} __attribute__ ((aligned (8))); ++ ++ ++struct cpt_mm_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_start_code; ++ __u64 cpt_end_code; ++ __u64 cpt_start_data; ++ __u64 cpt_end_data; ++ __u64 cpt_start_brk; ++ __u64 cpt_brk; ++ __u64 cpt_start_stack; ++ __u64 cpt_start_arg; ++ __u64 cpt_end_arg; ++ __u64 cpt_start_env; ++ __u64 cpt_end_env; ++ __u64 cpt_def_flags; ++ __u64 cpt_mmub; ++ __u8 cpt_dumpable; ++ __u8 cpt_vps_dumpable; ++ __u8 cpt_used_hugetlb; ++ __u8 __cpt_pad; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_page_block ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_start; ++ __u64 cpt_end; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_remappage_block ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_start; ++ __u64 cpt_end; ++ __u64 cpt_pgoff; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_copypage_block ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_start; ++ __u64 cpt_end; ++ __u64 cpt_source; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_lazypage_block ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_start; ++ __u64 cpt_end; ++ __u64 cpt_index; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_vma_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_file; ++ __u32 cpt_type; ++#define CPT_VMA_TYPE_0 0 ++#define CPT_VMA_TYPE_SHM 1 ++ __u32 cpt_anonvma; ++ __u64 cpt_anonvmaid; ++ ++ __u64 cpt_start; ++ __u64 cpt_end; ++ __u64 cpt_flags; ++ __u64 cpt_pgprot; ++ __u64 cpt_pgoff; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_aio_ctx_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_max_reqs; ++ __u32 cpt_ring_pages; ++ __u32 cpt_tail; ++ __u32 
cpt_nr; ++ __u64 cpt_mmap_base; ++ /* Data (io_event's) and struct aio_ring are stored in user space VM */ ++} __attribute__ ((aligned (8))); ++ ++ ++/* Format of MM section. ++ * ++ * It is array of MM objects (mm_struct). Each MM object is ++ * header, encoding mm_struct, followed by array of VMA objects. ++ * Each VMA consists of VMA header, encoding vm_area_struct, and ++ * if the VMA contains copied pages, the header is followed by ++ * array of tuples start-end each followed by data. ++ * ++ * ATTN: no block/page alignment. Only 64bit alignment. This might be not good? ++ */ ++ ++struct cpt_restart_block { ++ __u64 fn; ++#define CPT_RBL_0 0 ++#define CPT_RBL_NANOSLEEP 1 ++#define CPT_RBL_COMPAT_NANOSLEEP 2 ++ __u64 arg0; ++ __u64 arg1; ++ __u64 arg2; ++ __u64 arg3; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_siginfo_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_qflags; ++ __u32 cpt_signo; ++ __u32 cpt_errno; ++ __u32 cpt_code; ++ ++ __u64 cpt_sigval; ++ __u32 cpt_pid; ++ __u32 cpt_uid; ++ __u64 cpt_utime; ++ __u64 cpt_stime; ++ ++ __u64 cpt_user; ++} __attribute__ ((aligned (8))); ++ ++/* Portable presentaions for segment registers */ ++ ++#define CPT_SEG_ZERO 0 ++#define CPT_SEG_TLS1 1 ++#define CPT_SEG_TLS2 2 ++#define CPT_SEG_TLS3 3 ++#define CPT_SEG_USER32_DS 4 ++#define CPT_SEG_USER32_CS 5 ++#define CPT_SEG_USER64_DS 6 ++#define CPT_SEG_USER64_CS 7 ++#define CPT_SEG_LDT 256 ++ ++struct cpt_x86_regs ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_debugreg[8]; ++ __u32 cpt_fs; ++ __u32 cpt_gs; ++ ++ __u32 cpt_ebx; ++ __u32 cpt_ecx; ++ __u32 cpt_edx; ++ __u32 cpt_esi; ++ __u32 cpt_edi; ++ __u32 cpt_ebp; ++ __u32 cpt_eax; ++ __u32 cpt_xds; ++ __u32 cpt_xes; ++ __u32 cpt_orig_eax; ++ __u32 cpt_eip; ++ __u32 cpt_xcs; ++ __u32 cpt_eflags; ++ __u32 cpt_esp; ++ __u32 cpt_xss; ++ __u32 cpt_pad; ++}; ++ ++struct cpt_x86_64_regs ++{ ++ __u64 
cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_debugreg[8]; ++ ++ __u64 cpt_fsbase; ++ __u64 cpt_gsbase; ++ __u32 cpt_fsindex; ++ __u32 cpt_gsindex; ++ __u32 cpt_ds; ++ __u32 cpt_es; ++ ++ __u64 cpt_r15; ++ __u64 cpt_r14; ++ __u64 cpt_r13; ++ __u64 cpt_r12; ++ __u64 cpt_rbp; ++ __u64 cpt_rbx; ++ __u64 cpt_r11; ++ __u64 cpt_r10; ++ __u64 cpt_r9; ++ __u64 cpt_r8; ++ __u64 cpt_rax; ++ __u64 cpt_rcx; ++ __u64 cpt_rdx; ++ __u64 cpt_rsi; ++ __u64 cpt_rdi; ++ __u64 cpt_orig_rax; ++ __u64 cpt_rip; ++ __u64 cpt_cs; ++ __u64 cpt_eflags; ++ __u64 cpt_rsp; ++ __u64 cpt_ss; ++}; ++ ++struct cpt_task_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_state; ++ __u64 cpt_flags; ++ __u64 cpt_ptrace; ++ __u32 cpt_prio; ++ __u32 cpt_static_prio; ++ __u32 cpt_policy; ++ __u32 cpt_rt_priority; ++ ++ /* struct thread_info */ ++ __u64 cpt_exec_domain; ++ __u64 cpt_thrflags; ++ __u64 cpt_thrstatus; ++ __u64 cpt_addr_limit; ++ ++ __u64 cpt_personality; ++ ++ __u64 cpt_mm; ++ __u64 cpt_files; ++ __u64 cpt_fs; ++ __u64 cpt_signal; ++ __u64 cpt_sighand; ++ __u64 cpt_sigblocked; ++ __u64 cpt_sigrblocked; ++ __u64 cpt_sigpending; ++ __u64 cpt_namespace; ++ __u64 cpt_sysvsem_undo; ++ __u32 cpt_pid; ++ __u32 cpt_tgid; ++ __u32 cpt_ppid; ++ __u32 cpt_rppid; ++ __u32 cpt_pgrp; ++ __u32 cpt_session; ++ __u32 cpt_old_pgrp; ++ __u32 __cpt_pad; ++ __u32 cpt_leader; ++ __u8 cpt_pn_state; ++ __u8 cpt_stopped_state; ++ __u8 cpt_sigsuspend_state; ++ __u8 cpt_64bit; ++ __u64 cpt_set_tid; ++ __u64 cpt_clear_tid; ++ __u32 cpt_exit_code; ++ __u32 cpt_exit_signal; ++ __u32 cpt_pdeath_signal; ++ __u32 cpt_user; ++ __u32 cpt_uid; ++ __u32 cpt_euid; ++ __u32 cpt_suid; ++ __u32 cpt_fsuid; ++ __u32 cpt_gid; ++ __u32 cpt_egid; ++ __u32 cpt_sgid; ++ __u32 cpt_fsgid; ++ __u32 cpt_ngids; ++ __u32 cpt_gids[32]; ++ __u32 __cpt_pad2; ++ __u64 cpt_ecap; ++ __u64 cpt_icap; ++ __u64 cpt_pcap; ++ __u8 cpt_comm[16]; ++ __u64 
cpt_tls[3]; ++ struct cpt_restart_block cpt_restart; ++ __u64 cpt_it_real_value; /* V0: jiffies, V1: nsec */ ++ __u64 cpt_it_real_incr; /* V0: jiffies, V1: nsec */ ++ __u64 cpt_it_prof_value; ++ __u64 cpt_it_prof_incr; ++ __u64 cpt_it_virt_value; ++ __u64 cpt_it_virt_incr; ++ ++ __u16 cpt_used_math; ++ __u8 cpt_keepcap; ++ __u8 cpt_did_exec; ++ __u32 cpt_ptrace_message; ++ ++ __u64 cpt_utime; ++ __u64 cpt_stime; ++ __u64 cpt_starttime; /* V0: jiffies, V1: timespec */ ++ __u64 cpt_nvcsw; ++ __u64 cpt_nivcsw; ++ __u64 cpt_min_flt; ++ __u64 cpt_maj_flt; ++ ++ __u64 cpt_sigsuspend_blocked; ++ __u64 cpt_cutime, cpt_cstime; ++ __u64 cpt_cnvcsw, cpt_cnivcsw; ++ __u64 cpt_cmin_flt, cpt_cmaj_flt; ++ ++#define CPT_RLIM_NLIMITS 16 ++ __u64 cpt_rlim_cur[CPT_RLIM_NLIMITS]; ++ __u64 cpt_rlim_max[CPT_RLIM_NLIMITS]; ++ ++ __u64 cpt_task_ub; ++ __u64 cpt_exec_ub; ++ __u64 cpt_mm_ub; ++ __u64 cpt_fork_sub; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_sigaltstack_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_stack; ++ __u32 cpt_stacksize; ++ __u32 __cpt_pad1; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_signal_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_leader; ++ __u8 cpt_pgrp_type; ++ __u8 cpt_old_pgrp_type; ++ __u8 cpt_session_type; ++#define CPT_PGRP_NORMAL 0 ++#define CPT_PGRP_ORPHAN 1 ++#define CPT_PGRP_STRAY 2 ++ __u8 __cpt_pad1; ++ __u64 cpt_pgrp; ++ __u64 cpt_old_pgrp; ++ __u64 cpt_session; ++ __u64 cpt_sigpending; ++ __u64 cpt_ctty; ++ ++ __u32 cpt_curr_target; ++ __u32 cpt_group_exit; ++ __u32 cpt_group_exit_code; ++ __u32 cpt_group_exit_task; ++ __u32 cpt_notify_count; ++ __u32 cpt_group_stop_count; ++ __u32 cpt_stop_state; ++ __u32 __cpt_pad2; ++ ++ __u64 cpt_utime, cpt_stime, cpt_cutime, cpt_cstime; ++ __u64 cpt_nvcsw, cpt_nivcsw, cpt_cnvcsw, cpt_cnivcsw; ++ __u64 cpt_min_flt, cpt_maj_flt, cpt_cmin_flt, cpt_cmaj_flt; ++ ++ __u64 
cpt_rlim_cur[CPT_RLIM_NLIMITS]; ++ __u64 cpt_rlim_max[CPT_RLIM_NLIMITS]; ++} __attribute__ ((aligned (8))); ++/* Followed by list of posix timers. */ ++ ++struct cpt_sighand_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++} __attribute__ ((aligned (8))); ++/* Followed by list of sighandles. */ ++ ++struct cpt_sighandler_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_signo; ++ __u32 __cpt_pad1; ++ __u64 cpt_handler; ++ __u64 cpt_restorer; ++ __u64 cpt_flags; ++ __u64 cpt_mask; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_netdev_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_index; ++ __u32 cpt_flags; ++ __u8 cpt_name[16]; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_ifaddr_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u32 cpt_index; ++ __u8 cpt_family; ++ __u8 cpt_masklen; ++ __u8 cpt_flags; ++ __u8 cpt_scope; ++ __u32 cpt_address[4]; ++ __u32 cpt_peer[4]; ++ __u32 cpt_broadcast[4]; ++ __u8 cpt_label[16]; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_ipct_tuple ++{ ++ __u32 cpt_src; ++ __u16 cpt_srcport; ++ __u16 __cpt_pad1; ++ ++ __u32 cpt_dst; ++ __u16 cpt_dstport; ++ __u8 cpt_protonum; ++ __u8 cpt_dir; /* TEMPORARY HACK TO VALIDATE CODE */ ++} __attribute__ ((aligned (8))); ++ ++struct cpt_nat_manip ++{ ++ __u8 cpt_direction; ++ __u8 cpt_hooknum; ++ __u8 cpt_maniptype; ++ __u8 __cpt_pad1; ++ ++ __u32 cpt_manip_addr; ++ __u16 cpt_manip_port; ++ __u16 __cpt_pad2; ++ __u32 __cpt_pad3; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_nat_seq ++{ ++ __u32 cpt_correction_pos; ++ __u32 cpt_offset_before; ++ __u32 cpt_offset_after; ++ __u32 __cpt_pad1; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_ip_connexpect_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_timeout; ++ 
__u32 cpt_sibling_conntrack; /* Index of child conntrack */ ++ __u32 cpt_seq; /* id in 2.6.15 */ ++ ++ struct cpt_ipct_tuple cpt_ct_tuple; /* NU 2.6.15 */ ++ struct cpt_ipct_tuple cpt_tuple; ++ struct cpt_ipct_tuple cpt_mask; ++ ++ /* union ip_conntrack_expect_help. Used by ftp, irc, amanda */ ++ __u32 cpt_help[3]; /* NU 2.6.15 */ ++ __u16 cpt_manip_proto; ++ __u8 cpt_dir; ++ __u8 cpt_flags; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_ip_conntrack_image ++{ ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ struct cpt_ipct_tuple cpt_tuple[2]; ++ __u64 cpt_status; ++ __u64 cpt_timeout; ++ __u32 cpt_index; ++ __u8 cpt_ct_helper; ++ __u8 cpt_nat_helper; ++ __u16 cpt_pad1; ++ ++ /* union ip_conntrack_proto. Used by tcp and icmp. */ ++ __u32 cpt_proto_data[12]; ++ ++ /* union ip_conntrack_help. Used by ftp and pptp helper. ++ * We do not support pptp... ++ */ ++ __u32 cpt_help_data[6]; ++ ++ /* nat info */ ++ __u32 cpt_initialized; /* NU 2.6.15 */ ++ __u32 cpt_num_manips; /* NU 2.6.15 */ ++ struct cpt_nat_manip cpt_nat_manips[6]; /* NU 2.6.15 */ ++ ++ struct cpt_nat_seq cpt_nat_seq[2]; ++ ++ __u32 cpt_masq_index; ++ __u32 cpt_id; ++ __u32 cpt_mark; ++} __attribute__ ((aligned (8))); ++ ++struct cpt_beancounter_image { ++ __u64 cpt_next; ++ __u32 cpt_object; ++ __u16 cpt_hdrlen; ++ __u16 cpt_content; ++ ++ __u64 cpt_parent; ++ __u32 cpt_id; ++ __u32 __cpt_pad; ++ __u64 cpt_parms[32 * 6 * 2]; ++} __attribute__ ((aligned (8))); ++ ++#ifdef __KERNEL__ ++ ++static inline void *cpt_ptr_import(__u64 ptr) ++{ ++ return (void*)(unsigned long)ptr; ++} ++ ++static inline __u64 cpt_ptr_export(void __user *ptr) ++{ ++ return (__u64)(unsigned long)ptr; ++} ++ ++static inline void cpt_sigset_import(sigset_t *sig, __u64 ptr) ++{ ++ memcpy(sig, &ptr, sizeof(*sig)); ++} ++ ++static inline __u64 cpt_sigset_export(sigset_t *sig) ++{ ++ return *(__u64*)sig; ++} ++ ++static inline __u64 cpt_timespec_export(struct timespec *tv) ++{ ++ return 
(((u64)tv->tv_sec) << 32) + tv->tv_nsec; ++} ++ ++static inline void cpt_timespec_import(struct timespec *tv, __u64 val) ++{ ++ tv->tv_sec = val>>32; ++ tv->tv_nsec = (val&0xFFFFFFFF); ++} ++ ++static inline __u64 cpt_timeval_export(struct timeval *tv) ++{ ++ return (((u64)tv->tv_sec) << 32) + tv->tv_usec; ++} ++ ++static inline void cpt_timeval_import(struct timeval *tv, __u64 val) ++{ ++ tv->tv_sec = val>>32; ++ tv->tv_usec = (val&0xFFFFFFFF); ++} ++ ++#endif ++ ++#endif /* __CPT_IMAGE_H_ */ +diff -upr linux-2.6.16.orig/include/linux/cpt_ioctl.h linux-2.6.16-026test015/include/linux/cpt_ioctl.h +--- linux-2.6.16.orig/include/linux/cpt_ioctl.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/cpt_ioctl.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,41 @@ ++/* ++ * ++ * include/linux/cpt_ioctl.h ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _CPT_IOCTL_H_ ++#define _CPT_IOCTL_H_ 1 ++ ++#include <linux/types.h> ++#include <linux/ioctl.h> ++ ++#define CPTCTLTYPE '-' ++#define CPT_SET_DUMPFD _IOW(CPTCTLTYPE, 1, int) ++#define CPT_SET_STATUSFD _IOW(CPTCTLTYPE, 2, int) ++#define CPT_SET_LOCKFD _IOW(CPTCTLTYPE, 3, int) ++#define CPT_SET_VEID _IOW(CPTCTLTYPE, 4, int) ++#define CPT_SUSPEND _IO(CPTCTLTYPE, 5) ++#define CPT_DUMP _IO(CPTCTLTYPE, 6) ++#define CPT_UNDUMP _IO(CPTCTLTYPE, 7) ++#define CPT_RESUME _IO(CPTCTLTYPE, 8) ++#define CPT_KILL _IO(CPTCTLTYPE, 9) ++#define CPT_JOIN_CONTEXT _IO(CPTCTLTYPE, 10) ++#define CPT_GET_CONTEXT _IOW(CPTCTLTYPE, 11, unsigned int) ++#define CPT_PUT_CONTEXT _IO(CPTCTLTYPE, 12) ++#define CPT_SET_PAGEINFDIN _IOW(CPTCTLTYPE, 13, int) ++#define CPT_SET_PAGEINFDOUT _IOW(CPTCTLTYPE, 14, int) ++#define CPT_PAGEIND _IO(CPTCTLTYPE, 15) ++#define CPT_VMPREP _IOW(CPTCTLTYPE, 16, int) ++#define CPT_SET_LAZY _IOW(CPTCTLTYPE, 17, int) ++#define CPT_SET_CPU_FLAGS _IOW(CPTCTLTYPE, 18, unsigned int) ++#define 
CPT_TEST_CAPS _IOW(CPTCTLTYPE, 19, unsigned int) ++#define CPT_TEST_VECAPS _IOW(CPTCTLTYPE, 20, unsigned int) ++#define CPT_SET_ERRORFD _IOW(CPTCTLTYPE, 21, int) ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/cpu.h linux-2.6.16-026test015/include/linux/cpu.h +--- linux-2.6.16.orig/include/linux/cpu.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/cpu.h 2006-07-04 14:41:36.000000000 +0400 +@@ -32,7 +32,7 @@ struct cpu { + }; + + extern int register_cpu(struct cpu *, int, struct node *); +-extern struct sys_device *get_cpu_sysdev(int cpu); ++extern struct sys_device *get_cpu_sysdev(unsigned cpu); + #ifdef CONFIG_HOTPLUG_CPU + extern void unregister_cpu(struct cpu *, struct node *); + #endif +diff -upr linux-2.6.16.orig/include/linux/cpumask.h linux-2.6.16-026test015/include/linux/cpumask.h +--- linux-2.6.16.orig/include/linux/cpumask.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/cpumask.h 2006-07-04 14:41:36.000000000 +0400 +@@ -408,6 +408,7 @@ extern cpumask_t cpu_present_map; + }) + + #define for_each_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) ++#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) + #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) + #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) + +diff -upr linux-2.6.16.orig/include/linux/dcache.h linux-2.6.16-026test015/include/linux/dcache.h +--- linux-2.6.16.orig/include/linux/dcache.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/dcache.h 2006-07-04 14:41:38.000000000 +0400 +@@ -9,6 +9,8 @@ + #include <linux/cache.h> + #include <linux/rcupdate.h> + ++#include <ub/ub_dcache.h> ++ + struct nameidata; + struct vfsmount; + +@@ -111,6 +113,9 @@ struct dentry { + struct dcookie_struct *d_cookie; /* cookie, if any */ + #endif + int d_mounted; ++#ifdef CONFIG_USER_RESOURCE ++ struct dentry_beancounter dentry_bc; ++#endif + 
unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ + }; + +@@ -161,7 +166,11 @@ d_iput: no no no yes + + #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ + #define DCACHE_UNHASHED 0x0010 ++#define DCACHE_VIRTUAL 0x0100 /* ve accessible */ ++ ++extern void mark_tree_virtual(struct vfsmount *m, struct dentry *d); + ++extern kmem_cache_t *dentry_cache; + extern spinlock_t dcache_lock; + + /** +@@ -215,7 +224,7 @@ extern struct dentry * d_alloc_anon(stru + extern struct dentry * d_splice_alias(struct inode *, struct dentry *); + extern void shrink_dcache_sb(struct super_block *); + extern void shrink_dcache_parent(struct dentry *); +-extern void shrink_dcache_anon(struct hlist_head *); ++extern void shrink_dcache_anon(struct super_block *); + extern int d_invalidate(struct dentry *); + + /* only used at mount-time */ +@@ -277,6 +286,7 @@ extern struct dentry * __d_lookup(struct + /* validate "insecure" dentry pointer */ + extern int d_validate(struct dentry *, struct dentry *); + ++extern int d_root_check(struct dentry *, struct vfsmount *); + extern char * d_path(struct dentry *, struct vfsmount *, char *, int); + + /* Allocation counts.. 
*/ +@@ -297,6 +307,8 @@ extern char * d_path(struct dentry *, st + static inline struct dentry *dget(struct dentry *dentry) + { + if (dentry) { ++ if (ub_dget_testone(dentry)) ++ BUG(); + BUG_ON(!atomic_read(&dentry->d_count)); + atomic_inc(&dentry->d_count); + } +@@ -340,6 +352,8 @@ extern struct dentry *lookup_create(stru + + extern int sysctl_vfs_cache_pressure; + ++extern int check_area_access_ve(struct dentry *, struct vfsmount *); ++extern int check_area_execute_ve(struct dentry *, struct vfsmount *); + #endif /* __KERNEL__ */ + + #endif /* __LINUX_DCACHE_H */ +diff -upr linux-2.6.16.orig/include/linux/devpts_fs.h linux-2.6.16-026test015/include/linux/devpts_fs.h +--- linux-2.6.16.orig/include/linux/devpts_fs.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/devpts_fs.h 2006-07-04 14:41:38.000000000 +0400 +@@ -21,6 +21,15 @@ int devpts_pty_new(struct tty_struct *tt + struct tty_struct *devpts_get_tty(int number); /* get tty structure */ + void devpts_pty_kill(int number); /* unlink */ + ++struct devpts_config { ++ int setuid; ++ int setgid; ++ uid_t uid; ++ gid_t gid; ++ umode_t mode; ++}; ++ ++extern struct devpts_config devpts_config; + #else + + /* Dummy stubs in the no-pty case */ +diff -upr linux-2.6.16.orig/include/linux/elfcore.h linux-2.6.16-026test015/include/linux/elfcore.h +--- linux-2.6.16.orig/include/linux/elfcore.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/elfcore.h 2006-07-04 14:41:39.000000000 +0400 +@@ -7,6 +7,8 @@ + #include <linux/user.h> + #include <linux/ptrace.h> + ++extern int sysctl_at_vsyscall; ++ + struct elf_siginfo + { + int si_signo; /* signal number */ +diff -upr linux-2.6.16.orig/include/linux/eventpoll.h linux-2.6.16-026test015/include/linux/eventpoll.h +--- linux-2.6.16.orig/include/linux/eventpoll.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/eventpoll.h 2006-07-04 14:41:39.000000000 +0400 +@@ -85,6 +85,91 @@ static inline 
void eventpoll_release(str + eventpoll_release_file(file); + } + ++struct epoll_filefd { ++ struct file *file; ++ int fd; ++}; ++ ++/* ++ * This structure is stored inside the "private_data" member of the file ++ * structure and rapresent the main data sructure for the eventpoll ++ * interface. ++ */ ++struct eventpoll { ++ /* Protect the this structure access */ ++ rwlock_t lock; ++ ++ /* ++ * This semaphore is used to ensure that files are not removed ++ * while epoll is using them. This is read-held during the event ++ * collection loop and it is write-held during the file cleanup ++ * path, the epoll file exit code and the ctl operations. ++ */ ++ struct rw_semaphore sem; ++ ++ /* Wait queue used by sys_epoll_wait() */ ++ wait_queue_head_t wq; ++ ++ /* Wait queue used by file->poll() */ ++ wait_queue_head_t poll_wait; ++ ++ /* List of ready file descriptors */ ++ struct list_head rdllist; ++ ++ /* RB-Tree root used to store monitored fd structs */ ++ struct rb_root rbr; ++}; ++ ++/* ++ * Each file descriptor added to the eventpoll interface will ++ * have an entry of this type linked to the hash. ++ */ ++struct epitem { ++ /* RB-Tree node used to link this structure to the eventpoll rb-tree */ ++ struct rb_node rbn; ++ ++ /* List header used to link this structure to the eventpoll ready list */ ++ struct list_head rdllink; ++ ++ /* The file descriptor information this item refers to */ ++ struct epoll_filefd ffd; ++ ++ /* Number of active wait queue attached to poll operations */ ++ int nwait; ++ ++ /* List containing poll wait queues */ ++ struct list_head pwqlist; ++ ++ /* The "container" of this item */ ++ struct eventpoll *ep; ++ ++ /* The structure that describe the interested events and the source fd */ ++ struct epoll_event event; ++ ++ /* ++ * Used to keep track of the usage count of the structure. This avoids ++ * that the structure will desappear from underneath our processing. 
++ */ ++ atomic_t usecnt; ++ ++ /* List header used to link this item to the "struct file" items list */ ++ struct list_head fllink; ++ ++ /* List header used to link the item to the transfer list */ ++ struct list_head txlink; ++ ++ /* ++ * This is used during the collection/transfer of events to userspace ++ * to pin items empty events set. ++ */ ++ unsigned int revents; ++}; ++ ++extern struct semaphore epsem; ++struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); ++int ep_insert(struct eventpoll *ep, struct epoll_event *event, ++ struct file *tfile, int fd); ++void ep_release_epitem(struct epitem *epi); + + #else + +diff -upr linux-2.6.16.orig/include/linux/fairsched.h linux-2.6.16-026test015/include/linux/fairsched.h +--- linux-2.6.16.orig/include/linux/fairsched.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/fairsched.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,118 @@ ++#ifndef __LINUX_FAIRSCHED_H__ ++#define __LINUX_FAIRSCHED_H__ ++ ++/* ++ * Fair Scheduler ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/cache.h> ++#include <asm/timex.h> ++ ++#define FAIRSCHED_HAS_CPU_BINDING 0 ++ ++typedef struct { cycles_t t; } fschtag_t; ++typedef struct { unsigned long d; } fschdur_t; ++typedef struct { cycles_t v; } fschvalue_t; ++ ++struct vcpu_scheduler; ++ ++struct fairsched_node { ++ struct list_head runlist; ++ ++ /* ++ * Fair Scheduler fields ++ * ++ * nr_running >= nr_ready (!= if delayed) ++ */ ++ fschtag_t start_tag; ++ int nr_ready; ++ int nr_runnable; ++ int nr_pcpu; ++ ++ /* ++ * Rate limitator fields ++ */ ++ cycles_t last_updated_at; ++ fschvalue_t value; /* leaky function value */ ++ cycles_t delay; /* removed from schedule till */ ++ unsigned char delayed; ++ ++ /* ++ * Configuration ++ * ++ * Read-only most of the time. 
++ */ ++ unsigned weight ____cacheline_aligned_in_smp; ++ /* fairness weight */ ++ unsigned char rate_limited; ++ unsigned rate; /* max CPU share */ ++ fschtag_t max_latency; ++ unsigned min_weight; ++ ++ struct list_head nodelist; ++ int id; ++#ifdef CONFIG_VE ++ struct ve_struct *owner_env; ++#endif ++ struct vcpu_scheduler *vsched; ++}; ++ ++#ifdef CONFIG_FAIRSCHED ++ ++#define FSCHWEIGHT_MAX ((1 << 16) - 1) ++#define FSCHRATE_SHIFT 10 ++ ++/* ++ * Fairsched nodes used in boot process. ++ */ ++extern struct fairsched_node fairsched_init_node; ++extern struct fairsched_node fairsched_idle_node; ++ ++/* ++ * For proc output. ++ */ ++extern unsigned fairsched_nr_cpus; ++extern void fairsched_cpu_online_map(int id, cpumask_t *mask); ++ ++/* I hope vsched_id is always equal to fairsched node id --SAW */ ++#define task_fairsched_node_id(p) task_vsched_id(p) ++ ++/* ++ * Core functions. ++ */ ++extern void fairsched_incrun(struct fairsched_node *node); ++extern void fairsched_decrun(struct fairsched_node *node); ++extern void fairsched_inccpu(struct fairsched_node *node); ++extern void fairsched_deccpu(struct fairsched_node *node); ++extern struct fairsched_node *fairsched_schedule( ++ struct fairsched_node *prev_node, ++ struct fairsched_node *cur_node, ++ int cur_node_active, ++ cycles_t time); ++ ++/* ++ * Management functions. 
++ */ ++void fairsched_init_early(void); ++asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight, ++ unsigned int newid); ++asmlinkage int sys_fairsched_rmnod(unsigned int id); ++asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid); ++ ++#else /* CONFIG_FAIRSCHED */ ++ ++#define task_fairsched_node_id(p) 0 ++#define fairsched_incrun(p) do { } while (0) ++#define fairsched_decrun(p) do { } while (0) ++#define fairsched_deccpu(p) do { } while (0) ++#define fairsched_cpu_online_map(id, mask) do { *(mask) = cpu_online_map; } while (0) ++ ++#endif /* CONFIG_FAIRSCHED */ ++ ++#endif /* __LINUX_FAIRSCHED_H__ */ +diff -upr linux-2.6.16.orig/include/linux/faudit.h linux-2.6.16-026test015/include/linux/faudit.h +--- linux-2.6.16.orig/include/linux/faudit.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/faudit.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,46 @@ ++/* ++ * include/linux/faudit.h ++ * ++ * Copyright (C) 2005 SWSoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __FAUDIT_H_ ++#define __FAUDIT_H_ ++ ++#include <linux/config.h> ++#include <linux/virtinfo.h> ++ ++struct vfsmount; ++struct dentry; ++struct super_block; ++struct kstatfs; ++struct kstat; ++struct pt_regs; ++ ++struct faudit_regs_arg { ++ int err; ++ struct pt_regs *regs; ++}; ++ ++struct faudit_stat_arg { ++ int err; ++ struct vfsmount *mnt; ++ struct dentry *dentry; ++ struct kstat *stat; ++}; ++ ++struct faudit_statfs_arg { ++ int err; ++ struct super_block *sb; ++ struct kstatfs *stat; ++}; ++ ++#define VIRTINFO_FAUDIT (0) ++#define VIRTINFO_FAUDIT_STAT (VIRTINFO_FAUDIT + 0) ++#define VIRTINFO_FAUDIT_STATFS (VIRTINFO_FAUDIT + 1) ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/fb.h linux-2.6.16-026test015/include/linux/fb.h +--- linux-2.6.16.orig/include/linux/fb.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/fb.h 2006-07-04 14:41:36.000000000 +0400 +@@ -839,12 +839,10 @@ struct fb_info { + #define FB_LEFT_POS(bpp) (32 - bpp) + #define FB_SHIFT_HIGH(val, bits) ((val) >> (bits)) + #define FB_SHIFT_LOW(val, bits) ((val) << (bits)) +-#define FB_BIT_NR(b) (7 - (b)) + #else + #define FB_LEFT_POS(bpp) (0) + #define FB_SHIFT_HIGH(val, bits) ((val) << (bits)) + #define FB_SHIFT_LOW(val, bits) ((val) >> (bits)) +-#define FB_BIT_NR(b) (b) + #endif + + /* +diff -upr linux-2.6.16.orig/include/linux/fs.h linux-2.6.16-026test015/include/linux/fs.h +--- linux-2.6.16.orig/include/linux/fs.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/fs.h 2006-07-04 14:41:39.000000000 +0400 +@@ -7,6 +7,7 @@ + */ + + #include <linux/config.h> ++#include <linux/ve_owner.h> + #include <linux/limits.h> + #include <linux/ioctl.h> + +@@ -64,6 +65,7 @@ extern int dir_notify_enable; + #define FMODE_LSEEK 4 + #define FMODE_PREAD 8 + #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ ++#define FMODE_QUOTACTL 4 + + #define RW_MASK 1 + #define RWA_MASK 2 +@@ -83,6 +85,7 @@ extern int 
dir_notify_enable; + /* public flags for file_system_type */ + #define FS_REQUIRES_DEV 1 + #define FS_BINARY_MOUNTDATA 2 ++#define FS_VIRTUALIZED 64 /* Can mount this fstype inside ve */ + #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ + #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon + * as nfs_rename() will be cleaned up +@@ -297,6 +300,9 @@ struct iattr { + * Includes for diskquotas. + */ + #include <linux/quota.h> ++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE) ++#include <linux/vzquota_qlnk.h> ++#endif + + /** + * enum positive_aop_returns - aop return codes with specific semantics +@@ -493,6 +499,9 @@ struct inode { + #ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; + #endif ++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE) ++ struct vz_quota_ilink i_qlnk; ++#endif + /* These three should probably be a union */ + struct list_head i_devices; + struct pipe_inode_info *i_pipe; +@@ -527,6 +536,8 @@ struct inode { + #endif + }; + ++extern kmem_cache_t *inode_cachep; ++ + /* + * NOTE: in a 32bit arch with a preemptable kernel and + * an UP compile the i_size_read/write must be atomic +@@ -588,6 +599,20 @@ static inline unsigned imajor(struct ino + + extern struct block_device *I_BDEV(struct inode *inode); + ++struct exec_perm { ++ umode_t mode; ++ uid_t uid, gid; ++ int set; ++}; ++ ++static inline void set_exec_perm(struct exec_perm *perm, struct inode *ino) ++{ ++ perm->set = 1; ++ perm->mode = ino->i_mode; ++ perm->uid = ino->i_uid; ++ perm->gid = ino->i_gid; ++} ++ + struct fown_struct { + rwlock_t lock; /* protects pid, uid, euid fields */ + int pid; /* pid or -pgrp where SIGIO should be sent */ +@@ -646,7 +671,10 @@ struct file { + spinlock_t f_ep_lock; + #endif /* #ifdef CONFIG_EPOLL */ + struct address_space *f_mapping; ++ struct ve_struct *owner_env; + }; ++DCL_VE_OWNER_PROTO(FILP, struct file, owner_env) ++ + extern spinlock_t files_lock; + #define file_list_lock() 
spin_lock(&files_lock); + #define file_list_unlock() spin_unlock(&files_lock); +@@ -710,6 +738,9 @@ struct file_lock { + struct file *fl_file; + unsigned char fl_flags; + unsigned char fl_type; ++#ifdef CONFIG_USER_RESOURCE ++ unsigned char fl_charged; ++#endif + loff_t fl_start; + loff_t fl_end; + +@@ -902,7 +933,7 @@ static inline void unlock_super(struct s + /* + * VFS helper functions.. + */ +-extern int vfs_permission(struct nameidata *, int); ++extern int vfs_permission(struct nameidata *, int, struct exec_perm *); + extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); + extern int vfs_mkdir(struct inode *, struct dentry *, int); + extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); +@@ -1041,7 +1072,8 @@ struct inode_operations { + void * (*follow_link) (struct dentry *, struct nameidata *); + void (*put_link) (struct dentry *, struct nameidata *, void *); + void (*truncate) (struct inode *); +- int (*permission) (struct inode *, int, struct nameidata *); ++ int (*permission) (struct inode *, int, struct nameidata *, ++ struct exec_perm *); + int (*setattr) (struct dentry *, struct iattr *); + int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); +@@ -1089,6 +1121,8 @@ struct super_operations { + + ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); + ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); ++ ++ struct inode *(*get_quota_root)(struct super_block *); + }; + + /* Inode state bits. Protected by inode_lock. 
*/ +@@ -1246,8 +1280,14 @@ struct file_system_type { + struct module *owner; + struct file_system_type * next; + struct list_head fs_supers; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(FSTYPE, struct file_system_type, owner_env) ++ ++void get_filesystem(struct file_system_type *fs); ++void put_filesystem(struct file_system_type *fs); ++ + struct super_block *get_sb_bdev(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, int)); +@@ -1285,6 +1325,7 @@ extern struct vfsmount *kern_mount(struc + extern int may_umount_tree(struct vfsmount *); + extern int may_umount(struct vfsmount *); + extern void umount_tree(struct vfsmount *, int, struct list_head *); ++#define kern_umount mntput + extern void release_mounts(struct list_head *); + extern long do_mount(char *, char *, char *, unsigned long, void *); + extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); +@@ -1292,6 +1333,7 @@ extern void mnt_set_mountpoint(struct vf + struct vfsmount *); + + extern int vfs_statfs(struct super_block *, struct kstatfs *); ++extern int faudit_statfs(struct super_block *, struct kstatfs *); + + /* /sys/fs */ + extern struct subsystem fs_subsys; +@@ -1383,6 +1425,7 @@ extern int bd_claim(struct block_device + extern void bd_release(struct block_device *); + + /* fs/char_dev.c */ ++#define CHRDEV_MAJOR_HASH_SIZE 255 + extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); + extern int register_chrdev_region(dev_t, unsigned, const char *); + extern int register_chrdev(unsigned int, const char *, +@@ -1390,25 +1433,17 @@ extern int register_chrdev(unsigned int, + extern int unregister_chrdev(unsigned int, const char *); + extern void unregister_chrdev_region(dev_t, unsigned); + extern int chrdev_open(struct inode *, struct file *); +-extern int get_chrdev_list(char *); +-extern void *acquire_chrdev_list(void); +-extern int count_chrdev_list(void); 
+-extern void *get_next_chrdev(void *); +-extern int get_chrdev_info(void *, int *, char **); +-extern void release_chrdev_list(void *); ++extern void chrdev_show(struct seq_file *,off_t); + + /* fs/block_dev.c */ ++#define BLKDEV_MAJOR_HASH_SIZE 255 + #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ + extern const char *__bdevname(dev_t, char *buffer); + extern const char *bdevname(struct block_device *bdev, char *buffer); +-extern struct block_device *lookup_bdev(const char *); ++extern struct block_device *lookup_bdev(const char *, int mode); + extern struct block_device *open_bdev_excl(const char *, int, void *); + extern void close_bdev_excl(struct block_device *); +-extern void *acquire_blkdev_list(void); +-extern int count_blkdev_list(void); +-extern void *get_next_blkdev(void *); +-extern int get_blkdev_info(void *, int *, char **); +-extern void release_blkdev_list(void *); ++extern void blkdev_show(struct seq_file *,off_t); + + extern void init_special_inode(struct inode *, umode_t, dev_t); + +@@ -1433,7 +1468,7 @@ extern int fs_may_remount_ro(struct supe + #define bio_data_dir(bio) ((bio)->bi_rw & 1) + + extern int check_disk_change(struct block_device *); +-extern int invalidate_inodes(struct super_block *); ++extern int invalidate_inodes(struct super_block *, int); + extern int __invalidate_device(struct block_device *); + extern int invalidate_partition(struct gendisk *, int); + unsigned long invalidate_mapping_pages(struct address_space *mapping, +@@ -1463,9 +1498,10 @@ extern int do_remount_sb(struct super_bl + void *data, int force); + extern sector_t bmap(struct inode *, sector_t); + extern int notify_change(struct dentry *, struct iattr *); +-extern int permission(struct inode *, int, struct nameidata *); ++extern int permission(struct inode *, int, struct nameidata *, ++ struct exec_perm *); + extern int generic_permission(struct inode *, int, +- int (*check_acl)(struct inode *, int)); ++ int (*check_acl)(struct inode *, 
int), struct exec_perm *); + + extern int get_write_access(struct inode *); + extern int deny_write_access(struct file *); +@@ -1484,7 +1520,9 @@ extern int open_namei(int dfd, const cha + extern int may_open(struct nameidata *, int, int); + + extern int kernel_read(struct file *, unsigned long, char *, unsigned long); +-extern struct file * open_exec(const char *); ++ ++struct linux_binprm; ++extern struct file * open_exec(const char *, struct linux_binprm *); + + /* fs/dcache.c -- generic fs support functions */ + extern int is_subdir(struct dentry *, struct dentry *); +diff -upr linux-2.6.16.orig/include/linux/genhd.h linux-2.6.16-026test015/include/linux/genhd.h +--- linux-2.6.16.orig/include/linux/genhd.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/genhd.h 2006-07-04 14:41:38.000000000 +0400 +@@ -421,6 +421,7 @@ static inline struct block_device *bdget + return bdget(MKDEV(disk->major, disk->first_minor) + index); + } + ++extern struct subsystem block_subsys; + #endif + + #endif +diff -upr linux-2.6.16.orig/include/linux/gfp.h linux-2.6.16-026test015/include/linux/gfp.h +--- linux-2.6.16.orig/include/linux/gfp.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/gfp.h 2006-07-04 14:41:37.000000000 +0400 +@@ -47,6 +47,8 @@ struct vm_area_struct; + #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ + #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ + #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ ++#define __GFP_UBC ((__force gfp_t)0x40000u)/* charge kmem in buddy and slab */ ++#define __GFP_SOFT_UBC ((__force gfp_t)0x80000u)/* use soft charging */ + + #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ + #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) +@@ -55,14 +57,17 @@ struct vm_area_struct; + #define GFP_LEVEL_MASK 
(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ + __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ + __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ +- __GFP_NOMEMALLOC|__GFP_HARDWALL) ++ __GFP_NOMEMALLOC|__GFP_HARDWALL| \ ++ __GFP_UBC|__GFP_SOFT_UBC) + + /* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */ + #define GFP_ATOMIC (__GFP_HIGH) + #define GFP_NOIO (__GFP_WAIT) + #define GFP_NOFS (__GFP_WAIT | __GFP_IO) + #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) ++#define GFP_KERNEL_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC) + #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) ++#define GFP_USER_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | __GFP_UBC) + #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ + __GFP_HIGHMEM) + +diff -upr linux-2.6.16.orig/include/linux/hrtimer.h linux-2.6.16-026test015/include/linux/hrtimer.h +--- linux-2.6.16.orig/include/linux/hrtimer.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/hrtimer.h 2006-07-04 14:41:39.000000000 +0400 +@@ -140,4 +140,9 @@ extern void hrtimer_run_queues(void); + /* Bootup initialization: */ + extern void __init hrtimers_init(void); + ++extern long nanosleep_restart(struct restart_block *restart); ++ ++extern ktime_t schedule_hrtimer(struct hrtimer *timer, ++ const enum hrtimer_mode mode); ++ + #endif +diff -upr linux-2.6.16.orig/include/linux/i2o.h linux-2.6.16-026test015/include/linux/i2o.h +--- linux-2.6.16.orig/include/linux/i2o.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/i2o.h 2006-07-04 14:41:36.000000000 +0400 +@@ -1116,8 +1116,11 @@ static inline struct i2o_message *i2o_ms + + mmsg->mfa = readl(c->in_port); + if (unlikely(mmsg->mfa >= c->in_queue.len)) { ++ u32 mfa = mmsg->mfa; ++ + mempool_free(mmsg, c->in_msg.mempool); +- if(mmsg->mfa == I2O_QUEUE_EMPTY) ++ ++ if (mfa == I2O_QUEUE_EMPTY) + return ERR_PTR(-EBUSY); + return ERR_PTR(-EFAULT); + } 
+diff -upr linux-2.6.16.orig/include/linux/inetdevice.h linux-2.6.16-026test015/include/linux/inetdevice.h +--- linux-2.6.16.orig/include/linux/inetdevice.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/inetdevice.h 2006-07-04 14:41:38.000000000 +0400 +@@ -34,6 +34,12 @@ struct ipv4_devconf + }; + + extern struct ipv4_devconf ipv4_devconf; ++extern struct ipv4_devconf ipv4_devconf_dflt; ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_ipv4_devconf (*(get_exec_env()->_ipv4_devconf)) ++#else ++#define ve_ipv4_devconf ipv4_devconf ++#endif + + struct in_device + { +@@ -60,29 +66,29 @@ struct in_device + }; + + #define IN_DEV_FORWARD(in_dev) ((in_dev)->cnf.forwarding) +-#define IN_DEV_MFORWARD(in_dev) (ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding) +-#define IN_DEV_RPFILTER(in_dev) (ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter) +-#define IN_DEV_SOURCE_ROUTE(in_dev) (ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route) +-#define IN_DEV_BOOTP_RELAY(in_dev) (ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay) +- +-#define IN_DEV_LOG_MARTIANS(in_dev) (ipv4_devconf.log_martians || (in_dev)->cnf.log_martians) +-#define IN_DEV_PROXY_ARP(in_dev) (ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp) +-#define IN_DEV_SHARED_MEDIA(in_dev) (ipv4_devconf.shared_media || (in_dev)->cnf.shared_media) +-#define IN_DEV_TX_REDIRECTS(in_dev) (ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects) +-#define IN_DEV_SEC_REDIRECTS(in_dev) (ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects) ++#define IN_DEV_MFORWARD(in_dev) (ve_ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding) ++#define IN_DEV_RPFILTER(in_dev) (ve_ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter) ++#define IN_DEV_SOURCE_ROUTE(in_dev) (ve_ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route) ++#define IN_DEV_BOOTP_RELAY(in_dev) (ve_ipv4_devconf.bootp_relay && 
(in_dev)->cnf.bootp_relay) ++ ++#define IN_DEV_LOG_MARTIANS(in_dev) (ve_ipv4_devconf.log_martians || (in_dev)->cnf.log_martians) ++#define IN_DEV_PROXY_ARP(in_dev) (ve_ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp) ++#define IN_DEV_SHARED_MEDIA(in_dev) (ve_ipv4_devconf.shared_media || (in_dev)->cnf.shared_media) ++#define IN_DEV_TX_REDIRECTS(in_dev) (ve_ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects) ++#define IN_DEV_SEC_REDIRECTS(in_dev) (ve_ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects) + #define IN_DEV_IDTAG(in_dev) ((in_dev)->cnf.tag) + #define IN_DEV_MEDIUM_ID(in_dev) ((in_dev)->cnf.medium_id) + #define IN_DEV_PROMOTE_SECONDARIES(in_dev) (ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries) + + #define IN_DEV_RX_REDIRECTS(in_dev) \ + ((IN_DEV_FORWARD(in_dev) && \ +- (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \ ++ (ve_ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \ + || (!IN_DEV_FORWARD(in_dev) && \ +- (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects))) ++ (ve_ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects))) + +-#define IN_DEV_ARPFILTER(in_dev) (ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter) +-#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce)) +-#define IN_DEV_ARP_IGNORE(in_dev) (max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore)) ++#define IN_DEV_ARPFILTER(in_dev) (ve_ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter) ++#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ve_ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce)) ++#define IN_DEV_ARP_IGNORE(in_dev) (max(ve_ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore)) + + struct in_ifaddr + { +@@ -113,6 +119,7 @@ extern u32 inet_select_addr(const struc + extern u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope); + extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 
mask); + extern void inet_forward_change(void); ++extern void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); + + static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa) + { +@@ -180,6 +187,10 @@ static inline void in_dev_put(struct in_ + #define __in_dev_put(idev) atomic_dec(&(idev)->refcnt) + #define in_dev_hold(idev) atomic_inc(&(idev)->refcnt) + ++struct ve_struct; ++extern int devinet_sysctl_init(struct ve_struct *); ++extern void devinet_sysctl_fini(struct ve_struct *); ++extern void devinet_sysctl_free(struct ve_struct *); + #endif /* __KERNEL__ */ + + static __inline__ __u32 inet_make_mask(int logmask) +diff -upr linux-2.6.16.orig/include/linux/ipv6.h linux-2.6.16-026test015/include/linux/ipv6.h +--- linux-2.6.16.orig/include/linux/ipv6.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/ipv6.h 2006-07-04 14:41:39.000000000 +0400 +@@ -415,12 +415,13 @@ static inline struct raw6_sock *raw6_sk( + #define inet_v6_ipv6only(__sk) 0 + #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ + +-#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ ++#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif,__ve)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((__sk)->sk_family == AF_INET6) && \ + ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ + ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ ++ ve_accessible_strict(VE_OWNER_SK(__sk), (__ve)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + + #endif /* __KERNEL__ */ +diff -upr linux-2.6.16.orig/include/linux/jbd.h linux-2.6.16-026test015/include/linux/jbd.h +--- linux-2.6.16.orig/include/linux/jbd.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/jbd.h 2006-07-04 14:41:37.000000000 +0400 +@@ -245,10 +245,15 @@ typedef struct journal_superblock_s + #define J_ASSERT(assert) \ + 
do { \ + if (!(assert)) { \ ++ unsigned long stack; \ + printk (KERN_EMERG \ + "Assertion failure in %s() at %s:%d: \"%s\"\n", \ + __FUNCTION__, __FILE__, __LINE__, # assert); \ +- BUG(); \ ++ printk("Stack=%p current=%p pid=%d ve=%d comm='%s'\n", \ ++ &stack, current, current->pid, \ ++ get_exec_env()->veid, \ ++ current->comm); \ ++ dump_stack(); \ + } \ + } while (0) + +diff -upr linux-2.6.16.orig/include/linux/jiffies.h linux-2.6.16-026test015/include/linux/jiffies.h +--- linux-2.6.16.orig/include/linux/jiffies.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/jiffies.h 2006-07-04 14:41:39.000000000 +0400 +@@ -74,6 +74,7 @@ + */ + extern u64 __jiffy_data jiffies_64; + extern unsigned long volatile __jiffy_data jiffies; ++extern unsigned long cycles_per_jiffy, cycles_per_clock; + + #if (BITS_PER_LONG < 64) + u64 get_jiffies_64(void); +diff -upr linux-2.6.16.orig/include/linux/kdev_t.h linux-2.6.16-026test015/include/linux/kdev_t.h +--- linux-2.6.16.orig/include/linux/kdev_t.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/kdev_t.h 2006-07-04 14:41:38.000000000 +0400 +@@ -87,6 +87,57 @@ static inline unsigned sysv_minor(u32 de + return dev & 0x3ffff; + } + ++#define UNNAMED_MAJOR_COUNT 16 ++ ++#if UNNAMED_MAJOR_COUNT > 1 ++ ++extern int unnamed_dev_majors[UNNAMED_MAJOR_COUNT]; ++ ++static inline dev_t make_unnamed_dev(int idx) ++{ ++ /* ++ * Here we transfer bits from 8 to 8+log2(UNNAMED_MAJOR_COUNT) of the ++ * unnamed device index into major number. 
++ */ ++ return MKDEV(unnamed_dev_majors[(idx >> 8) & (UNNAMED_MAJOR_COUNT - 1)], ++ idx & ~((UNNAMED_MAJOR_COUNT - 1) << 8)); ++} ++ ++static inline int unnamed_dev_idx(dev_t dev) ++{ ++ int i; ++ for (i = 0; i < UNNAMED_MAJOR_COUNT && ++ MAJOR(dev) != unnamed_dev_majors[i]; i++); ++ return MINOR(dev) | (i << 8); ++} ++ ++static inline int is_unnamed_dev(dev_t dev) ++{ ++ int i; ++ for (i = 0; i < UNNAMED_MAJOR_COUNT && ++ MAJOR(dev) != unnamed_dev_majors[i]; i++); ++ return i < UNNAMED_MAJOR_COUNT; ++} ++ ++#else /* UNNAMED_MAJOR_COUNT */ ++ ++static inline dev_t make_unnamed_dev(int idx) ++{ ++ return MKDEV(0, idx); ++} ++ ++static inline int unnamed_dev_idx(dev_t dev) ++{ ++ return MINOR(dev); ++} ++ ++static inline int is_unnamed_dev(dev_t dev) ++{ ++ return MAJOR(dev) == 0; ++} ++ ++#endif /* UNNAMED_MAJOR_COUNT */ ++ + + #else /* __KERNEL__ */ + +diff -upr linux-2.6.16.orig/include/linux/kernel.h linux-2.6.16-026test015/include/linux/kernel.h +--- linux-2.6.16.orig/include/linux/kernel.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/kernel.h 2006-07-04 14:41:38.000000000 +0400 +@@ -132,6 +132,9 @@ asmlinkage int vprintk(const char *fmt, + __attribute__ ((format (printf, 1, 0))); + asmlinkage int printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); ++asmlinkage int ve_printk(int, const char * fmt, ...) ++ __attribute__ ((format (printf, 2, 3))); ++void prepare_printk(void); + #else + static inline int vprintk(const char *s, va_list args) + __attribute__ ((format (printf, 1, 0))); +@@ -139,8 +142,16 @@ static inline int vprintk(const char *s, + static inline int printk(const char *s, ...) + __attribute__ ((format (printf, 1, 2))); + static inline int printk(const char *s, ...) { return 0; } ++static inline int ve_printk(int d, const char *s, ...) ++ __attribute__ ((format (printf, 1, 2))); ++static inline int printk(int d, const char *s, ...) 
{ return 0; } ++#define prepare_printk() do { } while (0) + #endif + ++#define VE0_LOG 1 ++#define VE_LOG 2 ++#define VE_LOG_BOTH (VE0_LOG | VE_LOG) ++ + unsigned long int_sqrt(unsigned long); + + static inline int __attribute_pure__ long_log2(unsigned long x) +@@ -159,9 +170,14 @@ static inline unsigned long __attribute_ + extern int printk_ratelimit(void); + extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); + ++extern int console_silence_loglevel; ++ + static inline void console_silent(void) + { +- console_loglevel = 0; ++ if (console_loglevel > console_silence_loglevel) { ++ printk(KERN_EMERG "console shuts up ...\n"); ++ console_loglevel = 0; ++ } + } + + static inline void console_verbose(void) +@@ -171,10 +187,13 @@ static inline void console_verbose(void) + } + + extern void bust_spinlocks(int yes); ++extern void wake_up_klogd(void); + extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ + extern __deprecated_for_modules int panic_timeout; + extern int panic_on_oops; ++extern int decode_call_traces; + extern int tainted; ++extern int kernel_text_csum_broken; + extern const char *print_tainted(void); + extern void add_taint(unsigned); + +diff -upr linux-2.6.16.orig/include/linux/kmem_cache.h linux-2.6.16-026test015/include/linux/kmem_cache.h +--- linux-2.6.16.orig/include/linux/kmem_cache.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/kmem_cache.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,199 @@ ++#ifndef __KMEM_CACHE_H__ ++#define __KMEM_CACHE_H__ ++#include <linux/threads.h> ++#include <linux/smp.h> ++#include <linux/spinlock.h> ++#include <linux/list.h> ++#include <linux/mm.h> ++#include <asm/atomic.h> ++ ++/* ++ * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, ++ * SLAB_RED_ZONE & SLAB_POISON. ++ * 0 for faster, smaller code (especially in the critical paths). ++ * ++ * STATS - 1 to collect stats for /proc/slabinfo. 
++ * 0 for faster, smaller code (especially in the critical paths). ++ * ++ * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible) ++ */ ++ ++#ifdef CONFIG_DEBUG_SLAB ++#define SLAB_DEBUG 1 ++#define SLAB_STATS 1 ++#define SLAB_FORCED_DEBUG 1 ++#else ++#define SLAB_DEBUG 0 ++#define SLAB_STATS 0 ++#define SLAB_FORCED_DEBUG 0 ++#endif ++ ++/* ++ * struct array_cache ++ * ++ * Purpose: ++ * - LIFO ordering, to hand out cache-warm objects from _alloc ++ * - reduce the number of linked list operations ++ * - reduce spinlock operations ++ * ++ * The limit is stored in the per-cpu structure to reduce the data cache ++ * footprint. ++ * ++ */ ++struct array_cache { ++ unsigned int avail; ++ unsigned int limit; ++ unsigned int batchcount; ++ unsigned int touched; ++ spinlock_t lock; ++ void *entry[0]; /* ++ * Must have this definition in here for the proper ++ * alignment of array_cache. Also simplifies accessing ++ * the entries. ++ * [0] is for gcc 2.95. It should really be []. ++ */ ++}; ++ ++/* bootstrap: The caches do not work without cpuarrays anymore, ++ * but the cpuarrays are allocated from the generic caches... ++ */ ++#define BOOT_CPUCACHE_ENTRIES 1 ++struct arraycache_init { ++ struct array_cache cache; ++ void *entries[BOOT_CPUCACHE_ENTRIES]; ++}; ++ ++/* ++ * The slab lists for all objects. ++ */ ++struct kmem_list3 { ++ struct list_head slabs_partial; /* partial list first, better asm code */ ++ struct list_head slabs_full; ++ struct list_head slabs_free; ++ unsigned long free_objects; ++ unsigned long next_reap; ++ int free_touched; ++ unsigned int free_limit; ++ unsigned int colour_next; /* Per-node cache coloring */ ++ spinlock_t list_lock; ++ struct array_cache *shared; /* shared per node */ ++ struct array_cache **alien; /* on other nodes */ ++}; ++ ++/* ++ * struct kmem_cache ++ * ++ * manages a cache. 
++ */ ++ ++struct kmem_cache { ++/* 1) per-cpu data, touched during every alloc/free */ ++ struct array_cache *array[NR_CPUS]; ++ unsigned int batchcount; ++ unsigned int limit; ++ unsigned int shared; ++ unsigned int buffer_size; ++/* 2) touched by every alloc & free from the backend */ ++ struct kmem_list3 *nodelists[MAX_NUMNODES]; ++ unsigned int flags; /* constant flags */ ++ unsigned int num; /* # of objs per slab */ ++ spinlock_t spinlock; ++ ++/* 3) cache_grow/shrink */ ++ /* order of pgs per slab (2^n) */ ++ unsigned int gfporder; ++ ++ /* force GFP flags, e.g. GFP_DMA */ ++ gfp_t gfpflags; ++ ++ size_t colour; /* cache colouring range */ ++ unsigned int colour_off; /* colour offset */ ++ struct kmem_cache *slabp_cache; ++ unsigned int slab_size; ++ unsigned int dflags; /* dynamic flags */ ++ ++ /* constructor func */ ++ void (*ctor) (void *, struct kmem_cache *, unsigned long); ++ ++ /* de-constructor func */ ++ void (*dtor) (void *, struct kmem_cache *, unsigned long); ++ ++/* 4) cache creation/removal */ ++ const char *name; ++ struct list_head next; ++ ++/* 5) statistics */ ++#if SLAB_STATS ++ unsigned long num_active; ++ unsigned long num_allocations; ++ unsigned long high_mark; ++ unsigned long grown; ++ unsigned long reaped; ++ unsigned long errors; ++ unsigned long max_freeable; ++ unsigned long node_allocs; ++ unsigned long node_frees; ++ atomic_t allochit; ++ atomic_t allocmiss; ++ atomic_t freehit; ++ atomic_t freemiss; ++#endif ++#if SLAB_DEBUG ++ /* ++ * If debugging is enabled, then the allocator can add additional ++ * fields and/or padding to every object. buffer_size contains the total ++ * object size including these internal fields, the following two ++ * variables contain the offset to the user object and its size. 
++ */ ++ int obj_offset; ++ int obj_size; ++#endif ++#ifdef CONFIG_USER_RESOURCE ++ unsigned int objuse; ++#endif ++}; ++ ++#define CFLGS_OFF_SLAB (0x80000000UL) ++#define CFLGS_ENVIDS (0x04000000UL) ++#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) ++#define ENVIDS(x) ((x)->flags & CFLGS_ENVIDS) ++#define kmem_mark_nocharge(c) do { (c)->flags |= SLAB_NO_CHARGE; } while (0) ++ ++struct slab; ++/* Functions for storing/retrieving the cachep and or slab from the ++ * global 'mem_map'. These are used to find the slab an obj belongs to. ++ * With kfree(), these are used to find the cache which an obj belongs to. ++ */ ++static inline void page_set_cache(struct page *page, struct kmem_cache *cache) ++{ ++ page->lru.next = (struct list_head *)cache; ++} ++ ++static inline struct kmem_cache *page_get_cache(struct page *page) ++{ ++ return (struct kmem_cache *)page->lru.next; ++} ++ ++static inline void page_set_slab(struct page *page, struct slab *slab) ++{ ++ page->lru.prev = (struct list_head *)slab; ++} ++ ++static inline struct slab *page_get_slab(struct page *page) ++{ ++ return (struct slab *)page->lru.prev; ++} ++ ++static inline struct kmem_cache *virt_to_cache(const void *obj) ++{ ++ struct page *page = virt_to_page(obj); ++ return page_get_cache(page); ++} ++ ++static inline struct slab *virt_to_slab(const void *obj) ++{ ++ struct page *page = virt_to_page(obj); ++ return page_get_slab(page); ++} ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/kmem_slab.h linux-2.6.16-026test015/include/linux/kmem_slab.h +--- linux-2.6.16.orig/include/linux/kmem_slab.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/kmem_slab.h 2006-07-04 14:41:36.000000000 +0400 +@@ -0,0 +1,71 @@ ++#ifndef __KMEM_SLAB_H__ ++#define __KMEM_SLAB_H__ ++ ++/* ++ * kmem_bufctl_t: ++ * ++ * Bufctl's are used for linking objs within a slab ++ * linked offsets. 
++ * ++ * This implementation relies on "struct page" for locating the cache & ++ * slab an object belongs to. ++ * This allows the bufctl structure to be small (one int), but limits ++ * the number of objects a slab (not a cache) can contain when off-slab ++ * bufctls are used. The limit is the size of the largest general cache ++ * that does not use off-slab slabs. ++ * For 32bit archs with 4 kB pages, is this 56. ++ * This is not serious, as it is only for large objects, when it is unwise ++ * to have too many per slab. ++ * Note: This limit can be raised by introducing a general cache whose size ++ * is less than 512 (PAGE_SIZE<<3), but greater than 256. ++ */ ++ ++typedef unsigned int kmem_bufctl_t; ++#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) ++#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) ++#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2) ++ ++/* ++ * struct slab ++ * ++ * Manages the objs in a slab. Placed either at the beginning of mem allocated ++ * for a slab, or allocated from an general cache. ++ * Slabs are chained into three list: fully used, partial, fully free slabs. ++ */ ++struct slab { ++ struct list_head list; ++ unsigned long colouroff; ++ void *s_mem; /* including colour offset */ ++ unsigned int inuse; /* num of objs active in slab */ ++ kmem_bufctl_t free; ++ unsigned short nodeid; ++}; ++ ++/* ++ * struct slab_rcu ++ * ++ * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to ++ * arrange for kmem_freepages to be called via RCU. This is useful if ++ * we need to approach a kernel structure obliquely, from its address ++ * obtained without the usual locking. We can lock the structure to ++ * stabilize it and check it's still at the given address, only if we ++ * can be sure that the memory has not been meanwhile reused for some ++ * other kind of object (which our subsystem's lock might corrupt). 
++ * ++ * rcu_read_lock before reading the address, then rcu_read_unlock after ++ * taking the spinlock within the structure expected at that address. ++ * ++ * We assume struct slab_rcu can overlay struct slab when destroying. ++ */ ++struct slab_rcu { ++ struct rcu_head head; ++ struct kmem_cache *cachep; ++ void *addr; ++}; ++ ++static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) ++{ ++ return (kmem_bufctl_t *) (slabp + 1); ++} ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/list.h linux-2.6.16-026test015/include/linux/list.h +--- linux-2.6.16.orig/include/linux/list.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/list.h 2006-07-04 14:41:38.000000000 +0400 +@@ -325,6 +325,9 @@ static inline void list_splice_init(stru + #define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + ++#define list_first_entry(ptr, type, member) \ ++ container_of((ptr)->next, type, member) ++ + /** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. +@@ -411,6 +414,20 @@ static inline void list_splice_init(stru + pos = list_entry(pos->member.next, typeof(*pos), member)) + + /** ++ * list_for_each_entry_continue_reverse - iterate backwards over list of given ++ * type continuing after existing point ++ * @pos: the type * to use as a loop counter. ++ * @head: the head for your list. ++ * @member: the name of the list_struct within the struct. ++ */ ++#define list_for_each_entry_continue_reverse(pos, head, member) \ ++ for (pos = list_entry(pos->member.prev, typeof(*pos), member), \ ++ prefetch(pos->member.prev); \ ++ &pos->member != (head); \ ++ pos = list_entry(pos->member.prev, typeof(*pos), member), \ ++ prefetch(pos->member.prev)) ++ ++/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. 
+ * @n: another type * to use as temporary storage +diff -upr linux-2.6.16.orig/include/linux/major.h linux-2.6.16-026test015/include/linux/major.h +--- linux-2.6.16.orig/include/linux/major.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/major.h 2006-07-04 14:41:38.000000000 +0400 +@@ -165,4 +165,7 @@ + + #define VIOTAPE_MAJOR 230 + ++#define UNNAMED_EXTRA_MAJOR 130 ++#define UNNAMED_EXTRA_MAJOR_COUNT 120 ++ + #endif +diff -upr linux-2.6.16.orig/include/linux/mm.h linux-2.6.16-026test015/include/linux/mm.h +--- linux-2.6.16.orig/include/linux/mm.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/mm.h 2006-07-04 14:41:39.000000000 +0400 +@@ -41,6 +41,27 @@ extern int sysctl_legacy_va_layout; + + #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) + ++#include <linux/mm_counter.h> ++ ++#ifdef CONFIG_USER_RESOURCE ++#define set_vma_rss(vma, v) set_mm_counter(vma, vm_rss, v) ++#define get_vma_rss(vma) get_mm_counter(vma, vm_rss) ++#define inc_vma_rss(vma) inc_mm_counter(vma, vm_rss) ++#define dec_vma_rss(vma) dec_mm_counter(vma, vm_rss) ++#define add_vma_rss(vma, v) add_mm_counter(vma, vm_rss, v) ++#define sub_vma_rss(vma, v) do { \ ++ if (unlikely(dec_mm_counter_chk(vma, vm_rss, v))) \ ++ warn_bad_rss(vma, v); \ ++ } while (0) ++#else ++#define set_vma_rss(vma, v) do { } while (0) ++#define get_vma_rss(vma) (0) ++#define inc_vma_rss(vma) do { } while (0) ++#define dec_vma_rss(vma) do { } while (0) ++#define add_vma_rss(vma, v) do { } while (0) ++#define sub_vma_rss(vma, v) do { } while (0) ++#endif ++ + /* + * Linux kernel virtual memory manager primitives. 
+ * The idea being to have a "virtual" mm in the same way +@@ -111,6 +132,9 @@ struct vm_area_struct { + #ifdef CONFIG_NUMA + struct mempolicy *vm_policy; /* NUMA policy for the VMA */ + #endif ++#ifdef CONFIG_USER_RESOURCE ++ mm_counter_t _vm_rss; ++#endif + }; + + /* +@@ -229,10 +253,9 @@ struct page { + unsigned long private; /* Mapping-private opaque data: + * usually used for buffer_heads + * if PagePrivate set; used for +- * swp_entry_t if PageSwapCache. +- * When page is free, this ++ * swp_entry_t if PageSwapCache; + * indicates order in the buddy +- * system. ++ * system if PG_buddy is set. + */ + struct address_space *mapping; /* If low bit clear, points to + * inode address_space, or NULL. +@@ -264,6 +287,12 @@ struct page { + void *virtual; /* Kernel virtual address (NULL if + not kmapped, ie. highmem) */ + #endif /* WANT_PAGE_VIRTUAL */ ++#ifdef CONFIG_USER_RESOURCE ++ union { ++ struct user_beancounter *page_ub; ++ struct page_beancounter *page_pb; ++ } bc; ++#endif + }; + + #define page_private(page) ((page)->private) +@@ -636,16 +665,9 @@ struct page *shmem_nopage(struct vm_area + int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new); + struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, + unsigned long addr); +-int shmem_lock(struct file *file, int lock, struct user_struct *user); + #else + #define shmem_nopage filemap_nopage + +-static inline int shmem_lock(struct file *file, int lock, +- struct user_struct *user) +-{ +- return 0; +-} +- + static inline int shmem_set_policy(struct vm_area_struct *vma, + struct mempolicy *new) + { +@@ -706,7 +728,9 @@ void free_pgd_range(struct mmu_gather ** + void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma, + unsigned long floor, unsigned long ceiling); + int copy_page_range(struct mm_struct *dst, struct mm_struct *src, +- struct vm_area_struct *vma); ++ struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); ++int __copy_page_range(struct 
vm_area_struct *dst_vma, struct vm_area_struct *vma, ++ unsigned long addr, size_t size); + int zeromap_page_range(struct vm_area_struct *vma, unsigned long from, + unsigned long size, pgprot_t prot); + void unmap_mapping_range(struct address_space *mapping, +diff -upr linux-2.6.16.orig/include/linux/mm_counter.h linux-2.6.16-026test015/include/linux/mm_counter.h +--- linux-2.6.16.orig/include/linux/mm_counter.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/mm_counter.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,32 @@ ++#ifndef __MM_COUNTER_H_ ++#define __MM_COUNTER_H_ ++#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS ++/* ++ * The mm counters are not protected by its page_table_lock, ++ * so must be incremented atomically. ++ */ ++#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value) ++#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member)) ++#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member) ++#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) ++#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) ++#define dec_mm_counter_chk(mm, member, value) \ ++ atomic_long_add_negative(-(value), &(mm)->_##member) ++typedef atomic_long_t mm_counter_t; ++ ++#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ ++/* ++ * The mm counters are protected by its page_table_lock, ++ * so can be incremented directly. 
++ */ ++#define set_mm_counter(mm, member, value) (mm)->_##member = (value) ++#define get_mm_counter(mm, member) ((mm)->_##member) ++#define add_mm_counter(mm, member, value) (mm)->_##member += (value) ++#define inc_mm_counter(mm, member) (mm)->_##member++ ++#define dec_mm_counter(mm, member) (mm)->_##member-- ++#define dec_mm_counter_chk(mm, member, value) \ ++ (((mm)->_##member -= (value)) < 0) ++typedef unsigned long mm_counter_t; ++ ++#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ ++#endif +diff -upr linux-2.6.16.orig/include/linux/mount.h linux-2.6.16-026test015/include/linux/mount.h +--- linux-2.6.16.orig/include/linux/mount.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/mount.h 2006-07-04 14:41:38.000000000 +0400 +@@ -47,6 +47,7 @@ struct vfsmount { + struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ + struct namespace *mnt_namespace; /* containing namespace */ + int mnt_pinned; ++ unsigned owner; + }; + + static inline struct vfsmount *mntget(struct vfsmount *mnt) +diff -upr linux-2.6.16.orig/include/linux/msg.h linux-2.6.16-026test015/include/linux/msg.h +--- linux-2.6.16.orig/include/linux/msg.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/msg.h 2006-07-04 14:41:39.000000000 +0400 +@@ -92,6 +92,8 @@ struct msg_queue { + struct list_head q_senders; + }; + ++int sysvipc_walk_msg(int (*func)(int, struct msg_queue*, void *), void *arg); ++ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_MSG_H */ +diff -upr linux-2.6.16.orig/include/linux/namei.h linux-2.6.16-026test015/include/linux/namei.h +--- linux-2.6.16.orig/include/linux/namei.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/namei.h 2006-07-04 14:41:38.000000000 +0400 +@@ -48,12 +48,15 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA + #define LOOKUP_PARENT 16 + #define LOOKUP_NOALT 32 + #define LOOKUP_REVAL 64 ++#define LOOKUP_STRICT 128 /* no symlinks or other filesystems */ ++ + /* + * 
Intent data + */ + #define LOOKUP_OPEN (0x0100) + #define LOOKUP_CREATE (0x0200) + #define LOOKUP_ACCESS (0x0400) ++#define LOOKUP_NOAREACHECK (0x0800) /* no area check on lookup */ + + extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); + extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *)); +diff -upr linux-2.6.16.orig/include/linux/namespace.h linux-2.6.16-026test015/include/linux/namespace.h +--- linux-2.6.16.orig/include/linux/namespace.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/namespace.h 2006-07-04 14:41:38.000000000 +0400 +@@ -13,6 +13,8 @@ struct namespace { + int event; + }; + ++extern struct rw_semaphore namespace_sem; ++ + extern int copy_namespace(int, struct task_struct *); + extern void __put_namespace(struct namespace *namespace); + extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *); +diff -upr linux-2.6.16.orig/include/linux/netdevice.h linux-2.6.16-026test015/include/linux/netdevice.h +--- linux-2.6.16.orig/include/linux/netdevice.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netdevice.h 2006-07-04 14:41:39.000000000 +0400 +@@ -37,6 +37,7 @@ + #include <linux/config.h> + #include <linux/device.h> + #include <linux/percpu.h> ++#include <linux/ctype.h> + + struct divert_blk; + struct vlan_group; +@@ -233,6 +234,11 @@ enum netdev_state_t + __LINK_STATE_LINKWATCH_PENDING + }; + ++struct netdev_bc { ++ struct user_beancounter *exec_ub, *owner_ub; ++}; ++ ++#define netdev_bc(dev) (&(dev)->dev_bc) + + /* + * This structure holds at boot time configured netdevice settings. 
They +@@ -309,6 +315,8 @@ struct net_device + #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ + #define NETIF_F_LLTX 4096 /* LockLess TX */ + #define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/ ++#define NETIF_F_VIRTUAL 0x40000000 /* can be registered in ve */ ++#define NETIF_F_VENET 0x80000000 /* Device is VENET device */ + + struct net_device *next_sched; + +@@ -431,6 +439,7 @@ struct net_device + enum { NETREG_UNINITIALIZED=0, + NETREG_REGISTERING, /* called register_netdevice */ + NETREG_REGISTERED, /* completed register todo */ ++ NETREG_REGISTER_ERR, /* register todo failed */ + NETREG_UNREGISTERING, /* called unregister_netdevice */ + NETREG_UNREGISTERED, /* completed unregister todo */ + NETREG_RELEASED, /* called free_netdev */ +@@ -500,8 +509,18 @@ struct net_device + struct divert_blk *divert; + #endif /* CONFIG_NET_DIVERT */ + ++ unsigned orig_mtu; /* MTU value before move to VE */ ++ struct ve_struct *owner_env; /* Owner VE of the interface */ ++ struct netdev_bc dev_bc; ++ + /* class/net/name entry */ + struct class_device class_dev; ++ ++#ifdef CONFIG_VE ++ /* List entry in global devices list to keep track of their names ++ * assignment */ ++ struct list_head dev_global_list_entry; ++#endif + }; + + #define NETDEV_ALIGN 32 +@@ -535,9 +554,23 @@ struct packet_type { + #include <linux/notifier.h> + + extern struct net_device loopback_dev; /* The loopback */ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define loopback_dev (*get_exec_env()->_loopback_dev) ++#define ve0_loopback (*get_ve0()->_loopback_dev) ++#define dev_base (get_exec_env()->_net_dev_base) ++#define visible_dev_head(x) (&(x)->_net_dev_head) ++#define visible_dev_index_head(x) (&(x)->_net_dev_index_head) ++#else + extern struct net_device *dev_base; /* All devices */ ++#define ve0_loopback loopback_dev ++#define visible_dev_head(x) NULL ++#define visible_dev_index_head(x) NULL ++#endif + extern rwlock_t dev_base_lock; /* Device list lock */ + 
++struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env); ++struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env); ++ + extern int netdev_boot_setup_check(struct net_device *dev); + extern unsigned long netdev_boot_base(const char *prefix, int unit); + extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); +@@ -554,6 +587,7 @@ extern int dev_alloc_name(struct net_de + extern int dev_open(struct net_device *dev); + extern int dev_close(struct net_device *dev); + extern int dev_queue_xmit(struct sk_buff *skb); ++extern int dev_set_mtu(struct net_device *dev, int new_mtu); + extern int register_netdevice(struct net_device *dev); + extern int unregister_netdevice(struct net_device *dev); + extern void free_netdev(struct net_device *dev); +@@ -951,6 +985,18 @@ extern void dev_seq_stop(struct seq_file + + extern void linkwatch_run_queue(void); + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static inline int ve_is_dev_movable(struct net_device *dev) ++{ ++ return !(dev->features & NETIF_F_VIRTUAL); ++} ++#else ++static inline int ve_is_dev_movable(struct net_device *dev) ++{ ++ return 0; ++} ++#endif ++ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_DEV_H */ +diff -upr linux-2.6.16.orig/include/linux/netfilter/nf_conntrack_ftp.h linux-2.6.16-026test015/include/linux/netfilter/nf_conntrack_ftp.h +--- linux-2.6.16.orig/include/linux/netfilter/nf_conntrack_ftp.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter/nf_conntrack_ftp.h 2006-07-04 14:41:39.000000000 +0400 +@@ -32,13 +32,22 @@ struct ip_conntrack_expect; + + /* For NAT to hook in when we find a packet which describes what other + * connection we should expect. 
*/ +-extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb, ++typedef unsigned int (*ip_nat_helper_ftp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + enum ip_ct_ftp_type type, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp, + u32 *seq); ++extern ip_nat_helper_ftp_hook ip_nat_ftp_hook; ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ip_nat_ftp_hook \ ++ ((ip_nat_helper_ftp_hook) \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook)) ++#else ++#define ve_ip_nat_ftp_hook ip_nat_ftp_hook ++#endif + #endif /* __KERNEL__ */ + + #endif /* _NF_CONNTRACK_FTP_H */ +diff -upr linux-2.6.16.orig/include/linux/netfilter/x_tables.h linux-2.6.16-026test015/include/linux/netfilter/x_tables.h +--- linux-2.6.16.orig/include/linux/netfilter/x_tables.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter/x_tables.h 2006-07-04 14:41:39.000000000 +0400 +@@ -80,12 +80,19 @@ struct xt_counters_info + + #ifdef __KERNEL__ + ++#include <linux/config.h> + #include <linux/netdevice.h> + + #define ASSERT_READ_LOCK(x) + #define ASSERT_WRITE_LOCK(x) + #include <linux/netfilter_ipv4/listhelp.h> + ++#ifdef CONFIG_COMPAT ++#define COMPAT_TO_USER 1 ++#define COMPAT_FROM_USER -1 ++#define COMPAT_CALC_SIZE 0 ++#endif ++ + struct xt_match + { + struct list_head list; +@@ -118,6 +125,10 @@ struct xt_match + /* Called when entry of this type deleted. */ + void (*destroy)(void *matchinfo, unsigned int matchinfosize); + ++#ifdef CONFIG_COMPAT ++ /* Called when userspace align differs from kernel space one */ ++ int (*compat)(void *match, void **dstptr, int *size, int convert); ++#endif + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ + struct module *me; + }; +@@ -154,6 +165,10 @@ struct xt_target + /* Called when entry of this type deleted. 
*/ + void (*destroy)(void *targinfo, unsigned int targinfosize); + ++#ifdef CONFIG_COMPAT ++ /* Called when userspace align differs from kernel space one */ ++ int (*compat)(void *target, void **dstptr, int *size, int convert); ++#endif + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ + struct module *me; + }; +@@ -211,6 +226,10 @@ extern int xt_register_table(struct xt_t + struct xt_table_info *bootstrap, + struct xt_table_info *newinfo); + extern void *xt_unregister_table(struct xt_table *table); ++extern struct xt_table *virt_xt_register_table(struct xt_table *table, ++ struct xt_table_info *bootstrap, ++ struct xt_table_info *newinfo); ++extern void *virt_xt_unregister_table(struct xt_table *table); + + extern struct xt_table_info *xt_replace_table(struct xt_table *table, + unsigned int num_counters, +@@ -233,6 +252,34 @@ extern void xt_proto_fini(int af); + extern struct xt_table_info *xt_alloc_table_info(unsigned int size); + extern void xt_free_table_info(struct xt_table_info *info); + ++#ifdef CONFIG_COMPAT ++#include <net/compat.h> ++ ++/* FIXME: this works only on 32 bit tasks ++ * need to change whole approach in order to calculate align as function of ++ * current task alignment */ ++ ++struct compat_xt_counters ++{ ++ u_int32_t cnt[4]; ++}; ++ ++struct compat_xt_counters_info ++{ ++ char name[XT_TABLE_MAXNAMELEN]; ++ compat_uint_t num_counters; ++ struct compat_xt_counters counters[0]; ++}; ++ ++#define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \ ++ & ~(__alignof__(struct compat_xt_counters)-1)) ++ ++extern int ipt_match_align_compat(void *match, void **dstptr, ++ int *size, int off, int convert); ++extern int ipt_target_align_compat(void *target, void **dstptr, ++ int *size, int off, int convert); ++ ++#endif /* CONFIG_COMPAT */ + #endif /* __KERNEL__ */ + + #endif /* _X_TABLES_H */ +diff -upr linux-2.6.16.orig/include/linux/netfilter/xt_conntrack.h 
linux-2.6.16-026test015/include/linux/netfilter/xt_conntrack.h +--- linux-2.6.16.orig/include/linux/netfilter/xt_conntrack.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter/xt_conntrack.h 2006-07-04 14:41:36.000000000 +0400 +@@ -5,6 +5,7 @@ + #ifndef _XT_CONNTRACK_H + #define _XT_CONNTRACK_H + ++#include <linux/config.h> + #include <linux/netfilter/nf_conntrack_tuple_common.h> + #include <linux/in.h> + +@@ -60,4 +61,21 @@ struct xt_conntrack_info + /* Inverse flags */ + u_int8_t invflags; + }; ++ ++#ifdef CONFIG_COMPAT ++struct compat_xt_conntrack_info ++{ ++ compat_uint_t statemask, statusmask; ++ ++ struct ip_conntrack_tuple tuple[IP_CT_DIR_MAX]; ++ struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX]; ++ ++ compat_ulong_t expires_min, expires_max; ++ ++ /* Flags word */ ++ u_int8_t flags; ++ /* Inverse flags */ ++ u_int8_t invflags; ++}; ++#endif + #endif /*_XT_CONNTRACK_H*/ +diff -upr linux-2.6.16.orig/include/linux/netfilter/xt_helper.h linux-2.6.16-026test015/include/linux/netfilter/xt_helper.h +--- linux-2.6.16.orig/include/linux/netfilter/xt_helper.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter/xt_helper.h 2006-07-04 14:41:36.000000000 +0400 +@@ -1,8 +1,17 @@ + #ifndef _XT_HELPER_H + #define _XT_HELPER_H + ++#include <linux/config.h> ++ + struct xt_helper_info { + int invert; + char name[30]; + }; ++ ++#ifdef CONFIG_COMPAT ++struct compat_xt_helper_info { ++ compat_int_t invert; ++ char name[30]; ++}; ++#endif + #endif /* _XT_HELPER_H */ +diff -upr linux-2.6.16.orig/include/linux/netfilter/xt_limit.h linux-2.6.16-026test015/include/linux/netfilter/xt_limit.h +--- linux-2.6.16.orig/include/linux/netfilter/xt_limit.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter/xt_limit.h 2006-07-04 14:41:36.000000000 +0400 +@@ -1,6 +1,8 @@ + #ifndef _XT_RATE_H + #define _XT_RATE_H + ++#include <linux/config.h> ++ + /* timings are in 
milliseconds. */ + #define XT_LIMIT_SCALE 10000 + +@@ -18,4 +20,19 @@ struct xt_rateinfo { + /* Ugly, ugly fucker. */ + struct xt_rateinfo *master; + }; ++ ++#ifdef CONFIG_COMPAT ++struct compat_xt_rateinfo { ++ u_int32_t avg; /* Average secs between packets * scale */ ++ u_int32_t burst; /* Period multiplier for upper limit. */ ++ ++ /* Used internally by the kernel */ ++ compat_ulong_t prev; ++ u_int32_t credit; ++ u_int32_t credit_cap, cost; ++ ++ /* Ugly, ugly fucker. */ ++ compat_uptr_t master; ++}; ++#endif + #endif /*_XT_RATE_H*/ +diff -upr linux-2.6.16.orig/include/linux/netfilter/xt_state.h linux-2.6.16-026test015/include/linux/netfilter/xt_state.h +--- linux-2.6.16.orig/include/linux/netfilter/xt_state.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter/xt_state.h 2006-07-04 14:41:36.000000000 +0400 +@@ -1,6 +1,8 @@ + #ifndef _XT_STATE_H + #define _XT_STATE_H + ++#include <linux/config.h> ++ + #define XT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1)) + #define XT_STATE_INVALID (1 << 0) + +@@ -10,4 +12,11 @@ struct xt_state_info + { + unsigned int statemask; + }; ++ ++#ifdef CONFIG_COMPAT ++struct compat_xt_state_info ++{ ++ compat_uint_t statemask; ++}; ++#endif + #endif /*_XT_STATE_H*/ +diff -upr linux-2.6.16.orig/include/linux/netfilter.h linux-2.6.16-026test015/include/linux/netfilter.h +--- linux-2.6.16.orig/include/linux/netfilter.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter.h 2006-07-04 14:41:39.000000000 +0400 +@@ -107,12 +107,21 @@ struct nf_info + int nf_register_hook(struct nf_hook_ops *reg); + void nf_unregister_hook(struct nf_hook_ops *reg); + ++int virt_nf_register_hook(struct nf_hook_ops *reg); ++int virt_nf_unregister_hook(struct nf_hook_ops *reg); ++ + /* Functions to register get/setsockopt ranges (non-inclusive). You + need to check permissions yourself! 
*/ + int nf_register_sockopt(struct nf_sockopt_ops *reg); + void nf_unregister_sockopt(struct nf_sockopt_ops *reg); + ++#ifdef CONFIG_VE_IPTABLES ++#define ve_nf_hooks \ ++ ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks)) ++#else + extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; ++#define ve_nf_hooks nf_hooks ++#endif + + /* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will + * disappear once iptables is replaced with pkttables. Please DO NOT use them +@@ -190,7 +199,7 @@ static inline int nf_hook_thresh(int pf, + if (!cond) + return 1; + #ifndef CONFIG_NETFILTER_DEBUG +- if (list_empty(&nf_hooks[pf][hook])) ++ if (list_empty(&ve_nf_hooks[pf][hook])) + return 1; + #endif + return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh); +diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack.h +--- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack.h 2006-07-04 14:41:39.000000000 +0400 +@@ -71,6 +71,11 @@ do { \ + + struct ip_conntrack_helper; + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/ve.h> ++#include <linux/ve_owner.h> ++#endif ++ + struct ip_conntrack + { + /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, +@@ -122,8 +127,15 @@ struct ip_conntrack + /* Traversed often, so hopefully in different cacheline to top */ + /* These are my tuples; original and reply */ + struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_struct *ct_owner_env; ++#endif + }; + ++#ifdef CONFIG_VE_IPTABLES ++DCL_VE_OWNER_PROTO(CT, struct ip_conntrack, ct_owner_env) ++#endif ++ + struct ip_conntrack_expect + { + /* Internal linked list (global expectation list) */ +@@ -232,7 +244,15 @@ extern void ip_conntrack_tcp_update(stru + enum ip_conntrack_dir dir); + + /* Call me 
when a conntrack is destroyed. */ ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ip_conntrack_destroyed \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_destroyed) ++#else + extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack); ++#define ve_ip_conntrack_destroyed ip_conntrack_destroyed ++#endif ++ + + /* Fake conntrack entry for untracked connections */ + extern struct ip_conntrack ip_conntrack_untracked; +@@ -261,7 +281,7 @@ extern void ip_conntrack_proto_put(struc + extern void ip_ct_remove_expectations(struct ip_conntrack *ct); + + extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *, +- struct ip_conntrack_tuple *); ++ struct ip_conntrack_tuple *, struct user_beancounter *); + + extern void ip_conntrack_free(struct ip_conntrack *ct); + +@@ -270,6 +290,8 @@ extern void ip_conntrack_hash_insert(str + extern struct ip_conntrack_expect * + __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); + ++extern void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp); ++ + extern struct ip_conntrack_expect * + ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); + +@@ -291,6 +313,7 @@ static inline int is_dying(struct ip_con + } + + extern unsigned int ip_conntrack_htable_size; ++extern int ip_conntrack_disable_ve0; + + #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) + +@@ -341,6 +364,9 @@ ip_conntrack_event_cache(enum ip_conntra + struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct; + struct ip_conntrack_ecache *ecache; + ++ if (!ve_is_super(get_exec_env())) ++ return; ++ + local_bh_disable(); + ecache = &__get_cpu_var(ip_conntrack_ecache); + if (ct != ecache->ct) +@@ -352,7 +378,7 @@ ip_conntrack_event_cache(enum ip_conntra + static inline void ip_conntrack_event(enum ip_conntrack_events event, + struct ip_conntrack *ct) + { +- if (is_confirmed(ct) && !is_dying(ct)) ++ if (is_confirmed(ct) && !is_dying(ct) && ve_is_super(get_exec_env())) + 
notifier_call_chain(&ip_conntrack_chain, event, ct); + } + +@@ -360,7 +386,8 @@ static inline void + ip_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct ip_conntrack_expect *exp) + { +- notifier_call_chain(&ip_conntrack_expect_chain, event, exp); ++ if (ve_is_super(get_exec_env())) ++ notifier_call_chain(&ip_conntrack_expect_chain, event, exp); + } + #else /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, +diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack_core.h +--- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack_core.h 2006-07-04 14:41:39.000000000 +0400 +@@ -3,7 +3,6 @@ + #include <linux/netfilter.h> + + #define MAX_IP_CT_PROTO 256 +-extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; + + /* This header is used to share core functionality between the + standalone connection tracking module, and the compatibility layer's use +@@ -54,8 +53,26 @@ static inline int ip_conntrack_confirm(s + + extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ip_ct_protos \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_protos) ++#define ve_ip_conntrack_hash \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_hash) ++#define ve_ip_conntrack_expect_list \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_expect_list) ++#define ve_ip_conntrack_vmalloc \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_vmalloc) ++#else ++extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; + extern struct list_head *ip_conntrack_hash; + extern struct list_head ip_conntrack_expect_list; ++#define ve_ip_ct_protos ip_ct_protos ++#define ve_ip_conntrack_hash ip_conntrack_hash ++#define 
ve_ip_conntrack_expect_list ip_conntrack_expect_list ++#define ve_ip_conntrack_vmalloc ip_conntrack_vmalloc ++#endif /* CONFIG_VE_IPTABLES */ ++ + extern rwlock_t ip_conntrack_lock; + #endif /* _IP_CONNTRACK_CORE_H */ + +diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack_helper.h +--- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2006-07-04 14:41:39.000000000 +0400 +@@ -31,6 +31,9 @@ struct ip_conntrack_helper + extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); + extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *); + ++extern int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *); ++extern void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *); ++ + /* Allocate space for an expectation: this is mandatory before calling + ip_conntrack_expect_related. You will have to call put afterwards. 
*/ + extern struct ip_conntrack_expect * +@@ -41,4 +44,5 @@ extern void ip_conntrack_expect_put(stru + extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp); + extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp); + ++extern struct list_head helpers; + #endif /*_IP_CONNTRACK_HELPER_H*/ +diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack_irc.h +--- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2006-07-04 14:41:39.000000000 +0400 +@@ -14,16 +14,26 @@ + #ifndef _IP_CONNTRACK_IRC_H + #define _IP_CONNTRACK_IRC_H + ++#include <linux/netfilter_ipv4/ip_conntrack_helper.h> ++ + /* This structure exists only once per master */ + struct ip_ct_irc_master { + }; + + #ifdef __KERNEL__ +-extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb, +- enum ip_conntrack_info ctinfo, +- unsigned int matchoff, +- unsigned int matchlen, +- struct ip_conntrack_expect *exp); ++typedef unsigned int (*ip_nat_helper_irc_hook)(struct sk_buff **, ++ enum ip_conntrack_info, unsigned int, unsigned int, ++ struct ip_conntrack_expect *); ++ ++extern ip_nat_helper_irc_hook ip_nat_irc_hook; ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ip_nat_irc_hook \ ++ ((ip_nat_helper_irc_hook) \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_irc_hook)) ++#else ++#define ve_ip_nat_irc_hook ip_nat_irc_hook ++#endif + + #define IRC_PORT 6667 + +diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +--- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2006-07-04 14:41:39.000000000 +0400 +@@ -67,6 
+67,7 @@ struct ip_conntrack_protocol + /* Protocol registration. */ + extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); + extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); ++ + /* Existing built-in protocols */ + extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; + extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; +@@ -74,6 +75,41 @@ extern struct ip_conntrack_protocol ip_c + extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; + extern int ip_conntrack_protocol_tcp_init(void); + ++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_SYSCTL) ++#include <linux/sched.h> ++#define ve_ip_ct_tcp_timeouts \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeouts) ++#define ve_ip_ct_udp_timeout \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout) ++#define ve_ip_ct_udp_timeout_stream \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout_stream) ++#define ve_ip_ct_icmp_timeout \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_icmp_timeout) ++#define ve_ip_ct_generic_timeout \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_generic_timeout) ++#define ve_ip_ct_log_invalid \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_log_invalid) ++#define ve_ip_ct_tcp_timeout_max_retrans \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeout_max_retrans) ++#define ve_ip_ct_tcp_loose \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_loose) ++#define ve_ip_ct_tcp_be_liberal \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_be_liberal) ++#define ve_ip_ct_tcp_max_retrans \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_max_retrans) ++#else ++#define ve_ip_ct_tcp_timeouts *tcp_timeouts ++#define ve_ip_ct_udp_timeout ip_ct_udp_timeout ++#define ve_ip_ct_udp_timeout_stream ip_ct_udp_timeout_stream ++#define ve_ip_ct_icmp_timeout ip_ct_icmp_timeout ++#define ve_ip_ct_generic_timeout ip_ct_generic_timeout ++#define ve_ip_ct_log_invalid ip_ct_log_invalid ++#define ve_ip_ct_tcp_timeout_max_retrans 
ip_ct_tcp_timeout_max_retrans ++#define ve_ip_ct_tcp_loose ip_ct_tcp_loose ++#define ve_ip_ct_tcp_be_liberal ip_ct_tcp_be_liberal ++#define ve_ip_ct_tcp_max_retrans ip_ct_tcp_max_retrans ++#endif ++ + /* Log invalid packets */ + extern unsigned int ip_ct_log_invalid; + +@@ -85,10 +121,10 @@ extern int ip_ct_port_nfattr_to_tuple(st + #ifdef CONFIG_SYSCTL + #ifdef DEBUG_INVALID_PACKETS + #define LOG_INVALID(proto) \ +- (ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) ++ (ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW) + #else + #define LOG_INVALID(proto) \ +- ((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \ ++ ((ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW) \ + && net_ratelimit()) + #endif + #else +diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_nat.h linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_nat.h +--- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_nat.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_nat.h 2006-07-04 14:41:39.000000000 +0400 +@@ -1,5 +1,6 @@ + #ifndef _IP_NAT_H + #define _IP_NAT_H ++#include <linux/config.h> + #include <linux/netfilter_ipv4.h> + #include <linux/netfilter_ipv4/ip_conntrack_tuple.h> + +@@ -72,10 +73,29 @@ extern unsigned int ip_nat_setup_info(st + extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + ++extern void ip_nat_hash_conntrack(struct ip_conntrack *conntrack); ++ + /* Calculate relative checksum. 
*/ + extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv, + u_int32_t newval, + u_int16_t oldcheck); ++ ++#ifdef CONFIG_COMPAT ++#include <net/compat.h> ++ ++struct compat_ip_nat_range ++{ ++ compat_uint_t flags; ++ u_int32_t min_ip, max_ip; ++ union ip_conntrack_manip_proto min, max; ++}; ++ ++struct compat_ip_nat_multi_range ++{ ++ compat_uint_t rangesize; ++ struct compat_ip_nat_range range[1]; ++}; ++#endif + #else /* !__KERNEL__: iptables wants this to compile. */ + #define ip_nat_multi_range ip_nat_multi_range_compat + #endif /*__KERNEL__*/ +diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_nat_rule.h linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_nat_rule.h +--- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_nat_rule.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_nat_rule.h 2006-07-04 14:41:39.000000000 +0400 +@@ -6,7 +6,7 @@ + + #ifdef __KERNEL__ + +-extern int ip_nat_rule_init(void) __init; ++extern int ip_nat_rule_init(void); + extern void ip_nat_rule_cleanup(void); + extern int ip_nat_rule_find(struct sk_buff **pskb, + unsigned int hooknum, +diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_tables.h +--- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_tables.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter_ipv4/ip_tables.h 2006-07-04 14:41:39.000000000 +0400 +@@ -16,6 +16,7 @@ + #define _IPTABLES_H + + #ifdef __KERNEL__ ++#include <linux/config.h> + #include <linux/if.h> + #include <linux/types.h> + #include <linux/in.h> +@@ -330,7 +331,7 @@ extern void ipt_init(void) __init; + //#define ipt_register_table(tbl, repl) xt_register_table(AF_INET, tbl, repl) + //#define ipt_unregister_table(tbl) xt_unregister_table(AF_INET, tbl) + +-extern int ipt_register_table(struct ipt_table *table, ++extern struct ipt_table *ipt_register_table(struct ipt_table *table, + const 
struct ipt_replace *repl); + extern void ipt_unregister_table(struct ipt_table *table); + +@@ -364,5 +365,62 @@ extern unsigned int ipt_do_table(struct + void *userdata); + + #define IPT_ALIGN(s) XT_ALIGN(s) ++ ++#ifdef CONFIG_COMPAT ++#include <net/compat.h> ++ ++struct compat_ipt_getinfo ++{ ++ char name[IPT_TABLE_MAXNAMELEN]; ++ compat_uint_t valid_hooks; ++ compat_uint_t hook_entry[NF_IP_NUMHOOKS]; ++ compat_uint_t underflow[NF_IP_NUMHOOKS]; ++ compat_uint_t num_entries; ++ compat_uint_t size; ++}; ++ ++struct compat_ipt_entry ++{ ++ struct ipt_ip ip; ++ compat_uint_t nfcache; ++ u_int16_t target_offset; ++ u_int16_t next_offset; ++ compat_uint_t comefrom; ++ struct compat_xt_counters counters; ++ unsigned char elems[0]; ++}; ++ ++struct compat_ipt_entry_match ++{ ++ union { ++ struct { ++ u_int16_t match_size; ++ char name[IPT_FUNCTION_MAXNAMELEN]; ++ } user; ++ u_int16_t match_size; ++ } u; ++ unsigned char data[0]; ++}; ++ ++struct compat_ipt_entry_target ++{ ++ union { ++ struct { ++ u_int16_t target_size; ++ char name[IPT_FUNCTION_MAXNAMELEN]; ++ } user; ++ u_int16_t target_size; ++ } u; ++ unsigned char data[0]; ++}; ++ ++#define COMPAT_IPT_ALIGN(s) COMPAT_XT_ALIGN(s) ++ ++extern int ipt_match_align_compat(void *match, void **dstptr, ++ int *size, int off, int convert); ++extern int ipt_target_align_compat(void *target, void **dstptr, ++ int *size, int off, int convert); ++ ++#endif /* CONFIG_COMPAT */ + #endif /*__KERNEL__*/ + #endif /* _IPTABLES_H */ +diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv6/ip6_tables.h linux-2.6.16-026test015/include/linux/netfilter_ipv6/ip6_tables.h +--- linux-2.6.16.orig/include/linux/netfilter_ipv6/ip6_tables.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/netfilter_ipv6/ip6_tables.h 2006-07-04 14:41:39.000000000 +0400 +@@ -340,7 +340,7 @@ extern void ip6t_init(void) __init; + #define ip6t_register_match(match) xt_register_match(AF_INET6, match) + #define ip6t_unregister_match(match) 
xt_unregister_match(AF_INET6, match) + +-extern int ip6t_register_table(struct ip6t_table *table, ++extern struct ip6t_table *ip6t_register_table(struct ip6t_table *table, + const struct ip6t_replace *repl); + extern void ip6t_unregister_table(struct ip6t_table *table); + extern unsigned int ip6t_do_table(struct sk_buff **pskb, +diff -upr linux-2.6.16.orig/include/linux/nfcalls.h linux-2.6.16-026test015/include/linux/nfcalls.h +--- linux-2.6.16.orig/include/linux/nfcalls.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/nfcalls.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,254 @@ ++/* ++ * include/linux/nfcalls.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _LINUX_NFCALLS_H ++#define _LINUX_NFCALLS_H ++ ++#include <linux/rcupdate.h> ++ ++#ifdef CONFIG_MODULES ++extern struct module no_module; ++ ++#define DECL_KSYM_MODULE(name) \ ++ extern struct module *vz_mod_##name ++#define DECL_KSYM_CALL(type, name, args) \ ++ extern type (*vz_##name) args ++ ++#define INIT_KSYM_MODULE(name) \ ++ struct module *vz_mod_##name = &no_module; \ ++ EXPORT_SYMBOL(vz_mod_##name) ++#define INIT_KSYM_CALL(type, name, args) \ ++ type (*vz_##name) args; \ ++ EXPORT_SYMBOL(vz_##name) ++ ++#define __KSYMERRCALL(err, type, mod, name, args) \ ++({ \ ++ type ret = (type)err; \ ++ if (!__vzksym_module_get(vz_mod_##mod)) { \ ++ if (vz_##name) \ ++ ret = ((*vz_##name)args); \ ++ __vzksym_module_put(vz_mod_##mod); \ ++ } \ ++ ret; \ ++}) ++#define __KSYMSAFECALL_VOID(mod, name, args) \ ++do { \ ++ if (!__vzksym_module_get(vz_mod_##mod)) { \ ++ if (vz_##name) \ ++ ((*vz_##name)args); \ ++ __vzksym_module_put(vz_mod_##mod); \ ++ } \ ++} while (0) ++#else ++#define DECL_KSYM_CALL(type, name, args) \ ++ extern type name args ++#define INIT_KSYM_MODULE(name) ++#define INIT_KSYM_CALL(type, name, args) \ ++ type name args ++#define __KSYMERRCALL(err, type, mod, name, 
args) ((*name)args) ++#define __KSYMSAFECALL_VOID(mod, name, args) ((*name)args) ++#endif ++ ++#define KSYMERRCALL(err, mod, name, args) \ ++ __KSYMERRCALL(err, int, mod, name, args) ++#define KSYMSAFECALL(type, mod, name, args) \ ++ __KSYMERRCALL(0, type, mod, name, args) ++#define KSYMSAFECALL_VOID(mod, name, args) \ ++ __KSYMSAFECALL_VOID(mod, name, args) ++ ++#if defined(CONFIG_VE) && defined(CONFIG_MODULES) ++/* should be called _after_ KSYMRESOLVE's */ ++#define KSYMMODRESOLVE(name) \ ++ __vzksym_modresolve(&vz_mod_##name, THIS_MODULE) ++#define KSYMMODUNRESOLVE(name) \ ++ __vzksym_modunresolve(&vz_mod_##name) ++ ++#define KSYMRESOLVE(name) \ ++ vz_##name = &name ++#define KSYMUNRESOLVE(name) \ ++ vz_##name = NULL ++#else ++#define KSYMRESOLVE(name) do { } while (0) ++#define KSYMUNRESOLVE(name) do { } while (0) ++#define KSYMMODRESOLVE(name) do { } while (0) ++#define KSYMMODUNRESOLVE(name) do { } while (0) ++#endif ++ ++#ifdef CONFIG_MODULES ++static inline void __vzksym_modresolve(struct module **modp, struct module *mod) ++{ ++ /* ++ * we want to be sure, that pointer updates are visible first: ++ * 1. wmb() is here just to be on the safe side ++ * (note, no rmb() in KSYMSAFECALL) ++ * 2. synchronize_sched() guarantees that updates are visible ++ * on all cpus and allows us to remove rmb() in KSYMSAFECALL ++ */ ++ wmb(); synchronize_sched(); ++ *modp = mod; ++ /* just to be sure, our changes are visible as soon as possible */ ++ wmb(); synchronize_sched(); ++} ++ ++static inline void __vzksym_modunresolve(struct module **modp) ++{ ++ /* ++ * try_module_get() in KSYMSAFECALL should fail at this moment since ++ * THIS_MODULE is in unloading state (we should be called from fini), ++ * no need to synchronize pointers/ve_module updates. 
++ */ ++ *modp = &no_module; ++ /* ++ * synchronize_sched() guarantees here that we see ++ * updated module pointer before the module really gets away ++ */ ++ synchronize_sched(); ++} ++ ++static inline int __vzksym_module_get(struct module *mod) ++{ ++ /* ++ * we want to avoid rmb(), so use synchronize_sched() in KSYMUNRESOLVE ++ * and smp_read_barrier_depends() here... ++ */ ++ smp_read_barrier_depends(); /* for module loading */ ++ if (!try_module_get(mod)) ++ return -EBUSY; ++ ++ return 0; ++} ++ ++static inline void __vzksym_module_put(struct module *mod) ++{ ++ module_put(mod); ++} ++#endif ++ ++#if defined(CONFIG_VE) ++#ifdef CONFIG_MODULES ++DECL_KSYM_MODULE(x_tables); ++DECL_KSYM_MODULE(xt_tcpudp); ++DECL_KSYM_MODULE(ip_tables); ++DECL_KSYM_MODULE(ip6_tables); ++DECL_KSYM_MODULE(iptable_filter); ++DECL_KSYM_MODULE(ip6table_filter); ++DECL_KSYM_MODULE(iptable_mangle); ++DECL_KSYM_MODULE(ip6table_mangle); ++DECL_KSYM_MODULE(xt_limit); ++DECL_KSYM_MODULE(ipt_multiport); ++DECL_KSYM_MODULE(ip6t_multiport); ++DECL_KSYM_MODULE(ipt_tos); ++DECL_KSYM_MODULE(ipt_TOS); ++DECL_KSYM_MODULE(ipt_REJECT); ++DECL_KSYM_MODULE(ip6t_REJECT); ++DECL_KSYM_MODULE(ipt_TCPMSS); ++DECL_KSYM_MODULE(xt_tcpmss); ++DECL_KSYM_MODULE(ipt_ttl); ++DECL_KSYM_MODULE(ipt_LOG); ++DECL_KSYM_MODULE(ip6t_LOG); ++DECL_KSYM_MODULE(xt_length); ++DECL_KSYM_MODULE(ip_conntrack); ++DECL_KSYM_MODULE(ip_conntrack_ftp); ++DECL_KSYM_MODULE(ip_conntrack_irc); ++DECL_KSYM_MODULE(xt_conntrack); ++DECL_KSYM_MODULE(xt_state); ++DECL_KSYM_MODULE(xt_helper); ++DECL_KSYM_MODULE(ip_nat); ++DECL_KSYM_MODULE(iptable_nat); ++DECL_KSYM_MODULE(ip_nat_ftp); ++DECL_KSYM_MODULE(ip_nat_irc); ++DECL_KSYM_MODULE(ipt_REDIRECT); ++#endif ++ ++struct sk_buff; ++ ++DECL_KSYM_CALL(int, init_netfilter, (void)); ++DECL_KSYM_CALL(int, init_xtables, (void)); ++DECL_KSYM_CALL(int, init_xt_tcpudp, (void)); ++DECL_KSYM_CALL(int, init_iptables, (void)); ++DECL_KSYM_CALL(int, init_ip6tables, (void)); ++DECL_KSYM_CALL(int, 
init_iptable_filter, (void)); ++DECL_KSYM_CALL(int, init_ip6table_filter, (void)); ++DECL_KSYM_CALL(int, init_iptable_mangle, (void)); ++DECL_KSYM_CALL(int, init_ip6table_mangle, (void)); ++DECL_KSYM_CALL(int, init_xt_limit, (void)); ++DECL_KSYM_CALL(int, init_iptable_multiport, (void)); ++DECL_KSYM_CALL(int, init_ip6table_multiport, (void)); ++DECL_KSYM_CALL(int, init_iptable_tos, (void)); ++DECL_KSYM_CALL(int, init_iptable_TOS, (void)); ++DECL_KSYM_CALL(int, init_iptable_REJECT, (void)); ++DECL_KSYM_CALL(int, init_ip6table_REJECT, (void)); ++DECL_KSYM_CALL(int, init_iptable_TCPMSS, (void)); ++DECL_KSYM_CALL(int, init_xt_tcpmss, (void)); ++DECL_KSYM_CALL(int, init_iptable_ttl, (void)); ++DECL_KSYM_CALL(int, init_iptable_LOG, (void)); ++DECL_KSYM_CALL(int, init_ip6table_LOG, (void)); ++DECL_KSYM_CALL(int, init_xt_length, (void)); ++DECL_KSYM_CALL(int, init_iptable_conntrack, (void)); ++DECL_KSYM_CALL(int, init_iptable_ftp, (void)); ++DECL_KSYM_CALL(int, init_iptable_irc, (void)); ++DECL_KSYM_CALL(int, init_xt_conntrack_match, (void)); ++DECL_KSYM_CALL(int, init_xt_state, (void)); ++DECL_KSYM_CALL(int, init_xt_helper, (void)); ++DECL_KSYM_CALL(int, ip_nat_init, (void)); ++DECL_KSYM_CALL(int, init_iptable_nat, (void)); ++DECL_KSYM_CALL(int, init_iptable_nat_ftp, (void)); ++DECL_KSYM_CALL(int, init_iptable_nat_irc, (void)); ++DECL_KSYM_CALL(int, init_iptable_REDIRECT, (void)); ++DECL_KSYM_CALL(void, fini_iptable_nat_irc, (void)); ++DECL_KSYM_CALL(void, fini_iptable_nat_ftp, (void)); ++DECL_KSYM_CALL(void, fini_iptable_nat, (void)); ++DECL_KSYM_CALL(void, ip_nat_cleanup, (void)); ++DECL_KSYM_CALL(void, fini_xt_helper, (void)); ++DECL_KSYM_CALL(void, fini_xt_state, (void)); ++DECL_KSYM_CALL(void, fini_xt_conntrack_match, (void)); ++DECL_KSYM_CALL(void, fini_iptable_irc, (void)); ++DECL_KSYM_CALL(void, fini_iptable_ftp, (void)); ++DECL_KSYM_CALL(void, fini_iptable_conntrack, (void)); ++DECL_KSYM_CALL(void, fini_xt_length, (void)); ++DECL_KSYM_CALL(void, 
fini_ip6table_LOG, (void)); ++DECL_KSYM_CALL(void, fini_iptable_LOG, (void)); ++DECL_KSYM_CALL(void, fini_iptable_ttl, (void)); ++DECL_KSYM_CALL(void, fini_xt_tcpmss, (void)); ++DECL_KSYM_CALL(void, fini_iptable_TCPMSS, (void)); ++DECL_KSYM_CALL(void, fini_ip6table_REJECT, (void)); ++DECL_KSYM_CALL(void, fini_iptable_REJECT, (void)); ++DECL_KSYM_CALL(void, fini_iptable_TOS, (void)); ++DECL_KSYM_CALL(void, fini_iptable_tos, (void)); ++DECL_KSYM_CALL(void, fini_ip6table_multiport, (void)); ++DECL_KSYM_CALL(void, fini_iptable_multiport, (void)); ++DECL_KSYM_CALL(void, fini_xt_limit, (void)); ++DECL_KSYM_CALL(void, fini_iptable_filter, (void)); ++DECL_KSYM_CALL(void, fini_ip6table_filter, (void)); ++DECL_KSYM_CALL(void, fini_iptable_mangle, (void)); ++DECL_KSYM_CALL(void, fini_ip6table_mangle, (void)); ++DECL_KSYM_CALL(void, fini_ip6tables, (void)); ++DECL_KSYM_CALL(void, fini_iptables, (void)); ++DECL_KSYM_CALL(void, fini_xt_tcpudp, (void)); ++DECL_KSYM_CALL(void, fini_xtables, (void)); ++DECL_KSYM_CALL(void, fini_netfilter, (void)); ++DECL_KSYM_CALL(void, fini_iptable_REDIRECT, (void)); ++ ++#include <linux/netfilter/x_tables.h> ++ ++DECL_KSYM_CALL(void, ipt_flush_table, (struct xt_table *table)); ++DECL_KSYM_CALL(void, ip6t_flush_table, (struct xt_table *table)); ++#endif /* CONFIG_VE */ ++ ++#ifdef CONFIG_VE_CALLS_MODULE ++DECL_KSYM_MODULE(vzmon); ++DECL_KSYM_CALL(int, real_get_device_perms_ve, ++ (int dev_type, dev_t dev, int access_mode)); ++DECL_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env)); ++DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env)); ++DECL_KSYM_CALL(void, real_update_load_avg_ve, (void)); ++#endif ++ ++#endif /* _LINUX_NFCALLS_H */ +diff -upr linux-2.6.16.orig/include/linux/nfs_fs.h linux-2.6.16-026test015/include/linux/nfs_fs.h +--- linux-2.6.16.orig/include/linux/nfs_fs.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/nfs_fs.h 2006-07-04 14:41:37.000000000 +0400 +@@ -296,7 +296,7 @@ 
extern struct inode *nfs_fhget(struct su + extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); + extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); + extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +-extern int nfs_permission(struct inode *, int, struct nameidata *); ++extern int nfs_permission(struct inode *, int, struct nameidata *, struct exec_perm *); + extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); + extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); + extern int nfs_open(struct inode *, struct file *); +diff -upr linux-2.6.16.orig/include/linux/notifier.h linux-2.6.16-026test015/include/linux/notifier.h +--- linux-2.6.16.orig/include/linux/notifier.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/notifier.h 2006-07-04 14:41:39.000000000 +0400 +@@ -27,8 +27,9 @@ extern int notifier_call_chain(struct no + + #define NOTIFY_DONE 0x0000 /* Don't care */ + #define NOTIFY_OK 0x0001 /* Suits me */ ++#define NOTIFY_FAIL 0x0002 /* Reject */ + #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ +-#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */ ++#define NOTIFY_BAD (NOTIFY_STOP_MASK|NOTIFY_FAIL) /* Bad/Veto action */ + /* + * Clean way to return from the notifier and stop further calls. 
+ */ +diff -upr linux-2.6.16.orig/include/linux/page-flags.h linux-2.6.16-026test015/include/linux/page-flags.h +--- linux-2.6.16.orig/include/linux/page-flags.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/page-flags.h 2006-07-04 14:41:36.000000000 +0400 +@@ -74,7 +74,9 @@ + #define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ + #define PG_reclaim 17 /* To be reclaimed asap */ + #define PG_nosave_free 18 /* Free, should not be written */ +-#define PG_uncached 19 /* Page has been mapped as uncached */ ++#define PG_buddy 19 /* Page is free, on buddy lists */ ++ ++#define PG_uncached 20 /* Page has been mapped as uncached */ + + /* + * Global page accounting. One instance per CPU. Only unsigned longs are +@@ -319,6 +321,10 @@ extern void __mod_page_state_offset(unsi + #define SetPageNosaveFree(page) set_bit(PG_nosave_free, &(page)->flags) + #define ClearPageNosaveFree(page) clear_bit(PG_nosave_free, &(page)->flags) + ++#define PageBuddy(page) test_bit(PG_buddy, &(page)->flags) ++#define __SetPageBuddy(page) __set_bit(PG_buddy, &(page)->flags) ++#define __ClearPageBuddy(page) __clear_bit(PG_buddy, &(page)->flags) ++ + #define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags) + #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) + #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) +diff -upr linux-2.6.16.orig/include/linux/pid.h linux-2.6.16-026test015/include/linux/pid.h +--- linux-2.6.16.orig/include/linux/pid.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/pid.h 2006-07-04 14:41:38.000000000 +0400 +@@ -1,6 +1,18 @@ + #ifndef _LINUX_PID_H + #define _LINUX_PID_H + ++#define VPID_BIT 10 ++#define VPID_DIV (1<<VPID_BIT) ++ ++#ifdef CONFIG_VE ++#define __is_virtual_pid(pid) ((pid) & VPID_DIV) ++#define is_virtual_pid(pid) \ ++ (__is_virtual_pid(pid) || ((pid)==1 && !ve_is_super(get_exec_env()))) ++#else ++#define 
__is_virtual_pid(pid) 0 ++#define is_virtual_pid(pid) 0 ++#endif ++ + enum pid_type + { + PIDTYPE_PID, +@@ -15,6 +27,9 @@ struct pid + /* Try to keep pid_chain in the same cacheline as nr for find_pid */ + int nr; + struct hlist_node pid_chain; ++#ifdef CONFIG_VE ++ int vnr; ++#endif + /* list of pids with the same nr, only one of them is in the hash */ + struct list_head pid_list; + }; +@@ -40,16 +55,89 @@ extern int alloc_pidmap(void); + extern void FASTCALL(free_pidmap(int)); + extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread); + +-#define do_each_task_pid(who, type, task) \ +- if ((task = find_task_by_pid_type(type, who))) { \ ++#ifndef CONFIG_VE ++ ++#define vpid_to_pid(pid) (pid) ++#define __vpid_to_pid(pid) (pid) ++#define pid_type_to_vpid(type, pid) (pid) ++#define __pid_type_to_vpid(type, pid) (pid) ++ ++#define comb_vpid_to_pid(pid) (pid) ++#define comb_pid_to_vpid(pid) (pid) ++ ++#else ++ ++struct ve_struct; ++extern void free_vpid(int vpid, struct ve_struct *ve); ++extern int alloc_vpid(int pid, int vpid); ++extern int vpid_to_pid(int pid); ++extern int __vpid_to_pid(int pid); ++extern pid_t pid_type_to_vpid(int type, pid_t pid); ++extern pid_t _pid_type_to_vpid(int type, pid_t pid); ++ ++static inline int comb_vpid_to_pid(int vpid) ++{ ++ int pid = vpid; ++ ++ if (vpid > 0) { ++ pid = vpid_to_pid(vpid); ++ if (unlikely(pid < 0)) ++ return 0; ++ } else if (vpid < 0) { ++ pid = vpid_to_pid(-vpid); ++ if (unlikely(pid < 0)) ++ return 0; ++ pid = -pid; ++ } ++ return pid; ++} ++ ++static inline int comb_pid_to_vpid(int pid) ++{ ++ int vpid = pid; ++ ++ if (pid > 0) { ++ vpid = pid_type_to_vpid(PIDTYPE_PID, pid); ++ if (unlikely(vpid < 0)) ++ return 0; ++ } else if (pid < 0) { ++ vpid = pid_type_to_vpid(PIDTYPE_PGID, -pid); ++ if (unlikely(vpid < 0)) ++ return 0; ++ vpid = -vpid; ++ } ++ return vpid; ++} ++#endif ++ ++#define do_each_task_pid_all(who, type, task) \ ++ if ((task = find_task_by_pid_type_all(type, who))) { \ 
+ prefetch((task)->pids[type].pid_list.next); \ + do { + +-#define while_each_task_pid(who, type, task) \ ++#define while_each_task_pid_all(who, type, task) \ + } while (task = pid_task((task)->pids[type].pid_list.next,\ + type), \ + prefetch((task)->pids[type].pid_list.next), \ + hlist_unhashed(&(task)->pids[type].pid_chain)); \ + } \ + ++#ifndef CONFIG_VE ++#define __do_each_task_pid_ve(who, type, task, owner) \ ++ do_each_task_pid_all(who, type, task) ++#define __while_each_task_pid_ve(who, type, task, owner) \ ++ while_each_task_pid_all(who, type, task) ++#else /* CONFIG_VE */ ++#define __do_each_task_pid_ve(who, type, task, owner) \ ++ do_each_task_pid_all(who, type, task) \ ++ if (ve_accessible(VE_TASK_INFO(task)->owner_env, owner)) ++#define __while_each_task_pid_ve(who, type, task, owner) \ ++ while_each_task_pid_all(who, type, task) ++#endif /* CONFIG_VE */ ++ ++#define do_each_task_pid_ve(who, type, task) \ ++ __do_each_task_pid_ve(who, type, task, get_exec_env()); ++#define while_each_task_pid_ve(who, type, task) \ ++ __while_each_task_pid_ve(who, type, task, get_exec_env()); ++ + #endif /* _LINUX_PID_H */ +diff -upr linux-2.6.16.orig/include/linux/proc_fs.h linux-2.6.16-026test015/include/linux/proc_fs.h +--- linux-2.6.16.orig/include/linux/proc_fs.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/proc_fs.h 2006-07-04 14:41:38.000000000 +0400 +@@ -78,7 +78,7 @@ struct kcore_list { + struct vmcore { + struct list_head list; + unsigned long long paddr; +- unsigned long size; ++ unsigned long long size; + loff_t offset; + }; + +@@ -86,8 +86,14 @@ struct vmcore { + + extern struct proc_dir_entry proc_root; + extern struct proc_dir_entry *proc_root_fs; ++#ifdef CONFIG_VE ++#include <linux/sched.h> ++#define proc_net (get_exec_env()->_proc_net) ++#define proc_net_stat (get_exec_env()->_proc_net_stat) ++#else + extern struct proc_dir_entry *proc_net; + extern struct proc_dir_entry *proc_net_stat; ++#endif + extern struct 
proc_dir_entry *proc_bus; + extern struct proc_dir_entry *proc_root_driver; + extern struct proc_dir_entry *proc_root_kcore; +@@ -98,8 +104,8 @@ extern void proc_misc_init(void); + struct mm_struct; + + struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); +-struct dentry *proc_pid_unhash(struct task_struct *p); +-void proc_pid_flush(struct dentry *proc_dentry); ++void proc_pid_unhash(struct task_struct *p, struct dentry * [2]); ++void proc_pid_flush(struct dentry *proc_dentry[2]); + int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); + unsigned long task_vsize(struct mm_struct *); + int task_statm(struct mm_struct *, int *, int *, int *, int *); +@@ -107,7 +113,11 @@ char *task_mem(struct mm_struct *, char + + extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, + struct proc_dir_entry *parent); ++extern struct proc_dir_entry *create_proc_glob_entry(const char *name, ++ mode_t mode, ++ struct proc_dir_entry *parent); + extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); ++extern void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent); + + extern struct vfsmount *proc_mnt; + extern int proc_fill_super(struct super_block *,void *,int); +@@ -194,6 +204,15 @@ static inline struct proc_dir_entry *pro + return res; + } + ++static inline struct proc_dir_entry *proc_glob_fops_create(const char *name, ++ mode_t mode, struct file_operations *fops) ++{ ++ struct proc_dir_entry *res = create_proc_glob_entry(name, mode, NULL); ++ if (res) ++ res->proc_fops = fops; ++ return res; ++} ++ + static inline void proc_net_remove(const char *name) + { + remove_proc_entry(name,proc_net); +@@ -206,16 +225,21 @@ static inline void proc_net_remove(const + #define proc_bus NULL + + #define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; }) ++#define proc_glob_fops_create(name, mode, fops) ({ (void)(mode), NULL; }) + #define 
proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) + static inline void proc_net_remove(const char *name) {} + +-static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; } +-static inline void proc_pid_flush(struct dentry *proc_dentry) { } ++static inline struct dentry *proc_pid_unhash(struct task_struct *p, ++ struct dentry *d[2]) { return NULL; } ++static inline void proc_pid_flush(struct dentry *proc_dentry[2]) { } + + static inline struct proc_dir_entry *create_proc_entry(const char *name, + mode_t mode, struct proc_dir_entry *parent) { return NULL; } ++static inline struct proc_dir_entry *create_proc_glob_entry(const char *name, ++ mode_t mode, struct proc_dir_entry *parent) { return NULL; } + + #define remove_proc_entry(name, parent) do {} while (0) ++#define remove_proc_glob_entry(name, parent) do {} while (0) + + static inline struct proc_dir_entry *proc_symlink(const char *name, + struct proc_dir_entry *parent,const char *dest) {return NULL;} +@@ -266,4 +290,18 @@ static inline struct proc_dir_entry *PDE + return PROC_I(inode)->pde; + } + ++static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) ++{ ++ if (de) ++ atomic_inc(&de->count); ++ return de; ++} ++ ++extern void de_put(struct proc_dir_entry *); ++ ++#define LPDE(inode) (PROC_I((inode))->pde) ++#ifdef CONFIG_VE ++#define GPDE(inode) (*(struct proc_dir_entry **)(&(inode)->i_pipe)) ++#endif ++ + #endif /* _LINUX_PROC_FS_H */ +diff -upr linux-2.6.16.orig/include/linux/quota.h linux-2.6.16-026test015/include/linux/quota.h +--- linux-2.6.16.orig/include/linux/quota.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/quota.h 2006-07-04 14:41:39.000000000 +0400 +@@ -37,7 +37,6 @@ + + #include <linux/errno.h> + #include <linux/types.h> +-#include <linux/spinlock.h> + + #define __DQUOT_VERSION__ "dquot_6.5.1" + #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 +@@ -45,8 +44,6 @@ + typedef __kernel_uid32_t qid_t; /* Type in which we 
store ids in memory */ + typedef __u64 qsize_t; /* Type in which we store sizes */ + +-extern spinlock_t dq_data_lock; +- + /* Size of blocks in which are counted size limits */ + #define QUOTABLOCK_BITS 10 + #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) +@@ -133,6 +130,10 @@ struct if_dqinfo { + + #ifdef __KERNEL__ + ++#include <linux/spinlock.h> ++ ++extern spinlock_t dq_data_lock; ++ + #include <linux/dqblk_xfs.h> + #include <linux/dqblk_v1.h> + #include <linux/dqblk_v2.h> +@@ -242,6 +243,8 @@ struct quota_format_ops { + int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */ + }; + ++struct inode; ++struct iattr; + /* Operations working with dquots */ + struct dquot_operations { + int (*initialize) (struct inode *, int); +@@ -256,9 +259,11 @@ struct dquot_operations { + int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */ + int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ + int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ ++ int (*rename) (struct inode *, struct inode *, struct inode *); + }; + + /* Operations handling requests from userspace */ ++struct v2_disk_dqblk; + struct quotactl_ops { + int (*quota_on)(struct super_block *, int, int, char *); + int (*quota_off)(struct super_block *, int); +@@ -271,6 +276,9 @@ struct quotactl_ops { + int (*set_xstate)(struct super_block *, unsigned int, int); + int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); + int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); ++#ifdef CONFIG_QUOTA_COMPAT ++ int (*get_quoti)(struct super_block *, int, unsigned int, struct v2_disk_dqblk *); ++#endif + }; + + struct quota_format_type { +@@ -291,6 +299,10 @@ struct quota_info { + struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */ + struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ + struct quota_format_ops *ops[MAXQUOTAS]; /* 
Operations for each type */ ++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE) ++ struct vz_quota_master *vzdq_master; ++ int vzdq_count; ++#endif + }; + + /* Inline would be better but we need to dereference super_block which is not defined yet */ +diff -upr linux-2.6.16.orig/include/linux/quotaops.h linux-2.6.16-026test015/include/linux/quotaops.h +--- linux-2.6.16.orig/include/linux/quotaops.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/quotaops.h 2006-07-04 14:41:39.000000000 +0400 +@@ -171,6 +171,19 @@ static __inline__ int DQUOT_TRANSFER(str + return 0; + } + ++static __inline__ int DQUOT_RENAME(struct inode *inode, ++ struct inode *old_dir, struct inode *new_dir) ++{ ++ struct dquot_operations *q_op; ++ ++ q_op = inode->i_sb->dq_op; ++ if (q_op && q_op->rename) { ++ if (q_op->rename(inode, old_dir, new_dir) == NO_QUOTA) ++ return 1; ++ } ++ return 0; ++} ++ + /* The following two functions cannot be called inside a transaction */ + #define DQUOT_SYNC(sb) sync_dquots(sb, -1) + +@@ -197,6 +210,7 @@ static __inline__ int DQUOT_OFF(struct s + #define DQUOT_SYNC(sb) do { } while(0) + #define DQUOT_OFF(sb) do { } while(0) + #define DQUOT_TRANSFER(inode, iattr) (0) ++#define DQUOT_RENAME(inode, old_dir, new_dir) (0) + static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) + { + inode_add_bytes(inode, nr); +diff -upr linux-2.6.16.orig/include/linux/raid/raid1.h linux-2.6.16-026test015/include/linux/raid/raid1.h +--- linux-2.6.16.orig/include/linux/raid/raid1.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/raid/raid1.h 2006-07-04 14:41:36.000000000 +0400 +@@ -130,6 +130,6 @@ struct r1bio_s { + * with failure when last write completes (and all failed). + * Record that bi_end_io was called with this flag... 
+ */ +-#define R1BIO_Returned 4 ++#define R1BIO_Returned 6 + + #endif +diff -upr linux-2.6.16.orig/include/linux/reiserfs_xattr.h linux-2.6.16-026test015/include/linux/reiserfs_xattr.h +--- linux-2.6.16.orig/include/linux/reiserfs_xattr.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/reiserfs_xattr.h 2006-07-04 14:41:37.000000000 +0400 +@@ -42,7 +42,8 @@ int reiserfs_removexattr(struct dentry * + int reiserfs_delete_xattrs(struct inode *inode); + int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); + int reiserfs_xattr_init(struct super_block *sb, int mount_flags); +-int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd); ++int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *); + + int reiserfs_xattr_del(struct inode *, const char *); + int reiserfs_xattr_get(const struct inode *, const char *, void *, size_t); +diff -upr linux-2.6.16.orig/include/linux/rmap.h linux-2.6.16-026test015/include/linux/rmap.h +--- linux-2.6.16.orig/include/linux/rmap.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/rmap.h 2006-07-04 14:41:39.000000000 +0400 +@@ -74,6 +74,7 @@ void page_add_anon_rmap(struct page *, s + void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); + void page_add_file_rmap(struct page *); + void page_remove_rmap(struct page *); ++struct anon_vma *page_lock_anon_vma(struct page *page); + + /** + * page_dup_rmap - duplicate pte mapping to a page +diff -upr linux-2.6.16.orig/include/linux/rtc.h linux-2.6.16-026test015/include/linux/rtc.h +--- linux-2.6.16.orig/include/linux/rtc.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/rtc.h 2006-07-04 14:41:36.000000000 +0400 +@@ -11,8 +11,6 @@ + #ifndef _LINUX_RTC_H_ + #define _LINUX_RTC_H_ + +-#include <linux/interrupt.h> +- + /* + * The struct used to pass data via the following ioctl. 
Similar to the + * struct tm in <time.h>, but it needs to be here so that the kernel +@@ -95,6 +93,8 @@ struct rtc_pll_info { + + #ifdef __KERNEL__ + ++#include <linux/interrupt.h> ++ + typedef struct rtc_task { + void (*func)(void *private_data); + void *private_data; +diff -upr linux-2.6.16.orig/include/linux/sched.h linux-2.6.16-026test015/include/linux/sched.h +--- linux-2.6.16.orig/include/linux/sched.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/sched.h 2006-07-04 14:41:39.000000000 +0400 +@@ -38,7 +38,10 @@ + + #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */ + ++#include <ub/ub_task.h> ++ + struct exec_domain; ++struct ve_struct; + + /* + * cloning flags: +@@ -92,15 +95,34 @@ extern unsigned long avenrun[]; /* Load + load += n*(FIXED_1-exp); \ + load >>= FSHIFT; + ++#define LOAD_INT(x) ((x) >> FSHIFT) ++#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) ++ + extern unsigned long total_forks; + extern int nr_threads; + extern int last_pid; + DECLARE_PER_CPU(unsigned long, process_counts); + extern int nr_processes(void); ++ ++extern unsigned long nr_sleeping(void); ++extern unsigned long nr_stopped(void); ++extern unsigned long nr_zombie; ++extern atomic_t nr_dead; + extern unsigned long nr_running(void); + extern unsigned long nr_uninterruptible(void); + extern unsigned long nr_iowait(void); + ++#ifdef CONFIG_VE ++struct ve_struct; ++extern unsigned long nr_running_ve(struct ve_struct *); ++extern unsigned long nr_iowait_ve(struct ve_struct *); ++extern unsigned long nr_uninterruptible_ve(struct ve_struct *); ++#else ++#define nr_running_ve(ve) 0 ++#define nr_iowait_ve(ve) 0 ++#define nr_uninterruptible_ve(ve) 0 ++#endif ++ + #include <linux/time.h> + #include <linux/param.h> + #include <linux/resource.h> +@@ -189,6 +211,8 @@ extern cpumask_t nohz_cpu_mask; + + extern void show_state(void); + extern void show_regs(struct pt_regs *); ++extern void smp_show_regs(struct pt_regs *, void *); ++extern void 
show_vsched(void); + + /* + * TASK is a pointer to the task whose backtrace we want to see (or NULL for current +@@ -252,31 +276,7 @@ arch_get_unmapped_area_topdown(struct fi + extern void arch_unmap_area(struct mm_struct *, unsigned long); + extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); + +-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS +-/* +- * The mm counters are not protected by its page_table_lock, +- * so must be incremented atomically. +- */ +-#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value) +-#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member)) +-#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member) +-#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) +-#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) +-typedef atomic_long_t mm_counter_t; +- +-#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +-/* +- * The mm counters are protected by its page_table_lock, +- * so can be incremented directly. 
+- */ +-#define set_mm_counter(mm, member, value) (mm)->_##member = (value) +-#define get_mm_counter(mm, member) ((mm)->_##member) +-#define add_mm_counter(mm, member, value) (mm)->_##member += (value) +-#define inc_mm_counter(mm, member) (mm)->_##member++ +-#define dec_mm_counter(mm, member) (mm)->_##member-- +-typedef unsigned long mm_counter_t; +- +-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ ++#include <linux/mm_counter.h> + + #define get_mm_rss(mm) \ + (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) +@@ -332,6 +332,7 @@ struct mm_struct { + unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ + + unsigned dumpable:2; ++ unsigned vps_dumpable:1; + cpumask_t cpu_vm_mask; + + /* Architecture-specific MM context */ +@@ -348,6 +349,9 @@ struct mm_struct { + /* aio bits */ + rwlock_t ioctx_list_lock; + struct kioctx *ioctx_list; ++#ifdef CONFIG_USER_RESOURCE ++ struct user_beancounter *mm_ub; ++#endif + }; + + struct sighand_struct { +@@ -364,6 +368,9 @@ static inline void sighand_free(struct s + call_rcu(&sp->rcu, sighand_free_cb); + } + ++#include <linux/ve.h> ++#include <linux/ve_task.h> ++ + /* + * NOTE! "signal_struct" does not have it's own + * locking, because a shared signal_struct always +@@ -688,6 +695,8 @@ static inline void prefetch_stack(struct + + struct audit_context; /* See audit.c */ + struct mempolicy; ++struct vcpu_scheduler; ++struct vcpu_info; + + struct task_struct { + volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ +@@ -701,6 +710,14 @@ struct task_struct { + #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + int oncpu; + #endif ++#ifdef CONFIG_SCHED_VCPU ++ struct vcpu_scheduler *vsched; ++ struct vcpu_info *vcpu; ++ ++ /* id's are saved to avoid locking (e.g. 
on vsched->id access) */ ++ int vsched_id; ++ int vcpu_id; ++#endif + int prio, static_prio; + struct list_head run_list; + prio_array_t *array; +@@ -846,6 +863,11 @@ struct task_struct { + + unsigned long ptrace_message; + siginfo_t *last_siginfo; /* For ptrace use. */ ++ ++/* state tracking for suspend */ ++ __u8 pn_state; ++ __u8 stopped_state:1; ++ + /* + * current io wait handle: wait queue entry to use for io waits + * If this thread is processing aio, this points at the waitqueue +@@ -871,6 +893,16 @@ struct task_struct { + #endif + atomic_t fs_excl; /* holding fs exclusive resources */ + struct rcu_head rcu; ++#ifdef CONFIG_USER_RESOURCE ++ struct task_beancounter task_bc; ++#endif ++#ifdef CONFIG_VE ++ struct ve_task_info ve_task_info; ++#endif ++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE) ++ unsigned long magic; ++ struct inode *ino; ++#endif + }; + + static inline pid_t process_group(struct task_struct *tsk) +@@ -929,6 +961,43 @@ static inline void put_task_struct(struc + #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ + #define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */ + ++#ifndef CONFIG_VE ++#define set_pn_state(tsk, state) do { } while(0) ++#define clear_pn_state(tsk) do { } while(0) ++#define set_stop_state(tsk) do { } while(0) ++#define clear_stop_state(tsk) do { } while(0) ++#else ++#define PN_STOP_TF 1 /* was not in 2.6.8 */ ++#define PN_STOP_TF_RT 2 /* was not in 2.6.8 */ ++#define PN_STOP_ENTRY 3 ++#define PN_STOP_FORK 4 ++#define PN_STOP_VFORK 5 ++#define PN_STOP_SIGNAL 6 ++#define PN_STOP_EXIT 7 ++#define PN_STOP_EXEC 8 ++#define PN_STOP_LEAVE 9 ++ ++static inline void set_pn_state(struct task_struct *tsk, int state) ++{ ++ tsk->pn_state = state; ++} ++ ++static inline void clear_pn_state(struct task_struct *tsk) ++{ ++ tsk->pn_state = 0; ++} ++ ++static inline void set_stop_state(struct task_struct *tsk) ++{ ++ tsk->stopped_state = 1; ++} ++ ++static inline void clear_stop_state(struct 
task_struct *tsk) ++{ ++ tsk->stopped_state = 0; ++} ++#endif ++ + /* + * Only the _current_ task can read/write to tsk->flags, but other + * tasks can access tsk->flags in readonly mode for example +@@ -968,6 +1037,21 @@ static inline int set_cpus_allowed(task_ + extern unsigned long long sched_clock(void); + extern unsigned long long current_sched_time(const task_t *current_task); + ++static inline unsigned long cycles_to_clocks(cycles_t cycles) ++{ ++ extern unsigned long cycles_per_clock; ++ do_div(cycles, cycles_per_clock); ++ return cycles; ++} ++ ++static inline u64 cycles_to_jiffies(cycles_t cycles) ++{ ++ extern unsigned long cycles_per_jiffy; ++ do_div(cycles, cycles_per_jiffy); ++ return cycles; ++} ++ ++ + /* sched_exec is called by processes performing an exec */ + #ifdef CONFIG_SMP + extern void sched_exec(void); +@@ -1020,12 +1104,237 @@ extern struct task_struct init_task; + + extern struct mm_struct init_mm; + +-#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) +-extern struct task_struct *find_task_by_pid_type(int type, int pid); ++#define find_task_by_pid_all(nr) \ ++ find_task_by_pid_type_all(PIDTYPE_PID, nr) ++extern struct task_struct *find_task_by_pid_type_all(int type, int pid); + extern void set_special_pids(pid_t session, pid_t pgrp); + extern void __set_special_pids(pid_t session, pid_t pgrp); + ++#ifndef CONFIG_VE ++#define find_task_by_pid_ve find_task_by_pid_all ++ ++#define get_exec_env() ((struct ve_struct *)NULL) ++#define set_exec_env(new_env) ((struct ve_struct *)NULL) ++ ++#define ve_is_super(env) 1 ++#define ve_accessible(target, owner) 1 ++#define ve_accessible_strict(target, owner) 1 ++#define ve_accessible_veid(target, owner) 1 ++#define ve_accessible_strict_veid(target, owner) 1 ++ ++#define VEID(envid) 0 ++#define get_ve0() NULL ++ ++static inline pid_t virt_pid(struct task_struct *tsk) ++{ ++ return tsk->pid; ++} ++ ++static inline pid_t virt_tgid(struct task_struct *tsk) ++{ ++ return tsk->tgid; ++} ++ 
++static inline pid_t virt_pgid(struct task_struct *tsk) ++{ ++ return tsk->signal->pgrp; ++} ++ ++static inline pid_t virt_sid(struct task_struct *tsk) ++{ ++ return tsk->signal->session; ++} ++ ++#define get_task_pid_ve(tsk, ve) get_task_pid(tsk) ++ ++static inline pid_t get_task_pid(struct task_struct *tsk) ++{ ++ return tsk->pid; ++} ++ ++static inline pid_t get_task_tgid(struct task_struct *tsk) ++{ ++ return tsk->tgid; ++} ++ ++static inline pid_t get_task_pgid(struct task_struct *tsk) ++{ ++ return tsk->signal->pgrp; ++} ++ ++static inline pid_t get_task_sid(struct task_struct *tsk) ++{ ++ return tsk->signal->session; ++} ++ ++static inline void set_virt_pid(struct task_struct *tsk, pid_t pid) ++{ ++} ++ ++static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid) ++{ ++} ++ ++static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid) ++{ ++} ++ ++static inline void set_virt_sid(struct task_struct *tsk, pid_t pid) ++{ ++} ++ ++static inline pid_t get_task_ppid(struct task_struct *p) ++{ ++ return pid_alive(p) ? 
p->group_leader->real_parent->tgid : 0; ++} ++ ++#else /* CONFIG_VE */ ++ ++#include <asm/current.h> ++#include <linux/ve.h> ++ ++extern struct ve_struct ve0; ++ ++#define find_task_by_pid_ve(nr) \ ++ find_task_by_pid_type_ve(PIDTYPE_PID, nr) ++ ++extern struct task_struct *find_task_by_pid_type_ve(int type, int pid); ++ ++#define get_ve0() (&ve0) ++#define VEID(envid) ((envid)->veid) ++ ++#define get_exec_env() (VE_TASK_INFO(current)->exec_env) ++static inline struct ve_struct *set_exec_env(struct ve_struct *new_env) ++{ ++ struct ve_struct *old_env; ++ ++ old_env = VE_TASK_INFO(current)->exec_env; ++ VE_TASK_INFO(current)->exec_env = new_env; ++ ++ return old_env; ++} ++ ++#define ve_is_super(env) ((env) == get_ve0()) ++#define ve_accessible_strict(target, owner) ((target) == (owner)) ++static inline int ve_accessible(struct ve_struct *target, ++ struct ve_struct *owner) { ++ return ve_is_super(owner) || ve_accessible_strict(target, owner); ++} ++ ++#define ve_accessible_strict_veid(target, owner) ((target) == (owner)) ++static inline int ve_accessible_veid(envid_t target, envid_t owner) ++{ ++ return get_ve0()->veid == owner || ++ ve_accessible_strict_veid(target, owner); ++} ++ ++static inline pid_t virt_pid(struct task_struct *tsk) ++{ ++ return tsk->pids[PIDTYPE_PID].vnr; ++} ++ ++static inline pid_t virt_tgid(struct task_struct *tsk) ++{ ++ return tsk->pids[PIDTYPE_TGID].vnr; ++} ++ ++static inline pid_t virt_pgid(struct task_struct *tsk) ++{ ++ return tsk->pids[PIDTYPE_PGID].vnr; ++} ++ ++static inline pid_t virt_sid(struct task_struct *tsk) ++{ ++ return tsk->pids[PIDTYPE_SID].vnr; ++} ++ ++static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *env) ++{ ++ return ve_is_super(env) ? 
tsk->pid : virt_pid(tsk); ++} ++ ++static inline pid_t get_task_pid(struct task_struct *tsk) ++{ ++ return get_task_pid_ve(tsk, get_exec_env()); ++} ++ ++static inline pid_t get_task_tgid(struct task_struct *tsk) ++{ ++ return ve_is_super(get_exec_env()) ? tsk->tgid : virt_tgid(tsk); ++} ++ ++static inline pid_t get_task_pgid(struct task_struct *tsk) ++{ ++ return ve_is_super(get_exec_env()) ? tsk->signal->pgrp : virt_pgid(tsk); ++} ++ ++static inline pid_t get_task_sid(struct task_struct *tsk) ++{ ++ return ve_is_super(get_exec_env()) ? tsk->signal->session : virt_sid(tsk); ++} ++ ++static inline void set_virt_pid(struct task_struct *tsk, pid_t pid) ++{ ++ tsk->pids[PIDTYPE_PID].vnr = pid; ++} ++ ++static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid) ++{ ++ tsk->pids[PIDTYPE_TGID].vnr = pid; ++} ++ ++static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid) ++{ ++ tsk->pids[PIDTYPE_PGID].vnr = pid; ++} ++ ++static inline void set_virt_sid(struct task_struct *tsk, pid_t pid) ++{ ++ tsk->pids[PIDTYPE_SID].vnr = pid; ++} ++ ++static inline pid_t get_task_ppid(struct task_struct *p) ++{ ++ struct task_struct *parent; ++ struct ve_struct *env; ++ ++ if (!pid_alive(p)) ++ return 0; ++ env = get_exec_env(); ++ if (get_task_pid_ve(p, env) == 1) ++ return 0; ++ parent = p->group_leader->real_parent; ++ return ve_accessible(VE_TASK_INFO(parent)->owner_env, env) ? 
++ get_task_tgid(parent) : 1; ++} ++ ++void ve_sched_get_cpu_stat(struct ve_struct *envid, cycles_t *idle, ++ cycles_t *strv, unsigned int cpu); ++void ve_sched_attach(struct ve_struct *envid); ++ ++#endif /* CONFIG_VE */ ++ ++ ++#ifdef CONFIG_VE ++extern cycles_t ve_sched_get_idle_time(struct ve_struct *, int); ++extern cycles_t ve_sched_get_iowait_time(struct ve_struct *, int); ++#else ++#define ve_sched_get_idle_time(ve, cpu) 0 ++#define ve_sched_get_iowait_time(ve, cpu) 0 ++#endif ++ ++#ifdef CONFIG_SCHED_VCPU ++struct vcpu_scheduler; ++extern void fastcall vsched_cpu_online_map(struct vcpu_scheduler *sched, ++ cpumask_t *mask); ++#else ++#define vsched_cpu_online_map(vsched, mask) do { \ ++ *mask = cpu_online_map; \ ++ } while (0) ++#endif ++ + /* per-UID process charging. */ ++extern int set_user(uid_t new_ruid, int dumpclear); + extern struct user_struct * alloc_uid(uid_t); + static inline struct user_struct *get_uid(struct user_struct *u) + { +@@ -1043,7 +1352,7 @@ extern int FASTCALL(wake_up_state(struct + extern int FASTCALL(wake_up_process(struct task_struct * tsk)); + extern void FASTCALL(wake_up_new_task(struct task_struct * tsk, + unsigned long clone_flags)); +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined (CONFIG_SCHED_VCPU) + extern void kick_process(struct task_struct *tsk); + #else + static inline void kick_process(struct task_struct *tsk) { } +@@ -1161,12 +1470,19 @@ extern task_t *child_reaper; + + extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); + extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); ++extern long do_fork_pid(unsigned long clone_flags, ++ unsigned long stack_start, ++ struct pt_regs *regs, ++ unsigned long stack_size, ++ int __user *parent_tidptr, ++ int __user *child_tidptr, ++ long pid0); + task_t *fork_idle(int); + + extern void set_task_comm(struct task_struct *tsk, char *from); + extern void get_task_comm(char 
*to, struct task_struct *tsk); + +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined (CONFIG_SCHED_VCPU) + extern void wait_task_inactive(task_t * p); + #else + #define wait_task_inactive(p) do { } while (0) +@@ -1187,22 +1503,100 @@ extern void wait_task_inactive(task_t * + add_parent(p, (p)->parent); \ + } while (0) + +-#define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) +-#define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks) ++#define next_task_all(p) list_entry((p)->tasks.next, struct task_struct, tasks) ++#define prev_task_all(p) list_entry((p)->tasks.prev, struct task_struct, tasks) + +-#define for_each_process(p) \ +- for (p = &init_task ; (p = next_task(p)) != &init_task ; ) ++#define for_each_process_all(p) \ ++ for (p = &init_task ; (p = next_task_all(p)) != &init_task ; ) + + /* + * Careful: do_each_thread/while_each_thread is a double loop so + * 'break' will not work as expected - use goto instead. + */ +-#define do_each_thread(g, t) \ +- for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do ++#define do_each_thread_all(g, t) \ ++ for (g = t = &init_task ; (g = t = next_task_all(g)) != &init_task ; ) do + +-#define while_each_thread(g, t) \ ++#define while_each_thread_all(g, t) \ + while ((t = next_thread(t)) != g) + ++#ifndef CONFIG_VE ++ ++#define SET_VE_LINKS(p) ++#define REMOVE_VE_LINKS(p) ++#define for_each_process_ve(p) for_each_process_all(p) ++#define do_each_thread_ve(g, t) do_each_thread_all(g, t) ++#define while_each_thread_ve(g, t) while_each_thread_all(g, t) ++#define first_task_ve() next_task_ve(&init_task) ++#define __first_task_ve(owner) next_task_ve(&init_task) ++#define __next_task_ve(owner, p) next_task_ve(p) ++#define next_task_ve(p) \ ++ (next_task_all(p) != &init_task ? 
next_task_all(p) : NULL) ++ ++#else /* CONFIG_VE */ ++ ++#define SET_VE_LINKS(p) \ ++ do { \ ++ if (thread_group_leader(p)) \ ++ list_add_tail(&VE_TASK_INFO(p)->vetask_list, \ ++ &VE_TASK_INFO(p)->owner_env->vetask_lh); \ ++ } while (0) ++ ++#define REMOVE_VE_LINKS(p) \ ++ do { \ ++ if (thread_group_leader(p)) \ ++ list_del(&VE_TASK_INFO(p)->vetask_list); \ ++ } while(0) ++ ++static inline task_t* __first_task_ve(struct ve_struct *ve) ++{ ++ task_t *tsk; ++ ++ if (unlikely(ve_is_super(ve))) { ++ tsk = next_task_all(&init_task); ++ if (tsk == &init_task) ++ tsk = NULL; ++ } else { ++ /* probably can return ve->init_entry, but it's more clear */ ++ BUG_ON(list_empty(&ve->vetask_lh)); ++ tsk = VE_TASK_LIST_2_TASK(ve->vetask_lh.next); ++ } ++ return tsk; ++} ++ ++static inline task_t* __next_task_ve(struct ve_struct *ve, task_t *tsk) ++{ ++ if (unlikely(ve_is_super(ve))) { ++ tsk = next_task_all(tsk); ++ if (tsk == &init_task) ++ tsk = NULL; ++ } else { ++ struct list_head *tmp; ++ ++ BUG_ON(VE_TASK_INFO(tsk)->owner_env != ve); ++ tmp = VE_TASK_INFO(tsk)->vetask_list.next; ++ if (tmp == &ve->vetask_lh) ++ tsk = NULL; ++ else ++ tsk = VE_TASK_LIST_2_TASK(tmp); ++ } ++ return tsk; ++} ++ ++#define first_task_ve() __first_task_ve(get_exec_env()) ++#define next_task_ve(p) __next_task_ve(get_exec_env(), p) ++/* no one uses prev_task_ve(), copy next_task_ve() if needed */ ++ ++#define for_each_process_ve(p) \ ++ for (p = first_task_ve(); p != NULL ; p = next_task_ve(p)) ++ ++#define do_each_thread_ve(g, t) \ ++ for (g = t = first_task_ve() ; g != NULL; g = t = next_task_ve(g)) do ++ ++#define while_each_thread_ve(g, t) \ ++ while ((t = next_thread(t)) != g) ++ ++#endif /* CONFIG_VE */ ++ + extern task_t * FASTCALL(next_thread(const task_t *p)); + + #define thread_group_leader(p) (p->pid == p->tgid) +@@ -1348,28 +1742,63 @@ extern void signal_wake_up(struct task_s + */ + #ifdef CONFIG_SMP + +-static inline unsigned int task_cpu(const struct task_struct *p) ++static inline 
unsigned int task_pcpu(const struct task_struct *p) + { + return task_thread_info(p)->cpu; + } + +-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) ++static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu) + { + task_thread_info(p)->cpu = cpu; + } + + #else + ++static inline unsigned int task_pcpu(const struct task_struct *p) ++{ ++ return 0; ++} ++ ++static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu) ++{ ++} ++ ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_SCHED_VCPU ++ ++static inline unsigned int task_vsched_id(const struct task_struct *p) ++{ ++ return p->vsched_id; ++} ++ + static inline unsigned int task_cpu(const struct task_struct *p) + { ++ return p->vcpu_id; ++} ++ ++extern void set_task_cpu(struct task_struct *p, unsigned int vcpu); ++extern int vcpu_online(int cpu); ++ ++#else ++ ++static inline unsigned int task_vsched_id(const struct task_struct *p) ++{ + return 0; + } + ++static inline unsigned int task_cpu(const struct task_struct *p) ++{ ++ return task_pcpu(p); ++} ++ + static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) + { ++ set_task_pcpu(p, cpu); + } + +-#endif /* CONFIG_SMP */ ++#define vcpu_online(cpu) cpu_online(cpu) ++#endif /* CONFIG_SCHED_VCPU */ + + #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT + extern void arch_pick_mmap_layout(struct mm_struct *mm); +@@ -1401,7 +1830,7 @@ static inline int frozen(struct task_str + */ + static inline int freezing(struct task_struct *p) + { +- return p->flags & PF_FREEZE; ++ return test_tsk_thread_flag(p, TIF_FREEZE); + } + + /* +@@ -1410,7 +1839,7 @@ static inline int freezing(struct task_s + */ + static inline void freeze(struct task_struct *p) + { +- p->flags |= PF_FREEZE; ++ set_tsk_thread_flag(p, TIF_FREEZE); + } + + /* +@@ -1431,7 +1860,8 @@ static inline int thaw_process(struct ta + */ + static inline void frozen_process(struct task_struct *p) + { +- p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN; ++ clear_tsk_thread_flag(p, 
TIF_FREEZE); ++ p->flags |= PF_FROZEN; + } + + extern void refrigerator(void); +diff -upr linux-2.6.16.orig/include/linux/sem.h linux-2.6.16-026test015/include/linux/sem.h +--- linux-2.6.16.orig/include/linux/sem.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/sem.h 2006-07-04 14:41:39.000000000 +0400 +@@ -155,6 +155,9 @@ static inline void exit_sem(struct task_ + } + #endif + ++int sysvipc_walk_sem(int (*func)(int, struct sem_array*, void *), void *arg); ++int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg); ++ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_SEM_H */ +diff -upr linux-2.6.16.orig/include/linux/shm.h linux-2.6.16-026test015/include/linux/shm.h +--- linux-2.6.16.orig/include/linux/shm.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/shm.h 2006-07-04 14:41:39.000000000 +0400 +@@ -86,6 +86,7 @@ struct shmid_kernel /* private to the ke + pid_t shm_cprid; + pid_t shm_lprid; + struct user_struct *mlock_user; ++ struct ipc_ids *_shm_ids; + }; + + /* shm_mode upper byte flags */ +@@ -104,6 +105,9 @@ static inline long do_shmat(int shmid, c + } + #endif + ++int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg); ++struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg); ++ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_SHM_H_ */ +diff -upr linux-2.6.16.orig/include/linux/shmem_fs.h linux-2.6.16-026test015/include/linux/shmem_fs.h +--- linux-2.6.16.orig/include/linux/shmem_fs.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/shmem_fs.h 2006-07-04 14:41:37.000000000 +0400 +@@ -19,6 +19,9 @@ struct shmem_inode_info { + swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ + struct list_head swaplist; /* chain of maybes on swap */ + struct inode vfs_inode; ++#ifdef CONFIG_USER_RESOURCE ++ struct user_beancounter *shmi_ub; ++#endif + }; + + struct shmem_sb_info { +diff -upr 
linux-2.6.16.orig/include/linux/signal.h linux-2.6.16-026test015/include/linux/signal.h +--- linux-2.6.16.orig/include/linux/signal.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/signal.h 2006-07-04 14:41:39.000000000 +0400 +@@ -3,6 +3,7 @@ + + #include <linux/list.h> + #include <linux/spinlock.h> ++#include <linux/slab.h> + #include <asm/signal.h> + #include <asm/siginfo.h> + +@@ -41,6 +42,9 @@ struct sigqueue { + int flags; + siginfo_t info; + struct user_struct *user; ++#ifdef CONFIG_USER_RESOURCE ++ struct user_beancounter *sig_ub; ++#endif + }; + + /* flags values. */ +@@ -263,6 +267,8 @@ extern int sigprocmask(int, sigset_t *, + struct pt_regs; + extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); + ++extern kmem_cache_t *sigqueue_cachep; ++ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_SIGNAL_H */ +diff -upr linux-2.6.16.orig/include/linux/skbuff.h linux-2.6.16-026test015/include/linux/skbuff.h +--- linux-2.6.16.orig/include/linux/skbuff.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/skbuff.h 2006-07-04 14:41:38.000000000 +0400 +@@ -19,6 +19,7 @@ + #include <linux/compiler.h> + #include <linux/time.h> + #include <linux/cache.h> ++#include <linux/ve_owner.h> + + #include <asm/atomic.h> + #include <asm/types.h> +@@ -211,6 +212,8 @@ enum { + * @tc_verd: traffic control verdict + */ + ++#include <ub/ub_sk.h> ++ + struct sk_buff { + /* These two members must be first. 
*/ + struct sk_buff *next; +@@ -294,13 +297,18 @@ struct sk_buff { + *data, + *tail, + *end; ++ struct skb_beancounter skb_bc; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(SKB, struct sk_buff, owner_env) ++ + #ifdef __KERNEL__ + /* + * Handling routines are only of interest to the kernel + */ + #include <linux/slab.h> ++#include <ub/ub_net.h> + + #include <asm/system.h> + +@@ -1007,6 +1015,8 @@ static inline int pskb_trim(struct sk_bu + */ + static inline void skb_orphan(struct sk_buff *skb) + { ++ ub_skb_uncharge(skb); ++ + if (skb->destructor) + skb->destructor(skb); + skb->destructor = NULL; +diff -upr linux-2.6.16.orig/include/linux/slab.h linux-2.6.16-026test015/include/linux/slab.h +--- linux-2.6.16.orig/include/linux/slab.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/slab.h 2006-07-04 14:41:37.000000000 +0400 +@@ -48,6 +48,26 @@ typedef struct kmem_cache kmem_cache_t; + #define SLAB_PANIC 0x00040000UL /* panic if kmem_cache_create() fails */ + #define SLAB_DESTROY_BY_RCU 0x00080000UL /* defer freeing pages to RCU */ + ++/* ++ * allocation rules: __GFP_UBC 0 ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * cache (SLAB_UBC) charge charge ++ * (usual caches: mm, vma, task_struct, ...) ++ * ++ * cache (SLAB_UBC | SLAB_NO_CHARGE) charge --- ++ * (ub_kmalloc) (kmalloc) ++ * ++ * cache (no UB flags) BUG() --- ++ * (nonub caches, mempools) ++ * ++ * pages charge --- ++ * (ub_vmalloc, (vmalloc, ++ * poll, fdsets, ...) non-ub allocs) ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ */ ++#define SLAB_UBC 0x20000000UL /* alloc space for ubs ... */ ++#define SLAB_NO_CHARGE 0x40000000UL /* ... 
but don't charge */ ++ + /* flags passed to a constructor func */ + #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ + #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */ +@@ -108,6 +128,8 @@ found: + return __kmalloc(size, flags); + } + ++#define ub_kmalloc(size, flags) kmalloc(size, ((flags) | __GFP_UBC)) ++ + extern void *kzalloc(size_t, gfp_t); + + /** +diff -upr linux-2.6.16.orig/include/linux/smp.h linux-2.6.16-026test015/include/linux/smp.h +--- linux-2.6.16.orig/include/linux/smp.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/smp.h 2006-07-04 14:41:37.000000000 +0400 +@@ -10,6 +10,9 @@ + + extern void cpu_idle(void); + ++struct pt_regs; ++typedef void (*smp_nmi_function)(struct pt_regs *regs, void *info); ++ + #ifdef CONFIG_SMP + + #include <linux/preempt.h> +@@ -49,6 +52,8 @@ extern int __cpu_up(unsigned int cpunum) + */ + extern void smp_cpus_done(unsigned int max_cpus); + ++extern int smp_nmi_call_function(smp_nmi_function func, void *info, int wait); ++ + /* + * Call a function on all other processors + */ +@@ -99,6 +104,12 @@ static inline void smp_send_reschedule(i + #define num_booting_cpus() 1 + #define smp_prepare_boot_cpu() do {} while (0) + ++static inline int smp_nmi_call_function(smp_nmi_function func, ++ void *info, int wait) ++{ ++ return 0; ++} ++ + #endif /* !SMP */ + + /* +diff -upr linux-2.6.16.orig/include/linux/socket.h linux-2.6.16-026test015/include/linux/socket.h +--- linux-2.6.16.orig/include/linux/socket.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/socket.h 2006-07-04 14:41:38.000000000 +0400 +@@ -300,6 +300,7 @@ extern int memcpy_toiovec(struct iovec * + extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen); + extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr); + extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); ++extern int 
vz_security_proto_check(int family, int type, int protocol); + + #endif + #endif /* not kernel and not glibc */ +diff -upr linux-2.6.16.orig/include/linux/swap.h linux-2.6.16-026test015/include/linux/swap.h +--- linux-2.6.16.orig/include/linux/swap.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/swap.h 2006-07-04 14:41:37.000000000 +0400 +@@ -80,6 +80,7 @@ struct address_space; + struct sysinfo; + struct writeback_control; + struct zone; ++struct user_beancounter; + + /* + * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of +@@ -119,6 +120,7 @@ enum { + /* + * The in-memory structure used to track swap areas. + */ ++struct user_beancounter; + struct swap_info_struct { + unsigned int flags; + int prio; /* swap priority */ +@@ -136,6 +138,9 @@ struct swap_info_struct { + unsigned int max; + unsigned int inuse_pages; + int next; /* next entry on swap list */ ++#ifdef CONFIG_USER_SWAP_ACCOUNTING ++ struct user_beancounter **swap_ubs; ++#endif + }; + + struct swap_list_t { +@@ -240,7 +245,7 @@ extern long total_swap_pages; + extern unsigned int nr_swapfiles; + extern struct swap_info_struct swap_info[]; + extern void si_swapinfo(struct sysinfo *); +-extern swp_entry_t get_swap_page(void); ++extern swp_entry_t get_swap_page(struct user_beancounter *); + extern swp_entry_t get_swap_page_of_type(int type); + extern int swap_duplicate(swp_entry_t); + extern int valid_swaphandles(swp_entry_t, unsigned long *); +@@ -253,7 +258,9 @@ extern int remove_exclusive_swap_page(st + struct backing_dev_info; + + extern spinlock_t swap_lock; +-extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page); ++struct page_beancounter; ++extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page, ++ struct page_beancounter **pb); + + /* linux/mm/thrash.c */ + extern struct mm_struct * swap_token_mm; +@@ -310,7 +317,7 @@ static inline int remove_exclusive_swap_ + return 0; + } + +-static inline swp_entry_t 
get_swap_page(void) ++static inline swp_entry_t get_swap_page(struct user_beancounter *ub) + { + swp_entry_t entry; + entry.val = 0; +diff -upr linux-2.6.16.orig/include/linux/sysctl.h linux-2.6.16-026test015/include/linux/sysctl.h +--- linux-2.6.16.orig/include/linux/sysctl.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/sysctl.h 2006-07-04 14:41:39.000000000 +0400 +@@ -148,6 +148,13 @@ enum + KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ + KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ + KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ ++ KERN_SILENCE_LEVEL=200, /* int: Console silence loglevel */ ++ KERN_ALLOC_FAIL_WARN=201, /* int: whether we'll print "alloc failure" */ ++ KERN_VIRT_PIDS=202, /* int: VE pids virtualization */ ++ KERN_VIRT_OSRELEASE=205,/* virtualization of utsname.release */ ++ KERN_FAIRSCHED_MAX_LATENCY=201, /* int: Max start_tag delta */ ++ KERN_VCPU_SCHED_TIMESLICE=202, ++ KERN_VCPU_TIMESLICE=203, + }; + + +@@ -397,10 +404,12 @@ enum + NET_TCP_CONG_CONTROL=110, + NET_TCP_ABC=111, + NET_IPV4_IPFRAG_MAX_DIST=112, ++ NET_TCP_USE_SG=245, + }; + + enum { + NET_IPV4_ROUTE_FLUSH=1, ++ NET_IPV4_ROUTE_SRC_CHECK=188, + NET_IPV4_ROUTE_MIN_DELAY=2, + NET_IPV4_ROUTE_MAX_DELAY=3, + NET_IPV4_ROUTE_GC_THRESH=4, +@@ -760,6 +769,12 @@ enum + FS_AIO_NR=18, /* current system-wide number of aio requests */ + FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */ + FS_INOTIFY=20, /* inotify submenu */ ++ FS_AT_VSYSCALL=21, /* int: to announce vsyscall data */ ++}; ++ ++/* /proc/sys/debug */ ++enum { ++ DBG_DECODE_CALLTRACES = 1, /* int: decode call traces on oops */ + }; + + /* /proc/sys/fs/quota/ */ +@@ -900,6 +915,8 @@ extern int proc_doulongvec_minmax(ctl_ta + void __user *, size_t *, loff_t *); + extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int, + struct file *, void __user *, size_t *, loff_t *); ++extern int 
proc_doutsstring(ctl_table *table, int write, struct file *, ++ void __user *, size_t *, loff_t *); + + extern int do_sysctl (int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, +@@ -954,6 +971,8 @@ extern ctl_handler sysctl_ms_jiffies; + */ + + /* A sysctl table is an array of struct ctl_table: */ ++struct ve_struct; ++ + struct ctl_table + { + int ctl_name; /* Binary ID */ +@@ -967,6 +986,7 @@ struct ctl_table + struct proc_dir_entry *de; /* /proc control block */ + void *extra1; + void *extra2; ++ struct ve_struct *owner_env; + }; + + /* struct ctl_table_header is used to maintain dynamic lists of +@@ -983,6 +1003,9 @@ struct ctl_table_header * register_sysct + int insert_at_head); + void unregister_sysctl_table(struct ctl_table_header * table); + ++ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr); ++void free_sysctl_clone(ctl_table *clone); ++ + #else /* __KERNEL__ */ + + #endif /* __KERNEL__ */ +diff -upr linux-2.6.16.orig/include/linux/tty.h linux-2.6.16-026test015/include/linux/tty.h +--- linux-2.6.16.orig/include/linux/tty.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/tty.h 2006-07-04 14:41:38.000000000 +0400 +@@ -238,8 +238,11 @@ struct tty_struct { + spinlock_t read_lock; + /* If the tty has a pending do_SAK, queue it here - akpm */ + struct work_struct SAK_work; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(TTY, struct tty_struct, owner_env) ++ + /* tty magic number */ + #define TTY_MAGIC 0x5401 + +@@ -266,6 +269,7 @@ struct tty_struct { + #define TTY_PTY_LOCK 16 /* pty private */ + #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ + #define TTY_HUPPED 18 /* Post driver->hangup() */ ++#define TTY_CHARGED 19 /* Charged as ub resource */ + + #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) + +diff -upr linux-2.6.16.orig/include/linux/tty_driver.h linux-2.6.16-026test015/include/linux/tty_driver.h +--- linux-2.6.16.orig/include/linux/tty_driver.h 
2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/tty_driver.h 2006-07-04 14:41:38.000000000 +0400 +@@ -115,6 +115,7 @@ + * character to the device. + */ + ++#include <linux/ve_owner.h> + #include <linux/fs.h> + #include <linux/list.h> + #include <linux/cdev.h> +@@ -214,9 +215,18 @@ struct tty_driver { + unsigned int set, unsigned int clear); + + struct list_head tty_drivers; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(TTYDRV, struct tty_driver, owner_env) ++ ++#ifdef CONFIG_LEGACY_PTYS ++extern struct tty_driver *pty_driver; ++extern struct tty_driver *pty_slave_driver; ++#endif ++ + extern struct list_head tty_drivers; ++extern rwlock_t tty_driver_guard; + + struct tty_driver *alloc_tty_driver(int lines); + void put_tty_driver(struct tty_driver *driver); +diff -upr linux-2.6.16.orig/include/linux/ve.h linux-2.6.16-026test015/include/linux/ve.h +--- linux-2.6.16.orig/include/linux/ve.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/ve.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,337 @@ ++/* ++ * include/linux/ve.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _LINUX_VE_H ++#define _LINUX_VE_H ++ ++#include <linux/config.h> ++ ++#ifndef __ENVID_T_DEFINED__ ++typedef unsigned envid_t; ++#define __ENVID_T_DEFINED__ ++#endif ++ ++#include <linux/types.h> ++#include <linux/capability.h> ++#include <linux/utsname.h> ++#include <linux/sysctl.h> ++#include <linux/vzstat.h> ++#include <linux/kobject.h> ++ ++#ifdef VZMON_DEBUG ++# define VZTRACE(fmt,args...) \ ++ printk(KERN_DEBUG fmt, ##args) ++#else ++# define VZTRACE(fmt,args...) 
++#endif /* VZMON_DEBUG */ ++ ++struct tty_driver; ++struct devpts_config; ++struct task_struct; ++struct new_utsname; ++struct file_system_type; ++struct icmp_mib; ++struct ip_mib; ++struct tcp_mib; ++struct udp_mib; ++struct linux_mib; ++struct fib_info; ++struct fib_rule; ++struct veip_struct; ++struct ve_monitor; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++struct fib_table; ++struct devcnfv4_struct; ++#ifdef CONFIG_VE_IPTABLES ++struct xt_af; ++struct xt_table; ++struct xt_target; ++struct ip_conntrack; ++typedef unsigned int (*ip_nat_helper_func)(void); ++struct ve_ip_conntrack { ++ struct list_head *_ip_conntrack_hash; ++ struct list_head _ip_conntrack_expect_list; ++ struct list_head _ip_conntrack_unconfirmed; ++ struct ip_conntrack_protocol ** _ip_ct_protos; ++ struct list_head _ip_conntrack_helpers; ++ int _ip_conntrack_max; ++ int _ip_conntrack_vmalloc; ++ atomic_t _ip_conntrack_count; ++ void (*_ip_conntrack_destroyed)(struct ip_conntrack *conntrack); ++#ifdef CONFIG_SYSCTL ++ unsigned long _ip_ct_tcp_timeouts[10]; ++ unsigned long _ip_ct_udp_timeout; ++ unsigned long _ip_ct_udp_timeout_stream; ++ unsigned long _ip_ct_icmp_timeout; ++ unsigned long _ip_ct_generic_timeout; ++ unsigned int _ip_ct_log_invalid; ++ unsigned long _ip_ct_tcp_timeout_max_retrans; ++ int _ip_ct_tcp_loose; ++ int _ip_ct_tcp_be_liberal; ++ int _ip_ct_tcp_max_retrans; ++ struct ctl_table_header *_ip_ct_sysctl_header; ++ ctl_table *_ip_ct_net_table; ++ ctl_table *_ip_ct_ipv4_table; ++ ctl_table *_ip_ct_netfilter_table; ++ ctl_table *_ip_ct_sysctl_table; ++#endif /*CONFIG_SYSCTL*/ ++ ++ struct ip_nat_protocol **_ip_nat_protos; ++ ip_nat_helper_func _ip_nat_ftp_hook; ++ ip_nat_helper_func _ip_nat_irc_hook; ++ struct list_head *_ip_nat_bysource; ++ struct xt_table *_ip_nat_table; ++ ++ /* resource accounting */ ++ struct user_beancounter *ub; ++}; ++#endif ++#endif ++ ++#define UIDHASH_BITS_VE 6 ++#define UIDHASH_SZ_VE (1 << UIDHASH_BITS_VE) ++ ++struct 
ve_cpu_stats { ++ cycles_t idle_time; ++ cycles_t iowait_time; ++ cycles_t strt_idle_time; ++ cycles_t used_time; ++ seqcount_t stat_lock; ++ int nr_running; ++ int nr_unint; ++ int nr_iowait; ++ cputime64_t user; ++ cputime64_t nice; ++ cputime64_t system; ++} ____cacheline_aligned; ++ ++struct ve_struct { ++ struct ve_struct *prev; ++ struct ve_struct *next; ++ ++ envid_t veid; ++ struct task_struct *init_entry; ++ struct list_head vetask_lh; ++ kernel_cap_t cap_default; ++ atomic_t pcounter; ++ /* ref counter to ve from ipc */ ++ atomic_t counter; ++ unsigned int class_id; ++ struct veip_struct *veip; ++ struct rw_semaphore op_sem; ++ int is_running; ++ int is_locked; ++ int virt_pids; ++ /* see vzcalluser.h for VE_FEATURE_XXX definitions */ ++ __u64 features; ++ ++/* VE's root */ ++ struct vfsmount *fs_rootmnt; ++ struct dentry *fs_root; ++ ++/* sysctl */ ++ struct new_utsname *utsname; ++ struct list_head sysctl_lh; ++ struct ctl_table_header *kern_header; ++ struct ctl_table *kern_table; ++ struct ctl_table_header *quota_header; ++ struct ctl_table *quota_table; ++ struct file_system_type *proc_fstype; ++ struct vfsmount *proc_mnt; ++ struct proc_dir_entry *proc_root; ++ struct proc_dir_entry *proc_sys_root; ++ struct proc_dir_entry *_proc_net; ++ struct proc_dir_entry *_proc_net_stat; ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ struct proc_dir_entry *_proc_net_devsnmp6; ++#endif ++ ++/* SYSV IPC */ ++ struct ipc_ids *_shm_ids; ++ struct ipc_ids *_msg_ids; ++ struct ipc_ids *_sem_ids; ++ int _used_sems; ++ int _shm_tot; ++ size_t _shm_ctlmax; ++ size_t _shm_ctlall; ++ int _shm_ctlmni; ++ int _msg_ctlmax; ++ int _msg_ctlmni; ++ int _msg_ctlmnb; ++ int _sem_ctls[4]; ++ ++/* BSD pty's */ ++ struct tty_driver *pty_driver; ++ struct tty_driver *pty_slave_driver; ++ ++#ifdef CONFIG_UNIX98_PTYS ++ struct tty_driver *ptm_driver; ++ struct tty_driver *pts_driver; ++ struct idr *allocated_ptys; ++ struct file_system_type *devpts_fstype; ++ struct 
vfsmount *devpts_mnt; ++ struct dentry *devpts_root; ++ struct devpts_config *devpts_config; ++#endif ++ ++ struct file_system_type *shmem_fstype; ++ struct vfsmount *shmem_mnt; ++#ifdef CONFIG_SYSFS ++ struct file_system_type *sysfs_fstype; ++ struct vfsmount *sysfs_mnt; ++ struct super_block *sysfs_sb; ++ struct sysfs_dirent *sysfs_root; ++#endif ++ struct subsystem *class_subsys; ++ struct subsystem *class_obj_subsys; ++ struct class *net_class; ++ ++/* User uids hash */ ++ struct list_head uidhash_table[UIDHASH_SZ_VE]; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ struct hlist_head _net_dev_head; ++ struct hlist_head _net_dev_index_head; ++ struct net_device *_net_dev_base, **_net_dev_tail; ++ int ifindex; ++ struct net_device *_loopback_dev; ++ struct net_device *_venet_dev; ++ struct ipv4_devconf *_ipv4_devconf; ++ struct ipv4_devconf *_ipv4_devconf_dflt; ++ struct ctl_table_header *forward_header; ++ struct ctl_table *forward_table; ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ struct ipv6_devconf *_ipv6_devconf; ++ struct ipv6_devconf *_ipv6_devconf_dflt; ++#endif ++#endif ++ unsigned long rt_flush_required; ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ struct neigh_table *ve_nd_tbl; ++#endif ++ struct neigh_table *ve_arp_tbl; ++ ++/* per VE CPU stats*/ ++ struct timespec start_timespec; ++ u64 start_jiffies; ++ cycles_t start_cycles; ++ unsigned long avenrun[3]; /* loadavg data */ ++ ++ cycles_t cpu_used_ve; ++ struct kstat_lat_pcpu_struct sched_lat_ve; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ struct hlist_head *_fib_info_hash; ++ struct hlist_head *_fib_info_laddrhash; ++ int _fib_hash_size; ++ int _fib_info_cnt; ++ ++ struct fib_rule *_local_rule; ++ struct fib_rule *_fib_rules; ++#ifdef CONFIG_IP_MULTIPLE_TABLES ++ /* XXX: why a magic constant? 
*/ ++ struct fib_table *_fib_tables[256]; /* RT_TABLE_MAX - for now */ ++#else ++ struct fib_table *_main_table; ++ struct fib_table *_local_table; ++#endif ++ struct icmp_mib *_icmp_statistics[2]; ++ struct ipstats_mib *_ip_statistics[2]; ++ struct tcp_mib *_tcp_statistics[2]; ++ struct udp_mib *_udp_statistics[2]; ++ struct linux_mib *_net_statistics[2]; ++ struct venet_stat *stat; ++#ifdef CONFIG_VE_IPTABLES ++/* core/netfilter.c virtualization */ ++ void *_nf_hooks; ++ struct xt_table *_ve_ipt_filter_pf; /* packet_filter struct */ ++ struct xt_table *_ve_ip6t_filter_pf; ++ struct xt_table *_ipt_mangle_table; ++ struct xt_table *_ip6t_mangle_table; ++ struct xt_af *_xt; ++ struct xt_target *_ipt_standard_target; ++ struct xt_target *_ip6t_standard_target; ++ ++ __u64 _iptables_modules; ++ struct ve_ip_conntrack *_ip_conntrack; ++#endif /* CONFIG_VE_IPTABLES */ ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ struct fib6_table *_fib6_table; ++ struct ipstats_mib *_ipv6_statistics[2]; ++ struct icmpv6_mib *_icmpv6_statistics[2]; ++ struct udp_mib *_udp_stats_in6[2]; ++#endif ++#endif ++ wait_queue_head_t *_log_wait; ++ unsigned long *_log_start; ++ unsigned long *_log_end; ++ unsigned long *_logged_chars; ++ char *log_buf; ++#define VE_DEFAULT_LOG_BUF_LEN 4096 ++ ++ struct ve_cpu_stats ve_cpu_stats[NR_CPUS] ____cacheline_aligned; ++ unsigned long down_at; ++ struct list_head cleanup_list; ++ ++ unsigned long jiffies_fixup; ++ unsigned char disable_net; ++ unsigned char sparse_vpid; ++ struct ve_monitor *monitor; ++ struct proc_dir_entry *monitor_proc; ++ unsigned long meminfo_val; ++}; ++ ++#define VE_CPU_STATS(ve, cpu) (&((ve)->ve_cpu_stats[(cpu)])) ++ ++extern int nr_ve; ++ ++#ifdef CONFIG_VE ++ ++int get_device_perms_ve(int dev_type, dev_t dev, int access_mode); ++void do_env_cleanup(struct ve_struct *envid); ++void do_update_load_avg_ve(void); ++void do_env_free(struct ve_struct *ptr); ++ ++#define ve_utsname (*get_exec_env()->utsname) ++ 
++static inline struct ve_struct *get_ve(struct ve_struct *ptr) ++{ ++ if (ptr != NULL) ++ atomic_inc(&ptr->counter); ++ return ptr; ++} ++ ++static inline void put_ve(struct ve_struct *ptr) ++{ ++ if (ptr && atomic_dec_and_test(&ptr->counter)) { ++ if (atomic_read(&ptr->pcounter) > 0) ++ BUG(); ++ if (ptr->is_running) ++ BUG(); ++ do_env_free(ptr); ++ } ++} ++ ++#ifdef CONFIG_FAIRSCHED ++#define ve_cpu_online_map(ve, mask) fairsched_cpu_online_map(ve->veid, mask) ++#else ++#define ve_cpu_online_map(ve, mask) do { *(mask) = cpu_online_map; } while (0) ++#endif ++#else /* CONFIG_VE */ ++#define ve_utsname system_utsname ++#define get_ve(ve) (NULL) ++#define put_ve(ve) do { } while (0) ++#endif /* CONFIG_VE */ ++ ++#endif /* _LINUX_VE_H */ +diff -upr linux-2.6.16.orig/include/linux/ve_owner.h linux-2.6.16-026test015/include/linux/ve_owner.h +--- linux-2.6.16.orig/include/linux/ve_owner.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/ve_owner.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,32 @@ ++/* ++ * include/linux/ve_owner.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __VE_OWNER_H__ ++#define __VE_OWNER_H__ ++ ++#include <linux/config.h> ++#include <linux/vmalloc.h> ++ ++ ++#define DCL_VE_OWNER(name, type, member) ++ /* prototype declares static inline functions */ ++ ++#define DCL_VE_OWNER_PROTO(name, type, member) \ ++type; \ ++static inline struct ve_struct *VE_OWNER_##name(const type *obj) \ ++{ \ ++ return obj->member; \ ++} \ ++static inline void SET_VE_OWNER_##name(type *obj, struct ve_struct *ve) \ ++{ \ ++ obj->member = ve; \ ++} ++ ++#endif /* __VE_OWNER_H__ */ +diff -upr linux-2.6.16.orig/include/linux/ve_proto.h linux-2.6.16-026test015/include/linux/ve_proto.h +--- linux-2.6.16.orig/include/linux/ve_proto.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/ve_proto.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,75 @@ ++/* ++ * include/linux/ve_proto.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __VE_H__ ++#define __VE_H__ ++ ++#ifdef CONFIG_VE ++ ++extern struct semaphore ve_call_guard; ++extern rwlock_t ve_call_lock; ++ ++#ifdef CONFIG_SYSVIPC ++extern void prepare_ipc(void); ++extern int init_ve_ipc(struct ve_struct *); ++extern void fini_ve_ipc(struct ve_struct *); ++extern void ve_ipc_cleanup(void); ++#endif ++ ++#ifdef CONFIG_UNIX98_PTYS ++extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */ ++extern struct tty_driver *pts_driver; /* Unix98 pty slaves; for /dev/ptmx */ ++#endif ++ ++extern rwlock_t tty_driver_guard; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++void ip_fragment_cleanup(struct ve_struct *envid); ++void tcp_v4_kill_ve_sockets(struct ve_struct *envid); ++struct fib_table * fib_hash_init(int id); ++int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr); ++extern int main_loopback_init(struct net_device*); ++int venet_init(void); ++#endif ++ ++extern struct ve_struct *ve_list_head; 
++extern rwlock_t ve_list_guard; ++extern struct ve_struct *get_ve_by_id(envid_t); ++extern struct ve_struct *__find_ve_by_id(envid_t); ++ ++struct env_create_param2; ++extern int real_env_create(envid_t veid, unsigned flags, u32 class_id, ++ struct env_create_param2 *data, int datalen); ++ ++extern int do_setdevperms(envid_t veid, unsigned type, ++ dev_t dev, unsigned mask); ++ ++#define VE_HOOK_INIT 0 ++#define VE_HOOK_FINI 1 ++#define VE_MAX_HOOKS 2 ++ ++typedef int ve_hookfn(unsigned int hooknum, void *data); ++ ++struct ve_hook ++{ ++ struct list_head list; ++ ve_hookfn *hook; ++ ve_hookfn *undo; ++ struct module *owner; ++ int hooknum; ++ /* Functions are called in ascending priority. */ ++ int priority; ++}; ++ ++extern int ve_hook_register(struct ve_hook *vh); ++extern void ve_hook_unregister(struct ve_hook *vh); ++ ++#endif ++#endif +diff -upr linux-2.6.16.orig/include/linux/ve_task.h linux-2.6.16-026test015/include/linux/ve_task.h +--- linux-2.6.16.orig/include/linux/ve_task.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/ve_task.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,34 @@ ++/* ++ * include/linux/ve_task.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __VE_TASK_H__ ++#define __VE_TASK_H__ ++ ++#include <linux/seqlock.h> ++ ++struct ve_task_info { ++/* virtualization */ ++ struct ve_struct *owner_env; ++ struct ve_struct *exec_env; ++ struct list_head vetask_list; ++ struct dentry *glob_proc_dentry; ++/* statistics: scheduling latency */ ++ cycles_t sleep_time; ++ cycles_t sched_time; ++ cycles_t sleep_stamp; ++ cycles_t wakeup_stamp; ++ seqcount_t wakeup_lock; ++}; ++ ++#define VE_TASK_INFO(task) (&(task)->ve_task_info) ++#define VE_TASK_LIST_2_TASK(lh) \ ++ list_entry(lh, struct task_struct, ve_task_info.vetask_list) ++ ++#endif /* __VE_TASK_H__ */ +diff -upr linux-2.6.16.orig/include/linux/venet.h linux-2.6.16-026test015/include/linux/venet.h +--- linux-2.6.16.orig/include/linux/venet.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/venet.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,70 @@ ++/* ++ * include/linux/venet.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef _VENET_H ++#define _VENET_H ++ ++#include <linux/list.h> ++#include <linux/spinlock.h> ++#include <linux/vzcalluser.h> ++ ++#define VEIP_HASH_SZ 512 ++ ++struct ve_struct; ++struct venet_stat; ++struct ip_entry_struct ++{ ++ __u32 key[4]; ++ int family; ++ struct ve_struct *active_env; ++ struct venet_stat *stat; ++ struct veip_struct *veip; ++ struct list_head ip_hash; ++ struct list_head ve_list; ++}; ++ ++struct veip_struct ++{ ++ struct list_head src_lh; ++ struct list_head dst_lh; ++ struct list_head ip_lh; ++ struct list_head list; ++ envid_t veid; ++}; ++ ++/* veip_hash_lock should be taken for write by caller */ ++void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip); ++/* veip_hash_lock should be taken for write by caller */ ++void ip_entry_unhash(struct ip_entry_struct *entry); ++/* veip_hash_lock should be taken for read by caller */ ++struct ip_entry_struct *ip_entry_lookup(u32 addr); ++struct ip_entry_struct *venet_entry_lookup(u32 *addr, int family); ++ ++/* veip_hash_lock should be taken for read by caller */ ++struct veip_struct *veip_find(envid_t veid); ++/* veip_hash_lock should be taken for write by caller */ ++struct veip_struct *veip_findcreate(envid_t veid); ++/* veip_hash_lock should be taken for write by caller */ ++void veip_put(struct veip_struct *veip); ++ ++int veip_start(struct ve_struct *ve); ++void veip_stop(struct ve_struct *ve); ++int veip_entry_add(struct ve_struct *ve, struct sockaddr *addr); ++int veip_entry_del(envid_t veid, struct sockaddr *addr); ++int venet_change_skb_owner(struct sk_buff *skb); ++ ++extern struct list_head ip_entry_hash_table[]; ++extern rwlock_t veip_hash_lock; ++ ++#ifdef CONFIG_PROC_FS ++int veip_seq_show(struct seq_file *m, void *v); ++#endif ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/veprintk.h linux-2.6.16-026test015/include/linux/veprintk.h +--- linux-2.6.16.orig/include/linux/veprintk.h 2006-07-04 14:41:41.000000000 +0400 ++++ 
linux-2.6.16-026test015/include/linux/veprintk.h 2006-07-04 14:41:38.000000000 +0400 +@@ -0,0 +1,38 @@ ++/* ++ * include/linux/veprintk.h ++ * ++ * Copyright (C) 2006 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __VE_PRINTK_H__ ++#define __VE_PRINTK_H__ ++ ++#ifdef CONFIG_VE ++ ++#define ve_log_wait (*(get_exec_env()->_log_wait)) ++#define ve_log_start (*(get_exec_env()->_log_start)) ++#define ve_log_end (*(get_exec_env()->_log_end)) ++#define ve_logged_chars (*(get_exec_env()->_logged_chars)) ++#define ve_log_buf (get_exec_env()->log_buf) ++#define ve_log_buf_len (ve_is_super(get_exec_env()) ? \ ++ log_buf_len : VE_DEFAULT_LOG_BUF_LEN) ++#define VE_LOG_BUF_MASK (ve_log_buf_len - 1) ++#define VE_LOG_BUF(idx) (ve_log_buf[(idx) & VE_LOG_BUF_MASK]) ++ ++#else ++ ++#define ve_log_wait log_wait ++#define ve_log_start log_start ++#define ve_log_end log_end ++#define ve_logged_chars logged_chars ++#define ve_log_buf log_buf ++#define ve_log_buf_len log_buf_len ++#define VE_LOG_BUF_MASK LOG_BUF_MASK ++#define VE_LOG_BUF(idx) LOG_BUF(idx) ++ ++#endif /* CONFIG_VE */ ++#endif /* __VE_PRINTK_H__ */ +diff -upr linux-2.6.16.orig/include/linux/virtinfo.h linux-2.6.16-026test015/include/linux/virtinfo.h +--- linux-2.6.16.orig/include/linux/virtinfo.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/virtinfo.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,52 @@ ++/* ++ * include/linux/virtinfo.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __LINUX_VIRTINFO_H ++#define __LINUX_VIRTINFO_H ++ ++#include <linux/kernel.h> ++#include <linux/page-flags.h> ++#include <linux/rwsem.h> ++#include <linux/notifier.h> ++ ++struct vnotifier_block ++{ ++ int (*notifier_call)(struct vnotifier_block *self, ++ unsigned long, void *, int); ++ struct vnotifier_block *next; ++ int priority; ++}; ++ ++void virtinfo_notifier_register(int type, struct vnotifier_block *nb); ++void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb); ++int virtinfo_notifier_call(int type, unsigned long n, void *data); ++ ++struct meminfo { ++ struct sysinfo si; ++ unsigned long active, inactive; ++ unsigned long cache, swapcache; ++ unsigned long committed_space; ++ unsigned long allowed; ++ struct page_state ps; ++ unsigned long vmalloc_total, vmalloc_used, vmalloc_largest; ++}; ++ ++#define VIRTINFO_MEMINFO 0 ++#define VIRTINFO_ENOUGHMEM 1 ++ ++enum virt_info_types { ++ VITYPE_GENERAL, ++ VITYPE_FAUDIT, ++ VITYPE_QUOTA, ++ ++ VIRT_TYPES ++}; ++ ++#endif /* __LINUX_VIRTINFO_H */ +diff -upr linux-2.6.16.orig/include/linux/vmalloc.h linux-2.6.16-026test015/include/linux/vmalloc.h +--- linux-2.6.16.orig/include/linux/vmalloc.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/linux/vmalloc.h 2006-07-04 14:41:37.000000000 +0400 +@@ -18,6 +18,10 @@ + #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ + #endif + ++/* align size to 2^n page boundary */ ++#define POWER2_PAGE_ALIGN(size) \ ++ ((typeof(size))(1UL << (PAGE_SHIFT + get_order(size)))) ++ + struct vm_struct { + void *addr; + unsigned long size; +@@ -32,10 +36,14 @@ struct vm_struct { + * Highlevel APIs for driver use + */ + extern void *vmalloc(unsigned long size); ++extern void *ub_vmalloc(unsigned long size); + extern void *vmalloc_node(unsigned long size, int node); ++extern void *ub_vmalloc_node(unsigned long size, int node); + extern void *vmalloc_exec(unsigned long size); + extern void *vmalloc_32(unsigned long 
size); + extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); ++extern void *vmalloc_best(unsigned long size); ++extern void *ub_vmalloc_best(unsigned long size); + extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, + pgprot_t prot); + extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, +@@ -52,6 +60,9 @@ extern void vunmap(void *addr); + extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); + extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, + unsigned long start, unsigned long end); ++extern struct vm_struct * get_vm_area_best(unsigned long size, ++ unsigned long flags); ++extern void vprintstat(void); + extern struct vm_struct *get_vm_area_node(unsigned long size, + unsigned long flags, int node); + extern struct vm_struct *remove_vm_area(void *addr); +diff -upr linux-2.6.16.orig/include/linux/vsched.h linux-2.6.16-026test015/include/linux/vsched.h +--- linux-2.6.16.orig/include/linux/vsched.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vsched.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,26 @@ ++/* ++ * include/linux/vsched.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __VSCHED_H__ ++#define __VSCHED_H__ ++ ++#include <linux/config.h> ++#include <linux/cache.h> ++#include <linux/fairsched.h> ++#include <linux/sched.h> ++ ++extern int vsched_create(int id, struct fairsched_node *node); ++extern int vsched_destroy(struct vcpu_scheduler *vsched); ++ ++extern int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched); ++ ++extern int vcpu_online(int cpu); ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/vzcalluser.h linux-2.6.16-026test015/include/linux/vzcalluser.h +--- linux-2.6.16.orig/include/linux/vzcalluser.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzcalluser.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,228 @@ ++/* ++ * include/linux/vzcalluser.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _LINUX_VZCALLUSER_H ++#define _LINUX_VZCALLUSER_H ++ ++#include <linux/types.h> ++#include <linux/ioctl.h> ++ ++#define KERN_VZ_PRIV_RANGE 51 ++ ++#ifndef __ENVID_T_DEFINED__ ++typedef unsigned envid_t; ++#define __ENVID_T_DEFINED__ ++#endif ++ ++/* ++ * VE management ioctls ++ */ ++ ++struct vzctl_old_env_create { ++ envid_t veid; ++ unsigned flags; ++#define VE_CREATE 1 /* Create VE, VE_ENTER added automatically */ ++#define VE_EXCLUSIVE 2 /* Fail if exists */ ++#define VE_ENTER 4 /* Enter existing VE */ ++#define VE_TEST 8 /* Test if VE exists */ ++#define VE_LOCK 16 /* Do not allow entering created VE */ ++#define VE_SKIPLOCK 32 /* Allow entering embrion VE */ ++ __u32 addr; ++}; ++ ++struct vzctl_mark_env_to_down { ++ envid_t veid; ++}; ++ ++struct vzctl_setdevperms { ++ envid_t veid; ++ unsigned type; ++#define VE_USE_MAJOR 010 /* Test MAJOR supplied in rule */ ++#define VE_USE_MINOR 030 /* Test MINOR supplied in rule */ ++#define VE_USE_MASK 030 /* Testing mask, VE_USE_MAJOR|VE_USE_MINOR */ ++ unsigned dev; ++ unsigned mask; ++}; ++ ++struct 
vzctl_ve_netdev { ++ envid_t veid; ++ int op; ++#define VE_NETDEV_ADD 1 ++#define VE_NETDEV_DEL 2 ++ char *dev_name; ++}; ++ ++struct vzctl_ve_meminfo { ++ envid_t veid; ++ unsigned long val; ++}; ++ ++/* these masks represent modules */ ++#define VE_IP_IPTABLES_MOD (1U<<0) ++#define VE_IP_FILTER_MOD (1U<<1) ++#define VE_IP_MANGLE_MOD (1U<<2) ++#define VE_IP_MATCH_LIMIT_MOD (1U<<3) ++#define VE_IP_MATCH_MULTIPORT_MOD (1U<<4) ++#define VE_IP_MATCH_TOS_MOD (1U<<5) ++#define VE_IP_TARGET_TOS_MOD (1U<<6) ++#define VE_IP_TARGET_REJECT_MOD (1U<<7) ++#define VE_IP_TARGET_TCPMSS_MOD (1U<<8) ++#define VE_IP_MATCH_TCPMSS_MOD (1U<<9) ++#define VE_IP_MATCH_TTL_MOD (1U<<10) ++#define VE_IP_TARGET_LOG_MOD (1U<<11) ++#define VE_IP_MATCH_LENGTH_MOD (1U<<12) ++#define VE_IP_CONNTRACK_MOD (1U<<14) ++#define VE_IP_CONNTRACK_FTP_MOD (1U<<15) ++#define VE_IP_CONNTRACK_IRC_MOD (1U<<16) ++#define VE_IP_MATCH_CONNTRACK_MOD (1U<<17) ++#define VE_IP_MATCH_STATE_MOD (1U<<18) ++#define VE_IP_MATCH_HELPER_MOD (1U<<19) ++#define VE_IP_NAT_MOD (1U<<20) ++#define VE_IP_NAT_FTP_MOD (1U<<21) ++#define VE_IP_NAT_IRC_MOD (1U<<22) ++#define VE_IP_TARGET_REDIRECT_MOD (1U<<23) ++ ++/* these masks represent modules with their dependences */ ++#define VE_IP_IPTABLES (VE_IP_IPTABLES_MOD) ++#define VE_IP_FILTER (VE_IP_FILTER_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MANGLE (VE_IP_MANGLE_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_LIMIT (VE_IP_MATCH_LIMIT_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_MULTIPORT (VE_IP_MATCH_MULTIPORT_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_TOS (VE_IP_MATCH_TOS_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_TARGET_TOS (VE_IP_TARGET_TOS_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_TARGET_REJECT (VE_IP_TARGET_REJECT_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_TARGET_TCPMSS (VE_IP_TARGET_TCPMSS_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_TCPMSS (VE_IP_MATCH_TCPMSS_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_TTL (VE_IP_MATCH_TTL_MOD \ ++ | VE_IP_IPTABLES) 
++#define VE_IP_TARGET_LOG (VE_IP_TARGET_LOG_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_LENGTH (VE_IP_MATCH_LENGTH_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_CONNTRACK (VE_IP_CONNTRACK_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_CONNTRACK_FTP (VE_IP_CONNTRACK_FTP_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_CONNTRACK_IRC (VE_IP_CONNTRACK_IRC_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_MATCH_CONNTRACK (VE_IP_MATCH_CONNTRACK_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_MATCH_STATE (VE_IP_MATCH_STATE_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_MATCH_HELPER (VE_IP_MATCH_HELPER_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_NAT (VE_IP_NAT_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_NAT_FTP (VE_IP_NAT_FTP_MOD \ ++ | VE_IP_NAT | VE_IP_CONNTRACK_FTP) ++#define VE_IP_NAT_IRC (VE_IP_NAT_IRC_MOD \ ++ | VE_IP_NAT | VE_IP_CONNTRACK_IRC) ++#define VE_IP_TARGET_REDIRECT (VE_IP_TARGET_REDIRECT_MOD \ ++ | VE_IP_NAT) ++ ++/* safe iptables mask to be used by default */ ++#define VE_IP_DEFAULT \ ++ (VE_IP_IPTABLES | \ ++ VE_IP_FILTER | VE_IP_MANGLE | \ ++ VE_IP_MATCH_LIMIT | VE_IP_MATCH_MULTIPORT | \ ++ VE_IP_MATCH_TOS | VE_IP_TARGET_REJECT | \ ++ VE_IP_TARGET_TCPMSS | VE_IP_MATCH_TCPMSS | \ ++ VE_IP_MATCH_TTL | VE_IP_MATCH_LENGTH) ++ ++#define VE_IPT_CMP(x,y) (((x) & (y)) == (y)) ++ ++struct vzctl_env_create_cid { ++ envid_t veid; ++ unsigned flags; ++ __u32 class_id; ++}; ++ ++struct vzctl_env_create { ++ envid_t veid; ++ unsigned flags; ++ __u32 class_id; ++}; ++ ++struct env_create_param { ++ __u64 iptables_mask; ++}; ++ ++#define VZCTL_ENV_CREATE_DATA_MINLEN sizeof(struct env_create_param) ++ ++struct env_create_param2 { ++ __u64 iptables_mask; ++ __u64 feature_mask; ++#define VE_FEATURE_SYSFS (1ULL << 0) ++ __u32 total_vcpus; /* 0 - don't care, same as in host */ ++}; ++#define VZCTL_ENV_CREATE_DATA_MAXLEN sizeof(struct env_create_param2) ++ ++typedef struct env_create_param2 env_create_param_t; ++ ++struct vzctl_env_create_data { ++ envid_t veid; ++ unsigned flags; ++ __u32 
class_id; ++ env_create_param_t *data; ++ int datalen; ++}; ++ ++struct vz_load_avg { ++ int val_int; ++ int val_frac; ++}; ++ ++struct vz_cpu_stat { ++ unsigned long user_jif; ++ unsigned long nice_jif; ++ unsigned long system_jif; ++ unsigned long uptime_jif; ++ __u64 idle_clk; ++ __u64 strv_clk; ++ __u64 uptime_clk; ++ struct vz_load_avg avenrun[3]; /* loadavg data */ ++}; ++ ++struct vzctl_cpustatctl { ++ envid_t veid; ++ struct vz_cpu_stat *cpustat; ++}; ++ ++#define VZCTLTYPE '.' ++#define VZCTL_OLD_ENV_CREATE _IOW(VZCTLTYPE, 0, \ ++ struct vzctl_old_env_create) ++#define VZCTL_MARK_ENV_TO_DOWN _IOW(VZCTLTYPE, 1, \ ++ struct vzctl_mark_env_to_down) ++#define VZCTL_SETDEVPERMS _IOW(VZCTLTYPE, 2, \ ++ struct vzctl_setdevperms) ++#define VZCTL_ENV_CREATE_CID _IOW(VZCTLTYPE, 4, \ ++ struct vzctl_env_create_cid) ++#define VZCTL_ENV_CREATE _IOW(VZCTLTYPE, 5, \ ++ struct vzctl_env_create) ++#define VZCTL_GET_CPU_STAT _IOW(VZCTLTYPE, 6, \ ++ struct vzctl_cpustatctl) ++#define VZCTL_ENV_CREATE_DATA _IOW(VZCTLTYPE, 10, \ ++ struct vzctl_env_create_data) ++#define VZCTL_VE_NETDEV _IOW(VZCTLTYPE, 11, \ ++ struct vzctl_ve_netdev) ++#define VZCTL_VE_MEMINFO _IOW(VZCTLTYPE, 13, \ ++ struct vzctl_ve_meminfo) ++ ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/vzctl.h linux-2.6.16-026test015/include/linux/vzctl.h +--- linux-2.6.16.orig/include/linux/vzctl.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzctl.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,30 @@ ++/* ++ * include/linux/vzctl.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef _LINUX_VZCTL_H ++#define _LINUX_VZCTL_H ++ ++#include <linux/list.h> ++ ++struct module; ++struct inode; ++struct file; ++struct vzioctlinfo { ++ unsigned type; ++ int (*func)(struct inode *, struct file *, ++ unsigned int, unsigned long); ++ struct module *owner; ++ struct list_head list; ++}; ++ ++extern void vzioctl_register(struct vzioctlinfo *inf); ++extern void vzioctl_unregister(struct vzioctlinfo *inf); ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/vzctl_quota.h linux-2.6.16-026test015/include/linux/vzctl_quota.h +--- linux-2.6.16.orig/include/linux/vzctl_quota.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzctl_quota.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,43 @@ ++/* ++ * include/linux/vzctl_quota.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __LINUX_VZCTL_QUOTA_H__ ++#define __LINUX_VZCTL_QUOTA_H__ ++ ++/* ++ * Quota management ioctl ++ */ ++ ++struct vz_quota_stat; ++struct vzctl_quotactl { ++ int cmd; ++ unsigned int quota_id; ++ struct vz_quota_stat *qstat; ++ char *ve_root; ++}; ++ ++struct vzctl_quotaugidctl { ++ int cmd; /* subcommand */ ++ unsigned int quota_id; /* quota id where it applies to */ ++ unsigned int ugid_index;/* for reading statistic. 
index of first ++ uid/gid record to read */ ++ unsigned int ugid_size; /* size of ugid_buf array */ ++ void *addr; /* user-level buffer */ ++}; ++ ++#define VZDQCTLTYPE '+' ++#define VZCTL_QUOTA_CTL _IOWR(VZDQCTLTYPE, 1, \ ++ struct vzctl_quotactl) ++#define VZCTL_QUOTA_NEW_CTL _IOWR(VZDQCTLTYPE, 2, \ ++ struct vzctl_quotactl) ++#define VZCTL_QUOTA_UGID_CTL _IOWR(VZDQCTLTYPE, 3, \ ++ struct vzctl_quotaugidctl) ++ ++#endif /* __LINUX_VZCTL_QUOTA_H__ */ +diff -upr linux-2.6.16.orig/include/linux/vzctl_venet.h linux-2.6.16-026test015/include/linux/vzctl_venet.h +--- linux-2.6.16.orig/include/linux/vzctl_venet.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzctl_venet.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,36 @@ ++/* ++ * include/linux/vzctl_venet.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _VZCTL_VENET_H ++#define _VZCTL_VENET_H ++ ++#include <linux/types.h> ++#include <linux/ioctl.h> ++ ++#ifndef __ENVID_T_DEFINED__ ++typedef unsigned envid_t; ++#define __ENVID_T_DEFINED__ ++#endif ++ ++struct vzctl_ve_ip_map { ++ envid_t veid; ++ int op; ++#define VE_IP_ADD 1 ++#define VE_IP_DEL 2 ++ struct sockaddr *addr; ++ int addrlen; ++}; ++ ++#define VENETCTLTYPE '(' ++ ++#define VENETCTL_VE_IP_MAP _IOW(VENETCTLTYPE, 3, \ ++ struct vzctl_ve_ip_map) ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/vzctl_veth.h linux-2.6.16-026test015/include/linux/vzctl_veth.h +--- linux-2.6.16.orig/include/linux/vzctl_veth.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzctl_veth.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,40 @@ ++/* ++ * include/linux/vzctl_veth.h ++ * ++ * Copyright (C) 2006 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef _VZCTL_VETH_H ++#define _VZCTL_VETH_H ++ ++#include <linux/types.h> ++#include <linux/ioctl.h> ++ ++#ifndef __ENVID_T_DEFINED__ ++typedef unsigned envid_t; ++#define __ENVID_T_DEFINED__ ++#endif ++ ++struct vzctl_ve_hwaddr { ++ envid_t veid; ++ int op; ++#define VE_ETH_ADD 1 ++#define VE_ETH_DEL 2 ++ unsigned char dev_addr[6]; ++ int addrlen; ++ char dev_name[16]; ++ unsigned char dev_addr_ve[6]; ++ int addrlen_ve; ++ char dev_name_ve[16]; ++}; ++ ++#define VETHCTLTYPE '[' ++ ++#define VETHCTL_VE_HWADDR _IOW(VETHCTLTYPE, 3, \ ++ struct vzctl_ve_hwaddr) ++ ++#endif +diff -upr linux-2.6.16.orig/include/linux/vzdq_tree.h linux-2.6.16-026test015/include/linux/vzdq_tree.h +--- linux-2.6.16.orig/include/linux/vzdq_tree.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzdq_tree.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,99 @@ ++/* ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * This file contains Virtuozzo disk quota tree definition ++ */ ++ ++#ifndef _VZDQ_TREE_H ++#define _VZDQ_TREE_H ++ ++#include <linux/list.h> ++#include <asm/string.h> ++ ++typedef unsigned int quotaid_t; ++#define QUOTAID_BITS 32 ++#define QUOTAID_BBITS 4 ++#define QUOTAID_EBITS 8 ++ ++#if QUOTAID_EBITS % QUOTAID_BBITS ++#error Quota bit assumption failure ++#endif ++ ++#define QUOTATREE_BSIZE (1 << QUOTAID_BBITS) ++#define QUOTATREE_BMASK (QUOTATREE_BSIZE - 1) ++#define QUOTATREE_DEPTH ((QUOTAID_BITS + QUOTAID_BBITS - 1) \ ++ / QUOTAID_BBITS) ++#define QUOTATREE_EDEPTH ((QUOTAID_BITS + QUOTAID_EBITS - 1) \ ++ / QUOTAID_EBITS) ++#define QUOTATREE_BSHIFT(lvl) ((QUOTATREE_DEPTH - (lvl) - 1) * QUOTAID_BBITS) ++ ++/* ++ * Depth of keeping unused node (not inclusive). ++ * 0 means release all nodes including root, ++ * QUOTATREE_DEPTH means never release nodes. 
++ * Current value: release all nodes strictly after QUOTATREE_EDEPTH ++ * (measured in external shift units). ++ */ ++#define QUOTATREE_CDEPTH (QUOTATREE_DEPTH \ ++ - 2 * QUOTATREE_DEPTH / QUOTATREE_EDEPTH \ ++ + 1) ++ ++/* ++ * Levels 0..(QUOTATREE_DEPTH-1) are tree nodes. ++ * On level i the maximal number of nodes is 2^(i*QUOTAID_BBITS), ++ * and each node contains 2^QUOTAID_BBITS pointers. ++ * Level 0 is a (single) tree root node. ++ * ++ * Nodes of level (QUOTATREE_DEPTH-1) contain pointers to caller's data. ++ * Nodes of lower levels contain pointers to nodes. ++ * ++ * Double pointer in array of i-level node, pointing to a (i+1)-level node ++ * (such as inside quotatree_find_state) are marked by level (i+1), not i. ++ * Level 0 double pointer is a pointer to root inside tree struct. ++ * ++ * The tree is permanent, i.e. all index blocks allocated are keeped alive to ++ * preserve the blocks numbers in the quota file tree to keep its changes ++ * locally. ++ */ ++struct quotatree_node { ++ struct list_head list; ++ quotaid_t num; ++ void *blocks[QUOTATREE_BSIZE]; ++}; ++ ++struct quotatree_level { ++ struct list_head usedlh, freelh; ++ quotaid_t freenum; ++}; ++ ++struct quotatree_tree { ++ struct quotatree_level levels[QUOTATREE_DEPTH]; ++ struct quotatree_node *root; ++ unsigned int leaf_num; ++}; ++ ++struct quotatree_find_state { ++ void **block; ++ int level; ++}; ++ ++/* number of leafs (objects) and leaf level of the tree */ ++#define QTREE_LEAFNUM(tree) ((tree)->leaf_num) ++#define QTREE_LEAFLVL(tree) (&(tree)->levels[QUOTATREE_DEPTH - 1]) ++ ++struct quotatree_tree *quotatree_alloc(void); ++void *quotatree_find(struct quotatree_tree *tree, quotaid_t id, ++ struct quotatree_find_state *st); ++int quotatree_insert(struct quotatree_tree *tree, quotaid_t id, ++ struct quotatree_find_state *st, void *data); ++void quotatree_remove(struct quotatree_tree *tree, quotaid_t id); ++void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *)); ++void 
*quotatree_get_next(struct quotatree_tree *tree, quotaid_t id); ++void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index); ++ ++#endif /* _VZDQ_TREE_H */ ++ +diff -upr linux-2.6.16.orig/include/linux/vzquota.h linux-2.6.16-026test015/include/linux/vzquota.h +--- linux-2.6.16.orig/include/linux/vzquota.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzquota.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,291 @@ ++/* ++ * ++ * Copyright (C) 2001-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * This file contains Virtuozzo disk quota implementation ++ */ ++ ++#ifndef _VZDQUOTA_H ++#define _VZDQUOTA_H ++ ++#include <linux/types.h> ++#include <linux/quota.h> ++ ++/* vzquotactl syscall commands */ ++#define VZ_DQ_CREATE 5 /* create quota master block */ ++#define VZ_DQ_DESTROY 6 /* destroy qmblk */ ++#define VZ_DQ_ON 7 /* mark dentry with already created qmblk */ ++#define VZ_DQ_OFF 8 /* remove mark, don't destroy qmblk */ ++#define VZ_DQ_SETLIMIT 9 /* set new limits */ ++#define VZ_DQ_GETSTAT 10 /* get usage statistic */ ++/* set of syscalls to maintain UGID quotas */ ++#define VZ_DQ_UGID_GETSTAT 1 /* get usage/limits for ugid(s) */ ++#define VZ_DQ_UGID_ADDSTAT 2 /* set usage/limits statistic for ugid(s) */ ++#define VZ_DQ_UGID_GETGRACE 3 /* get expire times */ ++#define VZ_DQ_UGID_SETGRACE 4 /* set expire times */ ++#define VZ_DQ_UGID_GETCONFIG 5 /* get ugid_max limit, cnt, flags of qmblk */ ++#define VZ_DQ_UGID_SETCONFIG 6 /* set ugid_max limit, flags of qmblk */ ++#define VZ_DQ_UGID_SETLIMIT 7 /* set ugid B/I limits */ ++#define VZ_DQ_UGID_SETINFO 8 /* set ugid info */ ++ ++/* common structure for vz and ugid quota */ ++struct dq_stat { ++ /* blocks limits */ ++ __u64 bhardlimit; /* absolute limit in bytes */ ++ __u64 bsoftlimit; /* preferred limit in bytes */ ++ time_t btime; /* time limit for excessive disk use */ ++ __u64 bcurrent; /* current bytes 
count */ ++ /* inodes limits */ ++ __u32 ihardlimit; /* absolute limit on allocated inodes */ ++ __u32 isoftlimit; /* preferred inode limit */ ++ time_t itime; /* time limit for excessive inode use */ ++ __u32 icurrent; /* current # allocated inodes */ ++}; ++ ++/* One second resolution for grace times */ ++#define CURRENT_TIME_SECONDS (get_seconds()) ++ ++/* Values for dq_info->flags */ ++#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */ ++#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */ ++ ++struct dq_info { ++ time_t bexpire; /* expire timeout for excessive disk use */ ++ time_t iexpire; /* expire timeout for excessive inode use */ ++ unsigned flags; /* see previos defines */ ++}; ++ ++struct vz_quota_stat { ++ struct dq_stat dq_stat; ++ struct dq_info dq_info; ++}; ++ ++/* UID/GID interface record - for user-kernel level exchange */ ++struct vz_quota_iface { ++ unsigned int qi_id; /* UID/GID this applies to */ ++ unsigned int qi_type; /* USRQUOTA|GRPQUOTA */ ++ struct dq_stat qi_stat; /* limits, options, usage stats */ ++}; ++ ++/* values for flags and dq_flags */ ++/* this flag is set if the userspace has been unable to provide usage ++ * information about all ugids ++ * if the flag is set, we don't allocate new UG quota blocks (their ++ * current usage is unknown) or free existing UG quota blocks (not to ++ * lose information that this block is ok) */ ++#define VZDQUG_FIXED_SET 0x01 ++/* permit to use ugid quota */ ++#define VZDQUG_ON 0x02 ++#define VZDQ_USRQUOTA 0x10 ++#define VZDQ_GRPQUOTA 0x20 ++#define VZDQ_NOACT 0x1000 /* not actual */ ++#define VZDQ_NOQUOT 0x2000 /* not under quota tree */ ++ ++struct vz_quota_ugid_stat { ++ unsigned int limit; /* max amount of ugid records */ ++ unsigned int count; /* amount of ugid records */ ++ unsigned int flags; ++}; ++ ++struct vz_quota_ugid_setlimit { ++ unsigned int type; /* quota type (USR/GRP) */ ++ unsigned int id; /* ugid */ ++ struct if_dqblk dqb; /* limits info */ ++}; ++ 
++struct vz_quota_ugid_setinfo { ++ unsigned int type; /* quota type (USR/GRP) */ ++ struct if_dqinfo dqi; /* grace info */ ++}; ++ ++#ifdef __KERNEL__ ++#include <linux/list.h> ++#include <asm/atomic.h> ++#include <asm/semaphore.h> ++#include <linux/time.h> ++#include <linux/vzquota_qlnk.h> ++#include <linux/vzdq_tree.h> ++ ++/* Values for dq_info flags */ ++#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */ ++#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */ ++ ++/* values for dq_state */ ++#define VZDQ_STARTING 0 /* created, not turned on yet */ ++#define VZDQ_WORKING 1 /* quota created, turned on */ ++#define VZDQ_STOPING 2 /* created, turned on and off */ ++ ++/* master quota record - one per veid */ ++struct vz_quota_master { ++ struct list_head dq_hash; /* next quota in hash list */ ++ atomic_t dq_count; /* inode reference count */ ++ unsigned int dq_flags; /* see VZDQUG_FIXED_SET */ ++ unsigned int dq_state; /* see values above */ ++ unsigned int dq_id; /* VEID this applies to */ ++ struct dq_stat dq_stat; /* limits, grace, usage stats */ ++ struct dq_info dq_info; /* grace times and flags */ ++ spinlock_t dq_data_lock; /* for dq_stat */ ++ ++ struct semaphore dq_sem; /* semaphore to protect ++ ugid tree */ ++ ++ struct list_head dq_ilink_list; /* list of vz_quota_ilink */ ++ struct quotatree_tree *dq_uid_tree; /* vz_quota_ugid tree for UIDs */ ++ struct quotatree_tree *dq_gid_tree; /* vz_quota_ugid tree for GIDs */ ++ unsigned int dq_ugid_count; /* amount of ugid records */ ++ unsigned int dq_ugid_max; /* max amount of ugid records */ ++ struct dq_info dq_ugid_info[MAXQUOTAS]; /* ugid grace times */ ++ ++ struct dentry *dq_root_dentry;/* dentry of fs tree */ ++ struct vfsmount *dq_root_mnt; /* vfsmnt of this dentry */ ++ struct super_block *dq_sb; /* superblock of our quota root */ ++}; ++ ++/* UID/GID quota record - one per pair (quota_master, uid or gid) */ ++struct vz_quota_ugid { ++ unsigned int qugid_id; /* UID/GID this 
applies to */ ++ struct dq_stat qugid_stat; /* limits, options, usage stats */ ++ int qugid_type; /* USRQUOTA|GRPQUOTA */ ++ atomic_t qugid_count; /* reference count */ ++}; ++ ++#define VZ_QUOTA_UGBAD ((struct vz_quota_ugid *)0xfeafea11) ++ ++struct vz_quota_datast { ++ struct vz_quota_ilink qlnk; ++}; ++ ++#define VIRTINFO_QUOTA_GETSTAT 0 ++#define VIRTINFO_QUOTA_ON 1 ++#define VIRTINFO_QUOTA_OFF 2 ++ ++struct virt_info_quota { ++ struct super_block *super; ++ struct dq_stat *qstat; ++}; ++ ++/* ++ * Interface to VZ quota core ++ */ ++#define INODE_QLNK(inode) (&(inode)->i_qlnk) ++#define QLNK_INODE(qlnk) container_of((qlnk), struct inode, i_qlnk) ++ ++#define VZ_QUOTA_BAD ((struct vz_quota_master *)0xefefefef) ++ ++#define VZ_QUOTAO_SETE 1 ++#define VZ_QUOTAO_INIT 2 ++#define VZ_QUOTAO_DESTR 3 ++#define VZ_QUOTAO_SWAP 4 ++#define VZ_QUOTAO_INICAL 5 ++#define VZ_QUOTAO_DRCAL 6 ++#define VZ_QUOTAO_QSET 7 ++#define VZ_QUOTAO_TRANS 8 ++#define VZ_QUOTAO_ACT 9 ++#define VZ_QUOTAO_DTREE 10 ++#define VZ_QUOTAO_DET 11 ++#define VZ_QUOTAO_ON 12 ++ ++extern struct semaphore vz_quota_sem; ++void inode_qmblk_lock(struct super_block *sb); ++void inode_qmblk_unlock(struct super_block *sb); ++void qmblk_data_read_lock(struct vz_quota_master *qmblk); ++void qmblk_data_read_unlock(struct vz_quota_master *qmblk); ++void qmblk_data_write_lock(struct vz_quota_master *qmblk); ++void qmblk_data_write_unlock(struct vz_quota_master *qmblk); ++ ++/* for quota operations */ ++void vzquota_inode_init_call(struct inode *inode); ++void vzquota_inode_drop_call(struct inode *inode); ++int vzquota_inode_transfer_call(struct inode *, struct iattr *); ++struct vz_quota_master *vzquota_inode_data(struct inode *inode, ++ struct vz_quota_datast *); ++void vzquota_data_unlock(struct inode *inode, struct vz_quota_datast *); ++int vzquota_rename_check(struct inode *inode, ++ struct inode *old_dir, struct inode *new_dir); ++struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode); ++/* for 
second-level quota */ ++struct vz_quota_master *vzquota_find_qmblk(struct super_block *); ++/* for management operations */ ++struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id, ++ struct vz_quota_stat *qstat); ++void vzquota_free_master(struct vz_quota_master *); ++struct vz_quota_master *vzquota_find_master(unsigned int quota_id); ++int vzquota_on_qmblk(struct super_block *sb, struct inode *inode, ++ struct vz_quota_master *qmblk); ++int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk); ++int vzquota_get_super(struct super_block *sb); ++void vzquota_put_super(struct super_block *sb); ++ ++static inline struct vz_quota_master *qmblk_get(struct vz_quota_master *qmblk) ++{ ++ if (!atomic_read(&qmblk->dq_count)) ++ BUG(); ++ atomic_inc(&qmblk->dq_count); ++ return qmblk; ++} ++ ++static inline void __qmblk_put(struct vz_quota_master *qmblk) ++{ ++ atomic_dec(&qmblk->dq_count); ++} ++ ++static inline void qmblk_put(struct vz_quota_master *qmblk) ++{ ++ if (!atomic_dec_and_test(&qmblk->dq_count)) ++ return; ++ vzquota_free_master(qmblk); ++} ++ ++extern struct list_head vzquota_hash_table[]; ++extern int vzquota_hash_size; ++ ++/* ++ * Interface to VZ UGID quota ++ */ ++extern struct quotactl_ops vz_quotactl_operations; ++extern struct dquot_operations vz_quota_operations2; ++extern struct quota_format_type vz_quota_empty_v2_format; ++ ++#define QUGID_TREE(qmblk, type) (((type) == USRQUOTA) ? 
\ ++ qmblk->dq_uid_tree : \ ++ qmblk->dq_gid_tree) ++ ++#define VZDQUG_FIND_DONT_ALLOC 1 ++#define VZDQUG_FIND_FAKE 2 ++struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk, ++ unsigned int quota_id, int type, int flags); ++struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk, ++ unsigned int quota_id, int type, int flags); ++struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid); ++void vzquota_put_ugid(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid); ++void vzquota_kill_ugid(struct vz_quota_master *qmblk); ++int vzquota_ugid_init(void); ++void vzquota_ugid_release(void); ++int vzquota_transfer_usage(struct inode *inode, int mask, ++ struct vz_quota_ilink *qlnk); ++ ++struct vzctl_quotaugidctl; ++long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub); ++ ++/* ++ * Other VZ quota parts ++ */ ++extern struct dquot_operations vz_quota_operations; ++ ++long do_vzquotactl(int cmd, unsigned int quota_id, ++ struct vz_quota_stat *qstat, const char *ve_root); ++int vzquota_proc_init(void); ++void vzquota_proc_release(void); ++struct vz_quota_master *vzquota_find_qmblk(struct super_block *); ++extern struct semaphore vz_quota_sem; ++ ++void vzaquota_init(void); ++void vzaquota_fini(void); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* _VZDQUOTA_H */ +diff -upr linux-2.6.16.orig/include/linux/vzquota_qlnk.h linux-2.6.16-026test015/include/linux/vzquota_qlnk.h +--- linux-2.6.16.orig/include/linux/vzquota_qlnk.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzquota_qlnk.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,25 @@ ++/* ++ * include/linux/vzquota_qlnk.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef _VZDQUOTA_QLNK_H ++#define _VZDQUOTA_QLNK_H ++ ++struct vz_quota_master; ++struct vz_quota_ugid; ++ ++/* inode link, used to track inodes using quota via dq_ilink_list */ ++struct vz_quota_ilink { ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid *qugid[MAXQUOTAS]; ++ struct list_head list; ++ unsigned char origin; ++}; ++ ++#endif /* _VZDQUOTA_QLNK_H */ +diff -upr linux-2.6.16.orig/include/linux/vzratelimit.h linux-2.6.16-026test015/include/linux/vzratelimit.h +--- linux-2.6.16.orig/include/linux/vzratelimit.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzratelimit.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,28 @@ ++/* ++ * include/linux/vzratelimit.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __VZ_RATELIMIT_H__ ++#define __VZ_RATELIMIT_H__ ++ ++/* ++ * Generic ratelimiting stuff. ++ */ ++ ++struct vz_rate_info { ++ int burst; ++ int interval; /* jiffy_t per event */ ++ int bucket; /* kind of leaky bucket */ ++ unsigned long last; /* last event */ ++}; ++ ++/* Return true if rate limit permits. */ ++int vz_ratelimit(struct vz_rate_info *p); ++ ++#endif /* __VZ_RATELIMIT_H__ */ +diff -upr linux-2.6.16.orig/include/linux/vzstat.h linux-2.6.16-026test015/include/linux/vzstat.h +--- linux-2.6.16.orig/include/linux/vzstat.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/linux/vzstat.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,182 @@ ++/* ++ * include/linux/vzstat.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __VZSTAT_H__ ++#define __VZSTAT_H__ ++ ++struct swap_cache_info_struct { ++ unsigned long add_total; ++ unsigned long del_total; ++ unsigned long find_success; ++ unsigned long find_total; ++ unsigned long noent_race; ++ unsigned long exist_race; ++ unsigned long remove_race; ++}; ++ ++struct kstat_lat_snap_struct { ++ cycles_t maxlat, totlat; ++ unsigned long count; ++}; ++struct kstat_lat_pcpu_snap_struct { ++ cycles_t maxlat, totlat; ++ unsigned long count; ++ seqcount_t lock; ++} ____cacheline_aligned_in_smp; ++ ++struct kstat_lat_struct { ++ struct kstat_lat_snap_struct cur, last; ++ cycles_t avg[3]; ++}; ++struct kstat_lat_pcpu_struct { ++ struct kstat_lat_pcpu_snap_struct cur[NR_CPUS]; ++ cycles_t max_snap; ++ struct kstat_lat_snap_struct last; ++ cycles_t avg[3]; ++}; ++ ++struct kstat_perf_snap_struct { ++ cycles_t wall_tottime, cpu_tottime; ++ cycles_t wall_maxdur, cpu_maxdur; ++ unsigned long count; ++}; ++struct kstat_perf_struct { ++ struct kstat_perf_snap_struct cur, last; ++}; ++ ++struct kstat_zone_avg { ++ unsigned long free_pages_avg[3], ++ nr_active_avg[3], ++ nr_inactive_avg[3]; ++}; ++ ++#define KSTAT_ALLOCSTAT_NR 5 ++ ++struct kernel_stat_glob { ++ unsigned long nr_unint_avg[3]; ++ ++ unsigned long alloc_fails[KSTAT_ALLOCSTAT_NR]; ++ struct kstat_lat_struct alloc_lat[KSTAT_ALLOCSTAT_NR]; ++ struct kstat_lat_pcpu_struct sched_lat; ++ struct kstat_lat_struct swap_in; ++ ++ struct kstat_perf_struct ttfp, cache_reap, ++ refill_inact, shrink_icache, shrink_dcache; ++ ++ struct kstat_zone_avg zone_avg[3]; /* MAX_NR_ZONES */ ++} ____cacheline_aligned; ++ ++extern struct kernel_stat_glob kstat_glob ____cacheline_aligned; ++extern spinlock_t kstat_glb_lock; ++ ++#ifdef CONFIG_VE ++#define KSTAT_PERF_ENTER(name) \ ++ unsigned long flags; \ ++ cycles_t start, sleep_time; \ ++ \ ++ start = get_cycles(); \ ++ sleep_time = VE_TASK_INFO(current)->sleep_time; \ ++ ++#define KSTAT_PERF_LEAVE(name) \ ++ spin_lock_irqsave(&kstat_glb_lock, 
flags); \ ++ kstat_glob.name.cur.count++; \ ++ start = get_cycles() - start; \ ++ if (kstat_glob.name.cur.wall_maxdur < start) \ ++ kstat_glob.name.cur.wall_maxdur = start;\ ++ kstat_glob.name.cur.wall_tottime += start; \ ++ start -= VE_TASK_INFO(current)->sleep_time - \ ++ sleep_time; \ ++ if (kstat_glob.name.cur.cpu_maxdur < start) \ ++ kstat_glob.name.cur.cpu_maxdur = start; \ ++ kstat_glob.name.cur.cpu_tottime += start; \ ++ spin_unlock_irqrestore(&kstat_glb_lock, flags); \ ++ ++#else ++#define KSTAT_PERF_ENTER(name) ++#define KSTAT_PERF_LEAVE(name) ++#endif ++ ++/* ++ * Add another statistics reading. ++ * Serialization is the caller's due. ++ */ ++static inline void KSTAT_LAT_ADD(struct kstat_lat_struct *p, ++ cycles_t dur) ++{ ++ p->cur.count++; ++ if (p->cur.maxlat < dur) ++ p->cur.maxlat = dur; ++ p->cur.totlat += dur; ++} ++ ++static inline void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, int cpu, ++ cycles_t dur) ++{ ++ struct kstat_lat_pcpu_snap_struct *cur; ++ ++ cur = &p->cur[cpu]; ++ write_seqcount_begin(&cur->lock); ++ cur->count++; ++ if (cur->maxlat < dur) ++ cur->maxlat = dur; ++ cur->totlat += dur; ++ write_seqcount_end(&cur->lock); ++} ++ ++/* ++ * Move current statistics to last, clear last. ++ * Serialization is the caller's due. 
++ */ ++static inline void KSTAT_LAT_UPDATE(struct kstat_lat_struct *p) ++{ ++ cycles_t m; ++ memcpy(&p->last, &p->cur, sizeof(p->last)); ++ p->cur.maxlat = 0; ++ m = p->last.maxlat; ++ CALC_LOAD(p->avg[0], EXP_1, m) ++ CALC_LOAD(p->avg[1], EXP_5, m) ++ CALC_LOAD(p->avg[2], EXP_15, m) ++} ++ ++static inline void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p) ++{ ++ unsigned i, cpu; ++ struct kstat_lat_pcpu_snap_struct snap, *cur; ++ cycles_t m; ++ ++ memset(&p->last, 0, sizeof(p->last)); ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ cur = &p->cur[cpu]; ++ do { ++ i = read_seqcount_begin(&cur->lock); ++ memcpy(&snap, cur, sizeof(snap)); ++ } while (read_seqcount_retry(&cur->lock, i)); ++ /* ++ * read above and this update of maxlat is not atomic, ++ * but this is OK, since it happens rarely and losing ++ * a couple of peaks is not essential. xemul ++ */ ++ cur->maxlat = 0; ++ ++ p->last.count += snap.count; ++ p->last.totlat += snap.totlat; ++ if (p->last.maxlat < snap.maxlat) ++ p->last.maxlat = snap.maxlat; ++ } ++ ++ m = (p->last.maxlat > p->max_snap ? 
p->last.maxlat : p->max_snap); ++ CALC_LOAD(p->avg[0], EXP_1, m); ++ CALC_LOAD(p->avg[1], EXP_5, m); ++ CALC_LOAD(p->avg[2], EXP_15, m); ++ /* reset max_snap to calculate it correctly next time */ ++ p->max_snap = 0; ++} ++ ++#endif /* __VZSTAT_H__ */ +diff -upr linux-2.6.16.orig/include/net/addrconf.h linux-2.6.16-026test015/include/net/addrconf.h +--- linux-2.6.16.orig/include/net/addrconf.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/addrconf.h 2006-07-04 14:41:39.000000000 +0400 +@@ -244,5 +244,14 @@ extern int if6_proc_init(void); + extern void if6_proc_exit(void); + #endif + ++int addrconf_ifdown(struct net_device *dev, int how); ++int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen); ++ ++#ifdef CONFIG_VE ++int addrconf_sysctl_init(struct ve_struct *ve); ++void addrconf_sysctl_fini(struct ve_struct *ve); ++void addrconf_sysctl_free(struct ve_struct *ve); ++#endif ++ + #endif + #endif +diff -upr linux-2.6.16.orig/include/net/af_unix.h linux-2.6.16-026test015/include/net/af_unix.h +--- linux-2.6.16.orig/include/net/af_unix.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/af_unix.h 2006-07-04 14:41:38.000000000 +0400 +@@ -19,23 +19,37 @@ extern atomic_t unix_tot_inflight; + + static inline struct sock *first_unix_socket(int *i) + { ++ struct sock *s; ++ struct ve_struct *ve; ++ ++ ve = get_exec_env(); + for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { +- if (!hlist_empty(&unix_socket_table[*i])) +- return __sk_head(&unix_socket_table[*i]); ++ for (s = sk_head(&unix_socket_table[*i]); ++ s != NULL && !ve_accessible(s->sk_owner_env, ve); ++ s = sk_next(s)); ++ if (s != NULL) ++ return s; + } + return NULL; + } + + static inline struct sock *next_unix_socket(int *i, struct sock *s) + { +- struct sock *next = sk_next(s); +- /* More in this chain? 
*/ +- if (next) +- return next; ++ struct ve_struct *ve; ++ ++ ve = get_exec_env(); ++ for (s = sk_next(s); s != NULL; s = sk_next(s)) { ++ if (!ve_accessible(s->sk_owner_env, ve)) ++ continue; ++ return s; ++ } + /* Look for next non-empty chain. */ + for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { +- if (!hlist_empty(&unix_socket_table[*i])) +- return __sk_head(&unix_socket_table[*i]); ++ for (s = sk_head(&unix_socket_table[*i]); ++ s != NULL && !ve_accessible(s->sk_owner_env, ve); ++ s = sk_next(s)); ++ if (s != NULL) ++ return s; + } + return NULL; + } +diff -upr linux-2.6.16.orig/include/net/arp.h linux-2.6.16-026test015/include/net/arp.h +--- linux-2.6.16.orig/include/net/arp.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/arp.h 2006-07-04 14:41:39.000000000 +0400 +@@ -7,7 +7,14 @@ + + #define HAVE_ARP_CREATE + +-extern struct neigh_table arp_tbl; ++#ifdef CONFIG_VE ++#define arp_tbl (*(get_exec_env()->ve_arp_tbl)) ++extern int ve_arp_init(struct ve_struct *ve); ++extern void ve_arp_fini(struct ve_struct *ve); ++#else ++struct neigh_table global_arp_tbl; ++#define arp_tbl global_arp_tbl ++#endif + + extern void arp_init(void); + extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, +diff -upr linux-2.6.16.orig/include/net/compat.h linux-2.6.16-026test015/include/net/compat.h +--- linux-2.6.16.orig/include/net/compat.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/compat.h 2006-07-04 14:41:36.000000000 +0400 +@@ -23,6 +23,14 @@ struct compat_cmsghdr { + compat_int_t cmsg_type; + }; + ++#if defined(CONFIG_X86_64) ++#define is_current_32bits() (current_thread_info()->flags & _TIF_IA32) ++#elif defined(CONFIG_IA64) ++#define is_current_32bits() (IS_IA32_PROCESS(ia64_task_regs(current))) ++#else ++#define is_current_32bits() 0 ++#endif ++ + #else /* defined(CONFIG_COMPAT) */ + #define compat_msghdr msghdr /* to avoid compiler warnings */ + #endif /* defined(CONFIG_COMPAT) */ +diff -upr 
linux-2.6.16.orig/include/net/flow.h linux-2.6.16-026test015/include/net/flow.h +--- linux-2.6.16.orig/include/net/flow.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/flow.h 2006-07-04 14:41:38.000000000 +0400 +@@ -10,6 +10,7 @@ + #include <linux/in6.h> + #include <asm/atomic.h> + ++struct ve_struct; + struct flowi { + int oif; + int iif; +@@ -78,6 +79,9 @@ struct flowi { + #define fl_icmp_type uli_u.icmpt.type + #define fl_icmp_code uli_u.icmpt.code + #define fl_ipsec_spi uli_u.spi ++#ifdef CONFIG_VE ++ struct ve_struct *owner_env; ++#endif + } __attribute__((__aligned__(BITS_PER_LONG/8))); + + #define FLOW_DIR_IN 0 +diff -upr linux-2.6.16.orig/include/net/icmp.h linux-2.6.16-026test015/include/net/icmp.h +--- linux-2.6.16.orig/include/net/icmp.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/icmp.h 2006-07-04 14:41:38.000000000 +0400 +@@ -31,9 +31,14 @@ struct icmp_err { + + extern struct icmp_err icmp_err_convert[]; + DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics); +-#define ICMP_INC_STATS(field) SNMP_INC_STATS(icmp_statistics, field) +-#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(icmp_statistics, field) +-#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(icmp_statistics, field) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_icmp_statistics (get_exec_env()->_icmp_statistics) ++#else ++#define ve_icmp_statistics icmp_statistics ++#endif ++#define ICMP_INC_STATS(field) SNMP_INC_STATS(ve_icmp_statistics, field) ++#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_icmp_statistics, field) ++#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_icmp_statistics, field) + + struct dst_entry; + struct net_proto_family; +diff -upr linux-2.6.16.orig/include/net/if_inet6.h linux-2.6.16-026test015/include/net/if_inet6.h +--- linux-2.6.16.orig/include/net/if_inet6.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/if_inet6.h 
2006-07-04 14:41:39.000000000 +0400 +@@ -194,7 +194,14 @@ struct inet6_dev + unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ + }; + +-extern struct ipv6_devconf ipv6_devconf; ++extern struct ipv6_devconf global_ipv6_devconf; ++extern struct ipv6_devconf global_ipv6_devconf_dflt; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_ipv6_devconf (*(get_exec_env()->_ipv6_devconf)) ++#else ++#define ve_ipv6_devconf global_ipv6_devconf ++#endif + + static inline void ipv6_eth_mc_map(struct in6_addr *addr, char *buf) + { +diff -upr linux-2.6.16.orig/include/net/inet6_hashtables.h linux-2.6.16-026test015/include/net/inet6_hashtables.h +--- linux-2.6.16.orig/include/net/inet6_hashtables.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/inet6_hashtables.h 2006-07-04 14:41:39.000000000 +0400 +@@ -27,11 +27,13 @@ struct inet_hashinfo; + + /* I have no idea if this is a good hash for v6 or not. -DaveM */ + static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, +- const struct in6_addr *faddr, const u16 fport) ++ const struct in6_addr *faddr, const u16 fport, ++ const envid_t veid) + { + unsigned int hashent = (lport ^ fport); + + hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); ++ hashent ^= (veid ^ (veid >> 16)); + hashent ^= hashent >> 16; + hashent ^= hashent >> 8; + return hashent; +@@ -45,7 +47,7 @@ static inline int inet6_sk_ehashfn(const + const struct in6_addr *faddr = &np->daddr; + const __u16 lport = inet->num; + const __u16 fport = inet->dport; +- return inet6_ehashfn(laddr, lport, faddr, fport); ++ return inet6_ehashfn(laddr, lport, faddr, fport, VEID(VE_OWNER_SK(sk))); + } + + static inline void __inet6_hash(struct inet_hashinfo *hashinfo, +@@ -94,14 +96,15 @@ static inline struct sock * + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. 
+ */ +- unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); ++ struct ve_struct *env = get_exec_env(); ++ unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport, VEID(env)); + struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + + prefetch(head->chain.first); + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { + /* For IPV6 do the cheaper port and family tests first. */ +- if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) ++ if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif, env)) + goto hit; /* You sunk my battleship! */ + } + /* Must check for a TIME_WAIT'er before going to listener hash. */ +@@ -114,6 +117,7 @@ static inline struct sock * + + if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && ++ ve_accessible_strict(tw->tw_owner_env, VEID(env)) && + (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) + goto hit; + } +diff -upr linux-2.6.16.orig/include/net/inet_hashtables.h linux-2.6.16-026test015/include/net/inet_hashtables.h +--- linux-2.6.16.orig/include/net/inet_hashtables.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/inet_hashtables.h 2006-07-04 14:41:38.000000000 +0400 +@@ -24,6 +24,7 @@ + #include <linux/spinlock.h> + #include <linux/types.h> + #include <linux/wait.h> ++#include <linux/ve_owner.h> + + #include <net/inet_connection_sock.h> + #include <net/inet_sock.h> +@@ -75,11 +76,13 @@ struct inet_ehash_bucket { + * ports are created in O(1) time? I thought so. 
;-) -DaveM + */ + struct inet_bind_bucket { ++ struct ve_struct *owner_env; + unsigned short port; + signed short fastreuse; + struct hlist_node node; + struct hlist_head owners; + }; ++DCL_VE_OWNER_PROTO(TB, struct inet_bind_bucket, owner_env) + + #define inet_bind_bucket_for_each(tb, node, head) \ + hlist_for_each_entry(tb, node, head, node) +@@ -139,37 +142,43 @@ static inline struct inet_ehash_bucket * + extern struct inet_bind_bucket * + inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, +- const unsigned short snum); ++ const unsigned short snum, ++ struct ve_struct *env); + extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, + struct inet_bind_bucket *tb); + +-static inline int inet_bhashfn(const __u16 lport, const int bhash_size) ++static inline int inet_bhashfn(const __u16 lport, const int bhash_size, ++ unsigned veid) + { +- return lport & (bhash_size - 1); ++ return ((lport + (veid ^ (veid >> 16))) & (bhash_size - 1)); + } + + extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum); + + /* These can have wildcards, don't try too hard. */ +-static inline int inet_lhashfn(const unsigned short num) ++static inline int inet_lhashfn(const unsigned short num, unsigned veid) + { +- return num & (INET_LHTABLE_SIZE - 1); ++ return ((num + (veid ^ (veid >> 16))) & (INET_LHTABLE_SIZE - 1)); + } + + static inline int inet_sk_listen_hashfn(const struct sock *sk) + { +- return inet_lhashfn(inet_sk(sk)->num); ++ return inet_lhashfn(inet_sk(sk)->num, VEID(VE_OWNER_SK(sk))); + } + + /* Caller must disable local BH processing. 
*/ + static inline void __inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) + { +- const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); +- struct inet_bind_hashbucket *head = &table->bhash[bhash]; ++ int bhash; ++ struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + ++ bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size, ++ VEID(VE_OWNER_SK(child))); ++ head = &table->bhash[bhash]; ++ + spin_lock(&head->lock); + tb = inet_csk(sk)->icsk_bind_hash; + sk_add_bind_node(child, &tb->owners); +@@ -275,7 +284,8 @@ static inline int inet_iif(const struct + extern struct sock *__inet_lookup_listener(const struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, +- const int dif); ++ const int dif, ++ struct ve_struct *env); + + /* Optimize the common listener case. */ + static inline struct sock * +@@ -285,18 +295,21 @@ static inline struct sock * + { + struct sock *sk = NULL; + const struct hlist_head *head; ++ struct ve_struct *env; + ++ env = get_exec_env(); + read_lock(&hashinfo->lhash_lock); +- head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; ++ head = &hashinfo->listening_hash[inet_lhashfn(hnum, VEID(env))]; + if (!hlist_empty(head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && ++ ve_accessible_strict(VE_OWNER_SK(sk), env) && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; +- sk = __inet_lookup_listener(head, daddr, hnum, dif); ++ sk = __inet_lookup_listener(head, daddr, hnum, dif, env); + } + if (sk) { + sherry_cache: +@@ -323,25 +336,25 @@ sherry_cache: + #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); + #endif /* __BIG_ENDIAN */ +-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ ++#define 
INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +-#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ ++#define INET_TW_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + #else /* 32-bit arch */ + #define INET_ADDR_COOKIE(__name, __saddr, __daddr) +-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ ++#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && \ + (inet_sk(__sk)->daddr == (__saddr)) && \ + (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +-#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ ++#define INET_TW_MATCH_ALLVE(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && \ + (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ + (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ +@@ -349,6 +362,18 @@ sherry_cache: + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + #endif /* 64-bit arch */ + ++#define INET_MATCH(__sk, __hash, __cookie, __saddr, \ ++ __daddr, __ports, __dif, __ve) \ ++ (INET_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr), \ ++ (__daddr), (__ports), (__dif)) \ ++ && ve_accessible_strict(VE_OWNER_SK(__sk), (__ve))) ++ ++#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, \ ++ __daddr, 
__ports, __dif, __ve) \ ++ (INET_TW_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr), \ ++ (__daddr), (__ports), (__dif)) \ ++ && ve_accessible_strict(inet_twsk(__sk)->tw_owner_env, VEID(__ve))) ++ + /* + * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need + * not check it for lookups anymore, thanks Alexey. -DaveM +@@ -368,19 +393,25 @@ static inline struct sock * + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ +- unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); +- struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); +- ++ unsigned int hash; ++ struct inet_ehash_bucket *head; ++ struct ve_struct *env; ++ ++ env = get_exec_env(); ++ hash = inet_ehashfn(daddr, hnum, saddr, sport, VEID(env)); ++ head = inet_ehash_bucket(hashinfo, hash); + prefetch(head->chain.first); + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { +- if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) ++ if (INET_MATCH(sk, hash, acookie, saddr, daddr, ++ ports, dif, env)) + goto hit; /* You sunk my battleship! */ + } + + /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { +- if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) ++ if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ++ ports, dif, env)) + goto hit; + } + sk = NULL; +diff -upr linux-2.6.16.orig/include/net/inet_sock.h linux-2.6.16-026test015/include/net/inet_sock.h +--- linux-2.6.16.orig/include/net/inet_sock.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/inet_sock.h 2006-07-04 14:41:38.000000000 +0400 +@@ -171,9 +171,10 @@ static inline void inet_sk_copy_descenda + extern int inet_sk_rebuild_header(struct sock *sk); + + static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, +- const __u32 faddr, const __u16 fport) ++ const __u32 faddr, const __u16 fport, ++ const envid_t veid) + { +- unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); ++ int h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16)); + h ^= h >> 16; + h ^= h >> 8; + return h; +@@ -186,8 +187,9 @@ static inline int inet_sk_ehashfn(const + const __u16 lport = inet->num; + const __u32 faddr = inet->daddr; + const __u16 fport = inet->dport; ++ envid_t veid = VEID(VE_OWNER_SK(sk)); + +- return inet_ehashfn(laddr, lport, faddr, fport); ++ return inet_ehashfn(laddr, lport, faddr, fport, veid); + } + + #endif /* _INET_SOCK_H */ +diff -upr linux-2.6.16.orig/include/net/inet_timewait_sock.h linux-2.6.16-026test015/include/net/inet_timewait_sock.h +--- linux-2.6.16.orig/include/net/inet_timewait_sock.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/inet_timewait_sock.h 2006-07-04 14:41:38.000000000 +0400 +@@ -134,6 +134,7 @@ struct inet_timewait_sock { + unsigned long tw_ttd; + struct inet_bind_bucket *tw_tb; + struct hlist_node tw_death_node; ++ envid_t tw_owner_env; + }; + + static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, +diff -upr linux-2.6.16.orig/include/net/ip.h linux-2.6.16-026test015/include/net/ip.h +--- 
linux-2.6.16.orig/include/net/ip.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/ip.h 2006-07-04 14:41:38.000000000 +0400 +@@ -95,6 +95,7 @@ extern int ip_local_deliver(struct sk_b + extern int ip_mr_input(struct sk_buff *skb); + extern int ip_output(struct sk_buff *skb); + extern int ip_mc_output(struct sk_buff *skb); ++extern int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); + extern int ip_do_nat(struct sk_buff *skb); + extern void ip_send_check(struct iphdr *ip); + extern int ip_queue_xmit(struct sk_buff *skb, int ipfragok); +@@ -152,15 +153,25 @@ struct ipv4_config + + extern struct ipv4_config ipv4_config; + DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics); +-#define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field) +-#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field) +-#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ip_statistics, field) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_ip_statistics (get_exec_env()->_ip_statistics) ++#else ++#define ve_ip_statistics ip_statistics ++#endif ++#define IP_INC_STATS(field) SNMP_INC_STATS(ve_ip_statistics, field) ++#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_ip_statistics, field) ++#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_ip_statistics, field) + DECLARE_SNMP_STAT(struct linux_mib, net_statistics); +-#define NET_INC_STATS(field) SNMP_INC_STATS(net_statistics, field) +-#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(net_statistics, field) +-#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(net_statistics, field) +-#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(net_statistics, field, adnd) +-#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(net_statistics, field, adnd) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_net_statistics (get_exec_env()->_net_statistics) ++#else ++#define ve_net_statistics net_statistics 
++#endif ++#define NET_INC_STATS(field) SNMP_INC_STATS(ve_net_statistics, field) ++#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_net_statistics, field) ++#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_net_statistics, field) ++#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(ve_net_statistics, field, adnd) ++#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(ve_net_statistics, field, adnd) + + extern int sysctl_local_port_range[2]; + extern int sysctl_ip_default_ttl; +@@ -380,4 +391,11 @@ extern int ip_misc_proc_init(void); + + extern struct ctl_table ipv4_table[]; + ++#ifdef CONFIG_SYSCTL ++extern int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, ++ void __user *buffer, size_t *lenp, loff_t *ppos); ++extern int ipv4_sysctl_forward_strategy(ctl_table *table, int __user *name, ++ int nlen, void __user *oldval, size_t __user *oldlenp, ++ void __user *newval, size_t newlen, void **context); ++#endif + #endif /* _IP_H */ +diff -upr linux-2.6.16.orig/include/net/ip6_fib.h linux-2.6.16-026test015/include/net/ip6_fib.h +--- linux-2.6.16.orig/include/net/ip6_fib.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/ip6_fib.h 2006-07-04 14:41:39.000000000 +0400 +@@ -78,6 +78,15 @@ struct rt6_info + u8 rt6i_protocol; + }; + ++struct fib6_table ++{ ++ struct list_head list; ++ struct fib6_node root; ++ struct ve_struct *owner_env; ++}; ++ ++extern struct list_head fib6_table_list; ++ + struct fib6_walker_t + { + struct fib6_walker_t *prev, *next; +@@ -143,7 +152,7 @@ struct rt6_statistics { + + typedef void (*f_pnode)(struct fib6_node *fn, void *); + +-extern struct fib6_node ip6_routing_table; ++extern struct fib6_node ve0_ip6_routing_table; + + /* + * exported functions +diff -upr linux-2.6.16.orig/include/net/ip6_route.h linux-2.6.16-026test015/include/net/ip6_route.h +--- linux-2.6.16.orig/include/net/ip6_route.h 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16-026test015/include/net/ip6_route.h 2006-07-04 14:41:39.000000000 +0400 +@@ -139,5 +139,10 @@ static inline int ipv6_unicast_destinati + return rt->rt6i_flags & RTF_LOCAL; + } + ++#ifdef CONFIG_VE ++int init_ve_route6(struct ve_struct *ve); ++void fini_ve_route6(struct ve_struct *ve); ++#endif ++ + #endif + #endif +diff -upr linux-2.6.16.orig/include/net/ip_fib.h linux-2.6.16-026test015/include/net/ip_fib.h +--- linux-2.6.16.orig/include/net/ip_fib.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/ip_fib.h 2006-07-04 14:41:38.000000000 +0400 +@@ -168,10 +168,22 @@ struct fib_table { + unsigned char tb_data[0]; + }; + ++struct fn_zone; ++struct fn_hash ++{ ++ struct fn_zone *fn_zones[33]; ++ struct fn_zone *fn_zone_list; ++}; ++ + #ifndef CONFIG_IP_MULTIPLE_TABLES + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ip_fib_local_table get_exec_env()->_local_table ++#define ip_fib_main_table get_exec_env()->_main_table ++#else + extern struct fib_table *ip_fib_local_table; + extern struct fib_table *ip_fib_main_table; ++#endif + + static inline struct fib_table *fib_get_table(int id) + { +@@ -203,7 +215,12 @@ static inline void fib_select_default(co + #define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL]) + #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN]) + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define fib_tables get_exec_env()->_fib_tables ++#else + extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; ++#endif ++ + extern int fib_lookup(const struct flowi *flp, struct fib_result *res); + extern struct fib_table *__fib_new_table(int id); + extern void fib_rule_put(struct fib_rule *r); +@@ -250,10 +267,19 @@ extern u32 __fib_res_prefsrc(struct fib + + /* Exported by fib_hash.c */ + extern struct fib_table *fib_hash_init(int id); ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++struct ve_struct; ++extern int init_ve_route(struct ve_struct 
*ve); ++extern void fini_ve_route(struct ve_struct *ve); ++#else ++#define init_ve_route(ve) (0) ++#define fini_ve_route(ve) do { } while (0) ++#endif + + #ifdef CONFIG_IP_MULTIPLE_TABLES + /* Exported by fib_rules.c */ +- ++extern int fib_rules_create(void); ++extern void fib_rules_destroy(void); + extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); + extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); + extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb); +diff -upr linux-2.6.16.orig/include/net/ipv6.h linux-2.6.16-026test015/include/net/ipv6.h +--- linux-2.6.16.orig/include/net/ipv6.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/ipv6.h 2006-07-04 14:41:39.000000000 +0400 +@@ -113,39 +113,48 @@ extern int sysctl_mld_max_msf; + + /* MIBs */ + DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); +-#define IP6_INC_STATS(field) SNMP_INC_STATS(ipv6_statistics, field) +-#define IP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(ipv6_statistics, field) +-#define IP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(ipv6_statistics, field) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_ipv6_statistics (get_exec_env()->_ipv6_statistics) ++#define ve_icmpv6_statistics (get_exec_env()->_icmpv6_statistics) ++#define ve_udp_stats_in6 (get_exec_env()->_udp_stats_in6) ++#else ++#define ve_ipv6_statistics ipv6_statistics ++#define ve_icmpv6_statistics icmpv6_statistics ++#define ve_udp_stats_in6 udp_stats_in6 ++#endif ++#define IP6_INC_STATS(field) SNMP_INC_STATS(ve_ipv6_statistics, field) ++#define IP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_ipv6_statistics, field) ++#define IP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_ipv6_statistics, field) + DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); + #define ICMP6_INC_STATS(idev, field) ({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + 
SNMP_INC_STATS(idev->stats.icmpv6, field); \ +- SNMP_INC_STATS(icmpv6_statistics, field); \ ++ SNMP_INC_STATS(ve_icmpv6_statistics, field); \ + }) + #define ICMP6_INC_STATS_BH(idev, field) ({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + SNMP_INC_STATS_BH((_idev)->stats.icmpv6, field); \ +- SNMP_INC_STATS_BH(icmpv6_statistics, field); \ ++ SNMP_INC_STATS_BH(ve_icmpv6_statistics, field); \ + }) + #define ICMP6_INC_STATS_USER(idev, field) ({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + SNMP_INC_STATS_USER(_idev->stats.icmpv6, field); \ +- SNMP_INC_STATS_USER(icmpv6_statistics, field); \ ++ SNMP_INC_STATS_USER(ve_icmpv6_statistics, field); \ + }) + #define ICMP6_INC_STATS_OFFSET_BH(idev, field, offset) ({ \ + struct inet6_dev *_idev = idev; \ + __typeof__(offset) _offset = (offset); \ + if (likely(_idev != NULL)) \ + SNMP_INC_STATS_OFFSET_BH(_idev->stats.icmpv6, field, _offset); \ +- SNMP_INC_STATS_OFFSET_BH(icmpv6_statistics, field, _offset); \ ++ SNMP_INC_STATS_OFFSET_BH(ve_icmpv6_statistics, field, _offset); \ + }) + DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); +-#define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) +-#define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) +-#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) ++#define UDP6_INC_STATS(field) SNMP_INC_STATS(ve_udp_stats_in6, field) ++#define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_udp_stats_in6, field) ++#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_udp_stats_in6, field) + + int snmp6_register_dev(struct inet6_dev *idev); + int snmp6_unregister_dev(struct inet6_dev *idev); +@@ -154,6 +163,11 @@ int snmp6_free_dev(struct inet6_dev *ide + int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign); + void snmp6_mib_free(void *ptr[2]); + ++#ifdef CONFIG_VE ++int ve_snmp_proc_init(void); ++void ve_snmp_proc_fini(void); ++#endif ++ + struct ip6_ra_chain + { + struct 
ip6_ra_chain *next; +diff -upr linux-2.6.16.orig/include/net/ndisc.h linux-2.6.16-026test015/include/net/ndisc.h +--- linux-2.6.16.orig/include/net/ndisc.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/ndisc.h 2006-07-04 14:41:39.000000000 +0400 +@@ -50,7 +50,14 @@ struct net_device; + struct net_proto_family; + struct sk_buff; + +-extern struct neigh_table nd_tbl; ++#ifdef CONFIG_VE ++#define nd_tbl (*(get_exec_env()->ve_nd_tbl)) ++extern int ve_ndisc_init(struct ve_struct *ve); ++extern void ve_ndisc_fini(struct ve_struct *ve); ++#else ++extern struct neigh_table global_nd_tbl; ++#define nd_tbl global_nd_tbl ++#endif + + struct nd_msg { + struct icmp6hdr icmph; +@@ -128,6 +135,7 @@ extern int ndisc_ifinfo_sysctl_change + extern void inet6_ifinfo_notify(int event, + struct inet6_dev *idev); + ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + static inline struct neighbour * ndisc_get_neigh(struct net_device *dev, struct in6_addr *addr) + { + +@@ -136,6 +144,7 @@ static inline struct neighbour * ndisc_g + + return NULL; + } ++#endif + + + #endif /* __KERNEL__ */ +diff -upr linux-2.6.16.orig/include/net/neighbour.h linux-2.6.16-026test015/include/net/neighbour.h +--- linux-2.6.16.orig/include/net/neighbour.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/neighbour.h 2006-07-04 14:41:39.000000000 +0400 +@@ -191,6 +191,8 @@ struct neigh_table + atomic_t entries; + rwlock_t lock; + unsigned long last_rand; ++ struct ve_struct *owner_env; ++ struct user_beancounter *owner_ub; + kmem_cache_t *kmem_cachep; + struct neigh_statistics *stats; + struct neighbour **hash_buckets; +@@ -210,7 +212,7 @@ struct neigh_table + #define NEIGH_UPDATE_F_ISROUTER 0x40000000 + #define NEIGH_UPDATE_F_ADMIN 0x80000000 + +-extern void neigh_table_init(struct neigh_table *tbl); ++extern int neigh_table_init(struct neigh_table *tbl); + extern int neigh_table_clear(struct neigh_table *tbl); + extern struct neighbour * 
neigh_lookup(struct neigh_table *tbl, + const void *pkey, +diff -upr linux-2.6.16.orig/include/net/netlink_sock.h linux-2.6.16-026test015/include/net/netlink_sock.h +--- linux-2.6.16.orig/include/net/netlink_sock.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/net/netlink_sock.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,22 @@ ++#ifndef __NET_NETLINK_SOCK_H ++#define __NET_NETLINK_SOCK_H ++ ++struct netlink_sock { ++ /* struct sock has to be the first member of netlink_sock */ ++ struct sock sk; ++ u32 pid; ++ u32 dst_pid; ++ u32 dst_group; ++ u32 flags; ++ u32 subscriptions; ++ u32 ngroups; ++ unsigned long *groups; ++ unsigned long state; ++ wait_queue_head_t wait; ++ struct netlink_callback *cb; ++ spinlock_t cb_lock; ++ void (*data_ready)(struct sock *sk, int bytes); ++ struct module *module; ++}; ++ ++#endif /* __NET_NETLINK_SOCK_H */ +diff -upr linux-2.6.16.orig/include/net/route.h linux-2.6.16-026test015/include/net/route.h +--- linux-2.6.16.orig/include/net/route.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/route.h 2006-07-04 14:41:38.000000000 +0400 +@@ -201,4 +201,14 @@ static inline struct inet_peer *rt_get_p + + extern ctl_table ipv4_route_table[]; + ++#ifdef CONFIG_SYSCTL ++extern int ipv4_flush_delay; ++extern int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, ++ struct file *filp, void __user *buffer, size_t *lenp, ++ loff_t *ppos); ++extern int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, ++ int __user *name, int nlen, void __user *oldval, ++ size_t __user *oldlenp, void __user *newval, ++ size_t newlen, void **context); ++#endif + #endif /* _ROUTE_H */ +diff -upr linux-2.6.16.orig/include/net/scm.h linux-2.6.16-026test015/include/net/scm.h +--- linux-2.6.16.orig/include/net/scm.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/scm.h 2006-07-04 14:41:38.000000000 +0400 +@@ -40,7 +40,7 @@ static __inline__ int scm_send(struct so + memset(scm, 
0, sizeof(*scm)); + scm->creds.uid = current->uid; + scm->creds.gid = current->gid; +- scm->creds.pid = current->tgid; ++ scm->creds.pid = virt_tgid(current); + if (msg->msg_controllen <= 0) + return 0; + return __scm_send(sock, msg, scm); +diff -upr linux-2.6.16.orig/include/net/sctp/sctp.h linux-2.6.16-026test015/include/net/sctp/sctp.h +--- linux-2.6.16.orig/include/net/sctp/sctp.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/sctp/sctp.h 2006-07-04 14:41:36.000000000 +0400 +@@ -461,12 +461,12 @@ static inline int sctp_frag_point(const + * there is room for a param header too. + */ + #define sctp_walk_params(pos, chunk, member)\ +-_sctp_walk_params((pos), (chunk), WORD_ROUND(ntohs((chunk)->chunk_hdr.length)), member) ++_sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) + + #define _sctp_walk_params(pos, chunk, end, member)\ + for (pos.v = chunk->member;\ + pos.v <= (void *)chunk + end - sizeof(sctp_paramhdr_t) &&\ +- pos.v <= (void *)chunk + end - WORD_ROUND(ntohs(pos.p->length)) &&\ ++ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ + ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ + pos.v += WORD_ROUND(ntohs(pos.p->length))) + +@@ -477,7 +477,7 @@ _sctp_walk_errors((err), (chunk_hdr), nt + for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ + sizeof(sctp_chunkhdr_t));\ + (void *)err <= (void *)chunk_hdr + end - sizeof(sctp_errhdr_t) &&\ +- (void *)err <= (void *)chunk_hdr + end - WORD_ROUND(ntohs(err->length)) &&\ ++ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ + ntohs(err->length) >= sizeof(sctp_errhdr_t); \ + err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length)))) + +diff -upr linux-2.6.16.orig/include/net/sctp/structs.h linux-2.6.16-026test015/include/net/sctp/structs.h +--- linux-2.6.16.orig/include/net/sctp/structs.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/sctp/structs.h 2006-07-04 14:41:36.000000000 +0400 +@@ -702,6 
+702,7 @@ struct sctp_chunk { + __u8 tsn_gap_acked; /* Is this chunk acked by a GAP ACK? */ + __s8 fast_retransmit; /* Is this chunk fast retransmitted? */ + __u8 tsn_missing_report; /* Data chunk missing counter. */ ++ __u8 data_accepted; /* At least 1 chunk in this packet accepted */ + }; + + void sctp_chunk_hold(struct sctp_chunk *); +diff -upr linux-2.6.16.orig/include/net/sock.h linux-2.6.16-026test015/include/net/sock.h +--- linux-2.6.16.orig/include/net/sock.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/sock.h 2006-07-04 14:41:38.000000000 +0400 +@@ -55,6 +55,8 @@ + #include <net/dst.h> + #include <net/checksum.h> + ++#include <ub/ub_net.h> ++ + /* + * This structure really needs to be cleaned up. + * Most of it is for TCP, and not used by any of +@@ -251,8 +253,12 @@ struct sock { + int (*sk_backlog_rcv)(struct sock *sk, + struct sk_buff *skb); + void (*sk_destruct)(struct sock *sk); ++ struct sock_beancounter sk_bc; ++ struct ve_struct *sk_owner_env; + }; + ++DCL_VE_OWNER_PROTO(SK, struct sock, sk_owner_env) ++ + /* + * Hashed lists helper routines + */ +@@ -485,7 +491,8 @@ static inline void sk_add_backlog(struct + }) + + extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p); +-extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p); ++extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p, ++ unsigned long amount); + extern void sk_stream_wait_close(struct sock *sk, long timeo_p); + extern int sk_stream_error(struct sock *sk, int flags, int err); + extern void sk_stream_kill_queues(struct sock *sk); +@@ -706,8 +713,11 @@ static inline void sk_stream_writequeue_ + + static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb) + { +- return (int)skb->truesize <= sk->sk_forward_alloc || +- sk_stream_mem_schedule(sk, skb->truesize, 1); ++ if ((int)skb->truesize > sk->sk_forward_alloc && ++ !sk_stream_mem_schedule(sk, skb->truesize, 1)) ++ /* The situation is bad according to 
mainstream. Den */ ++ return 0; ++ return ub_tcprcvbuf_charge(sk, skb) == 0; + } + + static inline int sk_stream_wmem_schedule(struct sock *sk, int size) +@@ -765,6 +775,11 @@ extern struct sk_buff *sock_alloc_send + unsigned long size, + int noblock, + int *errcode); ++extern struct sk_buff *sock_alloc_send_skb2(struct sock *sk, ++ unsigned long size, ++ unsigned long size2, ++ int noblock, ++ int *errcode); + extern void *sock_kmalloc(struct sock *sk, int size, + gfp_t priority); + extern void sock_kfree_s(struct sock *sk, void *mem, int size); +@@ -1062,12 +1077,16 @@ sk_dst_check(struct sock *sk, u32 cookie + + static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) + { ++ extern int sysctl_tcp_use_sg; ++ + __sk_dst_set(sk, dst); + sk->sk_route_caps = dst->dev->features; + if (sk->sk_route_caps & NETIF_F_TSO) { + if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) + sk->sk_route_caps &= ~NETIF_F_TSO; + } ++ if (!sysctl_tcp_use_sg) ++ sk->sk_route_caps &= ~NETIF_F_SG; + } + + static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) +@@ -1142,6 +1161,10 @@ static inline int sock_queue_rcv_skb(str + goto out; + } + ++ err = ub_sockrcvbuf_charge(sk, skb); ++ if (err < 0) ++ goto out; ++ + /* It would be deadlock, if sock_queue_rcv_skb is used + with socket lock! We assume that users of this + function are lock free. 
+diff -upr linux-2.6.16.orig/include/net/tcp.h linux-2.6.16-026test015/include/net/tcp.h +--- linux-2.6.16.orig/include/net/tcp.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/tcp.h 2006-07-04 14:41:39.000000000 +0400 +@@ -40,6 +40,7 @@ + #include <net/tcp_states.h> + + #include <linux/seq_file.h> ++#include <ub/ub_net.h> + + extern struct inet_hashinfo tcp_hashinfo; + +@@ -219,6 +220,7 @@ extern int sysctl_tcp_nometrics_save; + extern int sysctl_tcp_moderate_rcvbuf; + extern int sysctl_tcp_tso_win_divisor; + extern int sysctl_tcp_abc; ++extern int sysctl_tcp_use_sg; + + extern atomic_t tcp_memory_allocated; + extern atomic_t tcp_sockets_allocated; +@@ -250,12 +252,17 @@ static inline int between(__u32 seq1, __ + extern struct proto tcp_prot; + + DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics); +-#define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field) +-#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field) +-#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field) +-#define TCP_DEC_STATS(field) SNMP_DEC_STATS(tcp_statistics, field) +-#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) +-#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_tcp_statistics (get_exec_env()->_tcp_statistics) ++#else ++#define ve_tcp_statistics tcp_statistics ++#endif ++#define TCP_INC_STATS(field) SNMP_INC_STATS(ve_tcp_statistics, field) ++#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_tcp_statistics, field) ++#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_tcp_statistics, field) ++#define TCP_DEC_STATS(field) SNMP_DEC_STATS(ve_tcp_statistics, field) ++#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(ve_tcp_statistics, field, val) ++#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(ve_tcp_statistics, field, val) + + extern void 
tcp_v4_err(struct sk_buff *skb, u32); + +@@ -493,7 +500,7 @@ extern u32 __tcp_select_window(struct so + * to use only the low 32-bits of jiffies and hide the ugly + * casts with the following macro. + */ +-#define tcp_time_stamp ((__u32)(jiffies)) ++#define tcp_time_stamp ((__u32)(jiffies + get_exec_env()->jiffies_fixup)) + + /* This is what the send packet queuing engine uses to pass + * TCP per-packet control information to the transmission +diff -upr linux-2.6.16.orig/include/net/udp.h linux-2.6.16-026test015/include/net/udp.h +--- linux-2.6.16.orig/include/net/udp.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/include/net/udp.h 2006-07-04 14:41:38.000000000 +0400 +@@ -39,13 +39,19 @@ extern rwlock_t udp_hash_lock; + + extern int udp_port_rover; + +-static inline int udp_lport_inuse(u16 num) ++static inline int udp_hashfn(u16 num, unsigned veid) ++{ ++ return ((num + (veid ^ (veid >> 16))) & (UDP_HTABLE_SIZE - 1)); ++} ++ ++static inline int udp_lport_inuse(u16 num, struct ve_struct *env) + { + struct sock *sk; + struct hlist_node *node; + +- sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) +- if (inet_sk(sk)->num == num) ++ sk_for_each(sk, node, &udp_hash[udp_hashfn(num, VEID(env))]) ++ if (inet_sk(sk)->num == num && ++ ve_accessible_strict(sk->sk_owner_env, env)) + return 1; + return 0; + } +@@ -75,9 +81,14 @@ extern unsigned int udp_poll(struct file + poll_table *wait); + + DECLARE_SNMP_STAT(struct udp_mib, udp_statistics); +-#define UDP_INC_STATS(field) SNMP_INC_STATS(udp_statistics, field) +-#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_statistics, field) +-#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_statistics, field) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_udp_statistics (get_exec_env()->_udp_statistics) ++#else ++#define ve_udp_statistics udp_statistics ++#endif ++#define UDP_INC_STATS(field) SNMP_INC_STATS(ve_udp_statistics, field) ++#define 
UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_udp_statistics, field) ++#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_udp_statistics, field) + + /* /proc */ + struct udp_seq_afinfo { +diff -upr linux-2.6.16.orig/include/ub/beancounter.h linux-2.6.16-026test015/include/ub/beancounter.h +--- linux-2.6.16.orig/include/ub/beancounter.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/beancounter.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,329 @@ ++/* ++ * include/ub/beancounter.h ++ * ++ * Copyright (C) 1999-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * Andrey Savochkin saw@sw-soft.com ++ * ++ */ ++ ++#ifndef _LINUX_BEANCOUNTER_H ++#define _LINUX_BEANCOUNTER_H ++ ++#include <linux/config.h> ++ ++/* ++ * Generic ratelimiting stuff. ++ */ ++ ++struct ub_rate_info { ++ int burst; ++ int interval; /* jiffy_t per event */ ++ int bucket; /* kind of leaky bucket */ ++ unsigned long last; /* last event */ ++}; ++ ++/* Return true if rate limit permits. */ ++int ub_ratelimit(struct ub_rate_info *); ++ ++ ++/* ++ * This magic is used to distinuish user beancounter and pages beancounter ++ * in struct page. page_ub and page_bc are placed in union and MAGIC ++ * ensures us that we don't use pbc as ubc in ub_page_uncharge(). ++ */ ++#define UB_MAGIC 0x62756275 ++ ++/* ++ * Resource list. ++ */ ++ ++#define UB_KMEMSIZE 0 /* Unswappable kernel memory size including ++ * struct task, page directories, etc. ++ */ ++#define UB_LOCKEDPAGES 1 /* Mlock()ed pages. */ ++#define UB_PRIVVMPAGES 2 /* Total number of pages, counting potentially ++ * private pages as private and used. ++ */ ++#define UB_SHMPAGES 3 /* IPC SHM segment size. */ ++#define UB_ZSHMPAGES 4 /* Anonymous shared memory. */ ++#define UB_NUMPROC 5 /* Number of processes. */ ++#define UB_PHYSPAGES 6 /* All resident pages, for swapout guarantee. 
*/ ++#define UB_VMGUARPAGES 7 /* Guarantee for memory allocation, ++ * checked against PRIVVMPAGES. ++ */ ++#define UB_OOMGUARPAGES 8 /* Guarantees against OOM kill. ++ * Only limit is used, no accounting. ++ */ ++#define UB_NUMTCPSOCK 9 /* Number of TCP sockets. */ ++#define UB_NUMFLOCK 10 /* Number of file locks. */ ++#define UB_NUMPTY 11 /* Number of PTYs. */ ++#define UB_NUMSIGINFO 12 /* Number of siginfos. */ ++#define UB_TCPSNDBUF 13 /* Total size of tcp send buffers. */ ++#define UB_TCPRCVBUF 14 /* Total size of tcp receive buffers. */ ++#define UB_OTHERSOCKBUF 15 /* Total size of other socket ++ * send buffers (all buffers for PF_UNIX). ++ */ ++#define UB_DGRAMRCVBUF 16 /* Total size of other socket ++ * receive buffers. ++ */ ++#define UB_NUMOTHERSOCK 17 /* Number of other sockets. */ ++#define UB_DCACHESIZE 18 /* Size of busy dentry/inode cache. */ ++#define UB_NUMFILE 19 /* Number of open files. */ ++ ++#define UB_RESOURCES 24 ++ ++#define UB_UNUSEDPRIVVM (UB_RESOURCES + 0) ++#define UB_TMPFSPAGES (UB_RESOURCES + 1) ++#define UB_SWAPPAGES (UB_RESOURCES + 2) ++#define UB_HELDPAGES (UB_RESOURCES + 3) ++ ++struct ubparm { ++ /* ++ * A barrier over which resource allocations are failed gracefully. ++ * If the amount of consumed memory is over the barrier further sbrk() ++ * or mmap() calls fail, the existing processes are not killed. ++ */ ++ unsigned long barrier; ++ /* hard resource limit */ ++ unsigned long limit; ++ /* consumed resources */ ++ unsigned long held; ++ /* maximum amount of consumed resources through the last period */ ++ unsigned long maxheld; ++ /* minimum amount of consumed resources through the last period */ ++ unsigned long minheld; ++ /* count of failed charges */ ++ unsigned long failcnt; ++}; ++ ++/* ++ * Kernel internal part. 
++ */ ++ ++#ifdef __KERNEL__ ++ ++#include <ub/ub_debug.h> ++#include <linux/interrupt.h> ++#include <asm/atomic.h> ++#include <linux/spinlock.h> ++#include <linux/cache.h> ++#include <linux/threads.h> ++ ++/* ++ * UB_MAXVALUE is essentially LONG_MAX declared in a cross-compiling safe form. ++ */ ++#define UB_MAXVALUE ( (1UL << (sizeof(unsigned long)*8-1)) - 1) ++ ++ ++/* ++ * Resource management structures ++ * Serialization issues: ++ * beancounter list management is protected via ub_hash_lock ++ * task pointers are set only for current task and only once ++ * refcount is managed atomically ++ * value and limit comparison and change are protected by per-ub spinlock ++ */ ++ ++struct page_beancounter; ++struct task_beancounter; ++struct sock_beancounter; ++ ++struct page_private { ++ unsigned long ubp_unused_privvmpages; ++ unsigned long ubp_tmpfs_respages; ++ unsigned long ubp_swap_pages; ++ unsigned long long ubp_held_pages; ++}; ++ ++struct sock_private { ++ unsigned long ubp_rmem_thres; ++ unsigned long ubp_wmem_pressure; ++ unsigned long ubp_maxadvmss; ++ unsigned long ubp_rmem_pressure; ++#define UB_RMEM_EXPAND 0 ++#define UB_RMEM_KEEP 1 ++#define UB_RMEM_SHRINK 2 ++ struct list_head ubp_other_socks; ++ struct list_head ubp_tcp_socks; ++ atomic_t ubp_orphan_count; ++}; ++ ++struct ub_perfstat { ++ unsigned long unmap; ++ unsigned long swapin; ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ long pages_charged; ++ long vmalloc_charged; ++ long pbcs; ++#endif ++} ____cacheline_aligned_in_smp; ++ ++struct user_beancounter ++{ ++ unsigned long ub_magic; ++ atomic_t ub_refcount; ++ struct user_beancounter *ub_next; ++ spinlock_t ub_lock; ++ uid_t ub_uid; ++ ++ struct ub_rate_info ub_limit_rl; ++ int ub_oom_noproc; ++ ++ struct page_private ppriv; ++#define ub_unused_privvmpages ppriv.ubp_unused_privvmpages ++#define ub_tmpfs_respages ppriv.ubp_tmpfs_respages ++#define ub_swap_pages ppriv.ubp_swap_pages ++#define ub_held_pages ppriv.ubp_held_pages ++ struct sock_private 
spriv; ++#define ub_rmem_thres spriv.ubp_rmem_thres ++#define ub_maxadvmss spriv.ubp_maxadvmss ++#define ub_rmem_pressure spriv.ubp_rmem_pressure ++#define ub_wmem_pressure spriv.ubp_wmem_pressure ++#define ub_tcp_sk_list spriv.ubp_tcp_socks ++#define ub_other_sk_list spriv.ubp_other_socks ++#define ub_orphan_count spriv.ubp_orphan_count ++ ++ struct user_beancounter *parent; ++ void *private_data; ++ ++ /* resources statistic and settings */ ++ struct ubparm ub_parms[UB_RESOURCES]; ++ /* resources statistic for last interval */ ++ struct ubparm ub_store[UB_RESOURCES]; ++ ++ struct ub_perfstat ub_stat[NR_CPUS]; ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ struct list_head ub_cclist; ++#endif ++}; ++ ++enum severity { UB_HARD, UB_SOFT, UB_FORCE }; ++ ++static inline int ub_barrier_hit(struct user_beancounter *ub, int resource) ++{ ++ return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier; ++} ++ ++static inline int ub_hfbarrier_hit(struct user_beancounter *ub, int resource) ++{ ++ return (ub->ub_parms[resource].held > ++ ((ub->ub_parms[resource].barrier) >> 1)); ++} ++ ++#ifndef CONFIG_USER_RESOURCE ++ ++extern inline struct user_beancounter *get_beancounter_byuid ++ (uid_t uid, int create) { return NULL; } ++extern inline struct user_beancounter *get_beancounter ++ (struct user_beancounter *ub) { return NULL; } ++extern inline void put_beancounter(struct user_beancounter *ub) {;} ++ ++static inline void ub_init_cache(unsigned long mempages) { }; ++static inline void ub_init_ub0(void) { }; ++ ++#define get_ub0() NULL ++ ++#else /* CONFIG_USER_RESOURCE */ ++ ++/* ++ * Charge/uncharge operations ++ */ ++ ++extern int __charge_beancounter_locked(struct user_beancounter *ub, ++ int resource, unsigned long val, enum severity strict); ++ ++extern void __uncharge_beancounter_locked(struct user_beancounter *ub, ++ int resource, unsigned long val); ++ ++extern void __put_beancounter(struct user_beancounter *ub); ++ ++extern void uncharge_warn(struct user_beancounter 
*ub, int resource, ++ unsigned long val, unsigned long held); ++ ++extern const char *ub_rnames[]; ++/* ++ * Put a beancounter reference ++ */ ++ ++static inline void put_beancounter(struct user_beancounter *ub) ++{ ++ if (unlikely(ub == NULL)) ++ return; ++ ++ __put_beancounter(ub); ++} ++ ++/* ++ * Create a new beancounter reference ++ */ ++extern struct user_beancounter *get_beancounter_byuid(uid_t uid, int create); ++ ++static inline ++struct user_beancounter *get_beancounter(struct user_beancounter *ub) ++{ ++ if (unlikely(ub == NULL)) ++ return NULL; ++ ++ atomic_inc(&ub->ub_refcount); ++ return ub; ++} ++ ++extern struct user_beancounter *get_subbeancounter_byid( ++ struct user_beancounter *, ++ int id, int create); ++extern struct user_beancounter *subbeancounter_findcreate( ++ struct user_beancounter *p, int id); ++ ++extern struct user_beancounter ub0; ++ ++extern void ub_init_cache(unsigned long); ++extern void ub_init_ub0(void); ++#define get_ub0() (&ub0) ++ ++extern void print_ub_uid(struct user_beancounter *ub, char *buf, int size); ++ ++/* ++ * Resource charging ++ * Change user's account and compare against limits ++ */ ++ ++static inline void ub_adjust_maxheld(struct user_beancounter *ub, int resource) ++{ ++ if (ub->ub_parms[resource].maxheld < ub->ub_parms[resource].held) ++ ub->ub_parms[resource].maxheld = ub->ub_parms[resource].held; ++ if (ub->ub_parms[resource].minheld > ub->ub_parms[resource].held) ++ ub->ub_parms[resource].minheld = ub->ub_parms[resource].held; ++} ++ ++#endif /* CONFIG_USER_RESOURCE */ ++ ++#include <ub/ub_decl.h> ++UB_DECLARE_FUNC(int, charge_beancounter(struct user_beancounter *ub, ++ int resource, unsigned long val, enum severity strict)); ++UB_DECLARE_VOID_FUNC(uncharge_beancounter(struct user_beancounter *ub, ++ int resource, unsigned long val)); ++ ++UB_DECLARE_VOID_FUNC(charge_beancounter_notop(struct user_beancounter *ub, ++ int resource, unsigned long val)); ++UB_DECLARE_VOID_FUNC(uncharge_beancounter_notop(struct 
user_beancounter *ub, ++ int resource, unsigned long val)); ++ ++#ifndef CONFIG_USER_RESOURCE_PROC ++static inline void ub_init_proc(void) { }; ++#else ++extern void ub_init_proc(void); ++#endif ++ ++#ifdef CONFIG_USER_RSS_ACCOUNTING ++extern void ub_init_pbc(void); ++#else ++static inline void ub_ini_pbc(void) { } ++#endif ++#endif /* __KERNEL__ */ ++#endif /* _LINUX_BEANCOUNTER_H */ +diff -upr linux-2.6.16.orig/include/ub/ub_dcache.h linux-2.6.16-026test015/include/ub/ub_dcache.h +--- linux-2.6.16.orig/include/ub/ub_dcache.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_dcache.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,57 @@ ++/* ++ * include/ub/ub_dcache.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_DCACHE_H_ ++#define __UB_DCACHE_H_ ++ ++#include <ub/ub_decl.h> ++ ++/* ++ * UB_DCACHESIZE accounting ++ */ ++ ++struct dentry_beancounter ++{ ++ /* ++ * d_inuse = ++ * <number of external refs> + ++ * <number of 'used' childs> ++ * ++ * d_inuse == -1 means that dentry is unused ++ * state change -1 => 0 causes charge ++ * state change 0 => -1 causes uncharge ++ */ ++ atomic_t d_inuse; ++ /* charged size, including name length if name is not inline */ ++ unsigned long d_ubsize; ++ struct user_beancounter *d_ub; ++}; ++ ++struct dentry; ++ ++UB_DECLARE_FUNC(int, ub_dentry_alloc(struct dentry *d)) ++UB_DECLARE_VOID_FUNC(ub_dentry_charge_nofail(struct dentry *d)) ++UB_DECLARE_VOID_FUNC(ub_dentry_uncharge(struct dentry *d)) ++ ++#ifdef CONFIG_USER_RESOURCE ++UB_DECLARE_FUNC(int, ub_dentry_charge(struct dentry *d)) ++#define ub_dget_testone(d) (atomic_inc_and_test(&(d)->dentry_bc.d_inuse)) ++#define ub_dput_testzero(d) (atomic_add_negative(-1, &(d)->dentry_bc.d_inuse)) ++#define INUSE_INIT 0 ++#else ++#define ub_dentry_charge(d) ({ \ ++ spin_unlock(&d->d_lock); \ ++ rcu_read_unlock(); \ ++ 0; \ ++ }) ++#define 
ub_dget_testone(d) (0) ++#define ub_dput_testzero(d) (0) ++#endif ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_debug.h linux-2.6.16-026test015/include/ub/ub_debug.h +--- linux-2.6.16.orig/include/ub/ub_debug.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_debug.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,95 @@ ++/* ++ * include/ub/ub_debug.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_DEBUG_H_ ++#define __UB_DEBUG_H_ ++ ++/* ++ * general debugging ++ */ ++ ++#define UBD_ALLOC 0x1 ++#define UBD_CHARGE 0x2 ++#define UBD_LIMIT 0x4 ++#define UBD_TRACE 0x8 ++ ++/* ++ * ub_net debugging ++ */ ++ ++#define UBD_NET_SOCKET 0x10 ++#define UBD_NET_SLEEP 0x20 ++#define UBD_NET_SEND 0x40 ++#define UBD_NET_RECV 0x80 ++ ++/* ++ * Main routines ++ */ ++ ++#define UB_DEBUG (0) ++#define DEBUG_RESOURCE (0ULL) ++ ++#define ub_dbg_cond(__cond, __str, args...) \ ++ do { \ ++ if ((__cond) != 0) \ ++ printk(__str, ##args); \ ++ } while(0) ++ ++#define ub_debug(__section, __str, args...) \ ++ ub_dbg_cond(UB_DEBUG & (__section), __str, ##args) ++ ++#define ub_debug_resource(__resource, __str, args...) 
\ ++ ub_dbg_cond((UB_DEBUG & UBD_CHARGE) && \ ++ (DEBUG_RESOURCE & (1 << (__resource))), \ ++ __str, ##args) ++ ++#if UB_DEBUG & UBD_TRACE ++#define ub_debug_trace(__cond, __b, __r) \ ++ do { \ ++ static struct ub_rate_info ri = { __b, __r }; \ ++ if ((__cond) != 0 && ub_ratelimit(&ri)) \ ++ dump_stack(); \ ++ } while(0) ++#else ++#define ub_debug_trace(__cond, __burst, __rate) ++#endif ++ ++#include <linux/config.h> ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++#include <linux/list.h> ++#include <linux/kmem_cache.h> ++ ++struct user_beancounter; ++struct ub_cache_counter { ++ struct list_head ulist; ++ struct ub_cache_counter *next; ++ struct user_beancounter *ub; ++ kmem_cache_t *cachep; ++ unsigned long counter; ++}; ++ ++extern spinlock_t cc_lock; ++extern void init_cache_counters(void); ++extern void ub_free_counters(struct user_beancounter *); ++extern void ub_kmemcache_free(kmem_cache_t *cachep); ++ ++struct vm_struct; ++extern void inc_vmalloc_charged(struct vm_struct *, int); ++extern void dec_vmalloc_charged(struct vm_struct *); ++#else ++#define init_cache_counters() do { } while (0) ++#define inc_vmalloc_charged(vm, f) do { } while (0) ++#define dec_vmalloc_charged(vm) do { } while (0) ++#define ub_free_counters(ub) do { } while (0) ++#define ub_kmemcache_free(cachep) do { } while (0) ++#endif ++ ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_decl.h linux-2.6.16-026test015/include/ub/ub_decl.h +--- linux-2.6.16.orig/include/ub/ub_decl.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_decl.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,40 @@ ++/* ++ * include/ub/ub_decl.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_DECL_H_ ++#define __UB_DECL_H_ ++ ++#include <linux/config.h> ++ ++/* ++ * Naming convension: ++ * ub_<section|object>_<operation> ++ */ ++ ++#ifdef CONFIG_USER_RESOURCE ++ ++#define UB_DECLARE_FUNC(ret_type, decl) extern ret_type decl; ++#define UB_DECLARE_VOID_FUNC(decl) extern void decl; ++ ++#else /* CONFIG_USER_RESOURCE */ ++ ++#define UB_DECLARE_FUNC(ret_type, decl) \ ++ static inline ret_type decl \ ++ { \ ++ return (ret_type)0; \ ++ } ++#define UB_DECLARE_VOID_FUNC(decl) \ ++ static inline void decl \ ++ { \ ++ } ++ ++#endif /* CONFIG_USER_RESOURCE */ ++ ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_hash.h linux-2.6.16-026test015/include/ub/ub_hash.h +--- linux-2.6.16.orig/include/ub/ub_hash.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_hash.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,41 @@ ++/* ++ * include/ub/ub_hash.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef _LINUX_UBHASH_H ++#define _LINUX_UBHASH_H ++ ++#ifdef __KERNEL__ ++ ++#define UB_HASH_SIZE 256 ++ ++struct ub_hash_slot { ++ struct user_beancounter *ubh_beans; ++}; ++ ++extern struct ub_hash_slot ub_hash[]; ++extern spinlock_t ub_hash_lock; ++ ++#ifdef CONFIG_USER_RESOURCE ++ ++/* ++ * Iterate over beancounters ++ * @__slot - hash slot ++ * @__ubp - beancounter ptr ++ * Can use break :) ++ */ ++#define for_each_beancounter(__slot, __ubp) \ ++ for (__slot = 0, __ubp = NULL; \ ++ __slot < UB_HASH_SIZE && __ubp == NULL; __slot++) \ ++ for (__ubp = ub_hash[__slot].ubh_beans; __ubp; \ ++ __ubp = __ubp->ub_next) ++ ++#endif /* CONFIG_USER_RESOURCE */ ++#endif /* __KERNEL__ */ ++#endif /* _LINUX_UBHASH_H */ +diff -upr linux-2.6.16.orig/include/ub/ub_mem.h linux-2.6.16-026test015/include/ub/ub_mem.h +--- linux-2.6.16.orig/include/ub/ub_mem.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_mem.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,76 @@ ++/* ++ * include/ub/ub_mem.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_SLAB_H_ ++#define __UB_SLAB_H_ ++ ++#include <linux/config.h> ++#include <linux/kmem_slab.h> ++#include <ub/beancounter.h> ++#include <ub/ub_decl.h> ++ ++/* ++ * UB_KMEMSIZE accounting ++ */ ++ ++#ifdef CONFIG_UBC_DEBUG_ITEMS ++#define CHARGE_ORDER(__o) (1 << __o) ++#define CHARGE_SIZE(__s) 1 ++#else ++#define CHARGE_ORDER(__o) (PAGE_SIZE << (__o)) ++#define CHARGE_SIZE(__s) (__s) ++#endif ++ ++#define page_ub(__page) ((__page)->bc.page_ub) ++ ++struct mm_struct; ++struct page; ++ ++UB_DECLARE_FUNC(struct user_beancounter *, slab_ub(void *obj)) ++UB_DECLARE_FUNC(struct user_beancounter *, vmalloc_ub(void *obj)) ++UB_DECLARE_FUNC(struct user_beancounter *, mem_ub(void *obj)) ++ ++UB_DECLARE_FUNC(int, ub_page_charge(struct page *page, int order, int mask)) ++UB_DECLARE_VOID_FUNC(ub_page_uncharge(struct page *page, int order)) ++UB_DECLARE_FUNC(int, ub_slab_charge(void *objp, int flags)) ++UB_DECLARE_VOID_FUNC(ub_slab_uncharge(void *obj)) ++ ++#define slab_ubcs(cachep, slabp) ((struct user_beancounter **)\ ++ (ALIGN((unsigned long)(slab_bufctl(slabp) + (cachep)->num),\ ++ sizeof(void *)))) ++ ++#ifdef CONFIG_USER_RESOURCE ++extern struct user_beancounter *ub_select_worst(long *); ++ ++/* mm/slab.c needed stuff */ ++#define UB_ALIGN(flags) (flags & SLAB_UBC ? sizeof(void *) : 1) ++#define UB_EXTRA(flags) (flags & SLAB_UBC ? 
sizeof(void *) : 0) ++#define set_cache_objuse(cachep) do { \ ++ (cachep)->objuse = ((PAGE_SIZE << (cachep)->gfporder) + \ ++ (cachep)->num - 1) / (cachep)->num; \ ++ if (!OFF_SLAB(cachep)) \ ++ break; \ ++ (cachep)->objuse += ((cachep)->slabp_cache->objuse + \ ++ (cachep)->num - 1) / (cachep)->num; \ ++ } while (0) ++#define init_slab_ubps(cachep, slabp) do { \ ++ if (!((cachep)->flags & SLAB_UBC)) \ ++ break; \ ++ memset(slab_ubcs(cachep, slabp), 0, \ ++ (cachep)->num * sizeof(void *)); \ ++ } while (0) ++#define kmem_obj_memusage(o) (virt_to_cache(o)->objuse) ++#else ++#define UB_ALIGN(flags) 1 ++#define UB_EXTRA(flags) 0 ++#define set_cache_objuse(c) do { } while (0) ++#define init_slab_ubps(c, s) do { } while (0) ++#endif ++#endif /* __UB_SLAB_H_ */ +diff -upr linux-2.6.16.orig/include/ub/ub_misc.h linux-2.6.16-026test015/include/ub/ub_misc.h +--- linux-2.6.16.orig/include/ub/ub_misc.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_misc.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,54 @@ ++/* ++ * include/ub/ub_misc.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_MISC_H_ ++#define __UB_MISC_H_ ++ ++#include <ub/ub_decl.h> ++ ++struct tty_struct; ++struct file; ++struct file_lock; ++struct sigqueue; ++ ++UB_DECLARE_FUNC(int, ub_file_charge(struct file *f)) ++UB_DECLARE_VOID_FUNC(ub_file_uncharge(struct file *f)) ++UB_DECLARE_FUNC(int, ub_flock_charge(struct file_lock *fl, int hard)) ++UB_DECLARE_VOID_FUNC(ub_flock_uncharge(struct file_lock *fl)) ++UB_DECLARE_FUNC(int, ub_siginfo_charge(struct sigqueue *q, ++ struct user_beancounter *ub)) ++UB_DECLARE_VOID_FUNC(ub_siginfo_uncharge(struct sigqueue *q)) ++UB_DECLARE_FUNC(int, ub_task_charge(struct task_struct *parent, ++ struct task_struct *task)) ++UB_DECLARE_VOID_FUNC(ub_task_uncharge(struct task_struct *task)) ++UB_DECLARE_FUNC(int, ub_pty_charge(struct tty_struct *tty)) ++UB_DECLARE_VOID_FUNC(ub_pty_uncharge(struct tty_struct *tty)) ++ ++#ifdef CONFIG_USER_RESOURCE ++#define set_flock_charged(fl) do { (fl)->fl_charged = 1; } while (0) ++#define unset_flock_charged(fl) do { \ ++ WARN_ON((fl)->fl_charged == 0); \ ++ (fl)->fl_charged = 0; \ ++ } while (0) ++#define set_mm_ub(mm, tsk) do { \ ++ (mm)->mm_ub = get_beancounter(tsk ? \ ++ tsk->task_bc.task_ub : get_exec_ub()); \ ++ } while (0) ++#define put_mm_ub(mm) do { \ ++ put_beancounter((mm)->mm_ub); \ ++ (mm)->mm_ub = NULL; \ ++ } while (0) ++#else ++#define set_flock_charged(fl) do { } while (0) ++#define ubset_flock_charged(fl) do { } while (0) ++#define set_mm_ub(mm, tsk) do { } while (0) ++#define put_mm_ub(mm) do { } while (0) ++#endif ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_net.h linux-2.6.16-026test015/include/ub/ub_net.h +--- linux-2.6.16.orig/include/ub/ub_net.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_net.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,141 @@ ++/* ++ * include/ub/ub_net.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_NET_H_ ++#define __UB_NET_H_ ++ ++/* ++ * UB_NUMXXXSOCK, UB_XXXBUF accounting ++ */ ++ ++#include <ub/ub_decl.h> ++#include <ub/ub_sk.h> ++ ++#define bid2sid(__bufid) \ ++ ((__bufid) == UB_TCPSNDBUF ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK) ++ ++#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \ ++ ~(SMP_CACHE_BYTES-1))) ++#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE) ++ ++ ++#define IS_TCP_SOCK(__family, __type) \ ++ (((__family) == PF_INET || (__family) == PF_INET6) && (__type) == SOCK_STREAM) ++ ++UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type)) ++UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk)) ++UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk)) ++UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk)) ++UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask)) ++UB_DECLARE_VOID_FUNC(ub_skb_free_bc(struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)) ++UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)) ++UB_DECLARE_VOID_FUNC(ub_sock_snd_queue_add(struct sock *sk, int resource, ++ unsigned long size)) ++UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo, ++ unsigned long size)) ++ ++UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge_forced(struct sock *sk, ++ struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge_forced(struct sock *sk, ++ struct sk_buff *skb)) ++ ++/* Charge size */ ++static inline unsigned long skb_charge_datalen(unsigned long chargesize) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ unsigned long slabsize; ++ ++ chargesize -= sizeof(struct sk_buff); ++ slabsize = 64; ++ do { ++ slabsize <<= 1; ++ } while 
(slabsize <= chargesize); ++ ++ slabsize >>= 1; ++ return (slabsize - sizeof(struct skb_shared_info)) & ++ ~(SMP_CACHE_BYTES-1); ++#else ++ return 0; ++#endif ++} ++ ++static inline unsigned long skb_charge_size_gen(unsigned long size) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ unsigned int slabsize; ++ ++ size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info); ++ slabsize = 32; /* min size is 64 because of skb_shared_info */ ++ do { ++ slabsize <<= 1; ++ } while (slabsize < size); ++ ++ return slabsize + sizeof(struct sk_buff); ++#else ++ return 0; ++#endif ++ ++} ++ ++static inline unsigned long skb_charge_size_const(unsigned long size) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ unsigned int ret; ++ if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 64) ++ ret = 64 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 128) ++ ret = 128 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 256) ++ ret = 256 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 512) ++ ret = 512 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 1024) ++ ret = 1024 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 2048) ++ ret = 2048 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 4096) ++ ret = 4096 + sizeof(struct sk_buff); ++ else ++ ret = skb_charge_size_gen(size); ++ return ret; ++#else ++ return 0; ++#endif ++} ++ ++ ++#define skb_charge_size(__size) \ ++ (__builtin_constant_p(__size) ? 
\ ++ skb_charge_size_const(__size) : \ ++ skb_charge_size_gen(__size)) ++ ++UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb)) ++UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb, ++ struct sock *sk, unsigned long size, int res)) ++ ++/* Poll reserv */ ++UB_DECLARE_FUNC(int, ub_sock_makewres_other(struct sock *sk, unsigned long sz)) ++UB_DECLARE_FUNC(int, ub_sock_makewres_tcp(struct sock *sk, unsigned long size)) ++UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk, unsigned long size)) ++UB_DECLARE_FUNC(int, ub_sock_getwres_tcp(struct sock *sk, unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk, unsigned long size, ++ unsigned long ressize)) ++UB_DECLARE_VOID_FUNC(ub_sock_retwres_tcp(struct sock *sk, unsigned long size, ++ unsigned long ressize)) ++UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_other(struct sock *sk, ++ unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)) ++UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk)) ++ ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_orphan.h linux-2.6.16-026test015/include/ub/ub_orphan.h +--- linux-2.6.16.orig/include/ub/ub_orphan.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_orphan.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,56 @@ ++/* ++ * include/ub/ub_orphan.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_ORPHAN_H_ ++#define __UB_ORPHAN_H_ ++ ++#include <net/tcp.h> ++ ++#include "ub/beancounter.h" ++#include "ub/ub_net.h" ++ ++ ++static inline atomic_t *__ub_get_orphan_count_ptr(struct sock *sk) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ if (sock_has_ubc(sk)) ++ return &sock_bc(sk)->ub->ub_orphan_count; ++#endif ++ return sk->sk_prot->orphan_count; ++} ++ ++static inline void ub_inc_orphan_count(struct sock *sk) ++{ ++ atomic_inc(__ub_get_orphan_count_ptr(sk)); ++} ++ ++static inline void ub_dec_orphan_count(struct sock *sk) ++{ ++ atomic_dec(__ub_get_orphan_count_ptr(sk)); ++} ++ ++static inline int ub_get_orphan_count(struct sock *sk) ++{ ++ return atomic_read(__ub_get_orphan_count_ptr(sk)); ++} ++ ++extern int __ub_too_many_orphans(struct sock *sk, int count); ++static inline int ub_too_many_orphans(struct sock *sk, int count) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ if (__ub_too_many_orphans(sk, count)) ++ return 1; ++#endif ++ return (ub_get_orphan_count(sk) > sysctl_tcp_max_orphans || ++ (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && ++ atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])); ++} ++ ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_page.h linux-2.6.16-026test015/include/ub/ub_page.h +--- linux-2.6.16.orig/include/ub/ub_page.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_page.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,48 @@ ++/* ++ * include/ub/ub_page.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_PAGE_H_ ++#define __UB_PAGE_H_ ++ ++#include <linux/config.h> ++ ++/* ++ * Page_beancounters ++ */ ++ ++struct page; ++struct user_beancounter; ++ ++#define PB_MAGIC 0x62700001UL ++ ++struct page_beancounter { ++ unsigned long pb_magic; ++ struct page *page; ++ struct user_beancounter *ub; ++ struct page_beancounter *next_hash; ++ unsigned refcount; ++ struct list_head page_list; ++}; ++ ++#define PB_REFCOUNT_BITS 24 ++#define PB_SHIFT_GET(c) ((c) >> PB_REFCOUNT_BITS) ++#define PB_SHIFT_INC(c) ((c) += (1 << PB_REFCOUNT_BITS)) ++#define PB_SHIFT_DEC(c) ((c) -= (1 << PB_REFCOUNT_BITS)) ++#define PB_COUNT_GET(c) ((c) & ((1 << PB_REFCOUNT_BITS) - 1)) ++#define PB_COUNT_INC(c) ((c)++) ++#define PB_COUNT_DEC(c) ((c)--) ++#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c)) ++ ++#define page_pbc(__page) ((__page)->bc.page_pb) ++ ++struct address_space; ++extern int is_shmem_mapping(struct address_space *); ++ ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_sk.h linux-2.6.16-026test015/include/ub/ub_sk.h +--- linux-2.6.16.orig/include/ub/ub_sk.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_sk.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,43 @@ ++/* ++ * include/ub/ub_sk.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_SK_H_ ++#define __UB_SK_H_ ++ ++#include <linux/config.h> ++#include <ub/ub_task.h> ++ ++struct sock; ++struct sk_buff; ++ ++struct skb_beancounter { ++ struct user_beancounter *ub; ++ unsigned long charged:27, resource:5; ++}; ++ ++struct sock_beancounter { ++ /* ++ * already charged for future sends, to make poll work; ++ * changes are protected by bc spinlock, read is under socket ++ * semaphore for sends and unprotected in poll ++ */ ++ unsigned long poll_reserv; ++ unsigned long ub_waitspc; /* space waiting for */ ++ unsigned long ub_wcharged; ++ struct list_head ub_sock_list; ++ struct user_beancounter *ub; ++}; ++ ++#define sock_bc(__sk) (&(__sk)->sk_bc) ++#define skb_bc(__skb) (&(__skb)->skb_bc) ++#define skbc_sock(__skbc) (container_of(__skbc, struct sock, sk_bc)) ++#define sock_has_ubc(__sk) (sock_bc(__sk)->ub != NULL) ++ ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_stat.h linux-2.6.16-026test015/include/ub/ub_stat.h +--- linux-2.6.16.orig/include/ub/ub_stat.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_stat.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,70 @@ ++/* ++ * include/ub/ub_stat.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_STAT_H_ ++#define __UB_STAT_H_ ++ ++/* sys_ubstat commands list */ ++#define UBSTAT_READ_ONE 0x010000 ++#define UBSTAT_READ_ALL 0x020000 ++#define UBSTAT_READ_FULL 0x030000 ++#define UBSTAT_UBLIST 0x040000 ++#define UBSTAT_UBPARMNUM 0x050000 ++#define UBSTAT_GETTIME 0x060000 ++ ++#define UBSTAT_CMD(func) ((func) & 0xF0000) ++#define UBSTAT_PARMID(func) ((func) & 0x0FFFF) ++ ++#define TIME_MAX_SEC (LONG_MAX / HZ) ++#define TIME_MAX_JIF (TIME_MAX_SEC * HZ) ++ ++typedef unsigned long ubstattime_t; ++ ++typedef struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstattime_t cur_time; ++} ubgettime_t; ++ ++typedef struct { ++ long maxinterval; ++ int signum; ++} ubnotifrq_t; ++ ++typedef struct { ++ unsigned long maxheld; ++ unsigned long failcnt; ++} ubstatparm_t; ++ ++typedef struct { ++ unsigned long barrier; ++ unsigned long limit; ++ unsigned long held; ++ unsigned long maxheld; ++ unsigned long minheld; ++ unsigned long failcnt; ++ unsigned long __unused1; ++ unsigned long __unused2; ++} ubstatparmf_t; ++ ++typedef struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstatparmf_t param[0]; ++} ubstatfull_t; ++ ++#ifdef __KERNEL__ ++struct ub_stat_notify { ++ struct list_head list; ++ struct task_struct *task; ++ int signum; ++}; ++#endif ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_task.h linux-2.6.16-026test015/include/ub/ub_task.h +--- linux-2.6.16.orig/include/ub/ub_task.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_task.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,49 @@ ++/* ++ * include/ub/ub_task.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_TASK_H_ ++#define __UB_TASK_H_ ++ ++#include <linux/config.h> ++ ++struct user_beancounter; ++ ++ ++#ifdef CONFIG_USER_RESOURCE ++ ++struct task_beancounter { ++ struct user_beancounter *exec_ub; ++ struct user_beancounter *task_ub; ++ struct user_beancounter *fork_sub; ++ void *task_fnode, *task_freserv; ++ unsigned long oom_generation; ++ unsigned long task_data[4]; ++}; ++ ++#define get_exec_ub() (current->task_bc.exec_ub) ++#define get_task_ub(__task) ((__task)->task_bc.task_ub) ++#define set_exec_ub(__newub) \ ++({ \ ++ struct user_beancounter *old; \ ++ struct task_beancounter *tbc; \ ++ tbc = ¤t->task_bc; \ ++ old = tbc->exec_ub; \ ++ tbc->exec_ub = __newub; \ ++ old; \ ++}) ++ ++#else /* CONFIG_USER_RESOURCE */ ++ ++#define get_exec_ub() (NULL) ++#define get_task_ub(task) (NULL) ++#define set_exec_ub(__ub) (NULL) ++ ++#endif /* CONFIG_USER_RESOURCE */ ++#endif /* __UB_TASK_H_ */ +diff -upr linux-2.6.16.orig/include/ub/ub_tcp.h linux-2.6.16-026test015/include/ub/ub_tcp.h +--- linux-2.6.16.orig/include/ub/ub_tcp.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_tcp.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,79 @@ ++/* ++ * include/ub/ub_tcp.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_TCP_H_ ++#define __UB_TCP_H_ ++ ++/* ++ * UB_NUMXXXSOCK, UB_XXXBUF accounting ++ */ ++ ++#include <ub/ub_sk.h> ++#include <ub/beancounter.h> ++ ++static inline void ub_tcp_update_maxadvmss(struct sock *sk) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ if (!sock_has_ubc(sk)) ++ return; ++ if (sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss) ++ return; ++ ++ sock_bc(sk)->ub->ub_maxadvmss = ++ skb_charge_size(MAX_HEADER + sizeof(struct iphdr) ++ + sizeof(struct tcphdr) + tcp_sk(sk)->advmss); ++#endif ++} ++ ++static inline int ub_tcp_rmem_allows_expand(struct sock *sk) ++{ ++ if (tcp_memory_pressure) ++ return 0; ++#ifdef CONFIG_USER_RESOURCE ++ if (sock_has_ubc(sk)) { ++ struct user_beancounter *ub; ++ ++ ub = sock_bc(sk)->ub; ++ if (ub->ub_rmem_pressure == UB_RMEM_EXPAND) ++ return 1; ++ if (ub->ub_rmem_pressure == UB_RMEM_SHRINK) ++ return 0; ++ return sk->sk_rcvbuf <= ub->ub_rmem_thres; ++ } ++#endif ++ return 1; ++} ++ ++static inline int ub_tcp_memory_pressure(struct sock *sk) ++{ ++ if (tcp_memory_pressure) ++ return 1; ++#ifdef CONFIG_USER_RESOURCE ++ if (sock_has_ubc(sk)) ++ return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND; ++#endif ++ return 0; ++} ++ ++static inline int ub_tcp_shrink_rcvbuf(struct sock *sk) ++{ ++ if (tcp_memory_pressure) ++ return 1; ++#ifdef CONFIG_USER_RESOURCE ++ if (sock_has_ubc(sk)) ++ return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK; ++#endif ++ return 0; ++} ++ ++UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk)) ++UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk)) ++ ++#endif +diff -upr linux-2.6.16.orig/include/ub/ub_vmpages.h linux-2.6.16-026test015/include/ub/ub_vmpages.h +--- linux-2.6.16.orig/include/ub/ub_vmpages.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/include/ub/ub_vmpages.h 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,167 @@ ++/* ++ * include/ub/ub_vmpages.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. 
++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_PAGES_H_ ++#define __UB_PAGES_H_ ++ ++#include <linux/linkage.h> ++#include <linux/config.h> ++#include <ub/beancounter.h> ++#include <ub/ub_decl.h> ++ ++/* ++ * Check whether vma has private or copy-on-write mapping. ++ * Should match checks in ub_protected_charge(). ++ */ ++#define VM_UB_PRIVATE(__flags, __file) \ ++ ( ((__flags) & VM_WRITE) ? \ ++ (__file) == NULL || !((__flags) & VM_SHARED) : \ ++ 0 \ ++ ) ++ ++/* Mprotect charging result */ ++#define PRIVVM_ERROR -1 ++#define PRIVVM_NO_CHARGE 0 /* UB_DECLARE_FUNC retval with ubc off */ ++#define PRIVVM_TO_PRIVATE 1 ++#define PRIVVM_TO_SHARED 2 ++ ++UB_DECLARE_FUNC(int, ub_protected_charge(struct mm_struct *mm, ++ unsigned long size, ++ unsigned long newflags, ++ struct vm_area_struct *vma)) ++ ++UB_DECLARE_VOID_FUNC(ub_unused_privvm_add(struct mm_struct *mm, ++ struct vm_area_struct *vma, ++ unsigned long num)) ++#define ub_unused_privvm_inc(mm, vma) ub_unused_privvm_add(mm, vma, 1) ++UB_DECLARE_VOID_FUNC(ub_unused_privvm_sub(struct mm_struct *mm, ++ struct vm_area_struct *vma, ++ unsigned long num)) ++#define ub_unused_privvm_dec(mm, vma) ub_unused_privvm_sub(mm, vma, 1) ++ ++UB_DECLARE_VOID_FUNC(__ub_unused_privvm_dec(struct mm_struct *mm, ++ long sz)) ++ ++UB_DECLARE_FUNC(int, ub_memory_charge(struct mm_struct *mm, ++ unsigned long size, ++ unsigned vm_flags, ++ struct file *vm_file, ++ int strict)) ++UB_DECLARE_VOID_FUNC(ub_memory_uncharge(struct mm_struct *mm, ++ unsigned long size, ++ unsigned vm_flags, ++ struct file *vm_file)) ++ ++struct shmem_inode_info; ++UB_DECLARE_FUNC(int, ub_shmpages_charge(struct shmem_inode_info *i, ++ unsigned long sz)) ++UB_DECLARE_VOID_FUNC(ub_shmpages_uncharge(struct shmem_inode_info *i, ++ unsigned long sz)) ++UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_inc(struct shmem_inode_info *shi)) ++UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_sub(struct shmem_inode_info *shi, ++ unsigned long size)) 
++#define ub_tmpfs_respages_dec(shi) ub_tmpfs_respages_sub(shi, 1) ++ ++#ifdef CONFIG_USER_RESOURCE ++#define shmi_ub_set(shi, ub) do { \ ++ (shi)->shmi_ub = get_beancounter(ub); \ ++ } while (0) ++#define shmi_ub_put(shi) do { \ ++ put_beancounter((shi)->shmi_ub); \ ++ (shi)->shmi_ub = NULL; \ ++ } while (0) ++#else ++#define shmi_ub_set(shi, ub) do { } while (0) ++#define shmi_ub_put(shi) do { } while (0) ++#endif ++ ++UB_DECLARE_FUNC(int, ub_locked_charge(struct mm_struct *mm, ++ unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_locked_uncharge(struct mm_struct *mm, ++ unsigned long size)) ++UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi, ++ unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi, ++ unsigned long size)) ++ ++UB_DECLARE_FUNC(unsigned long, pages_in_vma_range(struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end)) ++UB_DECLARE_VOID_FUNC(warn_bad_rss(struct vm_area_struct *vma, ++ unsigned long freed)) ++#define pages_in_vma(vma) (pages_in_vma_range(vma, \ ++ vma->vm_start, vma->vm_end)) ++ ++#define UB_PAGE_WEIGHT_SHIFT 24 ++#define UB_PAGE_WEIGHT (1 << UB_PAGE_WEIGHT_SHIFT) ++ ++struct page_beancounter; ++#define PBC_COPY_SAME ((struct page_beancounter *) 1) ++ ++/* Mprotect charging result */ ++#define PRIVVM_ERROR -1 ++#define PRIVVM_NO_CHARGE 0 ++#define PRIVVM_TO_PRIVATE 1 ++#define PRIVVM_TO_SHARED 2 ++ ++extern void fastcall __ub_update_physpages(struct user_beancounter *ub); ++extern void fastcall __ub_update_oomguarpages(struct user_beancounter *ub); ++extern void fastcall __ub_update_privvm(struct user_beancounter *ub); ++ ++#ifdef CONFIG_USER_RSS_ACCOUNTING ++#define PB_DECLARE_FUNC(ret, decl) UB_DECLARE_FUNC(ret, decl) ++#define PB_DECLARE_VOID_FUNC(decl) UB_DECLARE_VOID_FUNC(decl) ++#else ++#define PB_DECLARE_FUNC(ret, decl) static inline ret decl {return (ret)0;} ++#define PB_DECLARE_VOID_FUNC(decl) static inline void decl { } ++#endif ++ 
++PB_DECLARE_FUNC(int, pb_alloc(struct page_beancounter **pbc)) ++PB_DECLARE_FUNC(int, pb_alloc_list(struct page_beancounter **pbc, int num)) ++PB_DECLARE_FUNC(int, pb_alloc_all(struct page_beancounter **pbc)) ++PB_DECLARE_VOID_FUNC(pb_add_ref(struct page *page, ++ struct mm_struct *mm, ++ struct page_beancounter **pbc)) ++PB_DECLARE_VOID_FUNC(pb_dup_ref(struct page *page, ++ struct mm_struct *mm, ++ struct page_beancounter **pbc)) ++PB_DECLARE_VOID_FUNC(pb_free_list(struct page_beancounter **pb)) ++PB_DECLARE_VOID_FUNC(pb_free(struct page_beancounter **pb)) ++PB_DECLARE_VOID_FUNC(pb_remove_ref(struct page *page, ++ struct mm_struct *mm)) ++ ++PB_DECLARE_FUNC(struct user_beancounter *, pb_grab_page_ub(struct page *page)) ++#endif ++ ++#ifdef CONFIG_USER_SWAP_ACCOUNTING ++#define SWP_DECLARE_FUNC(ret, decl) UB_DECLARE_FUNC(ret, decl) ++#define SWP_DECLARE_VOID_FUNC(decl) UB_DECLARE_VOID_FUNC(decl) ++#else ++#define SWP_DECLARE_FUNC(ret, decl) static inline ret decl {return (ret)0;} ++#define SWP_DECLARE_VOID_FUNC(decl) static inline void decl { } ++#endif ++ ++struct swap_info_struct; ++SWP_DECLARE_FUNC(int, ub_swap_init(struct swap_info_struct *si, pgoff_t n)) ++SWP_DECLARE_VOID_FUNC(ub_swap_fini(struct swap_info_struct *si)) ++SWP_DECLARE_VOID_FUNC(ub_swapentry_inc(struct swap_info_struct *si, pgoff_t n, ++ struct user_beancounter *ub)) ++SWP_DECLARE_VOID_FUNC(ub_swapentry_dec(struct swap_info_struct *si, pgoff_t n)) ++ ++#ifdef CONFIG_USER_RESOURCE ++#define ub_unmap_inc(mm) do { \ ++ (mm)->mm_ub->ub_stat[smp_processor_id()].unmap++; \ ++ } while (0) ++#define ub_swapin_inc(mm) do { \ ++ (mm)->mm_ub->ub_stat[smp_processor_id()].swapin++; \ ++ } while (0) ++#else ++#define ub_unmap_inc(mm) do { } while (0) ++#define ub_swapin_inc(mm) do { } while (0) ++#endif +diff -upr linux-2.6.16.orig/init/calibrate.c linux-2.6.16-026test015/init/calibrate.c +--- linux-2.6.16.orig/init/calibrate.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/init/calibrate.c 
2006-07-04 14:41:38.000000000 +0400 +@@ -7,6 +7,7 @@ + #include <linux/sched.h> + #include <linux/delay.h> + #include <linux/init.h> ++#include <linux/module.h> + + #include <asm/timex.h> + +@@ -105,6 +106,60 @@ static unsigned long __devinit calibrate + static unsigned long __devinit calibrate_delay_direct(void) {return 0;} + #endif + ++unsigned long cycles_per_jiffy, cycles_per_clock; ++ ++static __devinit void calibrate_cycles(void) ++{ ++ unsigned long ticks; ++ cycles_t time; ++ ++ ticks = jiffies; ++ while (ticks == jiffies) ++ /* nothing */; ++ time = get_cycles(); ++ ticks = jiffies; ++ while (ticks == jiffies) ++ /* nothing */; ++ ++ time = get_cycles() - time; ++ cycles_per_jiffy = time; ++ if ((time >> 32) != 0) { ++ printk("CPU too fast! timings are incorrect\n"); ++ cycles_per_jiffy = -1; ++ } ++} ++ ++EXPORT_SYMBOL(cycles_per_jiffy); ++EXPORT_SYMBOL(cycles_per_clock); ++ ++static __devinit void calc_cycles_per_jiffy(void) ++{ ++#if defined(__i386__) ++ extern unsigned long fast_gettimeoffset_quotient; ++ unsigned long low, high; ++ ++ if (fast_gettimeoffset_quotient != 0) { ++ __asm__("divl %2" ++ :"=a" (low), "=d" (high) ++ :"r" (fast_gettimeoffset_quotient), ++ "0" (0), "1" (1000000/HZ)); ++ ++ cycles_per_jiffy = low; ++ } ++#endif ++ if (cycles_per_jiffy == 0) ++ calibrate_cycles(); ++ ++ if (cycles_per_jiffy == 0) { ++ printk(KERN_WARNING "Cycles are stuck! " ++ "Some VPS statistics will not be available."); ++ /* to prevent division by zero in cycles_to_(clocks|jiffies) */ ++ cycles_per_jiffy = 1; ++ cycles_per_clock = 1; ++ } else ++ cycles_per_clock = cycles_per_jiffy * (HZ / CLOCKS_PER_SEC); ++} ++ + /* + * This is the number of bits of precision for the loops_per_jiffy. Each + * bit takes on average 1.5/HZ seconds. 
This (like the original) is a little +@@ -170,4 +225,5 @@ void __devinit calibrate_delay(void) + loops_per_jiffy); + } + ++ calc_cycles_per_jiffy(); + } +diff -upr linux-2.6.16.orig/init/main.c linux-2.6.16-026test015/init/main.c +--- linux-2.6.16.orig/init/main.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/init/main.c 2006-07-04 14:41:39.000000000 +0400 +@@ -48,6 +48,8 @@ + #include <linux/mempolicy.h> + #include <linux/key.h> + ++#include <ub/beancounter.h> ++ + #include <asm/io.h> + #include <asm/bugs.h> + #include <asm/setup.h> +@@ -80,6 +82,7 @@ extern void sbus_init(void); + extern void sysctl_init(void); + extern void signals_init(void); + extern void buffer_init(void); ++extern void fairsched_init_late(void); + extern void pidhash_init(void); + extern void pidmap_init(void); + extern void prio_tree_init(void); +@@ -104,6 +107,24 @@ extern void tc_init(void); + enum system_states system_state; + EXPORT_SYMBOL(system_state); + ++#ifdef CONFIG_VE ++extern void init_ve_system(void); ++extern void prepare_ve0_process(struct task_struct *tsk); ++extern void prepare_ve0_proc_root(void); ++extern void prepare_ve0_sysctl(void); ++#else ++#define init_ve_system() do { } while (0) ++#define prepare_ve0_process(tsk) do { } while (0) ++#define prepare_ve0_proc_root() do { } while (0) ++#define prepare_ve0_sysctl() do { } while (0) ++#endif ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++extern void prepare_ve0_loopback(void); ++#else ++#define prepare_ve0_loopback() do { } while (0) ++#endif ++ + /* + * Boot command-line arguments + */ +@@ -447,6 +468,10 @@ asmlinkage void __init start_kernel(void + * enable them + */ + lock_kernel(); ++ /* ++ * Prepare ub0 to account early allocations if any ++ */ ++ ub_init_ub0(); + page_address_init(); + printk(KERN_NOTICE); + printk(linux_banner); +@@ -459,6 +484,8 @@ asmlinkage void __init start_kernel(void + */ + smp_prepare_boot_cpu(); + ++ prepare_ve0_process(&init_task); ++ + /* + * 
Set up the scheduler prior starting any interrupts (such as the + * timer interrupt). Full topology setup happens at smp_init() +@@ -524,6 +551,7 @@ asmlinkage void __init start_kernel(void + #endif + fork_init(num_physpages); + proc_caches_init(); ++ ub_init_cache(num_physpages); + buffer_init(); + unnamed_dev_init(); + key_init(); +@@ -534,7 +562,10 @@ asmlinkage void __init start_kernel(void + /* rootfs populating might need page-writeback */ + page_writeback_init(); + #ifdef CONFIG_PROC_FS ++ prepare_ve0_proc_root(); ++ prepare_ve0_sysctl(); + proc_root_init(); ++ ub_init_proc(); + #endif + cpuset_init(); + +@@ -542,6 +573,10 @@ asmlinkage void __init start_kernel(void + + acpi_early_init(); /* before LAPIC and SMP init */ + ++#ifdef CONFIG_USER_RSS_ACCOUNTING ++ ub_init_pbc(); ++#endif ++ + /* Do the rest non-__init'ed, we're now alive */ + rest_init(); + } +@@ -603,6 +638,9 @@ static void __init do_initcalls(void) + */ + static void __init do_basic_setup(void) + { ++ prepare_ve0_loopback(); ++ init_ve_system(); ++ + /* drivers will send hotplug events */ + init_workqueues(); + usermodehelper_init(); +@@ -618,7 +656,7 @@ static void __init do_basic_setup(void) + static void do_pre_smp_initcalls(void) + { + extern int spawn_ksoftirqd(void); +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU) + extern int migration_init(void); + + migration_init(); +@@ -674,6 +712,12 @@ static int init(void * unused) + + fixup_cpu_present_map(); + smp_init(); ++ ++ /* ++ * This should be done after all cpus are known to ++ * be online. smp_init gives us confidence in it. 
++ */ ++ fairsched_init_late(); + sched_init_smp(); + + cpuset_init_smp(); +diff -upr linux-2.6.16.orig/init/version.c linux-2.6.16-026test015/init/version.c +--- linux-2.6.16.orig/init/version.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/init/version.c 2006-07-04 14:41:38.000000000 +0400 +@@ -28,6 +28,12 @@ struct new_utsname system_utsname = { + + EXPORT_SYMBOL(system_utsname); + ++struct new_utsname virt_utsname = { ++ /* we need only this field */ ++ .release = UTS_RELEASE, ++}; ++EXPORT_SYMBOL(virt_utsname); ++ + const char linux_banner[] = + "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; +diff -upr linux-2.6.16.orig/ipc/mqueue.c linux-2.6.16-026test015/ipc/mqueue.c +--- linux-2.6.16.orig/ipc/mqueue.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/ipc/mqueue.c 2006-07-04 14:41:37.000000000 +0400 +@@ -639,7 +639,8 @@ static int oflag2acc[O_ACCMODE] = { MAY_ + return ERR_PTR(-EINVAL); + } + +- if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) { ++ if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], ++ NULL, NULL)) { + dput(dentry); + mntput(mqueue_mnt); + return ERR_PTR(-EACCES); +diff -upr linux-2.6.16.orig/ipc/msg.c linux-2.6.16-026test015/ipc/msg.c +--- linux-2.6.16.orig/ipc/msg.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/ipc/msg.c 2006-07-04 14:41:39.000000000 +0400 +@@ -88,6 +88,45 @@ void __init msg_init (void) + sysvipc_msg_proc_show); + } + ++#ifdef CONFIG_VE ++void __init prepare_msg(void) ++{ ++ get_ve0()->_msg_ids = &msg_ids; ++ get_ve0()->_msg_ctlmax = msg_ctlmax; ++ get_ve0()->_msg_ctlmnb = msg_ctlmnb; ++ get_ve0()->_msg_ctlmni = msg_ctlmni; ++} ++ ++#define msg_ids (*(get_exec_env()->_msg_ids)) ++#define msg_ctlmax (get_exec_env()->_msg_ctlmax) ++#define msg_ctlmnb (get_exec_env()->_msg_ctlmnb) ++#define msg_ctlmni (get_exec_env()->_msg_ctlmni) ++ ++void init_ve_ipc_msg(void) ++{ ++ 
msg_ctlmax = MSGMAX; ++ msg_ctlmnb = MSGMNB; ++ msg_ctlmni = MSGMNI; ++ ipc_init_ids(&msg_ids, MSGMNI); ++} ++ ++void cleanup_ve_ipc_msg(void) ++{ ++ int i; ++ struct msg_queue *msq; ++ ++ down(&msg_ids.sem); ++ for (i = 0; i <= msg_ids.max_id; i++) { ++ msq = msg_lock(i); ++ if (msq == NULL) ++ continue; ++ ++ freeque(msq, i); ++ } ++ up(&msg_ids.sem); ++} ++#endif ++ + static int newque (key_t key, int msgflg) + { + int id; +@@ -108,7 +147,7 @@ static int newque (key_t key, int msgflg + return retval; + } + +- id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni); ++ id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni, -1); + if(id == -1) { + security_msg_queue_free(msq); + ipc_rcu_putref(msq); +@@ -450,7 +489,7 @@ asmlinkage long sys_msgctl (int msqid, i + ipcp = &msq->q_perm; + err = -EPERM; + if (current->euid != ipcp->cuid && +- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) ++ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) + /* We _could_ check for CAP_CHOWN above, but we don't */ + goto out_unlock_up; + +@@ -540,7 +579,7 @@ static inline int pipelined_send(struct + msr->r_msg = ERR_PTR(-E2BIG); + } else { + msr->r_msg = NULL; +- msq->q_lrpid = msr->r_tsk->pid; ++ msq->q_lrpid = virt_pid(msr->r_tsk); + msq->q_rtime = get_seconds(); + wake_up_process(msr->r_tsk); + smp_mb(); +@@ -622,7 +661,7 @@ asmlinkage long sys_msgsnd (int msqid, s + } + } + +- msq->q_lspid = current->tgid; ++ msq->q_lspid = virt_tgid(current); + msq->q_stime = get_seconds(); + + if(!pipelined_send(msq,msg)) { +@@ -718,7 +757,7 @@ asmlinkage long sys_msgrcv (int msqid, s + list_del(&msg->m_list); + msq->q_qnum--; + msq->q_rtime = get_seconds(); +- msq->q_lrpid = current->tgid; ++ msq->q_lrpid = virt_tgid(current); + msq->q_cbytes -= msg->m_ts; + atomic_sub(msg->m_ts,&msg_bytes); + atomic_dec(&msg_hdrs); +@@ -833,3 +872,27 @@ static int sysvipc_msg_proc_show(struct + msq->q_ctime); + } + #endif ++ ++#if defined(CONFIG_VZ_CHECKPOINT) || defined(CONFIG_VZ_CHECKPOINT_MODULE) 
++#include <linux/module.h> ++ ++int sysvipc_walk_msg(int (*func)(int i, struct msg_queue*, void *), void *arg) ++{ ++ int i; ++ int err = 0; ++ struct msg_queue * msq; ++ ++ down(&msg_ids.sem); ++ for(i = 0; i <= msg_ids.max_id; i++) { ++ if ((msq = msg_lock(i)) == NULL) ++ continue; ++ err = func(msg_buildid(i,msq->q_perm.seq), msq, arg); ++ msg_unlock(msq); ++ if (err) ++ break; ++ } ++ up(&msg_ids.sem); ++ return err; ++} ++EXPORT_SYMBOL_GPL(sysvipc_walk_msg); ++#endif +diff -upr linux-2.6.16.orig/ipc/msgutil.c linux-2.6.16-026test015/ipc/msgutil.c +--- linux-2.6.16.orig/ipc/msgutil.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/ipc/msgutil.c 2006-07-04 14:41:37.000000000 +0400 +@@ -17,6 +17,8 @@ + + #include "util.h" + ++#include <ub/ub_mem.h> ++ + struct msg_msgseg { + struct msg_msgseg* next; + /* the next part of the message follows immediately */ +@@ -36,7 +38,7 @@ struct msg_msg *load_msg(const void __us + if (alen > DATALEN_MSG) + alen = DATALEN_MSG; + +- msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL); ++ msg = (struct msg_msg *)ub_kmalloc(sizeof(*msg) + alen, GFP_KERNEL); + if (msg == NULL) + return ERR_PTR(-ENOMEM); + +@@ -56,7 +58,7 @@ struct msg_msg *load_msg(const void __us + alen = len; + if (alen > DATALEN_SEG) + alen = DATALEN_SEG; +- seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen, ++ seg = (struct msg_msgseg *)ub_kmalloc(sizeof(*seg) + alen, + GFP_KERNEL); + if (seg == NULL) { + err = -ENOMEM; +diff -upr linux-2.6.16.orig/ipc/sem.c linux-2.6.16-026test015/ipc/sem.c +--- linux-2.6.16.orig/ipc/sem.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/ipc/sem.c 2006-07-04 14:41:39.000000000 +0400 +@@ -78,6 +78,7 @@ + #include <asm/uaccess.h> + #include "util.h" + ++#include <ub/ub_mem.h> + + #define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id)) + #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) +@@ -88,7 +89,7 @@ + ipc_buildid(&sem_ids, id, seq) + static struct ipc_ids 
sem_ids; + +-static int newary (key_t, int, int); ++static int newary (key_t, int, int, int); + static void freeary (struct sem_array *sma, int id); + #ifdef CONFIG_PROC_FS + static int sysvipc_sem_proc_show(struct seq_file *s, void *it); +@@ -124,6 +125,48 @@ void __init sem_init (void) + sysvipc_sem_proc_show); + } + ++#ifdef CONFIG_VE ++void __init prepare_sem(void) ++{ ++ get_ve0()->_sem_ids = &sem_ids; ++ get_ve0()->_used_sems = used_sems; ++ get_ve0()->_sem_ctls[0] = sem_ctls[0]; ++ get_ve0()->_sem_ctls[1] = sem_ctls[1]; ++ get_ve0()->_sem_ctls[2] = sem_ctls[2]; ++ get_ve0()->_sem_ctls[3] = sem_ctls[3]; ++} ++ ++#define sem_ids (*(get_exec_env()->_sem_ids)) ++#define used_sems (get_exec_env()->_used_sems) ++#define sem_ctls (get_exec_env()->_sem_ctls) ++ ++void init_ve_ipc_sem(void) ++{ ++ used_sems = 0; ++ sem_ctls[0] = SEMMSL; ++ sem_ctls[1] = SEMMNS; ++ sem_ctls[2] = SEMOPM; ++ sem_ctls[3] = SEMMNI; ++ ipc_init_ids(&sem_ids, SEMMNI); ++} ++ ++void cleanup_ve_ipc_sem(void) ++{ ++ int i; ++ struct sem_array *sma; ++ ++ down(&sem_ids.sem); ++ for (i = 0; i <= sem_ids.max_id; i++) { ++ sma = sem_lock(i); ++ if (sma == NULL) ++ continue; ++ ++ freeary(sma, i); ++ } ++ up(&sem_ids.sem); ++} ++#endif ++ + /* + * Lockless wakeup algorithm: + * Without the check/retry algorithm a lockless wakeup is possible: +@@ -158,7 +201,7 @@ void __init sem_init (void) + */ + #define IN_WAKEUP 1 + +-static int newary (key_t key, int nsems, int semflg) ++static int newary (key_t key, int semid, int nsems, int semflg) + { + int id; + int retval; +@@ -187,7 +230,7 @@ static int newary (key_t key, int nsems, + return retval; + } + +- id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni); ++ id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni, semid); + if(id == -1) { + security_sem_free(sma); + ipc_rcu_putref(sma); +@@ -217,12 +260,12 @@ asmlinkage long sys_semget (key_t key, i + down(&sem_ids.sem); + + if (key == IPC_PRIVATE) { +- err = newary(key, nsems, semflg); ++ err = 
newary(key, -1, nsems, semflg); + } else if ((id = ipc_findkey(&sem_ids, key)) == -1) { /* key not used */ + if (!(semflg & IPC_CREAT)) + err = -ENOENT; + else +- err = newary(key, nsems, semflg); ++ err = newary(key, -1, nsems, semflg); + } else if (semflg & IPC_CREAT && semflg & IPC_EXCL) { + err = -EEXIST; + } else { +@@ -743,7 +786,7 @@ static int semctl_main(int semid, int se + for (un = sma->undo; un; un = un->id_next) + un->semadj[semnum] = 0; + curr->semval = val; +- curr->sempid = current->tgid; ++ curr->sempid = virt_tgid(current); + sma->sem_ctime = get_seconds(); + /* maybe some queued-up processes were waiting for this */ + update_queue(sma); +@@ -823,7 +866,7 @@ static int semctl_down(int semid, int se + ipcp = &sma->sem_perm; + + if (current->euid != ipcp->cuid && +- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) { ++ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) { + err=-EPERM; + goto out_unlock; + } +@@ -944,7 +987,8 @@ static inline int get_undo_list(struct s + undo_list = current->sysvsem.undo_list; + if (!undo_list) { + size = sizeof(struct sem_undo_list); +- undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL); ++ undo_list = (struct sem_undo_list *) ub_kmalloc(size, ++ GFP_KERNEL); + if (undo_list == NULL) + return -ENOMEM; + memset(undo_list, 0, size); +@@ -1008,7 +1052,8 @@ static struct sem_undo *find_undo(int se + ipc_rcu_getref(sma); + sem_unlock(sma); + +- new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); ++ new = (struct sem_undo *) ub_kmalloc(sizeof(struct sem_undo) + ++ sizeof(short)*nsems, GFP_KERNEL); + if (!new) { + ipc_lock_by_ptr(&sma->sem_perm); + ipc_rcu_putref(sma); +@@ -1066,7 +1111,7 @@ asmlinkage long sys_semtimedop(int semid + if (nsops > sc_semopm) + return -E2BIG; + if(nsops > SEMOPM_FAST) { +- sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); ++ sops = ub_kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); + if(sops==NULL) + return -ENOMEM; + } +@@ -1150,7 
+1195,7 @@ retry_undos: + queue.sops = sops; + queue.nsops = nsops; + queue.undo = un; +- queue.pid = current->tgid; ++ queue.pid = virt_tgid(current); + queue.id = semid; + queue.alter = alter; + if (alter) +@@ -1320,7 +1365,7 @@ found: + sem->semval = 0; + if (sem->semval > SEMVMX) + sem->semval = SEMVMX; +- sem->sempid = current->tgid; ++ sem->sempid = virt_tgid(current); + } + } + sma->sem_otime = get_seconds(); +@@ -1351,3 +1396,48 @@ static int sysvipc_sem_proc_show(struct + sma->sem_ctime); + } + #endif ++ ++#if defined(CONFIG_VZ_CHECKPOINT) || defined(CONFIG_VZ_CHECKPOINT_MODULE) ++#include <linux/module.h> ++ ++int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg) ++{ ++ int err = 0; ++ struct sem_array *sma; ++ ++ down(&sem_ids.sem); ++ sma = sem_lock(semid); ++ if (!sma) { ++ err = newary(key, semid, size, semflg); ++ if (err >= 0) ++ sma = sem_lock(semid); ++ } ++ if (sma) ++ sem_unlock(sma); ++ up(&sem_ids.sem); ++ ++ return err > 0 ? 0 : err; ++} ++EXPORT_SYMBOL_GPL(sysvipc_setup_sem); ++ ++int sysvipc_walk_sem(int (*func)(int i, struct sem_array*, void *), void *arg) ++{ ++ int i; ++ int err = 0; ++ struct sem_array *sma; ++ ++ down(&sem_ids.sem); ++ for (i = 0; i <= sem_ids.max_id; i++) { ++ if ((sma = sem_lock(i)) == NULL) ++ continue; ++ err = func(sem_buildid(i,sma->sem_perm.seq), sma, arg); ++ sem_unlock(sma); ++ if (err) ++ break; ++ } ++ up(&sem_ids.sem); ++ return err; ++} ++EXPORT_SYMBOL_GPL(sysvipc_walk_sem); ++EXPORT_SYMBOL_GPL(exit_sem); ++#endif +diff -upr linux-2.6.16.orig/ipc/shm.c linux-2.6.16-026test015/ipc/shm.c +--- linux-2.6.16.orig/ipc/shm.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/ipc/shm.c 2006-07-04 14:41:39.000000000 +0400 +@@ -30,9 +30,13 @@ + #include <linux/capability.h> + #include <linux/ptrace.h> + #include <linux/seq_file.h> ++#include <linux/shmem_fs.h> + + #include <asm/uaccess.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_vmpages.h> ++ + #include "util.h" + + static 
struct file_operations shm_file_operations; +@@ -46,9 +50,11 @@ static struct ipc_ids shm_ids; + #define shm_buildid(id, seq) \ + ipc_buildid(&shm_ids, id, seq) + +-static int newseg (key_t key, int shmflg, size_t size); ++static int newseg (key_t key, int shmid, int shmflg, size_t size); + static void shm_open (struct vm_area_struct *shmd); + static void shm_close (struct vm_area_struct *shmd); ++static void shm_destroy (struct shmid_kernel *shmd); ++static void do_shm_rmid(struct shmid_kernel *shp); + #ifdef CONFIG_PROC_FS + static int sysvipc_shm_proc_show(struct seq_file *s, void *it); + #endif +@@ -68,6 +74,68 @@ void __init shm_init (void) + sysvipc_shm_proc_show); + } + ++#ifdef CONFIG_VE ++void __init prepare_shm(void) ++{ ++ get_ve0()->_shm_ids = &shm_ids; ++ get_ve0()->_shm_ctlmax = shm_ctlmax; ++ get_ve0()->_shm_ctlall = shm_ctlall; ++ get_ve0()->_shm_ctlmni = shm_ctlmni; ++ get_ve0()->_shm_tot = shm_tot; ++} ++ ++#define shm_ids (*(get_exec_env()->_shm_ids)) ++#define shm_ctlmax (get_exec_env()->_shm_ctlmax) ++#define shm_ctlall (get_exec_env()->_shm_ctlall) ++#define shm_ctlmni (get_exec_env()->_shm_ctlmni) ++#define shm_total (get_exec_env()->_shm_tot) ++ ++void init_ve_ipc_shm(void) ++{ ++ shm_ctlmax = SHMMAX; ++ shm_ctlall = SHMALL; ++ shm_ctlmni = SHMMNI; ++ shm_total = 0; ++ ipc_init_ids(&shm_ids, 1); ++} ++ ++void cleanup_ve_ipc_shm(void) ++{ ++ int i; ++ struct shmid_kernel *shp; ++ ++ down(&shm_ids.sem); ++ for (i = 0; i <= shm_ids.max_id; i++) { ++ shp = shm_lock(i); ++ if (shp == NULL) ++ continue; ++ ++ do_shm_rmid(shp); ++ } ++ up(&shm_ids.sem); ++} ++#define sb_ve(sb) VE_OWNER_FSTYPE(sb->s_type) ++#define shm_total_sb(sb) (&sb_ve(sb)->_shm_tot) ++#define shm_lock_sb(id, sb) ((struct shmid_kernel *) \ ++ ipc_lock(sb_ve(sb)->_shm_ids, id)) ++#else ++/* renamed since there is a struct field named shm_tot */ ++#define shm_total shm_tot ++#define shm_total_sb(sb) (&shm_tot) ++#define shm_lock_sb(id, sb) shm_lock(id) ++#endif ++ ++static void 
do_shm_rmid(struct shmid_kernel *shp) ++{ ++ if (shp->shm_nattch){ ++ shp->shm_perm.mode |= SHM_DEST; ++ /* Do not find it any more */ ++ shp->shm_perm.key = IPC_PRIVATE; ++ shm_unlock(shp); ++ } else ++ shm_destroy (shp); ++} ++ + static inline int shm_checkid(struct shmid_kernel *s, int id) + { + if (ipc_checkid(&shm_ids,&s->shm_perm,id)) +@@ -75,25 +143,25 @@ static inline int shm_checkid(struct shm + return 0; + } + +-static inline struct shmid_kernel *shm_rmid(int id) ++static inline struct shmid_kernel *shm_rmid(struct ipc_ids *ids, int id) + { +- return (struct shmid_kernel *)ipc_rmid(&shm_ids,id); ++ return (struct shmid_kernel *)ipc_rmid(ids,id); + } + +-static inline int shm_addid(struct shmid_kernel *shp) ++static inline int shm_addid(struct shmid_kernel *shp, int reqid) + { +- return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni); ++ return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni, reqid); + } + + + +-static inline void shm_inc (int id) { ++static inline void shm_inc(int id, struct super_block *sb) { + struct shmid_kernel *shp; + +- if(!(shp = shm_lock(id))) ++ if(!(shp = shm_lock_sb(id, sb))) + BUG(); + shp->shm_atim = get_seconds(); +- shp->shm_lprid = current->tgid; ++ shp->shm_lprid = virt_tgid(current); + shp->shm_nattch++; + shm_unlock(shp); + } +@@ -101,7 +169,50 @@ static inline void shm_inc (int id) { + /* This is called by fork, once for every shm attach. 
*/ + static void shm_open (struct vm_area_struct *shmd) + { +- shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino); ++ shm_inc(shmd->vm_file->f_dentry->d_inode->i_ino, ++ shmd->vm_file->f_dentry->d_inode->i_sb); ++} ++ ++static int shmem_lock(struct shmid_kernel *shp, int lock, ++ struct user_struct *user) ++{ ++ struct file *file = shp->shm_file; ++ struct inode *inode = file->f_dentry->d_inode; ++ struct shmem_inode_info *info = SHMEM_I(inode); ++ unsigned long size; ++ ++ size = shp->shm_segsz + PAGE_SIZE - 1; ++ ++#ifdef CONFIG_SHMEM ++ spin_lock(&info->lock); ++ if (lock && !(info->flags & VM_LOCKED)) { ++ if (ub_lockedshm_charge(info, size) < 0) ++ goto out_ch; ++ ++ if (!user_shm_lock(inode->i_size, user)) ++ goto out_user; ++ info->flags |= VM_LOCKED; ++ } ++ if (!lock && (info->flags & VM_LOCKED) && user) { ++ ub_lockedshm_uncharge(info, size); ++ user_shm_unlock(inode->i_size, user); ++ info->flags &= ~VM_LOCKED; ++ } ++ spin_unlock(&info->lock); ++ return 0; ++ ++out_user: ++ ub_lockedshm_uncharge(info, size); ++out_ch: ++ spin_unlock(&info->lock); ++ return -ENOMEM; ++#else ++ if (lock && ub_lockedshm_charge(info, size)) ++ return -ENOMEM; ++ if (!lock) ++ ub_lockedshm_uncharge(info, size); ++ return 0; ++#endif + } + + /* +@@ -114,15 +225,24 @@ static void shm_open (struct vm_area_str + */ + static void shm_destroy (struct shmid_kernel *shp) + { +- shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; +- shm_rmid (shp->id); ++ int numpages, *shm_totalp; ++ struct file *f; ++ struct super_block *sb; ++ ++ f = shp->shm_file; ++ sb = f->f_dentry->d_inode->i_sb; ++ numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ shm_totalp = shm_total_sb(sb); ++ *shm_totalp -= numpages; ++ ++ shm_rmid (shp->_shm_ids, shp->id); + shm_unlock(shp); + if (!is_file_hugepages(shp->shm_file)) +- shmem_lock(shp->shm_file, 0, shp->mlock_user); ++ shmem_lock(shp, 0, shp->mlock_user); + else + user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size, + 
shp->mlock_user); +- fput (shp->shm_file); ++ fput(f); + security_shm_free(shp); + ipc_rcu_putref(shp); + } +@@ -138,12 +258,24 @@ static void shm_close (struct vm_area_st + struct file * file = shmd->vm_file; + int id = file->f_dentry->d_inode->i_ino; + struct shmid_kernel *shp; ++ struct super_block *sb; ++ struct ipc_ids *ids; ++#ifdef CONFIG_VE ++ struct ve_struct *ve; ++ ++ sb = file->f_dentry->d_inode->i_sb; ++ ve = get_ve(sb_ve(sb)); ++ ids = ve->_shm_ids; ++#else ++ sb = file->f_dentry->d_inode->i_sb; ++ ids = &shm_ids; ++#endif + +- down (&shm_ids.sem); ++ down (&ids->sem); + /* remove from the list of attaches of the shm segment */ +- if(!(shp = shm_lock(id))) ++ if(!(shp = shm_lock_sb(id, sb))) + BUG(); +- shp->shm_lprid = current->tgid; ++ shp->shm_lprid = virt_tgid(current); + shp->shm_dtim = get_seconds(); + shp->shm_nattch--; + if(shp->shm_nattch == 0 && +@@ -151,7 +283,10 @@ static void shm_close (struct vm_area_st + shm_destroy (shp); + else + shm_unlock(shp); +- up (&shm_ids.sem); ++ up(&ids->sem); ++#ifdef CONFIG_VE ++ put_ve(ve); ++#endif + } + + static int shm_mmap(struct file * file, struct vm_area_struct * vma) +@@ -161,7 +296,10 @@ static int shm_mmap(struct file * file, + ret = shmem_mmap(file, vma); + if (ret == 0) { + vma->vm_ops = &shm_vm_ops; +- shm_inc(file->f_dentry->d_inode->i_ino); ++ if (!(vma->vm_flags & VM_WRITE)) ++ vma->vm_flags &= ~VM_MAYWRITE; ++ shm_inc(file->f_dentry->d_inode->i_ino, ++ file->f_dentry->d_inode->i_sb); + } + + return ret; +@@ -184,19 +322,19 @@ static struct vm_operations_struct shm_v + #endif + }; + +-static int newseg (key_t key, int shmflg, size_t size) ++static int newseg (key_t key, int shmid, int shmflg, size_t size) + { + int error; + struct shmid_kernel *shp; + int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; + struct file * file; +- char name[13]; ++ char name[26]; + int id; + + if (size < SHMMIN || size > shm_ctlmax) + return -EINVAL; + +- if (shm_tot + numpages >= shm_ctlall) ++ if (shm_total + 
numpages >= shm_ctlall) + return -ENOSPC; + + shp = ipc_rcu_alloc(sizeof(*shp)); +@@ -227,7 +365,11 @@ static int newseg (key_t key, int shmflg + if ((shmflg & SHM_NORESERVE) && + sysctl_overcommit_memory != OVERCOMMIT_NEVER) + acctflag = 0; ++#ifdef CONFIG_VE ++ sprintf (name, "VE%d.SYSV%08x", get_exec_env()->veid, key); ++#else + sprintf (name, "SYSV%08x", key); ++#endif + file = shmem_file_setup(name, size, acctflag); + } + error = PTR_ERR(file); +@@ -235,17 +377,18 @@ static int newseg (key_t key, int shmflg + goto no_file; + + error = -ENOSPC; +- id = shm_addid(shp); ++ id = shm_addid(shp, shmid); + if(id == -1) + goto no_id; + +- shp->shm_cprid = current->tgid; ++ shp->shm_cprid = virt_tgid(current); + shp->shm_lprid = 0; + shp->shm_atim = shp->shm_dtim = 0; + shp->shm_ctim = get_seconds(); + shp->shm_segsz = size; + shp->shm_nattch = 0; + shp->id = shm_buildid(id,shp->shm_perm.seq); ++ shp->_shm_ids = &shm_ids; + shp->shm_file = file; + file->f_dentry->d_inode->i_ino = shp->id; + +@@ -253,7 +396,7 @@ static int newseg (key_t key, int shmflg + if (!(shmflg & SHM_HUGETLB)) + file->f_op = &shm_file_operations; + +- shm_tot += numpages; ++ shm_total += numpages; + shm_unlock(shp); + return shp->id; + +@@ -272,12 +415,12 @@ asmlinkage long sys_shmget (key_t key, s + + down(&shm_ids.sem); + if (key == IPC_PRIVATE) { +- err = newseg(key, shmflg, size); ++ err = newseg(key, -1, shmflg, size); + } else if ((id = ipc_findkey(&shm_ids, key)) == -1) { + if (!(shmflg & IPC_CREAT)) + err = -ENOENT; + else +- err = newseg(key, shmflg, size); ++ err = newseg(key, -1, shmflg, size); + } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) { + err = -EEXIST; + } else { +@@ -470,7 +613,7 @@ asmlinkage long sys_shmctl (int shmid, i + down(&shm_ids.sem); + shm_info.used_ids = shm_ids.in_use; + shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp); +- shm_info.shm_tot = shm_tot; ++ shm_info.shm_tot = shm_total; + shm_info.swap_attempts = 0; + shm_info.swap_successes = 0; + err = 
shm_ids.max_id; +@@ -557,14 +700,14 @@ asmlinkage long sys_shmctl (int shmid, i + if(cmd==SHM_LOCK) { + struct user_struct * user = current->user; + if (!is_file_hugepages(shp->shm_file)) { +- err = shmem_lock(shp->shm_file, 1, user); ++ err = shmem_lock(shp, 1, user); + if (!err) { + shp->shm_perm.mode |= SHM_LOCKED; + shp->mlock_user = user; + } + } + } else if (!is_file_hugepages(shp->shm_file)) { +- shmem_lock(shp->shm_file, 0, shp->mlock_user); ++ shmem_lock(shp, 0, shp->mlock_user); + shp->shm_perm.mode &= ~SHM_LOCKED; + shp->mlock_user = NULL; + } +@@ -594,7 +737,7 @@ asmlinkage long sys_shmctl (int shmid, i + + if (current->euid != shp->shm_perm.uid && + current->euid != shp->shm_perm.cuid && +- !capable(CAP_SYS_ADMIN)) { ++ !capable(CAP_VE_SYS_ADMIN)) { + err=-EPERM; + goto out_unlock_up; + } +@@ -603,13 +746,7 @@ asmlinkage long sys_shmctl (int shmid, i + if (err) + goto out_unlock_up; + +- if (shp->shm_nattch){ +- shp->shm_perm.mode |= SHM_DEST; +- /* Do not find it any more */ +- shp->shm_perm.key = IPC_PRIVATE; +- shm_unlock(shp); +- } else +- shm_destroy (shp); ++ do_shm_rmid(shp); + up(&shm_ids.sem); + goto out; + } +@@ -633,7 +770,7 @@ asmlinkage long sys_shmctl (int shmid, i + err=-EPERM; + if (current->euid != shp->shm_perm.uid && + current->euid != shp->shm_perm.cuid && +- !capable(CAP_SYS_ADMIN)) { ++ !capable(CAP_VE_SYS_ADMIN)) { + goto out_unlock_up; + } + +@@ -916,3 +1053,55 @@ static int sysvipc_shm_proc_show(struct + shp->shm_ctim); + } + #endif ++ ++#if defined(CONFIG_VZ_CHECKPOINT) || defined(CONFIG_VZ_CHECKPOINT_MODULE) ++#include <linux/module.h> ++ ++struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg) ++{ ++ struct shmid_kernel *shp; ++ struct file *file; ++ ++ down(&shm_ids.sem); ++ shp = shm_lock(shmid); ++ if (!shp) { ++ int err; ++ ++ err = newseg(key, shmid, shmflg, size); ++ file = ERR_PTR(err); ++ if (err < 0) ++ goto out; ++ shp = shm_lock(shmid); ++ } ++ file = ERR_PTR(-EINVAL); ++ if (shp) { ++ file 
= shp->shm_file; ++ get_file(file); ++ shm_unlock(shp); ++ } ++out: ++ up(&shm_ids.sem); ++ return file; ++} ++EXPORT_SYMBOL_GPL(sysvipc_setup_shm); ++ ++int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg) ++{ ++ int i; ++ int err = 0; ++ struct shmid_kernel* shp; ++ ++ down(&shm_ids.sem); ++ for(i = 0; i <= shm_ids.max_id; i++) { ++ if ((shp = shm_lock(i)) == NULL) ++ continue; ++ err = func(shp, arg); ++ shm_unlock(shp); ++ if (err) ++ break; ++ } ++ up(&shm_ids.sem); ++ return err; ++} ++EXPORT_SYMBOL_GPL(sysvipc_walk_shm); ++#endif +diff -upr linux-2.6.16.orig/ipc/util.c linux-2.6.16-026test015/ipc/util.c +--- linux-2.6.16.orig/ipc/util.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/ipc/util.c 2006-07-04 14:41:39.000000000 +0400 +@@ -13,6 +13,7 @@ + */ + + #include <linux/config.h> ++#include <linux/module.h> + #include <linux/mm.h> + #include <linux/shm.h> + #include <linux/init.h> +@@ -30,6 +31,8 @@ + + #include <asm/unistd.h> + ++#include <ub/ub_mem.h> ++ + #include "util.h" + + struct ipc_proc_iface { +@@ -65,7 +68,7 @@ __initcall(ipc_init); + * array itself. 
+ */ + +-void __init ipc_init_ids(struct ipc_ids* ids, int size) ++void __ve_init ipc_init_ids(struct ipc_ids* ids, int size) + { + int i; + sema_init(&ids->sem,1); +@@ -94,7 +97,21 @@ void __init ipc_init_ids(struct ipc_ids* + ids->entries->size = size; + for(i=0;i<size;i++) + ids->entries->p[i] = NULL; ++ ++ ids->owner_env = get_exec_env(); ++} ++ ++#ifdef CONFIG_VE ++static inline void ipc_free_ids(struct ipc_ids *ids) ++{ ++ if (ids == NULL) ++ return; ++ ++ if (ids->entries != &ids->nullentry) ++ ipc_rcu_putref(ids->entries); ++ kfree(ids); + } ++#endif + + #ifdef CONFIG_PROC_FS + static struct file_operations sysvipc_proc_fops; +@@ -182,8 +199,7 @@ static int grow_ary(struct ipc_ids* ids, + if(new == NULL) + return size; + new->size = newsize; +- memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size + +- sizeof(struct ipc_id_ary)); ++ memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size); + for(i=size;i<newsize;i++) { + new->p[i] = NULL; + } +@@ -213,10 +229,20 @@ static int grow_ary(struct ipc_ids* ids, + * Called with ipc_ids.sem held. 
+ */ + +-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) ++int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid) + { + int id; + ++ if (reqid >= 0) { ++ id = reqid%SEQ_MULTIPLIER; ++ size = grow_ary(ids,id+1); ++ if (id >= size) ++ return -1; ++ if (ids->entries->p[id] == NULL) ++ goto found; ++ return -1; ++ } ++ + size = grow_ary(ids,size); + + /* +@@ -229,16 +255,21 @@ int ipc_addid(struct ipc_ids* ids, struc + } + return -1; + found: +- ids->in_use++; ++ if (ids->in_use++ == 0) ++ (void)get_ve(ids->owner_env); + if (id > ids->max_id) + ids->max_id = id; + + new->cuid = new->uid = current->euid; + new->gid = new->cgid = current->egid; + +- new->seq = ids->seq++; +- if(ids->seq > ids->seq_max) +- ids->seq = 0; ++ if (reqid >= 0) { ++ new->seq = reqid/SEQ_MULTIPLIER; ++ } else { ++ new->seq = ids->seq++; ++ if(ids->seq > ids->seq_max) ++ ids->seq = 0; ++ } + + spin_lock_init(&new->lock); + new->deleted = 0; +@@ -276,7 +307,8 @@ struct kern_ipc_perm* ipc_rmid(struct ip + ids->entries->p[lid] = NULL; + if(p==NULL) + BUG(); +- ids->in_use--; ++ if (--ids->in_use == 0) ++ put_ve(ids->owner_env); + + if (lid == ids->max_id) { + do { +@@ -302,9 +334,9 @@ void* ipc_alloc(int size) + { + void* out; + if(size > PAGE_SIZE) +- out = vmalloc(size); ++ out = ub_vmalloc(size); + else +- out = kmalloc(size, GFP_KERNEL); ++ out = ub_kmalloc(size, GFP_KERNEL); + return out; + } + +@@ -387,14 +419,14 @@ void* ipc_rcu_alloc(int size) + * workqueue if necessary (for vmalloc). 
+ */ + if (rcu_use_vmalloc(size)) { +- out = vmalloc(HDRLEN_VMALLOC + size); ++ out = ub_vmalloc(HDRLEN_VMALLOC + size); + if (out) { + out += HDRLEN_VMALLOC; + container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; + container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; + } + } else { +- out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); ++ out = ub_kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); + if (out) { + out += HDRLEN_KMALLOC; + container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; +@@ -603,6 +635,71 @@ int ipc_checkid(struct ipc_ids* ids, str + return 0; + } + ++#ifdef CONFIG_VE ++void __init prepare_ipc(void) ++{ ++ prepare_msg(); ++ prepare_sem(); ++ prepare_shm(); ++} ++ ++int init_ve_ipc(struct ve_struct * envid) ++{ ++ envid->_msg_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *), ++ GFP_KERNEL); ++ if (envid->_msg_ids == NULL) ++ goto out_nomem; ++ envid->_sem_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *), ++ GFP_KERNEL); ++ if (envid->_sem_ids == NULL) ++ goto out_free_msg; ++ envid->_shm_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *), ++ GFP_KERNEL); ++ if (envid->_shm_ids == NULL) ++ goto out_free_sem; ++ ++ init_ve_ipc_msg(); ++ init_ve_ipc_sem(); ++ init_ve_ipc_shm(); ++ return 0; ++ ++out_free_sem: ++ kfree(envid->_sem_ids); ++out_free_msg: ++ kfree(envid->_msg_ids); ++out_nomem: ++ return -ENOMEM; ++} ++ ++void ve_ipc_cleanup(void) ++{ ++ cleanup_ve_ipc_msg(); ++ cleanup_ve_ipc_sem(); ++ cleanup_ve_ipc_shm(); ++} ++ ++void ve_ipc_free(struct ve_struct *env) ++{ ++ ipc_free_ids(env->_msg_ids); ++ ipc_free_ids(env->_sem_ids); ++ ipc_free_ids(env->_shm_ids); ++ env->_msg_ids = NULL; ++ env->_sem_ids = NULL; ++ env->_shm_ids = NULL; ++} ++ ++void fini_ve_ipc(struct ve_struct *ptr) ++{ ++ ve_ipc_cleanup(); ++ ve_ipc_free(ptr); ++} ++ ++EXPORT_SYMBOL(init_ve_ipc); ++EXPORT_SYMBOL(ve_ipc_cleanup); ++EXPORT_SYMBOL(ve_ipc_free); ++EXPORT_SYMBOL(fini_ve_ipc); ++#endif /* CONFIG_VE */ ++ + #ifdef 
__ARCH_WANT_IPC_PARSE_VERSION + + +diff -upr linux-2.6.16.orig/ipc/util.h linux-2.6.16-026test015/ipc/util.h +--- linux-2.6.16.orig/ipc/util.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/ipc/util.h 2006-07-04 14:41:39.000000000 +0400 +@@ -15,6 +15,22 @@ void sem_init (void); + void msg_init (void); + void shm_init (void); + ++#ifdef CONFIG_VE ++void prepare_msg(void); ++void prepare_sem(void); ++void prepare_shm(void); ++void init_ve_ipc_msg(void); ++void init_ve_ipc_sem(void); ++void init_ve_ipc_shm(void); ++void cleanup_ve_ipc_msg(void); ++void cleanup_ve_ipc_sem(void); ++void cleanup_ve_ipc_shm(void); ++ ++#define __ve_init ++#else ++#define __ve_init __init ++#endif ++ + struct ipc_id_ary { + int size; + struct kern_ipc_perm *p[0]; +@@ -28,10 +44,11 @@ struct ipc_ids { + struct semaphore sem; + struct ipc_id_ary nullentry; + struct ipc_id_ary* entries; ++ struct ve_struct *owner_env; + }; + + struct seq_file; +-void __init ipc_init_ids(struct ipc_ids* ids, int size); ++void __ve_init ipc_init_ids(struct ipc_ids *ids, int size); + #ifdef CONFIG_PROC_FS + void __init ipc_init_proc_interface(const char *path, const char *header, + struct ipc_ids *ids, +@@ -42,7 +59,7 @@ void __init ipc_init_proc_interface(cons + + /* must be called with ids->sem acquired.*/ + int ipc_findkey(struct ipc_ids* ids, key_t key); +-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size); ++int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid); + + /* must be called with both locks acquired. 
*/ + struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id); +diff -upr linux-2.6.16.orig/kernel/Kconfig.fairsched linux-2.6.16-026test015/kernel/Kconfig.fairsched +--- linux-2.6.16.orig/kernel/Kconfig.fairsched 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/Kconfig.fairsched 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,22 @@ ++config SCHED_VCPU ++ bool "VCPU scheduler support" ++ default n ++ help ++ VCPU scheduler support adds additional layer of abstraction ++ which allows to virtualize cpu notion and split physical cpus ++ and virtual cpus. This support allows to use CPU fair scheduler, ++ dynamically add/remove cpus to/from VPS and so on. ++ ++config FAIRSCHED ++ bool "Fair CPU scheduler (EXPERIMENTAL)" ++ depends on SCHED_VCPU ++ default SCHED_VCPU ++ help ++ Config option for Fair CPU scheduler (fairsched). ++ This option allows to group processes to scheduling nodes ++ which receive CPU proportional to their weight. ++ This is very important feature for process groups isolation and ++ QoS management. ++ ++ If unsure, say N. ++ +diff -upr linux-2.6.16.orig/kernel/Kconfig.openvz linux-2.6.16-026test015/kernel/Kconfig.openvz +--- linux-2.6.16.orig/kernel/Kconfig.openvz 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/Kconfig.openvz 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,61 @@ ++# Copyright (C) 2005 SWsoft ++# All rights reserved. ++# Licensing governed by "linux/COPYING.SWsoft" file. ++ ++menu "OpenVZ" ++ ++config VE ++ bool "Virtual Environment support" ++ default y ++ help ++ This option adds support of virtual Linux running on the original box ++ with fully supported virtual network driver, tty subsystem and ++ configurable access for hardware and other resources. ++ ++config VE_CALLS ++ tristate "VE calls interface" ++ depends on VE ++ default m ++ help ++ This option controls how to build vzmon code containing VE calls. 
++ By default it's build in module vzmon.o ++ ++config VE_NETDEV ++ tristate "VE networking" ++ depends on VE_CALLS ++ default m ++ help ++ This option controls whether to build VE networking code. ++ ++config VE_ETHDEV ++ tristate "Virtual ethernet device" ++ depends on VE_CALLS ++ default m ++ help ++ This option controls whether to build virtual ethernet device. ++ ++config VE_IPTABLES ++ bool "VE netfiltering" ++ depends on VE && VE_NETDEV && INET && NETFILTER ++ default y ++ help ++ This option controls whether to build VE netfiltering code. ++ ++config VZ_WDOG ++ tristate "VE watchdog module" ++ depends on VE_CALLS ++ default m ++ help ++ This option controls building of vzwdog module, which dumps ++ a lot of useful system info on console periodically. ++ ++config VZ_CHECKPOINT ++ tristate "Checkpointing & restoring Virtual Environments" ++ depends on SOFTWARE_SUSPEND && VE_CALLS ++ default m ++ help ++ This option adds two modules, "cpt" and "rst", which allow ++ to save a running Virtual Environment and restore it ++ on another host (live migration) or on the same host (checkpointing). ++ ++endmenu +diff -upr linux-2.6.16.orig/kernel/Makefile linux-2.6.16-026test015/kernel/Makefile +--- linux-2.6.16.orig/kernel/Makefile 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/Makefile 2006-07-04 14:41:39.000000000 +0400 +@@ -2,7 +2,8 @@ + # Makefile for the linux kernel. 
+ # + +-obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ ++obj-y = sched.o fairsched.o \ ++ fork.o exec_domain.o panic.o printk.o profile.o \ + exit.o itimer.o time.o softirq.o resource.o \ + sysctl.o capability.o ptrace.o timer.o user.o \ + signal.o sys.o kmod.o workqueue.o pid.o \ +@@ -10,6 +11,18 @@ obj-y = sched.o fork.o exec_domain.o + kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ + hrtimer.o + ++obj-y += ub/ ++ ++obj-$(CONFIG_VE) += ve.o ++obj-$(CONFIG_VE) += veowner.o ++obj-$(CONFIG_VE_CALLS) += vzdev.o ++obj-$(CONFIG_VZ_WDOG) += vzwdog.o ++obj-$(CONFIG_VE_CALLS) += vzmon.o ++ ++vzmon-objs = vecalls.o ++ ++obj-$(CONFIG_VZ_CHECKPOINT) += cpt/ ++ + obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o + obj-$(CONFIG_FUTEX) += futex.o + obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o +diff -upr linux-2.6.16.orig/kernel/audit.c linux-2.6.16-026test015/kernel/audit.c +--- linux-2.6.16.orig/kernel/audit.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/audit.c 2006-07-04 14:41:38.000000000 +0400 +@@ -372,6 +372,9 @@ static int audit_receive_msg(struct sk_b + uid_t loginuid; /* loginuid of sender */ + struct audit_sig_info sig_data; + ++ if (!ve_is_super(VE_OWNER_SKB(skb))) ++ return -ECONNREFUSED; ++ + err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type); + if (err) + return err; +diff -upr linux-2.6.16.orig/kernel/auditsc.c linux-2.6.16-026test015/kernel/auditsc.c +--- linux-2.6.16.orig/kernel/auditsc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/auditsc.c 2006-07-04 14:41:36.000000000 +0400 +@@ -966,11 +966,6 @@ void audit_syscall_entry(struct task_str + if (context->in_syscall) { + struct audit_context *newctx; + +-#if defined(__NR_vm86) && defined(__NR_vm86old) +- /* vm86 mode should only be entered once */ +- if (major == __NR_vm86 || major == __NR_vm86old) +- return; +-#endif + #if AUDIT_DEBUG + printk(KERN_ERR + "audit(:%d) pid=%d in syscall=%d;" +diff -upr 
linux-2.6.16.orig/kernel/capability.c linux-2.6.16-026test015/kernel/capability.c +--- linux-2.6.16.orig/kernel/capability.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/capability.c 2006-07-04 14:41:39.000000000 +0400 +@@ -24,7 +24,8 @@ EXPORT_SYMBOL(cap_bset); + * This lock protects task->cap_* for all tasks including current. + * Locking rule: acquire this prior to tasklist_lock. + */ +-static DEFINE_SPINLOCK(task_capability_lock); ++DEFINE_SPINLOCK(task_capability_lock); ++EXPORT_SYMBOL(task_capability_lock); + + /* + * For sys_getproccap() and sys_setproccap(), any of the three +@@ -67,8 +68,8 @@ asmlinkage long sys_capget(cap_user_head + spin_lock(&task_capability_lock); + read_lock(&tasklist_lock); + +- if (pid && pid != current->pid) { +- target = find_task_by_pid(pid); ++ if (pid && pid != virt_pid(current)) { ++ target = find_task_by_pid_ve(pid); + if (!target) { + ret = -ESRCH; + goto out; +@@ -100,9 +101,13 @@ static inline int cap_set_pg(int pgrp, k + int ret = -EPERM; + int found = 0; + +- do_each_task_pid(pgrp, PIDTYPE_PGID, g) { ++ pgrp = vpid_to_pid(pgrp); ++ if (pgrp < 0) ++ return ret; ++ ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, g) { + target = g; +- while_each_thread(g, target) { ++ while_each_thread_ve(g, target) { + if (!security_capset_check(target, effective, + inheritable, + permitted)) { +@@ -113,7 +118,7 @@ static inline int cap_set_pg(int pgrp, k + } + found = 1; + } +- } while_each_task_pid(pgrp, PIDTYPE_PGID, g); ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, g); + + if (!found) + ret = 0; +@@ -132,7 +137,7 @@ static inline int cap_set_all(kernel_cap + int ret = -EPERM; + int found = 0; + +- do_each_thread(g, target) { ++ do_each_thread_ve(g, target) { + if (target == current || target->pid == 1) + continue; + found = 1; +@@ -141,7 +146,7 @@ static inline int cap_set_all(kernel_cap + continue; + ret = 0; + security_capset_set(target, effective, inheritable, permitted); +- } while_each_thread(g, target); ++ 
} while_each_thread_ve(g, target); + + if (!found) + ret = 0; +@@ -188,7 +193,7 @@ asmlinkage long sys_capset(cap_user_head + if (get_user(pid, &header->pid)) + return -EFAULT; + +- if (pid && pid != current->pid && !capable(CAP_SETPCAP)) ++ if (pid && pid != virt_pid(current) && !capable(CAP_SETPCAP)) + return -EPERM; + + if (copy_from_user(&effective, &data->effective, sizeof(effective)) || +@@ -199,8 +204,8 @@ asmlinkage long sys_capset(cap_user_head + spin_lock(&task_capability_lock); + read_lock(&tasklist_lock); + +- if (pid > 0 && pid != current->pid) { +- target = find_task_by_pid(pid); ++ if (pid > 0 && pid != virt_pid(current)) { ++ target = find_task_by_pid_ve(pid); + if (!target) { + ret = -ESRCH; + goto out; +diff -upr linux-2.6.16.orig/kernel/compat.c linux-2.6.16-026test015/kernel/compat.c +--- linux-2.6.16.orig/kernel/compat.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/compat.c 2006-07-04 14:41:39.000000000 +0400 +@@ -21,6 +21,8 @@ + #include <linux/syscalls.h> + #include <linux/unistd.h> + #include <linux/security.h> ++#include <linux/hrtimer.h> ++#include <linux/module.h> + + #include <asm/uaccess.h> + +@@ -38,61 +40,73 @@ int put_compat_timespec(const struct tim + __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; + } + +-static long compat_nanosleep_restart(struct restart_block *restart) ++long compat_nanosleep_restart(struct restart_block *restart) + { +- unsigned long expire = restart->arg0, now = jiffies; + struct compat_timespec __user *rmtp; ++ struct timespec tu; ++ void *rfn_save = restart->fn; ++ struct hrtimer timer; ++ ktime_t rem; + +- /* Did it expire while we handled signals? 
*/ +- if (!time_after(expire, now)) +- return 0; ++ restart->fn = do_no_restart_syscall; ++ ++ hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS); ++ ++ timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; + +- expire = schedule_timeout_interruptible(expire - now); +- if (expire == 0) ++ set_current_state(TASK_INTERRUPTIBLE); ++ rem = schedule_hrtimer(&timer, HRTIMER_ABS); ++ ++ if (rem.tv64 <= 0) + return 0; + +- rmtp = (struct compat_timespec __user *)restart->arg1; +- if (rmtp) { +- struct compat_timespec ct; +- struct timespec t; +- +- jiffies_to_timespec(expire, &t); +- ct.tv_sec = t.tv_sec; +- ct.tv_nsec = t.tv_nsec; +- if (copy_to_user(rmtp, &ct, sizeof(ct))) +- return -EFAULT; +- } +- /* The 'restart' block is already filled in */ ++ rmtp = (struct compat_timespec __user *) restart->arg2; ++ tu = ktime_to_timespec(rem); ++ if (rmtp && put_compat_timespec(&tu, rmtp)) ++ return -EFAULT; ++ ++ restart->fn = rfn_save; ++ ++ /* The other values in restart are already filled in */ + return -ERESTART_RESTARTBLOCK; + } ++EXPORT_SYMBOL_GPL(compat_nanosleep_restart); + + asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, + struct compat_timespec __user *rmtp) + { + struct timespec t; + struct restart_block *restart; +- unsigned long expire; ++ struct hrtimer timer; ++ ktime_t rem; + + if (get_compat_timespec(&t, rqtp)) + return -EFAULT; + +- if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0)) ++ if (!timespec_valid(&t)) + return -EINVAL; + +- expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); +- expire = schedule_timeout_interruptible(expire); +- if (expire == 0) ++ hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_REL); ++ ++ timer.expires = timespec_to_ktime(t); ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ rem = schedule_hrtimer(&timer, HRTIMER_REL); ++ if (rem.tv64 <= 0) + return 0; + +- if (rmtp) { +- jiffies_to_timespec(expire, &t); +- if (put_compat_timespec(&t, rmtp)) +- return 
-EFAULT; +- } ++ t = ktime_to_timespec(rem); ++ ++ if (rmtp && put_compat_timespec(&t, rmtp)) ++ return -EFAULT; ++ + restart = ¤t_thread_info()->restart_block; + restart->fn = compat_nanosleep_restart; +- restart->arg0 = jiffies + expire; +- restart->arg1 = (unsigned long) rmtp; ++ restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF; ++ restart->arg1 = timer.expires.tv64 >> 32; ++ restart->arg2 = (unsigned long) rmtp; ++ restart->arg3 = (unsigned long) timer.base->index; ++ + return -ERESTART_RESTARTBLOCK; + } + +diff -upr linux-2.6.16.orig/kernel/configs.c linux-2.6.16-026test015/kernel/configs.c +--- linux-2.6.16.orig/kernel/configs.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/configs.c 2006-07-04 14:41:38.000000000 +0400 +@@ -89,8 +89,7 @@ static int __init ikconfig_init(void) + struct proc_dir_entry *entry; + + /* create the current config file */ +- entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, +- &proc_root); ++ entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL); + if (!entry) + return -ENOMEM; + +diff -upr linux-2.6.16.orig/kernel/cpt/Makefile linux-2.6.16-026test015/kernel/cpt/Makefile +--- linux-2.6.16.orig/kernel/cpt/Makefile 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/Makefile 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,41 @@ ++# ++# ++# kernel/cpt/Makefile ++# ++# Copyright (C) 2000-2005 SWsoft ++# All rights reserved. ++# ++# Licensing governed by "linux/COPYING.SWsoft" file. 
++ ++obj-$(CONFIG_VZ_CHECKPOINT) += vzcpt.o vzrst.o ++ ++vzcpt-objs := cpt_proc.o cpt_dump.o cpt_obj.o cpt_context.o cpt_process.o \ ++ cpt_mm.o cpt_files.o cpt_kernel.o \ ++ cpt_socket.o cpt_socket_in.o cpt_tty.o cpt_sysvipc.o cpt_net.o \ ++ cpt_conntrack.o cpt_ubc.o cpt_epoll.o ++ ++vzrst-objs := rst_proc.o rst_undump.o rst_context.o rst_process.o \ ++ rst_mm.o rst_files.o \ ++ rst_socket.o rst_socket_in.o rst_tty.o rst_sysvipc.o rst_net.o \ ++ rst_conntrack.o rst_ubc.o rst_epoll.o ++ ++ifeq ($(CONFIG_VZ_CHECKPOINT), m) ++vzrst-objs += cpt_obj.o cpt_kernel.o ++endif ++ ++ifeq ($(CONFIG_VZ_CHECKPOINT_LAZY), y) ++vzcpt-objs += cpt_pagein.o ++vzrst-objs += rst_pagein.o ++endif ++ ++ifeq ($(CONFIG_X86_64), y) ++vzcpt-objs += cpt_x8664.o ++vzrst-objs += rst_x8664.o ++ifeq ($(CONFIG_VZ_CHECKPOINT), m) ++vzrst-objs += cpt_x8664.o ++endif ++endif ++ ++ifeq ($(CONFIG_X86_32), y) ++vzrst-objs += rst_i386.o ++endif +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_conntrack.c linux-2.6.16-026test015/kernel/cpt/cpt_conntrack.c +--- linux-2.6.16.orig/kernel/cpt/cpt_conntrack.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_conntrack.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,370 @@ ++/* ++ * ++ * kernel/cpt/cpt_conntrack.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/fs.h> ++#include <linux/socket.h> ++#include <linux/netdevice.h> ++#include <linux/inetdevice.h> ++#include <linux/rtnetlink.h> ++#include <linux/unistd.h> ++#include <linux/ve.h> ++#include <linux/vzcalluser.h> ++#include <linux/cpt_image.h> ++#include <linux/icmp.h> ++#include <linux/ip.h> ++ ++#if defined(CONFIG_VE_IPTABLES) && \ ++ (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)) ++ ++#include <linux/netfilter.h> ++#include <linux/netfilter_ipv4/ip_conntrack.h> ++#include <linux/netfilter_ipv4/ip_nat.h> ++#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> ++#include <linux/netfilter_ipv4/ip_conntrack_helper.h> ++#include <linux/netfilter_ipv4/ip_conntrack_core.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++ ++ ++/* How does it work? ++ * ++ * Network is disabled, so new conntrack entries will not appear. ++ * However, some of them can disappear because of timeouts. ++ * ++ * So, we take read_lock, collect all required information atomically, ++ * essentially, creating parallel "refcount" structures holding pointers. ++ * We delete conntrack timers as well, so the structures cannot disappear ++ * after releasing the lock. Now, after releasing lock we can dump everything ++ * safely. And on exit we restore timers to their original values. ++ * ++ * Note, this approach is not going to work in VE0. 
++ */ ++ ++struct ct_holder ++{ ++ struct ct_holder *next; ++ struct ip_conntrack_tuple_hash *cth; ++ int index; ++}; ++ ++static void encode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple) ++{ ++ v->cpt_dst = tuple->dst.ip; ++ v->cpt_dstport = tuple->dst.u.all; ++ v->cpt_protonum = tuple->dst.protonum; ++ v->cpt_dir = tuple->dst.dir; ++ ++ v->cpt_src = tuple->src.ip; ++ v->cpt_srcport = tuple->src.u.all; ++} ++ ++static int dump_one_expect(struct cpt_ip_connexpect_image *v, ++ struct ip_conntrack_expect *exp, ++ int sibling, cpt_context_t *ctx) ++{ ++ int err = 0; ++ ++ v->cpt_next = sizeof(*v); ++ v->cpt_object = CPT_OBJ_NET_CONNTRACK_EXPECT; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_VOID; ++ ++ encode_tuple(&v->cpt_tuple, &exp->tuple); ++ encode_tuple(&v->cpt_mask, &exp->mask); ++ v->cpt_sibling_conntrack = sibling; ++ v->cpt_flags = exp->flags; ++ v->cpt_seq = exp->id; ++ v->cpt_dir = 0; ++ v->cpt_manip_proto = 0; ++#ifdef CONFIG_IP_NF_NAT_NEEDED ++ v->cpt_manip_proto = exp->saved_proto.all; ++ v->cpt_dir = exp->dir; ++#endif ++ v->cpt_timeout = 0; ++ if (exp->master->helper->timeout) ++ v->cpt_timeout = exp->timeout.expires - jiffies; ++ return err; ++} ++ ++/* NOTE. We use one page to dump list of expectations. This may be not enough ++ * in theory. In practice there is only one expectation per conntrack record. ++ * Moreover, taking into account that _ALL_ of expecations are saved in one ++ * global list, which is looked up each incoming/outpging packet, the system ++ * would be severely dead when even one conntrack would have so much of ++ * expectations. Shortly, I am not going to repair this. 
++ */ ++ ++static int dump_expect_list(struct ip_conntrack *ct, struct ct_holder *list, ++ cpt_context_t *ctx) ++{ ++ int err = 0; ++ unsigned long pg; ++ struct cpt_ip_connexpect_image *v; ++ struct ip_conntrack_expect *exp; ++ ++ if (ct->expecting == 0) ++ return err; ++ if (ct->expecting*sizeof(struct cpt_ip_connexpect_image) > PAGE_SIZE) ++ return -ENOBUFS; ++ ++ pg = __get_free_page(GFP_KERNEL); ++ if (!pg) ++ return -ENOMEM; ++ v = (struct cpt_ip_connexpect_image *)pg; ++ ++ read_lock_bh(&ip_conntrack_lock); ++ list_for_each_entry(exp, &ve_ip_conntrack_expect_list, list) { ++ int sibling; ++ ++ if (exp->master != ct) ++ continue; ++ ++ if (ct->helper == NULL) { ++ eprintk_ctx("conntrack: no helper and non-trivial expectation\n"); ++ err = -EINVAL; ++ break; ++ } ++ ++ sibling = 0; ++#if 0 ++ /* That's all? No need to calculate sibling? */ ++ if (exp->sibling) { ++ struct ct_holder *c; ++ for (c = list; c; c = c->next) { ++ if (tuplehash_to_ctrack(c->cth) == exp->sibling) { ++ sibling = c->index; ++ break; ++ } ++ } ++ /* NOTE: exp->sibling could be not "confirmed" and, hence, ++ * out of hash table. We should just ignore such a sibling, ++ * the connection is going to be retried, the packet ++ * apparently was lost somewhere. ++ */ ++ if (sibling == 0) ++ dprintk_ctx("sibling conntrack is not found\n"); ++ } ++#endif ++ ++ /* If the expectation still does not have exp->sibling ++ * and timer is not running, it is about to die on another ++ * cpu. Skip it. 
*/ ++ if (!sibling && ++ ct->helper->timeout && ++ !timer_pending(&exp->timeout)) { ++ dprintk_ctx("conntrack: expectation: no timer\n"); ++ continue; ++ } ++ ++ err = dump_one_expect(v, exp, sibling, ctx); ++ if (err) ++ break; ++ ++ v++; ++ } ++ read_unlock_bh(&ip_conntrack_lock); ++ ++ if (err == 0 && (unsigned long)v != pg) ++ ctx->write((void*)pg, (unsigned long)v - pg, ctx); ++ ++ free_page(pg); ++ return err; ++} ++ ++static int dump_one_ct(struct ct_holder *c, struct ct_holder *list, ++ cpt_context_t *ctx) ++{ ++ struct ip_conntrack_tuple_hash *h = c->cth; ++ struct ip_conntrack *ct = tuplehash_to_ctrack(h); ++ struct cpt_ip_conntrack_image v; ++ int err = 0; ++ ++ if (sizeof(v.cpt_proto_data) != sizeof(ct->proto)) { ++ eprintk_ctx("conntrack module ct->proto version mismatch\n"); ++ return -EINVAL; ++ } ++ if (sizeof(v.cpt_help_data) != sizeof(ct->help)) { ++ eprintk_ctx("conntrack module ct->help version mismatch\n"); ++ return -EINVAL; ++ } ++ ++ cpt_open_object(NULL, ctx); ++ ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_NET_CONNTRACK; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_ARRAY; ++ ++ read_lock_bh(&ip_conntrack_lock); ++ v.cpt_status = ct->status; ++ v.cpt_timeout = ct->timeout.expires - jiffies; ++ v.cpt_ct_helper = (ct->helper != NULL); ++ v.cpt_index = c->index; ++ v.cpt_id = ct->id; ++ v.cpt_mark = 0; ++#if defined(CONFIG_IP_NF_CONNTRACK_MARK) ++ v.cpt_mark = ct->mark; ++#endif ++ encode_tuple(&v.cpt_tuple[0], &ct->tuplehash[0].tuple); ++ encode_tuple(&v.cpt_tuple[1], &ct->tuplehash[1].tuple); ++ memcpy(&v.cpt_proto_data, &ct->proto, sizeof(v.cpt_proto_data)); ++ memcpy(&v.cpt_help_data, &ct->help, sizeof(v.cpt_help_data)); ++ ++ v.cpt_masq_index = 0; ++ v.cpt_initialized = 0; ++ v.cpt_num_manips = 0; ++ v.cpt_nat_helper = 0; ++#ifdef CONFIG_IP_NF_NAT_NEEDED ++#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ ++ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) ++ v.cpt_masq_index = ct->nat.masq_index; ++#endif ++ /* "help" 
data is used by pptp, difficult to support */ ++ v.cpt_nat_seq[0].cpt_correction_pos = ct->nat.info.seq[0].correction_pos; ++ v.cpt_nat_seq[0].cpt_offset_before = ct->nat.info.seq[0].offset_before; ++ v.cpt_nat_seq[0].cpt_offset_after = ct->nat.info.seq[0].offset_after; ++ v.cpt_nat_seq[1].cpt_correction_pos = ct->nat.info.seq[1].correction_pos; ++ v.cpt_nat_seq[1].cpt_offset_before = ct->nat.info.seq[1].offset_before; ++ v.cpt_nat_seq[1].cpt_offset_after = ct->nat.info.seq[1].offset_after; ++#endif ++ read_unlock_bh(&ip_conntrack_lock); ++ ++ ctx->write(&v, sizeof(v), ctx); ++ ++ err = dump_expect_list(ct, list, ctx); ++ ++ cpt_close_object(ctx); ++ return err; ++} ++ ++int cpt_dump_ip_conntrack(cpt_context_t * ctx) ++{ ++ struct ct_holder *ct_list = NULL; ++ struct ct_holder *c, **cp; ++ int err = 0; ++ int index = 0; ++ int idx; ++ ++ if (get_exec_env()->_ip_conntrack == NULL) ++ return 0; ++ ++ for (idx = atomic_read(&(get_exec_env()->_ip_conntrack->_ip_conntrack_count)); idx >= 0; idx--) { ++ c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL); ++ if (c == NULL) { ++ err = -ENOMEM; ++ goto done; ++ } ++ memset(c, 0, sizeof(struct ct_holder)); ++ c->next = ct_list; ++ ct_list = c; ++ } ++ ++ c = ct_list; ++ ++ read_lock_bh(&ip_conntrack_lock); ++ for (idx = 0; idx < ip_conntrack_htable_size; idx++) { ++ struct ip_conntrack_tuple_hash *h; ++ list_for_each_entry(h, &ve_ip_conntrack_hash[idx], list) { ++ /* Skip reply tuples, they are covered by original ++ * direction. */ ++ if (DIRECTION(h)) ++ continue; ++ ++ /* Oops, we have not enough of holders... ++ * It is impossible. */ ++ if (unlikely(c == NULL)) { ++ read_unlock_bh(&ip_conntrack_lock); ++ eprintk_ctx("unexpected conntrack appeared\n"); ++ err = -ENOMEM; ++ goto done; ++ } ++ ++ /* If timer is not running, it means that it ++ * has just been scheduled on another cpu. ++ * We should skip this conntrack, it is about to be ++ * destroyed. 
*/ ++ if (!del_timer(&tuplehash_to_ctrack(h)->timeout)) { ++ dprintk_ctx("conntrack: no timer\n"); ++ continue; ++ } ++ ++ /* Timer is deleted. refcnt is _not_ decreased. ++ * We are going to restore the timer on exit ++ * from this function. */ ++ c->cth = h; ++ c->index = ++index; ++ c = c->next; ++ } ++ } ++ read_unlock_bh(&ip_conntrack_lock); ++ ++ /* No conntracks? Good. */ ++ if (index == 0) ++ goto done; ++ ++ /* Comb the list a little. */ ++ cp = &ct_list; ++ while ((c = *cp) != NULL) { ++ /* Discard unused entries; they can appear, if some ++ * entries were timed out since we preallocated the list. ++ */ ++ if (c->cth == NULL) { ++ *cp = c->next; ++ kfree(c); ++ continue; ++ } ++ ++ /* Move conntracks attached to expectations to the beginning ++ * of the list. */ ++ if (tuplehash_to_ctrack(c->cth)->master && c != ct_list) { ++ *cp = c->next; ++ c->next = ct_list; ++ ct_list = c; ++ dprintk_ctx("conntrack: %d moved in list\n", c->index); ++ continue; ++ } ++ cp = &c->next; ++ } ++ ++ cpt_open_section(ctx, CPT_SECT_NET_CONNTRACK); ++ ++ for (c = ct_list; c; c = c->next) { ++ err = dump_one_ct(c, ct_list, ctx); ++ if (err) ++ goto done; ++ } ++ ++ cpt_close_section(ctx); ++ ++done: ++ while ((c = ct_list) != NULL) { ++ ct_list = c->next; ++ if (c->cth) { ++ /* Restore timer. refcnt is preserved. */ ++ add_timer(&tuplehash_to_ctrack(c->cth)->timeout); ++ } ++ kfree(c); ++ } ++ return err; ++} ++ ++#endif +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_context.c linux-2.6.16-026test015/kernel/cpt/cpt_context.c +--- linux-2.6.16.orig/kernel/cpt/cpt_context.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_context.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,255 @@ ++/* ++ * ++ * kernel/cpt/cpt_context.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/pagemap.h> ++ ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++ ++ ++static void file_write(const void *addr, size_t count, struct cpt_context *ctx) ++{ ++ mm_segment_t oldfs; ++ ssize_t err = -EBADF; ++ struct file *file = ctx->file; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ if (file) ++ err = file->f_op->write(file, addr, count, &file->f_pos); ++ set_fs(oldfs); ++ if (err != count && !ctx->write_error) ++ ctx->write_error = err < 0 ? err : -EIO; ++} ++ ++static void file_pwrite(void *addr, size_t count, struct cpt_context *ctx, loff_t pos) ++{ ++ mm_segment_t oldfs; ++ ssize_t err = -EBADF; ++ struct file *file = ctx->file; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ if (file) ++ err = file->f_op->write(file, addr, count, &pos); ++ set_fs(oldfs); ++ if (err != count && !ctx->write_error) ++ ctx->write_error = err < 0 ? 
err : -EIO; ++} ++ ++static void file_align(struct cpt_context *ctx) ++{ ++ struct file *file = ctx->file; ++ ++ if (file) ++ file->f_pos = CPT_ALIGN(file->f_pos); ++} ++ ++void cpt_context_init(struct cpt_context *ctx) ++{ ++ int i; ++ ++ memset(ctx, 0, sizeof(*ctx)); ++ ++ init_MUTEX(&ctx->main_sem); ++ ctx->refcount = 1; ++ ++ ctx->current_section = -1; ++ ctx->current_object = -1; ++ ctx->pagesize = PAGE_SIZE; ++ ctx->write = file_write; ++ ctx->pwrite = file_pwrite; ++ ctx->align = file_align; ++ for (i=0; i < CPT_SECT_MAX; i++) ++ ctx->sections[i] = CPT_NULL; ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ init_completion(&ctx->pgin_notify); ++#endif ++ cpt_object_init(ctx); ++} ++ ++int cpt_open_dumpfile(struct cpt_context *ctx) ++{ ++ ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL); ++ if (ctx->tmpbuf == NULL) ++ return -ENOMEM; ++ __cpt_release_buf(ctx); ++ return 0; ++} ++ ++int cpt_close_dumpfile(struct cpt_context *ctx) ++{ ++ if (ctx->file) { ++ fput(ctx->file); ++ ctx->file = NULL; ++ } ++ if (ctx->tmpbuf) { ++ free_page((unsigned long)ctx->tmpbuf); ++ ctx->tmpbuf = NULL; ++ } ++ if (ctx->write_error) ++ eprintk_ctx("error while writing dump file: %d\n", ctx->write_error); ++ return ctx->write_error; ++} ++ ++int cpt_major_hdr_out(struct cpt_context *ctx) ++{ ++ struct cpt_major_hdr hdr; ++ ++ if (ctx->file == NULL) ++ return 0; ++ ++ memset(&hdr, 0, sizeof(hdr)); ++ hdr.cpt_signature[0] = CPT_SIGNATURE0; ++ hdr.cpt_signature[1] = CPT_SIGNATURE1; ++ hdr.cpt_signature[2] = CPT_SIGNATURE2; ++ hdr.cpt_signature[3] = CPT_SIGNATURE3; ++ hdr.cpt_hdrlen = sizeof(hdr); ++ hdr.cpt_image_version = 1; ++#ifdef CONFIG_X86_32 ++ hdr.cpt_os_arch = CPT_OS_ARCH_I386; ++#endif ++#ifdef CONFIG_X86_64 ++ hdr.cpt_os_arch = CPT_OS_ARCH_EMT64; ++#endif ++ hdr.cpt_os_version = 0; ++ hdr.cpt_os_features = 0; ++ hdr.cpt_pagesize = PAGE_SIZE; ++ hdr.cpt_hz = HZ; ++ hdr.cpt_start_jiffies64 = ctx->virt_jiffies64; ++ hdr.cpt_start_sec = ctx->start_time.tv_sec; ++ hdr.cpt_start_nsec = 
ctx->start_time.tv_nsec; ++ hdr.cpt_cpu_caps[0] = ctx->src_cpu_flags; ++ hdr.cpt_kernel_config[0] = ctx->kernel_config_flags; ++ hdr.cpt_iptables_mask = ctx->iptables_mask; ++ ++ ctx->write(&hdr, sizeof(hdr), ctx); ++ return 0; ++} ++ ++int cpt_close_section(struct cpt_context *ctx) ++{ ++ if (ctx->file && ctx->current_section >= 0) { ++ __u64 next = ctx->file->f_pos - ctx->current_section; ++ ctx->pwrite(&next, 8, ctx, ctx->current_section); ++ ctx->current_section = -1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(cpt_close_section); ++ ++int cpt_open_section(struct cpt_context *ctx, __u32 type) ++{ ++ struct cpt_section_hdr hdr; ++ ++ if (ctx->file == NULL) ++ return 0; ++ ++ cpt_close_section(ctx); ++ ++ ctx->current_section = ctx->file->f_pos; ++ ctx->sections[type] = ctx->current_section; ++ ++ hdr.cpt_next = 0; ++ hdr.cpt_section = type; ++ hdr.cpt_hdrlen = sizeof(hdr); ++ hdr.cpt_align = 0; ++ ctx->write(&hdr, sizeof(hdr), ctx); ++ ++ return 0; ++} ++EXPORT_SYMBOL(cpt_open_section); ++ ++ ++int cpt_close_object(struct cpt_context *ctx) ++{ ++ if (ctx->file && ctx->current_object >= 0) { ++ __u64 next = ctx->file->f_pos - ctx->current_object; ++ ctx->pwrite(&next, 8, ctx, ctx->current_object); ++ ctx->current_object = -1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(cpt_close_object); ++ ++int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ if (ctx->file == NULL) ++ return 0; ++ ++ cpt_close_object(ctx); ++ ++ ctx->current_object = ctx->file->f_pos; ++ if (obj) ++ cpt_obj_setpos(obj, ctx->current_object, ctx); ++ ++ return 0; ++} ++EXPORT_SYMBOL(cpt_open_object); ++ ++int cpt_push_object(loff_t *saved, struct cpt_context *ctx) ++{ ++ if (ctx->file) { ++ *saved = ctx->current_object; ++ ctx->current_object = ctx->file->f_pos; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(cpt_push_object); ++ ++int cpt_pop_object(loff_t *saved, struct cpt_context *ctx) ++{ ++ ctx->current_object = *saved; ++ return 0; ++} ++EXPORT_SYMBOL(cpt_pop_object); ++ ++int 
cpt_dump_tail(struct cpt_context *ctx) ++{ ++ struct cpt_major_tail hdr; ++ int i; ++ ++ if (ctx->file == NULL) ++ return 0; ++ ++ cpt_open_section(ctx, CPT_SECT_TRAILER); ++ memset(&hdr, 0, sizeof(hdr)); ++ hdr.cpt_next = sizeof(hdr); ++ hdr.cpt_object = CPT_OBJ_TRAILER; ++ hdr.cpt_hdrlen = sizeof(hdr); ++ hdr.cpt_content = CPT_CONTENT_VOID; ++ hdr.cpt_lazypages = 0; ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ hdr.cpt_lazypages = ctx->lazypages; ++#endif ++ hdr.cpt_64bit = ctx->tasks64; ++ hdr.cpt_signature[0] = CPT_SIGNATURE0; ++ hdr.cpt_signature[1] = CPT_SIGNATURE1; ++ hdr.cpt_signature[2] = CPT_SIGNATURE2; ++ hdr.cpt_signature[3] = CPT_SIGNATURE3; ++ hdr.cpt_nsect = CPT_SECT_MAX_INDEX; ++ for (i = 0; i < CPT_SECT_MAX_INDEX; i++) ++ hdr.cpt_sections[i] = ctx->sections[i]; ++ ++ ctx->write(&hdr, sizeof(hdr), ctx); ++ cpt_close_section(ctx); ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_context.h linux-2.6.16-026test015/kernel/cpt/cpt_context.h +--- linux-2.6.16.orig/kernel/cpt/cpt_context.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_context.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,196 @@ ++#include <linux/fs.h> ++#include <asm/uaccess.h> ++ ++#define CPT_CTX_ERROR -1 ++#define CPT_CTX_IDLE 0 ++#define CPT_CTX_SUSPENDING 1 ++#define CPT_CTX_SUSPENDED 2 ++#define CPT_CTX_DUMPING 3 ++#define CPT_CTX_UNDUMPING 4 ++#define CPT_CTX_UNDUMPED 5 ++ ++#define CPT_TID(tsk) (tsk)->pid, virt_pid(tsk), (tsk)->comm ++#define CPT_FID "%d,%d(%s)" ++ ++ ++typedef struct cpt_context ++{ ++ struct list_head ctx_list; ++ int refcount; ++ int ctx_state; ++ int objcount; ++ int sticky; ++ struct semaphore main_sem; ++ ++ struct file *errorfile; ++ struct file *statusfile; ++ struct file *lockfile; ++ ++ int errno; ++ char *error_msg; ++ loff_t err_offset; ++ ++ struct file *file; ++ char *tmpbuf; ++ int pagesize; ++ ++ loff_t current_section; ++ loff_t current_object; ++ ++ loff_t sections[CPT_SECT_MAX]; ++ ++ __u32 errormask; ++ 
__u32 write_error; ++ ++ struct list_head object_array[CPT_OBJ_MAX]; ++ ++ void (*write)(const void *addr, size_t count, struct cpt_context *ctx); ++ void (*pwrite)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos); ++ ssize_t (*read)(void *addr, size_t count, struct cpt_context *ctx); ++ ssize_t (*pread)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos); ++ void (*align)(struct cpt_context *ctx); ++ int ve_id; ++ int contextid; ++ __u64 cpt_jiffies64; /* Host jiffies64 at the moment of cpt/rst, ++ * corresponging to start_time */ ++ __u64 virt_jiffies64; /* Virtual jiffies64. It is == cpt_jiffies64 when ++ * VE did not migrate. */ ++ struct timespec start_time; ++ struct timespec delta_time; ++ int image_version; ++ int lo_index; ++ int lo_index_old; ++ int venet_index; ++ int venet_index_old; ++ __u64 iptables_mask; ++ ++#define CPT_ANONVMA_HBITS (sizeof(void*) == 4 ? 10 : 9) ++#define CPT_ANONVMA_HSIZE (1<<CPT_ANONVMA_HBITS) ++ struct hlist_head *anonvmas; ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ struct file *pagein_file_in; ++ struct file *pagein_file_out; ++ int lazy_vm; ++ int lazypages; ++ int lazytype; ++ task_t *pgin_task; ++ unsigned long last_pagein; ++ struct pagein_desc **pgin_dir; ++ struct pgin_device *pagein_dev; ++ struct completion pgin_notify; ++ struct completion *pgind_completion; ++ struct swap_info_struct *pgin_swp; ++#endif ++ int tasks64; ++ __u32 src_cpu_flags; ++ __u32 dst_cpu_flags; ++ __u32 kernel_config_flags; ++ ++ struct filejob *filejob_queue; ++} cpt_context_t; ++ ++typedef struct { ++ int pid; ++ cpt_context_t *ctx; ++ struct completion done; ++} pagein_info_t; ++ ++int pagein_info_printf(char *buf, cpt_context_t *ctx); ++ ++int cpt_open_dumpfile(struct cpt_context *); ++int cpt_close_dumpfile(struct cpt_context *); ++int rst_open_dumpfile(struct cpt_context *); ++void rst_close_dumpfile(struct cpt_context *); ++void cpt_context_init(struct cpt_context *); ++void rst_context_init(struct cpt_context *); 
++void cpt_context_destroy(struct cpt_context *); ++ ++void rst_report_error(int err, cpt_context_t *ctx); ++ ++ ++int cpt_major_hdr_out(struct cpt_context *ctx); ++int cpt_dump_tail(struct cpt_context *ctx); ++int cpt_close_section(struct cpt_context *ctx); ++int cpt_open_section(struct cpt_context *ctx, __u32 type); ++int cpt_close_object(struct cpt_context *ctx); ++int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx); ++int cpt_push_object(loff_t *saved, struct cpt_context *ctx); ++int cpt_pop_object(loff_t *saved, struct cpt_context *ctx); ++ ++int rst_get_section(int type, struct cpt_context * ctx, loff_t *, loff_t *); ++__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx); ++__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx); ++void rst_put_name(__u8 *name, struct cpt_context *ctx); ++int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx); ++void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx); ++ ++#define rst_get_object(type, pos, tmp, ctx) \ ++ _rst_get_object((type), (pos), (tmp), sizeof(*(tmp)), (ctx)) ++ ++extern int debug_level; ++ ++#define cpt_printk(lvl, fmt, args...) do { \ ++ if (lvl <= debug_level) \ ++ printk(fmt, ##args); \ ++ } while (0) ++ ++#define dprintk(a...) cpt_printk(3, "CPT DBG: " a) ++#define dprintk_ctx(f, arg...) dprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg) ++ ++#define wprintk(a...) cpt_printk(2, "CPT WRN: " a) ++#define wprintk_ctx(f, arg...) wprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg) ++ ++#define eprintk(a...) cpt_printk(1, "CPT ERR: " a) ++#define eprintk_ctx(f, arg...) 
\ ++do { \ ++ eprintk("%p,%u :" f, ctx, ctx->ve_id, ##arg); \ ++ if (ctx->error_msg && ctx->err_offset < PAGE_SIZE) \ ++ ctx->err_offset += snprintf((char*)(ctx->error_msg + \ ++ ctx->err_offset), \ ++ PAGE_SIZE - ctx->err_offset, f, ##arg); \ ++} while(0) ++ ++#define CPT_TMPBUF_FREE 0x789adf12 ++#define CPT_TMPBUF_BUSY 0xabcd9876 ++ ++static inline void *cpt_get_buf(cpt_context_t *ctx) ++{ ++ void *buf = ctx->tmpbuf; ++ ++ BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_FREE); ++ *(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_BUSY; ++ return buf; ++} ++ ++static inline void __cpt_release_buf(cpt_context_t *ctx) ++{ ++ void *buf = ctx->tmpbuf; ++ ++ *(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE; ++} ++ ++static inline void cpt_release_buf(cpt_context_t *ctx) ++{ ++ void *buf = ctx->tmpbuf; ++ ++ BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_BUSY); ++ *(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE; ++} ++ ++static inline void cpt_flush_error(cpt_context_t *ctx) ++{ ++ mm_segment_t oldfs; ++ ++ if (ctx->errorfile && ctx->error_msg && ctx->err_offset) { ++ if (ctx->errorfile->f_op && ctx->errorfile->f_op->write) { ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ ctx->errorfile->f_op->write(ctx->errorfile, ++ ctx->error_msg, ctx->err_offset, ++ &ctx->errorfile->f_pos); ++ set_fs(oldfs); ++ } ++ ctx->error_msg[0] = 0; ++ ctx->err_offset = 0; ++ } ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_dump.c linux-2.6.16-026test015/kernel/cpt/cpt_dump.c +--- linux-2.6.16.orig/kernel/cpt/cpt_dump.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_dump.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,840 @@ ++/* ++ * ++ * kernel/cpt/cpt_dump.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/pagemap.h> ++#include <linux/ptrace.h> ++#include <linux/smp_lock.h> ++#include <linux/ve.h> ++#include <linux/ve_proto.h> ++#include <linux/virtinfo.h> ++#include <ub/ub_task.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_dump.h" ++#include "cpt_files.h" ++#include "cpt_mm.h" ++#include "cpt_process.h" ++#include "cpt_net.h" ++#include "cpt_socket.h" ++#include "cpt_ubc.h" ++#include "cpt_kernel.h" ++ ++ ++static int vps_child_level(task_t *root, task_t *c) ++{ ++ int level = 0; ++ int veid = VE_TASK_INFO(c)->owner_env->veid; ++ ++ while (VE_TASK_INFO(c)->owner_env->veid == veid) { ++ if (c->pid != c->tgid) ++ c = c->group_leader; ++ if (c == root) ++ return level; ++ ++ c = c->real_parent; ++ level++; ++ } ++ return -1; ++} ++ ++static inline int freezable(struct task_struct * p) ++{ ++ if (p->exit_state) ++ return 0; ++ ++ switch (p->state) { ++ case EXIT_ZOMBIE: ++ case EXIT_DEAD: ++ case TASK_STOPPED: ++#if TASK_TRACED != TASK_STOPPED ++ case TASK_TRACED: ++#endif ++ return 0; ++ default: ++ return 1; ++ } ++} ++ ++/* ++ * Some comment is necessary about PF_FREEZE,PF_FROZEN,TIF_FREEZE... ++ * ++ * SWSUSP uses PF_FREEZE flag in tsk->flags raising it in context ++ * of another process. Apparently, it is unacceptable on SMP. ++ * Let's take freeze_processes() in kernel/power/process.c as an example. 
++ * Unserialized modifications tsk->flags easily ++ * (believe or not, but it happens with probability of almost 100% :-)) ++ * creates the situation when setting PF_FREEZE in freeze_processes(), ++ * which quickly spins raising PF_FREEZE of all the processes, ++ * _clears_ PF_FROZEN just set in refrigerator(), so that suspend deadlocks. ++ * ++ * So, to make things clean, we require that those flags may be modified ++ * only under tsk->sighand->siglock, which is quite natural because PF_FREEZE ++ * is just a kind of signal. ++ * ++ * It is not enough, because we are still not allowed to change tsk->flags ++ * in context of another process, we can corrupt another flags, when the process ++ * running on another cpu modifies them. So, we use TIF_FREEZE in thread flags, ++ * which can be changed atomically. ++ * ++ * PF_FROZEN also changes in context of another process, but this happens ++ * only when the process is already in refrigerator() which does not modify ++ * tsk->flags. ++ */ ++ ++static int vps_stop_tasks(struct cpt_context *ctx) ++{ ++ unsigned long start_time = jiffies; ++ int err; ++ task_t *p, *g; ++ int todo; ++ int round = 0; ++ ++ do_gettimespec(&ctx->start_time); ++ ctx->cpt_jiffies64 = get_jiffies_64(); ++ ctx->virt_jiffies64 = ctx->cpt_jiffies64 + get_exec_env()->jiffies_fixup; ++ ++ read_lock(&tasklist_lock); ++ for(;;) { ++ task_t *root; ++ todo = 0; ++ ++ root = find_task_by_pid_ve(1); ++ if (!root) { ++ read_unlock(&tasklist_lock); ++ eprintk_ctx("cannot find ve init\n"); ++ return -ESRCH; ++ } ++ ++ do_each_thread_ve(g, p) { ++ if (vps_child_level(root, p) >= 0) { ++ if (!is_virtual_pid(virt_pid(p))) { ++ eprintk_ctx("external process %d/%d(%s) inside VPS (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm); ++ todo = -1; ++ goto out; ++ } ++ if (p->vfork_done) { ++ /* Task between vfork()...exec() ++ * cannot be frozen, because parent ++ * wait in uninterruptible state. 
++ * So, we do nothing, waiting for ++ * exec(), unless: ++ */ ++ if (p->state == TASK_STOPPED || ++ p->state == TASK_TRACED) { ++ eprintk_ctx("task %d/%d(%s) is stopped while vfork(). Checkpointing is impossible.\n", virt_pid(p), p->pid, p->comm); ++ todo = -1; ++ /* It is fatal, _user_ stopped ++ * vfork()ing task, so that we ++ * cannot suspend now. ++ */ ++ } else { ++ todo = -3; ++ } ++ goto out; ++ } ++ if (p->state == TASK_TRACED ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9) ++ && !p->stopped_state ++#endif ++ ) { ++ int ptrace_id = p->pn_state; ++ /* Debugger waits for signal. */ ++ switch (ptrace_id) { ++ case PN_STOP_TF: ++ case PN_STOP_TF_RT: ++ case PN_STOP_ENTRY: ++ case PN_STOP_FORK: ++ case PN_STOP_VFORK: ++ case PN_STOP_SIGNAL: ++ case PN_STOP_EXIT: ++ case PN_STOP_LEAVE: ++ break; ++ default: ++ eprintk_ctx("task %d/%d(%s) is stopped by debugger while %d.\n", virt_pid(p), p->pid, p->comm, ptrace_id); ++ todo = -1; ++ goto out; ++ } ++ } ++ if (p->flags & PF_NOFREEZE) ++ goto out; ++ if (p->flags & PF_FROZEN) ++ continue; ++ if (!freezable(p)) ++ continue; ++ ++ spin_lock_irq(&p->sighand->siglock); ++ set_tsk_thread_flag(p, TIF_FREEZE); ++ signal_wake_up(p, 0); ++ spin_unlock_irq(&p->sighand->siglock); ++ ++ if (round == 10) ++ wprintk_ctx("%d/%d(%s) is running\n", virt_pid(p), p->pid, p->comm); ++ ++ todo++; ++ } else { ++ if (p != current) { ++ eprintk_ctx("foreign process %d/%d(%s) inside VPS (e.g. 
vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm); ++ todo = -1; ++ goto out; ++ } ++ } ++ } while_each_thread_ve(g, p); ++ ++out: ++ if (todo && ++ (time_after(jiffies, start_time + 10*HZ) || ++ signal_pending(current) || todo < 0)) { ++ do_each_thread_ve(g, p) { ++ if (vps_child_level(root, p) >= 0) { ++ spin_lock_irq(&p->sighand->siglock); ++ clear_tsk_thread_flag(p, TIF_FREEZE); ++ if (p->flags & PF_FROZEN) { ++ p->flags &= ~PF_FROZEN; ++ wake_up_process(p); ++ } ++ spin_unlock_irq(&p->sighand->siglock); ++ } ++ } while_each_thread_ve(g, p); ++ if (todo > 0) ++ todo = -2; ++ /* This is sign of failure of printk(), which is not ++ * ours. So, no prefixes. */ ++ printk(">\n"); ++ } ++ ++ read_unlock(&tasklist_lock); ++ ++ if (!todo) ++ return 0; ++ ++ if (todo == -1) { ++ eprintk_ctx("suspend is impossible now.\n"); ++ return -EAGAIN; ++ } ++ ++ if (todo == -2) { ++ eprintk_ctx("interrupted or timed out.\n"); ++ return -EINTR; ++ } ++ ++ if (time_after(jiffies, start_time + 10*HZ) || ++ signal_pending(current)) { ++ if (todo == -3) { ++ eprintk_ctx("vfork() is active, suspend is impossible now.\n"); ++ } else { ++ eprintk_ctx("suspend is impossible, reason %d\n", todo); ++ } ++ return -EAGAIN; ++ } ++ ++ if (todo < 0 || round > 0) { ++ current->state = TASK_INTERRUPTIBLE; ++ schedule_timeout(HZ/50); ++ } else { ++ yield(); ++ } ++ ++ read_lock(&tasklist_lock); ++ round++; ++ } ++ ++ read_unlock(&tasklist_lock); ++ return err; ++} ++ ++static int cpt_unlock_ve(struct cpt_context *ctx) ++{ ++ struct ve_struct *env; ++ ++ env = get_ve_by_id(ctx->ve_id); ++ if (!env) ++ return -ESRCH; ++ down_write(&env->op_sem); ++ env->is_locked = 0; ++ up_write(&env->op_sem); ++ put_ve(env); ++ return 0; ++} ++ ++int cpt_resume(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ cpt_unlock_sockets(ctx); ++ ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ if (ctx->pgin_task) { ++ wait_for_completion(&ctx->pgin_notify); ++ put_task_struct(ctx->pgin_task); ++ ctx->pgin_task = 
NULL; ++ } ++#endif ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ clear_tsk_thread_flag(tsk, TIF_FREEZE); ++ if (tsk->flags & PF_FROZEN) { ++ tsk->flags &= ~PF_FROZEN; ++ wake_up_process(tsk); ++ } else if (freezable(tsk)) { ++ eprintk_ctx("strange, %s not frozen\n", tsk->comm ); ++ } ++ spin_unlock_irq(&tsk->sighand->siglock); ++ put_task_struct(tsk); ++ } ++ ++ cpt_resume_network(ctx); ++ ++ cpt_unlock_ve(ctx); ++ ++ cpt_finish_ubc(ctx); ++ cpt_object_destroy(ctx); ++ return 0; ++} ++ ++int cpt_kill(struct cpt_context *ctx) ++{ ++ int err = 0; ++ struct ve_struct *env; ++ cpt_object_t *obj; ++ task_t *root_task = NULL; ++ long delay; ++ ++ if (!ctx->ve_id) ++ return -EINVAL; ++ ++ env = get_ve_by_id(ctx->ve_id); ++ if (!env) ++ return -ESRCH; ++ ++ /* from here cpt_kill succeeds */ ++ if (VE_TASK_INFO(current)->owner_env == env) { ++ wprintk_ctx("attempt to kill ve from inside, escaping...\n"); ++ ++ write_lock_irq(&tasklist_lock); ++ VE_TASK_INFO(current)->owner_env = get_ve0(); ++ REMOVE_VE_LINKS(current); ++ SET_VE_LINKS(current); ++ ++ atomic_inc(&get_ve0()->pcounter); ++ atomic_dec(&env->pcounter); ++ write_unlock_irq(&tasklist_lock); ++ set_exec_env(get_ve0()); ++ } ++ ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ if (ctx->pgin_task) { ++ wait_for_completion(&ctx->pgin_notify); ++ put_task_struct(ctx->pgin_task); ++ ctx->pgin_task = NULL; ++ } ++#endif ++ ++ cpt_kill_sockets(ctx); ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ ++ if (tsk->exit_state) { ++ put_task_struct(tsk); ++ continue; ++ } ++ ++ if (virt_pid(tsk) == 1) { ++ root_task = tsk; ++ continue; ++ } ++ ++ if (tsk->ptrace) { ++ write_lock_irq(&tasklist_lock); ++ tsk->ptrace = 0; ++ if (!list_empty(&tsk->ptrace_list)) { ++ list_del_init(&tsk->ptrace_list); ++ REMOVE_LINKS(tsk); ++ tsk->parent = tsk->real_parent; ++ SET_LINKS(tsk); ++ } ++ write_unlock_irq(&tasklist_lock); ++ } ++ ++ send_sig(SIGKILL, 
tsk, 1); ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ sigfillset(&tsk->blocked); ++ sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); ++ set_tsk_thread_flag(tsk, TIF_SIGPENDING); ++ clear_tsk_thread_flag(tsk, TIF_FREEZE); ++ if (tsk->flags & PF_FROZEN) ++ tsk->flags &= ~PF_FROZEN; ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ wake_up_process(tsk); ++ put_task_struct(tsk); ++ } ++ ++ yield(); ++ ++ if (root_task != NULL) { ++ send_sig(SIGKILL, root_task, 1); ++ ++ spin_lock_irq(&root_task->sighand->siglock); ++ sigfillset(&root_task->blocked); ++ sigdelsetmask(&root_task->blocked, sigmask(SIGKILL)); ++ set_tsk_thread_flag(root_task, TIF_SIGPENDING); ++ clear_tsk_thread_flag(root_task, TIF_FREEZE); ++ if (root_task->flags & PF_FROZEN) ++ root_task->flags &= ~PF_FROZEN; ++ spin_unlock_irq(&root_task->sighand->siglock); ++ ++ wake_up_process(root_task); ++ put_task_struct(root_task); ++ } ++ ++ cpt_finish_ubc(ctx); ++ cpt_object_destroy(ctx); ++ ++ delay = 1; ++ while (atomic_read(&env->counter) != 1) { ++ if (signal_pending(current)) ++ break; ++ current->state = TASK_INTERRUPTIBLE; ++ delay = (delay < HZ) ? 
(delay << 1) : HZ; ++ schedule_timeout(delay); ++ } ++ put_ve(env); ++ ++ return err; ++} ++ ++static void collect_task_ubc(task_t *t, struct cpt_context *ctx) ++{ ++ struct task_beancounter *tbc; ++ ++ tbc = &(t->task_bc); ++ cpt_add_ubc(tbc->exec_ub, ctx); ++ cpt_add_ubc(tbc->task_ub, ctx); ++ cpt_add_ubc(tbc->fork_sub, ctx); ++} ++ ++static cpt_object_t * remember_task(task_t * child, cpt_object_t * head, ++ cpt_context_t * ctx) ++{ ++ cpt_object_t *cobj; ++ ++ if (freezable(child) && !(child->flags&PF_FROZEN)) { ++ eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(child)); ++ put_task_struct(child); ++ return NULL; ++ } ++ ++ if (lookup_cpt_object(CPT_OBJ_TASK, child, ctx)) BUG(); ++ if ((cobj = alloc_cpt_object(GFP_KERNEL, ctx)) == NULL) { ++ put_task_struct(child); ++ return NULL; ++ } ++ cobj->o_count = 1; ++ cpt_obj_setobj(cobj, child, ctx); ++ insert_cpt_object(CPT_OBJ_TASK, cobj, head, ctx); ++ collect_task_ubc(child, ctx); ++ return cobj; ++} ++ ++static int vps_collect_tasks(struct cpt_context *ctx) ++{ ++ int err = -ESRCH; ++ cpt_object_t *obj; ++ task_t *root; ++ ++ read_lock(&tasklist_lock); ++ root = find_task_by_pid_ve(1); ++ if (root) ++ get_task_struct(root); ++ read_unlock(&tasklist_lock); ++ ++ if (!root) { ++ err = -ESRCH; ++ eprintk_ctx("vps_collect_tasks: cannot find root\n"); ++ goto out; ++ } ++ ++ if ((obj = alloc_cpt_object(GFP_KERNEL, ctx)) == NULL) { ++ put_task_struct(root); ++ return -ENOMEM; ++ } ++ obj->o_count = 1; ++ cpt_obj_setobj(obj, root, ctx); ++ intern_cpt_object(CPT_OBJ_TASK, obj, ctx); ++ collect_task_ubc(root, ctx); ++ ++ /* Collect process subtree recursively */ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ cpt_object_t *head = obj; ++ task_t *tsk = obj->o_obj; ++ task_t *child; ++ ++ if (freezable(tsk) && !(tsk->flags&PF_FROZEN)) { ++ eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(tsk)); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ wait_task_inactive(tsk); ++ ++ if (tsk->pid == tsk->tgid) { ++ child = 
tsk; ++ for (;;) { ++ read_lock(&tasklist_lock); ++ child = next_thread(child); ++ if (child != tsk) ++ get_task_struct(child); ++ read_unlock(&tasklist_lock); ++ ++ if (child == tsk) ++ break; ++ ++ if (child->real_parent != tsk->real_parent) { ++ put_task_struct(child); ++ eprintk_ctx("illegal thread structure, kernel bug\n"); ++ return -EINVAL; ++ } ++ ++ if ((head = remember_task(child, head, ctx)) == NULL) ++ return -ENOMEM; ++ } ++ } ++ ++ /* About locking. VE is frozen. But lists of children ++ * may change at least for init, when entered task reparents ++ * to init and when reparented task exits. If we take care ++ * of this case, we still can unlock while scanning ++ * tasklists. ++ */ ++ read_lock(&tasklist_lock); ++ list_for_each_entry(child, &tsk->children, sibling) { ++ if (child->real_parent != tsk) ++ continue; ++ if (child->pid != child->tgid) ++ continue; ++ get_task_struct(child); ++ read_unlock(&tasklist_lock); ++ ++ if ((head = remember_task(child, head, ctx)) == NULL) ++ return -ENOMEM; ++ ++ read_lock(&tasklist_lock); ++ } ++ ++ list_for_each_entry(child, &tsk->ptrace_children, ptrace_list) { ++ if (child->real_parent != tsk) ++ continue; ++ if (child->pid != child->tgid) ++ continue; ++ get_task_struct(child); ++ read_unlock(&tasklist_lock); ++ ++ if ((head = remember_task(child, head, ctx)) == NULL) ++ return -ENOMEM; ++ ++ read_lock(&tasklist_lock); ++ } ++ read_unlock(&tasklist_lock); ++ } ++ ++ return 0; ++ ++out: ++ return err; ++} ++ ++static int cpt_collect(struct cpt_context *ctx) ++{ ++ int err; ++ ++ if ((err = cpt_collect_mm(ctx)) != 0) ++ return err; ++ ++ if ((err = cpt_collect_sysv(ctx)) != 0) ++ return err; ++ ++ if ((err = cpt_collect_files(ctx)) != 0) ++ return err; ++ ++ if ((err = cpt_collect_fs(ctx)) != 0) ++ return err; ++ ++ if ((err = cpt_collect_namespace(ctx)) != 0) ++ return err; ++ ++ if ((err = cpt_collect_signals(ctx)) != 0) ++ return err; ++ ++ return 0; ++} ++ ++static int cpt_dump_veinfo(cpt_context_t *ctx) ++{ 
++ struct cpt_veinfo_image i; ++ struct ve_struct *ve; ++ struct timespec delta; ++ ++ cpt_open_section(ctx, CPT_SECT_VEINFO); ++ cpt_open_object(NULL, ctx); ++ ++ i.cpt_next = CPT_NULL; ++ i.cpt_object = CPT_OBJ_VEINFO; ++ i.cpt_hdrlen = sizeof(i); ++ i.cpt_content = CPT_CONTENT_VOID; ++ ++ ve = get_exec_env(); ++ i.shm_ctl_all = ve->_shm_ctlall; ++ i.shm_ctl_max = ve->_shm_ctlmax; ++ i.shm_ctl_mni = ve->_shm_ctlmni; ++ ++ i.msg_ctl_max = ve->_msg_ctlmax; ++ i.msg_ctl_mni = ve->_msg_ctlmni; ++ i.msg_ctl_mnb = ve->_msg_ctlmnb; ++ ++ BUG_ON(sizeof(ve->_sem_ctls) != sizeof(i.sem_ctl_arr)); ++ i.sem_ctl_arr[0] = ve->_sem_ctls[0]; ++ i.sem_ctl_arr[1] = ve->_sem_ctls[1]; ++ i.sem_ctl_arr[2] = ve->_sem_ctls[2]; ++ i.sem_ctl_arr[3] = ve->_sem_ctls[3]; ++ ++ do_posix_clock_monotonic_gettime(&delta); ++ _set_normalized_timespec(&delta, ++ delta.tv_sec - ve->start_timespec.tv_sec, ++ delta.tv_nsec - ve->start_timespec.tv_nsec); ++ i.start_timespec_delta = cpt_timespec_export(&delta); ++ i.start_jiffies_delta = get_jiffies_64() - ve->start_jiffies; ++ ++ ctx->write(&i, sizeof(i), ctx); ++ cpt_close_object(ctx); ++ cpt_close_section(ctx); ++ return 0; ++} ++ ++static int cpt_dump_utsname(cpt_context_t *ctx) ++{ ++ int len; ++ struct cpt_object_hdr o; ++ ++ cpt_open_section(ctx, CPT_SECT_UTSNAME); ++ ++ len = strlen(ve_utsname.nodename); ++ o.cpt_next = sizeof(o) + CPT_ALIGN(len + 1); ++ o.cpt_object = CPT_OBJ_NAME; ++ o.cpt_hdrlen = sizeof(o); ++ o.cpt_content = CPT_CONTENT_NAME; ++ ++ ctx->write(&o, sizeof(o), ctx); ++ ctx->write(ve_utsname.nodename, len+1, ctx); ++ ctx->align(ctx); ++ ++ len = strlen(ve_utsname.domainname); ++ o.cpt_next = sizeof(o) + CPT_ALIGN(len + 1); ++ o.cpt_object = CPT_OBJ_NAME; ++ o.cpt_hdrlen = sizeof(o); ++ o.cpt_content = CPT_CONTENT_NAME; ++ ++ ctx->write(&o, sizeof(o), ctx); ++ ctx->write(ve_utsname.domainname, len+1, ctx); ++ ctx->align(ctx); ++ ++ cpt_close_section(ctx); ++ return 0; ++} ++ ++int cpt_dump(struct cpt_context *ctx) ++{ ++ struct 
ve_struct *oldenv, *env; ++ int err, err2 = 0; ++ ++ if (!ctx->ve_id) ++ return -EINVAL; ++ ++ env = get_ve_by_id(ctx->ve_id); ++ if (!env) ++ return -ESRCH; ++ ++ down_read(&env->op_sem); ++ err = -ESRCH; ++ if (!env->is_running) ++ goto out_noenv; ++ if (!env->is_locked) ++ goto out_noenv; ++ ++ oldenv = set_exec_env(env); ++ ++ /* Phase 2: real checkpointing */ ++ err = cpt_open_dumpfile(ctx); ++ if (err) ++ goto out; ++ ++ cpt_major_hdr_out(ctx); ++ ++ if (!err) ++ err = cpt_dump_veinfo(ctx); ++ if (!err) ++ err = cpt_dump_ubc(ctx); ++ if (!err) ++ err = cpt_dump_ifinfo(ctx); ++ if (!err) ++ err = cpt_dump_files(ctx); ++ if (!err) ++ err = cpt_dump_files_struct(ctx); ++ if (!err) ++ err = cpt_dump_fs_struct(ctx); ++ if (!err) ++ err = cpt_dump_namespace(ctx); ++ if (!err) ++ err = cpt_dump_sighand(ctx); ++ if (!err) ++ err = cpt_dump_vm(ctx); ++ if (!err) ++ err = cpt_dump_sysvsem(ctx); ++ if (!err) ++ err = cpt_dump_tasks(ctx); ++ if (!err) ++ err = cpt_dump_orphaned_sockets(ctx); ++#if defined(CONFIG_VE_IPTABLES) && \ ++ (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)) ++ if (!err) ++ err = cpt_dump_ip_conntrack(ctx); ++#endif ++ if (!err) ++ err = cpt_dump_utsname(ctx); ++ ++ if (!err) ++ err = cpt_dump_tail(ctx); ++ ++ err2 = cpt_close_dumpfile(ctx); ++ ++out: ++ set_exec_env(oldenv); ++out_noenv: ++ up_read(&env->op_sem); ++ put_ve(env); ++ return err ? 
: err2; ++} ++ ++int cpt_vps_suspend(struct cpt_context *ctx) ++{ ++ struct ve_struct *oldenv, *env; ++ int err = 0; ++ ++ ctx->kernel_config_flags = test_kernel_config(); ++ cpt_object_init(ctx); ++ ++ if (!ctx->ve_id) { ++ env = get_exec_env(); ++ if (env == get_ve0()) ++ return -EINVAL; ++ wprintk("undefined ve_id\n"); ++ ctx->ve_id = env->veid; ++ get_ve(env); ++ } else { ++ env = get_ve_by_id(ctx->ve_id); ++ if (!env) ++ return -ESRCH; ++ } ++ ++#ifdef CONFIG_VE_IPTABLES ++ ctx->iptables_mask = env->_iptables_modules; ++#endif ++ ++ down_write(&env->op_sem); ++ err = -ESRCH; ++ if (!env->is_running) ++ goto out_noenv; ++ ++ err = -EBUSY; ++ if (env->is_locked) ++ goto out_noenv; ++ env->is_locked = 1; ++ downgrade_write(&env->op_sem); ++ ++ oldenv = set_exec_env(env); ++ ++ /* Phase 0: find and stop all the tasks */ ++ if ((err = vps_stop_tasks(ctx)) != 0) ++ goto out; ++ ++ if ((err = cpt_suspend_network(ctx)) != 0) ++ goto out; ++ ++ /* At the moment all the state is frozen. We do not need to lock ++ * the state, which can be changed only if the tasks are running. 
++ */ ++ ++ /* Phase 1: collect task tree */ ++ if ((err = vps_collect_tasks(ctx)) != 0) ++ goto out; ++ ++ /* Phase 1': collect all the resources */ ++ if ((err = cpt_collect(ctx)) != 0) ++ goto out; ++ ++out: ++ set_exec_env(oldenv); ++ up_read(&env->op_sem); ++ put_ve(env); ++ return err; ++ ++out_noenv: ++ up_write(&env->op_sem); ++ put_ve(env); ++ return err; ++} ++ ++int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps) ++{ ++ task_t *p; ++ struct ve_struct *env; ++ unsigned int flags = test_cpu_caps(); ++ ++ if (!ctx->ve_id) ++ return -EINVAL; ++ ++ env = get_ve_by_id(ctx->ve_id); ++ if (env == NULL) ++ return -ESRCH; ++ ++ *caps = flags & (1<<CPT_CPU_X86_CMOV); ++ flags &= ~((1<<CPT_CPU_X86_EMT64)|(1<<CPT_CPU_X86_IA64)); ++ ++ read_lock(&tasklist_lock); ++ for (p = __first_task_ve(env); p != NULL ; p = __next_task_ve(env, p)) { ++ if (tsk_used_math(p)) ++ *caps |= flags; ++#ifdef CONFIG_X86_64 ++ if (!(p->thread_info->flags & _TIF_IA32)) ++ *caps |= (1<<CPT_CPU_X86_EMT64); ++#endif ++ } ++ read_unlock(&tasklist_lock); ++ put_ve(env); ++ ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_dump.h linux-2.6.16-026test015/kernel/cpt/cpt_dump.h +--- linux-2.6.16.orig/kernel/cpt/cpt_dump.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_dump.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,14 @@ ++int cpt_dump(struct cpt_context *cpt); ++int rst_undump(struct cpt_context *cpt); ++int cpt_suspend(struct cpt_context *cpt); ++int cpt_resume(struct cpt_context *cpt); ++int cpt_kill(struct cpt_context *cpt); ++int rst_clean(struct cpt_context *cpt); ++int rst_resume(struct cpt_context *cpt); ++int rst_kill(struct cpt_context *cpt); ++ ++int cpt_freeze_one(pid_t pid, int freeze); ++int cpt_vps_suspend(struct cpt_context *ctx); ++int vps_rst_undump(struct cpt_context *ctx); ++ ++int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps); +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_epoll.c 
linux-2.6.16-026test015/kernel/cpt/cpt_epoll.c +--- linux-2.6.16.orig/kernel/cpt/cpt_epoll.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_epoll.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,116 @@ ++/* ++ * ++ * kernel/cpt/cpt_epoll.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/major.h> ++#include <linux/pipe_fs_i.h> ++#include <linux/mman.h> ++#include <linux/namespace.h> ++#include <linux/mount.h> ++#include <linux/namei.h> ++#include <linux/smp_lock.h> ++#include <asm/uaccess.h> ++#include <linux/vzcalluser.h> ++#include <linux/eventpoll.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_files.h" ++#include "cpt_kernel.h" ++#include "cpt_fsmagic.h" ++#include "cpt_syscalls.h" ++ ++extern struct file_operations eventpoll_fops; ++ ++int cpt_dump_epolldev(cpt_object_t *obj, cpt_context_t *ctx) ++{ ++ int err = 0; ++ struct file *file = obj->o_obj; ++ struct eventpoll *ep; ++ struct rb_node *rbp; ++ struct cpt_epoll_image ei; ++ ++ if (file->f_op != &eventpoll_fops) { ++ eprintk_ctx("bad epoll file\n"); ++ return -EINVAL; ++ } ++ ++ ep = file->private_data; ++ ++ /* eventpoll.c does not protect open /proc/N/fd, silly. 
++ * Opener will get an invalid file with uninitialized private_data ++ */ ++ if (unlikely(ep == NULL)) { ++ eprintk_ctx("bad epoll device\n"); ++ return -EINVAL; ++ } ++ ++ cpt_open_object(NULL, ctx); ++ ++ ei.cpt_next = CPT_NULL; ++ ei.cpt_object = CPT_OBJ_EPOLL; ++ ei.cpt_hdrlen = sizeof(ei); ++ ei.cpt_content = CPT_CONTENT_ARRAY; ++ ei.cpt_file = obj->o_pos; ++ ++ ctx->write(&ei, sizeof(ei), ctx); ++ ++ down(&epsem); ++ for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { ++ loff_t saved_obj; ++ cpt_object_t *tobj; ++ struct cpt_epoll_file_image efi; ++ struct epitem *epi; ++ epi = rb_entry(rbp, struct epitem, rbn); ++ tobj = lookup_cpt_object(CPT_OBJ_FILE, epi->ffd.file, ctx); ++ if (tobj == NULL) { ++ eprintk_ctx("epoll device refers to an external file\n"); ++ err = -EBUSY; ++ break; ++ } ++ cpt_push_object(&saved_obj, ctx); ++ cpt_open_object(NULL, ctx); ++ ++ efi.cpt_next = CPT_NULL; ++ efi.cpt_object = CPT_OBJ_EPOLL_FILE; ++ efi.cpt_hdrlen = sizeof(efi); ++ efi.cpt_content = CPT_CONTENT_VOID; ++ efi.cpt_file = tobj->o_pos; ++ efi.cpt_fd = epi->ffd.fd; ++ efi.cpt_events = epi->event.events; ++ efi.cpt_data = epi->event.data; ++ efi.cpt_revents = epi->revents; ++ efi.cpt_ready = 0; ++ if (!list_empty(&epi->rdllink)) ++ efi.cpt_ready = 1; ++ ++ ctx->write(&efi, sizeof(efi), ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ } ++ up(&epsem); ++ ++ cpt_close_object(ctx); ++ ++ return err; ++} ++ +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_files.c linux-2.6.16-026test015/kernel/cpt/cpt_files.c +--- linux-2.6.16.orig/kernel/cpt/cpt_files.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_files.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,1343 @@ ++/* ++ * ++ * kernel/cpt/cpt_files.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/major.h> ++#include <linux/pipe_fs_i.h> ++#include <linux/mman.h> ++#include <linux/namespace.h> ++#include <linux/mount.h> ++#include <linux/namei.h> ++#include <linux/smp_lock.h> ++#include <linux/pagemap.h> ++#include <asm/uaccess.h> ++#include <linux/vzcalluser.h> ++#include <linux/ve_proto.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_files.h" ++#include "cpt_socket.h" ++#include "cpt_kernel.h" ++#include "cpt_fsmagic.h" ++#include "cpt_syscalls.h" ++ ++void cpt_printk_dentry(struct dentry *d, struct vfsmount *mnt) ++{ ++ char *path; ++ unsigned long pg = __get_free_page(GFP_KERNEL); ++ ++ if (!pg) ++ return; ++ ++ path = d_path(d, mnt, (char *)pg, PAGE_SIZE); ++ ++ if (!IS_ERR(path)) ++ printk("<%s>", path); ++ free_page(pg); ++} ++ ++int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt, ++ cpt_context_t *ctx) ++{ ++ if (path[0] == '/' && !IS_ROOT(d) && !d_unhashed(d)) { ++ struct nameidata nd; ++ if (path_lookup(path, 0, &nd)) { ++ eprintk_ctx("d_path cannot be looked up %s\n", path); ++ return -EINVAL; ++ } ++ if (nd.dentry != d || nd.mnt != mnt) { ++ eprintk_ctx("d_path is invisible %s\n", path); ++ path_release(&nd); ++ return -EINVAL; ++ } ++ path_release(&nd); ++ } ++ return 0; ++} ++ ++int cpt_dump_dentry(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx) ++{ ++ int len; ++ char *path; ++ char *pg = cpt_get_buf(ctx); ++ ++ path = d_path(d, mnt, pg, PAGE_SIZE); ++ len = PTR_ERR(path); ++ ++ if (IS_ERR(path)) { ++ struct cpt_object_hdr o; ++ char tmp[1]; ++ /* VZ changes d_path() to return EINVAL, when path ++ * is not supposed to be visible 
inside VE. */ ++ if (len != -EINVAL) ++ eprintk_ctx("d_path err=%d\n", len); ++ else ++ len = 0; ++ ++ o.cpt_next = sizeof(o) + CPT_ALIGN(1); ++ o.cpt_object = CPT_OBJ_NAME; ++ o.cpt_hdrlen = sizeof(o); ++ o.cpt_content = CPT_CONTENT_NAME; ++ tmp[0] = 0; ++ ++ ctx->write(&o, sizeof(o), ctx); ++ ctx->write(tmp, 1, ctx); ++ ctx->align(ctx); ++ ++ __cpt_release_buf(ctx); ++ return len; ++ } else { ++ struct cpt_object_hdr o; ++ ++ len = pg + PAGE_SIZE - 1 - path; ++ o.cpt_next = sizeof(o) + CPT_ALIGN(len + 1); ++ o.cpt_object = CPT_OBJ_NAME; ++ o.cpt_hdrlen = sizeof(o); ++ o.cpt_content = CPT_CONTENT_NAME; ++ path[len] = 0; ++ ++ if (cpt_verify_overmount(path, d, mnt, ctx)) { ++ __cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ ++ ctx->write(&o, sizeof(o), ctx); ++ ctx->write(path, len+1, ctx); ++ ctx->align(ctx); ++ __cpt_release_buf(ctx); ++ } ++ return 0; ++} ++ ++int cpt_dump_string(const char *s, struct cpt_context *ctx) ++{ ++ int len; ++ struct cpt_object_hdr o; ++ ++ len = strlen(s); ++ o.cpt_next = sizeof(o) + CPT_ALIGN(len + 1); ++ o.cpt_object = CPT_OBJ_NAME; ++ o.cpt_hdrlen = sizeof(o); ++ o.cpt_content = CPT_CONTENT_NAME; ++ ++ ctx->write(&o, sizeof(o), ctx); ++ ctx->write(s, len+1, ctx); ++ ctx->align(ctx); ++ return 0; ++} ++ ++int cpt_dump_filename(struct file *file, struct cpt_context *ctx) ++{ ++ return cpt_dump_dentry(file->f_dentry, file->f_vfsmnt, ctx); ++} ++ ++int cpt_dump_inode(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_inode_image *v = cpt_get_buf(ctx); ++ struct kstat sbuf; ++ ++ v->cpt_next = sizeof(*v); ++ v->cpt_object = CPT_OBJ_INODE; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ if ((err = vfs_getattr(mnt, d, &sbuf)) != 0) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ ++ v->cpt_dev = d->d_inode->i_sb->s_dev; ++ v->cpt_ino = d->d_inode->i_ino; ++ v->cpt_mode = sbuf.mode; ++ v->cpt_nlink = sbuf.nlink; ++ v->cpt_uid = sbuf.uid; ++ v->cpt_gid = sbuf.gid; 
++ v->cpt_rdev = d->d_inode->i_rdev; ++ v->cpt_size = sbuf.size; ++ v->cpt_atime = cpt_timespec_export(&sbuf.atime); ++ v->cpt_mtime = cpt_timespec_export(&sbuf.mtime); ++ v->cpt_ctime = cpt_timespec_export(&sbuf.ctime); ++ v->cpt_blksize = sbuf.blksize; ++ v->cpt_blocks = sbuf.blocks; ++ v->cpt_sb = d->d_inode->i_sb->s_magic; ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ return 0; ++} ++ ++int cpt_collect_files(cpt_context_t * ctx) ++{ ++ int err; ++ cpt_object_t *obj; ++ int index = 0; ++ ++ /* Collect process fd sets */ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ if (tsk->files && cpt_object_add(CPT_OBJ_FILES, tsk->files, ctx) == NULL) ++ return -ENOMEM; ++ } ++ ++ /* Collect files from fd sets */ ++ for_each_object(obj, CPT_OBJ_FILES) { ++ int fd; ++ struct files_struct *f = obj->o_obj; ++ ++ cpt_obj_setindex(obj, index++, ctx); ++ ++ if (obj->o_count != atomic_read(&f->count)) { ++ eprintk_ctx("files_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&f->count)); ++ return -EBUSY; ++ } ++ ++ for (fd = 0; fd < f->fdt->max_fds; fd++) { ++ struct file *file = fcheck_files(f, fd); ++ if (file && cpt_object_add(CPT_OBJ_FILE, file, ctx) == NULL) ++ return -ENOMEM; ++ } ++ } ++ ++ /* Collect files queued by AF_UNIX sockets. */ ++ if ((err = cpt_collect_passedfds(ctx)) < 0) ++ return err; ++ ++ /* OK. At this point we should count all the references. 
*/ ++ for_each_object(obj, CPT_OBJ_FILE) { ++ struct file *file = obj->o_obj; ++ struct file *parent; ++ cpt_object_t *ino_obj; ++ ++ if (obj->o_count != atomic_read(&file->f_count)) { ++ eprintk_ctx("file struct is referenced outside %d %d\n", obj->o_count, atomic_read(&file->f_count)); ++ cpt_printk_dentry(file->f_dentry, file->f_vfsmnt); ++ return -EBUSY; ++ } ++ ++ switch (file->f_dentry->d_inode->i_sb->s_magic) { ++ case FSMAGIC_FUTEX: ++ case FSMAGIC_MQUEUE: ++ case FSMAGIC_BDEV: ++ eprintk_ctx("file on unsupported FS: magic %08lx\n", file->f_dentry->d_inode->i_sb->s_magic); ++ return -EBUSY; ++ } ++ ++ /* Collect inode. It is necessary mostly to resolve deleted ++ * hard links. */ ++ ino_obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx); ++ if (ino_obj == NULL) ++ return -ENOMEM; ++ ++ parent = ino_obj->o_parent; ++ if (!parent || (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry))) ++ ino_obj->o_parent = file; ++ ++ if (S_ISCHR(file->f_dentry->d_inode->i_mode)) { ++ int maj = imajor(file->f_dentry->d_inode); ++ if (maj == PTY_MASTER_MAJOR || ++ (maj >= UNIX98_PTY_MASTER_MAJOR && ++ maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) || ++ maj == PTY_SLAVE_MAJOR || ++ maj == UNIX98_PTY_SLAVE_MAJOR || ++ maj == TTYAUX_MAJOR) { ++ err = cpt_collect_tty(file, ctx); ++ if (err) ++ return err; ++ } ++ } ++ ++ if (S_ISSOCK(file->f_dentry->d_inode->i_mode)) { ++ err = cpt_collect_socket(file, ctx); ++ if (err) ++ return err; ++ } ++ } ++ ++ err = cpt_index_sockets(ctx); ++ ++ return err; ++} ++ ++/* /dev/ptmx is special, all the files share one inode, but real tty backend ++ * is attached via file->private_data. 
++ */ ++ ++static inline int is_cloning_inode(struct inode *ino) ++{ ++ return S_ISCHR(ino->i_mode) && ++ ino->i_rdev == MKDEV(TTYAUX_MAJOR,2); ++} ++ ++static int dump_one_flock(struct file_lock *fl, int owner, struct cpt_context *ctx) ++{ ++ pid_t pid; ++ struct cpt_flock_image *v = cpt_get_buf(ctx); ++ ++ v->cpt_next = sizeof(*v); ++ v->cpt_object = CPT_OBJ_FLOCK; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_VOID; ++ ++ v->cpt_owner = owner; ++ ++ pid = fl->fl_pid; ++ if (pid && !is_virtual_pid(fl->fl_pid)) { ++ pid = _pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid); ++ if (pid == -1) { ++ if (!(fl->fl_flags&FL_FLOCK)) { ++ eprintk_ctx("posix lock from another VE?\n"); ++ cpt_release_buf(ctx); ++ return -EBUSY; ++ } ++ pid = 0; ++ } ++ } ++ ++ v->cpt_pid = pid; ++ v->cpt_start = fl->fl_start; ++ v->cpt_end = fl->fl_end; ++ v->cpt_flags = fl->fl_flags; ++ v->cpt_type = fl->fl_type; ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ return 0; ++} ++ ++ ++int cpt_dump_flock(struct file *file, struct cpt_context *ctx) ++{ ++ int err = 0; ++ struct file_lock *fl; ++ ++ lock_kernel(); ++ for (fl = file->f_dentry->d_inode->i_flock; ++ fl; fl = fl->fl_next) { ++ if (file != fl->fl_file) ++ continue; ++ if (fl->fl_flags & FL_LEASE) { ++ eprintk_ctx("lease lock is not supported\n"); ++ err = -EINVAL; ++ break; ++ } ++ if (fl->fl_flags & FL_POSIX) { ++ cpt_object_t *obj; ++ obj = lookup_cpt_object(CPT_OBJ_FILES, fl->fl_owner, ctx); ++ if (obj) { ++ dump_one_flock(fl, obj->o_index, ctx); ++ continue; ++ } else { ++ eprintk_ctx("unknown lock owner %p\n", fl->fl_owner); ++ err = -EINVAL; ++ } ++ } ++ if (fl->fl_flags & FL_FLOCK) { ++ dump_one_flock(fl, -1, ctx); ++ continue; ++ } ++ } ++ unlock_kernel(); ++ return err; ++} ++ ++static int __comb_pid_to_vpid(int pid) ++{ ++ int vpid = pid; ++ ++ if (pid > 0) { ++ vpid = _pid_type_to_vpid(PIDTYPE_PID, pid); ++ if (unlikely(vpid < 0)) { ++ dprintk("pid %d does not exist amymore.\n", pid); ++ return 0; 
++ } ++ } else if (pid < 0) { ++ vpid = _pid_type_to_vpid(PIDTYPE_PGID, -pid); ++ if (unlikely(vpid < 0)) { ++ dprintk("pgid %d does not exist amymore.\n", -pid); ++ return 0; ++ } ++ vpid = -vpid; ++ } ++ return vpid; ++} ++ ++static int dump_one_file(cpt_object_t *obj, struct file *file, cpt_context_t *ctx) ++{ ++ int err = 0; ++ cpt_object_t *iobj; ++ struct cpt_file_image *v = cpt_get_buf(ctx); ++ struct kstat sbuf; ++ ++ cpt_open_object(obj, ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_FILE; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ v->cpt_flags = file->f_flags; ++ v->cpt_mode = file->f_mode; ++ v->cpt_pos = file->f_pos; ++ v->cpt_uid = file->f_uid; ++ v->cpt_gid = file->f_gid; ++ ++ vfs_getattr(file->f_vfsmnt, file->f_dentry, &sbuf); ++ ++ v->cpt_i_mode = sbuf.mode; ++ v->cpt_lflags = 0; ++ if (IS_ROOT(file->f_dentry)) ++ v->cpt_lflags |= CPT_DENTRY_ROOT; ++ else if (d_unhashed(file->f_dentry)) ++ v->cpt_lflags |= CPT_DENTRY_DELETED; ++ if (is_cloning_inode(file->f_dentry->d_inode)) ++ v->cpt_lflags |= CPT_DENTRY_CLONING; ++ if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC) ++ v->cpt_lflags |= CPT_DENTRY_PROC; ++ v->cpt_inode = CPT_NULL; ++ iobj = lookup_cpt_object(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx); ++ if (iobj) ++ v->cpt_inode = iobj->o_pos; ++ v->cpt_priv = CPT_NULL; ++ v->cpt_fown_fd = -1; ++ if (S_ISCHR(v->cpt_i_mode)) { ++ iobj = lookup_cpt_object(CPT_OBJ_TTY, file->private_data, ctx); ++ if (iobj) { ++ v->cpt_priv = iobj->o_pos; ++ if (file->f_flags&FASYNC) ++ v->cpt_fown_fd = cpt_tty_fasync(file, ctx); ++ } ++ } ++ if (S_ISSOCK(v->cpt_i_mode)) { ++ if (obj->o_index < 0) { ++ eprintk_ctx("BUG: no socket index\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ v->cpt_priv = obj->o_index; ++ if (file->f_flags&FASYNC) ++ v->cpt_fown_fd = cpt_socket_fasync(file, ctx); ++ } ++ if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) { ++ v->cpt_priv = 
file->f_dentry->d_inode->i_ino; ++ v->cpt_lflags |= CPT_DENTRY_EPOLL; ++ } ++ ++ v->cpt_fown_pid = __comb_pid_to_vpid((int)file->f_owner.pid); ++ v->cpt_fown_uid = file->f_owner.uid; ++ v->cpt_fown_euid = file->f_owner.euid; ++ v->cpt_fown_signo = file->f_owner.signum; ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ if (!S_ISSOCK(v->cpt_i_mode)) { ++ err = cpt_dump_filename(file, ctx); ++ if (err) ++ return err; ++ } ++ ++ if (file->f_dentry->d_inode->i_flock) ++ err = cpt_dump_flock(file, ctx); ++ ++ cpt_close_object(ctx); ++ ++ return err; ++} ++ ++/* About this weird function... Crappy code dealing with SYSV shared memory ++ * defines TMPFS inode and file with f_op doing only mmap. So... ++ * Maybe, this is wrong and leaks something. It is clear access to ++ * SYSV shmem via mmap is quite unusual and impossible from user space. ++ */ ++static int dump_content_shm(struct file *file, struct cpt_context *ctx) ++{ ++ struct cpt_obj_bits *v; ++ loff_t saved_pos; ++ unsigned long addr; ++ ++ addr = do_mmap_pgoff(file, 0, file->f_dentry->d_inode->i_size, ++ PROT_READ, MAP_SHARED, 0); ++ if (IS_ERR((void*)addr)) ++ return PTR_ERR((void*)addr); ++ ++ cpt_push_object(&saved_pos, ctx); ++ cpt_open_object(NULL, ctx); ++ v = cpt_get_buf(ctx); ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_BITS; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_DATA; ++ v->cpt_size = file->f_dentry->d_inode->i_size; ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ctx->write((void*)addr, file->f_dentry->d_inode->i_size, ctx); ++ ctx->align(ctx); ++ do_munmap(current->mm, addr, file->f_dentry->d_inode->i_size); ++ ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_pos, ctx); ++ return 0; ++} ++ ++static int data_is_zero(char *addr, int len) ++{ ++ int i; ++ unsigned long zerolong = 0; ++ ++ for (i=0; i<len/sizeof(unsigned long); i++) { ++ if (((unsigned long*)(addr))[i] != 0) ++ return 0; ++ } ++ i = len % sizeof(unsigned long); ++ if (!i) 
++ return 1; ++ return memcmp(addr + len - i, &zerolong, i) == 0; ++} ++ ++ ++static int dump_content_regular(struct file *file, struct cpt_context *ctx) ++{ ++ loff_t saved_pos; ++ loff_t pos = 0; ++ loff_t obj_opened = CPT_NULL; ++ struct cpt_page_block pgb; ++ ssize_t (*do_read)(struct file *, char __user *, size_t, loff_t *); ++ ++ if (file->f_op == NULL) ++ return -EINVAL; ++ ++ if ((do_read = file->f_op->read) == NULL) { ++ if (file->f_op->mmap == NULL) ++ return -EINVAL; ++ if (file->f_dentry->d_inode->i_sb->s_magic != FSMAGIC_TMPFS) { ++ eprintk_ctx("unreadable, but not SYSV SHM file\n"); ++ return -EINVAL; ++ } ++ ++ do_read = file->f_dentry->d_inode->i_fop->read; ++ cpt_dump_content_sysvshm(file, ctx); ++ if (!do_read) { ++ wprintk_ctx("TMPFS is not configured?\n"); ++ return dump_content_shm(file, ctx); ++ } ++ } ++ ++ if (!(file->f_mode & FMODE_READ) || ++ (file->f_flags & O_DIRECT)) { ++ file = dentry_open(dget(file->f_dentry), ++ mntget(file->f_vfsmnt), O_RDONLY); ++ } else { ++ atomic_inc(&file->f_count); ++ } ++ ++ for (;;) { ++ mm_segment_t oldfs; ++ int err; ++ ++ (void)cpt_get_buf(ctx); ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ err = do_read(file, ctx->tmpbuf, PAGE_SIZE, &pos); ++ set_fs(oldfs); ++ if (err < 0) { ++ eprintk_ctx("dump_content_regular: do_read: %d", err); ++ fput(file); ++ __cpt_release_buf(ctx); ++ return err; ++ } ++ if (err == 0) { ++ __cpt_release_buf(ctx); ++ break; ++ } ++ if (data_is_zero(ctx->tmpbuf, err)) { ++ if (obj_opened != CPT_NULL) { ++ ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end)); ++ ctx->align(ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_pos, ctx); ++ obj_opened = CPT_NULL; ++ } ++ } else { ++ if (obj_opened == CPT_NULL) { ++ cpt_push_object(&saved_pos, ctx); ++ cpt_open_object(NULL, ctx); ++ obj_opened = ctx->file->f_pos; ++ pgb.cpt_next = CPT_NULL; ++ pgb.cpt_object = CPT_OBJ_PAGES; ++ pgb.cpt_hdrlen = sizeof(pgb); ++ pgb.cpt_content = CPT_CONTENT_DATA; 
++ pgb.cpt_start = pos - err; ++ pgb.cpt_end = pgb.cpt_start; ++ ctx->write(&pgb, sizeof(pgb), ctx); ++ } ++ ctx->write(ctx->tmpbuf, err, ctx); ++ pgb.cpt_end += err; ++ } ++ __cpt_release_buf(ctx); ++ } ++ ++ fput(file); ++ ++ if (obj_opened != CPT_NULL) { ++ ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end)); ++ ctx->align(ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_pos, ctx); ++ obj_opened = CPT_NULL; ++ } ++ return 0; ++} ++ ++ ++static int dump_content_chrdev(struct file *file, struct cpt_context *ctx) ++{ ++ struct inode *ino = file->f_dentry->d_inode; ++ int maj; ++ ++ maj = imajor(ino); ++ if (maj == MEM_MAJOR) { ++ /* Well, OK. */ ++ return 0; ++ } ++ if (maj == PTY_MASTER_MAJOR || ++ (maj >= UNIX98_PTY_MASTER_MAJOR && ++ maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) || ++ maj == PTY_SLAVE_MAJOR || ++ maj == UNIX98_PTY_SLAVE_MAJOR || ++ maj == TTYAUX_MAJOR) { ++ return cpt_dump_content_tty(file, ctx); ++ } ++ eprintk_ctx("unsupported chrdev %d/%d\n", maj, iminor(ino)); ++ return -EINVAL; ++} ++ ++static int dump_content_blkdev(struct file *file, struct cpt_context *ctx) ++{ ++ struct inode *ino = file->f_dentry->d_inode; ++ ++ /* We are not going to transfer them. 
*/ ++ eprintk_ctx("unsupported blkdev %d/%d\n", imajor(ino), iminor(ino)); ++ return -EINVAL; ++} ++ ++static int dump_content_fifo(struct file *file, struct cpt_context *ctx) ++{ ++ struct inode *ino = file->f_dentry->d_inode; ++ cpt_object_t *obj; ++ loff_t saved_pos; ++ int readers; ++ int writers; ++ int anon = 0; ++ ++ mutex_lock(PIPE_MUTEX(*ino)); ++ readers = PIPE_READERS(*ino); ++ writers = PIPE_WRITERS(*ino); ++ for_each_object(obj, CPT_OBJ_FILE) { ++ struct file *file1 = obj->o_obj; ++ if (file1->f_dentry->d_inode == ino) { ++ if (file1->f_mode & FMODE_READ) ++ readers--; ++ if (file1->f_mode & FMODE_WRITE) ++ writers--; ++ } ++ } ++ mutex_unlock(PIPE_MUTEX(*ino)); ++ if (readers || writers) { ++ struct dentry *dr = file->f_dentry->d_sb->s_root; ++ if (dr->d_name.len == 7 && memcmp(dr->d_name.name,"pipefs:",7) == 0) ++ anon = 1; ++ ++ if (anon) { ++ eprintk_ctx("pipe has %d/%d external readers/writers\n", readers, writers); ++ return -EBUSY; ++ } ++ /* If fifo has external readers/writers, we are in troubles. ++ * If the buffer is not empty, we must move its content. ++ * But if the fifo is owned by a service, we cannot do ++ * this. See? ++ * ++ * For now we assume, that if fifo is opened by another ++ * process, we do not own it and, hence, migrate without ++ * data. ++ */ ++ return 0; ++ } ++ ++ /* OK, we must save fifo state. No semaphores required. 
*/ ++ ++ if (ino->i_pipe->nrbufs) { ++ struct cpt_obj_bits *v = cpt_get_buf(ctx); ++ struct pipe_inode_info *info; ++ int count, buf, nrbufs; ++ ++ mutex_lock(PIPE_MUTEX(*ino)); ++ info = ino->i_pipe; ++ count = 0; ++ buf = info->curbuf; ++ nrbufs = info->nrbufs; ++ while (--nrbufs >= 0) { ++ if (!info->bufs[buf].ops->can_merge) { ++ mutex_unlock(PIPE_MUTEX(*ino)); ++ eprintk_ctx("unknown format of pipe buffer\n"); ++ return -EINVAL; ++ } ++ count += info->bufs[buf].len; ++ buf = (buf+1) & (PIPE_BUFFERS-1); ++ } ++ ++ if (!count) { ++ mutex_unlock(PIPE_MUTEX(*ino)); ++ return 0; ++ } ++ ++ cpt_push_object(&saved_pos, ctx); ++ cpt_open_object(NULL, ctx); ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_BITS; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_DATA; ++ v->cpt_size = count; ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ count = 0; ++ buf = info->curbuf; ++ nrbufs = info->nrbufs; ++ while (--nrbufs >= 0) { ++ struct pipe_buffer *b = info->bufs + buf; ++ void * addr = b->ops->map(file, info, b); ++ ctx->write(addr + b->offset, b->len, ctx); ++ b->ops->unmap(info, b); ++ buf = (buf+1) & (PIPE_BUFFERS-1); ++ } ++ ++ mutex_unlock(PIPE_MUTEX(*ino)); ++ ++ ctx->align(ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_pos, ctx); ++ } ++ ++ return 0; ++} ++ ++static int dump_content_socket(struct file *file, struct cpt_context *ctx) ++{ ++ return 0; ++} ++ ++static int dump_one_inode(struct file *file, struct dentry *d, ++ struct vfsmount *mnt, struct cpt_context *ctx) ++{ ++ int err = 0; ++ struct inode *ino = d->d_inode; ++ cpt_object_t *iobj; ++ int dump_it = 0; ++ ++ iobj = lookup_cpt_object(CPT_OBJ_INODE, ino, ctx); ++ if (!iobj) ++ return -EINVAL; ++ ++ if (iobj->o_pos >= 0) ++ return 0; ++ ++ if (!IS_ROOT(d) && d_unhashed(d)) ++ dump_it = 1; ++ if (!S_ISREG(ino->i_mode) && !S_ISDIR(ino->i_mode)) { ++ /* One more bug in epoll: invalid inode mode. ++ * What a load of crap... 
++ */ ++ if (ino->i_sb->s_magic == FSMAGIC_EPOLL && ++ (ino->i_mode & S_IFMT) == 0) ++ return 0; ++ dump_it = 1; ++ } ++ ++ if (!dump_it) ++ return 0; ++ ++ cpt_open_object(iobj, ctx); ++ cpt_dump_inode(d, mnt, ctx); ++ ++ if (!IS_ROOT(d) && d_unhashed(d)) { ++ struct file *parent; ++ parent = iobj->o_parent; ++ if (!parent || ++ (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry))) { ++ /* Inode is not deleted, but it does not ++ * have references from inside checkpointed ++ * process group. We have options: ++ * A. Fail, abort checkpointing ++ * B. Proceed. File will be cloned. ++ * A is correct, B is more complicated */ ++ /* Just as a hint where to create deleted file */ ++ if (ino->i_nlink != 0) { ++ eprintk_ctx("deleted reference to existing inode, checkpointing is impossible\n"); ++ return -EBUSY; ++ } ++ } else { ++ /* Refer to _another_ file name. */ ++ err = cpt_dump_filename(parent, ctx); ++ if (err) ++ return err; ++ if (S_ISREG(ino->i_mode) || S_ISDIR(ino->i_mode)) ++ dump_it = 0; ++ } ++ } ++ if (dump_it) { ++ if (S_ISREG(ino->i_mode)) { ++ if ((err = dump_content_regular(file, ctx)) != 0) { ++ eprintk_ctx("dump_content_regular "); ++ cpt_printk_dentry(d, mnt); ++ } ++ } else if (S_ISDIR(ino->i_mode)) { ++ /* We cannot do anything. The directory should be ++ * empty, so it is not a big deal. 
++ */ ++ } else if (S_ISCHR(ino->i_mode)) { ++ err = dump_content_chrdev(file, ctx); ++ } else if (S_ISBLK(ino->i_mode)) { ++ err = dump_content_blkdev(file, ctx); ++ } else if (S_ISFIFO(ino->i_mode)) { ++ err = dump_content_fifo(file, ctx); ++ } else if (S_ISSOCK(ino->i_mode)) { ++ err = dump_content_socket(file, ctx); ++ } else { ++ eprintk_ctx("unknown inode mode %o\n", ino->i_mode & S_IFMT); ++ err = -EINVAL; ++ } ++ } ++ cpt_close_object(ctx); ++ ++ return err; ++} ++ ++int cpt_dump_files(struct cpt_context *ctx) ++{ ++ int epoll_nr; ++ cpt_object_t *obj; ++ ++ cpt_open_section(ctx, CPT_SECT_TTY); ++ for_each_object(obj, CPT_OBJ_TTY) { ++ int err; ++ ++ if ((err = cpt_dump_tty(obj, ctx)) != 0) ++ return err; ++ } ++ cpt_close_section(ctx); ++ ++ cpt_open_section(ctx, CPT_SECT_INODE); ++ for_each_object(obj, CPT_OBJ_FILE) { ++ struct file *file = obj->o_obj; ++ int err; ++ ++ if ((err = dump_one_inode(file, file->f_dentry, ++ file->f_vfsmnt, ctx)) != 0) ++ return err; ++ } ++ for_each_object(obj, CPT_OBJ_FS) { ++ struct fs_struct *fs = obj->o_obj; ++ int err; ++ ++ if (fs->root && ++ (err = dump_one_inode(NULL, fs->root, fs->rootmnt, ctx)) != 0) ++ return err; ++ if (fs->pwd && ++ (err = dump_one_inode(NULL, fs->pwd, fs->pwdmnt, ctx)) != 0) ++ return err; ++ if (fs->altroot && ++ (err = dump_one_inode(NULL, fs->altroot, fs->altrootmnt, ctx)) != 0) ++ return err; ++ } ++ cpt_close_section(ctx); ++ ++ epoll_nr = 0; ++ cpt_open_section(ctx, CPT_SECT_FILES); ++ for_each_object(obj, CPT_OBJ_FILE) { ++ struct file *file = obj->o_obj; ++ int err; ++ ++ if ((err = dump_one_file(obj, file, ctx)) != 0) ++ return err; ++ if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) ++ epoll_nr++; ++ } ++ cpt_close_section(ctx); ++ ++ if (epoll_nr) { ++ cpt_open_section(ctx, CPT_SECT_EPOLL); ++ for_each_object(obj, CPT_OBJ_FILE) { ++ struct file *file = obj->o_obj; ++ if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) { ++ int err; ++ if ((err = 
cpt_dump_epolldev(obj, ctx)) != 0) ++ return err; ++ } ++ } ++ cpt_close_section(ctx); ++ } ++ ++ cpt_open_section(ctx, CPT_SECT_SOCKET); ++ for_each_object(obj, CPT_OBJ_SOCKET) { ++ int err; ++ ++ if ((err = cpt_dump_socket(obj, obj->o_obj, obj->o_index, -1, ctx)) != 0) ++ return err; ++ } ++ cpt_close_section(ctx); ++ ++ return 0; ++} ++ ++static int dump_filedesc(int fd, struct file *file, ++ struct files_struct *f, struct cpt_context *ctx) ++{ ++ struct cpt_fd_image *v = cpt_get_buf(ctx); ++ cpt_object_t *obj; ++ ++ cpt_open_object(NULL, ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_FILEDESC; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_VOID; ++ ++ v->cpt_fd = fd; ++ obj = lookup_cpt_object(CPT_OBJ_FILE, file, ctx); ++ if (!obj) BUG(); ++ v->cpt_file = obj->o_pos; ++ v->cpt_flags = 0; ++ if (FD_ISSET(fd, f->fdt->close_on_exec)) ++ v->cpt_flags = CPT_FD_FLAG_CLOSEEXEC; ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ cpt_close_object(ctx); ++ ++ return 0; ++} ++ ++static int dump_one_file_struct(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ struct files_struct *f = obj->o_obj; ++ struct cpt_files_struct_image *v = cpt_get_buf(ctx); ++ int fd; ++ loff_t saved_obj; ++ ++ cpt_open_object(obj, ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_FILES; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ v->cpt_index = obj->o_index; ++ v->cpt_max_fds = f->fdt->max_fds; ++ v->cpt_next_fd = f->fdt->next_fd; ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ cpt_push_object(&saved_obj, ctx); ++ for (fd = 0; fd < f->fdt->max_fds; fd++) { ++ struct file *file = fcheck_files(f, fd); ++ if (file) ++ dump_filedesc(fd, file, f, ctx); ++ } ++ cpt_pop_object(&saved_obj, ctx); ++ ++ cpt_close_object(ctx); ++ ++ return 0; ++} ++ ++int cpt_dump_files_struct(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ cpt_open_section(ctx, CPT_SECT_FILES_STRUCT); ++ ++ 
for_each_object(obj, CPT_OBJ_FILES) { ++ int err; ++ ++ if ((err = dump_one_file_struct(obj, ctx)) != 0) ++ return err; ++ } ++ ++ cpt_close_section(ctx); ++ return 0; ++} ++ ++int cpt_collect_fs(cpt_context_t * ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ if (tsk->fs) { ++ if (cpt_object_add(CPT_OBJ_FS, tsk->fs, ctx) == NULL) ++ return -ENOMEM; ++ if (tsk->fs->pwd && ++ cpt_object_add(CPT_OBJ_INODE, tsk->fs->pwd->d_inode, ctx) == NULL) ++ return -ENOMEM; ++ if (tsk->fs->root && ++ cpt_object_add(CPT_OBJ_INODE, tsk->fs->root->d_inode, ctx) == NULL) ++ return -ENOMEM; ++ if (tsk->fs->altroot && ++ cpt_object_add(CPT_OBJ_INODE, tsk->fs->altroot->d_inode, ctx) == NULL) ++ return -ENOMEM; ++ } ++ } ++ return 0; ++} ++ ++static int cpt_dump_dir(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx) ++{ ++ struct file file; ++ ++ memset(&file, 0, sizeof(file)); ++ ++ file.f_dentry = d; ++ file.f_vfsmnt = mnt; ++ file.f_mode = FMODE_READ|FMODE_PREAD|FMODE_LSEEK; ++ return dump_one_file(NULL, &file, ctx); ++} ++ ++static int dump_one_fs(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ struct fs_struct *fs = obj->o_obj; ++ struct cpt_fs_struct_image *v = cpt_get_buf(ctx); ++ loff_t saved_obj; ++ int err; ++ ++ cpt_open_object(obj, ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_FS; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ v->cpt_umask = fs->umask; ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ cpt_push_object(&saved_obj, ctx); ++ err = cpt_dump_dir(fs->root, fs->rootmnt, ctx); ++ if (!err) ++ err = cpt_dump_dir(fs->pwd, fs->pwdmnt, ctx); ++ if (!err && fs->altroot) ++ err = cpt_dump_dir(fs->altroot, fs->altrootmnt, ctx); ++ ++ cpt_pop_object(&saved_obj, ctx); ++ ++ cpt_close_object(ctx); ++ ++ return err; ++} ++ ++int cpt_dump_fs_struct(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ cpt_open_section(ctx, CPT_SECT_FS); ++ 
++ for_each_object(obj, CPT_OBJ_FS) { ++ int err; ++ ++ if ((err = dump_one_fs(obj, ctx)) != 0) ++ return err; ++ } ++ ++ cpt_close_section(ctx); ++ return 0; ++} ++ ++static int check_one_namespace(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ int err = 0; ++ struct namespace *n = obj->o_obj; ++ struct list_head *p; ++ char *path_buf, *path; ++ ++ path_buf = (char *) __get_free_page(GFP_KERNEL); ++ if (!path_buf) ++ return -ENOMEM; ++ ++ down_read(&namespace_sem); ++ list_for_each(p, &n->list) { ++ struct vfsmount *mnt = list_entry(p, struct vfsmount, mnt_list); ++ ++ path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE); ++ if (IS_ERR(path)) ++ continue; ++ ++ if ( ++ strcmp(mnt->mnt_sb->s_type->name, "rootfs") != 0 && ++ strcmp(mnt->mnt_sb->s_type->name, "ext3") != 0 && ++ strcmp(mnt->mnt_sb->s_type->name, "simfs") != 0 && ++ strcmp(mnt->mnt_sb->s_type->name, "tmpfs") != 0 && ++ strcmp(mnt->mnt_sb->s_type->name, "devpts") != 0 && ++ strcmp(mnt->mnt_sb->s_type->name, "proc") != 0 && ++ strcmp(mnt->mnt_sb->s_type->name, "sysfs") != 0) { ++ eprintk_ctx("unsupported fs type %s\n", mnt->mnt_sb->s_type->name); ++ err = -EINVAL; ++ break; ++ } ++ } ++ up_read(&namespace_sem); ++ ++ free_page((unsigned long) path_buf); ++ ++ return err; ++} ++ ++int cpt_collect_namespace(cpt_context_t * ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ if (tsk->namespace && cpt_object_add(CPT_OBJ_NAMESPACE, tsk->namespace, ctx) == NULL) ++ return -ENOMEM; ++ } ++ ++ for_each_object(obj, CPT_OBJ_NAMESPACE) { ++ int err; ++ if ((err = check_one_namespace(obj, ctx)) != 0) ++ return err; ++ } ++ ++ return 0; ++} ++ ++struct args_t ++{ ++ int* pfd; ++ char* path; ++}; ++ ++static int dumptmpfs(void *arg) ++{ ++ int i; ++ struct args_t *args = arg; ++ int *pfd = args->pfd; ++ char *path = args->path; ++ char *argv[] = { "tar", "-c", "-S", "--numeric-owner", path, NULL }; ++ ++ i = real_env_create(VEID(get_exec_env()), 
VE_ENTER|VE_SKIPLOCK, 2, NULL, 0); ++ if (i < 0) { ++ eprintk("cannot enter ve to dump tmpfs\n"); ++ module_put(THIS_MODULE); ++ return 1; ++ } ++ ++ if (pfd[1] != 1) ++ sc_dup2(pfd[1], 1); ++ ++ for (i=0; i<current->files->fdt->max_fds; i++) { ++ if (i != 1) ++ sc_close(i); ++ } ++ ++ module_put(THIS_MODULE); ++ ++ set_fs(KERNEL_DS); ++ i = sc_execve("/bin/tar", argv, NULL); ++ eprintk("failed to exec /bin/tar: %d\n", i); ++ return -1; ++} ++ ++static int cpt_dump_tmpfs(char *path, struct cpt_context *ctx) ++{ ++ int err; ++ int pid; ++ int pfd[2]; ++ struct file *f; ++ struct cpt_object_hdr v; ++ char buf[16]; ++ int n; ++ loff_t saved_obj; ++ struct args_t args; ++ ++ err = sc_pipe(pfd); ++ if (err < 0) ++ return err; ++ args.pfd = pfd; ++ args.path = path; ++ err = pid = local_kernel_thread(dumptmpfs, (void*)&args, SIGCHLD, 0); ++ if (err < 0) ++ goto out; ++ f = fget(pfd[0]); ++ sc_close(pfd[1]); ++ sc_close(pfd[0]); ++ ++ cpt_push_object(&saved_obj, ctx); ++ cpt_open_object(NULL, ctx); ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_NAME; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_NAME; ++ ++ ctx->write(&v, sizeof(v), ctx); ++ ++ do { ++ mm_segment_t oldfs; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos); ++ set_fs(oldfs); ++ if (n > 0) ++ ctx->write(buf, n, ctx); ++ } while (n > 0); ++ ++ fput(f); ++ ++ if ((err = sc_waitx(pid, 0)) < 0) ++ eprintk_ctx("wait4: %d\n", err); ++ ++ buf[0] = 0; ++ ctx->write(buf, 1, ctx); ++ ctx->align(ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ return n; ++ ++out: ++ if (pfd[1] >= 0) ++ sc_close(pfd[1]); ++ if (pfd[0] >= 0) ++ sc_close(pfd[0]); ++ return err; ++} ++ ++static int dump_vfsmount(struct vfsmount *mnt, struct cpt_context *ctx) ++{ ++ int err = 0; ++ struct cpt_vfsmount_image v; ++ loff_t saved_obj; ++ char *path_buf, *path; ++ ++ path_buf = (char *) __get_free_page(GFP_KERNEL); ++ if (!path_buf) ++ return -ENOMEM; ++ ++ 
path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE); ++ if (IS_ERR(path)) { ++ free_page((unsigned long) path_buf); ++ return PTR_ERR(path) == -EINVAL ? 0 : PTR_ERR(path); ++ } ++ ++ cpt_open_object(NULL, ctx); ++ ++ v.cpt_next = -1; ++ v.cpt_object = CPT_OBJ_VFSMOUNT; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_ARRAY; ++ ++ v.cpt_mntflags = mnt->mnt_flags; ++ v.cpt_flags = mnt->mnt_sb->s_flags; ++ ++ ctx->write(&v, sizeof(v), ctx); ++ ++ cpt_push_object(&saved_obj, ctx); ++ cpt_dump_string(mnt->mnt_devname ? : "none", ctx); ++ cpt_dump_string(path, ctx); ++ cpt_dump_string(mnt->mnt_sb->s_type->name, ctx); ++#if 0 ++ /* This is an evident crap. Ask Savochkin, he might know this. ++ * Goal is to get some path to mount --bind to. ++ */ ++ cpt_dump_dentry(mnt->mnt_root, mnt->mnt_parent, ctx); ++#else ++ /* For now we just bail, when some FS is mounted not at root. */ ++ if (mnt->mnt_root != mnt->mnt_sb->s_root) { ++ eprintk_ctx("mount --bind prevents checkpointing\n"); ++ err = -EINVAL; ++ } ++#endif ++ ++ if (strcmp(mnt->mnt_sb->s_type->name, "tmpfs") == 0) { ++ cpt_dump_tmpfs(path, ctx); ++ } ++ ++ cpt_pop_object(&saved_obj, ctx); ++ ++ cpt_close_object(ctx); ++ ++ free_page((unsigned long) path_buf); ++ ++ return err; ++} ++ ++static int dump_one_namespace(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ struct namespace *n = obj->o_obj; ++ struct cpt_object_hdr v; ++ struct list_head *p; ++ loff_t saved_obj; ++ int err = 0; ++ ++ cpt_open_object(obj, ctx); ++ ++ v.cpt_next = -1; ++ v.cpt_object = CPT_OBJ_NAMESPACE; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_ARRAY; ++ ++ ctx->write(&v, sizeof(v), ctx); ++ ++ cpt_push_object(&saved_obj, ctx); ++ ++ down_read(&namespace_sem); ++ list_for_each(p, &n->list) { ++ err = dump_vfsmount(list_entry(p, struct vfsmount, mnt_list), ctx); ++ if (err) ++ break; ++ } ++ up_read(&namespace_sem); ++ ++ cpt_pop_object(&saved_obj, ctx); ++ ++ cpt_close_object(ctx); ++ ++ return err; ++} ++ ++int 
cpt_dump_namespace(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ cpt_open_section(ctx, CPT_SECT_NAMESPACE); ++ ++ for_each_object(obj, CPT_OBJ_NAMESPACE) { ++ int err; ++ ++ if ((err = dump_one_namespace(obj, ctx)) != 0) ++ return err; ++ } ++ ++ cpt_close_section(ctx); ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_files.h linux-2.6.16-026test015/kernel/cpt/cpt_files.h +--- linux-2.6.16.orig/kernel/cpt/cpt_files.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_files.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,46 @@ ++int cpt_collect_files(cpt_context_t *); ++int cpt_collect_fs(cpt_context_t *); ++int cpt_collect_namespace(cpt_context_t *); ++int cpt_collect_sysvsem_undo(cpt_context_t *); ++int cpt_collect_tty(struct file *, cpt_context_t *); ++int cpt_dump_files(struct cpt_context *ctx); ++int cpt_dump_files_struct(struct cpt_context *ctx); ++int cpt_dump_fs_struct(struct cpt_context *ctx); ++int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx); ++int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx); ++int cpt_dump_tty(cpt_object_t *, struct cpt_context *ctx); ++struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx); ++struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii, unsigned flags, struct cpt_context *ctx); ++__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx); ++ ++int rst_posix_locks(struct cpt_context *ctx); ++ ++struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx); ++int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx); ++__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx); ++int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx); ++int rst_restore_fs(struct cpt_context *ctx); ++ ++int cpt_collect_sysv(cpt_context_t *); ++int cpt_dump_sysvsem(struct cpt_context *ctx); ++int rst_sysv_ipc(struct cpt_context *ctx); ++int 
rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx); ++__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx); ++ ++int cpt_dump_namespace(struct cpt_context *ctx); ++int rst_root_namespace(struct cpt_context *ctx); ++ ++int rst_stray_files(struct cpt_context *ctx); ++int rst_tty_jobcontrol(struct cpt_context *ctx); ++ ++void rst_flush_filejobs(struct cpt_context *); ++int rst_do_filejobs(struct cpt_context *); ++ ++int rst_eventpoll(struct cpt_context *); ++struct file *cpt_open_epolldev(struct cpt_file_image *fi, ++ unsigned flags, ++ struct cpt_context *ctx); ++int cpt_dump_epolldev(cpt_object_t *obj, struct cpt_context *); ++ ++int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt, ++ cpt_context_t *ctx); +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_fsmagic.h linux-2.6.16-026test015/kernel/cpt/cpt_fsmagic.h +--- linux-2.6.16.orig/kernel/cpt/cpt_fsmagic.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_fsmagic.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,15 @@ ++/* Collected from kernel sources. */ ++ ++#define FSMAGIC_TMPFS 0x01021994 ++#define FSMAGIC_PIPEFS 0x50495045 ++#define FSMAGIC_SOCKFS 0x534F434B ++#define FSMAGIC_PFMFS 0xa0b4d889 ++#define FSMAGIC_BDEV 0x62646576 ++#define FSMAGIC_EPOLL 0x03111965 ++#define FSMAGIC_FUTEX 0x0BAD1DEA ++#define FSMAGIC_MQUEUE 0x19800202 ++#define FSMAGIC_PROC 0x9fa0 ++#define FSMAGIC_DEVPTS 0x1CD1 ++#define FSMAGIC_AUTOFS 0x0187 ++#define FSMAGIC_EXT2 0xEF53 ++#define FSMAGIC_REISER 0x52654973 +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_kernel.c linux-2.6.16-026test015/kernel/cpt/cpt_kernel.c +--- linux-2.6.16.orig/kernel/cpt/cpt_kernel.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_kernel.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,134 @@ ++/* ++ * ++ * kernel/cpt/cpt_kernel.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. 
++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#define __KERNEL_SYSCALLS__ 1 ++ ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/sched.h> ++#include <linux/mm.h> ++#include <linux/kernel.h> ++#include <asm/cpufeature.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_kernel.h" ++#include "cpt_syscalls.h" ++ ++int debug_level = 1; ++ ++#ifndef CONFIG_X86_64 ++ ++extern void local_kernel_thread_helper(void); ++__asm__(".section .text\n" ++ ".align 4\n" ++ "local_kernel_thread_helper:\n\t" ++ "movl %edx,%eax\n\t" ++ "pushl %edx\n\t" ++ "call *%ebx\n\t" ++ "pushl %eax\n\t" ++ "pushl $0\n\t" ++ "call complete_and_exit\n" ++ ".previous"); ++ ++/* ++ * Create a kernel thread ++ */ ++int asm_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid) ++{ ++ struct pt_regs regs; ++ ++ memset(®s, 0, sizeof(regs)); ++ ++ regs.ebx = (unsigned long) fn; ++ regs.edx = (unsigned long) arg; ++ ++ regs.xds = __USER_DS; ++ regs.xes = __USER_DS; ++ regs.orig_eax = -1; ++ regs.eip = (unsigned long) local_kernel_thread_helper; ++ regs.xcs = __KERNEL_CS; ++ regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; ++ ++ /* Ok, create the new process.. 
*/ ++ return do_fork_pid(flags | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL, pid); ++} ++#endif ++ ++int local_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid) ++{ ++ pid_t ret; ++ ++ if (!try_module_get(THIS_MODULE)) ++ return -EBUSY; ++ ret = asm_kernel_thread(fn, arg, flags, pid); ++ if (ret < 0) ++ module_put(THIS_MODULE); ++ return ret; ++} ++ ++#ifdef __i386__ ++int __execve(const char *file, char **argv, char **envp) ++{ ++ long res; ++ __asm__ volatile ("int $0x80" ++ : "=a" (res) ++ : "0" (__NR_execve),"b" ((long)(file)),"c" ((long)(argv)), ++ "d" ((long)(envp)) : "memory"); ++ return (int)res; ++} ++#endif ++ ++int sc_execve(char *cmd, char **argv, char **env) ++{ ++ int ret; ++#ifndef __i386__ ++ ret = execve(cmd, argv, env); ++#else ++ ret = __execve(cmd, argv, env); ++#endif ++ return ret; ++} ++ ++unsigned int test_cpu_caps() ++{ ++ unsigned int flags = 0; ++ if (boot_cpu_has(X86_FEATURE_CMOV)) ++ flags |= 1 << CPT_CPU_X86_CMOV; ++ if (cpu_has_fxsr) ++ flags |= 1 << CPT_CPU_X86_FXSR; ++ if (cpu_has_xmm) ++ flags |= 1 << CPT_CPU_X86_SSE; ++#ifndef CONFIG_X86_64 ++ if (cpu_has_xmm2) ++#endif ++ flags |= 1 << CPT_CPU_X86_SSE2; ++ if (cpu_has_mmx) ++ flags |= 1 << CPT_CPU_X86_MMX; ++ if (boot_cpu_has(X86_FEATURE_3DNOW)) ++ flags |= 1 << CPT_CPU_X86_3DNOW; ++ if (boot_cpu_has(X86_FEATURE_3DNOWEXT)) ++ flags |= 1 << CPT_CPU_X86_3DNOW2; ++ if (boot_cpu_has(X86_FEATURE_SEP)) ++ flags |= 1 << CPT_CPU_X86_SEP; ++#ifdef CONFIG_X86_64 ++ flags |= 1 << CPT_CPU_X86_EMT64; ++#endif ++ return flags; ++} ++ ++unsigned int test_kernel_config() ++{ ++ unsigned int flags = 0; ++#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) ++ flags |= 1 << CPT_KERNEL_CONFIG_PAE; ++#endif ++ return flags; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_kernel.h linux-2.6.16-026test015/kernel/cpt/cpt_kernel.h +--- linux-2.6.16.orig/kernel/cpt/cpt_kernel.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_kernel.h 2006-07-04 
14:41:39.000000000 +0400 +@@ -0,0 +1,74 @@ ++/* Interface to kernel vars which we had to _add_. */ ++ ++asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); ++ ++#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9) ++#define TASK_TRACED TASK_STOPPED ++#define unix_peer(sk) ((sk)->sk_pair) ++#define page_mapcount(pg) ((pg)->mapcount) ++#else ++#define unix_peer(sk) (unix_sk(sk)->peer) ++#endif ++ ++#ifdef CONFIG_X86_64 ++#define cpu_has_fxsr 1 ++#endif ++ ++static inline void do_gettimespec(struct timespec *ts) ++{ ++ struct timeval tv; ++ do_gettimeofday(&tv); ++ ts->tv_sec = tv.tv_sec; ++ ts->tv_nsec = tv.tv_usec*1000; ++} ++ ++int local_kernel_thread(int (*fn)(void *), ++ void * arg, ++ unsigned long flags, ++ pid_t pid); ++int asm_kernel_thread(int (*fn)(void *), ++ void * arg, ++ unsigned long flags, ++ pid_t pid); ++ ++unsigned int test_cpu_caps(void); ++unsigned int test_kernel_config(void); ++ ++#define test_one_flag(src, dst, flag, message, ret) \ ++if (src & (1 << flag)) \ ++ if (!(dst & (1 << flag))) { \ ++ wprintk("Destination cpu does not have " message "\n"); \ ++ ret = 1; \ ++ } ++ ++static inline void ++_set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) ++{ ++ while (nsec >= NSEC_PER_SEC) { ++ nsec -= NSEC_PER_SEC; ++ ++sec; ++ } ++ while (nsec < 0) { ++ nsec += NSEC_PER_SEC; ++ --sec; ++ } ++ ts->tv_sec = sec; ++ ts->tv_nsec = nsec; ++} ++ ++static inline struct timespec ++_ns_to_timespec(const nsec_t nsec) ++{ ++ struct timespec ts; ++ ++ if (!nsec) ++ return (struct timespec) {0, 0}; ++ ++ ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec); ++ if (unlikely(nsec < 0)) ++ _set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec); ++ ++ return ts; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_mm.c linux-2.6.16-026test015/kernel/cpt/cpt_mm.c +--- linux-2.6.16.orig/kernel/cpt/cpt_mm.c 2006-07-04 14:41:41.000000000 +0400 ++++ 
linux-2.6.16-026test015/kernel/cpt/cpt_mm.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,823 @@ ++/* ++ * ++ * kernel/cpt/cpt_mm.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/hugetlb.h> ++#include <linux/errno.h> ++#include <linux/ve.h> ++#include <linux/pagemap.h> ++#include <linux/rmap.h> ++#include <asm/ldt.h> ++#include <asm/mmu.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_kernel.h" ++#include "cpt_fsmagic.h" ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++#include "cpt_pagein.h" ++#endif ++#include "cpt_ubc.h" ++ ++static int collect_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx, ++ cpt_context_t *ctx) ++{ ++ if (!list_empty(&aio_ctx->run_list)) { ++ /* This is impossible at least with kernel 2.6.8.1 or 2.6.16 */ ++ eprintk_ctx("run list is not empty, cannot suspend AIO\n"); ++ return -EBUSY; ++ } ++ ++ /* Wait for pending IOCBs. Linux AIO is mostly _fake_. ++ * It is actually synchronous, except for direct IO and ++ * some funny raw USB things, which cannot happen inside VE. ++ * However, we do this for future. ++ * ++ * Later note: in 2.6.16 we may allow O_DIRECT, so that ++ * it is not meaningless code. 
++ */ ++ wait_for_all_aios(aio_ctx); ++ ++ if (!list_empty(&aio_ctx->run_list) || ++ !list_empty(&aio_ctx->active_reqs) || ++ aio_ctx->reqs_active) { ++ eprintk_ctx("were not able to suspend AIO\n"); ++ return -EBUSY; ++ } ++ ++ return 0; ++} ++ ++static int collect_one_mm(struct mm_struct *mm, cpt_context_t * ctx) ++{ ++ struct vm_area_struct *vma; ++ ++ for (vma = mm->mmap; vma; vma = vma->vm_next) { ++ if (vma->vm_file) { ++ if (cpt_object_add(CPT_OBJ_FILE, vma->vm_file, ctx) == NULL) ++ return -ENOMEM; ++ } ++ } ++ if (cpt_add_ubc(mm->mm_ub, ctx) == NULL) ++ return -ENOMEM; ++ ++ if (mm->ioctx_list) { ++ struct kioctx *aio_ctx; ++ int err; ++ ++ for (aio_ctx = mm->ioctx_list; aio_ctx; aio_ctx = aio_ctx->next) ++ if ((err = collect_one_aio_ctx(mm, aio_ctx, ctx)) != 0) ++ return err; ++ } ++ ++ return 0; ++} ++ ++int cpt_collect_mm(cpt_context_t * ctx) ++{ ++ cpt_object_t *obj; ++ int err; ++ int index; ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ if (tsk->mm && cpt_object_add(CPT_OBJ_MM, tsk->mm, ctx) == NULL) ++ return -ENOMEM; ++ } ++ ++ index = 1; ++ for_each_object(obj, CPT_OBJ_MM) { ++ struct mm_struct *mm = obj->o_obj; ++ if (obj->o_count != atomic_read(&mm->mm_users)) { ++ eprintk_ctx("mm_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&mm->mm_users)); ++ return -EBUSY; ++ } ++ cpt_obj_setindex(obj, index++, ctx); ++ ++ if ((err = collect_one_mm(mm, ctx)) != 0) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static int zcnt, scnt, scnt0, ucnt; ++ ++/* Function where_is_anon_page() returns address of an anonymous page in mm ++ * of already dumped process. This happens f.e. after fork(). We do not use ++ * this right now, just keep statistics, it is difficult to restore such state, ++ * but the most direct use is to save space in dumped image. 
*/ ++ ++ ++static inline unsigned long ++vma_address0(struct page *page, struct vm_area_struct *vma) ++{ ++ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); ++ unsigned long address; ++ ++ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ++ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) ++ address |= 1; ++ return address; ++} ++ ++static int really_this_one(struct vm_area_struct *vma, unsigned long address, ++ struct page *page) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ spinlock_t *ptl; ++ int result; ++ ++ pgd = pgd_offset(mm, address); ++ if (unlikely(!pgd_present(*pgd))) ++ return 0; ++ ++ pud = pud_offset(pgd, address); ++ if (!pud_present(*pud)) ++ return 0; ++ ++ pmd = pmd_offset(pud, address); ++ if (unlikely(!pmd_present(*pmd))) ++ return 0; ++ ++ result = 0; ++ pte = pte_offset_map(pmd, address); ++ if (!pte_present(*pte)) { ++ pte_unmap(pte); ++ return 0; ++ } ++ ++ ptl = pte_lockptr(mm, pmd); ++ if (!spin_trylock(ptl)) { ++ pte_unmap(pte); ++ return 0; ++ } ++ if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) ++ result = 1; ++ pte_unmap_unlock(pte, ptl); ++ return result; ++} ++ ++static loff_t where_is_anon_page(cpt_object_t *mmobj, unsigned long mapaddr, ++ struct page *page, cpt_context_t * ctx) ++{ ++ loff_t mmptr = CPT_NULL; ++ struct anon_vma *anon_vma; ++ struct vm_area_struct *vma; ++ int idx = mmobj->o_index; ++ ++ if (!PageAnon(page)) ++ return CPT_NULL; ++ ++ anon_vma = page_lock_anon_vma(page); ++ if (!anon_vma) ++ return CPT_NULL; ++ ++ list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { ++ unsigned long addr = vma_address0(page, vma); ++ cpt_object_t *obj; ++ ++ /* We do not try to support mremapped regions (addr != mapaddr), ++ * only mmaps directly inherited via fork(). 
++ * With this limitation we may check self-consistency of ++ * vmas (vm_start, vm_pgoff, anon_vma) before ++ * doing __copy_page_range() in rst_mm. ++ */ ++ if (mmobj->o_obj != vma->vm_mm && addr == mapaddr) { ++ obj = lookup_cpt_object(CPT_OBJ_MM, vma->vm_mm, ctx); ++ if (obj && obj->o_pos != CPT_NULL && obj->o_index < idx) { ++ if (really_this_one(vma, addr, page)) { ++ mmptr = obj->o_pos; ++ idx = obj->o_index; ++ } ++ } ++ } ++ } ++ spin_unlock(&anon_vma->lock); ++ ++ return mmptr; ++} ++ ++struct page_area ++{ ++ int type; ++ unsigned long start; ++ unsigned long end; ++ pgoff_t pgoff; ++ loff_t mm; ++}; ++ ++struct page_desc ++{ ++ int type; ++ pgoff_t index; ++ loff_t mm; ++ int shared; ++}; ++ ++enum { ++ PD_ABSENT, ++ PD_COPY, ++ PD_ZERO, ++ PD_CLONE, ++ PD_FUNKEY, ++ PD_LAZY ++}; ++ ++/* 0: page can be obtained from backstore, or still not mapped anonymous page, ++ or something else, which does not require copy. ++ 1: page requires copy ++ 2: page requires copy but its content is zero. Quite useless. ++ 3: wp page is shared after fork(). It is to be COWed when modified. ++ 4: page is something unsupported... We copy it right now. 
++ */ ++ ++ ++ ++static void page_get_desc(cpt_object_t *mmobj, ++ struct vm_area_struct *vma, unsigned long addr, ++ struct page_desc *pdesc, cpt_context_t * ctx) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *ptep, pte; ++ spinlock_t *ptl; ++ struct page *pg; ++ pgoff_t linear_index = (addr - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff; ++ ++ pdesc->index = linear_index; ++ pdesc->shared = 0; ++ ++ if (vma->vm_flags & VM_IO) { ++ pdesc->type = PD_ABSENT; ++ return; ++ } ++ ++ pgd = pgd_offset(mm, addr); ++ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) ++ goto out_absent; ++ pud = pud_offset(pgd, addr); ++ if (pud_none(*pud) || unlikely(pud_bad(*pud))) ++ goto out_absent; ++ pmd = pmd_offset(pud, addr); ++ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) ++ goto out_absent; ++ if (pmd_huge(*pmd)) { ++ eprintk_ctx("page_huge\n"); ++ goto out_unsupported; ++ } ++ ++ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ if (!ptep) ++ goto out_absent; ++ ++ pte = *ptep; ++ if (pte_none(pte)) ++ goto out_absent_unmap; ++ ++ if (!pte_present(pte)) { ++ if (pte_file(pte)) { ++ pdesc->index = pte_to_pgoff(pte); ++ goto out_absent_unmap; ++ } ++ if (vma->vm_flags & VM_SHARED) { ++ /* It is impossible: shared mappings cannot be in swap */ ++ eprintk_ctx("shared mapping is not present: %08lx@%Ld\n", addr, mmobj->o_pos); ++ goto out_unsupported_unmap; ++ } ++ /* Otherwise it is in swap. 
*/ ++ goto out_lazy_unmap; ++ } else if ((pg = vm_normal_page(vma, addr, pte)) != NULL) { ++ ++ if (pg->mapping && !PageAnon(pg)) { ++ if (vma->vm_file == NULL) { ++ eprintk_ctx("pg->mapping!=NULL for fileless vma: %08lx\n", addr); ++ goto out_unsupported_unmap; ++ } ++ if (vma->vm_file->f_mapping != pg->mapping) { ++ eprintk_ctx("pg->mapping!=f_mapping: %08lx %p %p %Ld\n", addr, vma->vm_file->f_mapping, pg->mapping, mmobj->o_pos); ++ goto out_unsupported_unmap; ++ } ++ pdesc->index = (pg->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)); ++ /* Page is in backstore. For us it is like ++ * it is not present. ++ */ ++ goto out_absent_unmap; ++ } ++ ++ if (PageReserved(pg)) { ++ /* Special case: ZERO_PAGE is used, when an ++ * anonymous page is accessed but not written. */ ++ if (pg == ZERO_PAGE(addr)) { ++ if (pte_write(pte)) { ++ eprintk_ctx("not funny already, writable ZERO_PAGE\n"); ++ goto out_unsupported_unmap; ++ } ++ zcnt++; ++ goto out_absent_unmap; ++ } ++ eprintk_ctx("reserved page %lu at %08lx@%Ld\n", pg->index, addr, mmobj->o_pos); ++ goto out_unsupported_unmap; ++ } ++ ++ if (pg == ZERO_PAGE(addr)) { ++ wprintk_ctx("that's how it works now\n"); ++ } ++ ++ if (!pg->mapping) { ++ eprintk_ctx("page without mapping at %08lx@%Ld\n", addr, mmobj->o_pos); ++ goto out_unsupported_unmap; ++ } ++ ++ if (pg->mapping && page_mapcount(pg) > 1) { ++ pdesc->shared = 1; ++ pdesc->mm = where_is_anon_page(mmobj, addr, pg, ctx); ++ if (pdesc->mm != CPT_NULL) { ++ scnt0++; ++ goto out_clone_unmap; ++ } else { ++ scnt++; ++ } ++ } ++ ++ if (!pte_young(pte)) ++ goto out_lazy_unmap; ++ } ++ pte_unmap_unlock(ptep, ptl); ++ pdesc->type = PD_COPY; ++ return; ++ ++out_lazy_unmap: ++ pte_unmap_unlock(ptep, ptl); ++ pdesc->type = PD_LAZY; ++ return; ++ ++out_absent_unmap: ++ pte_unmap_unlock(ptep, ptl); ++out_absent: ++ pdesc->type = PD_ABSENT; ++ return; ++ ++out_clone_unmap: ++ pte_unmap_unlock(ptep, ptl); ++ pdesc->type = PD_CLONE; ++ return; ++ ++out_unsupported_unmap: ++ 
pte_unmap_unlock(ptep, ptl); ++out_unsupported: ++ ucnt++; ++ pdesc->type = PD_FUNKEY; ++ return; ++} ++ ++/* ATTN: We give "current" to get_user_pages(). This is wrong, but get_user_pages() ++ * does not really need this thing. It just stores some page fault stats there. ++ * ++ * BUG: some archs (f.e. sparc64, but not Intel*) require flush cache pages ++ * before accessing vma. ++ */ ++void dump_pages(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end, struct cpt_context *ctx) ++{ ++#define MAX_PAGE_BATCH 16 ++ struct page *pg[MAX_PAGE_BATCH]; ++ int npages = (end - start)/PAGE_SIZE; ++ int count = 0; ++ ++ while (count < npages) { ++ int copy = npages - count; ++ int n; ++ ++ if (copy > MAX_PAGE_BATCH) ++ copy = MAX_PAGE_BATCH; ++ n = get_user_pages(current, vma->vm_mm, start, copy, ++ 0, 1, pg, NULL); ++ if (n == copy) { ++ int i; ++ for (i=0; i<n; i++) { ++ char *maddr = kmap(pg[i]); ++ ctx->write(maddr, PAGE_SIZE, ctx); ++ kunmap(pg[i]); ++ } ++ } else { ++ eprintk_ctx("get_user_pages fault"); ++ for ( ; n > 0; n--) ++ page_cache_release(pg[n-1]); ++ return; ++ } ++ start += n*PAGE_SIZE; ++ count += n; ++ for ( ; n > 0; n--) ++ page_cache_release(pg[n-1]); ++ } ++ return; ++} ++ ++int dump_page_block(struct vm_area_struct *vma, struct cpt_page_block *pgb, ++ int copy, ++ struct cpt_context *ctx) ++{ ++ loff_t saved_object; ++ ++ cpt_push_object(&saved_object, ctx); ++ ++ pgb->cpt_object = (copy != PD_LAZY) ? CPT_OBJ_PAGES : CPT_OBJ_LAZYPAGES; ++ pgb->cpt_hdrlen = sizeof(*pgb); ++ pgb->cpt_content = (copy == PD_COPY || copy == PD_LAZY) ? 
CPT_CONTENT_DATA : CPT_CONTENT_VOID; ++ ++ ctx->write(pgb, sizeof(*pgb), ctx); ++ if (copy == PD_COPY || copy == PD_LAZY) ++ dump_pages(vma, pgb->cpt_start, pgb->cpt_end, ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_object, ctx); ++ return 0; ++} ++ ++int dump_remappage_block(struct vm_area_struct *vma, struct page_area *pa, ++ struct cpt_context *ctx) ++{ ++ struct cpt_remappage_block pgb; ++ loff_t saved_object; ++ ++ cpt_push_object(&saved_object, ctx); ++ ++ pgb.cpt_object = CPT_OBJ_REMAPPAGES; ++ pgb.cpt_hdrlen = sizeof(pgb); ++ pgb.cpt_content = CPT_CONTENT_VOID; ++ pgb.cpt_start = pa->start; ++ pgb.cpt_end = pa->end; ++ pgb.cpt_pgoff = pa->pgoff - (pa->end-pa->start)/PAGE_SIZE + 1; ++ ++ ctx->write(&pgb, sizeof(pgb), ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_object, ctx); ++ return 0; ++} ++ ++int dump_copypage_block(struct vm_area_struct *vma, struct page_area *pa, ++ struct cpt_context *ctx) ++{ ++ struct cpt_copypage_block pgb; ++ loff_t saved_object; ++ ++ cpt_push_object(&saved_object, ctx); ++ ++ pgb.cpt_object = CPT_OBJ_COPYPAGES; ++ pgb.cpt_hdrlen = sizeof(pgb); ++ pgb.cpt_content = CPT_CONTENT_VOID; ++ pgb.cpt_start = pa->start; ++ pgb.cpt_end = pa->end; ++ pgb.cpt_source = pa->mm; ++ ++ ctx->write(&pgb, sizeof(pgb), ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_object, ctx); ++ return 0; ++} ++ ++int dump_lazypage_block(struct vm_area_struct *vma, struct page_area *pa, ++ cpt_context_t *ctx) ++{ ++ struct cpt_lazypage_block pgb; ++ loff_t saved_object; ++ ++ cpt_push_object(&saved_object, ctx); ++ ++ pgb.cpt_object = CPT_OBJ_LAZYPAGES; ++ pgb.cpt_hdrlen = sizeof(pgb); ++ pgb.cpt_content = CPT_CONTENT_VOID; ++ pgb.cpt_start = pa->start; ++ pgb.cpt_end = pa->end; ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ pgb.cpt_index = cpt_alloc_pgin_index(vma, pa->start, ++ (pa->end-pa->start)/PAGE_SIZE, ctx); ++#endif ++ ctx->write(&pgb, sizeof(pgb), ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_object, ctx); ++ return 
0; ++} ++ ++static int can_expand(struct page_area *pa, struct page_desc *pd) ++{ ++ if (pa->start == pa->end) ++ return 1; ++ if (pa->type != pd->type) ++ return 0; ++ if (pa->type == PD_ABSENT) ++ return pd->index == pa->pgoff + 1; ++ if (pa->type == PD_CLONE) ++ return pd->mm == pa->mm; ++ return 1; ++} ++ ++static int dump_one_vma(cpt_object_t *mmobj, ++ struct vm_area_struct *vma, struct cpt_context *ctx) ++{ ++ struct cpt_vma_image *v = cpt_get_buf(ctx); ++ unsigned long addr; ++ loff_t saved_object; ++ struct cpt_page_block pgb; ++ struct page_area pa; ++ int cloned_pages = 0; ++ ++ cpt_push_object(&saved_object, ctx); ++ ++ v->cpt_object = CPT_OBJ_VMA; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ v->cpt_start = vma->vm_start; ++ v->cpt_end = vma->vm_end; ++ v->cpt_flags = vma->vm_flags; ++ if (vma->vm_flags&VM_HUGETLB) { ++ eprintk_ctx("huge TLB VMAs are still not supported\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ v->cpt_pgprot = vma->vm_page_prot.pgprot; ++ v->cpt_pgoff = vma->vm_pgoff; ++ v->cpt_file = CPT_NULL; ++ v->cpt_type = CPT_VMA_TYPE_0; ++ v->cpt_anonvma = 0; ++ ++ /* We have to remember what VMAs are bound to one anon_vma. ++ * So, we store an identifier of group of VMAs. It is handy ++ * to use absolute address of anon_vma as this identifier. 
*/ ++ v->cpt_anonvmaid = (unsigned long)vma->anon_vma; ++ ++ if (vma->vm_file) { ++ struct file *filp; ++ cpt_object_t *obj = lookup_cpt_object(CPT_OBJ_FILE, vma->vm_file, ctx); ++ if (obj == NULL) BUG(); ++ filp = obj->o_obj; ++ if (filp->f_op && ++ filp->f_op->read == NULL && ++ filp->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_TMPFS) ++ v->cpt_type = CPT_VMA_TYPE_SHM; ++ v->cpt_file = obj->o_pos; ++ } ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ pa.type = PD_ABSENT; ++ pa.pgoff = vma->vm_pgoff; ++ pa.mm = CPT_NULL; ++ pa.start = vma->vm_start; ++ pa.end = vma->vm_start; ++ ++ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { ++ struct page_desc pd; ++ ++ page_get_desc(mmobj, vma, addr, &pd, ctx); ++ cloned_pages += pd.shared; ++ ++ if (pd.type == PD_FUNKEY) { ++ eprintk_ctx("dump_one_vma: funkey page\n"); ++ return -EINVAL; ++ } ++ ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ if (pd.type == PD_LAZY && ++ (ctx->lazy_vm == 0 || (vma->vm_flags&VM_LOCKED))) ++ pd.type = PD_COPY; ++#else ++ if (pd.type == PD_LAZY) ++ pd.type = PD_COPY; ++#endif ++ ++ if (!can_expand(&pa, &pd)) { ++ if (pa.type == PD_COPY || ++ pa.type == PD_ZERO) { ++ pgb.cpt_start = pa.start; ++ pgb.cpt_end = pa.end; ++ dump_page_block(vma, &pgb, pa.type, ctx); ++ } else if (pa.type == PD_CLONE) { ++ dump_copypage_block(vma, &pa, ctx); ++ cloned_pages++; ++ } else if (pa.type == PD_LAZY) { ++ dump_lazypage_block(vma, &pa, ctx); ++ } else if (pa.type == PD_ABSENT && ++ pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) { ++ dump_remappage_block(vma, &pa, ctx); ++ } ++ pa.start = addr; ++ } ++ pa.type = pd.type; ++ pa.end = addr + PAGE_SIZE; ++ pa.pgoff = pd.index; ++ pa.mm = pd.mm; ++ } ++ ++ if (pa.end > pa.start) { ++ if (pa.type == PD_COPY || ++ pa.type == PD_ZERO) { ++ pgb.cpt_start = pa.start; ++ pgb.cpt_end = pa.end; ++ dump_page_block(vma, &pgb, pa.type, ctx); ++ } else if (pa.type == PD_CLONE) { ++ dump_copypage_block(vma, &pa, ctx); ++ 
cloned_pages++; ++ } else if (pa.type == PD_LAZY) { ++ dump_lazypage_block(vma, &pa, ctx); ++ } else if (pa.type == PD_ABSENT && ++ pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) { ++ dump_remappage_block(vma, &pa, ctx); ++ } ++ } ++ ++ if (cloned_pages) { ++ __u32 anonvma = 1; ++ loff_t anonpos = ctx->current_object + offsetof(struct cpt_vma_image, cpt_anonvma); ++ ctx->pwrite(&anonvma, 4, ctx, anonpos); ++ } ++ ++ cpt_close_object(ctx); ++ ++ cpt_pop_object(&saved_object, ctx); ++ ++ return 0; ++} ++ ++static int dump_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx, ++ cpt_context_t *ctx) ++{ ++ loff_t saved_object; ++ struct cpt_aio_ctx_image aimg; ++ ++ if (!list_empty(&aio_ctx->run_list) || ++ !list_empty(&aio_ctx->active_reqs) || ++ aio_ctx->reqs_active) { ++ eprintk_ctx("AIO is active after suspend\n"); ++ return -EBUSY; ++ } ++ ++ cpt_push_object(&saved_object, ctx); ++ ++ aimg.cpt_next = CPT_ALIGN(sizeof(aimg)); ++ aimg.cpt_object = CPT_OBJ_AIO_CONTEXT; ++ aimg.cpt_hdrlen = sizeof(aimg); ++ aimg.cpt_content = CPT_CONTENT_ARRAY; ++ ++ aimg.cpt_max_reqs = aio_ctx->max_reqs; ++ aimg.cpt_ring_pages = aio_ctx->ring_info.nr_pages; ++ aimg.cpt_nr = aio_ctx->ring_info.nr; ++ aimg.cpt_tail = aio_ctx->ring_info.tail; ++ aimg.cpt_mmap_base = aio_ctx->ring_info.mmap_base; ++ ++ ctx->write(&aimg, sizeof(aimg), ctx); ++ ++ cpt_pop_object(&saved_object, ctx); ++ return 0; ++} ++ ++static int dump_one_mm(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ struct mm_struct *mm = obj->o_obj; ++ struct vm_area_struct *vma; ++ struct cpt_mm_image *v = cpt_get_buf(ctx); ++ ++ cpt_open_object(obj, ctx); ++ ++ v->cpt_next = -1; ++ v->cpt_object = CPT_OBJ_MM; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ v->cpt_start_code = mm->start_code; ++ v->cpt_end_code = mm->end_code; ++ v->cpt_start_data = mm->start_data; ++ v->cpt_end_data = mm->end_data; ++ v->cpt_start_brk = mm->start_brk; ++ v->cpt_brk = mm->brk; ++ 
v->cpt_start_stack = mm->start_stack; ++ v->cpt_start_arg = mm->arg_start; ++ v->cpt_end_arg = mm->arg_end; ++ v->cpt_start_env = mm->env_start; ++ v->cpt_end_env = mm->env_end; ++ v->cpt_def_flags = mm->def_flags; ++ v->cpt_mmub = cpt_lookup_ubc(mm->mm_ub, ctx); ++ v->cpt_dumpable = mm->dumpable; ++ v->cpt_vps_dumpable = mm->vps_dumpable; ++ v->cpt_used_hugetlb = 0; /* not used */ ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ if (mm->context.size) { ++ loff_t saved_object; ++ struct cpt_obj_bits b; ++ int size; ++ ++ dprintk_ctx("nontrivial LDT\n"); ++ ++ cpt_push_object(&saved_object, ctx); ++ ++ cpt_open_object(NULL, ctx); ++ b.cpt_next = CPT_NULL; ++ b.cpt_object = CPT_OBJ_BITS; ++ b.cpt_hdrlen = sizeof(b); ++ b.cpt_content = CPT_CONTENT_MM_CONTEXT; ++ b.cpt_size = mm->context.size*LDT_ENTRY_SIZE; ++ ++ ctx->write(&b, sizeof(b), ctx); ++ ++ size = mm->context.size*LDT_ENTRY_SIZE; ++ ++#if defined(CONFIG_X86_64) || LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) ++ ctx->write(mm->context.ldt, size, ctx); ++#else ++ for (i = 0; i < size; i += PAGE_SIZE) { ++ int nr = i / PAGE_SIZE, bytes; ++ char *kaddr = kmap(mm->context.ldt_pages[nr]); ++ ++ bytes = size - i; ++ if (bytes > PAGE_SIZE) ++ bytes = PAGE_SIZE; ++ ctx->write(kaddr, bytes, ctx); ++ kunmap(mm->context.ldt_pages[nr]); ++ } ++#endif ++ ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_object, ctx); ++ } ++ ++ for (vma = mm->mmap; vma; vma = vma->vm_next) { ++ int err; ++ ++#ifdef CONFIG_X86_64 ++ if (vma->vm_start == 0xFFFFE000 && ++ vma->vm_end == 0xFFFFF000) ++ continue; ++#endif ++ ++ if ((err = dump_one_vma(obj, vma, ctx)) != 0) ++ return err; ++ } ++ ++ if (mm->ioctx_list) { ++ struct kioctx *aio_ctx; ++ int err; ++ ++ for (aio_ctx = mm->ioctx_list; aio_ctx; aio_ctx = aio_ctx->next) ++ if ((err = dump_one_aio_ctx(mm, aio_ctx, ctx)) != 0) ++ return err; ++ } ++ ++ cpt_close_object(ctx); ++ ++ return 0; ++} ++ ++int cpt_dump_vm(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; 
++ ++ scnt = scnt0 = zcnt = 0; ++ ++ cpt_open_section(ctx, CPT_SECT_MM); ++ ++ for_each_object(obj, CPT_OBJ_MM) { ++ int err; ++ ++ if ((err = dump_one_mm(obj, ctx)) != 0) ++ return err; ++ } ++ ++ cpt_close_section(ctx); ++ ++ if (scnt) ++ dprintk_ctx("cpt_dump_vm: %d shared private anon pages\n", scnt); ++ if (scnt0) ++ dprintk_ctx("cpt_dump_vm: %d anon pages are cloned\n", scnt0); ++ if (zcnt) ++ dprintk_ctx("cpt_dump_vm: %d silly pages canceled\n", zcnt); ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_mm.h linux-2.6.16-026test015/kernel/cpt/cpt_mm.h +--- linux-2.6.16.orig/kernel/cpt/cpt_mm.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_mm.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,16 @@ ++int cpt_collect_mm(cpt_context_t *); ++ ++int cpt_dump_vm(struct cpt_context *ctx); ++ ++__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx); ++int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx); ++int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx); ++ ++int cpt_mm_prepare(unsigned long veid); ++ ++int cpt_free_pgin_dir(struct cpt_context *); ++int cpt_start_pagein(struct cpt_context *); ++int rst_setup_pagein(struct cpt_context *); ++int rst_complete_pagein(struct cpt_context *, int); ++int rst_pageind(struct cpt_context *); ++int rst_swapoff(struct cpt_context *); +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_net.c linux-2.6.16-026test015/kernel/cpt/cpt_net.c +--- linux-2.6.16.orig/kernel/cpt/cpt_net.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_net.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,428 @@ ++/* ++ * ++ * kernel/cpt/cpt_net.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/fs.h> ++#include <linux/socket.h> ++#include <linux/netdevice.h> ++#include <linux/inetdevice.h> ++#include <net/addrconf.h> ++#include <linux/rtnetlink.h> ++#include <linux/ve.h> ++#include <linux/ve_proto.h> ++#include <linux/vzcalluser.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_kernel.h" ++#include "cpt_syscalls.h" ++ ++int cpt_dump_link(struct cpt_context * ctx) ++{ ++ struct net_device *dev; ++ ++ cpt_open_section(ctx, CPT_SECT_NET_DEVICE); ++ for (dev = dev_base; dev; dev = dev->next) { ++ struct cpt_netdev_image v; ++ ++ cpt_open_object(NULL, ctx); ++ ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_NET_DEVICE; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_VOID; ++ ++ v.cpt_index = dev->ifindex; ++ v.cpt_flags = dev->flags; ++ memcpy(v.cpt_name, dev->name, IFNAMSIZ); ++ ctx->write(&v, sizeof(v), ctx); ++ cpt_close_object(ctx); ++ ++ if (strcmp(dev->name, "lo") != 0 && ++ strcmp(dev->name, "venet0") != 0) { ++ eprintk_ctx("unsupported netdevice %s\n", dev->name); ++ cpt_close_section(ctx); ++ return -EBUSY; ++ } ++ } ++ cpt_close_section(ctx); ++ return 0; ++} ++ ++int cpt_suspend_network(struct cpt_context *ctx) ++{ ++ get_exec_env()->disable_net = 1; ++ synchronize_net(); ++ return 0; ++} ++ ++int cpt_resume_network(struct cpt_context *ctx) ++{ ++ struct ve_struct *env; ++ env = get_ve_by_id(ctx->ve_id); ++ if (!env) ++ return -ESRCH; ++ env->disable_net = 0; ++ put_ve(env); ++ return 0; ++} ++ ++int cpt_dump_ifaddr(struct cpt_context * ctx) ++{ ++ struct net_device *dev; ++ ++ cpt_open_section(ctx, CPT_SECT_NET_IFADDR); ++ for (dev = dev_base; dev; dev = dev->next) { ++ struct 
in_device *idev = in_dev_get(dev); ++ struct in_ifaddr *ifa; ++ ++ if (!idev) ++ continue; ++ ++ for (ifa = idev->ifa_list; ifa; ifa = ifa->ifa_next) { ++ struct cpt_ifaddr_image v; ++ cpt_open_object(NULL, ctx); ++ ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_NET_IFADDR; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_VOID; ++ ++ v.cpt_index = dev->ifindex; ++ v.cpt_family = AF_INET; ++ v.cpt_masklen = ifa->ifa_prefixlen; ++ v.cpt_flags = ifa->ifa_flags; ++ v.cpt_scope = ifa->ifa_scope; ++ memset(&v.cpt_address, 0, sizeof(v.cpt_address)); ++ memset(&v.cpt_peer, 0, sizeof(v.cpt_peer)); ++ memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast)); ++ v.cpt_address[0] = ifa->ifa_local; ++ v.cpt_peer[0] = ifa->ifa_address; ++ v.cpt_broadcast[0] = ifa->ifa_broadcast; ++ memcpy(v.cpt_label, ifa->ifa_label, IFNAMSIZ); ++ ctx->write(&v, sizeof(v), ctx); ++ cpt_close_object(ctx); ++ } ++ in_dev_put(idev); ++ } ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) ++ for (dev = dev_base; dev; dev = dev->next) { ++ struct inet6_dev *idev = in6_dev_get(dev); ++ struct inet6_ifaddr *ifa; ++ ++ if (!idev) ++ continue; ++ ++ for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { ++ struct cpt_ifaddr_image v; ++ ++ if (dev == &loopback_dev && ++ ifa->prefix_len == 128 && ++ ifa->addr.s6_addr32[0] == 0 && ++ ifa->addr.s6_addr32[1] == 0 && ++ ifa->addr.s6_addr32[2] == 0 && ++ ifa->addr.s6_addr32[3] == htonl(1)) ++ continue; ++ ++ cpt_open_object(NULL, ctx); ++ ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_NET_IFADDR; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_VOID; ++ ++ v.cpt_index = dev->ifindex; ++ v.cpt_family = AF_INET6; ++ v.cpt_masklen = ifa->prefix_len; ++ v.cpt_flags = ifa->flags; ++ v.cpt_scope = ifa->scope; ++ memcpy(&v.cpt_address, &ifa->addr, 16); ++ memcpy(&v.cpt_peer, &ifa->addr, 16); ++ memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast)); ++ memcpy(v.cpt_label, dev->name, IFNAMSIZ); ++ ctx->write(&v, sizeof(v), ctx); ++ 
cpt_close_object(ctx); ++ } ++ in6_dev_put(idev); ++ } ++#endif ++ cpt_close_section(ctx); ++ return 0; ++} ++ ++static int cpt_dump_route(struct cpt_context * ctx) ++{ ++ int err; ++ struct socket *sock; ++ struct msghdr msg; ++ struct iovec iov; ++ struct { ++ struct nlmsghdr nlh; ++ struct rtgenmsg g; ++ } req; ++ struct sockaddr_nl nladdr; ++ struct cpt_object_hdr v; ++ mm_segment_t oldfs; ++ char *pg; ++ ++ err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock); ++ if (err) ++ return err; ++ ++ memset(&nladdr, 0, sizeof(nladdr)); ++ nladdr.nl_family = AF_NETLINK; ++ ++ req.nlh.nlmsg_len = sizeof(req); ++ req.nlh.nlmsg_type = RTM_GETROUTE; ++ req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST; ++ req.nlh.nlmsg_pid = 0; ++ req.g.rtgen_family = AF_INET; ++ ++ iov.iov_base=&req; ++ iov.iov_len=sizeof(req); ++ msg.msg_name=&nladdr; ++ msg.msg_namelen=sizeof(nladdr); ++ msg.msg_iov=&iov; ++ msg.msg_iovlen=1; ++ msg.msg_control=NULL; ++ msg.msg_controllen=0; ++ msg.msg_flags=MSG_DONTWAIT; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ err = sock_sendmsg(sock, &msg, sizeof(req)); ++ set_fs(oldfs); ++ ++ if (err < 0) ++ goto out_sock; ++ ++ pg = (char*)__get_free_page(GFP_KERNEL); ++ if (pg == NULL) { ++ err = -ENOMEM; ++ goto out_sock; ++ } ++ ++ cpt_open_section(ctx, CPT_SECT_NET_ROUTE); ++ cpt_open_object(NULL, ctx); ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_NET_ROUTE; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_NLMARRAY; ++ ++ ctx->write(&v, sizeof(v), ctx); ++ ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) ++restart: ++#endif ++ for (;;) { ++ struct nlmsghdr *h; ++ ++ iov.iov_base = pg; ++ iov.iov_len = PAGE_SIZE; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT); ++ set_fs(oldfs); ++ ++ if (err < 0) ++ goto out_sock_pg; ++ if (msg.msg_flags & MSG_TRUNC) { ++ err = -ENOBUFS; ++ goto out_sock_pg; ++ } ++ ++ h = (struct nlmsghdr*)pg; ++ while (NLMSG_OK(h, 
err)) { ++ if (h->nlmsg_type == NLMSG_DONE) { ++ err = 0; ++ goto done; ++ } ++ if (h->nlmsg_type == NLMSG_ERROR) { ++ struct nlmsgerr *errm = (struct nlmsgerr*)NLMSG_DATA(h); ++ err = errm->error; ++ eprintk_ctx("NLMSG error: %d\n", errm->error); ++ goto done; ++ } ++ if (h->nlmsg_type != RTM_NEWROUTE) { ++ eprintk_ctx("NLMSG: %d\n", h->nlmsg_type); ++ err = -EINVAL; ++ goto done; ++ } ++ ctx->write(h, NLMSG_ALIGN(h->nlmsg_len), ctx); ++ h = NLMSG_NEXT(h, err); ++ } ++ if (err) { ++ eprintk_ctx("!!!Remnant of size %d %d %d\n", err, h->nlmsg_len, h->nlmsg_type); ++ err = -EINVAL; ++ break; ++ } ++ } ++done: ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) ++ if (!err && req.g.rtgen_family == AF_INET) { ++ req.g.rtgen_family = AF_INET6; ++ iov.iov_base=&req; ++ iov.iov_len=sizeof(req); ++ msg.msg_name=&nladdr; ++ msg.msg_namelen=sizeof(nladdr); ++ msg.msg_iov=&iov; ++ msg.msg_iovlen=1; ++ msg.msg_control=NULL; ++ msg.msg_controllen=0; ++ msg.msg_flags=MSG_DONTWAIT; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ err = sock_sendmsg(sock, &msg, sizeof(req)); ++ set_fs(oldfs); ++ ++ if (err > 0) ++ goto restart; ++ } ++#endif ++ cpt_close_object(ctx); ++ cpt_close_section(ctx); ++ ++out_sock_pg: ++ free_page((unsigned long)pg); ++out_sock: ++ sock_release(sock); ++ return err; ++} ++ ++static int dumpfn(void *arg) ++{ ++ int i; ++ int *pfd = arg; ++ char *argv[] = { "iptables-save", "-c", NULL }; ++ ++ i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0); ++ if (i < 0) { ++ eprintk("cannot enter ve to dump iptables\n"); ++ module_put(THIS_MODULE); ++ return 1; ++ } ++ ++ if (pfd[1] != 1) ++ sc_dup2(pfd[1], 1); ++ ++ for (i=0; i<current->files->fdt->max_fds; i++) { ++ if (i != 1) ++ sc_close(i); ++ } ++ ++ module_put(THIS_MODULE); ++ ++ set_fs(KERNEL_DS); ++ i = sc_execve("/sbin/iptables-save", argv, NULL); ++ eprintk("failed to exec /sbin/iptables-save: %d\n", i); ++ return -1; ++} ++ ++ ++static int cpt_dump_iptables(struct cpt_context * 
ctx) ++{ ++ int err; ++ int pid; ++ int pfd[2]; ++ struct file *f; ++ struct cpt_object_hdr v; ++ char buf[16]; ++ loff_t pos; ++ int n; ++ ++ err = sc_pipe(pfd); ++ if (err < 0) { ++ eprintk_ctx("sc_pipe: %d\n", err); ++ return err; ++ } ++ err = pid = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0); ++ if (err < 0) { ++ eprintk_ctx("local_kernel_thread: %d\n", err); ++ goto out; ++ } ++ f = fget(pfd[0]); ++ sc_close(pfd[1]); ++ sc_close(pfd[0]); ++ ++ cpt_open_section(ctx, CPT_SECT_NET_IPTABLES); ++ ++ cpt_open_object(NULL, ctx); ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_NAME; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_NAME; ++ ++ ctx->write(&v, sizeof(v), ctx); ++ ++ pos = ctx->file->f_pos; ++ do { ++ mm_segment_t oldfs; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos); ++ set_fs(oldfs); ++ if (n > 0) ++ ctx->write(buf, n, ctx); ++ } while (n > 0); ++ ++ if (n < 0) ++ eprintk_ctx("read: %d\n", n); ++ ++ fput(f); ++ ++ if ((err = sc_waitx(pid, 0)) < 0) ++ eprintk_ctx("wait4: %d\n", err); ++ ++ if (ctx->file->f_pos != pos) { ++ buf[0] = 0; ++ ctx->write(buf, 1, ctx); ++ ctx->align(ctx); ++ cpt_close_object(ctx); ++ cpt_close_section(ctx); ++ } else { ++ pos = ctx->current_section; ++ cpt_close_object(ctx); ++ cpt_close_section(ctx); ++ ctx->sections[CPT_SECT_NET_IPTABLES] = CPT_NULL; ++ ctx->file->f_pos = pos; ++ } ++ return n; ++ ++out: ++ if (pfd[1] >= 0) ++ sc_close(pfd[1]); ++ if (pfd[0] >= 0) ++ sc_close(pfd[0]); ++ return err; ++} ++ ++int cpt_dump_ifinfo(struct cpt_context * ctx) ++{ ++ int err; ++ ++ err = cpt_dump_link(ctx); ++ if (!err) ++ err = cpt_dump_ifaddr(ctx); ++ if (!err) ++ err = cpt_dump_route(ctx); ++ if (!err) ++ err = cpt_dump_iptables(ctx); ++ return err; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_net.h linux-2.6.16-026test015/kernel/cpt/cpt_net.h +--- linux-2.6.16.orig/kernel/cpt/cpt_net.h 2006-07-04 14:41:41.000000000 +0400 ++++ 
linux-2.6.16-026test015/kernel/cpt/cpt_net.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,7 @@ ++int cpt_dump_ifinfo(struct cpt_context *ctx); ++int rst_restore_net(struct cpt_context *ctx); ++int cpt_suspend_network(struct cpt_context *ctx); ++int cpt_resume_network(struct cpt_context *ctx); ++int rst_resume_network(struct cpt_context *ctx); ++int cpt_dump_ip_conntrack(struct cpt_context *ctx); ++int rst_restore_ip_conntrack(struct cpt_context * ctx); +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_obj.c linux-2.6.16-026test015/kernel/cpt/cpt_obj.c +--- linux-2.6.16.orig/kernel/cpt/cpt_obj.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_obj.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,172 @@ ++/* ++ * ++ * kernel/cpt/cpt_obj.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++ ++cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ obj = kmalloc(sizeof(cpt_object_t), gfp); ++ if (obj) { ++ INIT_LIST_HEAD(&obj->o_list); ++ INIT_LIST_HEAD(&obj->o_hash); ++ INIT_LIST_HEAD(&obj->o_alist); ++ obj->o_count = 1; ++ obj->o_pos = CPT_NULL; ++ obj->o_lock = 0; ++ obj->o_parent = NULL; ++ obj->o_index = CPT_NOINDEX; ++ obj->o_obj = NULL; ++ obj->o_image = NULL; ++ ctx->objcount++; ++ } ++ return obj; ++} ++// //EXPORT_SYMBOL(alloc_cpt_object); ++ ++void free_cpt_object(cpt_object_t *obj, cpt_context_t *ctx) ++{ ++ list_del(&obj->o_alist); ++ kfree(obj); ++ ctx->objcount--; ++} ++ ++void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_context_t *ctx) ++{ ++ 
list_add_tail(&obj->o_list, &ctx->object_array[type]); ++} ++// //EXPORT_SYMBOL(intern_cpt_object); ++ ++void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, ++ cpt_object_t *head, cpt_context_t *ctx) ++{ ++ list_add(&obj->o_list, &head->o_list); ++} ++// //EXPORT_SYMBOL(insert_cpt_object); ++ ++cpt_object_t * __cpt_object_add(enum _cpt_object_type type, void *p, ++ unsigned gfp_mask, cpt_context_t *ctx) ++{ ++ cpt_object_t *obj; ++ ++ obj = lookup_cpt_object(type, p, ctx); ++ ++ if (obj) { ++ obj->o_count++; ++ return obj; ++ } ++ ++ if ((obj = alloc_cpt_object(gfp_mask, ctx)) != NULL) { ++ if (p) ++ cpt_obj_setobj(obj, p, ctx); ++ intern_cpt_object(type, obj, ctx); ++ return obj; ++ } ++ return NULL; ++} ++// //EXPORT_SYMBOL(__cpt_object_add); ++ ++cpt_object_t * cpt_object_add(enum _cpt_object_type type, void *p, cpt_context_t *ctx) ++{ ++ return __cpt_object_add(type, p, GFP_KERNEL, ctx); ++} ++// //EXPORT_SYMBOL(cpt_object_add); ++ ++cpt_object_t * cpt_object_get(enum _cpt_object_type type, void *p, cpt_context_t *ctx) ++{ ++ cpt_object_t *obj; ++ ++ obj = lookup_cpt_object(type, p, ctx); ++ ++ if (obj) ++ obj->o_count++; ++ ++ return obj; ++} ++// //EXPORT_SYMBOL(cpt_object_get); ++ ++int cpt_object_init(cpt_context_t *ctx) ++{ ++ int i; ++ ++ for (i=0; i<CPT_OBJ_MAX; i++) { ++ INIT_LIST_HEAD(&ctx->object_array[i]); ++ } ++ return 0; ++} ++ ++int cpt_object_destroy(cpt_context_t *ctx) ++{ ++ int i; ++ ++ for (i=0; i<CPT_OBJ_MAX; i++) { ++ while (!list_empty(&ctx->object_array[i])) { ++ struct list_head *head = ctx->object_array[i].next; ++ cpt_object_t *obj = list_entry(head, cpt_object_t, o_list); ++ list_del(head); ++ if (obj->o_image) ++ kfree(obj->o_image); ++ free_cpt_object(obj, ctx); ++ } ++ } ++ if (ctx->objcount != 0) ++ eprintk_ctx("BUG: ctx->objcount=%d\n", ctx->objcount); ++ return 0; ++} ++ ++cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ 
for_each_object(obj, type) { ++ if (obj->o_obj == p) ++ return obj; ++ } ++ return NULL; ++} ++// //EXPORT_SYMBOL(lookup_cpt_object); ++ ++cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, type) { ++ if (obj->o_pos == pos) ++ return obj; ++ } ++ return NULL; ++} ++// //EXPORT_SYMBOL(lookup_cpt_obj_bypos); ++ ++cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, type) { ++ if (obj->o_index == index) ++ return obj; ++ } ++ return NULL; ++} ++// //EXPORT_SYMBOL(lookup_cpt_obj_byindex); +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_obj.h linux-2.6.16-026test015/kernel/cpt/cpt_obj.h +--- linux-2.6.16.orig/kernel/cpt/cpt_obj.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_obj.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,62 @@ ++#ifndef __CPT_OBJ_H_ ++#define __CPT_OBJ_H_ 1 ++ ++#include <linux/list.h> ++#include <linux/cpt_image.h> ++ ++typedef struct _cpt_object ++{ ++ struct list_head o_list; ++ struct list_head o_hash; ++ int o_count; ++ int o_index; ++ int o_lock; ++ loff_t o_pos; ++ loff_t o_ppos; ++ void *o_obj; ++ void *o_image; ++ void *o_parent; ++ struct list_head o_alist; ++} cpt_object_t; ++ ++struct cpt_context; ++ ++#define for_each_object(obj, type) list_for_each_entry(obj, &ctx->object_array[type], o_list) ++ ++ ++extern cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx); ++extern void free_cpt_object(cpt_object_t *obj, struct cpt_context *ctx); ++ ++cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx); ++cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx); ++cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx); ++ ++static inline void cpt_obj_setpos(cpt_object_t 
*cpt, loff_t pos, struct cpt_context *ctx) ++{ ++ cpt->o_pos = pos; ++ /* Add to pos hash table */ ++} ++ ++static inline void cpt_obj_setobj(cpt_object_t *cpt, void *ptr, struct cpt_context *ctx) ++{ ++ cpt->o_obj = ptr; ++ /* Add to hash table */ ++} ++ ++static inline void cpt_obj_setindex(cpt_object_t *cpt, __u32 index, struct cpt_context *ctx) ++{ ++ cpt->o_index = index; ++ /* Add to index hash table */ ++} ++ ++ ++extern void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, struct cpt_context *ctx); ++extern void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_object_t *head, struct cpt_context *ctx); ++extern cpt_object_t *cpt_object_add(enum _cpt_object_type type, void *p, struct cpt_context *ctx); ++extern cpt_object_t *__cpt_object_add(enum _cpt_object_type type, void *p, unsigned int gfp_mask, struct cpt_context *ctx); ++extern cpt_object_t *cpt_object_get(enum _cpt_object_type type, void *p, struct cpt_context *ctx); ++ ++extern int cpt_object_init(struct cpt_context *ctx); ++extern int cpt_object_destroy(struct cpt_context *ctx); ++ ++#endif /* __CPT_OBJ_H_ */ +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_proc.c linux-2.6.16-026test015/kernel/cpt/cpt_proc.c +--- linux-2.6.16.orig/kernel/cpt/cpt_proc.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_proc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,575 @@ ++/* ++ * ++ * kernel/cpt/cpt_proc.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/errno.h> ++#include <linux/mm.h> ++#include <linux/list.h> ++#include <linux/proc_fs.h> ++#include <linux/smp_lock.h> ++#include <asm/uaccess.h> ++#include <linux/cpt_ioctl.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_dump.h" ++#include "cpt_mm.h" ++#include "cpt_kernel.h" ++ ++MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>"); ++MODULE_LICENSE("GPL"); ++ ++/* List of contexts and lock protecting the list */ ++static struct list_head cpt_context_list; ++static spinlock_t cpt_context_lock; ++ ++static int proc_read(char *buffer, char **start, off_t offset, ++ int length, int *eof, void *data) ++{ ++ off_t pos = 0; ++ off_t begin = 0; ++ int len = 0; ++ cpt_context_t *ctx; ++ ++ len += sprintf(buffer, "Ctx Id VE State\n"); ++ ++ spin_lock(&cpt_context_lock); ++ ++ list_for_each_entry(ctx, &cpt_context_list, ctx_list) { ++ len += sprintf(buffer+len,"%p %08x %-8u %d", ++ ctx, ++ ctx->contextid, ++ ctx->ve_id, ++ ctx->ctx_state ++ ); ++ ++ buffer[len++] = '\n'; ++ ++ pos = begin+len; ++ if (pos < offset) { ++ len = 0; ++ begin = pos; ++ } ++ if (pos > offset+length) ++ goto done; ++ } ++ *eof = 1; ++ ++done: ++ spin_unlock(&cpt_context_lock); ++ *start = buffer + (offset - begin); ++ len -= (offset - begin); ++ if(len > length) ++ len = length; ++ if(len < 0) ++ len = 0; ++ return len; ++} ++ ++void cpt_context_release(cpt_context_t *ctx) ++{ ++ list_del(&ctx->ctx_list); ++ spin_unlock(&cpt_context_lock); ++ ++ if (ctx->ctx_state > 0) ++ cpt_resume(ctx); ++ ctx->ctx_state = CPT_CTX_ERROR; ++ ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ if (ctx->pgin_task) ++ put_task_struct(ctx->pgin_task); ++ if (ctx->pgin_dir) ++ cpt_free_pgin_dir(ctx); ++ if (ctx->pagein_file_out) ++ fput(ctx->pagein_file_out); ++ if 
(ctx->pagein_file_in) ++ fput(ctx->pagein_file_in); ++#endif ++ if (ctx->objcount) ++ eprintk_ctx("%d objects leaked\n", ctx->objcount); ++ if (ctx->file) ++ fput(ctx->file); ++ cpt_flush_error(ctx); ++ if (ctx->errorfile) { ++ fput(ctx->errorfile); ++ ctx->errorfile = NULL; ++ } ++ if (ctx->error_msg) { ++ free_page((unsigned long)ctx->error_msg); ++ ctx->error_msg = NULL; ++ } ++ if (ctx->statusfile) ++ fput(ctx->statusfile); ++ if (ctx->lockfile) ++ fput(ctx->lockfile); ++ kfree(ctx); ++ ++ spin_lock(&cpt_context_lock); ++} ++ ++static void __cpt_context_put(cpt_context_t *ctx) ++{ ++ if (!--ctx->refcount) ++ cpt_context_release(ctx); ++} ++ ++static void cpt_context_put(cpt_context_t *ctx) ++{ ++ spin_lock(&cpt_context_lock); ++ __cpt_context_put(ctx); ++ spin_unlock(&cpt_context_lock); ++} ++ ++cpt_context_t * cpt_context_open(void) ++{ ++ cpt_context_t *ctx; ++ ++ if ((ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)) != NULL) { ++ cpt_context_init(ctx); ++ spin_lock(&cpt_context_lock); ++ list_add_tail(&ctx->ctx_list, &cpt_context_list); ++ spin_unlock(&cpt_context_lock); ++ ctx->error_msg = (char*)__get_free_page(GFP_KERNEL); ++ if (ctx->error_msg != NULL) ++ ctx->error_msg[0] = 0; ++ } ++ return ctx; ++} ++ ++static cpt_context_t * cpt_context_lookup(unsigned int contextid) ++{ ++ cpt_context_t *ctx; ++ ++ spin_lock(&cpt_context_lock); ++ list_for_each_entry(ctx, &cpt_context_list, ctx_list) { ++ if (ctx->contextid == contextid) { ++ ctx->refcount++; ++ spin_unlock(&cpt_context_lock); ++ return ctx; ++ } ++ } ++ spin_unlock(&cpt_context_lock); ++ return NULL; ++} ++ ++int cpt_context_lookup_veid(unsigned int veid) ++{ ++ cpt_context_t *ctx; ++ ++ spin_lock(&cpt_context_lock); ++ list_for_each_entry(ctx, &cpt_context_list, ctx_list) { ++ if (ctx->ve_id == veid && ctx->ctx_state > 0) { ++ spin_unlock(&cpt_context_lock); ++ return 1; ++ } ++ } ++ spin_unlock(&cpt_context_lock); ++ return 0; ++} ++ ++static int cpt_ioctl(struct inode * inode, struct file * file, 
unsigned int cmd, unsigned long arg) ++{ ++ int err = 0; ++ cpt_context_t *ctx; ++ struct file *dfile = NULL; ++ ++ unlock_kernel(); ++ ++ if (cmd == CPT_VMPREP) { ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ err = cpt_mm_prepare(arg); ++#else ++ err = -EINVAL; ++#endif ++ goto out_lock; ++ } ++ ++ if (cmd == CPT_TEST_CAPS) { ++ unsigned int src_flags, dst_flags = arg; ++ ++ err = 0; ++ src_flags = test_cpu_caps(); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err); ++ goto out_lock; ++ } ++ ++ if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) { ++ cpt_context_t *old_ctx; ++ ++ ctx = NULL; ++ if (cmd == CPT_JOIN_CONTEXT) { ++ err = -ENOENT; ++ ctx = cpt_context_lookup(arg); ++ if (!ctx) ++ goto out_lock; ++ } ++ ++ spin_lock(&cpt_context_lock); ++ old_ctx = (cpt_context_t*)file->private_data; ++ file->private_data = ctx; ++ ++ if (old_ctx) { ++ if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) { ++ old_ctx->sticky = 0; ++ old_ctx->refcount--; ++ } ++ __cpt_context_put(old_ctx); ++ } ++ spin_unlock(&cpt_context_lock); ++ err = 0; ++ goto out_lock; ++ } ++ ++ spin_lock(&cpt_context_lock); ++ ctx = (cpt_context_t*)file->private_data; ++ if (ctx) ++ ctx->refcount++; ++ spin_unlock(&cpt_context_lock); ++ ++ if (!ctx) { ++ cpt_context_t *old_ctx; ++ ++ err = -ENOMEM; ++ ctx = cpt_context_open(); ++ if (!ctx) ++ goto out_lock; ++ ++ spin_lock(&cpt_context_lock); ++ old_ctx = (cpt_context_t*)file->private_data; ++ if (!old_ctx) { ++ ctx->refcount++; ++ 
file->private_data = ctx; ++ } else { ++ old_ctx->refcount++; ++ } ++ if (old_ctx) { ++ __cpt_context_put(ctx); ++ ctx = old_ctx; ++ } ++ spin_unlock(&cpt_context_lock); ++ } ++ ++ if (cmd == CPT_GET_CONTEXT) { ++ unsigned int contextid = (unsigned int)arg; ++ ++ if (ctx->contextid && ctx->contextid != contextid) { ++ err = -EINVAL; ++ goto out_nosem; ++ } ++ if (!ctx->contextid) { ++ cpt_context_t *c1 = cpt_context_lookup(contextid); ++ if (c1) { ++ cpt_context_put(c1); ++ err = -EEXIST; ++ goto out_nosem; ++ } ++ ctx->contextid = contextid; ++ } ++ spin_lock(&cpt_context_lock); ++ if (!ctx->sticky) { ++ ctx->sticky = 1; ++ ctx->refcount++; ++ } ++ spin_unlock(&cpt_context_lock); ++ goto out_nosem; ++ } ++ ++ down(&ctx->main_sem); ++ ++ err = -EBUSY; ++ if (ctx->ctx_state < 0) ++ goto out; ++ ++ err = 0; ++ switch (cmd) { ++ case CPT_SET_DUMPFD: ++ if (ctx->ctx_state == CPT_CTX_DUMPING) { ++ err = -EBUSY; ++ break; ++ } ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ if (dfile->f_op == NULL || ++ dfile->f_op->write == NULL) { ++ fput(dfile); ++ err = -EBADF; ++ break; ++ } ++ } ++ if (ctx->file) ++ fput(ctx->file); ++ ctx->file = dfile; ++ break; ++ case CPT_SET_ERRORFD: ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ } ++ if (ctx->errorfile) ++ fput(ctx->errorfile); ++ ctx->errorfile = dfile; ++ break; ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ case CPT_SET_PAGEINFDIN: ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ } ++ if (ctx->pagein_file_in) ++ fput(ctx->pagein_file_in); ++ ctx->pagein_file_in = dfile; ++ break; ++ case CPT_SET_PAGEINFDOUT: ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ } ++ if (ctx->pagein_file_out) ++ fput(ctx->pagein_file_out); ++ ctx->pagein_file_out = dfile; ++ break; ++ case CPT_SET_LAZY: ++ ctx->lazy_vm 
= arg; ++ break; ++ case CPT_PAGEIND: ++ err = cpt_start_pagein(ctx); ++ break; ++#endif ++ case CPT_SET_VEID: ++ if (ctx->ctx_state > 0) { ++ err = -EBUSY; ++ break; ++ } ++ ctx->ve_id = arg; ++ break; ++ case CPT_SET_CPU_FLAGS: ++ if (ctx->ctx_state > 0) { ++ err = -EBUSY; ++ break; ++ } ++ ctx->dst_cpu_flags = arg; ++ ctx->src_cpu_flags = test_cpu_caps(); ++ break; ++ case CPT_SUSPEND: ++ if (cpt_context_lookup_veid(ctx->ve_id) || ++ ctx->ctx_state > 0) { ++ err = -EBUSY; ++ break; ++ } ++ ctx->ctx_state = CPT_CTX_SUSPENDING; ++ err = cpt_vps_suspend(ctx); ++ if (err) { ++ if (cpt_resume(ctx) == 0) ++ ctx->ctx_state = CPT_CTX_IDLE; ++ } else { ++ ctx->ctx_state = CPT_CTX_SUSPENDED; ++ } ++ break; ++ case CPT_DUMP: ++ if (!ctx->ctx_state) { ++ err = -ENOENT; ++ break; ++ } ++ err = cpt_dump(ctx); ++ break; ++ case CPT_RESUME: ++ if (ctx->ctx_state == CPT_CTX_IDLE) { ++ err = -ENOENT; ++ break; ++ } ++ err = cpt_resume(ctx); ++ if (!err) ++ ctx->ctx_state = CPT_CTX_IDLE; ++ break; ++ case CPT_KILL: ++ if (ctx->ctx_state == CPT_CTX_IDLE) { ++ err = -ENOENT; ++ break; ++ } ++ err = cpt_kill(ctx); ++ if (!err) ++ ctx->ctx_state = CPT_CTX_IDLE; ++ break; ++ case CPT_TEST_VECAPS: ++ { ++ __u32 dst_flags = arg; ++ __u32 src_flags; ++ ++ err = cpt_vps_caps(ctx, &src_flags); ++ if (err) ++ break; ++ ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err); ++ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_EMT64, "emt64", err); ++ test_one_flag(src_flags, 
dst_flags, CPT_CPU_X86_IA64, "ia64", err); ++ break; ++ } ++ default: ++ err = -EINVAL; ++ break; ++ } ++ ++out: ++ cpt_flush_error(ctx); ++ up(&ctx->main_sem); ++out_nosem: ++ cpt_context_put(ctx); ++out_lock: ++ lock_kernel(); ++ return err; ++} ++ ++static int cpt_open(struct inode *inode, struct file *file) ++{ ++ if (!try_module_get(THIS_MODULE)) ++ return -EBUSY; ++ ++ return 0; ++} ++ ++static int cpt_release(struct inode * inode, struct file * file) ++{ ++ cpt_context_t *ctx; ++ ++ spin_lock(&cpt_context_lock); ++ ctx = (cpt_context_t*)file->private_data; ++ file->private_data = NULL; ++ ++ if (ctx) ++ __cpt_context_put(ctx); ++ spin_unlock(&cpt_context_lock); ++ ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++ ++static struct file_operations cpt_fops = { ++ .owner = THIS_MODULE, ++ .open = cpt_open, ++ .release = cpt_release, ++ .ioctl = cpt_ioctl, ++}; ++ ++static struct proc_dir_entry *proc_ent; ++ ++static struct ctl_table_header *ctl_header; ++ ++static ctl_table debug_table[] = { ++ { ++ .ctl_name = 9475, ++ .procname = "cpt", ++ .data = &debug_level, ++ .maxlen = sizeof(debug_level), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { .ctl_name = 0 } ++}; ++static ctl_table root_table[] = { ++ { ++ .ctl_name = CTL_DEBUG, ++ .procname = "debug", ++ .mode = 0555, ++ .child = debug_table, ++ }, ++ { .ctl_name = 0 } ++}; ++ ++static int __init init_cpt(void) ++{ ++ int err; ++ ++ err = -ENOMEM; ++ ctl_header = register_sysctl_table(root_table, 0); ++ if (!ctl_header) ++ goto err_mon; ++ ++ spin_lock_init(&cpt_context_lock); ++ INIT_LIST_HEAD(&cpt_context_list); ++ ++ err = -EINVAL; ++ proc_ent = create_proc_entry("cpt", 0600, NULL); ++ if (!proc_ent) ++ goto err_out; ++ ++ cpt_fops.read = proc_ent->proc_fops->read; ++ cpt_fops.write = proc_ent->proc_fops->write; ++ cpt_fops.llseek = proc_ent->proc_fops->llseek; ++ proc_ent->proc_fops = &cpt_fops; ++ ++ proc_ent->read_proc = proc_read; ++ proc_ent->data = NULL; ++ proc_ent->owner = 
THIS_MODULE; ++ return 0; ++ ++err_out: ++ unregister_sysctl_table(ctl_header); ++err_mon: ++ return err; ++} ++module_init(init_cpt); ++ ++static void __exit exit_cpt(void) ++{ ++ remove_proc_entry("cpt", NULL); ++ unregister_sysctl_table(ctl_header); ++ ++ spin_lock(&cpt_context_lock); ++ while (!list_empty(&cpt_context_list)) { ++ cpt_context_t *ctx; ++ ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list); ++ ++ if (!ctx->sticky) ++ ctx->refcount++; ++ ctx->sticky = 0; ++ ++ BUG_ON(ctx->refcount != 1); ++ ++ __cpt_context_put(ctx); ++ } ++ spin_unlock(&cpt_context_lock); ++} ++module_exit(exit_cpt); +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_process.c linux-2.6.16-026test015/kernel/cpt/cpt_process.c +--- linux-2.6.16.orig/kernel/cpt/cpt_process.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_process.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,986 @@ ++/* ++ * ++ * kernel/cpt/cpt_process.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/compat.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_ubc.h" ++#include "cpt_process.h" ++#include "cpt_kernel.h" ++ ++#ifdef CONFIG_X86_32 ++#undef task_pt_regs ++#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1) ++#endif ++ ++static u32 encode_segment(u32 segreg) ++{ ++ segreg &= 0xFFFF; ++ ++ if (segreg == 0) ++ return CPT_SEG_ZERO; ++ if ((segreg & 3) != 3) { ++ wprintk("Invalid RPL of a segment reg %x\n", segreg); ++ return CPT_SEG_ZERO; ++ } ++ ++ /* LDT descriptor, it is just an index to LDT array */ ++ if (segreg & 4) ++ return CPT_SEG_LDT + (segreg >> 3); ++ ++ /* TLS descriptor. */ ++ if ((segreg >> 3) >= GDT_ENTRY_TLS_MIN && ++ (segreg >> 3) <= GDT_ENTRY_TLS_MAX) ++ return CPT_SEG_TLS1 + ((segreg>>3) - GDT_ENTRY_TLS_MIN); ++ ++ /* One of standard desriptors */ ++#ifdef CONFIG_X86_64 ++ if (segreg == __USER32_DS) ++ return CPT_SEG_USER32_DS; ++ if (segreg == __USER32_CS) ++ return CPT_SEG_USER32_CS; ++ if (segreg == __USER_DS) ++ return CPT_SEG_USER64_DS; ++ if (segreg == __USER_CS) ++ return CPT_SEG_USER64_CS; ++#else ++ if (segreg == __USER_DS) ++ return CPT_SEG_USER32_DS; ++ if (segreg == __USER_CS) ++ return CPT_SEG_USER32_CS; ++#endif ++ wprintk("Invalid segment reg %x\n", segreg); ++ return CPT_SEG_ZERO; ++} ++ ++#ifdef CONFIG_X86_64 ++static void xlate_ptregs_64_to_32(struct cpt_x86_regs *d, struct pt_regs *s, task_t *tsk) ++{ ++ d->cpt_ebp = s->rbp; ++ d->cpt_ebx = s->rbx; ++ d->cpt_eax = s->rax; ++ d->cpt_ecx = s->rcx; ++ d->cpt_edx = s->rdx; ++ d->cpt_esi = s->rsi; ++ d->cpt_edi = s->rdi; ++ d->cpt_orig_eax = s->orig_rax; ++ d->cpt_eip = s->rip; ++ d->cpt_xcs = 
encode_segment(s->cs); ++ d->cpt_eflags = s->eflags; ++ d->cpt_esp = s->rsp; ++ d->cpt_xss = encode_segment(s->ss); ++ d->cpt_xds = encode_segment(tsk->thread.ds); ++ d->cpt_xes = encode_segment(tsk->thread.es); ++} ++ ++static int dump_registers(task_t *tsk, struct cpt_context *ctx) ++{ ++ cpt_open_object(NULL, ctx); ++ ++ if (tsk->thread_info->flags&_TIF_IA32) { ++ struct cpt_x86_regs ri; ++ ri.cpt_next = sizeof(ri); ++ ri.cpt_object = CPT_OBJ_X86_REGS; ++ ri.cpt_hdrlen = sizeof(ri); ++ ri.cpt_content = CPT_CONTENT_VOID; ++ ++ ri.cpt_debugreg[0] = tsk->thread.debugreg0; ++ ri.cpt_debugreg[1] = tsk->thread.debugreg1; ++ ri.cpt_debugreg[2] = tsk->thread.debugreg2; ++ ri.cpt_debugreg[3] = tsk->thread.debugreg3; ++ ri.cpt_debugreg[4] = 0; ++ ri.cpt_debugreg[5] = 0; ++ ri.cpt_debugreg[6] = tsk->thread.debugreg6; ++ ri.cpt_debugreg[7] = tsk->thread.debugreg7; ++ ri.cpt_fs = encode_segment(tsk->thread.fsindex); ++ ri.cpt_gs = encode_segment(tsk->thread.gsindex); ++ ++ xlate_ptregs_64_to_32(&ri, task_pt_regs(tsk), tsk); ++ ++ ctx->write(&ri, sizeof(ri), ctx); ++ } else { ++ struct cpt_x86_64_regs ri; ++ ri.cpt_next = sizeof(ri); ++ ri.cpt_object = CPT_OBJ_X86_64_REGS; ++ ri.cpt_hdrlen = sizeof(ri); ++ ri.cpt_content = CPT_CONTENT_VOID; ++ ++ ri.cpt_fsbase = tsk->thread.fs; ++ ri.cpt_gsbase = tsk->thread.gs; ++ ri.cpt_fsindex = encode_segment(tsk->thread.fsindex); ++ ri.cpt_gsindex = encode_segment(tsk->thread.gsindex); ++ ri.cpt_ds = encode_segment(tsk->thread.ds); ++ ri.cpt_es = encode_segment(tsk->thread.es); ++ ri.cpt_debugreg[0] = tsk->thread.debugreg0; ++ ri.cpt_debugreg[1] = tsk->thread.debugreg1; ++ ri.cpt_debugreg[2] = tsk->thread.debugreg2; ++ ri.cpt_debugreg[3] = tsk->thread.debugreg3; ++ ri.cpt_debugreg[4] = 0; ++ ri.cpt_debugreg[5] = 0; ++ ri.cpt_debugreg[6] = tsk->thread.debugreg6; ++ ri.cpt_debugreg[7] = tsk->thread.debugreg7; ++ ++ memcpy(&ri.cpt_r15, task_pt_regs(tsk), sizeof(struct pt_regs)); ++ ++ ri.cpt_cs = encode_segment(task_pt_regs(tsk)->cs); ++ 
ri.cpt_ss = encode_segment(task_pt_regs(tsk)->ss); ++ ++ ctx->write(&ri, sizeof(ri), ctx); ++ ++#if 0 ++ if (ri.cpt_rip >= VSYSCALL_START && ri.cpt_rip < VSYSCALL_END) { ++ eprintk_ctx(CPT_FID "cannot be checkpointied while vsyscall, try later\n", CPT_TID(tsk)); ++ return -EAGAIN; ++ } ++#endif ++ } ++ cpt_close_object(ctx); ++ ++ return 0; ++} ++ ++#else ++ ++static int dump_registers(task_t *tsk, struct cpt_context *ctx) ++{ ++ struct cpt_x86_regs ri; ++ ++ cpt_open_object(NULL, ctx); ++ ++ ri.cpt_next = sizeof(ri); ++ ri.cpt_object = CPT_OBJ_X86_REGS; ++ ri.cpt_hdrlen = sizeof(ri); ++ ri.cpt_content = CPT_CONTENT_VOID; ++ ++ ri.cpt_debugreg[0] = tsk->thread.debugreg[0]; ++ ri.cpt_debugreg[1] = tsk->thread.debugreg[1]; ++ ri.cpt_debugreg[2] = tsk->thread.debugreg[2]; ++ ri.cpt_debugreg[3] = tsk->thread.debugreg[3]; ++ ri.cpt_debugreg[4] = tsk->thread.debugreg[4]; ++ ri.cpt_debugreg[5] = tsk->thread.debugreg[5]; ++ ri.cpt_debugreg[6] = tsk->thread.debugreg[6]; ++ ri.cpt_debugreg[7] = tsk->thread.debugreg[7]; ++ ri.cpt_fs = encode_segment(tsk->thread.fs); ++ ri.cpt_gs = encode_segment(tsk->thread.gs); ++ ++ memcpy(&ri.cpt_ebx, task_pt_regs(tsk), sizeof(struct pt_regs)); ++ ++ ri.cpt_xcs = encode_segment(task_pt_regs(tsk)->xcs); ++ ri.cpt_xss = encode_segment(task_pt_regs(tsk)->xss); ++ ri.cpt_xds = encode_segment(task_pt_regs(tsk)->xds); ++ ri.cpt_xes = encode_segment(task_pt_regs(tsk)->xes); ++ ++ ctx->write(&ri, sizeof(ri), ctx); ++ cpt_close_object(ctx); ++ ++ return 0; ++} ++#endif ++ ++static int dump_kstack(task_t *tsk, struct cpt_context *ctx) ++{ ++ struct cpt_obj_bits hdr; ++ unsigned long size; ++ void *start; ++ ++ cpt_open_object(NULL, ctx); ++ ++#ifdef CONFIG_X86_64 ++ size = tsk->thread.rsp0 - tsk->thread.rsp; ++ start = (void*)tsk->thread.rsp; ++#else ++ size = tsk->thread.esp0 - tsk->thread.esp; ++ start = (void*)tsk->thread.esp; ++#endif ++ ++ hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(size); ++ hdr.cpt_object = CPT_OBJ_BITS; ++ hdr.cpt_hdrlen = 
sizeof(hdr); ++ hdr.cpt_content = CPT_CONTENT_STACK; ++ hdr.cpt_size = size; ++ ++ ctx->write(&hdr, sizeof(hdr), ctx); ++ ctx->write(start, size, ctx); ++ ctx->align(ctx); ++ cpt_close_object(ctx); ++ return 0; ++} ++ ++/* Formats of i387_fxsave_struct are the same for x86_64 ++ * and i386. Plain luck. */ ++ ++static int dump_fpustate(task_t *tsk, struct cpt_context *ctx) ++{ ++ struct cpt_obj_bits hdr; ++ unsigned long size; ++ int type; ++ ++ cpt_open_object(NULL, ctx); ++ ++ type = CPT_CONTENT_X86_FPUSTATE; ++ size = sizeof(struct i387_fxsave_struct); ++#ifndef CONFIG_X86_64 ++ if (!cpu_has_fxsr) { ++ size = sizeof(struct i387_fsave_struct); ++ type = CPT_CONTENT_X86_FPUSTATE_OLD; ++ } ++#endif ++ ++ hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(size); ++ hdr.cpt_object = CPT_OBJ_BITS; ++ hdr.cpt_hdrlen = sizeof(hdr); ++ hdr.cpt_content = type; ++ hdr.cpt_size = size; ++ ++ ctx->write(&hdr, sizeof(hdr), ctx); ++ ctx->write(&tsk->thread.i387, size, ctx); ++ ctx->align(ctx); ++ cpt_close_object(ctx); ++ return 0; ++} ++ ++static int encode_siginfo(struct cpt_siginfo_image *si, siginfo_t *info) ++{ ++ si->cpt_signo = info->si_signo; ++ si->cpt_errno = info->si_errno; ++ si->cpt_code = info->si_code; ++ ++ switch(si->cpt_code & __SI_MASK) { ++ case __SI_TIMER: ++ si->cpt_pid = info->si_tid; ++ si->cpt_uid = info->si_overrun; ++ si->cpt_sigval = cpt_ptr_export(info->_sifields._timer._sigval.sival_ptr); ++ si->cpt_utime = info->si_sys_private; ++ break; ++ case __SI_POLL: ++ si->cpt_pid = info->si_band; ++ si->cpt_uid = info->si_fd; ++ break; ++ case __SI_FAULT: ++ si->cpt_sigval = cpt_ptr_export(info->si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ si->cpt_pid = info->si_trapno; ++#endif ++ break; ++ case __SI_CHLD: ++ si->cpt_pid = is_virtual_pid(info->si_pid) ? 
info->si_pid : pid_type_to_vpid(PIDTYPE_PID, info->si_pid); ++ si->cpt_uid = info->si_uid; ++ si->cpt_sigval = info->si_status; ++ si->cpt_stime = info->si_stime; ++ si->cpt_utime = info->si_utime; ++ break; ++ case __SI_KILL: ++ case __SI_RT: ++ case __SI_MESGQ: ++ default: ++ si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_type_to_vpid(PIDTYPE_TGID, info->si_pid); ++ si->cpt_uid = info->si_uid; ++ si->cpt_sigval = cpt_ptr_export(info->si_ptr); ++ break; ++ } ++ return 0; ++} ++ ++static int dump_sigqueue(struct sigpending *list, struct cpt_context *ctx) ++{ ++ struct sigqueue *q; ++ loff_t saved_obj; ++ ++ if (list_empty(&list->list)) ++ return 0; ++ ++ cpt_push_object(&saved_obj, ctx); ++ list_for_each_entry(q, &list->list, list) { ++ struct cpt_siginfo_image si; ++ ++ si.cpt_next = sizeof(si); ++ si.cpt_object = CPT_OBJ_SIGINFO; ++ si.cpt_hdrlen = sizeof(si); ++ si.cpt_content = CPT_CONTENT_VOID; ++ ++ si.cpt_qflags = q->flags; ++ si.cpt_user = q->user->uid; ++ ++ if (encode_siginfo(&si, &q->info)) ++ return -EINVAL; ++ ++ ctx->write(&si, sizeof(si), ctx); ++ } ++ cpt_pop_object(&saved_obj, ctx); ++ return 0; ++} ++ ++ ++ ++static int dump_one_signal_struct(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ struct signal_struct *sig = obj->o_obj; ++ struct cpt_signal_image *v = cpt_get_buf(ctx); ++ task_t *tsk; ++ int i; ++ ++ cpt_open_object(obj, ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_SIGNAL_STRUCT; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ if (sig->pgrp <= 0) { ++ eprintk_ctx("bad pgid\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ v->cpt_pgrp_type = CPT_PGRP_NORMAL; ++ read_lock(&tasklist_lock); ++ tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->pgrp); ++ if (tsk == NULL) ++ v->cpt_pgrp_type = CPT_PGRP_ORPHAN; ++ read_unlock(&tasklist_lock); ++ v->cpt_pgrp = pid_type_to_vpid(PIDTYPE_PGID, sig->pgrp); ++ ++ v->cpt_old_pgrp = 0; ++ if (sig->tty_old_pgrp < 0) { ++ 
eprintk_ctx("bad tty_old_pgrp\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ if (sig->tty_old_pgrp > 0) { ++ v->cpt_old_pgrp_type = CPT_PGRP_NORMAL; ++ read_lock(&tasklist_lock); ++ tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->tty_old_pgrp); ++ if (tsk == NULL) { ++ v->cpt_old_pgrp_type = CPT_PGRP_ORPHAN; ++ tsk = find_task_by_pid_type_ve(PIDTYPE_PGID, sig->tty_old_pgrp); ++ } ++ read_unlock(&tasklist_lock); ++ if (tsk == NULL) { ++ eprintk_ctx("tty_old_pgrp does not exist anymore\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, sig->tty_old_pgrp); ++ if ((int)v->cpt_old_pgrp < 0) { ++ dprintk_ctx("stray tty_old_pgrp %d\n", sig->tty_old_pgrp); ++ v->cpt_old_pgrp = -1; ++ v->cpt_old_pgrp_type = CPT_PGRP_STRAY; ++ } ++ } ++ ++ if (sig->session <= 0) { ++ eprintk_ctx("bad session\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ v->cpt_session_type = CPT_PGRP_NORMAL; ++ read_lock(&tasklist_lock); ++ tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->session); ++ if (tsk == NULL) ++ v->cpt_session_type = CPT_PGRP_ORPHAN; ++ read_unlock(&tasklist_lock); ++ v->cpt_session = pid_type_to_vpid(PIDTYPE_SID, sig->session); ++ ++ v->cpt_leader = sig->leader; ++ v->cpt_ctty = CPT_NULL; ++ if (sig->tty) { ++ cpt_object_t *cobj = lookup_cpt_object(CPT_OBJ_TTY, sig->tty, ctx); ++ if (cobj) ++ v->cpt_ctty = cobj->o_pos; ++ else { ++ eprintk_ctx("controlling tty is not found\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ } ++ memcpy(&v->cpt_sigpending, &sig->shared_pending.signal, 8); ++ ++ v->cpt_curr_target = 0; ++ if (sig->curr_target) ++ v->cpt_curr_target = virt_pid(sig->curr_target); ++ v->cpt_group_exit = ((sig->flags & SIGNAL_GROUP_EXIT) != 0); ++ v->cpt_group_exit_code = sig->group_exit_code; ++ v->cpt_group_exit_task = 0; ++ if (sig->group_exit_task) ++ v->cpt_group_exit_task = virt_pid(sig->group_exit_task); ++ v->cpt_notify_count = sig->notify_count; ++ v->cpt_group_stop_count = 
sig->group_stop_count; ++ ++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,8) ++ v->cpt_utime = sig->utime; ++ v->cpt_stime = sig->stime; ++ v->cpt_cutime = sig->cutime; ++ v->cpt_cstime = sig->cstime; ++ v->cpt_nvcsw = sig->nvcsw; ++ v->cpt_nivcsw = sig->nivcsw; ++ v->cpt_cnvcsw = sig->cnvcsw; ++ v->cpt_cnivcsw = sig->cnivcsw; ++ v->cpt_min_flt = sig->min_flt; ++ v->cpt_maj_flt = sig->maj_flt; ++ v->cpt_cmin_flt = sig->cmin_flt; ++ v->cpt_cmaj_flt = sig->cmaj_flt; ++ ++ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS) ++ __asm__("undefined\n"); ++ ++ for (i=0; i<CPT_RLIM_NLIMITS; i++) { ++ if (i < RLIM_NLIMITS) { ++ v->cpt_rlim_cur[i] = sig->rlim[i].rlim_cur; ++ v->cpt_rlim_max[i] = sig->rlim[i].rlim_max; ++ } else { ++ v->cpt_rlim_cur[i] = CPT_NULL; ++ v->cpt_rlim_max[i] = CPT_NULL; ++ } ++ } ++#endif ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ dump_sigqueue(&sig->shared_pending, ctx); ++ ++ cpt_close_object(ctx); ++ return 0; ++} ++ ++ ++static int dump_one_process(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ task_t *tsk = obj->o_obj; ++ int last_thread; ++ struct cpt_task_image *v = cpt_get_buf(ctx); ++ cpt_object_t *tobj; ++ cpt_object_t *tg_obj; ++ loff_t saved_obj; ++ int i; ++ int err; ++ struct timespec delta; ++ ++ cpt_open_object(obj, ctx); ++ ++ v->cpt_signal = CPT_NULL; ++ tg_obj = lookup_cpt_object(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx); ++ if (!tg_obj) BUG(); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_TASK; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ v->cpt_state = tsk->state; ++ if (tsk->state == EXIT_ZOMBIE) { ++ eprintk_ctx("invalid zombie state on" CPT_FID "\n", CPT_TID(tsk)); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } else if (tsk->state == EXIT_DEAD) { ++ if (tsk->exit_state != EXIT_DEAD && ++ tsk->exit_state != EXIT_ZOMBIE) { ++ eprintk_ctx("invalid exit_state %ld on" CPT_FID "\n", tsk->exit_state, CPT_TID(tsk)); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ } ++ if 
(tsk->exit_state) { ++ v->cpt_state = tsk->exit_state; ++ if (tsk->state != EXIT_DEAD) { ++ eprintk_ctx("invalid tsk->state %ld/%ld on" CPT_FID "\n", ++ tsk->state, tsk->exit_state, CPT_TID(tsk)); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ } ++ v->cpt_flags = tsk->flags&~PF_FROZEN; ++ v->cpt_ptrace = tsk->ptrace; ++ v->cpt_prio = tsk->prio; ++ v->cpt_exit_code = tsk->exit_code; ++ v->cpt_exit_signal = tsk->exit_signal; ++ v->cpt_pdeath_signal = tsk->pdeath_signal; ++ v->cpt_static_prio = tsk->static_prio; ++ v->cpt_rt_priority = tsk->rt_priority; ++ v->cpt_policy = tsk->policy; ++ if (v->cpt_policy != SCHED_NORMAL) { ++ eprintk_ctx("scheduler policy is not supported %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ ++ v->cpt_mm = CPT_NULL; ++ if (tsk->mm) { ++ tobj = lookup_cpt_object(CPT_OBJ_MM, tsk->mm, ctx); ++ if (!tobj) BUG(); ++ v->cpt_mm = tobj->o_pos; ++ } ++ v->cpt_files = CPT_NULL; ++ if (tsk->files) { ++ tobj = lookup_cpt_object(CPT_OBJ_FILES, tsk->files, ctx); ++ if (!tobj) BUG(); ++ v->cpt_files = tobj->o_pos; ++ } ++ v->cpt_fs = CPT_NULL; ++ if (tsk->fs) { ++ tobj = lookup_cpt_object(CPT_OBJ_FS, tsk->fs, ctx); ++ if (!tobj) BUG(); ++ v->cpt_fs = tobj->o_pos; ++ } ++ v->cpt_namespace = CPT_NULL; ++ if (tsk->namespace) { ++ tobj = lookup_cpt_object(CPT_OBJ_NAMESPACE, tsk->namespace, ctx); ++ if (!tobj) BUG(); ++ v->cpt_namespace = tobj->o_pos; ++ ++ if (tsk->namespace != current->namespace) ++ eprintk_ctx("namespaces are not supported: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm); ++ } ++ v->cpt_sysvsem_undo = CPT_NULL; ++ if (tsk->sysvsem.undo_list && !tsk->exit_state) { ++ tobj = lookup_cpt_object(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx); ++ if (!tobj) BUG(); ++ v->cpt_sysvsem_undo = tobj->o_pos; ++ } ++ v->cpt_sighand = CPT_NULL; ++ if (tsk->sighand) { ++ tobj = lookup_cpt_object(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx); ++ if (!tobj) BUG(); ++ v->cpt_sighand = 
tobj->o_pos; ++ } ++ v->cpt_sigblocked = cpt_sigset_export(&tsk->blocked); ++ v->cpt_sigrblocked = cpt_sigset_export(&tsk->real_blocked); ++ v->cpt_sigsuspend_blocked = cpt_sigset_export(&tsk->saved_sigmask); ++ ++ v->cpt_pid = virt_pid(tsk); ++ v->cpt_tgid = virt_tgid(tsk); ++ v->cpt_ppid = 0; ++ if (tsk->parent) { ++ if (tsk->parent != tsk->real_parent && ++ !lookup_cpt_object(CPT_OBJ_TASK, tsk->parent, ctx)) { ++ eprintk_ctx("task %d/%d(%s) is ptraced from ve0\n", tsk->pid, virt_pid(tsk), tsk->comm); ++ cpt_release_buf(ctx); ++ return -EBUSY; ++ } ++ v->cpt_ppid = virt_pid(tsk->parent); ++ } ++ v->cpt_rppid = tsk->real_parent ? virt_pid(tsk->real_parent) : 0; ++ v->cpt_pgrp = virt_pgid(tsk); ++ v->cpt_session = virt_sid(tsk); ++ v->cpt_old_pgrp = 0; ++ if (tsk->signal->tty_old_pgrp) ++ v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tsk->signal->tty_old_pgrp); ++ v->cpt_leader = tsk->group_leader ? virt_pid(tsk->group_leader) : 0; ++ v->cpt_set_tid = (unsigned long)tsk->set_child_tid; ++ v->cpt_clear_tid = (unsigned long)tsk->clear_child_tid; ++ memcpy(v->cpt_comm, tsk->comm, 16); ++ v->cpt_user = tsk->user->uid; ++ v->cpt_uid = tsk->uid; ++ v->cpt_euid = tsk->euid; ++ v->cpt_suid = tsk->suid; ++ v->cpt_fsuid = tsk->fsuid; ++ v->cpt_gid = tsk->gid; ++ v->cpt_egid = tsk->egid; ++ v->cpt_sgid = tsk->sgid; ++ v->cpt_fsgid = tsk->fsgid; ++ v->cpt_ngids = 0; ++ if (tsk->group_info && tsk->group_info->ngroups != 0) { ++ int i = tsk->group_info->ngroups; ++ if (i > 32) { ++ /* Shame... I did a simplified version and _forgot_ ++ * about this. Later, later. 
*/ ++ eprintk_ctx("too many of groups " CPT_FID "\n", CPT_TID(tsk)); ++ return -EINVAL; ++ } ++ v->cpt_ngids = i; ++ for (i--; i>=0; i--) ++ v->cpt_gids[i] = tsk->group_info->small_block[i]; ++ } ++ memcpy(&v->cpt_ecap, &tsk->cap_effective, 8); ++ memcpy(&v->cpt_icap, &tsk->cap_inheritable, 8); ++ memcpy(&v->cpt_pcap, &tsk->cap_permitted, 8); ++ v->cpt_keepcap = tsk->keep_capabilities; ++ ++ v->cpt_did_exec = tsk->did_exec; ++ v->cpt_exec_domain = -1; ++ v->cpt_thrflags = tsk->thread_info->flags & ~(1<<TIF_FREEZE); ++ v->cpt_64bit = 0; ++#ifdef CONFIG_X86_64 ++ /* Clear x86_64 specific flags */ ++ v->cpt_thrflags &= ~(_TIF_FORK|_TIF_ABI_PENDING|_TIF_IA32); ++ if (!(tsk->thread_info->flags & _TIF_IA32)) { ++ ctx->tasks64++; ++ v->cpt_64bit = 1; ++ } ++#endif ++ v->cpt_thrstatus = tsk->thread_info->status; ++ v->cpt_addr_limit = -1; ++ ++ v->cpt_personality = tsk->personality; ++ ++ for (i=0; i<GDT_ENTRY_TLS_ENTRIES; i++) { ++ if (i>=3) { ++ eprintk_ctx("too many tls descs\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++#ifndef CONFIG_X86_64 ++ v->cpt_tls[i] = (((u64)tsk->thread.tls_array[i].b)<<32) + tsk->thread.tls_array[i].a; ++#else ++ v->cpt_tls[i] = tsk->thread.tls_array[i]; ++#endif ++ } ++ ++ v->cpt_restart.fn = CPT_RBL_0; ++ if (tsk->thread_info->restart_block.fn != current->thread_info->restart_block.fn) { ++ if (tsk->thread_info->restart_block.fn != nanosleep_restart ++#ifdef CONFIG_X86_64 ++ && tsk->thread_info->restart_block.fn != compat_nanosleep_restart ++#endif ++ ) { ++ eprintk_ctx("unknown restart block %p\n", tsk->thread_info->restart_block.fn); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ v->cpt_restart.fn = CPT_RBL_NANOSLEEP; ++#ifdef CONFIG_X86_64 ++ if (tsk->thread_info->restart_block.fn == compat_nanosleep_restart) ++ v->cpt_restart.fn = CPT_RBL_COMPAT_NANOSLEEP; ++#endif ++ v->cpt_restart.arg0 = tsk->thread_info->restart_block.arg0; ++ v->cpt_restart.arg1 = tsk->thread_info->restart_block.arg1; ++ v->cpt_restart.arg2 = 
tsk->thread_info->restart_block.arg2; ++ v->cpt_restart.arg3 = tsk->thread_info->restart_block.arg3; ++ if (debug_level > 2) { ++ ktime_t e, e1; ++ struct timespec now; ++ ++ do_posix_clock_monotonic_gettime(&now); ++ e = timespec_to_ktime(now); ++ e1.tv64 = ((u64)tsk->thread_info->restart_block.arg1 << 32) | (u64) tsk->thread_info->restart_block.arg0; ++ e = ktime_sub(e1, e); ++ dprintk("cpt " CPT_FID " RBL %ld/%ld %Ld\n", CPT_TID(tsk), ++ tsk->thread_info->restart_block.arg1, ++ tsk->thread_info->restart_block.arg0, e.tv64); ++ } ++ } ++ ++ v->cpt_it_real_incr = 0; ++ v->cpt_it_prof_incr = 0; ++ v->cpt_it_virt_incr = 0; ++ v->cpt_it_real_value = 0; ++ v->cpt_it_prof_value = 0; ++ v->cpt_it_virt_value = 0; ++ if (thread_group_leader(tsk) && tsk->exit_state == 0) { ++ ktime_t rem; ++ ++ v->cpt_it_real_incr = ktime_to_ns(tsk->signal->it_real_incr); ++ v->cpt_it_prof_incr = tsk->signal->it_prof_incr; ++ v->cpt_it_virt_incr = tsk->signal->it_virt_incr; ++ ++ rem = hrtimer_get_remaining(&tsk->signal->real_timer); ++ ++ if (hrtimer_active(&tsk->signal->real_timer)) { ++ if (rem.tv64 <= 0) ++ rem.tv64 = NSEC_PER_USEC; ++ v->cpt_it_real_value = ktime_to_ns(rem); ++ dprintk("cpt itimer " CPT_FID " %Lu\n", CPT_TID(tsk), v->cpt_it_real_value); ++ } ++ v->cpt_it_prof_value = tsk->signal->it_prof_expires; ++ v->cpt_it_virt_value = tsk->signal->it_virt_expires; ++ } ++ v->cpt_used_math = (tsk_used_math(tsk) != 0); ++ ++ if (tsk->notifier) { ++ eprintk_ctx("task notifier is in use: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ ++ v->cpt_utime = tsk->utime; ++ v->cpt_stime = tsk->stime; ++ delta = tsk->start_time; ++ _set_normalized_timespec(&delta, ++ delta.tv_sec - get_exec_env()->init_entry->start_time.tv_sec, ++ delta.tv_nsec - get_exec_env()->init_entry->start_time.tv_nsec); ++ v->cpt_starttime = cpt_timespec_export(&delta); ++ v->cpt_nvcsw = tsk->nvcsw; ++ v->cpt_nivcsw = tsk->nivcsw; ++ v->cpt_min_flt = 
tsk->min_flt; ++ v->cpt_maj_flt = tsk->maj_flt; ++ ++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8) ++ v->cpt_cutime = tsk->cutime; ++ v->cpt_cstime = tsk->cstime; ++ v->cpt_cnvcsw = tsk->cnvcsw; ++ v->cpt_cnivcsw = tsk->cnivcsw; ++ v->cpt_cmin_flt = tsk->cmin_flt; ++ v->cpt_cmaj_flt = tsk->cmaj_flt; ++ ++ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS) ++ __asm__("undefined\n"); ++ ++ for (i=0; i<CPT_RLIM_NLIMITS; i++) { ++ if (i < RLIM_NLIMITS) { ++ v->cpt_rlim_cur[i] = tsk->rlim[i].rlim_cur; ++ v->cpt_rlim_max[i] = tsk->rlim[i].rlim_max; ++ } else { ++ v->cpt_rlim_cur[i] = CPT_NULL; ++ v->cpt_rlim_max[i] = CPT_NULL; ++ } ++ } ++#else ++ v->cpt_cutime = tsk->signal->cutime; ++ v->cpt_cstime = tsk->signal->cstime; ++ v->cpt_cnvcsw = tsk->signal->cnvcsw; ++ v->cpt_cnivcsw = tsk->signal->cnivcsw; ++ v->cpt_cmin_flt = tsk->signal->cmin_flt; ++ v->cpt_cmaj_flt = tsk->signal->cmaj_flt; ++ ++ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS) ++ __asm__("undefined\n"); ++ ++ for (i=0; i<CPT_RLIM_NLIMITS; i++) { ++ if (i < RLIM_NLIMITS) { ++ v->cpt_rlim_cur[i] = tsk->signal->rlim[i].rlim_cur; ++ v->cpt_rlim_max[i] = tsk->signal->rlim[i].rlim_max; ++ } else { ++ v->cpt_rlim_cur[i] = CPT_NULL; ++ v->cpt_rlim_max[i] = CPT_NULL; ++ } ++ } ++#endif ++ ++ if (tsk->mm) ++ v->cpt_mm_ub = cpt_lookup_ubc(tsk->mm->mm_ub, ctx); ++ else ++ v->cpt_mm_ub = CPT_NULL; ++ v->cpt_task_ub = cpt_lookup_ubc(tsk->task_bc.task_ub, ctx); ++ v->cpt_exec_ub = cpt_lookup_ubc(tsk->task_bc.exec_ub, ctx); ++ v->cpt_fork_sub = cpt_lookup_ubc(tsk->task_bc.fork_sub, ctx); ++ ++ v->cpt_ptrace_message = tsk->ptrace_message; ++ v->cpt_pn_state = tsk->pn_state; ++ v->cpt_stopped_state = tsk->stopped_state; ++ v->cpt_sigsuspend_state = 0; ++ ++#ifndef CONFIG_X86_64 ++ if (tsk->thread.vm86_info) { ++ eprintk_ctx("vm86 task is running\n"); ++ cpt_release_buf(ctx); ++ return -EBUSY; ++ } ++#endif ++ ++ v->cpt_sigpending = cpt_sigset_export(&tsk->pending.signal); ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ 
cpt_push_object(&saved_obj, ctx); ++ dump_kstack(tsk, ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ ++ cpt_push_object(&saved_obj, ctx); ++ err = dump_registers(tsk, ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ if (err) ++ return err; ++ ++ if (tsk_used_math(tsk)) { ++ cpt_push_object(&saved_obj, ctx); ++ dump_fpustate(tsk, ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ } ++ ++ if (tsk->last_siginfo) { ++ struct cpt_siginfo_image si; ++ cpt_push_object(&saved_obj, ctx); ++ ++ si.cpt_next = sizeof(si); ++ si.cpt_object = CPT_OBJ_LASTSIGINFO; ++ si.cpt_hdrlen = sizeof(si); ++ si.cpt_content = CPT_CONTENT_VOID; ++ ++ if (encode_siginfo(&si, tsk->last_siginfo)) ++ return -EINVAL; ++ ++ ctx->write(&si, sizeof(si), ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ } ++ ++ if (tsk->sas_ss_size) { ++ struct cpt_sigaltstack_image si; ++ cpt_push_object(&saved_obj, ctx); ++ ++ si.cpt_next = sizeof(si); ++ si.cpt_object = CPT_OBJ_SIGALTSTACK; ++ si.cpt_hdrlen = sizeof(si); ++ si.cpt_content = CPT_CONTENT_VOID; ++ ++ si.cpt_stack = tsk->sas_ss_sp; ++ si.cpt_stacksize = tsk->sas_ss_size; ++ ++ ctx->write(&si, sizeof(si), ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ } ++ ++ dump_sigqueue(&tsk->pending, ctx); ++ ++ last_thread = 1; ++ read_lock(&tasklist_lock); ++ do { ++ task_t * next = next_thread(tsk); ++ if (next != tsk && !thread_group_leader(next)) ++ last_thread = 0; ++ } while (0); ++ read_unlock(&tasklist_lock); ++ ++ if (last_thread) { ++ task_t *prev_tsk; ++ int err; ++ loff_t pos = ctx->file->f_pos; ++ ++ cpt_push_object(&saved_obj, ctx); ++ err = dump_one_signal_struct(tg_obj, ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ if (err) ++ return err; ++ ++ prev_tsk = tsk; ++ for (;;) { ++ if (prev_tsk->tgid == tsk->tgid) { ++ loff_t tg_pos; ++ ++ tg_pos = obj->o_pos + offsetof(struct cpt_task_image, cpt_signal); ++ ctx->pwrite(&pos, sizeof(pos), ctx, tg_pos); ++ if (thread_group_leader(prev_tsk)) ++ break; ++ } ++ ++ if (obj->o_list.prev == &ctx->object_array[CPT_OBJ_TASK]) { ++ 
eprintk_ctx("bug: thread group leader is lost\n"); ++ return -EINVAL; ++ } ++ ++ obj = list_entry(obj->o_list.prev, cpt_object_t, o_list); ++ prev_tsk = obj->o_obj; ++ } ++ } ++ ++ cpt_close_object(ctx); ++ return 0; ++} ++ ++int cpt_dump_tasks(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ cpt_open_section(ctx, CPT_SECT_TASKS); ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ int err; ++ ++ if ((err = dump_one_process(obj, ctx)) != 0) ++ return err; ++ } ++ ++ cpt_close_section(ctx); ++ return 0; ++} ++ ++int cpt_collect_signals(cpt_context_t *ctx) ++{ ++ cpt_object_t *obj; ++ ++ /* Collect process fd sets */ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ if (tsk->signal && !list_empty(&tsk->signal->posix_timers)) { ++ eprintk_ctx("task %d/%d(%s) uses posix timers\n", tsk->pid, virt_pid(tsk), tsk->comm); ++ return -EBUSY; ++ } ++ if (tsk->signal && cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx) == NULL) ++ return -ENOMEM; ++ if (tsk->sighand && cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx) == NULL) ++ return -ENOMEM; ++ } ++ return 0; ++} ++ ++ ++static int dump_one_sighand_struct(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ struct sighand_struct *sig = obj->o_obj; ++ struct cpt_sighand_image *v = cpt_get_buf(ctx); ++ int i; ++ ++ cpt_open_object(obj, ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_SIGHAND_STRUCT; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ for (i=0; i< _NSIG; i++) { ++ if (sig->action[i].sa.sa_handler != SIG_DFL) { ++ loff_t saved_obj; ++ struct cpt_sighandler_image *o = cpt_get_buf(ctx); ++ ++ cpt_push_object(&saved_obj, ctx); ++ cpt_open_object(NULL, ctx); ++ ++ o->cpt_next = CPT_NULL; ++ o->cpt_object = CPT_OBJ_SIGHANDLER; ++ o->cpt_hdrlen = sizeof(*o); ++ o->cpt_content = CPT_CONTENT_VOID; ++ ++ o->cpt_signo = i; ++ o->cpt_handler = (unsigned long)sig->action[i].sa.sa_handler; 
++ o->cpt_restorer = (unsigned long)sig->action[i].sa.sa_restorer; ++ o->cpt_flags = sig->action[i].sa.sa_flags; ++ memcpy(&o->cpt_mask, &sig->action[i].sa.sa_mask, 8); ++ ctx->write(o, sizeof(*o), ctx); ++ cpt_release_buf(ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ } ++ } ++ ++ cpt_close_object(ctx); ++ return 0; ++} ++ ++int cpt_dump_sighand(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ cpt_open_section(ctx, CPT_SECT_SIGHAND_STRUCT); ++ ++ for_each_object(obj, CPT_OBJ_SIGHAND_STRUCT) { ++ int err; ++ ++ if ((err = dump_one_sighand_struct(obj, ctx)) != 0) ++ return err; ++ } ++ ++ cpt_close_section(ctx); ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_process.h linux-2.6.16-026test015/kernel/cpt/cpt_process.h +--- linux-2.6.16.orig/kernel/cpt/cpt_process.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_process.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,10 @@ ++int cpt_collect_signals(cpt_context_t *); ++int cpt_dump_signal(struct cpt_context *); ++int cpt_dump_sighand(struct cpt_context *); ++int cpt_dump_tasks(struct cpt_context *); ++ ++int rst_signal_complete(struct cpt_task_image *ti, struct cpt_context *ctx); ++__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx); ++ ++int rst_restore_process(struct cpt_context *ctx); ++int rst_process_linkage(struct cpt_context *ctx); +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_socket.c linux-2.6.16-026test015/kernel/cpt/cpt_socket.c +--- linux-2.6.16.orig/kernel/cpt/cpt_socket.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_socket.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,779 @@ ++/* ++ * ++ * kernel/cpt/cpt_socket.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/fs.h> ++#include <linux/socket.h> ++#include <linux/un.h> ++#include <linux/tcp.h> ++#include <net/sock.h> ++#include <net/scm.h> ++#include <net/af_unix.h> ++#include <net/tcp.h> ++#include <net/netlink_sock.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_socket.h" ++#include "cpt_files.h" ++#include "cpt_kernel.h" ++ ++static int dump_rqueue(int owner, struct sock *sk, struct cpt_context *ctx); ++ ++ ++/* Sockets are quite different of another kinds of files. ++ * There is one simplification: only one struct file can refer to a socket, ++ * so we could store information about socket directly in section FILES as ++ * a description of a file and append f.e. array of not-yet-accepted ++ * connections of listening socket as array of auxiliary data. ++ * ++ * Complications are: ++ * 1. TCP sockets can be orphans. We have to relocate orphans as well, ++ * so we have to create special section for orphans. ++ * 2. AF_UNIX sockets are distinguished objects: set of links between ++ * AF_UNIX sockets is quite arbitrary. ++ * A. Each socket can refers to many of files due to FD passing. ++ * B. Each socket except for connected ones can have in queue skbs ++ * sent by any of sockets. ++ * ++ * 2A is relatively easy: after our tasks are frozen we make an additional ++ * recursive pass throgh set of collected files and get referenced to ++ * FD passed files. After end of recursion, all the files are treated ++ * in the same way. All they will be stored in section FILES. ++ * ++ * 2B. We have to resolve all those references at some point. ++ * It is the place where pipe-like approach to image fails. 
++ * ++ * All this makes socket checkpointing quite chumbersome. ++ * Right now we collect all the sockets and assign some numeric index value ++ * to each of them. The socket section is separate and put after section FILES, ++ * so section FILES refers to sockets by index, section SOCKET refers to FILES ++ * as usual by position in image. All the refs inside socket section are ++ * by index. When restoring we read socket section, create objects to hold ++ * mappings index <-> pos. At the second pass we open sockets (simultaneosly ++ * with their pairs) and create FILE objects. ++ */ ++ ++ ++/* ====== FD passing ====== */ ++ ++/* Almost nobody does FD passing via AF_UNIX sockets, nevertheless we ++ * have to implement this. A problem is that in general case we receive ++ * skbs from an unknown context, so new files can arrive to checkpointed ++ * set of processes even after they are stopped. Well, we are going just ++ * to ignore unknown fds while doing real checkpointing. It is fair because ++ * links outside checkpointed set are going to fail anyway. ++ * ++ * ATTN: the procedure is recursive. We linearize the recursion adding ++ * newly found files to the end of file list, so they will be analyzed ++ * in the same loop. ++ */ ++ ++static int collect_one_passedfd(struct file *file, cpt_context_t * ctx) ++{ ++ struct inode *inode = file->f_dentry->d_inode; ++ struct socket *sock; ++ struct sock *sk; ++ struct sk_buff *skb; ++ ++ if (!S_ISSOCK(inode->i_mode)) ++ return -ENOTSOCK; ++ ++ sock = &container_of(inode, struct socket_alloc, vfs_inode)->socket; ++ ++ if (sock->ops->family != AF_UNIX) ++ return 0; ++ ++ sk = sock->sk; ++ ++ /* Subtle locking issue. skbs cannot be removed while ++ * we are scanning, because all the processes are stopped. ++ * They still can be added to tail of queue. Locking while ++ * we dereference skb->next is enough to resolve this. ++ * See above about collision with skbs added after we started ++ * checkpointing. 
++ */ ++ ++ skb = skb_peek(&sk->sk_receive_queue); ++ while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) { ++ if (UNIXCB(skb).fp && skb->sk && ++ (!sock_flag(skb->sk, SOCK_DEAD) || unix_peer(sk) == skb->sk)) { ++ struct scm_fp_list *fpl = UNIXCB(skb).fp; ++ int i; ++ ++ for (i = fpl->count-1; i >= 0; i--) { ++ if (cpt_object_add(CPT_OBJ_FILE, fpl->fp[i], ctx) == NULL) ++ return -ENOMEM; ++ } ++ } ++ ++ spin_lock_irq(&sk->sk_receive_queue.lock); ++ skb = skb->next; ++ spin_unlock_irq(&sk->sk_receive_queue.lock); ++ } ++ ++ return 0; ++} ++ ++int cpt_collect_passedfds(cpt_context_t * ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_FILE) { ++ struct file *file = obj->o_obj; ++ ++ if (S_ISSOCK(file->f_dentry->d_inode->i_mode)) { ++ int err; ++ ++ if ((err = collect_one_passedfd(file, ctx)) < 0) ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++/* ====== End of FD passing ====== */ ++ ++/* Must be called under bh_lock_sock() */ ++ ++void clear_backlog(struct sock *sk) ++{ ++ struct sk_buff *skb = sk->sk_backlog.head; ++ ++ sk->sk_backlog.head = sk->sk_backlog.tail = NULL; ++ while (skb) { ++ struct sk_buff *next = skb->next; ++ ++ skb->next = NULL; ++ kfree_skb(skb); ++ skb = next; ++ } ++} ++ ++void release_sock_nobacklog(struct sock *sk) ++{ ++ spin_lock_bh(&(sk->sk_lock.slock)); ++ clear_backlog(sk); ++ sk->sk_lock.owner = NULL; ++ if (waitqueue_active(&(sk->sk_lock.wq))) ++ wake_up(&(sk->sk_lock.wq)); ++ spin_unlock_bh(&(sk->sk_lock.slock)); ++} ++ ++int cpt_dump_skb(int type, int owner, struct sk_buff *skb, ++ struct cpt_context *ctx) ++{ ++ struct cpt_skb_image *v = cpt_get_buf(ctx); ++ loff_t saved_obj; ++ struct timeval tmptv; ++ ++ cpt_push_object(&saved_obj, ctx); ++ cpt_open_object(NULL, ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_SKB; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ v->cpt_owner = owner; ++ v->cpt_queue = type; ++ skb_get_timestamp(skb, &tmptv); ++ v->cpt_stamp = 
cpt_timeval_export(&tmptv); ++ v->cpt_hspace = skb->data - skb->head; ++ v->cpt_tspace = skb->end - skb->tail; ++ v->cpt_h = skb->h.raw - skb->head; ++ v->cpt_nh = skb->nh.raw - skb->head; ++ v->cpt_mac = skb->mac.raw - skb->head; ++ if (sizeof(skb->cb) < sizeof(v->cpt_cb)) BUG(); ++ memcpy(v->cpt_cb, skb->cb, sizeof(v->cpt_cb)); ++ if (sizeof(skb->cb) > sizeof(v->cpt_cb)) { ++ int i; ++ for (i=sizeof(v->cpt_cb); i<sizeof(skb->cb); i++) { ++ if (skb->cb[i]) { ++ wprintk_ctx("dirty skb cb"); ++ break; ++ } ++ } ++ } ++ v->cpt_len = skb->len; ++ v->cpt_mac_len = skb->mac_len; ++ v->cpt_csum = skb->csum; ++ v->cpt_local_df = skb->local_df; ++ v->cpt_pkt_type = skb->pkt_type; ++ v->cpt_ip_summed = skb->ip_summed; ++ v->cpt_priority = skb->priority; ++ v->cpt_protocol = skb->protocol; ++ v->cpt_security = 0; ++ v->cpt_tso_segs = skb_shinfo(skb)->tso_segs; ++ v->cpt_tso_size = skb_shinfo(skb)->tso_size; ++ if (skb_shinfo(skb)->ufo_size) { ++ eprintk_ctx("skb ufo is not supported\n"); ++ return -EINVAL; ++ } ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ if (skb->len + (skb->data - skb->head) > 0) { ++ struct cpt_obj_bits ob; ++ loff_t saved_obj2; ++ ++ cpt_push_object(&saved_obj2, ctx); ++ cpt_open_object(NULL, ctx); ++ ob.cpt_next = CPT_NULL; ++ ob.cpt_object = CPT_OBJ_BITS; ++ ob.cpt_hdrlen = sizeof(ob); ++ ob.cpt_content = CPT_CONTENT_DATA; ++ ob.cpt_size = skb->len + v->cpt_hspace; ++ ++ ctx->write(&ob, sizeof(ob), ctx); ++ ++ ctx->write(skb->head, (skb->data-skb->head) + (skb->len-skb->data_len), ctx); ++ if (skb->data_len) { ++ int offset = skb->len - skb->data_len; ++ while (offset < skb->len) { ++ int copy = skb->len - offset; ++ if (copy > PAGE_SIZE) ++ copy = PAGE_SIZE; ++ (void)cpt_get_buf(ctx); ++ if (skb_copy_bits(skb, offset, ctx->tmpbuf, copy)) ++ BUG(); ++ ctx->write(ctx->tmpbuf, copy, ctx); ++ __cpt_release_buf(ctx); ++ offset += copy; ++ } ++ } ++ ++ ctx->align(ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_obj2, ctx); ++ 
} ++ ++ if (skb->sk && skb->sk->sk_family == AF_UNIX) { ++ struct scm_fp_list *fpl = UNIXCB(skb).fp; ++ ++ if (fpl) { ++ int i; ++ ++ for (i = 0; i < fpl->count; i++) { ++ struct cpt_fd_image v; ++ cpt_object_t *obj; ++ loff_t saved_obj2; ++ ++ obj = lookup_cpt_object(CPT_OBJ_FILE, fpl->fp[i], ctx); ++ ++ if (!obj) { ++ eprintk_ctx("lost passed FD\n"); ++ return -EINVAL; ++ } ++ ++ cpt_push_object(&saved_obj2, ctx); ++ cpt_open_object(NULL, ctx); ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_FILEDESC; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_VOID; ++ ++ v.cpt_fd = i; ++ v.cpt_file = obj->o_pos; ++ v.cpt_flags = 0; ++ ctx->write(&v, sizeof(v), ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_obj2, ctx); ++ } ++ } ++ } ++ ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ return 0; ++} ++ ++static int dump_rqueue(int idx, struct sock *sk, struct cpt_context *ctx) ++{ ++ struct sk_buff *skb; ++ struct sock *sk_cache = NULL; ++ ++ skb = skb_peek(&sk->sk_receive_queue); ++ while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) { ++ int err; ++ ++ if (sk->sk_family == AF_UNIX) { ++ cpt_object_t *obj; ++ if (skb->sk != sk_cache) { ++ idx = -1; ++ sk_cache = NULL; ++ obj = lookup_cpt_object(CPT_OBJ_SOCKET, skb->sk, ctx); ++ if (obj) { ++ idx = obj->o_index; ++ sk_cache = skb->sk; ++ } else if (unix_peer(sk) != skb->sk) ++ goto next_skb; ++ } ++ } ++ ++ err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, ctx); ++ if (err) ++ return err; ++ ++next_skb: ++ spin_lock_irq(&sk->sk_receive_queue.lock); ++ skb = skb->next; ++ spin_unlock_irq(&sk->sk_receive_queue.lock); ++ } ++ return 0; ++} ++ ++static int dump_wqueue(int idx, struct sock *sk, struct cpt_context *ctx) ++{ ++ struct sk_buff *skb; ++ ++ skb = skb_peek(&sk->sk_write_queue); ++ while (skb && skb != (struct sk_buff*)&sk->sk_write_queue) { ++ int err = cpt_dump_skb(CPT_SKB_WQ, idx, skb, ctx); ++ if (err) ++ return err; ++ ++ spin_lock_irq(&sk->sk_write_queue.lock); ++ skb = 
skb->next; ++ spin_unlock_irq(&sk->sk_write_queue.lock); ++ } ++ return 0; ++} ++ ++void cpt_dump_sock_attr(struct sock *sk, cpt_context_t *ctx) ++{ ++ loff_t saved_obj; ++ if (sk->sk_filter) { ++ struct cpt_obj_bits v; ++ ++ cpt_push_object(&saved_obj, ctx); ++ cpt_open_object(NULL, ctx); ++ ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_SKFILTER; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_DATA; ++ v.cpt_size = sk->sk_filter->len*sizeof(struct sock_filter); ++ ++ ctx->write(&v, sizeof(v), ctx); ++ ctx->write(sk->sk_filter->insns, v.cpt_size, ctx); ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ } ++ if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) { ++ cpt_push_object(&saved_obj, ctx); ++ cpt_dump_mcfilter(sk, ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ } ++} ++ ++/* Dump socket content */ ++ ++int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx) ++{ ++ struct cpt_sock_image *v = cpt_get_buf(ctx); ++ struct socket *sock; ++ ++ cpt_open_object(obj, ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_SOCKET; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ v->cpt_file = CPT_NULL; ++ sock = sk->sk_socket; ++ if (sock && sock->file) { ++ cpt_object_t *tobj; ++ tobj = lookup_cpt_object(CPT_OBJ_FILE, sock->file, ctx); ++ if (tobj) ++ v->cpt_file = tobj->o_pos; ++ } ++ v->cpt_index = index; ++ v->cpt_parent = parent; ++ ++ if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) { ++ if (sock && !obj->o_lock) { ++ lock_sock(sk); ++ obj->o_lock = 1; ++ } ++ } ++ ++ /* Some bits stored in inode */ ++ v->cpt_ssflags = sock ? sock->flags : 0; ++ v->cpt_sstate = sock ? sock->state : 0; ++ v->cpt_passcred = sock ? 
test_bit(SOCK_PASSCRED, &sock->flags) : 0; ++ ++ /* Common data */ ++ v->cpt_family = sk->sk_family; ++ v->cpt_type = sk->sk_type; ++ v->cpt_state = sk->sk_state; ++ v->cpt_reuse = sk->sk_reuse; ++ v->cpt_zapped = sock_flag(sk, SOCK_ZAPPED); ++ v->cpt_shutdown = sk->sk_shutdown; ++ v->cpt_userlocks = sk->sk_userlocks; ++ v->cpt_no_check = sk->sk_no_check; ++ v->cpt_zapped = sock_flag(sk, SOCK_DBG); ++ v->cpt_rcvtstamp = sock_flag(sk, SOCK_RCVTSTAMP); ++ v->cpt_localroute = sock_flag(sk, SOCK_LOCALROUTE); ++ v->cpt_protocol = sk->sk_protocol; ++ v->cpt_err = sk->sk_err; ++ v->cpt_err_soft = sk->sk_err_soft; ++ v->cpt_max_ack_backlog = sk->sk_max_ack_backlog; ++ v->cpt_priority = sk->sk_priority; ++ v->cpt_rcvlowat = sk->sk_rcvlowat; ++ v->cpt_rcvtimeo = CPT_NULL; ++ if (sk->sk_rcvtimeo != MAX_SCHEDULE_TIMEOUT) ++ v->cpt_rcvtimeo = sk->sk_rcvtimeo > INT_MAX ? INT_MAX : sk->sk_rcvtimeo; ++ v->cpt_sndtimeo = CPT_NULL; ++ if (sk->sk_sndtimeo != MAX_SCHEDULE_TIMEOUT) ++ v->cpt_sndtimeo = sk->sk_sndtimeo > INT_MAX ? INT_MAX : sk->sk_sndtimeo; ++ v->cpt_rcvbuf = sk->sk_rcvbuf; ++ v->cpt_sndbuf = sk->sk_sndbuf; ++ v->cpt_bound_dev_if = sk->sk_bound_dev_if; ++ v->cpt_flags = sk->sk_flags; ++ v->cpt_lingertime = CPT_NULL; ++ if (sk->sk_lingertime != MAX_SCHEDULE_TIMEOUT) ++ v->cpt_lingertime = sk->sk_lingertime > INT_MAX ? 
INT_MAX : sk->sk_lingertime; ++ v->cpt_peer_pid = sk->sk_peercred.pid; ++ v->cpt_peer_uid = sk->sk_peercred.uid; ++ v->cpt_peer_gid = sk->sk_peercred.gid; ++ v->cpt_stamp = cpt_timeval_export(&sk->sk_stamp); ++ ++ v->cpt_peer = -1; ++ v->cpt_socketpair = 0; ++ v->cpt_deleted = 0; ++ ++ v->cpt_laddrlen = 0; ++ if (sock) { ++ int alen = sizeof(v->cpt_laddr); ++ int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_laddr, &alen, 0); ++ if (err) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ v->cpt_laddrlen = alen; ++ } ++ v->cpt_raddrlen = 0; ++ if (sock) { ++ int alen = sizeof(v->cpt_raddr); ++ int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_raddr, &alen, 2); ++ if (!err) ++ v->cpt_raddrlen = alen; ++ } ++ ++ if (sk->sk_family == AF_UNIX) { ++ if (unix_sk(sk)->dentry) { ++ struct dentry *d = unix_sk(sk)->dentry; ++ v->cpt_deleted = !IS_ROOT(d) && d_unhashed(d); ++ if (!v->cpt_deleted) { ++ int err = 0; ++ char *path; ++ unsigned long pg = __get_free_page(GFP_KERNEL); ++ ++ if (!pg) { ++ cpt_release_buf(ctx); ++ return -ENOMEM; ++ } ++ ++ path = d_path(d, unix_sk(sk)->mnt, (char *)pg, PAGE_SIZE); ++ ++ if (!IS_ERR(path)) { ++ int len = strlen(path); ++ if (len < 126) { ++ strcpy(((char*)v->cpt_laddr)+2, path); ++ v->cpt_laddrlen = len + 2; ++ } else { ++ wprintk_ctx("af_unix path is too long: %s (%s)\n", path, ((char*)v->cpt_laddr)+2); ++ } ++ err = cpt_verify_overmount(path, d, unix_sk(sk)->mnt, ctx); ++ } else { ++ eprintk_ctx("cannot get path of an af_unix socket\n"); ++ err = PTR_ERR(path); ++ } ++ free_page(pg); ++ if (err) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ } ++ } ++ ++ /* If the socket is connected, find its peer. If peer is not ++ * in our table, the socket is connected to external process ++ * and we consider it disconnected. 
++ */ ++ if (unix_peer(sk)) { ++ cpt_object_t *pobj; ++ pobj = lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(sk), ctx); ++ if (pobj) ++ v->cpt_peer = pobj->o_index; ++ else ++ v->cpt_shutdown = SHUTDOWN_MASK; ++ ++ if (unix_peer(unix_peer(sk)) == sk) ++ v->cpt_socketpair = 1; ++ } ++ ++ /* If the socket shares address with another socket it is ++ * child of some listening socket. Find and record it. */ ++ if (unix_sk(sk)->addr && ++ atomic_read(&unix_sk(sk)->addr->refcnt) > 1 && ++ sk->sk_state != TCP_LISTEN) { ++ cpt_object_t *pobj; ++ for_each_object(pobj, CPT_OBJ_SOCKET) { ++ struct sock *psk = pobj->o_obj; ++ if (psk->sk_family == AF_UNIX && ++ psk->sk_state == TCP_LISTEN && ++ unix_sk(psk)->addr == unix_sk(sk)->addr) { ++ v->cpt_parent = pobj->o_index; ++ break; ++ } ++ } ++ } ++ } ++ ++ if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) ++ cpt_dump_socket_in(v, sk, ctx); ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ cpt_dump_sock_attr(sk, ctx); ++ ++ dump_rqueue(index, sk, ctx); ++ if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) { ++ dump_wqueue(index, sk, ctx); ++ cpt_dump_ofo_queue(index, sk, ctx); ++ } ++ ++ if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6) ++ && sk->sk_state == TCP_LISTEN) ++ cpt_dump_synwait_queue(sk, index, ctx); ++ ++ cpt_close_object(ctx); ++ ++ if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6) ++ && sk->sk_state == TCP_LISTEN) ++ cpt_dump_accept_queue(sk, index, ctx); ++ ++ return 0; ++} ++ ++int cpt_dump_orphaned_sockets(struct cpt_context *ctx) ++{ ++ int i; ++ ++ cpt_open_section(ctx, CPT_SECT_ORPHANS); ++ ++ for (i = 0; i < tcp_hashinfo.ehash_size; i++) { ++ struct sock *sk; ++ struct hlist_node *node; ++ ++retry: ++ read_lock_bh(&tcp_hashinfo.ehash[i].lock); ++ sk_for_each(sk, node, &tcp_hashinfo.ehash[i].chain) { ++ ++ if (VE_OWNER_SK(sk) != get_exec_env()) ++ continue; ++ if (sk->sk_socket) ++ continue; ++ if (!sock_flag(sk, SOCK_DEAD)) ++ continue; ++ if 
(lookup_cpt_object(CPT_OBJ_SOCKET, sk, ctx)) ++ continue; ++ sock_hold(sk); ++ read_unlock_bh(&tcp_hashinfo.ehash[i].lock); ++ ++ local_bh_disable(); ++ bh_lock_sock(sk); ++ if (sock_owned_by_user(sk)) ++ eprintk_ctx("BUG: sk locked by whom?\n"); ++ sk->sk_lock.owner = (void *)1; ++ bh_unlock_sock(sk); ++ local_bh_enable(); ++ ++ cpt_dump_socket(NULL, sk, -1, -1, ctx); ++ ++ local_bh_disable(); ++ bh_lock_sock(sk); ++ sk->sk_lock.owner = NULL; ++ clear_backlog(sk); ++ tcp_done(sk); ++ bh_unlock_sock(sk); ++ local_bh_enable(); ++ sock_put(sk); ++ ++ goto retry; ++ } ++ read_unlock_bh(&tcp_hashinfo.ehash[i].lock); ++ } ++ cpt_close_section(ctx); ++ return 0; ++} ++ ++static int can_dump(struct sock *sk, cpt_context_t *ctx) ++{ ++ switch (sk->sk_family) { ++ case AF_NETLINK: ++ if (((struct netlink_sock *)sk)->cb) { ++ eprintk_ctx("netlink socket has active callback\n"); ++ return 0; ++ } ++ break; ++ } ++ return 1; ++} ++ ++/* We are not going to block suspend when we have external AF_UNIX connections. ++ * But we cannot stop feed of new packets/connections to our environment ++ * from outside. Taking into account that it is intrincically unreliable, ++ * we collect some amount of data, but when checkpointing/restoring we ++ * are going to drop everything, which does not make sense: skbs sent ++ * by outside processes, connections from outside etc. etc. ++ */ ++ ++/* The first pass. 
When we see socket referenced by a file, we just ++ * add it to socket table */ ++int cpt_collect_socket(struct file *file, cpt_context_t * ctx) ++{ ++ cpt_object_t *obj; ++ struct socket *sock; ++ struct sock *sk; ++ ++ if (!S_ISSOCK(file->f_dentry->d_inode->i_mode)) ++ return -ENOTSOCK; ++ sock = &container_of(file->f_dentry->d_inode, struct socket_alloc, vfs_inode)->socket; ++ sk = sock->sk; ++ if (!can_dump(sk, ctx)) ++ return -EBUSY; ++ if ((obj = cpt_object_add(CPT_OBJ_SOCKET, sk, ctx)) == NULL) ++ return -ENOMEM; ++ obj->o_parent = file; ++ ++ return 0; ++} ++ ++/* ++ * We should end with table containing: ++ * * all sockets opened by our processes in the table. ++ * * all the sockets queued in listening queues on _our_ listening sockets, ++ * which are connected to our opened sockets. ++ */ ++ ++static int collect_one_unix_listening_sock(cpt_object_t *obj, cpt_context_t * ctx) ++{ ++ struct sock *sk = obj->o_obj; ++ cpt_object_t *cobj; ++ struct sk_buff *skb; ++ ++ skb = skb_peek(&sk->sk_receive_queue); ++ while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) { ++ struct sock *lsk = skb->sk; ++ if (unix_peer(lsk) && ++ lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(lsk), ctx)) { ++ if ((cobj = cpt_object_add(CPT_OBJ_SOCKET, lsk, ctx)) == NULL) ++ return -ENOMEM; ++ cobj->o_parent = obj->o_parent; ++ } ++ spin_lock_irq(&sk->sk_receive_queue.lock); ++ skb = skb->next; ++ spin_unlock_irq(&sk->sk_receive_queue.lock); ++ } ++ ++ return 0; ++} ++ ++int cpt_index_sockets(cpt_context_t * ctx) ++{ ++ cpt_object_t *obj; ++ unsigned long index = 0; ++ ++ /* Collect not-yet-accepted children of listening sockets. */ ++ for_each_object(obj, CPT_OBJ_SOCKET) { ++ struct sock *sk = obj->o_obj; ++ ++ if (sk->sk_state != TCP_LISTEN) ++ continue; ++ ++ if (sk->sk_family == AF_UNIX) ++ collect_one_unix_listening_sock(obj, ctx); ++ } ++ ++ /* Assign indices to all the sockets. 
*/ ++ for_each_object(obj, CPT_OBJ_SOCKET) { ++ struct sock *sk = obj->o_obj; ++ cpt_obj_setindex(obj, index++, ctx); ++ ++ if (sk->sk_socket && sk->sk_socket->file) { ++ cpt_object_t *tobj; ++ tobj = lookup_cpt_object(CPT_OBJ_FILE, sk->sk_socket->file, ctx); ++ if (tobj) ++ cpt_obj_setindex(tobj, obj->o_index, ctx); ++ } ++ } ++ ++ return 0; ++} ++ ++void cpt_unlock_sockets(cpt_context_t * ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_SOCKET) { ++ struct sock *sk = obj->o_obj; ++ if (sk && obj->o_lock) { ++ if (sk->sk_socket) ++ release_sock(sk); ++ } ++ } ++} ++ ++void cpt_kill_sockets(cpt_context_t * ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_SOCKET) { ++ struct sock *sk = obj->o_obj; ++ if (sk && obj->o_lock) { ++ cpt_kill_socket(sk, ctx); ++ if (sk->sk_socket) ++ release_sock_nobacklog(sk); ++ } ++ } ++} ++ ++__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx) ++{ ++ struct fasync_struct *fa; ++ struct inode *inode = file->f_dentry->d_inode; ++ struct socket *sock; ++ ++ sock = &container_of(inode, struct socket_alloc, vfs_inode)->socket; ++ ++ for (fa = sock->fasync_list; fa; fa = fa->fa_next) { ++ if (fa->fa_file == file) ++ return fa->fa_fd; ++ } ++ return -1; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_socket.h linux-2.6.16-026test015/kernel/cpt/cpt_socket.h +--- linux-2.6.16.orig/kernel/cpt/cpt_socket.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_socket.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,33 @@ ++struct sock; ++ ++int cpt_collect_passedfds(cpt_context_t *); ++int cpt_index_sockets(cpt_context_t *); ++int cpt_collect_socket(struct file *, cpt_context_t *); ++int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx); ++int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx); ++int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx); ++int 
rst_sockets(struct cpt_context *ctx); ++int rst_sockets_complete(struct cpt_context *ctx); ++int cpt_dump_orphaned_sockets(struct cpt_context *ctx); ++ ++int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx); ++struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx); ++ ++void cpt_unlock_sockets(cpt_context_t *); ++void cpt_kill_sockets(cpt_context_t *); ++ ++ ++int cpt_kill_socket(struct sock *, cpt_context_t *); ++int cpt_dump_socket_in(struct cpt_sock_image *, struct sock *, struct cpt_context*); ++int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *, struct cpt_context *ctx); ++__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx); ++int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *); ++int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si, loff_t pos, struct cpt_context *ctx); ++int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx); ++int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct cpt_context *ctx); ++int cpt_dump_mcfilter(struct sock *sk, struct cpt_context *ctx); ++ ++int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v, ++ loff_t pos, cpt_context_t *ctx); ++int rst_sk_mcfilter_in6(struct sock *sk, struct cpt_sockmc_image *v, ++ loff_t pos, cpt_context_t *ctx); +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_socket_in.c linux-2.6.16-026test015/kernel/cpt/cpt_socket_in.c +--- linux-2.6.16.orig/kernel/cpt/cpt_socket_in.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_socket_in.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,443 @@ ++/* ++ * ++ * kernel/cpt/cpt_socket_in.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/fs.h> ++#include <linux/socket.h> ++#include <linux/tcp.h> ++#include <net/sock.h> ++#include <net/tcp.h> ++#include <linux/igmp.h> ++#include <linux/ipv6.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_socket.h" ++#include "cpt_kernel.h" ++ ++static inline __u32 jiffies_export(unsigned long tmo) ++{ ++ __s32 delta = (long)(tmo - jiffies); ++ return delta; ++} ++ ++static inline __u32 tcp_jiffies_export(__u32 tmo) ++{ ++ __s32 delta = tmo - tcp_time_stamp; ++ return delta; ++} ++ ++int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx) ++{ ++ struct sk_buff *skb; ++ struct tcp_sock *tp; ++ ++ if (sk->sk_type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP) ++ return 0; ++ ++ tp = tcp_sk(sk); ++ ++ skb = skb_peek(&tp->out_of_order_queue); ++ while (skb && skb != (struct sk_buff*)&tp->out_of_order_queue) { ++ int err; ++ ++ err = cpt_dump_skb(CPT_SKB_OFOQ, idx, skb, ctx); ++ if (err) ++ return err; ++ ++ spin_lock_irq(&tp->out_of_order_queue.lock); ++ skb = skb->next; ++ spin_unlock_irq(&tp->out_of_order_queue.lock); ++ } ++ return 0; ++} ++ ++static int cpt_dump_socket_tcp(struct cpt_sock_image *si, struct sock *sk, ++ struct cpt_context *ctx) ++{ ++ struct tcp_sock *tp = tcp_sk(sk); ++ ++ si->cpt_pred_flags = tp->pred_flags; ++ si->cpt_rcv_nxt = tp->rcv_nxt; ++ si->cpt_snd_nxt = tp->snd_nxt; ++ si->cpt_snd_una = tp->snd_una; ++ si->cpt_snd_sml = tp->snd_sml; ++ si->cpt_rcv_tstamp = tcp_jiffies_export(tp->rcv_tstamp); ++ si->cpt_lsndtime = tcp_jiffies_export(tp->lsndtime); ++ si->cpt_tcp_header_len = tp->tcp_header_len; ++ si->cpt_ack_pending = inet_csk(sk)->icsk_ack.pending; ++ si->cpt_quick = 
inet_csk(sk)->icsk_ack.quick; ++ si->cpt_pingpong = inet_csk(sk)->icsk_ack.pingpong; ++ si->cpt_blocked = inet_csk(sk)->icsk_ack.blocked; ++ si->cpt_ato = inet_csk(sk)->icsk_ack.ato; ++ si->cpt_ack_timeout = jiffies_export(inet_csk(sk)->icsk_ack.timeout); ++ si->cpt_lrcvtime = tcp_jiffies_export(inet_csk(sk)->icsk_ack.lrcvtime); ++ si->cpt_last_seg_size = inet_csk(sk)->icsk_ack.last_seg_size; ++ si->cpt_rcv_mss = inet_csk(sk)->icsk_ack.rcv_mss; ++ si->cpt_snd_wl1 = tp->snd_wl1; ++ si->cpt_snd_wnd = tp->snd_wnd; ++ si->cpt_max_window = tp->max_window; ++ si->cpt_pmtu_cookie = inet_csk(sk)->icsk_pmtu_cookie; ++ si->cpt_mss_cache = tp->mss_cache; ++ si->cpt_mss_cache_std = tp->mss_cache; /* FIXMW was tp->mss_cache_std */ ++ si->cpt_mss_clamp = tp->rx_opt.mss_clamp; ++ si->cpt_ext_header_len = inet_csk(sk)->icsk_ext_hdr_len; ++ si->cpt_ext2_header_len = 0; ++ si->cpt_ca_state = inet_csk(sk)->icsk_ca_state; ++ si->cpt_retransmits = inet_csk(sk)->icsk_retransmits; ++ si->cpt_reordering = tp->reordering; ++ si->cpt_frto_counter = tp->frto_counter; ++ si->cpt_frto_highmark = tp->frto_highmark; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) ++ // // si->cpt_adv_cong = tp->adv_cong; ++#endif ++ si->cpt_defer_accept = inet_csk(sk)->icsk_accept_queue.rskq_defer_accept; ++ si->cpt_backoff = inet_csk(sk)->icsk_backoff; ++ si->cpt_srtt = tp->srtt; ++ si->cpt_mdev = tp->mdev; ++ si->cpt_mdev_max = tp->mdev_max; ++ si->cpt_rttvar = tp->rttvar; ++ si->cpt_rtt_seq = tp->rtt_seq; ++ si->cpt_rto = inet_csk(sk)->icsk_rto; ++ si->cpt_packets_out = tp->packets_out; ++ si->cpt_left_out = tp->left_out; ++ si->cpt_retrans_out = tp->retrans_out; ++ si->cpt_lost_out = tp->lost_out; ++ si->cpt_sacked_out = tp->sacked_out; ++ si->cpt_fackets_out = tp->fackets_out; ++ si->cpt_snd_ssthresh = tp->snd_ssthresh; ++ si->cpt_snd_cwnd = tp->snd_cwnd; ++ si->cpt_snd_cwnd_cnt = tp->snd_cwnd_cnt; ++ si->cpt_snd_cwnd_clamp = tp->snd_cwnd_clamp; ++ si->cpt_snd_cwnd_used = tp->snd_cwnd_used; ++ 
si->cpt_snd_cwnd_stamp = tcp_jiffies_export(tp->snd_cwnd_stamp); ++ si->cpt_timeout = jiffies_export(inet_csk(sk)->icsk_timeout); ++ si->cpt_ka_timeout = 0; ++ si->cpt_rcv_wnd = tp->rcv_wnd; ++ si->cpt_rcv_wup = tp->rcv_wup; ++ si->cpt_write_seq = tp->write_seq; ++ si->cpt_pushed_seq = tp->pushed_seq; ++ si->cpt_copied_seq = tp->copied_seq; ++ si->cpt_tstamp_ok = tp->rx_opt.tstamp_ok; ++ si->cpt_wscale_ok = tp->rx_opt.wscale_ok; ++ si->cpt_sack_ok = tp->rx_opt.sack_ok; ++ si->cpt_saw_tstamp = tp->rx_opt.saw_tstamp; ++ si->cpt_snd_wscale = tp->rx_opt.snd_wscale; ++ si->cpt_rcv_wscale = tp->rx_opt.rcv_wscale; ++ si->cpt_nonagle = tp->nonagle; ++ si->cpt_keepalive_probes = tp->keepalive_probes; ++ si->cpt_rcv_tsval = tp->rx_opt.rcv_tsval; ++ si->cpt_rcv_tsecr = tp->rx_opt.rcv_tsecr; ++ si->cpt_ts_recent = tp->rx_opt.ts_recent; ++ si->cpt_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; ++ si->cpt_user_mss = tp->rx_opt.user_mss; ++ si->cpt_dsack = tp->rx_opt.dsack; ++ si->cpt_eff_sacks = tp->rx_opt.eff_sacks; ++ si->cpt_sack_array[0] = tp->duplicate_sack[0].start_seq; ++ si->cpt_sack_array[1] = tp->duplicate_sack[0].end_seq; ++ si->cpt_sack_array[2] = tp->selective_acks[0].start_seq; ++ si->cpt_sack_array[3] = tp->selective_acks[0].end_seq; ++ si->cpt_sack_array[4] = tp->selective_acks[1].start_seq; ++ si->cpt_sack_array[5] = tp->selective_acks[1].end_seq; ++ si->cpt_sack_array[6] = tp->selective_acks[2].start_seq; ++ si->cpt_sack_array[7] = tp->selective_acks[2].end_seq; ++ si->cpt_sack_array[8] = tp->selective_acks[3].start_seq; ++ si->cpt_sack_array[9] = tp->selective_acks[3].end_seq; ++ si->cpt_window_clamp = tp->window_clamp; ++ si->cpt_rcv_ssthresh = tp->rcv_ssthresh; ++ si->cpt_probes_out = inet_csk(sk)->icsk_probes_out; ++ si->cpt_num_sacks = tp->rx_opt.num_sacks; ++ si->cpt_advmss = tp->advmss; ++ si->cpt_syn_retries = inet_csk(sk)->icsk_syn_retries; ++ si->cpt_ecn_flags = tp->ecn_flags; ++ si->cpt_prior_ssthresh = tp->prior_ssthresh; ++ si->cpt_high_seq = 
tp->high_seq; ++ si->cpt_retrans_stamp = tp->retrans_stamp; ++ si->cpt_undo_marker = tp->undo_marker; ++ si->cpt_undo_retrans = tp->undo_retrans; ++ si->cpt_urg_seq = tp->urg_seq; ++ si->cpt_urg_data = tp->urg_data; ++ si->cpt_pending = inet_csk(sk)->icsk_pending; ++ si->cpt_urg_mode = tp->urg_mode; ++ si->cpt_snd_up = tp->snd_up; ++ si->cpt_keepalive_time = tp->keepalive_time; ++ si->cpt_keepalive_intvl = tp->keepalive_intvl; ++ si->cpt_linger2 = tp->linger2; ++ ++ if (sk->sk_state != TCP_LISTEN && ++ sk->sk_state != TCP_CLOSE && ++ sock_flag(sk, SOCK_KEEPOPEN)) { ++ si->cpt_ka_timeout = jiffies_export(sk->sk_timer.expires); ++ } ++ ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) ++ { ++ extern struct inet_connection_sock_af_ops ipv6_mapped; ++ if (sk->sk_family == AF_INET6 && ++ inet_csk(sk)->icsk_af_ops == &ipv6_mapped) ++ si->cpt_mapped = 1; ++ } ++#endif ++ ++ return 0; ++} ++ ++ ++int cpt_dump_socket_in(struct cpt_sock_image *si, struct sock *sk, ++ struct cpt_context *ctx) ++{ ++ struct inet_sock *inet = inet_sk(sk); ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ ++ if (sk->sk_family == AF_INET) { ++ struct sockaddr_in *sin = ((struct sockaddr_in*)si->cpt_laddr); ++ sin->sin_family = AF_INET; ++ sin->sin_port = inet->sport; ++ sin->sin_addr.s_addr = inet->rcv_saddr; ++ si->cpt_laddrlen = sizeof(*sin); ++ } else if (sk->sk_family == AF_INET6) { ++ struct sockaddr_in6 *sin6 = ((struct sockaddr_in6*)si->cpt_laddr); ++ sin6->sin6_family = AF_INET6; ++ sin6->sin6_port = inet->sport; ++ memcpy(&sin6->sin6_addr, &np->rcv_saddr, 16); ++ si->cpt_laddrlen = sizeof(*sin6); ++ } ++ if (!inet->num) ++ si->cpt_laddrlen = 0; ++ ++ si->cpt_daddr = inet->daddr; ++ si->cpt_dport = inet->dport; ++ si->cpt_saddr = inet->saddr; ++ si->cpt_rcv_saddr = inet->rcv_saddr; ++ si->cpt_sport = inet->sport; ++ si->cpt_uc_ttl = inet->uc_ttl; ++ si->cpt_tos = inet->tos; ++ si->cpt_cmsg_flags = inet->cmsg_flags; ++ si->cpt_mc_index = inet->mc_index; ++ si->cpt_mc_addr = 
inet->mc_addr; ++ si->cpt_hdrincl = inet->hdrincl; ++ si->cpt_mc_ttl = inet->mc_ttl; ++ si->cpt_mc_loop = inet->mc_loop; ++ si->cpt_pmtudisc = inet->pmtudisc; ++ si->cpt_recverr = inet->recverr; ++ si->cpt_freebind = inet->freebind; ++ si->cpt_idcounter = inet->id; ++ ++ si->cpt_cork_flags = inet->cork.flags; ++ si->cpt_cork_fragsize = 0; ++ si->cpt_cork_length = inet->cork.length; ++ si->cpt_cork_addr = inet->cork.addr; ++ si->cpt_cork_saddr = inet->cork.fl.fl4_src; ++ si->cpt_cork_daddr = inet->cork.fl.fl4_dst; ++ si->cpt_cork_oif = inet->cork.fl.oif; ++ if (inet->cork.rt) { ++ si->cpt_cork_fragsize = inet->cork.fragsize; ++ si->cpt_cork_saddr = inet->cork.rt->fl.fl4_src; ++ si->cpt_cork_daddr = inet->cork.rt->fl.fl4_dst; ++ si->cpt_cork_oif = inet->cork.rt->fl.oif; ++ } ++ ++ if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) { ++ struct udp_sock *up = udp_sk(sk); ++ si->cpt_udp_pending = up->pending; ++ si->cpt_udp_corkflag = up->corkflag; ++ si->cpt_udp_encap = up->encap_type; ++ si->cpt_udp_len = up->len; ++ } ++ ++ if (sk->sk_family == AF_INET6) { ++ memcpy(si->cpt_saddr6, &np->saddr, 16); ++ memcpy(si->cpt_rcv_saddr6, &np->rcv_saddr, 16); ++ memcpy(si->cpt_daddr6, &np->daddr, 16); ++ si->cpt_flow_label6 = np->flow_label; ++ si->cpt_frag_size6 = np->frag_size; ++ si->cpt_hop_limit6 = np->hop_limit; ++ si->cpt_mcast_hops6 = np->mcast_hops; ++ si->cpt_mcast_oif6 = np->mcast_oif; ++ si->cpt_rxopt6 = np->rxopt.all; ++ si->cpt_mc_loop6 = np->mc_loop; ++ si->cpt_recverr6 = np->recverr; ++ si->cpt_sndflow6 = np->sndflow; ++ si->cpt_pmtudisc6 = np->pmtudisc; ++ si->cpt_ipv6only6 = np->ipv6only; ++ si->cpt_mapped = 0; ++ } ++ ++ if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP) ++ cpt_dump_socket_tcp(si, sk, ctx); ++ ++ return 0; ++} ++ ++int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx) ++{ ++ struct request_sock *req; ++ ++ for (req=inet_csk(sk)->icsk_accept_queue.rskq_accept_head; req; req=req->dl_next) ++ 
cpt_dump_socket(NULL, req->sk, -1, index, ctx); ++ return 0; ++} ++ ++ ++static int dump_openreq(struct request_sock *req, struct sock *sk, int index, ++ struct cpt_context *ctx) ++{ ++ struct cpt_openreq_image *v = cpt_get_buf(ctx); ++ ++ cpt_open_object(NULL, ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_OPENREQ; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_VOID; ++ ++ v->cpt_rcv_isn = tcp_rsk(req)->rcv_isn; ++ v->cpt_snt_isn = tcp_rsk(req)->snt_isn; ++ v->cpt_rmt_port = inet_rsk(req)->rmt_port; ++ v->cpt_mss = req->mss; ++ // // v->cpt_family = (req->class == &or_ipv4 ? AF_INET : AF_INET6); ++ v->cpt_retrans = req->retrans; ++ v->cpt_snd_wscale = inet_rsk(req)->snd_wscale; ++ v->cpt_rcv_wscale = inet_rsk(req)->rcv_wscale; ++ v->cpt_tstamp_ok = inet_rsk(req)->tstamp_ok; ++ v->cpt_sack_ok = inet_rsk(req)->sack_ok; ++ v->cpt_wscale_ok = inet_rsk(req)->wscale_ok; ++ v->cpt_ecn_ok = inet_rsk(req)->ecn_ok; ++ v->cpt_acked = inet_rsk(req)->acked; ++ v->cpt_window_clamp = req->window_clamp; ++ v->cpt_rcv_wnd = req->rcv_wnd; ++ v->cpt_ts_recent = req->ts_recent; ++ v->cpt_expires = jiffies_export(req->expires); ++ ++ if (v->cpt_family == AF_INET) { ++ memcpy(v->cpt_loc_addr, &inet_rsk(req)->loc_addr, 4); ++ memcpy(v->cpt_rmt_addr, &inet_rsk(req)->rmt_addr, 4); ++ } else { ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) ++ memcpy(v->cpt_loc_addr, &inet6_rsk(req)->loc_addr, 16); ++ memcpy(v->cpt_rmt_addr, &inet6_rsk(req)->rmt_addr, 16); ++ v->cpt_iif = inet6_rsk(req)->iif; ++#endif ++ } ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ cpt_close_object(ctx); ++ return 0; ++} ++ ++int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx) ++{ ++ struct listen_sock *lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; ++ struct request_sock *req; ++ int i; ++ ++ for (i=0; i<TCP_SYNQ_HSIZE; i++) { ++ for (req=lopt->syn_table[i]; req; req=req->dl_next) { ++ loff_t saved_obj; ++ 
cpt_push_object(&saved_obj, ctx); ++ dump_openreq(req, sk, index, ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ } ++ } ++ return 0; ++} ++ ++ ++int cpt_kill_socket(struct sock *sk, cpt_context_t * ctx) ++{ ++ if (sk->sk_state != TCP_CLOSE && ++ (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) && ++ sk->sk_protocol == IPPROTO_TCP) { ++ if (sk->sk_state != TCP_LISTEN) ++ tcp_set_state(sk, TCP_CLOSE); ++ else ++ sk->sk_prot->disconnect(sk, 0); ++ } ++ return 0; ++} ++ ++int cpt_dump_mcfilter(struct sock *sk, cpt_context_t *ctx) ++{ ++ struct inet_sock *inet = inet_sk(sk); ++ struct ip_mc_socklist *iml; ++ ++ for (iml = inet->mc_list; iml; iml = iml->next) { ++ struct cpt_sockmc_image smi; ++ int scnt = 0; ++ int i; ++ ++ if (iml->sflist) ++ scnt = iml->sflist->sl_count*16; ++ ++ smi.cpt_next = sizeof(smi) + scnt; ++ smi.cpt_object = CPT_OBJ_SOCK_MCADDR; ++ smi.cpt_hdrlen = sizeof(smi); ++ smi.cpt_content = CPT_CONTENT_DATA; ++ ++ smi.cpt_family = AF_INET; ++ smi.cpt_mode = iml->sfmode; ++ smi.cpt_ifindex = iml->multi.imr_ifindex; ++ memset(&smi.cpt_mcaddr, 0, sizeof(smi.cpt_mcaddr)); ++ smi.cpt_mcaddr[0] = iml->multi.imr_multiaddr.s_addr; ++ ++ ctx->write(&smi, sizeof(smi), ctx); ++ ++ for (i = 0; i < scnt; i++) { ++ u32 addr[4]; ++ memset(&addr, 0, sizeof(addr)); ++ addr[0] = iml->sflist->sl_addr[i]; ++ ctx->write(&addr, sizeof(addr), ctx); ++ } ++ } ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ if (sk->sk_family == AF_INET6) { ++ struct ipv6_mc_socklist *mcl; ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ ++ for (mcl = np->ipv6_mc_list; mcl; mcl = mcl->next) { ++ struct cpt_sockmc_image smi; ++ int scnt = 0; ++ int i; ++ ++ if (mcl->sflist) ++ scnt = mcl->sflist->sl_count*16; ++ ++ smi.cpt_next = sizeof(smi) + scnt; ++ smi.cpt_object = CPT_OBJ_SOCK_MCADDR; ++ smi.cpt_hdrlen = sizeof(smi); ++ smi.cpt_content = CPT_CONTENT_DATA; ++ ++ smi.cpt_family = AF_INET6; ++ smi.cpt_mode = mcl->sfmode; ++ smi.cpt_ifindex = mcl->ifindex; ++ 
memcpy(&smi.cpt_mcaddr, &mcl->addr, sizeof(smi.cpt_mcaddr)); ++ ++ ctx->write(&smi, sizeof(smi), ctx); ++ for (i = 0; i < scnt; i++) ++ ctx->write(&mcl->sflist->sl_addr[i], 16, ctx); ++ } ++ } ++#endif ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_syscalls.h linux-2.6.16-026test015/kernel/cpt/cpt_syscalls.h +--- linux-2.6.16.orig/kernel/cpt/cpt_syscalls.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_syscalls.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,95 @@ ++#include <linux/unistd.h> ++#include <linux/syscalls.h> ++#include <asm/uaccess.h> ++ ++#define WRAP(c, args) return sys_##c args ++#define WRAP2(c, args) int err; mm_segment_t oldfs; \ ++ oldfs = get_fs(); set_fs(KERNEL_DS); \ ++ err = sys_##c args ;\ ++ set_fs(oldfs); \ ++ return err ++ ++static inline int sc_close(int fd) ++{ ++ WRAP(close, (fd)); ++} ++ ++static inline int sc_dup2(int fd1, int fd2) ++{ ++ WRAP(dup2, (fd1, fd2)); ++} ++ ++static inline int sc_unlink(char *name) ++{ ++ WRAP2(unlink, (name)); ++} ++ ++static inline int sc_pipe(int *pfd) ++{ ++ return do_pipe(pfd); ++} ++ ++static inline int sc_mknod(char *name, int mode, int dev) ++{ ++ WRAP2(mknod, (name, mode, dev)); ++} ++ ++static inline int sc_chmod(char *name, int mode) ++{ ++ WRAP2(mkdir, (name, mode)); ++} ++ ++static inline int sc_chown(char *name, int uid, int gid) ++{ ++ WRAP2(chown, (name, uid, gid)); ++} ++ ++static inline int sc_mkdir(char *name, int mode) ++{ ++ WRAP2(mkdir, (name, mode)); ++} ++ ++static inline int sc_rmdir(char *name) ++{ ++ WRAP2(rmdir, (name)); ++} ++ ++static inline int sc_mount(char *mntdev, char *mntpnt, char *type, unsigned long flags) ++{ ++ WRAP2(mount, (mntdev ? 
: "none", mntpnt, type, flags, NULL)); ++} ++ ++static inline int sc_mprotect(unsigned long start, size_t len, ++ unsigned long prot) ++{ ++ WRAP(mprotect, (start, len, prot)); ++} ++ ++static inline int sc_mlock(unsigned long start, size_t len) ++{ ++ WRAP(mlock, (start, len)); ++} ++ ++static inline int sc_munlock(unsigned long start, size_t len) ++{ ++ WRAP(munlock, (start, len)); ++} ++ ++static inline int sc_remap_file_pages(unsigned long start, size_t len, ++ unsigned long prot, unsigned long pgoff, ++ unsigned long flags) ++{ ++ WRAP(remap_file_pages, (start, len, prot, pgoff, flags)); ++} ++ ++static inline int sc_waitx(int pid, int opt) ++{ ++ WRAP(wait4, (pid, NULL, opt, NULL)); ++} ++ ++static inline int sc_flock(int fd, int flags) ++{ ++ WRAP(flock, (fd, flags)); ++} ++ ++extern int sc_execve(char *cms, char **argv, char **env); +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_sysvipc.c linux-2.6.16-026test015/kernel/cpt/cpt_sysvipc.c +--- linux-2.6.16.orig/kernel/cpt/cpt_sysvipc.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_sysvipc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,317 @@ ++/* ++ * ++ * kernel/cpt/cpt_sysvipc.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/major.h> ++#include <linux/pipe_fs_i.h> ++#include <linux/mman.h> ++#include <linux/shm.h> ++#include <linux/sem.h> ++#include <linux/msg.h> ++#include <asm/uaccess.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_kernel.h" ++ ++struct _warg { ++ struct file *file; ++ struct cpt_sysvshm_image *v; ++}; ++ ++static int dump_one_shm(struct shmid_kernel *shp, void *arg) ++{ ++ struct _warg *warg = arg; ++ struct cpt_sysvshm_image *v = (struct cpt_sysvshm_image *)warg->v; ++ ++ if (shp->shm_file != warg->file) ++ return 0; ++ ++ v->cpt_key = shp->shm_perm.key; ++ v->cpt_uid = shp->shm_perm.uid; ++ v->cpt_gid = shp->shm_perm.gid; ++ v->cpt_cuid = shp->shm_perm.cuid; ++ v->cpt_cgid = shp->shm_perm.cgid; ++ v->cpt_mode = shp->shm_perm.mode; ++ v->cpt_seq = shp->shm_perm.seq; ++ ++ v->cpt_id = shp->id; ++ v->cpt_segsz = shp->shm_segsz; ++ v->cpt_atime = shp->shm_atim; ++ v->cpt_ctime = shp->shm_ctim; ++ v->cpt_dtime = shp->shm_dtim; ++ v->cpt_creator = shp->shm_cprid; ++ v->cpt_last = shp->shm_lprid; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) ++ v->cpt_mlockuser = shp->mlock_user ? 
shp->mlock_user->uid : -1; ++#else ++ v->cpt_mlockuser = -1; ++#endif ++ return 1; ++} ++ ++int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx) ++{ ++ struct cpt_sysvshm_image *v = cpt_get_buf(ctx); ++ struct _warg warg; ++ ++ v->cpt_next = sizeof(*v); ++ v->cpt_object = CPT_OBJ_SYSV_SHM; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_VOID; ++ ++ warg.file = file; ++ warg.v = v; ++ if (sysvipc_walk_shm(dump_one_shm, &warg) == 0) { ++ cpt_release_buf(ctx); ++ return -ESRCH; ++ } ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ return 0; ++} ++ ++ ++int match_sem(int id, struct sem_array *sema, void *arg) ++{ ++ if (id != (unsigned long)arg) ++ return 0; ++ return sema->sem_nsems + 1; ++} ++ ++static int get_sem_nsem(int id, cpt_context_t *ctx) ++{ ++ int res; ++ res = sysvipc_walk_sem(match_sem, (void*)(unsigned long)id); ++ if (res > 0) ++ return res - 1; ++ eprintk_ctx("get_sem_nsem: SYSV semaphore %d not found\n", id); ++ return -ESRCH; ++} ++ ++static int dump_one_semundo(struct sem_undo *su, struct cpt_context *ctx) ++{ ++ struct cpt_sysvsem_undo_image v; ++ loff_t saved_obj; ++ ++ cpt_open_object(NULL, ctx); ++ ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_SYSVSEM_UNDO_REC; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_SEMUNDO; ++ v.cpt_id = su->semid; ++ v.cpt_nsem = get_sem_nsem(su->semid, ctx); ++ if ((int)v.cpt_nsem < 0) ++ return -ESRCH; ++ ++ ctx->write(&v, sizeof(v), ctx); ++ ++ cpt_push_object(&saved_obj, ctx); ++ ctx->write(su->semadj, v.cpt_nsem*sizeof(short), ctx); ++ cpt_pop_object(&saved_obj, ctx); ++ ++ cpt_close_object(ctx); ++ return 0; ++} ++ ++struct sem_warg { ++ int last_id; ++ struct cpt_sysvsem_image *v; ++}; ++ ++static int dump_one_sem(int id, struct sem_array *sma, void *arg) ++{ ++ struct sem_warg * warg = (struct sem_warg *)arg; ++ struct cpt_sysvsem_image *v = warg->v; ++ int i; ++ ++ if (warg->last_id != -1) { ++ if ((id % IPCMNI) <= warg->last_id) ++ return 
0; ++ } ++ ++ v->cpt_next = sizeof(*v); ++ v->cpt_object = CPT_OBJ_SYSV_SEM; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_SEMARRAY; ++ ++ v->cpt_key = sma->sem_perm.key; ++ v->cpt_uid = sma->sem_perm.uid; ++ v->cpt_gid = sma->sem_perm.gid; ++ v->cpt_cuid = sma->sem_perm.cuid; ++ v->cpt_cgid = sma->sem_perm.cgid; ++ v->cpt_mode = sma->sem_perm.mode; ++ v->cpt_seq = sma->sem_perm.seq; ++ ++ v->cpt_id = id; ++ v->cpt_ctime = sma->sem_ctime; ++ v->cpt_otime = sma->sem_otime; ++ ++ for (i=0; i<sma->sem_nsems; i++) { ++ struct { ++ __u32 semval; ++ __u32 sempid; ++ } *s = (void*)v + v->cpt_next; ++ if (v->cpt_next >= PAGE_SIZE - sizeof(*s)) ++ return -EINVAL; ++ s->semval = sma->sem_base[i].semval; ++ s->sempid = sma->sem_base[i].sempid; ++ v->cpt_next += sizeof(*s); ++ } ++ ++ warg->last_id = id % IPCMNI; ++ return 1; ++} ++ ++ ++int cpt_dump_sysvsem(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ struct sem_warg warg; ++ ++ /* Dumping semaphores is quite tricky because we cannot ++ * write to dump file under lock inside sysvipc_walk_sem(). 
++ */ ++ cpt_open_section(ctx, CPT_SECT_SYSV_SEM); ++ warg.last_id = -1; ++ warg.v = cpt_get_buf(ctx); ++ for (;;) { ++ if (sysvipc_walk_sem(dump_one_sem, &warg) <= 0) ++ break; ++ ctx->write(warg.v, warg.v->cpt_next, ctx); ++ } ++ cpt_release_buf(ctx); ++ cpt_close_section(ctx); ++ ++ cpt_open_section(ctx, CPT_SECT_SYSVSEM_UNDO); ++ for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) { ++ struct sem_undo_list *semu = obj->o_obj; ++ struct sem_undo *su; ++ struct cpt_object_hdr v; ++ loff_t saved_obj; ++ ++ cpt_open_object(obj, ctx); ++ ++ v.cpt_next = CPT_NULL; ++ v.cpt_object = CPT_OBJ_SYSVSEM_UNDO; ++ v.cpt_hdrlen = sizeof(v); ++ v.cpt_content = CPT_CONTENT_ARRAY; ++ ++ ctx->write(&v, sizeof(v), ctx); ++ ++ cpt_push_object(&saved_obj, ctx); ++ for (su = semu->proc_list; su; su = su->proc_next) { ++ if (su->semid != -1) { ++ int err; ++ err = dump_one_semundo(su, ctx); ++ if (err < 0) ++ return err; ++ } ++ } ++ cpt_pop_object(&saved_obj, ctx); ++ ++ cpt_close_object(ctx); ++ } ++ cpt_close_section(ctx); ++ return 0; ++} ++ ++static int collect_one_msg(int id, struct msg_queue *msq, void *arg) ++{ ++ int *retp = arg; ++ (*retp)++; ++ return 0; ++} ++ ++int cpt_collect_sysvmsg(cpt_context_t * ctx) ++{ ++ int ret = 0; ++ sysvipc_walk_msg(collect_one_msg, &ret); ++ if (ret) { ++ eprintk_ctx("SYSV msgqueues are not supported, found %d\n", ret); ++ return -EBUSY; ++ } ++ return 0; ++} ++ ++static int cpt_collect_sysvsem_undo(cpt_context_t *ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ if (tsk->exit_state) { ++ /* ipc/sem.c forgets to clear tsk->sysvsem.undo_list ++ * on exit. Grrr... 
*/ ++ continue; ++ } ++ if (tsk->sysvsem.undo_list && ++ cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx) == NULL) ++ return -ENOMEM; ++ } ++ ++ for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) { ++ struct sem_undo_list *semu = obj->o_obj; ++ ++ if (atomic_read(&semu->refcnt) != obj->o_count) { ++ eprintk_ctx("sem_undo_list is referenced outside %d %d\n", obj->o_count, atomic_read(&semu->refcnt)); ++ return -EBUSY; ++ } ++ } ++ return 0; ++} ++ ++static int collect_one_shm(struct shmid_kernel *shp, void *arg) ++{ ++ cpt_context_t *ctx = arg; ++ ++ if (__cpt_object_add(CPT_OBJ_FILE, shp->shm_file, GFP_ATOMIC, ctx) == NULL) ++ return -ENOMEM; ++ return 0; ++} ++ ++int cpt_collect_sysvshm(cpt_context_t * ctx) ++{ ++ int err; ++ ++ err = sysvipc_walk_shm(collect_one_shm, ctx); ++ ++ return err < 0 ? err : 0; ++} ++ ++int cpt_collect_sysv(cpt_context_t * ctx) ++{ ++ int err; ++ ++ err = cpt_collect_sysvsem_undo(ctx); ++ if (err) ++ return err; ++ err = cpt_collect_sysvmsg(ctx); ++ if (err) ++ return err; ++ err = cpt_collect_sysvshm(ctx); ++ if (err) ++ return err; ++ ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_tty.c linux-2.6.16-026test015/kernel/cpt/cpt_tty.c +--- linux-2.6.16.orig/kernel/cpt/cpt_tty.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_tty.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,216 @@ ++/* ++ * ++ * kernel/cpt/cpt_tty.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/major.h> ++#include <linux/tty.h> ++#include <asm/uaccess.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++ ++/* We must support at least N_TTY. */ ++ ++int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx) ++{ ++ struct tty_struct *tty = file->private_data; ++ cpt_object_t *obj; ++ struct cpt_obj_ref o; ++ loff_t saved_pos; ++ ++ obj = lookup_cpt_object(CPT_OBJ_TTY, tty, ctx); ++ if (!obj) ++ return -EINVAL; ++ ++ cpt_push_object(&saved_pos, ctx); ++ ++ o.cpt_next = sizeof(o); ++ o.cpt_object = CPT_OBJ_REF; ++ o.cpt_hdrlen = sizeof(o); ++ o.cpt_content = CPT_CONTENT_VOID; ++ o.cpt_pos = obj->o_pos; ++ ctx->write(&o, sizeof(o), ctx); ++ ++ cpt_pop_object(&saved_pos, ctx); ++ ++ return 0; ++} ++ ++int cpt_collect_tty(struct file *file, cpt_context_t * ctx) ++{ ++ struct tty_struct *tty = file->private_data; ++ ++ if (tty) { ++ if (cpt_object_add(CPT_OBJ_TTY, tty, ctx) == NULL) ++ return -ENOMEM; ++ if (tty->link) { ++ cpt_object_t *obj; ++ ++ obj = cpt_object_add(CPT_OBJ_TTY, tty->link, ctx); ++ if (obj == NULL) ++ return -ENOMEM; ++ /* Undo o_count, tty->link is not a reference */ ++ obj->o_count--; ++ } ++ } ++ return 0; ++} ++ ++int cpt_dump_tty(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ struct tty_struct *tty = obj->o_obj; ++ struct cpt_tty_image *v; ++ ++ if (tty->link) { ++ if (lookup_cpt_object(CPT_OBJ_TTY, tty->link, ctx) == NULL) { ++ eprintk_ctx("orphan pty %s %d\n", tty->name, tty->driver->subtype == PTY_TYPE_SLAVE); ++ return -EINVAL; ++ } ++ if (tty->link->link != tty) { ++ eprintk_ctx("bad pty pair\n"); ++ return -EINVAL; ++ } ++ if (tty->driver->type == TTY_DRIVER_TYPE_PTY && ++ 
tty->driver->subtype == PTY_TYPE_SLAVE && ++ tty->link->count) ++ obj->o_count++; ++ } ++ if (obj->o_count != tty->count) { ++ eprintk_ctx("tty %s is referenced outside %d %d\n", tty->name, obj->o_count, tty->count); ++ return -EBUSY; ++ } ++ ++ cpt_open_object(obj, ctx); ++ ++ v = cpt_get_buf(ctx); ++ v->cpt_next = -1; ++ v->cpt_object = CPT_OBJ_TTY; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_ARRAY; ++ ++ v->cpt_index = tty->index; ++ v->cpt_link = -1; ++ if (tty->link) ++ v->cpt_link = tty->link->index; ++ v->cpt_drv_type = tty->driver->type; ++ v->cpt_drv_subtype = tty->driver->subtype; ++ v->cpt_drv_flags = tty->driver->flags; ++ v->cpt_packet = tty->packet; ++ v->cpt_stopped = tty->stopped; ++ v->cpt_hw_stopped = tty->hw_stopped; ++ v->cpt_flow_stopped = tty->flow_stopped; ++ v->cpt_flags = tty->flags; ++ v->cpt_ctrl_status = tty->ctrl_status; ++ v->cpt_canon_data = tty->canon_data; ++ v->cpt_canon_head = tty->canon_head - tty->read_tail; ++ v->cpt_canon_column = tty->canon_column; ++ v->cpt_column = tty->column; ++ v->cpt_erasing = tty->erasing; ++ v->cpt_lnext = tty->lnext; ++ v->cpt_icanon = tty->icanon; ++ v->cpt_raw = tty->raw; ++ v->cpt_real_raw = tty->real_raw; ++ v->cpt_closing = tty->closing; ++ v->cpt_minimum_to_wake = tty->minimum_to_wake; ++ v->cpt_pgrp = 0; ++ if (tty->pgrp > 0) { ++ v->cpt_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tty->pgrp); ++ if ((int)v->cpt_pgrp < 0) { ++ dprintk_ctx("cannot map tty->pgrp %d -> %d\n", tty->pgrp, (int)v->cpt_pgrp); ++ v->cpt_pgrp = -1; ++ } ++ } ++ v->cpt_session = 0; ++ if (tty->session > 0) { ++ v->cpt_session = _pid_type_to_vpid(PIDTYPE_SID, tty->session); ++ if ((int)v->cpt_session < 0) { ++ eprintk_ctx("cannot map tty->session %d -> %d\n", tty->session, (int)v->cpt_session); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ } ++ memcpy(v->cpt_name, tty->name, 64); ++ v->cpt_ws_row = tty->winsize.ws_row; ++ v->cpt_ws_col = tty->winsize.ws_col; ++ v->cpt_ws_prow = tty->winsize.ws_ypixel; 
++ v->cpt_ws_pcol = tty->winsize.ws_xpixel; ++ if (tty->termios == NULL) { ++ eprintk_ctx("NULL termios"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ v->cpt_c_line = tty->termios->c_line; ++ v->cpt_c_iflag = tty->termios->c_iflag; ++ v->cpt_c_oflag = tty->termios->c_oflag; ++ v->cpt_c_cflag = tty->termios->c_cflag; ++ v->cpt_c_lflag = tty->termios->c_lflag; ++ memcpy(v->cpt_c_cc, tty->termios->c_cc, NCCS); ++ if (NCCS < 32) ++ memset(v->cpt_c_cc + NCCS, 255, 32 - NCCS); ++ memcpy(v->cpt_read_flags, tty->read_flags, sizeof(v->cpt_read_flags)); ++ ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ if (tty->read_buf && tty->read_cnt) { ++ struct cpt_obj_bits *v = cpt_get_buf(ctx); ++ loff_t saved_pos; ++ ++ cpt_push_object(&saved_pos, ctx); ++ cpt_open_object(NULL, ctx); ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_BITS; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_DATA; ++ v->cpt_size = tty->read_cnt; ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_release_buf(ctx); ++ ++ if (tty->read_cnt) { ++ int n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail); ++ ctx->write(tty->read_buf + tty->read_tail, n, ctx); ++ if (tty->read_cnt > n) ++ ctx->write(tty->read_buf, tty->read_cnt-n, ctx); ++ ctx->align(ctx); ++ } ++ ++ cpt_close_object(ctx); ++ cpt_pop_object(&saved_pos, ctx); ++ } ++ ++ cpt_close_object(ctx); ++ ++ return 0; ++} ++ ++__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx) ++{ ++ struct tty_struct * tty; ++ struct fasync_struct *fa; ++ ++ tty = (struct tty_struct *)file->private_data; ++ ++ for (fa = tty->fasync; fa; fa = fa->fa_next) { ++ if (fa->fa_file == file) ++ return fa->fa_fd; ++ } ++ return -1; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_ubc.c linux-2.6.16-026test015/kernel/cpt/cpt_ubc.c +--- linux-2.6.16.orig/kernel/cpt/cpt_ubc.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_ubc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,132 @@ ++/* ++ * ++ * 
kernel/cpt/cpt_ubc.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/types.h> ++#include <ub/beancounter.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++ ++cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ obj = cpt_object_add(CPT_OBJ_UBC, bc, ctx); ++ if (obj != NULL) { ++ if (obj->o_count == 1) ++ get_beancounter(bc); ++ if (bc->parent != NULL && obj->o_parent == NULL) ++ obj->o_parent = cpt_add_ubc(bc->parent, ctx); ++ } ++ return obj; ++} ++ ++__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ obj = lookup_cpt_object(CPT_OBJ_UBC, bc, ctx); ++ if (obj == NULL) { ++ char buf[48]; ++ print_ub_uid(bc, buf, sizeof(buf)); ++ printk(KERN_ERR "CPT: unknown ub %s (%p)\n", buf, bc); ++ dump_stack(); ++ return CPT_NULL; ++ } ++ return obj->o_pos; ++} ++ ++static void dump_one_bc_parm(__u64 *dmp, struct ubparm *prm, int held) ++{ ++ dmp[0] = (prm->barrier < UB_MAXVALUE ? prm->barrier : CPT_NULL); ++ dmp[1] = (prm->limit < UB_MAXVALUE ? prm->limit : CPT_NULL); ++ dmp[2] = (held ? prm->held : CPT_NULL); ++ dmp[3] = prm->maxheld; ++ dmp[4] = prm->minheld; ++ dmp[5] = prm->failcnt; ++} ++ ++static int dump_one_bc(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ struct user_beancounter *bc; ++ struct cpt_beancounter_image *v; ++ int i; ++ ++ bc = obj->o_obj; ++ v = cpt_get_buf(ctx); ++ ++ v->cpt_next = CPT_NULL; ++ v->cpt_object = CPT_OBJ_UBC; ++ v->cpt_hdrlen = sizeof(*v); ++ v->cpt_content = CPT_CONTENT_VOID; ++ ++ if (obj->o_parent != NULL) ++ v->cpt_parent = ((cpt_object_t *)obj->o_parent)->o_pos; ++ else ++ v->cpt_parent = CPT_NULL; ++ v->cpt_id = (obj->o_parent != NULL) ? 
bc->ub_uid : 0; ++ for (i = 0; i < UB_RESOURCES; i++) ++ dump_one_bc_parm(v->cpt_parms, bc->ub_parms, 0); ++ for (i = 0; i < UB_RESOURCES; i++) ++ dump_one_bc_parm(v->cpt_parms + UB_RESOURCES * 6, ++ bc->ub_store, 1); ++ memset(v->cpt_parms + UB_RESOURCES * 12, 0, ++ sizeof(v->cpt_parms) ++ - UB_RESOURCES * 12 * sizeof(v->cpt_parms[0])); ++ ++ cpt_open_object(obj, ctx); ++ ctx->write(v, sizeof(*v), ctx); ++ cpt_close_object(ctx); ++ ++ cpt_release_buf(ctx); ++ return 0; ++} ++ ++int cpt_dump_ubc(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ int skipped; ++ int top; ++ ++ cpt_open_section(ctx, CPT_SECT_UBC); ++ ++ do { ++ skipped = 0; ++ top = 0; ++ for_each_object(obj, CPT_OBJ_UBC) { ++ if (obj->o_parent == NULL) ++ top++; ++ if (obj->o_pos != CPT_NULL) ++ continue; ++ if (obj->o_parent != NULL && ++ ((cpt_object_t *)obj->o_parent)->o_pos == CPT_NULL) ++ skipped++; ++ else ++ dump_one_bc(obj, ctx); ++ } ++ } while (skipped && (top < 2)); ++ ++ cpt_close_section(ctx); ++ if (top > 1) { ++ eprintk_ctx("More than one top level ub exist"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++void cpt_finish_ubc(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_UBC) ++ put_beancounter(obj->o_obj); ++} +diff -upr linux-2.6.16.orig/kernel/cpt/cpt_ubc.h linux-2.6.16-026test015/kernel/cpt/cpt_ubc.h +--- linux-2.6.16.orig/kernel/cpt/cpt_ubc.h 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_ubc.h 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,9 @@ ++cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx); ++__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx); ++int cpt_dump_ubc(struct cpt_context *ctx); ++ ++struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx); ++int rst_undump_ubc(struct cpt_context *ctx); ++ ++void cpt_finish_ubc(struct cpt_context *ctx); ++void rst_finish_ubc(struct cpt_context *ctx); +diff -upr 
linux-2.6.16.orig/kernel/cpt/cpt_x8664.S linux-2.6.16-026test015/kernel/cpt/cpt_x8664.S +--- linux-2.6.16.orig/kernel/cpt/cpt_x8664.S 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/cpt_x8664.S 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,61 @@ ++#define ASSEMBLY 1 ++#include <linux/config.h> ++#include <linux/linkage.h> ++#include <asm/segment.h> ++#include <asm/smp.h> ++#include <asm/cache.h> ++#include <asm/errno.h> ++#include <asm/dwarf2.h> ++#include <asm/calling.h> ++#include <asm/msr.h> ++#include <asm/unistd.h> ++#include <asm/thread_info.h> ++#include <asm/hw_irq.h> ++#include <asm/errno.h> ++ ++ .code64 ++ ++ .macro FAKE_STACK_FRAME child_rip ++ /* push in order ss, rsp, eflags, cs, rip */ ++ xorq %rax, %rax ++ pushq %rax /* ss */ ++ pushq %rax /* rsp */ ++ pushq $(1<<9) /* eflags - interrupts on */ ++ pushq $__KERNEL_CS /* cs */ ++ pushq \child_rip /* rip */ ++ pushq %rax /* orig rax */ ++ .endm ++ ++ .macro UNFAKE_STACK_FRAME ++ addq $8*6, %rsp ++ .endm ++ ++ENTRY(asm_kernel_thread) ++ FAKE_STACK_FRAME $child_rip ++ SAVE_ALL ++ ++ # rdi: flags, rsi: usp, rdx: will be &pt_regs ++ movq %rdx,%rdi ++ orq $0x00800000,%rdi ++ movq $-1, %rsi ++ movq %rsp, %rdx ++ ++ xorl %r8d,%r8d ++ xorl %r9d,%r9d ++ pushq %rcx ++ call do_fork_pid ++ addq $8, %rsp ++ /* call do_fork */ ++ movq %rax,RAX(%rsp) ++ xorl %edi,%edi ++ RESTORE_ALL ++ UNFAKE_STACK_FRAME ++ ret ++ ++child_rip: ++ movq %rdi, %rax ++ movq %rsi, %rdi ++ call *%rax ++ xorq %rdi, %rdi ++ xorq %rsi, %rsi ++ call complete_and_exit +diff -upr linux-2.6.16.orig/kernel/cpt/rst_conntrack.c linux-2.6.16-026test015/kernel/cpt/rst_conntrack.c +--- linux-2.6.16.orig/kernel/cpt/rst_conntrack.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_conntrack.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,294 @@ ++/* ++ * ++ * kernel/cpt/rst_conntrack.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. 
++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/fs.h> ++#include <linux/socket.h> ++#include <linux/netdevice.h> ++#include <linux/inetdevice.h> ++#include <linux/rtnetlink.h> ++#include <linux/unistd.h> ++#include <linux/ve.h> ++#include <linux/vzcalluser.h> ++#include <linux/cpt_image.h> ++#include <linux/icmp.h> ++#include <linux/ip.h> ++ ++#if defined(CONFIG_VE_IPTABLES) && \ ++ (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)) ++ ++#include <linux/netfilter.h> ++#include <linux/netfilter_ipv4/ip_conntrack.h> ++#include <linux/netfilter_ipv4/ip_nat.h> ++#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> ++#include <linux/netfilter_ipv4/ip_conntrack_helper.h> ++#include <linux/netfilter_ipv4/ip_conntrack_core.h> ++#include <linux/netfilter_ipv4/ip_nat_helper.h> ++#include <linux/netfilter_ipv4/ip_nat_core.h> ++ ++#define ASSERT_READ_LOCK(x) do { } while (0) ++#define ASSERT_WRITE_LOCK(x) do { } while (0) ++ ++#include <linux/netfilter_ipv4/listhelp.h> ++ ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++ ++struct ct_holder ++{ ++ struct ct_holder *next; ++ struct ip_conntrack *ct; ++ int index; ++}; ++ ++static void decode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple, int dir) ++{ ++ tuple->dst.ip = v->cpt_dst; ++ tuple->dst.u.all = v->cpt_dstport; ++ tuple->dst.protonum = v->cpt_protonum; ++ tuple->dst.dir = v->cpt_dir; ++ if (dir != tuple->dst.dir) ++ wprintk("dir != tuple->dst.dir\n"); ++ ++ tuple->src.ip = v->cpt_src; ++ tuple->src.u.all = v->cpt_srcport; ++} ++ ++ ++static int undump_expect_list(struct ip_conntrack *ct, ++ struct cpt_ip_conntrack_image *ci, ++ loff_t pos, struct ct_holder 
*ct_list, ++ cpt_context_t *ctx) ++{ ++ loff_t end; ++ int err; ++ ++ end = pos + ci->cpt_next; ++ pos += ci->cpt_hdrlen; ++ while (pos < end) { ++ struct cpt_ip_connexpect_image v; ++ struct ip_conntrack_expect *exp; ++ struct ip_conntrack *sibling; ++ ++ err = rst_get_object(CPT_OBJ_NET_CONNTRACK_EXPECT, pos, &v, ctx); ++ if (err) ++ return err; ++ ++ sibling = NULL; ++ if (v.cpt_sibling_conntrack) { ++ struct ct_holder *c; ++ ++ for (c = ct_list; c; c = c->next) { ++ if (c->index == v.cpt_sibling_conntrack) { ++ sibling = c->ct; ++ break; ++ } ++ } ++ if (!sibling) { ++ eprintk_ctx("lost sibling of expectation\n"); ++ return -EINVAL; ++ } ++ } ++ ++ write_lock_bh(&ip_conntrack_lock); ++ ++ /* It is possible. Helper module could be just unregistered, ++ * if expectation were on the list, it would be destroyed. */ ++ if (ct->helper == NULL) { ++ write_unlock_bh(&ip_conntrack_lock); ++ dprintk_ctx("conntrack: no helper and non-trivial expectation\n"); ++ continue; ++ } ++ ++ exp = ip_conntrack_expect_alloc(NULL); ++ if (exp == NULL) { ++ write_unlock_bh(&ip_conntrack_lock); ++ return -ENOMEM; ++ } ++ ++ if (ct->helper->timeout && !del_timer(&exp->timeout)) { ++ /* Dying already. We can do nothing. 
*/ ++ write_unlock_bh(&ip_conntrack_lock); ++ dprintk_ctx("conntrack expectation is dying\n"); ++ continue; ++ } ++ ++ decode_tuple(&v.cpt_tuple, &exp->tuple, 0); ++ decode_tuple(&v.cpt_mask, &exp->mask, 0); ++ ++ exp->master = ct; ++ nf_conntrack_get(&ct->ct_general); ++ ip_conntrack_expect_insert(exp); ++#if 0 ++ if (sibling) { ++ exp->sibling = sibling; ++ sibling->master = exp; ++ LIST_DELETE(&ve_ip_conntrack_expect_list, exp); ++ ct->expecting--; ++ nf_conntrack_get(&master_ct(sibling)->infos[0]); ++ } else ++#endif ++ if (ct->helper->timeout) { ++ exp->timeout.expires = jiffies + v.cpt_timeout; ++ add_timer(&exp->timeout); ++ } ++ write_unlock_bh(&ip_conntrack_lock); ++ ++ pos += v.cpt_next; ++ } ++ return 0; ++} ++ ++static int undump_one_ct(struct cpt_ip_conntrack_image *ci, loff_t pos, ++ struct ct_holder **ct_list, cpt_context_t *ctx) ++{ ++ int err = 0; ++ struct ip_conntrack *conntrack; ++ struct ct_holder *c; ++ struct ip_conntrack_tuple orig, repl; ++ ++ c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL); ++ if (c == NULL) ++ return -ENOMEM; ++ ++ decode_tuple(&ci->cpt_tuple[0], &orig, 0); ++ decode_tuple(&ci->cpt_tuple[1], &repl, 1); ++ ++ conntrack = ip_conntrack_alloc(&orig, &repl, get_exec_env()->_ip_conntrack->ub); ++ if (!conntrack || IS_ERR(conntrack)) { ++ kfree(c); ++ return -ENOMEM; ++ } ++ ++ c->ct = conntrack; ++ c->next = *ct_list; ++ *ct_list = c; ++ c->index = ci->cpt_index; ++ ++ decode_tuple(&ci->cpt_tuple[0], &conntrack->tuplehash[0].tuple, 0); ++ decode_tuple(&ci->cpt_tuple[1], &conntrack->tuplehash[1].tuple, 1); ++ ++ conntrack->status = ci->cpt_status; ++ ++ memcpy(&conntrack->proto, ci->cpt_proto_data, sizeof(conntrack->proto)); ++ memcpy(&conntrack->help, ci->cpt_help_data, sizeof(conntrack->help)); ++ ++#ifdef CONFIG_IP_NF_NAT_NEEDED ++#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ ++ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) ++ conntrack->nat.masq_index = ci->cpt_masq_index; ++#endif ++ if (ci->cpt_initialized) { ++ 
conntrack->nat.info.seq[0].correction_pos = ci->cpt_nat_seq[0].cpt_correction_pos; ++ conntrack->nat.info.seq[0].offset_before = ci->cpt_nat_seq[0].cpt_offset_before; ++ conntrack->nat.info.seq[0].offset_after = ci->cpt_nat_seq[0].cpt_offset_after; ++ conntrack->nat.info.seq[1].correction_pos = ci->cpt_nat_seq[1].cpt_correction_pos; ++ conntrack->nat.info.seq[1].offset_before = ci->cpt_nat_seq[1].cpt_offset_before; ++ conntrack->nat.info.seq[1].offset_after = ci->cpt_nat_seq[1].cpt_offset_after; ++ } ++ if (conntrack->status & IPS_NAT_DONE_MASK) ++ ip_nat_hash_conntrack(conntrack); ++#endif ++ ++ write_lock_bh(&ip_conntrack_lock); ++ ++ if (ci->cpt_ct_helper) { ++ conntrack->helper = ip_conntrack_helper_find_get(&conntrack->tuplehash[1].tuple); ++ if (conntrack->helper == NULL) { ++ eprintk_ctx("conntrack: cannot find helper, some module is not loaded\n"); ++ err = -EINVAL; ++ } ++ } ++ ++ ip_conntrack_hash_insert(conntrack); ++ conntrack->timeout.expires = jiffies + ci->cpt_timeout; ++ ++ write_unlock_bh(&ip_conntrack_lock); ++ ++ if (err == 0 && ci->cpt_next > ci->cpt_hdrlen) ++ err = undump_expect_list(conntrack, ci, pos, *ct_list, ctx); ++ ++ return err; ++} ++ ++int rst_restore_ip_conntrack(struct cpt_context * ctx) ++{ ++ int err = 0; ++ loff_t sec = ctx->sections[CPT_SECT_NET_CONNTRACK]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ struct cpt_ip_conntrack_image ci; ++ struct ct_holder *c; ++ struct ct_holder *ct_list = NULL; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ if (sizeof(ci.cpt_proto_data) != sizeof(union ip_conntrack_proto)) { ++ eprintk_ctx("conntrack module ct->proto version mismatch\n"); ++ return -EINVAL; ++ } ++ if (sizeof(ci.cpt_help_data) != sizeof(union ip_conntrack_help)) { ++ eprintk_ctx("conntrack module ct->help version mismatch\n"); ++ return -EINVAL; ++ } ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_NET_CONNTRACK || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ 
endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ err = rst_get_object(CPT_OBJ_NET_CONNTRACK, sec, &ci, ctx); ++ if (err) ++ break; ++ err = undump_one_ct(&ci, sec, &ct_list, ctx); ++ if (err) ++ break; ++ sec += ci.cpt_next; ++ } ++ ++ while ((c = ct_list) != NULL) { ++ ct_list = c->next; ++ if (c->ct) ++ add_timer(&c->ct->timeout); ++ kfree(c); ++ } ++ ++ return err; ++} ++ ++#else ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++ ++int rst_restore_ip_conntrack(struct cpt_context * ctx) ++{ ++ if (ctx->sections[CPT_SECT_NET_CONNTRACK] != CPT_NULL) ++ return -EINVAL; ++ return 0; ++} ++ ++#endif +diff -upr linux-2.6.16.orig/kernel/cpt/rst_context.c linux-2.6.16-026test015/kernel/cpt/rst_context.c +--- linux-2.6.16.orig/kernel/cpt/rst_context.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_context.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,315 @@ ++/* ++ * ++ * kernel/cpt/rst_context.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/pagemap.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++ ++static ssize_t file_read(void *addr, size_t count, struct cpt_context *ctx) ++{ ++ mm_segment_t oldfs; ++ ssize_t err = -EBADF; ++ struct file *file = ctx->file; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ if (file) ++ err = file->f_op->read(file, addr, count, &file->f_pos); ++ set_fs(oldfs); ++ if (err != count) ++ return err >= 0 ? 
-EIO : err; ++ return 0; ++} ++ ++static ssize_t file_pread(void *addr, size_t count, struct cpt_context *ctx, loff_t pos) ++{ ++ mm_segment_t oldfs; ++ ssize_t err = -EBADF; ++ struct file *file = ctx->file; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ if (file) ++ err = file->f_op->read(file, addr, count, &pos); ++ set_fs(oldfs); ++ if (err != count) ++ return err >= 0 ? -EIO : err; ++ return 0; ++} ++ ++static void file_align(struct cpt_context *ctx) ++{ ++ struct file *file = ctx->file; ++ ++ if (file) ++ file->f_pos = CPT_ALIGN(file->f_pos); ++} ++ ++int rst_get_section(int type, struct cpt_context *ctx, loff_t *start, loff_t *end) ++{ ++ struct cpt_section_hdr hdr; ++ int err; ++ loff_t pos; ++ ++ pos = ctx->sections[type]; ++ *start = *end = pos; ++ ++ if (pos != CPT_NULL) { ++ if ((err = ctx->pread(&hdr, sizeof(hdr), ctx, pos)) != 0) ++ return err; ++ if (hdr.cpt_section != type || hdr.cpt_hdrlen < sizeof(hdr)) ++ return -EINVAL; ++ *start = pos + hdr.cpt_hdrlen; ++ *end = pos + hdr.cpt_next; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(rst_get_section); ++ ++void rst_context_init(struct cpt_context *ctx) ++{ ++ int i; ++ ++ memset(ctx, 0, sizeof(*ctx)); ++ ++ init_MUTEX(&ctx->main_sem); ++ ctx->refcount = 1; ++ ++ ctx->current_section = -1; ++ ctx->current_object = -1; ++ ctx->pagesize = PAGE_SIZE; ++ ctx->read = file_read; ++ ctx->pread = file_pread; ++ ctx->align = file_align; ++ for (i=0; i < CPT_SECT_MAX; i++) ++ ctx->sections[i] = CPT_NULL; ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ init_completion(&ctx->pgin_notify); ++#endif ++ cpt_object_init(ctx); ++} ++ ++static int parse_sections(loff_t start, loff_t end, cpt_context_t *ctx) ++{ ++ struct cpt_section_hdr h; ++ ++ while (start < end) { ++ int err; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, start); ++ if (err) ++ return err; ++ if (h.cpt_hdrlen < sizeof(h) || ++ h.cpt_next < h.cpt_hdrlen || ++ start + h.cpt_next > end) ++ return -EINVAL; ++ if (h.cpt_section >= CPT_SECT_MAX) ++ return -EINVAL; ++ 
ctx->sections[h.cpt_section] = start; ++ start += h.cpt_next; ++ } ++ return 0; ++} ++ ++int rst_open_dumpfile(struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_major_tail *v; ++ struct cpt_major_hdr h; ++ unsigned long size; ++ ++ err = -EBADF; ++ if (!ctx->file) ++ goto err_out; ++ ++ err = -ENOMEM; ++ ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL); ++ if (ctx->tmpbuf == NULL) ++ goto err_out; ++ __cpt_release_buf(ctx); ++ ++ size = ctx->file->f_dentry->d_inode->i_size; ++ ++ if (size & 7) { ++ err = -EINVAL; ++ goto err_out; ++ } ++ if (size < sizeof(struct cpt_major_hdr) + ++ sizeof(struct cpt_major_tail)) { ++ err = -EINVAL; ++ goto err_out; ++ } ++ err = ctx->pread(&h, sizeof(h), ctx, 0); ++ if (err) { ++ eprintk_ctx("too short image 1 %d\n", err); ++ goto err_out; ++ } ++ if (h.cpt_signature[0] != CPT_SIGNATURE0 || ++ h.cpt_signature[1] != CPT_SIGNATURE1 || ++ h.cpt_signature[2] != CPT_SIGNATURE2 || ++ h.cpt_signature[3] != CPT_SIGNATURE3) { ++ err = -EINVAL; ++ goto err_out; ++ } ++ if (h.cpt_hz != HZ) { ++ err = -EINVAL; ++ eprintk_ctx("HZ mismatch: %d != %d\n", h.cpt_hz, HZ); ++ goto err_out; ++ } ++ ctx->virt_jiffies64 = h.cpt_start_jiffies64; ++ ctx->start_time.tv_sec = h.cpt_start_sec; ++ ctx->start_time.tv_nsec = h.cpt_start_nsec; ++ ctx->kernel_config_flags = h.cpt_kernel_config[0]; ++ ctx->iptables_mask = h.cpt_iptables_mask; ++ ctx->image_version = h.cpt_image_version; ++ ++ v = cpt_get_buf(ctx); ++ err = ctx->pread(v, sizeof(*v), ctx, size - sizeof(*v)); ++ if (err) { ++ eprintk_ctx("too short image 2 %d\n", err); ++ cpt_release_buf(ctx); ++ goto err_out; ++ } ++ if (v->cpt_signature[0] != CPT_SIGNATURE0 || ++ v->cpt_signature[1] != CPT_SIGNATURE1 || ++ v->cpt_signature[2] != CPT_SIGNATURE2 || ++ v->cpt_signature[3] != CPT_SIGNATURE3 || ++ v->cpt_nsect != CPT_SECT_MAX_INDEX) { ++ err = -EINVAL; ++ cpt_release_buf(ctx); ++ goto err_out; ++ } ++ if ((err = parse_sections(h.cpt_hdrlen, size - sizeof(*v) - sizeof(struct cpt_section_hdr), ctx)) 
< 0) { ++ cpt_release_buf(ctx); ++ goto err_out; ++ } ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ ctx->lazypages = v->cpt_lazypages; ++#endif ++ ctx->tasks64 = v->cpt_64bit; ++ cpt_release_buf(ctx); ++ return 0; ++ ++err_out: ++ if (ctx->tmpbuf) { ++ free_page((unsigned long)ctx->tmpbuf); ++ ctx->tmpbuf = NULL; ++ } ++ return err; ++} ++ ++void rst_close_dumpfile(struct cpt_context *ctx) ++{ ++ if (ctx->file) { ++ fput(ctx->file); ++ ctx->file = NULL; ++ } ++ if (ctx->tmpbuf) { ++ free_page((unsigned long)ctx->tmpbuf); ++ ctx->tmpbuf = NULL; ++ } ++} ++ ++int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_object_hdr *hdr = tmp; ++ err = ctx->pread(hdr, sizeof(struct cpt_object_hdr), ctx, pos); ++ if (err) ++ return err; ++ if (type > 0 && type != hdr->cpt_object) ++ return -EINVAL; ++ if (hdr->cpt_hdrlen > hdr->cpt_next) ++ return -EINVAL; ++ if (hdr->cpt_hdrlen < sizeof(struct cpt_object_hdr)) ++ return -EINVAL; ++ if (size < sizeof(*hdr)) ++ return -EINVAL; ++ if (size > hdr->cpt_hdrlen) ++ size = hdr->cpt_hdrlen; ++ if (size > sizeof(*hdr)) ++ err = ctx->pread(hdr+1, size - sizeof(*hdr), ++ ctx, pos + sizeof(*hdr)); ++ return err; ++} ++EXPORT_SYMBOL(_rst_get_object); ++ ++void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx) ++{ ++ int err; ++ void *tmp; ++ struct cpt_object_hdr hdr; ++ err = ctx->pread(&hdr, sizeof(hdr), ctx, pos); ++ if (err) ++ return NULL; ++ if (type > 0 && type != hdr.cpt_object) ++ return NULL; ++ if (hdr.cpt_hdrlen > hdr.cpt_next) ++ return NULL; ++ if (hdr.cpt_hdrlen < sizeof(struct cpt_object_hdr)) ++ return NULL; ++ tmp = kmalloc(hdr.cpt_hdrlen, GFP_KERNEL); ++ if (!tmp) ++ return NULL; ++ err = ctx->pread(tmp, hdr.cpt_hdrlen, ctx, pos); ++ if (!err) ++ return tmp; ++ kfree(tmp); ++ return NULL; ++} ++EXPORT_SYMBOL(__rst_get_object); ++ ++__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_object_hdr hdr; ++ __u8 
*name; ++ ++ err = rst_get_object(CPT_OBJ_NAME, *pos_p, &hdr, ctx); ++ if (err) ++ return NULL; ++ if (hdr.cpt_next - hdr.cpt_hdrlen > PAGE_SIZE) ++ return NULL; ++ name = (void*)__get_free_page(GFP_KERNEL); ++ if (!name) ++ return NULL; ++ err = ctx->pread(name, hdr.cpt_next - hdr.cpt_hdrlen, ++ ctx, *pos_p + hdr.cpt_hdrlen); ++ if (err) { ++ free_page((unsigned long)name); ++ return NULL; ++ } ++ *pos_p += hdr.cpt_next; ++ return name; ++} ++ ++__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx) ++{ ++ return __rst_get_name(&pos, ctx); ++} ++ ++void rst_put_name(__u8 *name, struct cpt_context *ctx) ++{ ++ unsigned long addr = (unsigned long)name; ++ ++ if (addr) ++ free_page(addr&~(PAGE_SIZE-1)); ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_epoll.c linux-2.6.16-026test015/kernel/cpt/rst_epoll.c +--- linux-2.6.16.orig/kernel/cpt/rst_epoll.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_epoll.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,173 @@ ++/* ++ * ++ * kernel/cpt/rst_epoll.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/major.h> ++#include <linux/pipe_fs_i.h> ++#include <linux/mman.h> ++#include <linux/namespace.h> ++#include <linux/mount.h> ++#include <linux/namei.h> ++#include <linux/smp_lock.h> ++#include <asm/uaccess.h> ++#include <linux/vzcalluser.h> ++#include <linux/eventpoll.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_files.h" ++#include "cpt_kernel.h" ++#include "cpt_fsmagic.h" ++#include "cpt_syscalls.h" ++ ++/* Those funcations are static in fs/eventpoll.c */ ++extern struct file_operations eventpoll_fops; ++extern int ep_insert(struct eventpoll *ep, struct epoll_event *event, ++ struct file *tfile, int fd); ++extern struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); ++extern void ep_release_epitem(struct epitem *epi); ++ ++ ++struct file *cpt_open_epolldev(struct cpt_file_image *fi, ++ unsigned flags, ++ struct cpt_context *ctx) ++{ ++ struct file *file; ++ int efd; ++ ++ /* Argument "size" is ignored, use just 1 */ ++ efd = sys_epoll_create(1); ++ if (efd < 0) ++ return ERR_PTR(efd); ++ ++ file = fget(efd); ++ sys_close(efd); ++ return file; ++} ++ ++static int restore_one_epoll(cpt_object_t *obj, ++ loff_t pos, ++ struct cpt_epoll_image *ebuf, ++ cpt_context_t *ctx) ++{ ++ int err = 0; ++ loff_t endpos; ++ struct file *file = obj->o_obj; ++ struct eventpoll *ep; ++ ++ if (file->f_op != &eventpoll_fops) { ++ eprintk_ctx("bad epoll file\n"); ++ return -EINVAL; ++ } ++ ++ ep = file->private_data; ++ ++ if (unlikely(ep == NULL)) { ++ eprintk_ctx("bad epoll device\n"); ++ return -EINVAL; ++ } ++ ++ endpos = pos + ebuf->cpt_next; ++ pos += ebuf->cpt_hdrlen; ++ while (pos < 
endpos) { ++ struct cpt_epoll_file_image efi; ++ struct epoll_event epds; ++ ++ cpt_object_t *tobj; ++ ++ err = rst_get_object(CPT_OBJ_EPOLL_FILE, pos, &efi, ctx); ++ if (err) ++ return err; ++ tobj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, efi.cpt_file, ctx); ++ if (!tobj) { ++ eprintk_ctx("epoll file not found\n"); ++ return -EINVAL; ++ } ++ epds.events = efi.cpt_events; ++ epds.data = efi.cpt_data; ++ down_write(&ep->sem); ++ err = ep_insert(ep, &epds, tobj->o_obj, efi.cpt_fd); ++ if (!err) { ++ struct epitem *epi; ++ epi = ep_find(ep, tobj->o_obj, efi.cpt_fd); ++ if (epi) { ++ epi->revents = efi.cpt_revents; ++ if (efi.cpt_ready) { ++ unsigned long flags; ++ write_lock_irqsave(&ep->lock, flags); ++ if (list_empty(&epi->rdllink)) ++ list_add_tail(&epi->rdllink, &ep->rdllist); ++ write_unlock_irqrestore(&ep->lock, flags); ++ } ++ ep_release_epitem(epi); ++ } ++ } ++ up_write(&ep->sem); ++ if (err) ++ break; ++ pos += efi.cpt_next; ++ } ++ return err; ++} ++ ++int rst_eventpoll(cpt_context_t *ctx) ++{ ++ int err; ++ loff_t sec = ctx->sections[CPT_SECT_EPOLL]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_EPOLL || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ cpt_object_t *obj; ++ struct cpt_epoll_image *ebuf = cpt_get_buf(ctx); ++ err = rst_get_object(CPT_OBJ_EPOLL, sec, ebuf, ctx); ++ if (err) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, ebuf->cpt_file, ctx); ++ if (obj == NULL) { ++ eprintk_ctx("cannot find epoll file object\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ err = restore_one_epoll(obj, sec, ebuf, ctx); ++ cpt_release_buf(ctx); ++ if (err) ++ return err; ++ sec += ebuf->cpt_next; ++ } ++ ++ return 0; ++ ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_files.c 
linux-2.6.16-026test015/kernel/cpt/rst_files.c +--- linux-2.6.16.orig/kernel/cpt/rst_files.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_files.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,1453 @@ ++/* ++ * ++ * kernel/cpt/rst_files.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/major.h> ++#include <linux/pipe_fs_i.h> ++#include <linux/mman.h> ++#include <linux/mount.h> ++#include <linux/tty.h> ++#include <linux/namei.h> ++#include <linux/vmalloc.h> ++#include <linux/smp_lock.h> ++#include <linux/vmalloc.h> ++#include <linux/pagemap.h> ++#include <asm/uaccess.h> ++#include <ub/ub_mem.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_files.h" ++#include "cpt_kernel.h" ++#include "cpt_fsmagic.h" ++ ++#include "cpt_syscalls.h" ++ ++ ++struct filejob { ++ struct filejob *next; ++ int pid; ++ loff_t fdi; ++}; ++ ++static int rst_filejob_queue(loff_t pos, cpt_context_t *ctx) ++{ ++ struct filejob *j; ++ ++ j = kmalloc(sizeof(*j), GFP_KERNEL); ++ if (j == NULL) ++ return -ENOMEM; ++ j->pid = current->pid; ++ j->fdi = pos; ++ j->next = ctx->filejob_queue; ++ ctx->filejob_queue = j; ++ return 0; ++} ++ ++static void _anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) ++{ ++ struct page *page = buf->page; ++ ++ if (info->tmp_page) { ++ __free_page(page); ++ } else { ++ info->tmp_page = page; ++ } ++ module_put(THIS_MODULE); ++} ++ ++static void *_anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf) ++{ ++ return 
kmap(buf->page); ++} ++ ++static void _anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) ++{ ++ kunmap(buf->page); ++} ++ ++static struct pipe_buf_operations _anon_pipe_buf_ops = { ++ .can_merge = 1, ++ .map = _anon_pipe_buf_map, ++ .unmap = _anon_pipe_buf_unmap, ++ .release = _anon_pipe_buf_release, ++}; ++ ++/* Sorta ugly... Multiple readers/writers of named pipe rewrite buffer ++ * many times. We need to mark it in CPT_OBJ_INODE table in some way. ++ */ ++static int fixup_pipe_data(struct file *file, struct cpt_file_image *fi, ++ struct cpt_context *ctx) ++{ ++ struct inode *ino = file->f_dentry->d_inode; ++ struct cpt_inode_image ii; ++ struct cpt_obj_bits b; ++ struct pipe_inode_info *info; ++ int err; ++ int count; ++ ++ if (!S_ISFIFO(ino->i_mode)) { ++ eprintk_ctx("fixup_pipe_data: not a pipe %Ld\n", fi->cpt_inode); ++ return -EINVAL; ++ } ++ if (fi->cpt_inode == CPT_NULL) ++ return 0; ++ ++ err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx); ++ if (err) ++ return err; ++ ++ if (ii.cpt_next <= ii.cpt_hdrlen) ++ return 0; ++ ++ err = rst_get_object(CPT_OBJ_BITS, fi->cpt_inode + ii.cpt_hdrlen, &b, ctx); ++ if (err) ++ return err; ++ ++ if (b.cpt_size == 0) ++ return 0; ++ ++ mutex_lock(PIPE_MUTEX(*ino)); ++ info = ino->i_pipe; ++ if (info->nrbufs) { ++ mutex_unlock(PIPE_MUTEX(*ino)); ++ eprintk("pipe buffer is restored already\n"); ++ return -EINVAL; ++ } ++ info->curbuf = 0; ++ count = 0; ++ while (count < b.cpt_size) { ++ struct pipe_buffer *buf = info->bufs + info->nrbufs; ++ void * addr; ++ int chars; ++ ++ chars = b.cpt_size - count; ++ if (chars > PAGE_SIZE) ++ chars = PAGE_SIZE; ++ if (!try_module_get(THIS_MODULE)) { ++ err = -EBUSY; ++ break; ++ } ++ ++ buf->page = alloc_page(GFP_HIGHUSER); ++ if (buf->page == NULL) { ++ err = -ENOMEM; ++ break; ++ } ++ buf->ops = &_anon_pipe_buf_ops; ++ buf->offset = 0; ++ buf->len = chars; ++ info->nrbufs++; ++ addr = kmap(buf->page); ++ err = ctx->pread(addr, chars, ctx, ++ 
fi->cpt_inode + ii.cpt_hdrlen + b.cpt_hdrlen + count); ++ if (err) ++ break; ++ count += chars; ++ } ++ mutex_unlock(PIPE_MUTEX(*ino)); ++ ++ return err; ++} ++ ++static int make_flags(struct cpt_file_image *fi) ++{ ++ int flags = O_NOFOLLOW; ++ switch (fi->cpt_mode&(FMODE_READ|FMODE_WRITE)) { ++ case FMODE_READ|FMODE_WRITE: ++ flags |= O_RDWR; break; ++ case FMODE_WRITE: ++ flags |= O_WRONLY; break; ++ case FMODE_READ: ++ flags |= O_RDONLY; break; ++ default: break; ++ } ++ flags |= fi->cpt_flags&~(O_ACCMODE|O_CREAT|O_TRUNC|O_EXCL|FASYNC); ++ flags |= O_NONBLOCK|O_NOCTTY; ++ return flags; ++} ++ ++static struct file *open_pipe(char *name, ++ struct cpt_file_image *fi, ++ unsigned flags, ++ struct cpt_context *ctx) ++{ ++ int err; ++ cpt_object_t *obj; ++ struct cpt_inode_image ii; ++ struct file *rf, *wf; ++ ++ err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx); ++ if (err) ++ return ERR_PTR(err); ++ ++ if (ii.cpt_sb == FSMAGIC_PIPEFS) { ++ int pfd[2]; ++ ++ if ((err = sc_pipe(pfd)) < 0) ++ return ERR_PTR(err); ++ ++ rf = fcheck(pfd[0]); ++ wf = fcheck(pfd[1]); ++ get_file(rf); ++ get_file(wf); ++ sc_close(pfd[0]); ++ sc_close(pfd[1]); ++ ++ if (fi->cpt_mode&FMODE_READ) { ++ struct file *tf; ++ tf = wf; wf = rf; rf = tf; ++ } ++ } else { ++ if (fi->cpt_mode&FMODE_READ) { ++ rf = filp_open(name, flags, 0); ++ if (IS_ERR(rf)) { ++ dprintk_ctx("filp_open\n"); ++ return rf; ++ } ++ dprintk_ctx(CPT_FID "open RDONLY fifo ino %Ld %p %x\n", CPT_TID(current), fi->cpt_inode, rf, rf->f_dentry->d_inode->i_mode); ++ return rf; ++ } ++ ++ dprintk_ctx(CPT_FID "open WRONLY fifo ino %Ld\n", CPT_TID(current), fi->cpt_inode); ++ ++ rf = filp_open(name, O_RDWR|O_NONBLOCK, 0); ++ if (IS_ERR(rf)) ++ return rf; ++ wf = dentry_open(dget(rf->f_dentry), ++ mntget(rf->f_vfsmnt), flags); ++ } ++ ++ /* Add pipe inode to obj table. 
*/ ++ obj = cpt_object_add(CPT_OBJ_INODE, wf->f_dentry->d_inode, ctx); ++ if (obj == NULL) { ++ fput(rf); fput(wf); ++ return ERR_PTR(-ENOMEM); ++ } ++ cpt_obj_setpos(obj, fi->cpt_inode, ctx); ++ obj->o_parent = rf; ++ ++ /* Add another side of pipe to obj table, it will not be used ++ * (o_pos = PT_NULL), another processes opeining pipe will find ++ * inode and open it with dentry_open(). */ ++ obj = cpt_object_add(CPT_OBJ_FILE, rf, ctx); ++ if (obj == NULL) { ++ fput(wf); ++ return ERR_PTR(-ENOMEM); ++ } ++ return wf; ++} ++ ++static struct file *open_special(struct cpt_file_image *fi, ++ unsigned flags, ++ int deleted, ++ struct cpt_context *ctx) ++{ ++ struct cpt_inode_image *ii; ++ struct file *file; ++ ++ /* Directories and named pipes are not special actually */ ++ if (S_ISDIR(fi->cpt_i_mode) || S_ISFIFO(fi->cpt_i_mode)) ++ return NULL; ++ ++ /* No support for block devices at the moment. */ ++ if (S_ISBLK(fi->cpt_i_mode)) ++ return ERR_PTR(-EINVAL); ++ ++ if (S_ISSOCK(fi->cpt_i_mode)) { ++ eprintk_ctx("bug: socket is not open\n"); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ /* Support only (some) character devices at the moment. */ ++ if (!S_ISCHR(fi->cpt_i_mode)) ++ return ERR_PTR(-EINVAL); ++ ++ ii = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx); ++ if (ii == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ /* Do not worry about this right now. /dev/null,zero,*random are here. ++ * To prohibit at least /dev/mem? 
++ */ ++ if (MAJOR(ii->cpt_rdev) == MEM_MAJOR) { ++ kfree(ii); ++ return NULL; ++ } ++ ++ file = rst_open_tty(fi, ii, flags, ctx); ++ kfree(ii); ++ return file; ++} ++ ++static int restore_posix_lock(struct file *file, struct cpt_flock_image *fli, cpt_context_t *ctx) ++{ ++ struct file_lock lock; ++ cpt_object_t *obj; ++ ++ memset(&lock, 0, sizeof(lock)); ++ lock.fl_type = fli->cpt_type; ++ lock.fl_flags = fli->cpt_flags & ~FL_SLEEP; ++ lock.fl_start = fli->cpt_start; ++ lock.fl_end = fli->cpt_end; ++ obj = lookup_cpt_obj_byindex(CPT_OBJ_FILES, fli->cpt_owner, ctx); ++ if (!obj) { ++ eprintk_ctx("unknown lock owner %d\n", (int)fli->cpt_owner); ++ return -EINVAL; ++ } ++ lock.fl_owner = obj->o_obj; ++ lock.fl_pid = vpid_to_pid(fli->cpt_pid); ++ if (lock.fl_pid < 0) { ++ eprintk_ctx("unknown lock pid %d\n", lock.fl_pid); ++ return -EINVAL; ++ } ++ lock.fl_file = file; ++ ++ if (lock.fl_owner == NULL) ++ eprintk_ctx("no lock owner\n"); ++ return posix_lock_file(file, &lock); ++} ++ ++static int restore_flock(struct file *file, struct cpt_flock_image *fli, ++ cpt_context_t *ctx) ++{ ++ int cmd, err, fd; ++ fd = get_unused_fd(); ++ if (fd < 0) { ++ eprintk_ctx("BSD flock cannot be restored\n"); ++ return fd; ++ } ++ get_file(file); ++ fd_install(fd, file); ++ if (fli->cpt_type == F_RDLCK) { ++ cmd = LOCK_SH; ++ } else if (fli->cpt_type == F_WRLCK) { ++ cmd = LOCK_EX; ++ } else { ++ eprintk_ctx("flock flavor is unknown: %u\n", fli->cpt_type); ++ sc_close(fd); ++ return -EINVAL; ++ } ++ ++ err = sc_flock(fd, LOCK_NB | cmd); ++ sc_close(fd); ++ return err; ++} ++ ++ ++static int fixup_posix_locks(struct file *file, ++ struct cpt_file_image *fi, ++ loff_t pos, struct cpt_context *ctx) ++{ ++ int err; ++ loff_t end; ++ struct cpt_flock_image fli; ++ ++ end = pos + fi->cpt_next; ++ pos += fi->cpt_hdrlen; ++ while (pos < end) { ++ err = rst_get_object(-1, pos, &fli, ctx); ++ if (err) ++ return err; ++ if (fli.cpt_object == CPT_OBJ_FLOCK && ++ (fli.cpt_flags&FL_POSIX)) { ++ err 
= restore_posix_lock(file, &fli, ctx); ++ if (err) ++ return err; ++ dprintk_ctx("posix lock restored\n"); ++ } ++ pos += fli.cpt_next; ++ } ++ return 0; ++} ++ ++int rst_posix_locks(struct cpt_context *ctx) ++{ ++ int err; ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_FILE) { ++ struct file *file = obj->o_obj; ++ struct cpt_file_image fi; ++ ++ if (obj->o_pos == CPT_NULL) ++ continue; ++ ++ err = rst_get_object(CPT_OBJ_FILE, obj->o_pos, &fi, ctx); ++ if (err < 0) ++ return err; ++ if (fi.cpt_next > fi.cpt_hdrlen) ++ fixup_posix_locks(file, &fi, obj->o_pos, ctx); ++ } ++ return 0; ++} ++ ++static int fixup_flocks(struct file *file, ++ struct cpt_file_image *fi, ++ loff_t pos, struct cpt_context *ctx) ++{ ++ int err; ++ loff_t end; ++ struct cpt_flock_image fli; ++ ++ end = pos + fi->cpt_next; ++ pos += fi->cpt_hdrlen; ++ while (pos < end) { ++ err = rst_get_object(-1, pos, &fli, ctx); ++ if (err) ++ return err; ++ if (fli.cpt_object == CPT_OBJ_FLOCK && ++ (fli.cpt_flags&FL_FLOCK)) { ++ err = restore_flock(file, &fli, ctx); ++ if (err) ++ return err; ++ dprintk_ctx("bsd lock restored\n"); ++ } ++ pos += fli.cpt_next; ++ } ++ return 0; ++} ++ ++ ++static int fixup_reg_data(struct file *file, loff_t pos, loff_t end, ++ struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_page_block pgb; ++ ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos); ++ ++ do_write = file->f_op->write; ++ if (do_write == NULL) { ++ eprintk_ctx("no write method. 
Cannot restore contents of the file.\n"); ++ return -EINVAL; ++ } ++ ++ atomic_inc(&file->f_count); ++ ++ while (pos < end) { ++ loff_t opos; ++ loff_t ipos; ++ int count; ++ ++ err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx); ++ if (err) ++ goto out; ++ dprintk_ctx("restoring file data block: %08x-%08x\n", ++ (__u32)pgb.cpt_start, (__u32)pgb.cpt_end); ++ ipos = pos + pgb.cpt_hdrlen; ++ opos = pgb.cpt_start; ++ count = pgb.cpt_end-pgb.cpt_start; ++ while (count > 0) { ++ mm_segment_t oldfs; ++ int copy = count; ++ ++ if (copy > PAGE_SIZE) ++ copy = PAGE_SIZE; ++ (void)cpt_get_buf(ctx); ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos); ++ set_fs(oldfs); ++ if (err) { ++ __cpt_release_buf(ctx); ++ goto out; ++ } ++ if (!(file->f_mode & FMODE_WRITE) || ++ (file->f_flags&O_DIRECT)) { ++ fput(file); ++ file = dentry_open(dget(file->f_dentry), ++ mntget(file->f_vfsmnt), O_WRONLY); ++ if (IS_ERR(file)) { ++ __cpt_release_buf(ctx); ++ return PTR_ERR(file); ++ } ++ } ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ ipos += copy; ++ err = do_write(file, ctx->tmpbuf, copy, &opos); ++ set_fs(oldfs); ++ __cpt_release_buf(ctx); ++ if (err != copy) { ++ if (err >= 0) ++ err = -EIO; ++ goto out; ++ } ++ count -= copy; ++ } ++ pos += pgb.cpt_next; ++ } ++ err = 0; ++ ++out: ++ fput(file); ++ return err; ++} ++ ++ ++static int fixup_file_content(struct file **file_p, struct cpt_file_image *fi, ++ struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_inode_image ii; ++ struct file *file = *file_p; ++ struct iattr newattrs; ++ ++ if (!S_ISREG(fi->cpt_i_mode)) ++ return 0; ++ ++ err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx); ++ if (err) ++ return err; ++ ++ if (file == NULL) { ++ file = shmem_file_setup("dev/zero", ii.cpt_size, 0); ++ if (IS_ERR(file)) ++ return PTR_ERR(file); ++ *file_p = file; ++ } ++ ++ if (ii.cpt_next > ii.cpt_hdrlen) { ++ err = fixup_reg_data(file, fi->cpt_inode+ii.cpt_hdrlen, ++ fi->cpt_inode+ii.cpt_next, 
ctx); ++ if (err) ++ return err; ++ } ++ ++ mutex_lock(&file->f_dentry->d_inode->i_mutex); ++ /* stage 1 - update size like do_truncate does */ ++ newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; ++ newattrs.ia_size = ii.cpt_size; ++ cpt_timespec_import(&newattrs.ia_ctime, ii.cpt_ctime); ++ err = notify_change(file->f_dentry, &newattrs); ++ if (err) ++ goto out; ++ ++ /* stage 2 - update times */ ++ newattrs.ia_valid = ATTR_MTIME | ATTR_ATIME | ++ ATTR_ATIME_SET | ATTR_MTIME_SET; ++ cpt_timespec_import(&newattrs.ia_atime, ii.cpt_atime); ++ cpt_timespec_import(&newattrs.ia_mtime, ii.cpt_mtime); ++ err = notify_change(file->f_dentry, &newattrs); ++ ++out: ++ mutex_unlock(&file->f_dentry->d_inode->i_mutex); ++ return err; ++} ++ ++static int fixup_file_flags(struct file *file, struct cpt_file_image *fi, ++ int was_dentry_open, loff_t pos, ++ cpt_context_t *ctx) ++{ ++ if (fi->cpt_pos != file->f_pos) { ++ int err = -ESPIPE; ++ if (file->f_op->llseek) ++ err = file->f_op->llseek(file, fi->cpt_pos, 0); ++ if (err < 0) { ++ dprintk_ctx("file %Ld lseek %Ld - %Ld\n", pos, file->f_pos, fi->cpt_pos); ++ file->f_pos = fi->cpt_pos; ++ } ++ } ++ file->f_uid = fi->cpt_uid; ++ file->f_gid = fi->cpt_gid; ++ file->f_owner.pid = 0; ++ if (fi->cpt_fown_pid) { ++ file->f_owner.pid = comb_vpid_to_pid(fi->cpt_fown_pid); ++ if (file->f_owner.pid == 0) { ++ wprintk_ctx("fixup_file_flags: owner %d does not exist anymore\n", file->f_owner.pid); ++ return -EINVAL; ++ } ++ } ++ file->f_owner.uid = fi->cpt_fown_uid; ++ file->f_owner.euid = fi->cpt_fown_euid; ++ file->f_owner.signum = fi->cpt_fown_signo; ++ ++ if (file->f_mode != fi->cpt_mode) { ++ if (was_dentry_open && ++ ((file->f_mode^fi->cpt_mode)&(FMODE_PREAD|FMODE_LSEEK))) { ++ file->f_mode &= ~(FMODE_PREAD|FMODE_LSEEK); ++ file->f_mode |= fi->cpt_mode&(FMODE_PREAD|FMODE_LSEEK); ++ } ++ if (file->f_mode != fi->cpt_mode) ++ wprintk_ctx("file %ld mode mismatch %08x %08x\n", (long)pos, file->f_mode, fi->cpt_mode); ++ } ++ if (file->f_flags != 
fi->cpt_flags) { ++ if (!(fi->cpt_flags&O_NOFOLLOW)) ++ file->f_flags &= ~O_NOFOLLOW; ++ if ((file->f_flags^fi->cpt_flags)&O_NONBLOCK) { ++ file->f_flags &= ~O_NONBLOCK; ++ file->f_flags |= fi->cpt_flags&O_NONBLOCK; ++ } ++ if (fi->cpt_flags&FASYNC) { ++ if (fi->cpt_fown_fd == -1) { ++ wprintk_ctx("No fd for FASYNC\n"); ++ return -EINVAL; ++ } else if (file->f_op && file->f_op->fasync) { ++ if (file->f_op->fasync(fi->cpt_fown_fd, file, 1) < 0) { ++ wprintk_ctx("FASYNC problem\n"); ++ return -EINVAL; ++ } else { ++ file->f_flags |= FASYNC; ++ } ++ } ++ } ++ if (file->f_flags != fi->cpt_flags) { ++ eprintk_ctx("file %ld flags mismatch %08x %08x\n", (long)pos, file->f_flags, fi->cpt_flags); ++ return -EINVAL; ++ } ++ } ++ return 0; ++} ++ ++static struct file * ++open_deleted(char *name, unsigned flags, struct cpt_file_image *fi, ++ cpt_context_t *ctx) ++{ ++ struct file * file; ++ char *suffix = NULL; ++ int attempt = 0; ++ int tmp_pass = 0; ++ mode_t mode = fi->cpt_i_mode; ++ ++ /* Strip (deleted) part... 
*/ ++ if (strlen(name) > strlen(" (deleted)")) { ++ if (strcmp(name + strlen(name) - strlen(" (deleted)"), " (deleted)") == 0) { ++ suffix = &name[strlen(name) - strlen(" (deleted)")]; ++ *suffix = 0; ++ } else if (memcmp(name, "(deleted) ", strlen("(deleted) ")) == 0) { ++ memmove(name, name + strlen("(deleted) "), strlen(name) - strlen(" (deleted)") + 1); ++ suffix = name + strlen(name); ++ } ++ } ++ ++try_again: ++ for (;;) { ++ if (attempt) { ++ if (attempt > 1000) { ++ eprintk_ctx("open_deleted: failed after %d attempts\n", attempt); ++ return ERR_PTR(-EEXIST); ++ } ++ if (suffix == NULL) { ++ eprintk_ctx("open_deleted: no suffix\n"); ++ return ERR_PTR(-EEXIST); ++ } ++ sprintf(suffix, ".%08x", (unsigned)((xtime.tv_nsec>>10)+attempt)); ++ } ++ attempt++; ++ ++ if (S_ISFIFO(mode)) { ++ int err; ++ err = sc_mknod(name, S_IFIFO|(mode&017777), 0); ++ if (err == -EEXIST) ++ continue; ++ if (err < 0 && !tmp_pass) ++ goto change_dir; ++ if (err < 0) ++ return ERR_PTR(err); ++ file = open_pipe(name, fi, flags, ctx); ++ sc_unlink(name); ++ } else if (S_ISCHR(mode)) { ++ int err; ++ struct cpt_inode_image *ii; ++ ++ ii = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx); ++ if (ii == NULL) ++ return ERR_PTR(-ENOMEM); ++ err = sc_mknod(name, S_IFCHR|(mode&017777), new_encode_dev(ii->cpt_rdev)); ++ kfree(ii); ++ if (err == -EEXIST) ++ continue; ++ if (err < 0 && !tmp_pass) ++ goto change_dir; ++ if (err < 0) ++ return ERR_PTR(err); ++ file = filp_open(name, flags, mode&017777); ++ sc_unlink(name); ++ } else if (S_ISDIR(mode)) { ++ int err; ++ err = sc_mkdir(name, mode&017777); ++ if (err == -EEXIST) ++ continue; ++ if (err < 0 && !tmp_pass) ++ goto change_dir; ++ if (err < 0) ++ return ERR_PTR(err); ++ file = filp_open(name, flags, mode&017777); ++ sc_rmdir(name); ++ } else { ++ file = filp_open(name, O_CREAT|O_EXCL|flags, mode&017777); ++ if (IS_ERR(file)) { ++ if (PTR_ERR(file) == -EEXIST) ++ continue; ++ if (!tmp_pass) ++ goto change_dir; ++ } else { ++ 
sc_unlink(name); ++ } ++ } ++ break; ++ } ++ ++ if (IS_ERR(file)) { ++ eprintk_ctx("filp_open %s: %ld\n", name, PTR_ERR(file)); ++ return file; ++ } else { ++ dprintk_ctx("deleted file created as %s, %p, %x\n", name, file, file->f_dentry->d_inode->i_mode); ++ } ++ return file; ++ ++change_dir: ++ sprintf(name, "/tmp/rst%u", current->pid); ++ suffix = name + strlen(name); ++ attempt = 1; ++ tmp_pass = 1; ++ goto try_again; ++} ++ ++struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx) ++{ ++ int err; ++ int was_dentry_open = 0; ++ cpt_object_t *obj; ++ cpt_object_t *iobj; ++ struct cpt_file_image fi; ++ __u8 *name = NULL; ++ struct file *file; ++ int flags; ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, pos, ctx); ++ if (obj) { ++ file = obj->o_obj; ++ if (obj->o_index >= 0) { ++ dprintk_ctx("file is attached to a socket\n"); ++ err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx); ++ if (err < 0) ++ goto err_out; ++ fixup_file_flags(file, &fi, 0, pos, ctx); ++ } ++ get_file(file); ++ return file; ++ } ++ ++ err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx); ++ if (err < 0) ++ goto err_out; ++ ++ flags = make_flags(&fi); ++ ++ /* Easy way, inode has been already open. */ ++ if (fi.cpt_inode != CPT_NULL && ++ !(fi.cpt_lflags & CPT_DENTRY_CLONING) && ++ (iobj = lookup_cpt_obj_bypos(CPT_OBJ_INODE, fi.cpt_inode, ctx)) != NULL && ++ iobj->o_parent) { ++ struct file *filp = iobj->o_parent; ++ file = dentry_open(dget(filp->f_dentry), ++ mntget(filp->f_vfsmnt), flags); ++ dprintk_ctx("rst_file: file obtained by dentry_open\n"); ++ was_dentry_open = 1; ++ goto map_file; ++ } ++ ++ if (fi.cpt_next > fi.cpt_hdrlen) ++ name = rst_get_name(pos + sizeof(fi), ctx); ++ ++ if (fi.cpt_lflags == CPT_DENTRY_DELETED) { ++ if (fi.cpt_inode == CPT_NULL) { ++ eprintk_ctx("deleted file and no inode.\n"); ++ err = -EINVAL; ++ goto err_out; ++ } ++ ++ /* One very special case... 
*/ ++ if (S_ISREG(fi.cpt_i_mode) && ++ (!name || !name[0] || strcmp(name, "/dev/zero (deleted)") == 0)) { ++ /* MAP_ANON|MAP_SHARED mapping. ++ * kernel makes this damn ugly way, when file which ++ * is passed to mmap by user does not match ++ * file finally attached to VMA. Ok, rst_mm ++ * has to take care of this. Otherwise, it will fail. ++ */ ++ file = NULL; ++ } else if (S_ISREG(fi.cpt_i_mode) || ++ S_ISCHR(fi.cpt_i_mode) || ++ S_ISFIFO(fi.cpt_i_mode) || ++ S_ISDIR(fi.cpt_i_mode)) { ++ if (S_ISCHR(fi.cpt_i_mode)) { ++ file = open_special(&fi, flags, 1, ctx); ++ if (file != NULL) ++ goto map_file; ++ } ++ file = open_deleted(name, flags, &fi, ctx); ++ if (IS_ERR(file)) ++ goto out; ++ } else { ++ eprintk_ctx("not a regular deleted file.\n"); ++ err = -EINVAL; ++ goto err_out; ++ } ++ ++ err = fixup_file_content(&file, &fi, ctx); ++ if (err) ++ goto err_put; ++ goto map_file; ++ } else { ++ if (!name || !name[0]) { ++ eprintk_ctx("no name for file?\n"); ++ err = -EINVAL; ++ goto err_out; ++ } ++ if ((fi.cpt_lflags & CPT_DENTRY_EPOLL) && ++ (file = cpt_open_epolldev(&fi, flags, ctx)) != NULL) ++ goto map_file; ++ if (S_ISFIFO(fi.cpt_i_mode) && ++ (file = open_pipe(name, &fi, flags, ctx)) != NULL) ++ goto map_file; ++ if (!S_ISREG(fi.cpt_i_mode) && ++ (file = open_special(&fi, flags, 0, ctx)) != NULL) ++ goto map_file; ++ } ++ ++ file = filp_open(name, flags, 0); ++ ++map_file: ++ if (!IS_ERR(file)) { ++ fixup_file_flags(file, &fi, was_dentry_open, pos, ctx); ++ ++ if (S_ISFIFO(fi.cpt_i_mode) && !was_dentry_open) { ++ err = fixup_pipe_data(file, &fi, ctx); ++ if (err) ++ goto err_put; ++ } ++ ++ obj = cpt_object_get(CPT_OBJ_FILE, file, ctx); ++ if (!obj) { ++ obj = cpt_object_add(CPT_OBJ_FILE, file, ctx); ++ if (obj) ++ get_file(file); ++ } ++ if (obj) ++ cpt_obj_setpos(obj, pos, ctx); ++ ++ obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx); ++ if (obj) { ++ cpt_obj_setpos(obj, fi.cpt_inode, ctx); ++ if (!obj->o_parent || fi.cpt_lflags != 
CPT_DENTRY_DELETED) ++ obj->o_parent = file; ++ } ++ ++ if (fi.cpt_next > fi.cpt_hdrlen) { ++ err = fixup_flocks(file, &fi, pos, ctx); ++ if (err) ++ goto err_put; ++ } ++ } else { ++ if (fi.cpt_lflags & CPT_DENTRY_PROC) { ++ dprintk_ctx("rst_file /proc delayed\n"); ++ file = NULL; ++ } ++ } ++ ++out: ++ if (name) ++ rst_put_name(name, ctx); ++ return file; ++ ++err_put: ++ if (file) ++ fput(file); ++err_out: ++ if (name) ++ rst_put_name(name, ctx); ++ return ERR_PTR(err); ++} ++ ++ ++__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ __u32 flag = 0; ++ ++ if (ti->cpt_files == CPT_NULL || ++ lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx)) ++ flag |= CLONE_FILES; ++ if (ti->cpt_fs == CPT_NULL || ++ lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx)) ++ flag |= CLONE_FS; ++ return flag; ++} ++ ++static void local_close_files(struct files_struct * files) ++{ ++ int i, j; ++ ++ j = 0; ++ for (;;) { ++ unsigned long set; ++ i = j * __NFDBITS; ++ if (i >= files->fdt->max_fdset || i >= files->fdt->max_fds) ++ break; ++ set = files->fdt->open_fds->fds_bits[j]; ++ while (set) { ++ if (set & 1) { ++ struct file * file = xchg(&files->fdt->fd[i], NULL); ++ if (file) ++ filp_close(file, files); ++ } ++ i++; ++ set >>= 1; ++ } ++ files->fdt->open_fds->fds_bits[j] = 0; ++ files->fdt->close_on_exec->fds_bits[j] = 0; ++ j++; ++ } ++} ++ ++extern int expand_fdtable(struct files_struct *files, int nr); ++ ++ ++int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ struct cpt_files_struct_image fi; ++ struct files_struct *f = current->files; ++ cpt_object_t *obj; ++ loff_t pos, endpos; ++ int err; ++ ++ if (ti->cpt_files == CPT_NULL) { ++ current->files = NULL; ++ if (f) ++ put_files_struct(f); ++ return 0; ++ } ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx); ++ if (obj) { ++ if (obj->o_obj != f) { ++ put_files_struct(f); ++ f = obj->o_obj; ++ atomic_inc(&f->count); ++ current->files = f; ++ } ++ 
return 0; ++ } ++ ++ err = rst_get_object(CPT_OBJ_FILES, ti->cpt_files, &fi, ctx); ++ if (err) ++ return err; ++ ++ local_close_files(f); ++ ++ if (fi.cpt_max_fds > f->fdt->max_fds) { ++ spin_lock(&f->file_lock); ++ err = expand_fdtable(f, fi.cpt_max_fds-1); ++ spin_unlock(&f->file_lock); ++ if (err) ++ return err; ++ } ++ ++ pos = ti->cpt_files + fi.cpt_hdrlen; ++ endpos = ti->cpt_files + fi.cpt_next; ++ while (pos < endpos) { ++ struct cpt_fd_image fdi; ++ struct file *filp; ++ ++ err = rst_get_object(CPT_OBJ_FILEDESC, pos, &fdi, ctx); ++ if (err) ++ return err; ++ filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx); ++ if (IS_ERR(filp)) { ++ eprintk_ctx("rst_file: %ld %Lu\n", PTR_ERR(filp), fdi.cpt_file); ++ return PTR_ERR(filp); ++ } ++ if (filp == NULL) { ++ int err = rst_filejob_queue(pos, ctx); ++ if (err) ++ return err; ++ } else { ++ if (fdi.cpt_fd >= f->fdt->max_fds) BUG(); ++ f->fdt->fd[fdi.cpt_fd] = filp; ++ FD_SET(fdi.cpt_fd, f->fdt->open_fds); ++ if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC) ++ FD_SET(fdi.cpt_fd, f->fdt->close_on_exec); ++ } ++ pos += fdi.cpt_next; ++ } ++ f->fdt->next_fd = fi.cpt_next_fd; ++ ++ obj = cpt_object_add(CPT_OBJ_FILES, f, ctx); ++ if (obj) { ++ cpt_obj_setpos(obj, ti->cpt_files, ctx); ++ cpt_obj_setindex(obj, fi.cpt_index, ctx); ++ } ++ return 0; ++} ++ ++int rst_do_filejobs(cpt_context_t *ctx) ++{ ++ struct filejob *j; ++ ++ while ((j = ctx->filejob_queue) != NULL) { ++ int err; ++ task_t *tsk; ++ struct cpt_fd_image fdi; ++ struct file *filp; ++ ++ read_lock(&tasklist_lock); ++ tsk = find_task_by_pid_ve(j->pid); ++ if (tsk) ++ get_task_struct(tsk); ++ read_unlock(&tasklist_lock); ++ if (!tsk) ++ return -EINVAL; ++ ++ err = rst_get_object(CPT_OBJ_FILEDESC, j->fdi, &fdi, ctx); ++ if (err) { ++ put_task_struct(tsk); ++ return err; ++ } ++ ++ if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG(); ++ if (tsk->files->fdt->fd[fdi.cpt_fd] || ++ FD_ISSET(fdi.cpt_fd, tsk->files->fdt->open_fds)) { ++ eprintk_ctx("doing filejob %Ld: fd is 
busy\n", j->fdi); ++ put_task_struct(tsk); ++ return -EBUSY; ++ } ++ ++ filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx); ++ if (IS_ERR(filp)) { ++ eprintk_ctx("rst_do_filejobs: 1: %ld %Lu\n", PTR_ERR(filp), fdi.cpt_file); ++ put_task_struct(tsk); ++ return PTR_ERR(filp); ++ } ++ if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG(); ++ tsk->files->fdt->fd[fdi.cpt_fd] = filp; ++ FD_SET(fdi.cpt_fd, tsk->files->fdt->open_fds); ++ if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC) ++ FD_SET(fdi.cpt_fd, tsk->files->fdt->close_on_exec); ++ ++ dprintk_ctx("filejob %Ld done\n", j->fdi); ++ ++ put_task_struct(tsk); ++ ctx->filejob_queue = j->next; ++ kfree(j); ++ } ++ return 0; ++} ++ ++void rst_flush_filejobs(cpt_context_t *ctx) ++{ ++ struct filejob *j; ++ ++ while ((j = ctx->filejob_queue) != NULL) { ++ ctx->filejob_queue = j->next; ++ kfree(j); ++ } ++} ++ ++int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ struct fs_struct *f = current->fs; ++ cpt_object_t *obj; ++ ++ if (ti->cpt_fs == CPT_NULL) { ++ exit_fs(current); ++ return 0; ++ } ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx); ++ if (obj) { ++ if (obj->o_obj != f) { ++ exit_fs(current); ++ f = obj->o_obj; ++ atomic_inc(&f->count); ++ current->fs = f; ++ } ++ return 0; ++ } ++ ++ /* Do _not_ restore root. Image contains absolute pathnames. ++ * So, we fix it in context of rst process. 
++ */ ++ ++ obj = cpt_object_add(CPT_OBJ_FS, f, ctx); ++ if (obj) ++ cpt_obj_setpos(obj, ti->cpt_fs, ctx); ++ ++ return 0; ++} ++ ++static int get_dir(struct dentry **dp, struct vfsmount **mp, ++ loff_t *pos, struct cpt_context *ctx) ++{ ++ struct cpt_file_image fi; ++ struct file * file; ++ int err; ++ ++ err = rst_get_object(CPT_OBJ_FILE, *pos, &fi, ctx); ++ if (err) ++ return err; ++ ++ file = rst_file(*pos, -1, ctx); ++ if (IS_ERR(file)) ++ return PTR_ERR(file); ++ ++ *dp = dget(file->f_dentry); ++ *mp = mntget(file->f_vfsmnt); ++ *pos += fi.cpt_next; ++ fput(file); ++ return 0; ++} ++ ++static void __set_fs_root(struct fs_struct *fs, struct vfsmount *mnt, ++ struct dentry *dentry) ++{ ++ struct dentry *old_root; ++ struct vfsmount *old_rootmnt; ++ write_lock(&fs->lock); ++ old_root = fs->root; ++ old_rootmnt = fs->rootmnt; ++ fs->rootmnt = mnt; ++ fs->root = dentry; ++ write_unlock(&fs->lock); ++ if (old_root) { ++ dput(old_root); ++ mntput(old_rootmnt); ++ } ++} ++ ++static void __set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, ++ struct dentry *dentry) ++{ ++ struct dentry *old_pwd; ++ struct vfsmount *old_pwdmnt; ++ ++ write_lock(&fs->lock); ++ old_pwd = fs->pwd; ++ old_pwdmnt = fs->pwdmnt; ++ fs->pwdmnt = mnt; ++ fs->pwd = dentry; ++ write_unlock(&fs->lock); ++ ++ if (old_pwd) { ++ dput(old_pwd); ++ mntput(old_pwdmnt); ++ } ++} ++ ++ ++int rst_restore_fs(struct cpt_context *ctx) ++{ ++ loff_t pos; ++ cpt_object_t *obj; ++ int err = 0; ++ ++ for_each_object(obj, CPT_OBJ_FS) { ++ struct cpt_fs_struct_image fi; ++ struct fs_struct *fs = obj->o_obj; ++ int i; ++ struct dentry *d[3]; ++ struct vfsmount *m[3]; ++ ++ err = rst_get_object(CPT_OBJ_FS, obj->o_pos, &fi, ctx); ++ if (err) ++ return err; ++ ++ fs->umask = fi.cpt_umask; ++ ++ pos = obj->o_pos + fi.cpt_hdrlen; ++ d[0] = d[1] = d[2] = NULL; ++ m[0] = m[1] = m[2] = NULL; ++ i = 0; ++ while (pos < obj->o_pos + fi.cpt_next && i<3) { ++ err = get_dir(d+i, m+i, &pos, ctx); ++ if (err) { ++ 
eprintk_ctx("cannot get_dir: %d", err); ++ for (--i; i >= 0; i--) { ++ if (d[i]) ++ dput(d[i]); ++ if (m[i]) ++ mntput(m[i]); ++ } ++ return err; ++ } ++ i++; ++ } ++ if (d[0]) ++ __set_fs_root(fs, m[0], d[0]); ++ if (d[1]) ++ __set_fs_pwd(fs, m[1], d[1]); ++ if (d[2]) { ++ struct dentry *olddentry; ++ struct vfsmount *oldmnt; ++ write_lock(&fs->lock); ++ oldmnt = fs->altrootmnt; ++ olddentry = fs->altroot; ++ fs->altrootmnt = m[2]; ++ fs->altroot = d[2]; ++ write_unlock(&fs->lock); ++ ++ if (olddentry) { ++ dput(olddentry); ++ mntput(oldmnt); ++ } ++ } ++ } ++ return err; ++} ++ ++int do_one_mount(char *mntpnt, char *mnttype, char *mntbind, unsigned long flags, struct cpt_context *ctx) ++{ ++ int err; ++ ++ if (mntbind && (strcmp(mntbind, "/") == 0 || strcmp(mntbind, "") == 0)) ++ mntbind = NULL; ++ ++ if (mntbind) ++ flags |= MS_BIND; ++ ++ err = sc_mount(mntbind, mntpnt, mnttype, flags); ++ if (err < 0) { ++ eprintk_ctx("%d mounting %s %s %08lx\n", err, mntpnt, mnttype, flags); ++ return err; ++ } ++ return 0; ++} ++ ++static int undumptmpfs(void *arg) ++{ ++ int i; ++ int *pfd = arg; ++ char *argv[] = { "tar", "x", "-C", "/", "-S", NULL }; ++ ++ if (pfd[0] != 0) ++ sc_dup2(pfd[0], 0); ++ ++ for (i=1; i<current->files->fdt->max_fds; i++) ++ sc_close(i); ++ ++ module_put(THIS_MODULE); ++ ++ set_fs(KERNEL_DS); ++ i = sc_execve("/bin/tar", argv, NULL); ++ eprintk("failed to exec /bin/tar: %d\n", i); ++ return -1; ++} ++ ++static int rst_restore_tmpfs(loff_t *pos, struct cpt_context * ctx) ++{ ++ int err; ++ int pfd[2]; ++ struct file *f; ++ struct cpt_object_hdr v; ++ int n; ++ loff_t end; ++ int pid; ++ ++ err = rst_get_object(CPT_OBJ_NAME, *pos, &v, ctx); ++ if (err < 0) ++ return err; ++ ++ err = sc_pipe(pfd); ++ if (err < 0) ++ return err; ++ pid = err = local_kernel_thread(undumptmpfs, (void*)pfd, SIGCHLD, 0); ++ if (err < 0) ++ goto out; ++ f = fget(pfd[1]); ++ sc_close(pfd[1]); ++ sc_close(pfd[0]); ++ ++ ctx->file->f_pos = *pos + v.cpt_hdrlen; ++ end = *pos 
+ v.cpt_next; ++ *pos += v.cpt_next; ++ do { ++ char buf[16]; ++ mm_segment_t oldfs; ++ ++ n = end - ctx->file->f_pos; ++ if (n > sizeof(buf)) ++ n = sizeof(buf); ++ ++ if (ctx->read(buf, n, ctx)) ++ break; ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ f->f_op->write(f, buf, n, &f->f_pos); ++ set_fs(oldfs); ++ } while (ctx->file->f_pos < end); ++ ++ fput(f); ++ ++ clear_tsk_thread_flag(current,TIF_SIGPENDING); ++ ++ if ((err = sc_waitx(pid, 0)) < 0) ++ eprintk_ctx("wait4: %d\n", err); ++ ++ return 0; ++ ++out: ++ if (pfd[1] >= 0) ++ sc_close(pfd[1]); ++ if (pfd[0] >= 0) ++ sc_close(pfd[0]); ++ return err; ++} ++ ++int restore_one_vfsmount(struct cpt_vfsmount_image *mi, loff_t pos, struct cpt_context *ctx) ++{ ++ int err; ++ loff_t endpos; ++ ++ endpos = pos + mi->cpt_next; ++ pos += mi->cpt_hdrlen; ++ ++ while (pos < endpos) { ++ char *mntdev; ++ char *mntpnt; ++ char *mnttype; ++ char *mntbind; ++ ++ mntdev = __rst_get_name(&pos, ctx); ++ mntpnt = __rst_get_name(&pos, ctx); ++ mnttype = __rst_get_name(&pos, ctx); ++ mntbind = __rst_get_name(&pos, ctx); ++ err = -EINVAL; ++ if (mnttype && mntpnt) { ++ err = 0; ++ if (strcmp(mntpnt, "/")) ++ err = do_one_mount(mntpnt, mnttype, mntbind, mi->cpt_flags, ctx); ++ if (strcmp(mnttype, "tmpfs") == 0) { ++ rst_restore_tmpfs(&pos, ctx); ++ } ++ } ++ if (mntdev) ++ rst_put_name(mntdev, ctx); ++ if (mntpnt) ++ rst_put_name(mntpnt, ctx); ++ if (mnttype) ++ rst_put_name(mnttype, ctx); ++ if (mntbind) ++ rst_put_name(mntbind, ctx); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++int restore_one_namespace(loff_t pos, loff_t endpos, struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_vfsmount_image mi; ++ ++ while (pos < endpos) { ++ err = rst_get_object(CPT_OBJ_VFSMOUNT, pos, &mi, ctx); ++ if (err) ++ return err; ++ err = restore_one_vfsmount(&mi, pos, ctx); ++ if (err) ++ return err; ++ pos += mi.cpt_next; ++ } ++ return 0; ++} ++ ++int rst_root_namespace(struct cpt_context *ctx) ++{ ++ int err; ++ loff_t sec = 
ctx->sections[CPT_SECT_NAMESPACE]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ struct cpt_object_hdr sbuf; ++ int done = 0; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_NAMESPACE || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ err = rst_get_object(CPT_OBJ_NAMESPACE, sec, &sbuf, ctx); ++ if (err) ++ return err; ++ if (done) { ++ eprintk_ctx("multiple namespaces are not supported\n"); ++ break; ++ } ++ done++; ++ err = restore_one_namespace(sec+sbuf.cpt_hdrlen, sec+sbuf.cpt_next, ctx); ++ if (err) ++ return err; ++ sec += sbuf.cpt_next; ++ } ++ ++ return 0; ++} ++ ++int rst_stray_files(struct cpt_context *ctx) ++{ ++ int err = 0; ++ loff_t sec = ctx->sections[CPT_SECT_FILES]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_FILES || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ struct cpt_object_hdr sbuf; ++ cpt_object_t *obj; ++ ++ err = _rst_get_object(CPT_OBJ_FILE, sec, &sbuf, sizeof(sbuf), ctx); ++ if (err) ++ break; ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, sec, ctx); ++ if (!obj) { ++ struct file *file; ++ ++ dprintk_ctx("stray file %Ld\n", sec); ++ ++ file = rst_sysv_shm(sec, ctx); ++ ++ if (IS_ERR(file)) { ++ eprintk_ctx("rst_stray_files: %ld\n", PTR_ERR(file)); ++ return PTR_ERR(file); ++ } else { ++ fput(file); ++ } ++ } ++ sec += sbuf.cpt_next; ++ } ++ ++ return err; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_i386.S linux-2.6.16-026test015/kernel/cpt/rst_i386.S +--- linux-2.6.16.orig/kernel/cpt/rst_i386.S 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_i386.S 2006-07-04 14:41:39.000000000 +0400 
+@@ -0,0 +1,40 @@ ++#define ASSEMBLY 1 ++ ++#include <linux/config.h> ++#include <linux/linkage.h> ++#include <asm/thread_info.h> ++#include <asm/errno.h> ++#include <asm/segment.h> ++#include <asm/page.h> ++#include <asm/smp.h> ++#include <asm/page.h> ++ ++ .section .text ++ .align 4 ++ .global ret_last_siginfo ++ret_last_siginfo: ++ call rlsi ++ movl %eax,%esp ++ ret ++ ++ .align 8 ++ .global ret_child_tid ++ret_child_tid: ++ push %esp ++ call rct ++ movl %eax,%esp ++ ret ++ ++ .align 4 ++ .global ret_from_rst ++ret_from_rst: ++ pushl %eax ++ jmp ret_from_fork+6 ++ ++ .align 4 ++ .global pre_ret_from_fork ++pre_ret_from_fork: ++ pushl %eax ++ call schedule_tail ++ popl %eax ++ ret +diff -upr linux-2.6.16.orig/kernel/cpt/rst_mm.c linux-2.6.16-026test015/kernel/cpt/rst_mm.c +--- linux-2.6.16.orig/kernel/cpt/rst_mm.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_mm.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,986 @@ ++/* ++ * ++ * kernel/cpt/rst_mm.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/virtinfo.h> ++#include <linux/hugetlb.h> ++#include <linux/errno.h> ++#include <linux/errno.h> ++#include <linux/pagemap.h> ++#include <linux/mman.h> ++#include <linux/vmalloc.h> ++#include <linux/rmap.h> ++#include <linux/hash.h> ++#include <asm/pgalloc.h> ++#include <asm/tlb.h> ++#include <asm/tlbflush.h> ++#include <asm/pgtable.h> ++#include <asm/mmu.h> ++#include <asm/ldt.h> ++#include <asm/desc.h> ++#include <asm/mmu_context.h> ++#include <linux/swapops.h> ++#include <linux/cpt_image.h> ++ ++#ifdef CONFIG_VE ++#include <ub/beancounter.h> ++#include <ub/ub_vmpages.h> ++#endif ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_files.h" ++#include "cpt_ubc.h" ++#include "cpt_mm.h" ++#include "cpt_kernel.h" ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++#include "cpt_pagein.h" ++#endif ++ ++#include "cpt_syscalls.h" ++ ++#define __PAGE_NX (1ULL<<63) ++ ++static unsigned long make_prot(struct cpt_vma_image *vmai) ++{ ++ unsigned long prot = 0; ++ ++ if (vmai->cpt_flags&VM_READ) ++ prot |= PROT_READ; ++ if (vmai->cpt_flags&VM_WRITE) ++ prot |= PROT_WRITE; ++ if (vmai->cpt_flags&VM_EXEC) ++ prot |= PROT_EXEC; ++ if (vmai->cpt_flags&VM_GROWSDOWN) ++ prot |= PROT_GROWSDOWN; ++ if (vmai->cpt_flags&VM_GROWSUP) ++ prot |= PROT_GROWSUP; ++ return prot; ++} ++ ++static unsigned long make_flags(struct cpt_vma_image *vmai) ++{ ++ unsigned long flags = MAP_FIXED; ++ ++ if (vmai->cpt_flags&(VM_SHARED|VM_MAYSHARE)) ++ flags |= MAP_SHARED; ++ else ++ flags |= MAP_PRIVATE; ++ ++ if (vmai->cpt_file == CPT_NULL) ++ flags |= MAP_ANONYMOUS; ++ if (vmai->cpt_flags&VM_GROWSDOWN) ++ flags |= MAP_GROWSDOWN; ++ if (vmai->cpt_flags&VM_DENYWRITE) ++ flags |= MAP_DENYWRITE; ++ if (vmai->cpt_flags&VM_EXECUTABLE) ++ 
flags |= MAP_EXECUTABLE; ++ if (!(vmai->cpt_flags&VM_ACCOUNT)) ++ flags |= MAP_NORESERVE; ++ return flags; ++} ++ ++ ++#if !defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) ++static int __alloc_ldt(mm_context_t *pc, int mincount) ++{ ++ int oldsize, newsize, i; ++ ++ if (mincount <= pc->size) ++ return 0; ++ /* ++ * LDT got larger - reallocate if necessary. ++ */ ++ oldsize = pc->size; ++ mincount = (mincount+511)&(~511); ++ newsize = mincount*LDT_ENTRY_SIZE; ++ for (i = 0; i < newsize; i += PAGE_SIZE) { ++ int nr = i/PAGE_SIZE; ++ BUG_ON(i >= 64*1024); ++ if (!pc->ldt_pages[nr]) { ++ pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC); ++ if (!pc->ldt_pages[nr]) ++ return -ENOMEM; ++ clear_highpage(pc->ldt_pages[nr]); ++ } ++ } ++ pc->size = mincount; ++ return 0; ++} ++ ++static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx) ++{ ++ struct mm_struct *mm = current->mm; ++ int i; ++ int err; ++ int size; ++ ++ err = __alloc_ldt(&mm->context, li->cpt_size/LDT_ENTRY_SIZE); ++ if (err) ++ return err; ++ ++ size = mm->context.size*LDT_ENTRY_SIZE; ++ ++ for (i = 0; i < size; i += PAGE_SIZE) { ++ int nr = i / PAGE_SIZE, bytes; ++ char *kaddr = kmap(mm->context.ldt_pages[nr]); ++ ++ bytes = size - i; ++ if (bytes > PAGE_SIZE) ++ bytes = PAGE_SIZE; ++ err = ctx->pread(kaddr, bytes, ctx, pos + li->cpt_hdrlen + i); ++ kunmap(mm->context.ldt_pages[nr]); ++ if (err) ++ return err; ++ } ++ ++ load_LDT(&mm->context); ++ return 0; ++} ++ ++#else ++ ++static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx) ++{ ++ struct mm_struct *mm = current->mm; ++ int oldsize = mm->context.size; ++ void *oldldt; ++ void *newldt; ++ int err; ++ ++ if (li->cpt_size > PAGE_SIZE) ++ newldt = vmalloc(li->cpt_size); ++ else ++ newldt = kmalloc(li->cpt_size, GFP_KERNEL); ++ ++ if (!newldt) ++ return -ENOMEM; ++ ++ err = ctx->pread(newldt, li->cpt_size, ctx, pos + li->cpt_hdrlen); ++ if (err) ++ return err; ++ ++ oldldt = 
mm->context.ldt; ++ mm->context.ldt = newldt; ++ mm->context.size = li->cpt_size/LDT_ENTRY_SIZE; ++ ++ load_LDT(&mm->context); ++ ++ if (oldsize) { ++ if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) ++ vfree(oldldt); ++ else ++ kfree(oldldt); ++ } ++ return 0; ++} ++#endif ++ ++static int ++restore_aio_ring(struct kioctx *aio_ctx, struct cpt_aio_ctx_image *aimg) ++{ ++ struct aio_ring_info *info = &aio_ctx->ring_info; ++ unsigned nr_events = aio_ctx->max_reqs; ++ unsigned long size; ++ int nr_pages; ++ ++ /* We recalculate parameters of the ring exactly like ++ * fs/aio.c does and then compare calculated values ++ * with ones, stored in dump. They must be the same. */ ++ ++ nr_events += 2; ++ ++ size = sizeof(struct aio_ring); ++ size += sizeof(struct io_event) * nr_events; ++ nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT; ++ ++ if (nr_pages != aimg->cpt_ring_pages) ++ return -EINVAL; ++ ++ info->nr_pages = nr_pages; ++ ++ nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); ++ ++ if (nr_events != aimg->cpt_nr) ++ return -EINVAL; ++ ++ info->nr = 0; ++ info->ring_pages = info->internal_pages; ++ if (nr_pages > AIO_RING_PAGES) { ++ info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL); ++ if (!info->ring_pages) ++ return -ENOMEM; ++ memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages); ++ } ++ ++ info->mmap_size = nr_pages * PAGE_SIZE; ++ ++ /* This piece of shit is not entirely my fault. Kernel aio.c makes ++ * something odd mmap()ping some pages and then pinning them. ++ * I guess it is just some mud remained of failed attempt to show ring ++ * to user space. The result is odd. :-) Immediately after ++ * creation of AIO context, kernel shares those pages with user ++ * and user can read and even write there. But after the first ++ * fork, pages are marked COW with evident consequences. 
++ * I remember, I did the same mistake in the first version ++ * of mmapped packet socket, luckily that crap never reached ++ * mainstream. ++ * ++ * So, what are we going to do? I can simulate this odd behaviour ++ * exactly, but I am not insane yet. For now just take the pages ++ * from user space. Alternatively, we could keep kernel copy ++ * in AIO context image, which would be more correct. ++ * ++ * What is wrong now? If the pages are COWed, ring is transferred ++ * incorrectly. ++ */ ++ down_read(¤t->mm->mmap_sem); ++ info->mmap_base = aimg->cpt_mmap_base; ++ info->nr_pages = get_user_pages(current, current->mm, ++ info->mmap_base, nr_pages, ++ 1, 0, info->ring_pages, NULL); ++ up_read(¤t->mm->mmap_sem); ++ ++ if (unlikely(info->nr_pages != nr_pages)) { ++ int i; ++ ++ for (i=0; i<info->nr_pages; i++) ++ put_page(info->ring_pages[i]); ++ if (info->ring_pages && info->ring_pages != info->internal_pages) ++ kfree(info->ring_pages); ++ return -EFAULT; ++ } ++ ++ aio_ctx->user_id = info->mmap_base; ++ ++ info->nr = nr_events; ++ info->tail = aimg->cpt_tail; ++ ++ return 0; ++} ++ ++static int do_rst_aio(struct cpt_aio_ctx_image *aimg, loff_t pos, cpt_context_t *ctx) ++{ ++ int err; ++ struct kioctx *aio_ctx; ++ extern spinlock_t aio_nr_lock; ++ ++ aio_ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL); ++ if (!aio_ctx) ++ return -ENOMEM; ++ ++ memset(aio_ctx, 0, sizeof(*aio_ctx)); ++ aio_ctx->max_reqs = aimg->cpt_max_reqs; ++ ++ if ((err = restore_aio_ring(aio_ctx, aimg)) < 0) { ++ kmem_cache_free(kioctx_cachep, aio_ctx); ++ eprintk_ctx("AIO %Ld restore_aio_ring: %d\n", pos, err); ++ return err; ++ } ++ ++ aio_ctx->mm = current->mm; ++ atomic_inc(&aio_ctx->mm->mm_count); ++ atomic_set(&aio_ctx->users, 1); ++ spin_lock_init(&aio_ctx->ctx_lock); ++ spin_lock_init(&aio_ctx->ring_info.ring_lock); ++ init_waitqueue_head(&aio_ctx->wait); ++ INIT_LIST_HEAD(&aio_ctx->active_reqs); ++ INIT_LIST_HEAD(&aio_ctx->run_list); ++ INIT_WORK(&aio_ctx->wq, aio_kick_handler, ctx); 
++ ++ spin_lock(&aio_nr_lock); ++ aio_nr += aio_ctx->max_reqs; ++ spin_unlock(&aio_nr_lock); ++ ++ write_lock(&aio_ctx->mm->ioctx_list_lock); ++ aio_ctx->next = aio_ctx->mm->ioctx_list; ++ aio_ctx->mm->ioctx_list = aio_ctx; ++ write_unlock(&aio_ctx->mm->ioctx_list_lock); ++ ++ return 0; ++} ++ ++struct anonvma_map ++{ ++ struct hlist_node list; ++ struct anon_vma *avma; ++ __u64 id; ++}; ++ ++static int verify_create_anonvma(struct mm_struct *mm, ++ struct cpt_vma_image *vmai, ++ cpt_context_t *ctx) ++{ ++ struct anon_vma *avma = NULL; ++ struct anon_vma *new_avma; ++ struct vm_area_struct *vma; ++ int h; ++ ++ if (!ctx->anonvmas) { ++ if (CPT_ANONVMA_HSIZE*sizeof(struct hlist_head) > PAGE_SIZE) ++ return -EINVAL; ++ if ((ctx->anonvmas = (void*)__get_free_page(GFP_KERNEL)) == NULL) ++ return -ENOMEM; ++ for (h = 0; h < CPT_ANONVMA_HSIZE; h++) ++ INIT_HLIST_HEAD(&ctx->anonvmas[h]); ++ } else { ++ struct anonvma_map *map; ++ struct hlist_node *elem; ++ ++ h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS); ++ hlist_for_each_entry(map, elem, &ctx->anonvmas[h], list) { ++ if (map->id == vmai->cpt_anonvmaid) { ++ avma = map->avma; ++ break; ++ } ++ } ++ } ++ ++ down_read(&mm->mmap_sem); ++ if ((vma = find_vma(mm, vmai->cpt_start)) == NULL) { ++ up_read(&mm->mmap_sem); ++ return -ESRCH; ++ } ++ if (vma->vm_start != vmai->cpt_start) { ++ up_read(&mm->mmap_sem); ++ eprintk_ctx("vma start mismatch\n"); ++ return -EINVAL; ++ } ++ if (vma->vm_pgoff != vmai->cpt_pgoff) { ++ dprintk_ctx("vma pgoff mismatch, fixing\n"); ++ if (vma->vm_file || (vma->vm_flags&(VM_SHARED|VM_MAYSHARE))) { ++ eprintk_ctx("cannot fixup vma pgoff\n"); ++ up_read(&mm->mmap_sem); ++ return -EINVAL; ++ } ++ vma->vm_pgoff = vmai->cpt_pgoff; ++ } ++ ++ if (!vma->anon_vma) { ++ if (avma) { ++ vma->anon_vma = avma; ++ anon_vma_link(vma); ++ } else { ++ int err; ++ ++ err = anon_vma_prepare(vma); ++ ++ if (err) { ++ up_read(&mm->mmap_sem); ++ return err; ++ } ++ } ++ } else { ++ /* Note, we 
_can_ arrive to the situation, when two ++ * different anonvmaid's point to one anon_vma, this happens ++ * f.e. when mmap() merged new area to previous one and ++ * they will share one anon_vma even if they did not on ++ * original host. ++ * ++ * IT IS OK. To all that I understand, we may merge all ++ * the anon_vma's and rmap can scan all the huge list of vmas ++ * searching for page. It is just "suboptimal". ++ * ++ * Real disaster would happen, if vma already got an anon_vma ++ * with different id. It is very rare case, kernel does the ++ * best efforts to merge anon_vmas when some attributes are ++ * different. In this case we will fall to copying memory. ++ */ ++ if (avma && vma->anon_vma != avma) { ++ up_read(&mm->mmap_sem); ++ wprintk_ctx("anon_vma mismatch\n"); ++ return 0; ++ } ++ } ++ ++ new_avma = vma->anon_vma; ++ up_read(&mm->mmap_sem); ++ ++ if (!avma) { ++ struct anonvma_map *map; ++ ++ if (!new_avma) ++ return -EINVAL; ++ ++ if ((map = kmalloc(sizeof(*map), GFP_KERNEL)) == NULL) ++ return -ENOMEM; ++ ++ map->id = vmai->cpt_anonvmaid; ++ map->avma = new_avma; ++ h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS); ++ hlist_add_head(&map->list, &ctx->anonvmas[h]); ++ } ++ return 0; ++} ++ ++static int copy_mm_pages(struct mm_struct *src, unsigned long start, ++ unsigned long end) ++{ ++ int err; ++ ++ for (; start < end; start += PAGE_SIZE) { ++ struct page *page; ++ struct page *spage; ++ void *maddr, *srcaddr; ++ ++ err = get_user_pages(current, current->mm, ++ start, 1, 1, 1, &page, NULL); ++ if (err == 0) ++ err = -EFAULT; ++ if (err < 0) ++ return err; ++ ++ err = get_user_pages(current, src, ++ start, 1, 0, 1, &spage, NULL); ++ ++ if (err == 0) ++ err = -EFAULT; ++ if (err < 0) { ++ page_cache_release(page); ++ return err; ++ } ++ ++ srcaddr = kmap(spage); ++ maddr = kmap(page); ++ memcpy(maddr, srcaddr, PAGE_SIZE); ++ set_page_dirty_lock(page); ++ kunmap(page); ++ kunmap(spage); ++ page_cache_release(page); ++ 
page_cache_release(spage); ++ } ++ return 0; ++} ++ ++static int do_rst_vma(struct cpt_vma_image *vmai, loff_t vmapos, loff_t mmpos, struct cpt_context *ctx) ++{ ++ int err = 0; ++ unsigned long addr; ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma; ++ struct file *file = NULL; ++ unsigned long prot; ++ int checked = 0; ++ ++ prot = make_prot(vmai); ++ ++ if (vmai->cpt_file != CPT_NULL) { ++ if (vmai->cpt_type == CPT_VMA_TYPE_0) { ++ file = rst_file(vmai->cpt_file, -1, ctx); ++ if (IS_ERR(file)) { ++ eprintk_ctx("do_rst_vma: rst_file: %Ld\n", vmai->cpt_file); ++ return PTR_ERR(file); ++ } ++ } else if (vmai->cpt_type == CPT_VMA_TYPE_SHM) { ++ file = rst_sysv_shm(vmai->cpt_file, ctx); ++ if (IS_ERR(file)) ++ return PTR_ERR(file); ++ } ++ } ++ ++ down_write(&mm->mmap_sem); ++ addr = do_mmap_pgoff(file, vmai->cpt_start, ++ vmai->cpt_end-vmai->cpt_start, ++ prot, make_flags(vmai), ++ vmai->cpt_pgoff); ++ ++ if (addr != vmai->cpt_start) { ++ up_write(&mm->mmap_sem); ++ ++ err = -EINVAL; ++ if (IS_ERR((void*)addr)) ++ err = addr; ++ goto out; ++ } ++ ++ vma = find_vma(mm, vmai->cpt_start); ++ if (vma == NULL) { ++ up_write(&mm->mmap_sem); ++ eprintk_ctx("cannot find mmapped vma\n"); ++ err = -ESRCH; ++ goto out; ++ } ++ ++ /* do_mmap_pgoff() can merge new area to previous one (not to the next, ++ * we mmap in order, the rest of mm is still unmapped). This can happen ++ * f.e. if flags are to be adjusted later, or if we had different ++ * anon_vma on two adjacent regions. Split it by brute force. 
*/ ++ if (vma->vm_start != vmai->cpt_start) { ++ dprintk_ctx("vma %Ld merged, split\n", vmapos); ++ err = split_vma(mm, vma, (unsigned long)vmai->cpt_start, 0); ++ if (err) { ++ up_write(&mm->mmap_sem); ++ eprintk_ctx("cannot split vma\n"); ++ goto out; ++ } ++ } ++ up_write(&mm->mmap_sem); ++ ++ if (vmai->cpt_anonvma && vmai->cpt_anonvmaid) { ++ err = verify_create_anonvma(mm, vmai, ctx); ++ if (err) { ++ eprintk_ctx("cannot verify_create_anonvma %Ld\n", vmapos); ++ goto out; ++ } ++ } ++ ++ if (vmai->cpt_next > vmai->cpt_hdrlen) { ++ loff_t offset = vmapos + vmai->cpt_hdrlen; ++ ++ do { ++ union { ++ struct cpt_page_block pb; ++ struct cpt_remappage_block rpb; ++ struct cpt_copypage_block cpb; ++ struct cpt_lazypage_block lpb; ++ } u; ++ loff_t pos; ++ ++ err = rst_get_object(-1, offset, &u, ctx); ++ if (err) { ++ eprintk_ctx("vma fix object: %d\n", err); ++ goto out; ++ } ++ if (u.rpb.cpt_object == CPT_OBJ_REMAPPAGES) { ++ err = sc_remap_file_pages(u.rpb.cpt_start, ++ u.rpb.cpt_end-u.rpb.cpt_start, ++ 0, u.rpb.cpt_pgoff, 0); ++ if (err < 0) { ++ eprintk_ctx("remap_file_pages: %d (%08x,%u,%u)\n", err, ++ (__u32)u.rpb.cpt_start, (__u32)(u.rpb.cpt_end-u.rpb.cpt_start), ++ (__u32)u.rpb.cpt_pgoff); ++ goto out; ++ } ++ offset += u.rpb.cpt_next; ++ continue; ++ } else if (u.cpb.cpt_object == CPT_OBJ_LAZYPAGES) { ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ unsigned long addr = u.lpb.cpt_start; ++ ++ down_read(&mm->mmap_sem); ++ if ((vma = find_vma(mm, u.lpb.cpt_start)) == NULL) { ++ up_read(&mm->mmap_sem); ++ eprintk_ctx("lost vm_area_struct\n"); ++ err = -ESRCH; ++ goto out; ++ } ++ err = anon_vma_prepare(vma); ++ if (err) { ++ up_read(&mm->mmap_sem); ++ goto out; ++ } ++ while (addr < u.lpb.cpt_end) { ++ err = rst_pagein(vma, u.lpb.cpt_index + (addr-u.lpb.cpt_start)/PAGE_SIZE, ++ addr, ctx); ++ if (err) ++ break; ++ addr += PAGE_SIZE; ++ } ++ up_read(&mm->mmap_sem); ++#else ++ err = -EINVAL; ++#endif ++ if (err) ++ goto out; ++ offset += u.cpb.cpt_next; ++ continue; ++ } 
else if (u.cpb.cpt_object == CPT_OBJ_COPYPAGES) { ++ struct vm_area_struct *vma, *vma1; ++ struct mm_struct *src; ++ struct anon_vma *src_anon; ++ cpt_object_t *mobj; ++ ++ if (!vmai->cpt_anonvmaid) { ++ err = -EINVAL; ++ eprintk_ctx("CPT_OBJ_COPYPAGES in !anonvma\n"); ++ goto out; ++ } ++ ++ mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, u.cpb.cpt_source, ctx); ++ if (!mobj) { ++ eprintk_ctx("lost mm_struct to clone pages from\n"); ++ err = -ESRCH; ++ goto out; ++ } ++ src = mobj->o_obj; ++ ++ down_read(&src->mmap_sem); ++ src_anon = NULL; ++ vma1 = find_vma(src, u.cpb.cpt_start); ++ if (vma1) ++ src_anon = vma1->anon_vma; ++ up_read(&src->mmap_sem); ++ ++ if (!vma1) { ++ eprintk_ctx("lost src vm_area_struct\n"); ++ err = -ESRCH; ++ goto out; ++ } ++ ++ down_read(&mm->mmap_sem); ++ if ((vma = find_vma(mm, u.cpb.cpt_start)) == NULL) { ++ up_read(&mm->mmap_sem); ++ eprintk_ctx("lost vm_area_struct\n"); ++ err = -ESRCH; ++ goto out; ++ } ++ ++ if (!src_anon || ++ !vma->anon_vma || ++ vma->anon_vma != src_anon || ++ vma->vm_start - vma1->vm_start != ++ (vma->vm_pgoff - vma1->vm_pgoff) << PAGE_SHIFT) { ++ up_read(&mm->mmap_sem); ++ wprintk_ctx("anon_vma mismatch in vm_area_struct %Ld\n", vmapos); ++ err = copy_mm_pages(mobj->o_obj, ++ u.cpb.cpt_start, ++ u.cpb.cpt_end); ++ } else { ++ err = __copy_page_range(vma, vma1, ++ u.cpb.cpt_start, ++ u.cpb.cpt_end-u.cpb.cpt_start); ++ up_read(&mm->mmap_sem); ++ } ++ if (err) { ++ eprintk_ctx("clone_page_range: %d (%08x,%u,%ld)\n", err, ++ (__u32)u.cpb.cpt_start, (__u32)(u.cpb.cpt_end-u.cpb.cpt_start), ++ (long)u.cpb.cpt_source); ++ goto out; ++ } ++ ++ offset += u.cpb.cpt_next; ++ continue; ++ } ++ if (u.pb.cpt_object != CPT_OBJ_PAGES) { ++ eprintk_ctx("unknown vma fix object %d\n", u.pb.cpt_object); ++ err = -EINVAL; ++ goto out; ++ } ++ pos = offset + sizeof(u.pb); ++ if (!(vmai->cpt_flags&VM_ACCOUNT) && !(prot&PROT_WRITE)) { ++ /* I guess this is get_user_pages() messed things, ++ * this happens f.e. when gdb inserts breakpoints. 
++ */ ++ int i; ++ for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/PAGE_SIZE; i++) { ++ struct page *page; ++ void *maddr; ++ err = get_user_pages(current, current->mm, ++ (unsigned long)u.pb.cpt_start + i*PAGE_SIZE, ++ 1, 1, 1, &page, NULL); ++ if (err == 0) ++ err = -EFAULT; ++ if (err < 0) { ++ eprintk_ctx("get_user_pages: %d\n", err); ++ goto out; ++ } ++ err = 0; ++ maddr = kmap(page); ++ if (u.pb.cpt_content == CPT_CONTENT_VOID) { ++ memset(maddr, 0, PAGE_SIZE); ++ } else if (u.pb.cpt_content == CPT_CONTENT_DATA) { ++ err = ctx->pread(maddr, PAGE_SIZE, ++ ctx, pos + i*PAGE_SIZE); ++ if (err) { ++ kunmap(page); ++ goto out; ++ } ++ } else { ++ err = -EINVAL; ++ kunmap(page); ++ goto out; ++ } ++ set_page_dirty_lock(page); ++ kunmap(page); ++ page_cache_release(page); ++ } ++ } else { ++ if (!(prot&PROT_WRITE)) ++ sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE); ++ if (u.pb.cpt_content == CPT_CONTENT_VOID) { ++ int i; ++ for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/sizeof(unsigned long); i++) { ++ err = __put_user(0UL, ((unsigned long __user*)(unsigned long)u.pb.cpt_start) + i); ++ if (err) { ++ eprintk_ctx("__put_user 2 %d\n", err); ++ goto out; ++ } ++ } ++ } else if (u.pb.cpt_content == CPT_CONTENT_DATA) { ++ loff_t tpos = pos; ++ err = ctx->file->f_op->read(ctx->file, cpt_ptr_import(u.pb.cpt_start), ++ u.pb.cpt_end-u.pb.cpt_start, ++ &tpos); ++ if (err != u.pb.cpt_end-u.pb.cpt_start) { ++ if (err >= 0) ++ err = -EIO; ++ goto out; ++ } ++ } else { ++ err = -EINVAL; ++ goto out; ++ } ++ if (!(prot&PROT_WRITE)) ++ sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot); ++ } ++ err = 0; ++ offset += u.pb.cpt_next; ++ } while (offset < vmapos + vmai->cpt_next); ++ } ++ ++check: ++ do { ++ struct vm_area_struct *vma; ++ down_read(&mm->mmap_sem); ++ vma = find_vma(mm, addr); ++ if (vma) { ++ if ((vma->vm_flags^vmai->cpt_flags)&VM_READHINTMASK) { ++ VM_ClearReadHint(vma); ++ vma->vm_flags |= vmai->cpt_flags&VM_READHINTMASK; ++ } ++ 
if ((vma->vm_flags^vmai->cpt_flags)&VM_LOCKED) { ++ dprintk_ctx("fixing up VM_LOCKED %Ld\n", vmapos); ++ up_read(&mm->mmap_sem); ++ if (vma->vm_flags&VM_LOCKED) ++ err = sc_munlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start); ++ else ++ err = sc_mlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start); ++ if (err) ++ goto out; ++ goto check; ++ } ++ if ((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&~__PAGE_NX) ++ wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos, ++ (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot); ++#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) ++ if (((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&__PAGE_NX) && ++ (ctx->kernel_config_flags&CPT_KERNEL_CONFIG_PAE)) ++ wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos, ++ (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot); ++#endif ++ if (vma->vm_flags != vmai->cpt_flags) { ++ unsigned long x = vma->vm_flags ^ vmai->cpt_flags; ++ if (x & VM_EXEC) { ++ /* Crap. On i386 this is OK. ++ * It is impossible to make via mmap/mprotect ++ * exec.c clears VM_EXEC on stack. 
*/ ++ vma->vm_flags &= ~VM_EXEC; ++ } else if ((x & VM_ACCOUNT) && !checked) { ++ checked = 1; ++ if (!(prot&PROT_WRITE)) { ++ up_read(&mm->mmap_sem); ++ sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE); ++ sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot); ++ goto check; ++ } ++ wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos, ++ (__u32)vma->vm_flags, (__u32)vmai->cpt_flags); ++ } else { ++ wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos, ++ (__u32)vma->vm_flags, (__u32)vmai->cpt_flags); ++ } ++ } ++ } else { ++ wprintk_ctx("no VMA for %08lx@%ld\n", addr, (long)vmapos); ++ } ++ up_read(&mm->mmap_sem); ++ } while (0); ++ ++out: ++ if (file) ++ fput(file); ++ return err; ++} ++ ++static int do_rst_mm(struct cpt_mm_image *vmi, loff_t pos, struct cpt_context *ctx) ++{ ++ int err = 0; ++ unsigned int def_flags; ++ struct mm_struct *mm = current->mm; ++ ++ down_write(&mm->mmap_sem); ++ do_munmap(mm, 0, TASK_SIZE); ++ ++ mm->start_code = vmi->cpt_start_code; ++ mm->end_code = vmi->cpt_end_code; ++ mm->start_data = vmi->cpt_start_data; ++ mm->end_data = vmi->cpt_end_data; ++ mm->start_brk = vmi->cpt_start_brk; ++ mm->brk = vmi->cpt_brk; ++ mm->start_stack = vmi->cpt_start_stack; ++ mm->arg_start = vmi->cpt_start_arg; ++ mm->arg_end = vmi->cpt_end_arg; ++ mm->env_start = vmi->cpt_start_env; ++ mm->env_end = vmi->cpt_end_env; ++ mm->def_flags = 0; ++ def_flags = vmi->cpt_def_flags; ++ ++ mm->dumpable = (vmi->cpt_dumpable != 0); ++ mm->vps_dumpable = (vmi->cpt_vps_dumpable != 0); ++ ++#if 0 /* def CONFIG_HUGETLB_PAGE*/ ++/* NB: ? 
*/ ++ int used_hugetlb; ++#endif ++ up_write(&mm->mmap_sem); ++ ++ if (vmi->cpt_next > vmi->cpt_hdrlen) { ++ loff_t offset = pos + vmi->cpt_hdrlen; ++ do { ++ union { ++ struct cpt_vma_image vmai; ++ struct cpt_aio_ctx_image aioi; ++ struct cpt_obj_bits bits; ++ } u; ++ err = rst_get_object(-1, offset, &u, ctx); ++ if (err) ++ goto out; ++ if (u.vmai.cpt_object == CPT_OBJ_VMA) { ++ err = do_rst_vma(&u.vmai, offset, pos, ctx); ++ if (err) ++ goto out; ++ } else if (u.bits.cpt_object == CPT_OBJ_BITS && ++ u.bits.cpt_content == CPT_CONTENT_MM_CONTEXT) { ++ err = do_rst_ldt(&u.bits, offset, ctx); ++ if (err) ++ goto out; ++ } else if (u.aioi.cpt_object == CPT_OBJ_AIO_CONTEXT) { ++ err = do_rst_aio(&u.aioi, offset, ctx); ++ if (err) ++ goto out; ++ } else { ++ eprintk_ctx("unknown object %u in mm image\n", u.vmai.cpt_object); ++ err = -EINVAL; ++ goto out; ++ } ++ offset += u.vmai.cpt_next; ++ } while (offset < pos + vmi->cpt_next); ++ } ++ ++ down_write(&mm->mmap_sem); ++ mm->def_flags = def_flags; ++ up_write(&mm->mmap_sem); ++ ++ ++out: ++ return err; ++} ++ ++extern void exit_mm(struct task_struct * tsk); ++ ++int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ int err = 0; ++ cpt_object_t *mobj; ++ void *tmp = (void*)__get_free_page(GFP_KERNEL); ++ struct cpt_mm_image *vmi = (struct cpt_mm_image *)tmp; ++ ++ if (!tmp) ++ return -ENOMEM; ++ ++ if (ti->cpt_mm == CPT_NULL) { ++ if (current->mm) ++ exit_mm(current); ++ goto out; ++ } ++ ++ mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx); ++ if (mobj) { ++ if (current->mm != mobj->o_obj) BUG(); ++ goto out; ++ } ++ ++ if (current->mm == NULL) { ++ struct mm_struct *mm = mm_alloc(); ++ if (mm == NULL) { ++ err = -ENOMEM; ++ goto out; ++ } ++ err = init_new_context(current, mm); ++ if (err) { ++ mmdrop(mm); ++ goto out; ++ } ++ current->mm = mm; ++ } ++ ++ if ((err = rst_get_object(CPT_OBJ_MM, ti->cpt_mm, vmi, ctx)) != 0) ++ goto out; ++ if ((err = do_rst_mm(vmi, ti->cpt_mm, ctx)) != 0) 
{ ++ eprintk_ctx("do_rst_mm %Ld\n", ti->cpt_mm); ++ goto out; ++ } ++ err = -ENOMEM; ++ mobj = cpt_object_add(CPT_OBJ_MM, current->mm, ctx); ++ if (mobj != NULL) { ++ err = 0; ++ cpt_obj_setpos(mobj, ti->cpt_mm, ctx); ++ } ++ ++out: ++ if (tmp) ++ free_page((unsigned long)tmp); ++ return err; ++} ++ ++/* This is part of mm setup, made in parent context. Mostly, it is the place, ++ * where we graft mm of another process to child. ++ */ ++ ++int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ task_t *tsk = obj->o_obj; ++ cpt_object_t *mobj; ++ ++ /* Task without mm. Just get rid of this. */ ++ if (ti->cpt_mm == CPT_NULL) { ++ if (tsk->mm) { ++ mmput(tsk->mm); ++ tsk->mm = NULL; ++ } ++ return 0; ++ } ++ ++ mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx); ++ if (mobj) { ++ struct mm_struct *newmm = mobj->o_obj; ++ /* Good, the MM is already created. */ ++ if (newmm == tsk->mm) { ++ /* Already done by clone(). */ ++ return 0; ++ } ++ mmput(tsk->mm); ++ atomic_inc(&newmm->mm_users); ++ tsk->mm = newmm; ++ tsk->active_mm = newmm; ++ } ++ return 0; ++} ++ ++/* We use CLONE_VM when mm of child is going to be shared with parent. ++ * Otherwise mm is copied. ++ */ ++ ++__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ if (ti->cpt_mm == CPT_NULL || ++ lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx)) ++ return CLONE_VM; ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_net.c linux-2.6.16-026test015/kernel/cpt/rst_net.c +--- linux-2.6.16.orig/kernel/cpt/rst_net.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_net.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,481 @@ ++/* ++ * ++ * kernel/cpt/rst_net.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/fs.h> ++#include <linux/socket.h> ++#include <linux/netdevice.h> ++#include <linux/inetdevice.h> ++#include <linux/rtnetlink.h> ++#include <linux/ve.h> ++#include <linux/ve_proto.h> ++#include <net/route.h> ++#include <net/ip_fib.h> ++#include <net/addrconf.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_kernel.h" ++#include "cpt_net.h" ++ ++#include "cpt_syscalls.h" ++ ++extern struct in_ifaddr *inet_alloc_ifa(void); ++extern int inet_insert_ifa(struct in_ifaddr *ifa); ++ ++int rst_restore_ifaddr(struct cpt_context *ctx) ++{ ++ int err; ++ loff_t sec = ctx->sections[CPT_SECT_NET_IFADDR]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ struct cpt_ifaddr_image di; ++ struct net_device *dev; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_NET_IFADDR || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ int cindex = -1; ++ int err; ++ err = rst_get_object(CPT_OBJ_NET_IFADDR, sec, &di, ctx); ++ if (err) ++ return err; ++ if (di.cpt_index == ctx->lo_index_old) ++ cindex = ctx->lo_index; ++ else if (di.cpt_index == ctx->venet_index_old) ++ cindex = ctx->venet_index; ++ if (cindex <= 0) ++ eprintk_ctx("unknown ifaddr for %d\n", di.cpt_index); ++ rtnl_lock(); ++ dev = __dev_get_by_index(cindex); ++ if (dev && di.cpt_family == AF_INET) { ++ struct in_device *in_dev; ++ struct in_ifaddr *ifa; ++ if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) ++ in_dev = inetdev_init(dev); ++ ifa = inet_alloc_ifa(); ++ if (ifa) { ++ ifa->ifa_local = di.cpt_address[0]; ++ ifa->ifa_address = 
di.cpt_peer[0]; ++ ifa->ifa_broadcast = di.cpt_broadcast[0]; ++ ifa->ifa_prefixlen = di.cpt_masklen; ++ ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); ++ ifa->ifa_flags = di.cpt_flags; ++ ifa->ifa_scope = di.cpt_scope; ++ memcpy(ifa->ifa_label, di.cpt_label, IFNAMSIZ); ++ in_dev_hold(in_dev); ++ ifa->ifa_dev = in_dev; ++ err = inet_insert_ifa(ifa); ++ if (err && err != -EEXIST) { ++ rtnl_unlock(); ++ eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label); ++ return err; ++ } ++ } ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) ++ } else if (dev && di.cpt_family == AF_INET6) { ++ err = inet6_addr_add(dev->ifindex, ++ (struct in6_addr *)di.cpt_address, ++ di.cpt_masklen); ++ if (err && err != -EEXIST) { ++ rtnl_unlock(); ++ eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label); ++ return err; ++ } ++#endif ++ } else { ++ rtnl_unlock(); ++ eprintk_ctx("unknown ifaddr 2 for %d\n", di.cpt_index); ++ return -EINVAL; ++ } ++ rtnl_unlock(); ++ sec += di.cpt_next; ++ } ++ return 0; ++} ++ ++static int rewrite_rtmsg(struct nlmsghdr *nlh, struct cpt_context *ctx) ++{ ++ int min_len = NLMSG_LENGTH(sizeof(struct rtmsg)); ++ struct rtmsg *rtm = NLMSG_DATA(nlh); ++ int idx = -1; ++ __u32 prefix0 = 0; ++ ++ if (nlh->nlmsg_len > min_len) { ++ int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); ++ struct rtattr *rta = (void*)nlh + NLMSG_ALIGN(min_len); ++ ++ while (RTA_OK(rta, attrlen)) { ++ if (rta->rta_type == RTA_OIF) { ++ idx = *(int*)RTA_DATA(rta); ++ if (idx == ctx->lo_index_old) ++ idx = ctx->lo_index; ++ else if (idx == ctx->venet_index_old) ++ idx = ctx->venet_index; ++ else { ++ eprintk_ctx("unknown iface %d\n", idx); ++ return -ENODEV; ++ } ++ *(int*)RTA_DATA(rta) = idx; ++ } else if (rta->rta_type == RTA_DST) { ++ prefix0 = *(__u32*)RTA_DATA(rta); ++ } ++ rta = RTA_NEXT(rta, attrlen); ++ } ++ } ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) ++ if (rtm->rtm_family == AF_INET6) { ++ if 
(rtm->rtm_type == RTN_LOCAL) ++ return 2; ++ if (rtm->rtm_flags & RTM_F_CLONED) ++ return 2; ++ if (rtm->rtm_protocol == RTPROT_UNSPEC || ++ rtm->rtm_protocol == RTPROT_RA || ++ rtm->rtm_protocol == RTPROT_REDIRECT || ++ rtm->rtm_protocol == RTPROT_KERNEL) ++ return 2; ++ if (rtm->rtm_protocol == RTPROT_BOOT && ++ ((rtm->rtm_dst_len == 8 && prefix0 == htonl(0xFF000000)) || ++ (rtm->rtm_dst_len == 64 && prefix0 == htonl(0xFE800000)))) ++ return 2; ++ } ++#endif ++ return rtm->rtm_protocol == RTPROT_KERNEL; ++} ++ ++int rst_restore_route(struct cpt_context *ctx) ++{ ++ int err; ++ struct socket *sock; ++ struct msghdr msg; ++ struct iovec iov; ++ struct sockaddr_nl nladdr; ++ mm_segment_t oldfs; ++ loff_t sec = ctx->sections[CPT_SECT_NET_ROUTE]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ struct cpt_object_hdr v; ++ char *pg; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_NET_ROUTE || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ if (h.cpt_hdrlen >= h.cpt_next) ++ return 0; ++ ++ sec += h.cpt_hdrlen; ++ err = rst_get_object(CPT_OBJ_NET_ROUTE, sec, &v, ctx); ++ if (err < 0) ++ return err; ++ ++ err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock); ++ if (err) ++ return err; ++ ++ pg = (char*)__get_free_page(GFP_KERNEL); ++ if (pg == NULL) { ++ err = -ENOMEM; ++ goto out_sock; ++ } ++ ++ memset(&nladdr, 0, sizeof(nladdr)); ++ nladdr.nl_family = AF_NETLINK; ++ ++ endsec = sec + v.cpt_next; ++ sec += v.cpt_hdrlen; ++ ++ while (sec < endsec) { ++ struct nlmsghdr *n; ++ struct nlmsghdr nh; ++ int kernel_flag; ++ ++ err = ctx->pread(&nh, sizeof(nh), ctx, sec); ++ if (err) ++ goto out_sock_pg; ++ if (nh.nlmsg_len > PAGE_SIZE) { ++ err = -EINVAL; ++ goto out_sock_pg; ++ } ++ err = ctx->pread(pg, nh.nlmsg_len, ctx, sec); ++ if (err) ++ goto out_sock_pg; ++ ++ n = (struct nlmsghdr*)pg; ++ n->nlmsg_flags = NLM_F_REQUEST|NLM_F_APPEND|NLM_F_CREATE; ++ ++ 
err = rewrite_rtmsg(n, ctx); ++ if (err < 0) ++ goto out_sock_pg; ++ kernel_flag = err; ++ ++ if (kernel_flag == 2) ++ goto do_next; ++ ++ iov.iov_base=n; ++ iov.iov_len=nh.nlmsg_len; ++ msg.msg_name=&nladdr; ++ msg.msg_namelen=sizeof(nladdr); ++ msg.msg_iov=&iov; ++ msg.msg_iovlen=1; ++ msg.msg_control=NULL; ++ msg.msg_controllen=0; ++ msg.msg_flags=MSG_DONTWAIT; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ err = sock_sendmsg(sock, &msg, nh.nlmsg_len); ++ set_fs(oldfs); ++ ++ if (err < 0) ++ goto out_sock_pg; ++ err = 0; ++ ++ iov.iov_base=pg; ++ iov.iov_len=PAGE_SIZE; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT); ++ set_fs(oldfs); ++ if (err != -EAGAIN) { ++ if (err == NLMSG_LENGTH(sizeof(struct nlmsgerr)) && ++ n->nlmsg_type == NLMSG_ERROR) { ++ struct nlmsgerr *e = NLMSG_DATA(n); ++ if (e->error != -EEXIST || !kernel_flag) ++ eprintk_ctx("NLMERR: %d\n", e->error); ++ } else { ++ eprintk_ctx("Res: %d %d\n", err, n->nlmsg_type); ++ } ++ } ++do_next: ++ err = 0; ++ sec += NLMSG_ALIGN(nh.nlmsg_len); ++ } ++ ++out_sock_pg: ++ free_page((unsigned long)pg); ++out_sock: ++ sock_release(sock); ++ return err; ++} ++ ++int rst_resume_network(struct cpt_context *ctx) ++{ ++ struct ve_struct *env; ++ ++ env = get_ve_by_id(ctx->ve_id); ++ if (!env) ++ return -ESRCH; ++ env->disable_net = 0; ++ put_ve(env); ++ return 0; ++} ++ ++int rst_restore_netdev(struct cpt_context *ctx) ++{ ++ int err; ++ loff_t sec = ctx->sections[CPT_SECT_NET_DEVICE]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ struct cpt_netdev_image di; ++ struct net_device *dev; ++ ++ get_exec_env()->disable_net = 1; ++ ++ dev = __dev_get_by_name("lo"); ++ if (!dev) { ++ eprintk_ctx("cannot find loopback netdevice\n"); ++ return -EINVAL; ++ } ++ ctx->lo_index = dev->ifindex; ++ ctx->lo_index_old = -1; ++ dev = __dev_get_by_name("venet0"); ++ if (!dev) { ++ eprintk_ctx("cannot find venet0 netdevice\n"); ++ return -EINVAL; ++ } ++ ctx->venet_index = 
dev->ifindex; ++ ctx->venet_index_old = -1; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_NET_DEVICE || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ int err; ++ err = rst_get_object(CPT_OBJ_NET_DEVICE, sec, &di, ctx); ++ if (err) ++ return err; ++ if (strcmp(di.cpt_name, "lo") == 0) { ++ ctx->lo_index_old = di.cpt_index; ++ } else if (strcmp(di.cpt_name, "venet0") == 0) { ++ ctx->venet_index_old = di.cpt_index; ++ } else { ++ eprintk_ctx("unknown interface %s\n", di.cpt_name); ++ } ++ dev = __dev_get_by_name(di.cpt_name); ++ if (dev) { ++ if (di.cpt_flags^dev->flags) { ++ rtnl_lock(); ++ err = dev_change_flags(dev, di.cpt_flags); ++ rtnl_unlock(); ++ if (err) ++ eprintk_ctx("dev_change_flags err: %d\n", err); ++ } ++ } else { ++ eprintk_ctx("unknown interface 2 %s\n", di.cpt_name); ++ } ++ sec += di.cpt_next; ++ } ++ return 0; ++} ++ ++static int dumpfn(void *arg) ++{ ++ int i; ++ int *pfd = arg; ++ char *argv[] = { "iptables-restore", "-c", NULL }; ++ ++ if (pfd[0] != 0) ++ sc_dup2(pfd[0], 0); ++ ++ for (i=1; i<current->files->fdt->max_fds; i++) ++ sc_close(i); ++ ++ module_put(THIS_MODULE); ++ ++ set_fs(KERNEL_DS); ++ i = sc_execve("/sbin/iptables-restore", argv, NULL); ++ eprintk("failed to exec /sbin/iptables-restore: %d\n", i); ++ return -1; ++} ++ ++static int rst_restore_iptables(struct cpt_context * ctx) ++{ ++ int err; ++ int pfd[2]; ++ struct file *f; ++ struct cpt_object_hdr v; ++ int n; ++ struct cpt_section_hdr h; ++ loff_t sec = ctx->sections[CPT_SECT_NET_IPTABLES]; ++ loff_t end; ++ int pid; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_NET_IPTABLES || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ if (h.cpt_hdrlen == h.cpt_next) ++ return 0; ++ if 
(h.cpt_hdrlen > h.cpt_next) ++ return -EINVAL; ++ sec += h.cpt_hdrlen; ++ err = rst_get_object(CPT_OBJ_NAME, sec, &v, ctx); ++ if (err < 0) ++ return err; ++ ++ err = sc_pipe(pfd); ++ if (err < 0) ++ return err; ++ pid = err = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0); ++ if (err < 0) ++ goto out; ++ f = fget(pfd[1]); ++ sc_close(pfd[1]); ++ sc_close(pfd[0]); ++ ++ ctx->file->f_pos = sec + v.cpt_hdrlen; ++ end = sec + v.cpt_next; ++ do { ++ char *p; ++ char buf[16]; ++ mm_segment_t oldfs; ++ ++ n = end - ctx->file->f_pos; ++ if (n > sizeof(buf)) ++ n = sizeof(buf); ++ ++ if (ctx->read(buf, n, ctx)) ++ break; ++ if ((p = memchr(buf, 0, n)) != NULL) ++ n = p - buf; ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ f->f_op->write(f, buf, n, &f->f_pos); ++ set_fs(oldfs); ++ } while (ctx->file->f_pos < end); ++ ++ fput(f); ++ ++ clear_tsk_thread_flag(current,TIF_SIGPENDING); ++ ++ if ((err = sc_waitx(pid, 0)) < 0) ++ eprintk_ctx("wait4: %d\n", err); ++ ++ return 0; ++ ++out: ++ if (pfd[1] >= 0) ++ sc_close(pfd[1]); ++ if (pfd[0] >= 0) ++ sc_close(pfd[0]); ++ return err; ++} ++ ++int rst_restore_net(struct cpt_context *ctx) ++{ ++ int err; ++ ++ err = rst_restore_netdev(ctx); ++ if (!err) ++ err = rst_restore_ifaddr(ctx); ++ if (!err) ++ err = rst_restore_route(ctx); ++ if (!err) ++ err = rst_restore_iptables(ctx); ++ if (!err) ++ err = rst_restore_ip_conntrack(ctx); ++ return err; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_proc.c linux-2.6.16-026test015/kernel/cpt/rst_proc.c +--- linux-2.6.16.orig/kernel/cpt/rst_proc.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_proc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,604 @@ ++/* ++ * ++ * kernel/cpt/rst_proc.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/errno.h> ++#include <linux/mm.h> ++#include <linux/proc_fs.h> ++#include <linux/smp_lock.h> ++#include <asm/uaccess.h> ++#include <linux/cpt_ioctl.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_dump.h" ++#include "cpt_files.h" ++#include "cpt_mm.h" ++#include "cpt_kernel.h" ++ ++MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>"); ++MODULE_LICENSE("GPL"); ++ ++/* List of contexts and lock protecting the list */ ++static struct list_head cpt_context_list; ++static spinlock_t cpt_context_lock; ++ ++static int proc_read(char *buffer, char **start, off_t offset, ++ int length, int *eof, void *data) ++{ ++ off_t pos = 0; ++ off_t begin = 0; ++ int len = 0; ++ cpt_context_t *ctx; ++ ++ len += sprintf(buffer, "Ctx Id VE State\n"); ++ ++ spin_lock(&cpt_context_lock); ++ ++ list_for_each_entry(ctx, &cpt_context_list, ctx_list) { ++ len += sprintf(buffer+len,"%p %08x %-8u %d", ++ ctx, ++ ctx->contextid, ++ ctx->ve_id, ++ ctx->ctx_state ++ ); ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ len += pagein_info_printf(buffer+len, ctx); ++#endif ++ ++ buffer[len++] = '\n'; ++ ++ pos = begin+len; ++ if (pos < offset) { ++ len = 0; ++ begin = pos; ++ } ++ if (pos > offset+length) ++ goto done; ++ } ++ *eof = 1; ++ ++done: ++ spin_unlock(&cpt_context_lock); ++ *start = buffer + (offset - begin); ++ len -= (offset - begin); ++ if(len > length) ++ len = length; ++ if(len < 0) ++ len = 0; ++ return len; ++} ++ ++void rst_context_release(cpt_context_t *ctx) ++{ ++ list_del(&ctx->ctx_list); ++ spin_unlock(&cpt_context_lock); ++ ++ if (ctx->ctx_state > 0) ++ rst_resume(ctx); ++ ctx->ctx_state = CPT_CTX_ERROR; ++ ++ rst_close_dumpfile(ctx); ++ ++ if (ctx->anonvmas) { ++ int h; ++ for (h = 0; h < CPT_ANONVMA_HSIZE; h++) { ++ while 
(!hlist_empty(&ctx->anonvmas[h])) { ++ struct hlist_node *elem = ctx->anonvmas[h].first; ++ hlist_del(elem); ++ kfree(elem); ++ } ++ } ++ free_page((unsigned long)ctx->anonvmas); ++ } ++ cpt_flush_error(ctx); ++ if (ctx->errorfile) { ++ fput(ctx->errorfile); ++ ctx->errorfile = NULL; ++ } ++ if (ctx->error_msg) { ++ free_page((unsigned long)ctx->error_msg); ++ ctx->error_msg = NULL; ++ } ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ if (ctx->pagein_file_out) ++ fput(ctx->pagein_file_out); ++ if (ctx->pagein_file_in) ++ fput(ctx->pagein_file_in); ++ if (ctx->pgin_task) ++ put_task_struct(ctx->pgin_task); ++#endif ++ if (ctx->filejob_queue) ++ rst_flush_filejobs(ctx); ++ if (ctx->objcount) ++ eprintk_ctx("%d objects leaked\n", ctx->objcount); ++ kfree(ctx); ++ ++ spin_lock(&cpt_context_lock); ++} ++ ++static void __cpt_context_put(cpt_context_t *ctx) ++{ ++ if (!--ctx->refcount) ++ rst_context_release(ctx); ++} ++ ++static void cpt_context_put(cpt_context_t *ctx) ++{ ++ spin_lock(&cpt_context_lock); ++ __cpt_context_put(ctx); ++ spin_unlock(&cpt_context_lock); ++} ++ ++cpt_context_t * rst_context_open(void) ++{ ++ cpt_context_t *ctx; ++ ++ if ((ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)) != NULL) { ++ rst_context_init(ctx); ++ spin_lock(&cpt_context_lock); ++ list_add_tail(&ctx->ctx_list, &cpt_context_list); ++ spin_unlock(&cpt_context_lock); ++ ctx->error_msg = (char*)__get_free_page(GFP_KERNEL); ++ if (ctx->error_msg != NULL) ++ ctx->error_msg[0] = 0; ++ } ++ return ctx; ++} ++ ++void rst_report_error(int err, cpt_context_t *ctx) ++{ ++ if (ctx->statusfile) { ++ mm_segment_t oldfs; ++ int status = 7 /* VZ_ENVCREATE_ERROR */; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ if (ctx->statusfile->f_op && ctx->statusfile->f_op->write) ++ ctx->statusfile->f_op->write(ctx->statusfile, (char*)&status, sizeof(status), &ctx->statusfile->f_pos); ++ set_fs(oldfs); ++ fput(ctx->statusfile); ++ ctx->statusfile = NULL; ++ } ++} ++ ++ ++static cpt_context_t * cpt_context_lookup(unsigned int 
ctxid) ++{ ++ cpt_context_t *ctx; ++ ++ spin_lock(&cpt_context_lock); ++ list_for_each_entry(ctx, &cpt_context_list, ctx_list) { ++ if (ctx->contextid == ctxid) { ++ ctx->refcount++; ++ spin_unlock(&cpt_context_lock); ++ return ctx; ++ } ++ } ++ spin_unlock(&cpt_context_lock); ++ return NULL; ++} ++ ++static int rst_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) ++{ ++ int err = 0; ++ cpt_context_t *ctx; ++ struct file *dfile = NULL; ++ ++ unlock_kernel(); ++ ++ if (cmd == CPT_TEST_CAPS) { ++ err = test_cpu_caps(); ++ goto out_lock; ++ } ++ ++ if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) { ++ cpt_context_t *old_ctx; ++ ++ ctx = NULL; ++ if (cmd == CPT_JOIN_CONTEXT) { ++ err = -ENOENT; ++ ctx = cpt_context_lookup(arg); ++ if (!ctx) ++ goto out_lock; ++ } ++ ++ spin_lock(&cpt_context_lock); ++ old_ctx = (cpt_context_t*)file->private_data; ++ file->private_data = ctx; ++ ++ if (old_ctx) { ++ if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) { ++ old_ctx->sticky = 0; ++ old_ctx->refcount--; ++ } ++ __cpt_context_put(old_ctx); ++ } ++ spin_unlock(&cpt_context_lock); ++ err = 0; ++ goto out_lock; ++ } ++ ++ spin_lock(&cpt_context_lock); ++ ctx = (cpt_context_t*)file->private_data; ++ if (ctx) ++ ctx->refcount++; ++ spin_unlock(&cpt_context_lock); ++ ++ if (!ctx) { ++ cpt_context_t *old_ctx; ++ ++ err = -ENOMEM; ++ ctx = rst_context_open(); ++ if (!ctx) ++ goto out_lock; ++ ++ spin_lock(&cpt_context_lock); ++ old_ctx = (cpt_context_t*)file->private_data; ++ if (!old_ctx) { ++ ctx->refcount++; ++ file->private_data = ctx; ++ } else { ++ old_ctx->refcount++; ++ } ++ if (old_ctx) { ++ __cpt_context_put(ctx); ++ ctx = old_ctx; ++ } ++ spin_unlock(&cpt_context_lock); ++ } ++ ++ if (cmd == CPT_GET_CONTEXT) { ++ unsigned int contextid = (unsigned int)arg; ++ ++ err = -EINVAL; ++ if (ctx->contextid && ctx->contextid != contextid) ++ goto out_nosem; ++ if (!ctx->contextid) { ++ cpt_context_t *c1 = cpt_context_lookup(contextid); ++ if 
(c1) { ++ cpt_context_put(c1); ++ err = -EEXIST; ++ goto out_nosem; ++ } ++ ctx->contextid = contextid; ++ } ++ spin_lock(&cpt_context_lock); ++ if (!ctx->sticky) { ++ ctx->sticky = 1; ++ ctx->refcount++; ++ } ++ spin_unlock(&cpt_context_lock); ++ err = 0; ++ goto out_nosem; ++ } ++ ++ down(&ctx->main_sem); ++ ++ err = -EBUSY; ++ if (ctx->ctx_state < 0) ++ goto out; ++ ++ err = 0; ++ switch (cmd) { ++ case CPT_SET_DUMPFD: ++ if (ctx->ctx_state > 0) { ++ err = -EBUSY; ++ break; ++ } ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ if (dfile->f_op == NULL || ++ dfile->f_op->read == NULL) { ++ fput(dfile); ++ err = -EBADF; ++ break; ++ } ++ } ++ if (ctx->file) ++ fput(ctx->file); ++ ctx->file = dfile; ++ break; ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ case CPT_SET_PAGEINFDIN: ++ if (ctx->ctx_state > 0) { ++ err = -EBUSY; ++ break; ++ } ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ } ++ if (ctx->pagein_file_in) ++ fput(ctx->pagein_file_in); ++ ctx->pagein_file_in = dfile; ++ break; ++ case CPT_SET_PAGEINFDOUT: ++ if (ctx->ctx_state > 0) { ++ err = -EBUSY; ++ break; ++ } ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ } ++ if (ctx->pagein_file_out) ++ fput(ctx->pagein_file_out); ++ ctx->pagein_file_out = dfile; ++ break; ++ case CPT_PAGEIND: ++ err = rst_pageind(ctx); ++ break; ++#endif ++ case CPT_SET_LOCKFD: ++ if (ctx->ctx_state > 0) { ++ err = -EBUSY; ++ break; ++ } ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ } ++ if (ctx->lockfile) ++ fput(ctx->lockfile); ++ ctx->lockfile = dfile; ++ break; ++ case CPT_SET_STATUSFD: ++ if (ctx->ctx_state > 0) { ++ err = -EBUSY; ++ break; ++ } ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ } ++ if (ctx->statusfile) ++ 
fput(ctx->statusfile); ++ ctx->statusfile = dfile; ++ break; ++ case CPT_SET_ERRORFD: ++ if (arg >= 0) { ++ dfile = fget(arg); ++ if (IS_ERR(dfile)) { ++ err = PTR_ERR(dfile); ++ break; ++ } ++ } ++ if (ctx->errorfile) ++ fput(ctx->errorfile); ++ ctx->errorfile = dfile; ++ break; ++ case CPT_SET_VEID: ++ if (ctx->ctx_state > 0) { ++ err = -EBUSY; ++ break; ++ } ++ ctx->ve_id = arg; ++ break; ++ case CPT_UNDUMP: ++ if (ctx->ctx_state > 0) { ++ err = -ENOENT; ++ break; ++ } ++ ctx->ctx_state = CPT_CTX_UNDUMPING; ++ err = vps_rst_undump(ctx); ++ if (err) { ++ rst_report_error(err, ctx); ++ if (rst_kill(ctx) == 0) ++ ctx->ctx_state = CPT_CTX_IDLE; ++ } else { ++ ctx->ctx_state = CPT_CTX_UNDUMPED; ++ } ++ break; ++ case CPT_RESUME: ++ if (!ctx->ctx_state) { ++ err = -ENOENT; ++ break; ++ } ++ err = rst_resume(ctx); ++ if (!err) ++ ctx->ctx_state = CPT_CTX_IDLE; ++ break; ++ case CPT_KILL: ++ if (!ctx->ctx_state) { ++ err = -ENOENT; ++ break; ++ } ++ err = rst_kill(ctx); ++ if (!err) ++ ctx->ctx_state = CPT_CTX_IDLE; ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } ++ ++out: ++ cpt_flush_error(ctx); ++ up(&ctx->main_sem); ++out_nosem: ++ cpt_context_put(ctx); ++out_lock: ++ lock_kernel(); ++ return err; ++} ++ ++static int rst_open(struct inode * inode, struct file * file) ++{ ++ if (!try_module_get(THIS_MODULE)) ++ return -EBUSY; ++ ++ return 0; ++} ++ ++static int rst_release(struct inode * inode, struct file * file) ++{ ++ cpt_context_t *ctx; ++ ++ spin_lock(&cpt_context_lock); ++ ctx = (cpt_context_t*)file->private_data; ++ file->private_data = NULL; ++ if (ctx) ++ __cpt_context_put(ctx); ++ spin_unlock(&cpt_context_lock); ++ ++ ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++static struct file_operations rst_fops = ++{ ++ .owner = THIS_MODULE, ++ .ioctl = rst_ioctl, ++ .open = rst_open, ++ .release = rst_release, ++}; ++ ++ ++static struct proc_dir_entry *proc_ent; ++extern void *schedule_tail_p; ++extern void schedule_tail_hook(void); ++ ++static struct 
ctl_table_header *ctl_header; ++ ++static ctl_table debug_table[] = { ++ { ++ .ctl_name = 9476, ++ .procname = "rst", ++ .data = &debug_level, ++ .maxlen = sizeof(debug_level), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { .ctl_name = 0 } ++}; ++static ctl_table root_table[] = { ++ { ++ .ctl_name = CTL_DEBUG, ++ .procname = "debug", ++ .mode = 0555, ++ .child = debug_table, ++ }, ++ { .ctl_name = 0 } ++}; ++ ++#ifdef CONFIG_X86_64 ++ ++static void *vzentry_forkret_get(void) ++{ ++ unsigned char *p; ++ ++ p = (unsigned char *)ret_from_fork; ++ return (void *)(*(u32 *)(p + 1) + p + 5); ++} ++ ++static void vzentry_forkret_set(void *data) ++{ ++ unsigned char *p; ++ long offset; ++ ++ p = (unsigned char *)ret_from_fork; ++ offset = (unsigned long)data - (unsigned long)(p + 5); ++ if ((long)(s32)offset != offset) { ++ printk("vzentry_forkret_set: too long hook offset\n"); ++ BUG(); ++ } ++ *(u32 *)(p + 1) = offset; ++} ++#endif ++ ++static int __init init_rst(void) ++{ ++ int err; ++ ++ err = -ENOMEM; ++ ctl_header = register_sysctl_table(root_table, 0); ++ if (!ctl_header) ++ goto err_mon; ++ ++ spin_lock_init(&cpt_context_lock); ++ INIT_LIST_HEAD(&cpt_context_list); ++ ++ err = -EINVAL; ++ proc_ent = create_proc_entry("rst", 0600, NULL); ++ if (!proc_ent) ++ goto err_out; ++ ++ rst_fops.read = proc_ent->proc_fops->read; ++ rst_fops.write = proc_ent->proc_fops->write; ++ rst_fops.llseek = proc_ent->proc_fops->llseek; ++ proc_ent->proc_fops = &rst_fops; ++ ++ proc_ent->read_proc = proc_read; ++ proc_ent->data = NULL; ++ proc_ent->owner = THIS_MODULE; ++#ifdef CONFIG_X86_64 ++ schedule_tail_p = vzentry_forkret_get(); ++ vzentry_forkret_set(&schedule_tail_hook); ++#endif ++ return 0; ++ ++err_out: ++ unregister_sysctl_table(ctl_header); ++err_mon: ++ return err; ++} ++module_init(init_rst); ++ ++static void __exit exit_rst(void) ++{ ++#ifdef CONFIG_X86_64 ++ /* This is wrong, of course. But still the best what we can do. 
*/ ++ vzentry_forkret_set(schedule_tail_p); ++#endif ++ ++ remove_proc_entry("rst", NULL); ++ unregister_sysctl_table(ctl_header); ++ ++ spin_lock(&cpt_context_lock); ++ while (!list_empty(&cpt_context_list)) { ++ cpt_context_t *ctx; ++ ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list); ++ ++ if (!ctx->sticky) ++ ctx->refcount++; ++ ctx->sticky = 0; ++ ++ BUG_ON(ctx->refcount != 1); ++ ++ __cpt_context_put(ctx); ++ } ++ spin_unlock(&cpt_context_lock); ++} ++module_exit(exit_rst); +diff -upr linux-2.6.16.orig/kernel/cpt/rst_process.c linux-2.6.16-026test015/kernel/cpt/rst_process.c +--- linux-2.6.16.orig/kernel/cpt/rst_process.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_process.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,1257 @@ ++/* ++ * ++ * kernel/cpt/rst_process.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/virtinfo.h> ++#include <linux/kmem_cache.h> ++#include <linux/errno.h> ++#include <linux/pagemap.h> ++#include <linux/ptrace.h> ++#include <linux/tty.h> ++#include <asm/desc.h> ++#include <asm/unistd.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_misc.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_files.h" ++#include "cpt_mm.h" ++#include "cpt_ubc.h" ++#include "cpt_process.h" ++#include "cpt_kernel.h" ++ ++#ifdef CONFIG_X86_64 ++ ++#define _TIF_RESUME (1<<22) ++ ++#define SYSCALL_NR(regs) ((regs)->orig_rax) ++#define SYSCALL_RETVAL(regs) ((regs)->rax) ++#define SYSCALL_PC(regs) ((regs)->rip) ++ ++#define ESP(tsk) (tsk)->thread.rsp ++ ++#define __NR32_restart_syscall 0 ++#define __NR32_rt_sigtimedwait 177 ++#define 
__NR32_pause 29 ++#define __NR32_futex 240 ++ ++#define syscall_is(tsk,regs,name) ((!((tsk)->thread_info->flags&_TIF_IA32) && \ ++ SYSCALL_NR(regs) == __NR_##name) || \ ++ (((tsk)->thread_info->flags&_TIF_IA32) && \ ++ SYSCALL_NR(regs) == __NR32_##name)) ++#else ++ ++#define SYSCALL_NR(regs) ((regs)->orig_eax) ++#define SYSCALL_RETVAL(regs) ((regs)->eax) ++#define SYSCALL_PC(regs) ((regs)->eip) ++ ++#define ESP(tsk) (tsk)->thread.esp ++ ++#define syscall_is(tsk,regs,name) (SYSCALL_NR(regs) == __NR_##name) ++ ++#undef task_pt_regs ++#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1) ++ ++#endif ++ ++static void decode_siginfo(siginfo_t *info, struct cpt_siginfo_image *si) ++{ ++ memset(info, 0, sizeof(*info)); ++ switch(si->cpt_code & __SI_MASK) { ++ case __SI_TIMER: ++ info->si_tid = si->cpt_pid; ++ info->si_overrun = si->cpt_uid; ++ info->_sifields._timer._sigval.sival_ptr = cpt_ptr_import(si->cpt_sigval); ++ info->si_sys_private = si->cpt_utime; ++ break; ++ case __SI_POLL: ++ info->si_band = si->cpt_pid; ++ info->si_fd = si->cpt_uid; ++ break; ++ case __SI_FAULT: ++ info->si_addr = cpt_ptr_import(si->cpt_sigval); ++#ifdef __ARCH_SI_TRAPNO ++ info->si_trapno = si->cpt_pid; ++#endif ++ break; ++ case __SI_CHLD: ++ info->si_pid = si->cpt_pid; ++ info->si_uid = si->cpt_uid; ++ info->si_status = si->cpt_sigval; ++ info->si_stime = si->cpt_stime; ++ info->si_utime = si->cpt_utime; ++ break; ++ case __SI_KILL: ++ case __SI_RT: ++ case __SI_MESGQ: ++ default: ++ info->si_pid = si->cpt_pid; ++ info->si_uid = si->cpt_uid; ++ info->si_ptr = cpt_ptr_import(si->cpt_sigval); ++ break; ++ } ++ info->si_signo = si->cpt_signo; ++ info->si_errno = si->cpt_errno; ++ info->si_code = si->cpt_code; ++} ++ ++static int restore_sigqueue(task_t *tsk, ++ struct sigpending *queue, unsigned long start, ++ unsigned long end) ++{ ++ while (start < end) { ++ struct cpt_siginfo_image *si = (struct cpt_siginfo_image *)start; ++ if (si->cpt_object == CPT_OBJ_SIGINFO) { ++ struct 
sigqueue *q = NULL; ++ struct user_struct *up; ++ up = alloc_uid(si->cpt_user); ++ if (!up) ++ return -ENOMEM; ++ q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC); ++ if (!q) { ++ free_uid(up); ++ return -ENOMEM; ++ } ++ if (ub_siginfo_charge(q, get_exec_ub())) { ++ kmem_cache_free(sigqueue_cachep, q); ++ free_uid(up); ++ return -ENOMEM; ++ } ++ ++ INIT_LIST_HEAD(&q->list); ++ /* Preallocated elements (posix timers) are not ++ * supported yet. It is safe to replace them with ++ * a private one. */ ++ q->flags = 0; ++ q->user = up; ++ atomic_inc(&q->user->sigpending); ++ ++ decode_siginfo(&q->info, si); ++ list_add_tail(&q->list, &queue->list); ++ } ++ start += si->cpt_next; ++ } ++ return 0; ++} ++ ++int rst_process_linkage(cpt_context_t *ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ struct cpt_task_image *ti = obj->o_image; ++ ++ if (tsk == NULL) { ++ eprintk_ctx("task %u(%s) is missing\n", ti->cpt_pid, ti->cpt_comm); ++ return -EINVAL; ++ } ++ ++ if (virt_pgid(tsk) != ti->cpt_pgrp) { ++ int pid; ++ ++ if ((pid = vpid_to_pid(ti->cpt_pgrp)) < 0) { ++ eprintk_ctx("illegal PGRP " CPT_FID "\n", CPT_TID(tsk)); ++ return -EINVAL; ++ } ++ ++ write_lock_irq(&tasklist_lock); ++ detach_pid(tsk, PIDTYPE_PGID); ++ tsk->signal->pgrp = pid; ++ set_virt_pgid(tsk, ti->cpt_pgrp); ++ if (thread_group_leader(tsk)) ++ attach_pid(tsk, PIDTYPE_PGID, pid); ++ write_unlock_irq(&tasklist_lock); ++ } ++ if (virt_sid(tsk) != ti->cpt_session) { ++ int pid; ++ ++ if ((pid = vpid_to_pid(ti->cpt_session)) < 0) { ++ eprintk_ctx("illegal SID " CPT_FID "\n", CPT_TID(tsk)); ++ return -EINVAL; ++ } ++ ++ write_lock_irq(&tasklist_lock); ++ detach_pid(tsk, PIDTYPE_SID); ++ tsk->signal->session = pid; ++ set_virt_sid(tsk, ti->cpt_session); ++ if (thread_group_leader(tsk)) ++ attach_pid(tsk, PIDTYPE_SID, pid); ++ write_unlock_irq(&tasklist_lock); ++ } ++ if (ti->cpt_old_pgrp > 0 && tsk->signal->tty_old_pgrp == 0) { ++ int pid; ++ ++ if ((pid = 
vpid_to_pid(ti->cpt_old_pgrp)) < 0) { ++ eprintk_ctx("illegal OLD_PGRP " CPT_FID "\n", CPT_TID(tsk)); ++ return -EINVAL; ++ } ++ ++ tsk->signal->tty_old_pgrp = pid; ++ } ++ } ++ ++ return 0; ++} ++ ++static int restore_one_signal_struct(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_signal_image *si = cpt_get_buf(ctx); ++ ++ current->signal->tty = NULL; ++ ++ err = rst_get_object(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, si, ctx); ++ if (err) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ ++ if (virt_pgid(current) != si->cpt_pgrp) { ++ int err; ++ int pid = 0; ++ ++ if (si->cpt_pgrp_type == CPT_PGRP_ORPHAN) { ++ pid = alloc_pidmap(); ++ if (pid < 0) { ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ if ((err = alloc_vpid(pid, si->cpt_pgrp)) < 0) { ++ free_pidmap(pid); ++ pid = 0; ++ if (err != -EEXIST) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ } ++ } ++ if (pid || ++ (pid = vpid_to_pid(si->cpt_pgrp)) > 0) { ++ write_lock_irq(&tasklist_lock); ++ detach_pid(current, PIDTYPE_PGID); ++ current->signal->pgrp = pid; ++ set_virt_pgid(current, si->cpt_pgrp); ++ if (thread_group_leader(current)) ++ attach_pid(current, PIDTYPE_PGID, pid); ++ write_unlock_irq(&tasklist_lock); ++ } ++ } ++ ++ current->signal->tty_old_pgrp = 0; ++ if ((int)si->cpt_old_pgrp > 0) { ++ if (si->cpt_old_pgrp_type == CPT_PGRP_STRAY) { ++ current->signal->tty_old_pgrp = alloc_pidmap(); ++ if (current->signal->tty_old_pgrp < 0) { ++ eprintk_ctx("failed to allocate stray tty_old_pgrp\n"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ free_pidmap(current->signal->tty_old_pgrp); ++ } else { ++ current->signal->tty_old_pgrp = vpid_to_pid(si->cpt_old_pgrp); ++ if (current->signal->tty_old_pgrp < 0) { ++ dprintk_ctx("forward old tty PGID\n"); ++ current->signal->tty_old_pgrp = 0; ++ } ++ } ++ } ++ ++ if (virt_sid(current) != si->cpt_session) { ++ int err; ++ int pid = 0; ++ ++ if (si->cpt_session_type == CPT_PGRP_ORPHAN) { ++ pid = alloc_pidmap(); ++ if 
(pid < 0) { ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ if ((err = alloc_vpid(pid, si->cpt_session)) < 0) { ++ free_pidmap(pid); ++ pid = 0; ++ if (err != -EEXIST) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ } ++ } ++ if (pid || ++ (pid = vpid_to_pid(si->cpt_session)) > 0) { ++ write_lock_irq(&tasklist_lock); ++ detach_pid(current, PIDTYPE_SID); ++ set_virt_sid(current, si->cpt_session); ++ current->signal->session = pid; ++ if (thread_group_leader(current)) ++ attach_pid(current, PIDTYPE_SID, pid); ++ write_unlock_irq(&tasklist_lock); ++ } ++ } ++ ++ cpt_sigset_import(¤t->signal->shared_pending.signal, si->cpt_sigpending); ++ current->signal->leader = si->cpt_leader; ++ if (si->cpt_ctty != CPT_NULL) { ++ cpt_object_t *obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, si->cpt_ctty, ctx); ++ if (obj) { ++ struct tty_struct *tty = obj->o_obj; ++ if (tty->session == 0 || tty->session == current->signal->session) { ++ tty->session = current->signal->session; ++ current->signal->tty = tty; ++ } else { ++ wprintk_ctx("tty session mismatch\n"); ++ } ++ } ++ } ++ ++ if (si->cpt_curr_target) ++ current->signal->curr_target = find_task_by_pid_ve(si->cpt_curr_target); ++ current->signal->flags = 0; ++ if (si->cpt_group_exit) ++ current->signal->flags |= SIGNAL_GROUP_EXIT; ++ current->signal->group_exit_code = si->cpt_group_exit_code; ++ if (si->cpt_group_exit_task) { ++ current->signal->group_exit_task = find_task_by_pid_ve(si->cpt_group_exit_task); ++ if (current->signal->group_exit_task == NULL) { ++ eprintk_ctx("oops, group_exit_task=NULL, pid=%u\n", si->cpt_group_exit_task); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ } ++ current->signal->notify_count = si->cpt_notify_count; ++ current->signal->group_stop_count = si->cpt_group_stop_count; ++ ++ if (si->cpt_next > si->cpt_hdrlen) { ++ char *buf = kmalloc(si->cpt_next - si->cpt_hdrlen, GFP_KERNEL); ++ if (buf == NULL) { ++ cpt_release_buf(ctx); ++ return -ENOMEM; ++ } ++ err = ctx->pread(buf, si->cpt_next - 
si->cpt_hdrlen, ctx, ++ ti->cpt_signal + si->cpt_hdrlen); ++ if (err) { ++ kfree(buf); ++ cpt_release_buf(ctx); ++ return err; ++ } ++ restore_sigqueue(current, ++ ¤t->signal->shared_pending, (unsigned long)buf, ++ (unsigned long)buf + si->cpt_next - si->cpt_hdrlen); ++ kfree(buf); ++ } ++ cpt_release_buf(ctx); ++ return 0; ++} ++ ++int restore_one_sighand_struct(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_sighand_image si; ++ int i; ++ loff_t pos, endpos; ++ ++ err = rst_get_object(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, &si, ctx); ++ if (err) ++ return err; ++ ++ for (i=0; i<_NSIG; i++) { ++ current->sighand->action[i].sa.sa_handler = SIG_DFL; ++ current->sighand->action[i].sa.sa_restorer = 0; ++ current->sighand->action[i].sa.sa_flags = SA_ONESHOT | SA_NOMASK; ++ memset(¤t->sighand->action[i].sa.sa_mask, 0, sizeof(sigset_t)); ++ } ++ ++ pos = ti->cpt_sighand + si.cpt_hdrlen; ++ endpos = ti->cpt_sighand + si.cpt_next; ++ while (pos < endpos) { ++ struct cpt_sighandler_image shi; ++ ++ err = rst_get_object(CPT_OBJ_SIGHANDLER, pos, &shi, ctx); ++ if (err) ++ return err; ++ current->sighand->action[shi.cpt_signo].sa.sa_handler = (void*)(unsigned long)shi.cpt_handler; ++ current->sighand->action[shi.cpt_signo].sa.sa_restorer = (void*)(unsigned long)shi.cpt_restorer; ++ current->sighand->action[shi.cpt_signo].sa.sa_flags = shi.cpt_flags; ++ cpt_sigset_import(¤t->sighand->action[shi.cpt_signo].sa.sa_mask, shi.cpt_mask); ++ pos += shi.cpt_next; ++ } ++ ++ return 0; ++} ++ ++ ++__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ __u32 flag = 0; ++ ++ if (lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx)) ++ flag |= CLONE_THREAD; ++ if (ti->cpt_sighand == CPT_NULL || ++ lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx)) ++ flag |= CLONE_SIGHAND; ++ return flag; ++} ++ ++int rst_signal_complete(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ int err; ++ 
cpt_object_t *obj; ++ ++ if (ti->cpt_signal == CPT_NULL || ti->cpt_sighand == CPT_NULL) { ++ return -EINVAL; ++ } ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx); ++ if (obj) { ++ struct sighand_struct *sig = current->sighand; ++ if (obj->o_obj != sig) { ++ return -EINVAL; ++ } ++ } else { ++ obj = cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, current->sighand, ctx); ++ if (obj == NULL) ++ return -ENOMEM; ++ cpt_obj_setpos(obj, ti->cpt_sighand, ctx); ++ err = restore_one_sighand_struct(ti, ctx); ++ if (err) ++ return err; ++ } ++ ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx); ++ if (obj) { ++ struct signal_struct *sig = current->signal; ++ if (obj->o_obj != sig) { ++ return -EINVAL; ++ } ++ if (current->signal) { ++ set_virt_pgid(current, pid_type_to_vpid(PIDTYPE_PGID, current->signal->pgrp)); ++ set_virt_sid(current, pid_type_to_vpid(PIDTYPE_SID, current->signal->session)); ++ } ++ } else { ++ obj = cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, current->signal, ctx); ++ if (obj == NULL) ++ return -ENOMEM; ++ cpt_obj_setpos(obj, ti->cpt_signal, ctx); ++ err = restore_one_signal_struct(ti, ctx); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static u32 decode_segment(u32 segid) ++{ ++ if (segid == CPT_SEG_ZERO) ++ return 0; ++ ++ /* TLS descriptors */ ++ if (segid <= CPT_SEG_TLS3) ++ return ((GDT_ENTRY_TLS_MIN + segid-CPT_SEG_TLS1)<<3) + 3; ++ ++ /* LDT descriptor, it is just an index to LDT array */ ++ if (segid >= CPT_SEG_LDT) ++ return ((segid - CPT_SEG_LDT) << 3) | 7; ++ ++ /* Check for one of standard descriptors */ ++#ifdef CONFIG_X86_64 ++ if (segid == CPT_SEG_USER32_DS) ++ return __USER32_DS; ++ if (segid == CPT_SEG_USER32_CS) ++ return __USER32_CS; ++ if (segid == CPT_SEG_USER64_DS) ++ return __USER_DS; ++ if (segid == CPT_SEG_USER64_CS) ++ return __USER_CS; ++#else ++ if (segid == CPT_SEG_USER32_DS) ++ return __USER_DS; ++ if (segid == CPT_SEG_USER32_CS) ++ return __USER_CS; ++#endif ++ 
wprintk("Invalid segment reg %d\n", segid); ++ return 0; ++} ++ ++unsigned long rct(unsigned long *child_tids) ++{ ++ dprintk("rct: " CPT_FID "\n", CPT_TID(current)); ++ current->clear_child_tid = (void*)child_tids[0]; ++ current->set_child_tid = (void*)child_tids[1]; ++ module_put(THIS_MODULE); ++ return (unsigned long)(child_tids+2); ++} ++ ++unsigned long rlsi(void) ++{ ++ int signr; ++ siginfo_t *info = current->last_siginfo; ++ struct pt_regs *regs = task_pt_regs(current); ++ struct k_sigaction *ka; ++ int ptrace_id; ++ ++ dprintk("rlsi: " CPT_FID "\n", CPT_TID(current)); ++ ++ spin_lock_irq(¤t->sighand->siglock); ++ current->last_siginfo = NULL; ++ recalc_sigpending(); ++ ++ ptrace_id = current->pn_state; ++ clear_pn_state(current); ++ ++ switch (ptrace_id) { ++ case PN_STOP_TF: ++ case PN_STOP_TF_RT: ++ /* frame_*signal */ ++ dprintk("SIGTRAP %u/%u(%s) %u/%u %u %ld %lu %lu\n", ++ virt_pid(current), current->pid, current->comm, ++ info->si_signo, info->si_code, ++ current->exit_code, SYSCALL_NR(regs), ++ current->ptrace, current->ptrace_message); ++ goto out; ++ case PN_STOP_ENTRY: ++ case PN_STOP_LEAVE: ++ /* do_syscall_trace */ ++ spin_unlock_irq(¤t->sighand->siglock); ++ dprintk("ptrace do_syscall_trace: %d %d\n", ptrace_id, current->exit_code); ++ if (current->exit_code) { ++ send_sig(current->exit_code, current, 1); ++ current->exit_code = 0; ++ } ++ if (ptrace_id == PN_STOP_ENTRY && SYSCALL_RETVAL(regs) == -ENOSYS) { ++ SYSCALL_RETVAL(regs) = SYSCALL_NR(regs); ++ SYSCALL_PC(regs) -= 2; ++ } else if (syscall_is(current, regs, rt_sigtimedwait)) { ++ if (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR) { ++ SYSCALL_RETVAL(regs) = SYSCALL_NR(regs); ++ SYSCALL_PC(regs) -= 2; ++ } ++ } ++ goto out_nolock; ++ case PN_STOP_FORK: ++ /* fork */ ++ SYSCALL_RETVAL(regs) = current->ptrace_message; ++ dprintk("ptrace fork returns pid %ld\n", SYSCALL_RETVAL(regs)); ++ goto out; ++ case PN_STOP_VFORK: ++ /* after vfork */ ++ SYSCALL_RETVAL(regs) = 
current->ptrace_message; ++ dprintk("ptrace after vfork returns pid %ld\n", SYSCALL_RETVAL(regs)); ++ goto out; ++ case PN_STOP_SIGNAL: ++ /* normal case : dequeue signal */ ++ break; ++ case PN_STOP_EXIT: ++ dprintk("ptrace exit caught\n"); ++ current->ptrace &= ~PT_TRACE_EXIT; ++ spin_unlock_irq(¤t->sighand->siglock); ++ module_put(THIS_MODULE); ++ complete_and_exit(NULL, current->ptrace_message); ++ BUG(); ++ case PN_STOP_EXEC: ++ eprintk("ptrace after exec caught: must not happen\n"); ++ BUG(); ++ default: ++ eprintk("ptrace with unknown identity %d\n", ptrace_id); ++ BUG(); ++ } ++ ++ signr = current->exit_code; ++ if (signr == 0) { ++ dprintk("rlsi: canceled signal %d\n", info->si_signo); ++ goto out; ++ } ++ current->exit_code = 0; ++ ++ if (signr != info->si_signo) { ++ info->si_signo = signr; ++ info->si_errno = 0; ++ info->si_code = SI_USER; ++ info->si_pid = virt_pid(current->parent); ++ info->si_uid = current->parent->uid; ++ } ++ ++ /* If the (new) signal is now blocked, requeue it. */ ++ if (sigismember(¤t->blocked, signr)) { ++ dprintk("going to requeue signal %d\n", signr); ++ goto out_resend_sig; ++ } ++ ++ ka = ¤t->sighand->action[signr-1]; ++ if (ka->sa.sa_handler == SIG_IGN) { ++ dprintk("going to resend signal %d (ignored)\n", signr); ++ goto out; ++ } ++ if (ka->sa.sa_handler != SIG_DFL) { ++ dprintk("going to resend signal %d (not SIG_DFL)\n", signr); ++ goto out_resend_sig; ++ } ++ if (signr == SIGCONT || ++ signr == SIGCHLD || ++ signr == SIGWINCH || ++ signr == SIGURG || ++ current->pid == 1) ++ goto out; ++ ++ /* All the rest, which we cannot handle are requeued. 
*/ ++ dprintk("going to resend signal %d (sigh)\n", signr); ++out_resend_sig: ++ spin_unlock_irq(¤t->sighand->siglock); ++ send_sig_info(signr, info, current); ++ module_put(THIS_MODULE); ++ return (unsigned long)(info+1); ++ ++out: ++ spin_unlock_irq(¤t->sighand->siglock); ++out_nolock: ++ module_put(THIS_MODULE); ++ return (unsigned long)(info+1); ++} ++ ++static void ret_finish_stop(void) ++{ ++ /* ... ++ * do_signal() -> ++ * get_signal_to_deliver() -> ++ * do_signal_stop() -> ++ * finish_stop() ++ * ++ * Normally after SIGCONT it will dequeue the next signal. If no signal ++ * is found, do_signal restarts syscall unconditionally. ++ * Otherwise signal handler is pushed on user stack. ++ */ ++ ++ dprintk("rfs: " CPT_FID "\n", CPT_TID(current)); ++ ++ clear_stop_state(current); ++ current->exit_code = 0; ++ ++ module_put(THIS_MODULE); ++} ++ ++static void ret_restart_sys(void) ++{ ++ struct pt_regs *regs = task_pt_regs(current); ++ ++ /* This hook is supposed to be executed, when we have ++ * to complete some interrupted syscall. 
++ */ ++ dprintk("rrs: " CPT_FID "\n", CPT_TID(current)); ++ ++ if (syscall_is(current,regs,pause)) { ++ if (SYSCALL_RETVAL(regs) == -ERESTARTNOHAND) { ++ current->state = TASK_INTERRUPTIBLE; ++ schedule(); ++ } ++ } else if (syscall_is(current,regs,rt_sigtimedwait)) { ++ if (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR) { ++ SYSCALL_RETVAL(regs) = SYSCALL_NR(regs); ++ SYSCALL_PC(regs) -= 2; ++ } ++ } else if (syscall_is(current,regs,futex)) { ++ if (SYSCALL_RETVAL(regs) == -EINTR) { ++ SYSCALL_RETVAL(regs) = SYSCALL_NR(regs); ++ SYSCALL_PC(regs) -= 2; ++ } ++ } ++ ++ if (!signal_pending(current)) { ++ if (SYSCALL_RETVAL(regs) == -ERESTARTSYS || ++ SYSCALL_RETVAL(regs) == -ERESTARTNOINTR || ++ SYSCALL_RETVAL(regs) == -ERESTARTNOHAND) { ++ SYSCALL_RETVAL(regs) = SYSCALL_NR(regs); ++ SYSCALL_PC(regs) -= 2; ++ } else if (SYSCALL_RETVAL(regs) == -ERESTART_RESTARTBLOCK) { ++ SYSCALL_RETVAL(regs) = __NR_restart_syscall; ++#ifdef CONFIG_X86_64 ++ if (current->thread_info->flags&_TIF_IA32) ++ SYSCALL_RETVAL(regs) = __NR32_restart_syscall; ++#endif ++ SYSCALL_PC(regs) -= 2; ++ } ++ } ++ ++ module_put(THIS_MODULE); ++} ++ ++extern void ret_last_siginfo(void); ++extern void ret_child_tid(void); ++extern void ret_from_rst(void); ++extern void pre_ret_from_fork(void); ++ ++#ifndef CONFIG_X86_64 ++ ++/* tsk->thread.eip points to pre_ret_from_fork ++ * Stack layout: ++ * [eip of the last hook] ++ * [args of the last hook] ++ * [eip of previous hook] ++ * [args of previous hook] ++ * ... 
++ * [eip of the first hook] ++ * [args of the first hook] ++ * [ret_from_rst] ++ */ ++ ++static void * add_hook(task_t *tsk, void (*hook)(void), int argsize, int *hooks) ++{ ++ ESP(tsk) -= sizeof(unsigned long); ++ *(unsigned long*)ESP(tsk) = tsk->thread.eip; ++ ESP(tsk) -= argsize; ++ tsk->thread.eip = (unsigned long)hook; ++ if (!try_module_get(THIS_MODULE)) BUG(); ++ (*hooks)++; ++ return (void*)ESP(tsk); ++} ++ ++static int restore_registers(task_t *tsk, struct pt_regs *regs, ++ struct cpt_task_image *ti, struct cpt_x86_regs *b) ++{ ++ if (b->cpt_object != CPT_OBJ_X86_REGS) ++ return -EINVAL; ++ ++ tsk->thread.esp = (unsigned long) regs; ++ tsk->thread.esp0 = (unsigned long) (regs+1); ++ tsk->thread.eip = (unsigned long) ret_from_rst; ++ ++ tsk->thread.fs = decode_segment(b->cpt_fs); ++ tsk->thread.gs = decode_segment(b->cpt_gs); ++ tsk->thread.debugreg[0] = b->cpt_debugreg[0]; ++ tsk->thread.debugreg[1] = b->cpt_debugreg[1]; ++ tsk->thread.debugreg[2] = b->cpt_debugreg[2]; ++ tsk->thread.debugreg[3] = b->cpt_debugreg[3]; ++ tsk->thread.debugreg[4] = b->cpt_debugreg[4]; ++ tsk->thread.debugreg[5] = b->cpt_debugreg[5]; ++ tsk->thread.debugreg[6] = b->cpt_debugreg[6]; ++ tsk->thread.debugreg[7] = b->cpt_debugreg[7]; ++ ++ memcpy(regs, &b->cpt_ebx, sizeof(struct pt_regs)); ++ ++ regs->xcs = decode_segment(b->cpt_xcs); ++ regs->xss = decode_segment(b->cpt_xss); ++ regs->xds = decode_segment(b->cpt_xds); ++ regs->xes = decode_segment(b->cpt_xes); ++ ++ return 0; ++} ++ ++#else ++ ++/* Stack layout: ++ * ++ * [eip of the last hook] ++ * [args of the last hook] ++ * ... 
++ * [eip of the first hook] ++ * [args of the first hook] ++ * [ret_from_fork+5] ++ */ ++ ++static void * add_hook(task_t *tsk, void (*hook)(void), int argsize, int *hooks) ++{ ++ if (!*hooks) { ++ extern void ret_from_fork2(void); ++ ESP(tsk) -= sizeof(unsigned long); ++ *(unsigned long*)ESP(tsk) = (unsigned long)ret_from_fork2; ++ tsk->thread_info->flags |= _TIF_RESUME; ++ } ++ ESP(tsk) -= argsize + sizeof(unsigned long); ++ *(unsigned long*)ESP(tsk) = (unsigned long)hook; ++ if (!try_module_get(THIS_MODULE)) BUG(); ++ (*hooks)++; ++ return (void*)(ESP(tsk) + sizeof(unsigned long)); ++} ++ ++static void xlate_ptregs_32_to_64(struct pt_regs *d, struct cpt_x86_regs *s) ++{ ++ memset(d, 0, sizeof(struct pt_regs)); ++ d->rbp = s->cpt_ebp; ++ d->rbx = s->cpt_ebx; ++ d->rax = (s32)s->cpt_eax; ++ d->rcx = s->cpt_ecx; ++ d->rdx = s->cpt_edx; ++ d->rsi = s->cpt_esi; ++ d->rdi = s->cpt_edi; ++ d->orig_rax = (s32)s->cpt_orig_eax; ++ d->rip = s->cpt_eip; ++ d->cs = s->cpt_xcs; ++ d->eflags = s->cpt_eflags; ++ d->rsp = s->cpt_esp; ++ d->ss = s->cpt_xss; ++} ++ ++static int restore_registers(task_t *tsk, struct pt_regs *regs, ++ struct cpt_task_image *ti, struct cpt_obj_bits *hdr) ++{ ++ if (hdr->cpt_object == CPT_OBJ_X86_64_REGS) { ++ struct cpt_x86_64_regs *b = (void*)hdr; ++ ++ tsk->thread.rsp = (unsigned long) regs; ++ tsk->thread.rsp0 = (unsigned long) (regs+1); ++ ++ tsk->thread.fs = b->cpt_fsbase; ++ tsk->thread.gs = b->cpt_gsbase; ++ tsk->thread.fsindex = decode_segment(b->cpt_fsindex); ++ tsk->thread.gsindex = decode_segment(b->cpt_gsindex); ++ tsk->thread.ds = decode_segment(b->cpt_ds); ++ tsk->thread.es = decode_segment(b->cpt_es); ++ tsk->thread.debugreg0 = b->cpt_debugreg[0]; ++ tsk->thread.debugreg1 = b->cpt_debugreg[1]; ++ tsk->thread.debugreg2 = b->cpt_debugreg[2]; ++ tsk->thread.debugreg3 = b->cpt_debugreg[3]; ++ tsk->thread.debugreg6 = b->cpt_debugreg[6]; ++ tsk->thread.debugreg7 = b->cpt_debugreg[7]; ++ ++ memcpy(regs, &b->cpt_r15, sizeof(struct pt_regs)); 
++ ++ tsk->thread.userrsp = regs->rsp; ++ regs->cs = decode_segment(b->cpt_cs); ++ regs->ss = decode_segment(b->cpt_ss); ++ } else if (hdr->cpt_object == CPT_OBJ_X86_REGS) { ++ struct cpt_x86_regs *b = (void*)hdr; ++ ++ tsk->thread.rsp = (unsigned long) regs; ++ tsk->thread.rsp0 = (unsigned long) (regs+1); ++ ++ tsk->thread.fs = 0; ++ tsk->thread.gs = 0; ++ tsk->thread.fsindex = decode_segment(b->cpt_fs); ++ tsk->thread.gsindex = decode_segment(b->cpt_gs); ++ tsk->thread.debugreg0 = b->cpt_debugreg[0]; ++ tsk->thread.debugreg1 = b->cpt_debugreg[1]; ++ tsk->thread.debugreg2 = b->cpt_debugreg[2]; ++ tsk->thread.debugreg3 = b->cpt_debugreg[3]; ++ tsk->thread.debugreg6 = b->cpt_debugreg[6]; ++ tsk->thread.debugreg7 = b->cpt_debugreg[7]; ++ ++ xlate_ptregs_32_to_64(regs, b); ++ ++ tsk->thread.userrsp = regs->rsp; ++ regs->cs = decode_segment(b->cpt_xcs); ++ regs->ss = decode_segment(b->cpt_xss); ++ tsk->thread.ds = decode_segment(b->cpt_xds); ++ tsk->thread.es = decode_segment(b->cpt_xes); ++ } else { ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#endif ++ ++int rst_restore_process(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ struct cpt_task_image *ti = obj->o_image; ++ struct pt_regs * regs; ++ struct cpt_object_hdr *b; ++ struct cpt_siginfo_image *lsi = NULL; ++ struct group_info *gids, *ogids; ++ int hooks = 0; ++ int i; ++ ++ if (tsk == NULL) { ++ eprintk_ctx("oops, task %d/%s is missing\n", ti->cpt_pid, ti->cpt_comm); ++ return -EFAULT; ++ } ++ ++ wait_task_inactive(tsk); ++ regs = task_pt_regs(tsk); ++ ++ if (!tsk->exit_state) { ++ tsk->lock_depth = -1; ++#ifdef CONFIG_PREEMPT ++ tsk->thread_info->preempt_count--; ++#endif ++ } ++ ++ if (tsk->static_prio != ti->cpt_static_prio) ++ set_user_nice(tsk, PRIO_TO_NICE(ti->cpt_static_prio)); ++ ++ cpt_sigset_import(&tsk->blocked, ti->cpt_sigblocked); ++ cpt_sigset_import(&tsk->real_blocked, ti->cpt_sigrblocked); ++ 
cpt_sigset_import(&tsk->saved_sigmask, ti->cpt_sigsuspend_blocked); ++ cpt_sigset_import(&tsk->pending.signal, ti->cpt_sigpending); ++ ++ tsk->uid = ti->cpt_uid; ++ tsk->euid = ti->cpt_euid; ++ tsk->suid = ti->cpt_suid; ++ tsk->fsuid = ti->cpt_fsuid; ++ tsk->gid = ti->cpt_gid; ++ tsk->egid = ti->cpt_egid; ++ tsk->sgid = ti->cpt_sgid; ++ tsk->fsgid = ti->cpt_fsgid; ++ memcpy(&tsk->cap_effective, &ti->cpt_ecap, sizeof(tsk->cap_effective)); ++ memcpy(&tsk->cap_inheritable, &ti->cpt_icap, sizeof(tsk->cap_inheritable)); ++ memcpy(&tsk->cap_permitted, &ti->cpt_pcap, sizeof(tsk->cap_permitted)); ++ tsk->keep_capabilities = (ti->cpt_keepcap != 0); ++ tsk->did_exec = (ti->cpt_did_exec != 0); ++ gids = groups_alloc(ti->cpt_ngids); ++ ogids = tsk->group_info; ++ if (gids) { ++ int i; ++ for (i=0; i<32; i++) ++ gids->small_block[i] = ti->cpt_gids[i]; ++ tsk->group_info = gids; ++ } ++ if (ogids) ++ put_group_info(ogids); ++ tsk->utime = ti->cpt_utime; ++ tsk->stime = ti->cpt_stime; ++ if (ctx->image_version == 0) { ++ tsk->start_time = _ns_to_timespec(ti->cpt_starttime*TICK_NSEC); ++ } else { ++ cpt_timespec_import(&tsk->start_time, ti->cpt_starttime); ++ } ++ _set_normalized_timespec(&tsk->start_time, ++ tsk->start_time.tv_sec - ++ get_exec_env()->init_entry->start_time.tv_sec, ++ tsk->start_time.tv_nsec - ++ get_exec_env()->init_entry->start_time.tv_nsec); ++ ++ tsk->nvcsw = ti->cpt_nvcsw; ++ tsk->nivcsw = ti->cpt_nivcsw; ++ tsk->min_flt = ti->cpt_min_flt; ++ tsk->maj_flt = ti->cpt_maj_flt; ++ ++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8) ++ tsk->cutime = ti->cpt_cutime; ++ tsk->cstime = ti->cpt_cstime; ++ tsk->cnvcsw = ti->cpt_cnvcsw; ++ tsk->cnivcsw = ti->cpt_cnivcsw; ++ tsk->cmin_flt = ti->cpt_cmin_flt; ++ tsk->cmaj_flt = ti->cpt_cmaj_flt; ++ ++ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS) ++ __asm__("undefined\n"); ++ ++ for (i=0; i<RLIM_NLIMITS; i++) { ++ tsk->rlim[i].rlim_cur = ti->cpt_rlim_cur[i]; ++ tsk->rlim[i].rlim_max = ti->cpt_rlim_max[i]; ++ } ++#else ++ if 
(thread_group_leader(tsk) && tsk->signal) { ++ tsk->signal->utime = ti->cpt_utime; ++ tsk->signal->stime = ti->cpt_stime; ++ tsk->signal->cutime = ti->cpt_cutime; ++ tsk->signal->cstime = ti->cpt_cstime; ++ tsk->signal->nvcsw = ti->cpt_nvcsw; ++ tsk->signal->nivcsw = ti->cpt_nivcsw; ++ tsk->signal->cnvcsw = ti->cpt_cnvcsw; ++ tsk->signal->cnivcsw = ti->cpt_cnivcsw; ++ tsk->signal->min_flt = ti->cpt_min_flt; ++ tsk->signal->maj_flt = ti->cpt_maj_flt; ++ tsk->signal->cmin_flt = ti->cpt_cmin_flt; ++ tsk->signal->cmaj_flt = ti->cpt_cmaj_flt; ++ ++ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS) ++ __asm__("undefined\n"); ++ ++ for (i=0; i<RLIM_NLIMITS; i++) { ++ tsk->signal->rlim[i].rlim_cur = ti->cpt_rlim_cur[i]; ++ tsk->signal->rlim[i].rlim_max = ti->cpt_rlim_max[i]; ++ } ++ } ++#endif ++ ++ for (i=0; i<3; i++) { ++ if (i >= GDT_ENTRY_TLS_ENTRIES) { ++ eprintk_ctx("too many tls descs\n"); ++ } else { ++#ifndef CONFIG_X86_64 ++ tsk->thread.tls_array[i].a = ti->cpt_tls[i]&0xFFFFFFFF; ++ tsk->thread.tls_array[i].b = ti->cpt_tls[i]>>32; ++#else ++ tsk->thread.tls_array[i] = ti->cpt_tls[i]; ++#endif ++ } ++ } ++ ++ clear_stopped_child_used_math(tsk); ++ ++ b = (void *)(ti+1); ++ while ((void*)b < ((void*)ti) + ti->cpt_next) { ++ /* Siginfo objects are at the end of obj array */ ++ if (b->cpt_object == CPT_OBJ_SIGINFO) { ++ struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env); ++ restore_sigqueue(tsk, &tsk->pending, (unsigned long)b, (unsigned long)ti + ti->cpt_next); ++ set_exec_env(env); ++ break; ++ } ++ ++ switch (b->cpt_object) { ++ case CPT_OBJ_BITS: ++ if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE && ++ cpu_has_fxsr) { ++ memcpy(&tsk->thread.i387, ++ (void*)b + b->cpt_hdrlen, ++ sizeof(struct i387_fxsave_struct)); ++ if (ti->cpt_used_math) ++ set_stopped_child_used_math(tsk); ++ } ++#ifdef CONFIG_X86_32 ++ else if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE_OLD && ++ !cpu_has_fxsr) { ++ memcpy(&tsk->thread.i387, ++ (void*)b + b->cpt_hdrlen, ++ sizeof(struct 
i387_fsave_struct)); ++ if (ti->cpt_used_math) ++ set_stopped_child_used_math(tsk); ++ } ++#endif ++ break; ++ case CPT_OBJ_LASTSIGINFO: ++ lsi = (void*)b; ++ break; ++ case CPT_OBJ_X86_REGS: ++ case CPT_OBJ_X86_64_REGS: ++ if (restore_registers(tsk, regs, ti, (void*)b)) { ++ eprintk_ctx("cannot restore registers: image is corrupted\n"); ++ return -EINVAL; ++ } ++ break; ++ case CPT_OBJ_SIGALTSTACK: { ++ struct cpt_sigaltstack_image *sas; ++ sas = (struct cpt_sigaltstack_image *)b; ++ tsk->sas_ss_sp = sas->cpt_stack; ++ tsk->sas_ss_size = sas->cpt_stacksize; ++ break; ++ } ++ } ++ b = ((void*)b) + b->cpt_next; ++ } ++ ++ if (ti->cpt_ppid != ti->cpt_rppid) { ++ task_t *parent; ++ struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env); ++ write_lock_irq(&tasklist_lock); ++ parent = find_task_by_pid_ve(ti->cpt_ppid); ++ if (parent && parent != tsk->parent) { ++ list_add(&tsk->ptrace_list, &tsk->parent->ptrace_children); ++ REMOVE_LINKS(tsk); ++ tsk->parent = parent; ++ SET_LINKS(tsk); ++ } ++ write_unlock_irq(&tasklist_lock); ++ set_exec_env(env); ++ } ++ ++ tsk->ptrace_message = ti->cpt_ptrace_message; ++ tsk->pn_state = ti->cpt_pn_state; ++ tsk->stopped_state = ti->cpt_stopped_state; ++ tsk->thread_info->flags = ti->cpt_thrflags; ++ ++ /* The image was created with kernel < 2.6.16, while ++ * task hanged in sigsuspend -> do_signal. ++ * ++ * FIXME! This needs more brain efforts... 
++ */ ++ if (ti->cpt_sigsuspend_state) { ++ tsk->thread_info->flags |= _TIF_RESTORE_SIGMASK; ++ } ++ ++#ifdef CONFIG_X86_64 ++ tsk->thread_info->flags |= _TIF_FORK; ++ if (!ti->cpt_64bit) ++ tsk->thread_info->flags |= _TIF_IA32; ++#endif ++ ++#ifndef CONFIG_X86_64 ++ do { ++ if (regs->orig_eax == __NR__newselect && regs->edi) { ++ struct timeval tv; ++ if (access_process_vm(tsk, regs->edi, &tv, ++ sizeof(tv), 0) != sizeof(tv)) { ++ wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm: edi %ld\n", ++ virt_pid(tsk), tsk->pid, tsk->comm, ++ regs->edi); ++ break; ++ } ++ dprintk_ctx("task %d/%d(%s): Old timeval in newselect: %ld.%ld\n", ++ virt_pid(tsk), tsk->pid, tsk->comm, ++ tv.tv_sec, tv.tv_usec); ++ tv.tv_sec -= ctx->delta_time.tv_sec; ++ if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) { ++ tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000; ++ tv.tv_sec--; ++ } else { ++ tv.tv_usec -= ctx->delta_time.tv_nsec / 1000; ++ } ++ if (tv.tv_sec < 0) { ++ tv.tv_sec = 0; ++ tv.tv_usec = 0; ++ } ++ dprintk_ctx("task %d/%d(%s): New timeval in newselect: %ld.%ld\n", ++ virt_pid(tsk), tsk->pid, tsk->comm, ++ tv.tv_sec, tv.tv_usec); ++ if (access_process_vm(tsk, regs->edi, &tv, ++ sizeof(tv), 1) != sizeof(tv)) { ++ wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm write: edi %ld\n", ++ virt_pid(tsk), tsk->pid, tsk->comm, regs->edi); ++ } ++ ++ } else if (regs->orig_eax == __NR_select && regs->edi) { ++ struct { ++ unsigned long n; ++ fd_set __user *inp, *outp, *exp; ++ struct timeval __user *tvp; ++ } a; ++ struct timeval tv; ++ if (access_process_vm(tsk, regs->ebx, &a, ++ sizeof(a), 0) != sizeof(a)) { ++ wprintk_ctx("task %d: Error 2 in access_process_vm\n", tsk->pid); ++ break; ++ } ++ if (access_process_vm(tsk, (unsigned long)a.tvp, ++ &tv, sizeof(tv), 0) != sizeof(tv)) { ++ wprintk_ctx("task %d: Error 3 in access_process_vm\n", tsk->pid); ++ break; ++ } ++ dprintk_ctx("task %d: Old timeval in select: %ld.%ld\n", ++ tsk->pid, tv.tv_sec, tv.tv_usec); ++ 
tv.tv_sec -= ctx->delta_time.tv_sec; ++ if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) { ++ tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000; ++ tv.tv_sec--; ++ } else { ++ tv.tv_usec -= ctx->delta_time.tv_nsec / 1000; ++ } ++ if (tv.tv_sec < 0) { ++ tv.tv_sec = 0; ++ tv.tv_usec = 0; ++ } ++ dprintk_ctx("task %d: New timeval in select: %ld.%ld\n", ++ tsk->pid, tv.tv_sec, tv.tv_usec); ++ if (access_process_vm(tsk, (unsigned long)a.tvp, ++ &tv, sizeof(tv), 1) != sizeof(tv)) { ++ wprintk_ctx("task %d: Error 3 in access_process_vm write\n", tsk->pid); ++ } ++ } ++ } while (0); ++#endif ++ ++ if (!tsk->exit_state && (long)SYSCALL_NR(regs) >= 0) { ++ if (SYSCALL_RETVAL(regs) == -ERESTARTSYS || ++ SYSCALL_RETVAL(regs) == -ERESTARTNOINTR || ++ SYSCALL_RETVAL(regs) == -ERESTARTNOHAND || ++ SYSCALL_RETVAL(regs) == -ERESTART_RESTARTBLOCK || ++ syscall_is(tsk,regs,pause) || ++ (syscall_is(tsk,regs,rt_sigtimedwait) && ++ (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR)) || ++ (syscall_is(tsk,regs,futex) && ++ (SYSCALL_RETVAL(regs) == -EINTR))) ++ add_hook(tsk, ret_restart_sys, 0, &hooks); ++ } ++ ++ if (lsi || tsk->pn_state) { ++ /* ... -> ptrace_notify() ++ * or ++ * ... 
-> do_signal() -> get_signal_to_deliver() -> ++ * ptrace stop ++ */ ++ tsk->last_siginfo = add_hook(tsk, ret_last_siginfo, sizeof(siginfo_t), &hooks); ++ memset(tsk->last_siginfo, 0, sizeof(siginfo_t)); ++ if (lsi) ++ decode_siginfo(tsk->last_siginfo, lsi); ++ } ++ ++ tsk->ptrace = ti->cpt_ptrace; ++ tsk->flags = ti->cpt_flags & ~PF_FROZEN; ++ clear_tsk_thread_flag(tsk, TIF_FREEZE); ++ tsk->exit_signal = ti->cpt_exit_signal; ++ ++ if (tsk->stopped_state) { ++ dprintk_ctx("finish_stop\n"); ++ if (ti->cpt_state != TASK_STOPPED) ++ eprintk_ctx("Hellooo, state is %u\n", (unsigned)ti->cpt_state); ++ add_hook(tsk, ret_finish_stop, 0, &hooks); ++ } ++ ++ if (!tsk->exit_state && ++ (ti->cpt_set_tid || ti->cpt_clear_tid)) { ++ unsigned long *ptr = add_hook(tsk, ret_child_tid, sizeof(unsigned long)*2, &hooks); ++ ptr[0] = ti->cpt_clear_tid; ++ ptr[1] = ti->cpt_set_tid; ++ dprintk_ctx("settids\n"); ++ } ++ ++#ifdef CONFIG_X86_64 ++ if (!hooks && (long)SYSCALL_NR(regs) < 0) { ++ extern void ret_from_fork2(void); ++ ESP(tsk) -= sizeof(unsigned long); ++ *(unsigned long*)ESP(tsk) = (unsigned long)ret_from_fork2; ++ tsk->thread_info->flags |= _TIF_RESUME; ++ } ++#else ++ tsk->thread.esp -= 4; ++ *(__u32*)tsk->thread.esp = tsk->thread.eip; ++ tsk->thread.eip = (unsigned long)pre_ret_from_fork; ++#endif ++ ++ if (ti->cpt_state == TASK_TRACED) ++ tsk->state = TASK_TRACED; ++ else if (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD)) { ++ tsk->signal->it_virt_expires = 0; ++ tsk->signal->it_prof_expires = 0; ++ if (tsk->state != EXIT_DEAD) ++ eprintk_ctx("oops, schedule() did not make us dead\n"); ++ } ++ ++ if (thread_group_leader(tsk) && ++ ti->cpt_it_real_value && ++ !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) { ++ DEFINE_KTIME(val); ++ ++ if (ctx->image_version != 0) { ++ ktime_t delta; ++ ++ val = ktime_add_ns(val, ti->cpt_it_real_value); ++ delta = timespec_to_ktime(ctx->delta_time); ++ val = ktime_sub(val, delta); ++ if (val.tv64 <= 0) ++ val.tv64 = NSEC_PER_USEC; ++ dprintk("rst 
itimer " CPT_FID " +%Ld %Ld %Lu\n", CPT_TID(tsk), val.tv64, delta.tv64, ti->cpt_it_real_value); ++ } else { ++ unsigned long jif = ti->cpt_it_real_value - ++ timespec_to_jiffies(&ctx->delta_time); ++ if ((long)jif <= 0) ++ jif = 1; ++ val = ktime_add_ns(val, (u64)jif*TICK_NSEC); ++ } ++ spin_lock_irq(&tsk->sighand->siglock); ++ if (hrtimer_try_to_cancel(&tsk->signal->real_timer) >= 0) { ++ /* FIXME. Check!!!! */ ++ hrtimer_start(&tsk->signal->real_timer, val, HRTIMER_REL); ++ } else { ++ wprintk_ctx("Timer clash. Impossible?\n"); ++ } ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ dprintk_ctx("itimer " CPT_FID " +%Lu\n", CPT_TID(tsk), val.tv64); ++ } ++ ++ module_put(THIS_MODULE); ++ } ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_socket.c linux-2.6.16-026test015/kernel/cpt/rst_socket.c +--- linux-2.6.16.orig/kernel/cpt/rst_socket.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_socket.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,876 @@ ++/* ++ * ++ * kernel/cpt/rst_socket.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/fs.h> ++#include <linux/namei.h> ++#include <linux/socket.h> ++#include <linux/un.h> ++#include <net/tcp.h> ++#include <net/sock.h> ++#include <net/scm.h> ++#include <net/af_unix.h> ++ ++#include <ub/ub_mem.h> ++#include <ub/ub_orphan.h> ++#include <ub/ub_orphan.h> ++#include <ub/ub_net.h> ++#include <ub/ub_tcp.h> ++ ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_files.h" ++#include "cpt_socket.h" ++#include "cpt_kernel.h" ++ ++#include "cpt_syscalls.h" ++ ++ ++static int setup_sock_common(struct sock *sk, struct cpt_sock_image *si, ++ loff_t pos, struct cpt_context *ctx) ++{ ++ if (sk->sk_socket) { ++ sk->sk_socket->flags = si->cpt_ssflags; ++ sk->sk_socket->state = si->cpt_sstate; ++ } ++ sk->sk_reuse = si->cpt_reuse; ++ sk->sk_shutdown = si->cpt_shutdown; ++ sk->sk_userlocks = si->cpt_userlocks; ++ sk->sk_no_check = si->cpt_no_check; ++ sock_reset_flag(sk, SOCK_DBG); ++ if (si->cpt_debug) ++ sock_set_flag(sk, SOCK_DBG); ++ sock_reset_flag(sk, SOCK_RCVTSTAMP); ++ if (si->cpt_rcvtstamp) ++ sock_set_flag(sk, SOCK_RCVTSTAMP); ++ sock_reset_flag(sk, SOCK_LOCALROUTE); ++ if (si->cpt_localroute) ++ sock_set_flag(sk, SOCK_LOCALROUTE); ++ sk->sk_protocol = si->cpt_protocol; ++ sk->sk_err = si->cpt_err; ++ sk->sk_err_soft = si->cpt_err_soft; ++ sk->sk_priority = si->cpt_priority; ++ sk->sk_rcvlowat = si->cpt_rcvlowat; ++ sk->sk_rcvtimeo = si->cpt_rcvtimeo; ++ if (si->cpt_rcvtimeo == CPT_NULL) ++ sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; ++ sk->sk_sndtimeo = si->cpt_sndtimeo; ++ if (si->cpt_sndtimeo == CPT_NULL) ++ sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; ++ sk->sk_rcvbuf = si->cpt_rcvbuf; ++ sk->sk_sndbuf = si->cpt_sndbuf; ++ 
sk->sk_bound_dev_if = si->cpt_bound_dev_if; ++ sk->sk_flags = si->cpt_flags; ++ sk->sk_lingertime = si->cpt_lingertime; ++ if (si->cpt_lingertime == CPT_NULL) ++ sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; ++ sk->sk_peercred.pid = si->cpt_peer_pid; ++ sk->sk_peercred.uid = si->cpt_peer_uid; ++ sk->sk_peercred.gid = si->cpt_peer_gid; ++ cpt_timeval_import(&sk->sk_stamp, si->cpt_stamp); ++ return 0; ++} ++ ++static struct file *sock_mapfile(struct socket *sock) ++{ ++ int fd = sock_map_fd(sock); ++ ++ if (fd >= 0) { ++ struct file *file = sock->file; ++ get_file(file); ++ sc_close(fd); ++ return file; ++ } ++ return ERR_PTR(fd); ++} ++ ++/* Assumption is that /tmp exists and writable. ++ * In previous versions we assumed that listen() will autobind ++ * the socket. It does not do this for AF_UNIX by evident reason: ++ * socket in abstract namespace is accessible, unlike socket bound ++ * to deleted FS object. ++ */ ++ ++static int ++select_deleted_name(char * name, cpt_context_t *ctx) ++{ ++ int i; ++ ++ for (i=0; i<100; i++) { ++ struct nameidata nd; ++ unsigned int rnd = net_random(); ++ ++ sprintf(name, "/tmp/SOCK.%08x", rnd); ++ ++ if (path_lookup(name, 0, &nd) != 0) ++ return 0; ++ ++ path_release(&nd); ++ } ++ ++ eprintk_ctx("failed to allocate deleted socket inode\n"); ++ return -ELOOP; ++} ++ ++static int ++bind_unix_socket(struct socket *sock, struct cpt_sock_image *si, ++ cpt_context_t *ctx) ++{ ++ int err; ++ char *name; ++ struct sockaddr* addr; ++ int addrlen; ++ struct sockaddr_un sun; ++ struct nameidata nd; ++ ++ if ((addrlen = si->cpt_laddrlen) <= 2) ++ return 0; ++ ++ nd.dentry = NULL; ++ name = ((char*)si->cpt_laddr) + 2; ++ addr = (struct sockaddr *)si->cpt_laddr; ++ ++ if (name[0]) { ++ err = path_lookup(name, 0, &nd); ++ if (err) { ++ nd.dentry = NULL; ++ } else { ++ if (si->cpt_deleted) { ++ path_release(&nd); ++ nd.dentry = NULL; ++ addr = (struct sockaddr*)&sun; ++ addr->sa_family = AF_UNIX; ++ name = ((char*)addr) + 2; ++ err = 
select_deleted_name(name, ctx); ++ if (err) ++ return err; ++ addrlen = 2 + strlen(name); ++ } else if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) { ++ eprintk_ctx("bind_unix_socket: not a socket dentry\n"); ++ path_release(&nd); ++ return -EINVAL; ++ } ++ } ++ if (nd.dentry) ++ sc_unlink(name); ++ } ++ ++ err = sock->ops->bind(sock, addr, addrlen); ++ ++ if (!err) { ++ if (nd.dentry) { ++ sc_chown(name, nd.dentry->d_inode->i_uid, ++ nd.dentry->d_inode->i_gid); ++ sc_chmod(name, nd.dentry->d_inode->i_mode); ++ } ++ if (si->cpt_deleted && name[0]) ++ sc_unlink(name); ++ } ++ if (nd.dentry) ++ path_release(&nd); ++ return err; ++} ++ ++static int fixup_unix_address(struct socket *sock, struct cpt_sock_image *si, ++ struct cpt_context *ctx) ++{ ++ struct sock *sk = sock->sk; ++ cpt_object_t *obj; ++ struct sock *parent; ++ ++ if (sk->sk_family != AF_UNIX || sk->sk_state == TCP_LISTEN) ++ return 0; ++ ++ if (si->cpt_parent == -1) ++ return bind_unix_socket(sock, si, ctx); ++ ++ obj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx); ++ if (!obj) ++ return 0; ++ ++ parent = obj->o_obj; ++ if (unix_sk(parent)->addr) { ++ if (unix_sk(sk)->addr && ++ atomic_dec_and_test(&unix_sk(sk)->addr->refcnt)) ++ kfree(unix_sk(sk)->addr); ++ atomic_inc(&unix_sk(parent)->addr->refcnt); ++ unix_sk(sk)->addr = unix_sk(parent)->addr; ++ } ++ return 0; ++} ++ ++ ++static int open_socket(cpt_object_t *obj, struct cpt_sock_image *si, ++ struct cpt_context *ctx) ++{ ++ int err; ++ struct socket *sock; ++ struct socket *sock2 = NULL; ++ struct file *file; ++ cpt_object_t *fobj; ++ cpt_object_t *pobj = NULL; ++ ++ err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol, ++ &sock); ++ if (err) ++ return err; ++ ++ if (si->cpt_socketpair) { ++ err = sock_create_kern(si->cpt_family, si->cpt_type, ++ si->cpt_protocol, &sock2); ++ if (err) ++ goto err_out; ++ ++ err = sock->ops->socketpair(sock, sock2); ++ if (err < 0) ++ goto err_out; ++ ++ /* Socketpair with a peer outside our 
environment. ++ * So, we create real half-open pipe and do not worry ++ * about dead end anymore. */ ++ if (si->cpt_peer == -1) { ++ sock_release(sock2); ++ sock2 = NULL; ++ } ++ } ++ ++ cpt_obj_setobj(obj, sock->sk, ctx); ++ ++ if (si->cpt_file != CPT_NULL) { ++ file = sock_mapfile(sock); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto err_out; ++ ++ err = -ENOMEM; ++ ++ obj->o_parent = file; ++ ++ if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL) ++ goto err_out; ++ cpt_obj_setpos(fobj, si->cpt_file, ctx); ++ cpt_obj_setindex(fobj, si->cpt_index, ctx); ++ } ++ ++ if (sock2) { ++ struct file *file2; ++ ++ pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_peer, ctx); ++ if (!pobj) BUG(); ++ if (pobj->o_obj) BUG(); ++ cpt_obj_setobj(pobj, sock2->sk, ctx); ++ ++ if (pobj->o_ppos != CPT_NULL) { ++ file2 = sock_mapfile(sock2); ++ err = PTR_ERR(file2); ++ if (IS_ERR(file2)) ++ goto err_out; ++ ++ err = -ENOMEM; ++ if ((fobj = cpt_object_add(CPT_OBJ_FILE, file2, ctx)) == NULL) ++ goto err_out; ++ cpt_obj_setpos(fobj, pobj->o_ppos, ctx); ++ cpt_obj_setindex(fobj, si->cpt_peer, ctx); ++ ++ pobj->o_parent = file2; ++ } ++ } ++ ++ setup_sock_common(sock->sk, si, obj->o_pos, ctx); ++ if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6) { ++ inet_sk(sock->sk)->freebind = 1; ++ if (si->cpt_laddrlen) { ++ err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen); ++ if (err) { ++ dprintk_ctx("binding failed: %d, do not worry\n", err); ++ } ++ } ++ rst_socket_in(si, obj->o_pos, sock->sk, ctx); ++ } else if (sock->sk->sk_family == AF_NETLINK) { ++ err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen); ++ if (err) { ++ eprintk_ctx("AF_NETLINK binding failed: %d\n", err); ++ } ++ if (si->cpt_raddrlen) { ++ err = sock->ops->connect(sock, (struct sockaddr *)&si->cpt_raddr, si->cpt_raddrlen, O_NONBLOCK); ++ if (err) { ++ eprintk_ctx("oops, AF_NETLINK connect failed: %d\n", err); ++ } ++ } ++ } ++ 
fixup_unix_address(sock, si, ctx); ++ ++ if (sock2) { ++ err = rst_get_object(CPT_OBJ_SOCKET, pobj->o_pos, si, ctx); ++ if (err) ++ return err; ++ setup_sock_common(sock2->sk, si, pobj->o_pos, ctx); ++ fixup_unix_address(sock2, si, ctx); ++ } ++ ++ if ((sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6) ++ && (int)si->cpt_parent != -1) { ++ cpt_object_t *lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx); ++ if (lobj && cpt_attach_accept(lobj->o_obj, sock->sk, ctx) == 0) ++ sock->sk = NULL; ++ } ++ ++ ++ if (si->cpt_file == CPT_NULL && sock->sk && ++ sock->sk->sk_family == AF_INET) { ++ struct sock *sk = sock->sk; ++ ++ if (sk) { ++ sock->sk = NULL; ++ ++ local_bh_disable(); ++ bh_lock_sock(sk); ++ if (sock_owned_by_user(sk)) ++ eprintk_ctx("oops, sock is locked by user\n"); ++ ++ sock_hold(sk); ++ sock_orphan(sk); ++ ub_inc_orphan_count(sk); ++ bh_unlock_sock(sk); ++ local_bh_enable(); ++ sock_put(sk); ++ dprintk_ctx("orphaning socket %p\n", sk); ++ } ++ } ++ ++ if (si->cpt_file == CPT_NULL && sock->sk == NULL) ++ sock_release(sock); ++ ++ return 0; ++ ++err_out: ++ if (sock2) ++ sock_release(sock2); ++ sock_release(sock); ++ return err; ++} ++ ++static int open_listening_socket(loff_t pos, struct cpt_sock_image *si, ++ struct cpt_context *ctx) ++{ ++ int err; ++ struct socket *sock; ++ struct file *file; ++ cpt_object_t *obj, *fobj; ++ ++ err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol, ++ &sock); ++ if (err) { ++ eprintk_ctx("open_listening_socket: sock_create_kern: %d\n", err); ++ return err; ++ } ++ ++ sock->sk->sk_reuse = 2; ++ sock->sk->sk_bound_dev_if = si->cpt_bound_dev_if; ++ ++ if (sock->sk->sk_family == AF_UNIX) { ++ err = bind_unix_socket(sock, si, ctx); ++ } else if (si->cpt_laddrlen) { ++ if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6) ++ inet_sk(sock->sk)->freebind = 1; ++ ++ err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen); ++ ++ if (err) { 
++ eprintk_ctx("open_listening_socket: bind: %d\n", err); ++ goto err_out; ++ } ++ } ++ ++ err = sock->ops->listen(sock, si->cpt_max_ack_backlog); ++ if (err) { ++ eprintk_ctx("open_listening_socket: listen: %d, %Ld, %d\n", err, pos, si->cpt_deleted); ++ goto err_out; ++ } ++ ++ /* Now we may access socket body directly and fixup all the things. */ ++ ++ file = sock_mapfile(sock); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) { ++ eprintk_ctx("open_listening_socket: map: %d\n", err); ++ goto err_out; ++ } ++ ++ err = -ENOMEM; ++ if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL) ++ goto err_out; ++ if ((obj = cpt_object_add(CPT_OBJ_SOCKET, sock->sk, ctx)) == NULL) ++ goto err_out; ++ cpt_obj_setpos(obj, pos, ctx); ++ cpt_obj_setindex(obj, si->cpt_index, ctx); ++ obj->o_parent = file; ++ cpt_obj_setpos(fobj, si->cpt_file, ctx); ++ cpt_obj_setindex(fobj, si->cpt_index, ctx); ++ ++ setup_sock_common(sock->sk, si, pos, ctx); ++ ++ if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6) ++ rst_restore_synwait_queue(sock->sk, si, pos, ctx); ++ ++ return 0; ++ ++err_out: ++ sock_release(sock); ++ return err; ++} ++ ++static int ++rst_sock_attr_mcfilter(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx) ++{ ++ int err; ++ loff_t pos = *pos_p; ++ struct cpt_sockmc_image v; ++ ++ err = rst_get_object(CPT_OBJ_SOCK_MCADDR, pos, &v, ctx); ++ if (err) ++ return err; ++ ++ *pos_p += v.cpt_next; ++ ++ if (v.cpt_family == AF_INET) ++ return rst_sk_mcfilter_in(sk, &v, pos, ctx); ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ else if (v.cpt_family == AF_INET6) ++ return rst_sk_mcfilter_in6(sk, &v, pos, ctx); ++#endif ++ else ++ return -EAFNOSUPPORT; ++} ++ ++ ++static int ++rst_sock_attr_skfilter(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx) ++{ ++ int err; ++ struct sk_filter *fp, *old_fp; ++ loff_t pos = *pos_p; ++ struct cpt_obj_bits v; ++ ++ err = rst_get_object(CPT_OBJ_SKFILTER, pos, &v, ctx); ++ if (err) ++ return err; ++ ++ *pos_p += 
v.cpt_next; ++ ++ if (v.cpt_size % sizeof(struct sock_filter)) ++ return -EINVAL; ++ ++ fp = sock_kmalloc(sk, v.cpt_size+sizeof(*fp), GFP_KERNEL_UBC); ++ if (fp == NULL) ++ return -ENOMEM; ++ atomic_set(&fp->refcnt, 1); ++ fp->len = v.cpt_size/sizeof(struct sock_filter); ++ ++ err = ctx->pread(fp->insns, v.cpt_size, ctx, pos+v.cpt_hdrlen); ++ if (err) { ++ sk_filter_release(sk, fp); ++ return err; ++ } ++ ++ old_fp = sk->sk_filter; ++ sk->sk_filter = fp; ++ if (old_fp) ++ sk_filter_release(sk, old_fp); ++ return 0; ++} ++ ++ ++int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx) ++{ ++ int err; ++ loff_t pos = *pos_p; ++ ++ err = rst_sock_attr_skfilter(pos_p, sk, ctx); ++ if (err && pos == *pos_p) ++ err = rst_sock_attr_mcfilter(pos_p, sk, ctx); ++ return err; ++} ++ ++struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx) ++{ ++ int err; ++ struct sk_buff *skb; ++ struct cpt_skb_image v; ++ loff_t pos = *pos_p; ++ struct scm_fp_list *fpl = NULL; ++ struct timeval tmptv; ++ ++ err = rst_get_object(CPT_OBJ_SKB, pos, &v, ctx); ++ if (err) ++ return ERR_PTR(err); ++ *pos_p = pos + v.cpt_next; ++ ++ if (owner) ++ *owner = v.cpt_owner; ++ if (queue) ++ *queue = v.cpt_queue; ++ ++ skb = alloc_skb(v.cpt_len + v.cpt_hspace + v.cpt_tspace, GFP_KERNEL); ++ if (skb == NULL) ++ return ERR_PTR(-ENOMEM); ++ skb_reserve(skb, v.cpt_hspace); ++ skb_put(skb, v.cpt_len); ++ skb->h.raw = skb->head + v.cpt_h; ++ skb->nh.raw = skb->head + v.cpt_nh; ++ skb->mac.raw = skb->head + v.cpt_mac; ++ if (sizeof(skb->cb) < sizeof(v.cpt_cb)) BUG(); ++ memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb)); ++ skb->mac_len = v.cpt_mac_len; ++ ++ skb->csum = v.cpt_csum; ++ skb->local_df = v.cpt_local_df; ++ skb->pkt_type = v.cpt_pkt_type; ++ skb->ip_summed = v.cpt_ip_summed; ++ skb->priority = v.cpt_priority; ++ skb->protocol = v.cpt_protocol; ++ cpt_timeval_import(&tmptv, v.cpt_stamp); ++ skb_set_timestamp(skb, &tmptv); ++ ++ skb_shinfo(skb)->tso_segs = 
v.cpt_tso_segs; ++ skb_shinfo(skb)->tso_size = v.cpt_tso_size; ++ if (ctx->image_version == 0) { ++ skb_shinfo(skb)->tso_segs = 1; ++ skb_shinfo(skb)->tso_size = 0; ++ } ++ ++ if (v.cpt_next > v.cpt_hdrlen) { ++ pos = pos + v.cpt_hdrlen; ++ while (pos < *pos_p) { ++ union { ++ struct cpt_obj_bits b; ++ struct cpt_fd_image f; ++ } u; ++ ++ err = rst_get_object(-1, pos, &u, ctx); ++ if (err) { ++ kfree_skb(skb); ++ return ERR_PTR(err); ++ } ++ if (u.b.cpt_object == CPT_OBJ_BITS) { ++ if (u.b.cpt_size != v.cpt_hspace + skb->len) { ++ eprintk_ctx("invalid skb image %u != %u + %u\n", u.b.cpt_size, v.cpt_hspace, skb->len); ++ kfree_skb(skb); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ err = ctx->pread(skb->head, u.b.cpt_size, ctx, pos+u.b.cpt_hdrlen); ++ if (err) { ++ kfree_skb(skb); ++ return ERR_PTR(err); ++ } ++ } else if (u.f.cpt_object == CPT_OBJ_FILEDESC) { ++ if (!fpl) { ++ fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); ++ if (!fpl) { ++ kfree_skb(skb); ++ return ERR_PTR(-ENOMEM); ++ } ++ fpl->count = 0; ++ UNIXCB(skb).fp = fpl; ++ } ++ fpl->fp[fpl->count] = rst_file(u.f.cpt_file, -1, ctx); ++ if (!IS_ERR(fpl->fp[fpl->count])) ++ fpl->count++; ++ } ++ pos += u.b.cpt_next; ++ } ++ } ++ ++ return skb; ++} ++ ++static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) ++{ ++ int i; ++ scm->fp = UNIXCB(skb).fp; ++ skb->destructor = sock_wfree; ++ UNIXCB(skb).fp = NULL; ++ ++ for (i=scm->fp->count-1; i>=0; i--) ++ unix_notinflight(scm->fp->fp[i]); ++} ++ ++static void unix_destruct_fds(struct sk_buff *skb) ++{ ++ struct scm_cookie scm; ++ memset(&scm, 0, sizeof(scm)); ++ unix_detach_fds(&scm, skb); ++ scm_destroy(&scm); ++ sock_wfree(skb); ++ module_put(THIS_MODULE); ++} ++ ++ ++static int restore_unix_rqueue(struct sock *sk, struct cpt_sock_image *si, ++ loff_t pos, struct cpt_context *ctx) ++{ ++ loff_t endpos; ++ ++ pos = pos + si->cpt_hdrlen; ++ endpos = pos + si->cpt_next; ++ while (pos < endpos) { ++ struct sk_buff *skb; ++ struct sock 
*owner_sk; ++ __u32 owner; ++ ++ skb = rst_skb(&pos, &owner, NULL, ctx); ++ if (IS_ERR(skb)) { ++ if (PTR_ERR(skb) == -EINVAL) { ++ int err; ++ ++ err = rst_sock_attr(&pos, sk, ctx); ++ if (err) ++ return err; ++ } ++ return PTR_ERR(skb); ++ } ++ ++ owner_sk = unix_peer(sk); ++ if (owner != -1) { ++ cpt_object_t *pobj; ++ pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, owner, ctx); ++ if (pobj == NULL) { ++ eprintk_ctx("orphan af_unix skb?\n"); ++ kfree_skb(skb); ++ continue; ++ } ++ owner_sk = pobj->o_obj; ++ } ++ if (owner_sk == NULL) { ++ dprintk_ctx("orphan af_unix skb 2?\n"); ++ kfree_skb(skb); ++ continue; ++ } ++ skb_set_owner_w(skb, owner_sk); ++ if (UNIXCB(skb).fp) { ++ skb->destructor = unix_destruct_fds; ++ if (!try_module_get(THIS_MODULE)) BUG(); ++ } ++ skb_queue_tail(&sk->sk_receive_queue, skb); ++ if (sk->sk_state == TCP_LISTEN) { ++ struct socket *sock = skb->sk->sk_socket; ++ if (sock == NULL) BUG(); ++ if (sock->file) BUG(); ++ skb->sk->sk_socket = NULL; ++ skb->sk->sk_sleep = NULL; ++ sock->sk = NULL; ++ sock_release(sock); ++ } ++ } ++ return 0; ++} ++ ++ ++/* All the sockets are created before we start to open files */ ++ ++int rst_sockets(struct cpt_context *ctx) ++{ ++ int err; ++ loff_t sec = ctx->sections[CPT_SECT_SOCKET]; ++ loff_t endsec; ++ cpt_object_t *obj; ++ struct cpt_section_hdr h; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) { ++ eprintk_ctx("rst_sockets: ctx->pread: %d\n", err); ++ return err; ++ } ++ if (h.cpt_section != CPT_SECT_SOCKET || h.cpt_hdrlen < sizeof(h)) { ++ eprintk_ctx("rst_sockets: hdr err\n"); ++ return -EINVAL; ++ } ++ ++ /* The first pass: we create socket index and open listening sockets. 
*/ ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ struct cpt_sock_image *sbuf = cpt_get_buf(ctx); ++ err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx); ++ if (err) { ++ eprintk_ctx("rst_sockets: rst_get_object: %d\n", err); ++ cpt_release_buf(ctx); ++ return err; ++ } ++ if (sbuf->cpt_state == TCP_LISTEN) { ++ err = open_listening_socket(sec, sbuf, ctx); ++ cpt_release_buf(ctx); ++ if (err) { ++ eprintk_ctx("rst_sockets: open_listening_socket: %d\n", err); ++ return err; ++ } ++ } else { ++ cpt_release_buf(ctx); ++ obj = alloc_cpt_object(GFP_KERNEL, ctx); ++ if (obj == NULL) ++ return -ENOMEM; ++ cpt_obj_setindex(obj, sbuf->cpt_index, ctx); ++ cpt_obj_setpos(obj, sec, ctx); ++ obj->o_ppos = sbuf->cpt_file; ++ intern_cpt_object(CPT_OBJ_SOCKET, obj, ctx); ++ } ++ sec += sbuf->cpt_next; ++ } ++ ++ /* Pass 2: really restore sockets */ ++ for_each_object(obj, CPT_OBJ_SOCKET) { ++ struct cpt_sock_image *sbuf; ++ if (obj->o_obj != NULL) ++ continue; ++ sbuf = cpt_get_buf(ctx); ++ err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx); ++ if (err) { ++ eprintk_ctx("rst_sockets: rst_get_object: %d\n", err); ++ cpt_release_buf(ctx); ++ return err; ++ } ++ if (sbuf->cpt_state == TCP_LISTEN) BUG(); ++ err = open_socket(obj, sbuf, ctx); ++ cpt_release_buf(ctx); ++ if (err) { ++ eprintk_ctx("rst_sockets: open_socket: %d\n", err); ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++int rst_orphans(struct cpt_context *ctx) ++{ ++ int err; ++ loff_t sec = ctx->sections[CPT_SECT_ORPHANS]; ++ loff_t endsec; ++ cpt_object_t *obj; ++ struct cpt_section_hdr h; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_ORPHANS || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ struct cpt_sock_image *sbuf = cpt_get_buf(ctx); ++ err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, 
ctx); ++ if (err) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ obj = alloc_cpt_object(GFP_KERNEL, ctx); ++ if (obj == NULL) { ++ cpt_release_buf(ctx); ++ return -ENOMEM; ++ } ++ obj->o_pos = sec; ++ obj->o_ppos = sbuf->cpt_file; ++ err = open_socket(obj, sbuf, ctx); ++ dprintk_ctx("Restoring orphan: %d\n", err); ++ free_cpt_object(obj, ctx); ++ cpt_release_buf(ctx); ++ if (err) ++ return err; ++ sec += sbuf->cpt_next; ++ } ++ ++ return 0; ++} ++ ++ ++/* Pass 3: I understand, this is not funny already :-), ++ * but we have to do another pass to establish links between ++ * not-paired AF_UNIX SOCK_DGRAM sockets and to restore AF_UNIX ++ * skb queues with proper skb->sk links. ++ * ++ * This could be made at the end of rst_sockets(), but we defer ++ * restoring af_unix queues up to the end of restoring files to ++ * make restoring passed FDs cleaner. ++ */ ++ ++int rst_sockets_complete(struct cpt_context *ctx) ++{ ++ int err; ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_SOCKET) { ++ struct cpt_sock_image *sbuf; ++ struct sock *sk = obj->o_obj; ++ struct sock *peer; ++ ++ if (!sk) BUG(); ++ ++ if (sk->sk_family != AF_UNIX) ++ continue; ++ ++ sbuf = cpt_get_buf(ctx); ++ err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx); ++ if (err) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ ++ if (sbuf->cpt_next > sbuf->cpt_hdrlen) ++ restore_unix_rqueue(sk, sbuf, obj->o_pos, ctx); ++ ++ cpt_release_buf(ctx); ++ ++ if (sk->sk_type == SOCK_DGRAM && unix_peer(sk) == NULL) { ++ cpt_object_t *pobj; ++ ++ sbuf = cpt_get_buf(ctx); ++ err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx); ++ if (err) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ ++ if (sbuf->cpt_peer != -1) { ++ pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, sbuf->cpt_peer, ctx); ++ if (pobj) { ++ peer = pobj->o_obj; ++ sock_hold(peer); ++ unix_peer(sk) = peer; ++ } ++ } ++ cpt_release_buf(ctx); ++ } ++ } ++ ++ rst_orphans(ctx); ++ ++ return 0; ++} ++ +diff -upr 
linux-2.6.16.orig/kernel/cpt/rst_socket_in.c linux-2.6.16-026test015/kernel/cpt/rst_socket_in.c +--- linux-2.6.16.orig/kernel/cpt/rst_socket_in.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_socket_in.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,494 @@ ++/* ++ * ++ * kernel/cpt/rst_socket_in.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/fs.h> ++#include <linux/socket.h> ++#include <linux/tcp.h> ++#include <linux/jhash.h> ++#include <net/sock.h> ++#include <net/tcp.h> ++#include <linux/ipv6.h> ++#include <linux/igmp.h> ++#include <net/addrconf.h> ++#include <net/inet6_connection_sock.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_socket.h" ++#include "cpt_kernel.h" ++ ++static inline unsigned long jiffies_import(__u32 tmo) ++{ ++ __s32 delta = tmo; ++ return jiffies + (long)delta; ++} ++ ++static inline __u32 tcp_jiffies_import(__u32 tmo) ++{ ++ return ((__u32)jiffies) + tmo; ++} ++ ++ ++static int restore_queues(struct sock *sk, struct cpt_sock_image *si, ++ loff_t pos, struct cpt_context *ctx) ++{ ++ loff_t endpos; ++ ++ pos = pos + si->cpt_hdrlen; ++ endpos = pos + si->cpt_next; ++ while (pos < endpos) { ++ struct sk_buff *skb; ++ __u32 type; ++ ++ skb = rst_skb(&pos, NULL, &type, ctx); ++ if (IS_ERR(skb)) { ++ if (PTR_ERR(skb) == -EINVAL) { ++ int err; ++ ++ err = rst_sock_attr(&pos, sk, ctx); ++ if (err) ++ return err; ++ } ++ return PTR_ERR(skb); ++ } ++ ++ if (sk->sk_type == SOCK_STREAM) { ++ if (type == CPT_SKB_RQ) { ++ sk_stream_set_owner_r(skb, sk); ++ ub_tcprcvbuf_charge_forced(sk, skb); ++ 
skb_queue_tail(&sk->sk_receive_queue, skb); ++ } else if (type == CPT_SKB_OFOQ) { ++ struct tcp_sock *tp = tcp_sk(sk); ++ sk_stream_set_owner_r(skb, sk); ++ ub_tcprcvbuf_charge_forced(sk, skb); ++ skb_queue_tail(&tp->out_of_order_queue, skb); ++ } else if (type == CPT_SKB_WQ) { ++ sk->sk_wmem_queued += skb->truesize; ++ sk->sk_forward_alloc -= skb->truesize; ++ ub_tcpsndbuf_charge_forced(sk, skb); ++ skb_queue_tail(&sk->sk_write_queue, skb); ++ } else { ++ wprintk_ctx("strange stream queue type %u\n", type); ++ kfree_skb(skb); ++ } ++ } else { ++ if (type == CPT_SKB_RQ) { ++ skb_set_owner_r(skb, sk); ++ skb_queue_tail(&sk->sk_receive_queue, skb); ++ } else if (type == CPT_SKB_WQ) { ++ struct inet_sock *inet = inet_sk(sk); ++ if (inet->cork.fragsize) { ++ skb_set_owner_w(skb, sk); ++ skb_queue_tail(&sk->sk_write_queue, skb); ++ } else { ++ eprintk_ctx("cork skb is dropped\n"); ++ kfree_skb(skb); ++ } ++ } else { ++ wprintk_ctx("strange dgram queue type %u\n", type); ++ kfree_skb(skb); ++ } ++ } ++ } ++ return 0; ++} ++ ++static struct sock *find_parent(__u16 sport, cpt_context_t *ctx) ++{ ++ cpt_object_t *obj; ++ for_each_object(obj, CPT_OBJ_SOCKET) { ++ struct sock *sk = obj->o_obj; ++ if (sk && ++ sk->sk_state == TCP_LISTEN && ++ (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) && ++ inet_sk(sk)->sport == sport) ++ return sk; ++ } ++ return NULL; ++} ++ ++static int rst_socket_tcp(struct cpt_sock_image *si, loff_t pos, struct sock *sk, ++ struct cpt_context *ctx) ++{ ++ struct tcp_sock *tp = tcp_sk(sk); ++ struct sk_buff *skb; ++ tp->pred_flags = si->cpt_pred_flags; ++ tp->rcv_nxt = si->cpt_rcv_nxt; ++ tp->snd_nxt = si->cpt_snd_nxt; ++ tp->snd_una = si->cpt_snd_una; ++ tp->snd_sml = si->cpt_snd_sml; ++ tp->rcv_tstamp = tcp_jiffies_import(si->cpt_rcv_tstamp); ++ tp->lsndtime = tcp_jiffies_import(si->cpt_lsndtime); ++ tp->tcp_header_len = si->cpt_tcp_header_len; ++ inet_csk(sk)->icsk_ack.pending = si->cpt_ack_pending; ++ inet_csk(sk)->icsk_ack.quick = 
si->cpt_quick; ++ inet_csk(sk)->icsk_ack.pingpong = si->cpt_pingpong; ++ inet_csk(sk)->icsk_ack.blocked = si->cpt_blocked; ++ inet_csk(sk)->icsk_ack.ato = si->cpt_ato; ++ inet_csk(sk)->icsk_ack.timeout = jiffies_import(si->cpt_ack_timeout); ++ inet_csk(sk)->icsk_ack.lrcvtime = tcp_jiffies_import(si->cpt_lrcvtime); ++ inet_csk(sk)->icsk_ack.last_seg_size = si->cpt_last_seg_size; ++ inet_csk(sk)->icsk_ack.rcv_mss = si->cpt_rcv_mss; ++ tp->snd_wl1 = si->cpt_snd_wl1; ++ tp->snd_wnd = si->cpt_snd_wnd; ++ tp->max_window = si->cpt_max_window; ++ inet_csk(sk)->icsk_pmtu_cookie = si->cpt_pmtu_cookie; ++ tp->mss_cache = si->cpt_mss_cache; ++ tp->rx_opt.mss_clamp = si->cpt_mss_clamp; ++ inet_csk(sk)->icsk_ext_hdr_len = si->cpt_ext_header_len; ++ inet_csk(sk)->icsk_ca_state = si->cpt_ca_state; ++ inet_csk(sk)->icsk_retransmits = si->cpt_retransmits; ++ tp->reordering = si->cpt_reordering; ++ tp->frto_counter = si->cpt_frto_counter; ++ tp->frto_highmark = si->cpt_frto_highmark; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) ++ // // tp->adv_cong = si->cpt_adv_cong; ++#endif ++ inet_csk(sk)->icsk_accept_queue.rskq_defer_accept = si->cpt_defer_accept; ++ inet_csk(sk)->icsk_backoff = si->cpt_backoff; ++ tp->srtt = si->cpt_srtt; ++ tp->mdev = si->cpt_mdev; ++ tp->mdev_max = si->cpt_mdev_max; ++ tp->rttvar = si->cpt_rttvar; ++ tp->rtt_seq = si->cpt_rtt_seq; ++ inet_csk(sk)->icsk_rto = si->cpt_rto; ++ tp->packets_out = si->cpt_packets_out; ++ tp->left_out = si->cpt_left_out; ++ tp->retrans_out = si->cpt_retrans_out; ++ tp->lost_out = si->cpt_lost_out; ++ tp->sacked_out = si->cpt_sacked_out; ++ tp->fackets_out = si->cpt_fackets_out; ++ tp->snd_ssthresh = si->cpt_snd_ssthresh; ++ tp->snd_cwnd = si->cpt_snd_cwnd; ++ tp->snd_cwnd_cnt = si->cpt_snd_cwnd_cnt; ++ tp->snd_cwnd_clamp = si->cpt_snd_cwnd_clamp; ++ tp->snd_cwnd_used = si->cpt_snd_cwnd_used; ++ tp->snd_cwnd_stamp = tcp_jiffies_import(si->cpt_snd_cwnd_stamp); ++ inet_csk(sk)->icsk_timeout = tcp_jiffies_import(si->cpt_timeout); 
++ tp->rcv_wnd = si->cpt_rcv_wnd; ++ tp->rcv_wup = si->cpt_rcv_wup; ++ tp->write_seq = si->cpt_write_seq; ++ tp->pushed_seq = si->cpt_pushed_seq; ++ tp->copied_seq = si->cpt_copied_seq; ++ tp->rx_opt.tstamp_ok = si->cpt_tstamp_ok; ++ tp->rx_opt.wscale_ok = si->cpt_wscale_ok; ++ tp->rx_opt.sack_ok = si->cpt_sack_ok; ++ tp->rx_opt.saw_tstamp = si->cpt_saw_tstamp; ++ tp->rx_opt.snd_wscale = si->cpt_snd_wscale; ++ tp->rx_opt.rcv_wscale = si->cpt_rcv_wscale; ++ tp->nonagle = si->cpt_nonagle; ++ tp->keepalive_probes = si->cpt_keepalive_probes; ++ tp->rx_opt.rcv_tsval = si->cpt_rcv_tsval; ++ tp->rx_opt.rcv_tsecr = si->cpt_rcv_tsecr; ++ tp->rx_opt.ts_recent = si->cpt_ts_recent; ++ tp->rx_opt.ts_recent_stamp = si->cpt_ts_recent_stamp; ++ tp->rx_opt.user_mss = si->cpt_user_mss; ++ tp->rx_opt.dsack = si->cpt_dsack; ++ tp->rx_opt.eff_sacks = si->cpt_num_sacks; ++ tp->duplicate_sack[0].start_seq = si->cpt_sack_array[0]; ++ tp->duplicate_sack[0].end_seq = si->cpt_sack_array[1]; ++ tp->selective_acks[0].start_seq = si->cpt_sack_array[2]; ++ tp->selective_acks[0].end_seq = si->cpt_sack_array[3]; ++ tp->selective_acks[1].start_seq = si->cpt_sack_array[4]; ++ tp->selective_acks[1].end_seq = si->cpt_sack_array[5]; ++ tp->selective_acks[2].start_seq = si->cpt_sack_array[6]; ++ tp->selective_acks[2].end_seq = si->cpt_sack_array[7]; ++ tp->selective_acks[3].start_seq = si->cpt_sack_array[8]; ++ tp->selective_acks[3].end_seq = si->cpt_sack_array[9]; ++ ++ tp->window_clamp = si->cpt_window_clamp; ++ tp->rcv_ssthresh = si->cpt_rcv_ssthresh; ++ inet_csk(sk)->icsk_probes_out = si->cpt_probes_out; ++ tp->rx_opt.num_sacks = si->cpt_num_sacks; ++ tp->advmss = si->cpt_advmss; ++ inet_csk(sk)->icsk_syn_retries = si->cpt_syn_retries; ++ tp->ecn_flags = si->cpt_ecn_flags; ++ tp->prior_ssthresh = si->cpt_prior_ssthresh; ++ tp->high_seq = si->cpt_high_seq; ++ tp->retrans_stamp = si->cpt_retrans_stamp; ++ tp->undo_marker = si->cpt_undo_marker; ++ tp->undo_retrans = si->cpt_undo_retrans; ++ tp->urg_seq 
= si->cpt_urg_seq; ++ tp->urg_data = si->cpt_urg_data; ++ inet_csk(sk)->icsk_pending = si->cpt_pending; ++ tp->urg_mode = si->cpt_urg_mode; ++ tp->snd_up = si->cpt_snd_up; ++ tp->keepalive_time = si->cpt_keepalive_time; ++ tp->keepalive_intvl = si->cpt_keepalive_intvl; ++ tp->linger2 = si->cpt_linger2; ++ ++ sk->sk_send_head = NULL; ++ for (skb = skb_peek(&sk->sk_write_queue); ++ skb && skb != (struct sk_buff*)&sk->sk_write_queue; ++ skb = skb->next) { ++ if (!after(tp->snd_nxt, TCP_SKB_CB(skb)->seq)) { ++ sk->sk_send_head = skb; ++ break; ++ } ++ } ++ ++ if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) { ++ struct inet_sock *inet = inet_sk(sk); ++ if (inet->num == 0) { ++ cpt_object_t *lobj = NULL; ++ ++ if ((int)si->cpt_parent != -1) ++ lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx); ++ ++ if (lobj && lobj->o_obj) { ++ inet->num = ntohs(inet->sport); ++ local_bh_disable(); ++ __inet_inherit_port(&tcp_hashinfo, lobj->o_obj, sk); ++ local_bh_enable(); ++ dprintk_ctx("port inherited from parent\n"); ++ } else { ++ struct sock *lsk = find_parent(inet->sport, ctx); ++ if (lsk) { ++ inet->num = ntohs(inet->sport); ++ local_bh_disable(); ++ __inet_inherit_port(&tcp_hashinfo, lsk, sk); ++ local_bh_enable(); ++ dprintk_ctx("port inherited\n"); ++ } else { ++ eprintk_ctx("we are kinda lost...\n"); ++ } ++ } ++ } ++ ++ sk->sk_prot->hash(sk); ++ ++ if (inet_csk(sk)->icsk_ack.pending&ICSK_ACK_TIMER) ++ sk_reset_timer(sk, &inet_csk(sk)->icsk_delack_timer, inet_csk(sk)->icsk_ack.timeout); ++ if (inet_csk(sk)->icsk_pending) ++ sk_reset_timer(sk, &inet_csk(sk)->icsk_retransmit_timer, ++ inet_csk(sk)->icsk_timeout); ++ if (sock_flag(sk, SOCK_KEEPOPEN)) { ++ unsigned long expires = jiffies_import(si->cpt_ka_timeout); ++ if (time_after(jiffies, expires)) ++ expires = jiffies + HZ; ++ sk_reset_timer(sk, &sk->sk_timer, expires); ++ } ++ } ++ ++ return 0; ++} ++ ++ ++int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *sk, ++ struct 
cpt_context *ctx) ++{ ++ struct inet_sock *inet = inet_sk(sk); ++ ++ lock_sock(sk); ++ ++ sk->sk_state = si->cpt_state; ++ ++ inet->daddr = si->cpt_daddr; ++ inet->dport = si->cpt_dport; ++ inet->saddr = si->cpt_saddr; ++ inet->rcv_saddr = si->cpt_rcv_saddr; ++ inet->sport = si->cpt_sport; ++ inet->uc_ttl = si->cpt_uc_ttl; ++ inet->tos = si->cpt_tos; ++ inet->cmsg_flags = si->cpt_cmsg_flags; ++ inet->mc_index = si->cpt_mc_index; ++ inet->mc_addr = si->cpt_mc_addr; ++ inet->hdrincl = si->cpt_hdrincl; ++ inet->mc_ttl = si->cpt_mc_ttl; ++ inet->mc_loop = si->cpt_mc_loop; ++ inet->pmtudisc = si->cpt_pmtudisc; ++ inet->recverr = si->cpt_recverr; ++ inet->freebind = si->cpt_freebind; ++ inet->id = si->cpt_idcounter; ++ ++ inet->cork.flags = si->cpt_cork_flags; ++ inet->cork.fragsize = si->cpt_cork_fragsize; ++ inet->cork.length = si->cpt_cork_length; ++ inet->cork.addr = si->cpt_cork_addr; ++ inet->cork.fl.fl4_src = si->cpt_cork_saddr; ++ inet->cork.fl.fl4_dst = si->cpt_cork_daddr; ++ inet->cork.fl.oif = si->cpt_cork_oif; ++ if (inet->cork.fragsize) { ++ if (ip_route_output_key(&inet->cork.rt, &inet->cork.fl)) { ++ eprintk_ctx("failed to restore cork route\n"); ++ inet->cork.fragsize = 0; ++ } ++ } ++ ++ if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) { ++ struct udp_sock *up = udp_sk(sk); ++ up->pending = si->cpt_udp_pending; ++ up->corkflag = si->cpt_udp_corkflag; ++ up->encap_type = si->cpt_udp_encap; ++ up->len = si->cpt_udp_len; ++ } ++ ++ if (sk->sk_family == AF_INET6) { ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ ++ memcpy(&np->saddr, si->cpt_saddr6, 16); ++ memcpy(&np->rcv_saddr, si->cpt_rcv_saddr6, 16); ++ memcpy(&np->daddr, si->cpt_daddr6, 16); ++ np->flow_label = si->cpt_flow_label6; ++ np->frag_size = si->cpt_frag_size6; ++ np->hop_limit = si->cpt_hop_limit6; ++ np->mcast_hops = si->cpt_mcast_hops6; ++ np->mcast_oif = si->cpt_mcast_oif6; ++ np->rxopt.all = si->cpt_rxopt6; ++ np->mc_loop = si->cpt_mc_loop6; ++ np->recverr = si->cpt_recverr6; 
++ np->sndflow = si->cpt_sndflow6; ++ np->pmtudisc = si->cpt_pmtudisc6; ++ np->ipv6only = si->cpt_ipv6only6; ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ if (si->cpt_mapped) { ++ extern struct inet_connection_sock_af_ops ipv6_mapped; ++ if (sk->sk_type == SOCK_STREAM && ++ sk->sk_protocol == IPPROTO_TCP) { ++ inet_csk(sk)->icsk_af_ops = &ipv6_mapped; ++ sk->sk_backlog_rcv = tcp_v4_do_rcv; ++ } ++ } ++#endif ++ } ++ ++ restore_queues(sk, si, pos, ctx); ++ ++ if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP) ++ rst_socket_tcp(si, pos, sk, ctx); ++ ++ release_sock(sk); ++ return 0; ++} ++ ++int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *ctx) ++{ ++ struct request_sock *req; ++ ++ if (lsk->sk_state != TCP_LISTEN) ++ return -EINVAL; ++ ++ req = reqsk_alloc(&tcp_request_sock_ops); ++ if (!req) ++ return -ENOMEM; ++ ++ sk->sk_socket = NULL; ++ sk->sk_sleep = NULL; ++ inet_csk_reqsk_queue_add(lsk, req, sk); ++ return 0; ++} ++ ++static __inline__ u32 __tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd) ++{ ++ return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); ++} ++ ++int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si, ++ loff_t pos, struct cpt_context *ctx) ++{ ++ int err; ++ loff_t end = si->cpt_next; ++ ++ pos += si->cpt_hdrlen; ++ while (pos < end) { ++ struct cpt_openreq_image oi; ++ ++ err = rst_get_object(CPT_OBJ_OPENREQ, pos, &oi, ctx); ++ if (err) { ++ err = rst_sock_attr(&pos, sk, ctx); ++ if (err) ++ return err; ++ continue; ++ } ++ ++ if (oi.cpt_object == CPT_OBJ_OPENREQ) { ++ struct request_sock *req = reqsk_alloc(&tcp_request_sock_ops); ++ if (req == NULL) ++ return -ENOMEM; ++ ++ memset(req, 0, sizeof(*req)); ++ tcp_rsk(req)->rcv_isn = oi.cpt_rcv_isn; ++ tcp_rsk(req)->snt_isn = oi.cpt_snt_isn; ++ inet_rsk(req)->rmt_port = oi.cpt_rmt_port; ++ req->mss = oi.cpt_mss; ++ req->retrans = oi.cpt_retrans; ++ inet_rsk(req)->snd_wscale = oi.cpt_snd_wscale; ++ 
inet_rsk(req)->rcv_wscale = oi.cpt_rcv_wscale; ++ inet_rsk(req)->tstamp_ok = oi.cpt_tstamp_ok; ++ inet_rsk(req)->sack_ok = oi.cpt_sack_ok; ++ inet_rsk(req)->wscale_ok = oi.cpt_wscale_ok; ++ inet_rsk(req)->ecn_ok = oi.cpt_ecn_ok; ++ inet_rsk(req)->acked = oi.cpt_acked; ++ req->window_clamp = oi.cpt_window_clamp; ++ req->rcv_wnd = oi.cpt_rcv_wnd; ++ req->ts_recent = oi.cpt_ts_recent; ++ req->expires = jiffies_import(oi.cpt_expires); ++ ++ if (oi.cpt_family == AF_INET) { ++ memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4); ++ memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4); ++ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); ++ } else { ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) ++ memcpy(&inet6_rsk(req)->loc_addr, oi.cpt_loc_addr, 16); ++ memcpy(&inet6_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 16); ++ inet6_rsk(req)->iif = oi.cpt_iif; ++ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); ++#endif ++ } ++ } ++ pos += oi.cpt_next; ++ } ++ return 0; ++} ++ ++int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v, ++ loff_t pos, cpt_context_t *ctx) ++{ ++ struct ip_mreqn imr; ++ ++ if (v->cpt_mode || v->cpt_next != v->cpt_hdrlen) { ++ eprintk_ctx("IGMPv3 is still not supported\n"); ++ return -EINVAL; ++ } ++ ++ memset(&imr, 0, sizeof(imr)); ++ imr.imr_ifindex = v->cpt_ifindex; ++ imr.imr_multiaddr.s_addr = v->cpt_mcaddr[0]; ++ return ip_mc_join_group(sk, &imr); ++} ++ ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) ++int rst_sk_mcfilter_in6(struct sock *sk, struct cpt_sockmc_image *v, ++ loff_t pos, cpt_context_t *ctx) ++{ ++ ++ if (v->cpt_mode || v->cpt_next != v->cpt_hdrlen) { ++ eprintk_ctx("IGMPv3 is still not supported\n"); ++ return -EINVAL; ++ } ++ ++ return ipv6_sock_mc_join(sk, v->cpt_ifindex, ++ (struct in6_addr*)v->cpt_mcaddr); ++} ++#endif +diff -upr linux-2.6.16.orig/kernel/cpt/rst_sysvipc.c linux-2.6.16-026test015/kernel/cpt/rst_sysvipc.c +--- linux-2.6.16.orig/kernel/cpt/rst_sysvipc.c 2006-07-04 
14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_sysvipc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,409 @@ ++/* ++ * ++ * kernel/cpt/rst_sysvipc.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/major.h> ++#include <linux/pipe_fs_i.h> ++#include <linux/mman.h> ++#include <linux/shm.h> ++/* FIXME. x86_64 has asm/ipc.h forgotten? */ ++#include <asm-generic/ipc.h> ++#include <asm/uaccess.h> ++#include <asm/unistd.h> ++#include <ub/ub_mem.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_kernel.h" ++ ++struct _warg { ++ struct file *file; ++ struct cpt_sysvshm_image *v; ++}; ++ ++static int fixup_one_shm(struct shmid_kernel *shp, void *arg) ++{ ++ struct _warg *warg = arg; ++ ++ if (shp->shm_file != warg->file) ++ return 0; ++ if (shp->shm_nattch) ++ return -EEXIST; ++ ++ shp->shm_perm.uid = warg->v->cpt_uid; ++ shp->shm_perm.gid = warg->v->cpt_gid; ++ shp->shm_perm.cuid = warg->v->cpt_cuid; ++ shp->shm_perm.cgid = warg->v->cpt_cgid; ++ shp->shm_perm.mode = warg->v->cpt_mode; ++ ++ shp->shm_atim = warg->v->cpt_atime; ++ shp->shm_dtim = warg->v->cpt_dtime; ++ shp->shm_ctim = warg->v->cpt_ctime; ++ shp->shm_cprid = warg->v->cpt_creator; ++ shp->shm_lprid = warg->v->cpt_last; ++ ++ /* TODO: fix shp->mlock_user? 
*/ ++ return 1; ++} ++ ++static int fixup_shm(struct file *file, struct cpt_sysvshm_image *v) ++{ ++ struct _warg warg; ++ ++ warg.file = file; ++ warg.v = v; ++ ++ return sysvipc_walk_shm(fixup_one_shm, &warg); ++} ++ ++static int fixup_shm_data(struct file *file, loff_t pos, loff_t end, ++ struct cpt_context *ctx) ++{ ++ struct cpt_page_block pgb; ++ ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos); ++ ++ do_write = file->f_dentry->d_inode->i_fop->write; ++ if (do_write == NULL) { ++ eprintk_ctx("No TMPFS? Cannot restore content of SYSV SHM\n"); ++ return -EINVAL; ++ } ++ ++ while (pos < end) { ++ loff_t opos; ++ loff_t ipos; ++ int count; ++ int err; ++ ++ err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx); ++ if (err) ++ return err; ++ dprintk_ctx("restoring SHM block: %08x-%08x\n", ++ (__u32)pgb.cpt_start, (__u32)pgb.cpt_end); ++ ipos = pos + pgb.cpt_hdrlen; ++ opos = pgb.cpt_start; ++ count = pgb.cpt_end-pgb.cpt_start; ++ while (count > 0) { ++ mm_segment_t oldfs; ++ int copy = count; ++ ++ if (copy > PAGE_SIZE) ++ copy = PAGE_SIZE; ++ (void)cpt_get_buf(ctx); ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos); ++ set_fs(oldfs); ++ if (err) { ++ __cpt_release_buf(ctx); ++ return err; ++ } ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ ipos += copy; ++ err = do_write(file, ctx->tmpbuf, copy, &opos); ++ set_fs(oldfs); ++ __cpt_release_buf(ctx); ++ if (err != copy) { ++ eprintk_ctx("write() failure\n"); ++ if (err >= 0) ++ err = -EIO; ++ return err; ++ } ++ count -= copy; ++ } ++ pos += pgb.cpt_next; ++ } ++ return 0; ++} ++ ++struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx) ++{ ++ struct file *file; ++ int err; ++ loff_t dpos, epos; ++ union { ++ struct cpt_file_image fi; ++ struct cpt_sysvshm_image shmi; ++ struct cpt_inode_image ii; ++ } u; ++ ++ err = rst_get_object(CPT_OBJ_FILE, pos, &u.fi, ctx); ++ if (err < 0) ++ goto err_out; ++ pos = u.fi.cpt_inode; ++ err = 
rst_get_object(CPT_OBJ_INODE, pos, &u.ii, ctx); ++ if (err < 0) ++ goto err_out; ++ dpos = pos + u.ii.cpt_hdrlen; ++ epos = pos + u.ii.cpt_next; ++ err = rst_get_object(CPT_OBJ_SYSV_SHM, pos + u.ii.cpt_hdrlen, &u.shmi, ctx); ++ if (err < 0) ++ goto err_out; ++ dpos += u.shmi.cpt_next; ++ ++ file = sysvipc_setup_shm(u.shmi.cpt_key, u.shmi.cpt_id, ++ u.shmi.cpt_segsz, u.shmi.cpt_mode); ++ if (!IS_ERR(file)) { ++ err = fixup_shm(file, &u.shmi); ++ if (err != -EEXIST && dpos < epos) ++ err = fixup_shm_data(file, dpos, epos, ctx); ++ } ++ ++ return file; ++ ++err_out: ++ return ERR_PTR(err); ++} ++ ++static int attach_one_undo(int semid, struct sem_array *sma, void *arg) ++{ ++ struct sem_undo *su = arg; ++ struct sem_undo_list *undo_list = current->sysvsem.undo_list; ++ ++ if (semid != su->semid) ++ return 0; ++ ++ su->proc_next = undo_list->proc_list; ++ undo_list->proc_list = su; ++ ++ su->id_next = sma->undo; ++ sma->undo = su; ++ ++ return 1; ++} ++ ++static int attach_undo(struct sem_undo *su) ++{ ++ return sysvipc_walk_sem(attach_one_undo, su); ++} ++ ++static int do_rst_semundo(struct cpt_object_hdr *sui, loff_t pos, struct cpt_context *ctx) ++{ ++ int err; ++ struct sem_undo_list *undo_list; ++ ++ if (current->sysvsem.undo_list) { ++ eprintk_ctx("Funny undo_list\n"); ++ return 0; ++ } ++ ++ undo_list = ub_kmalloc(sizeof(struct sem_undo_list), GFP_KERNEL); ++ if (undo_list == NULL) ++ return -ENOMEM; ++ memset(undo_list, 0, sizeof(struct sem_undo_list)); ++ atomic_set(&undo_list->refcnt, 1); ++ spin_lock_init(&undo_list->lock); ++ current->sysvsem.undo_list = undo_list; ++ ++ if (sui->cpt_next > sui->cpt_hdrlen) { ++ loff_t offset = pos + sui->cpt_hdrlen; ++ do { ++ struct sem_undo *new; ++ struct cpt_sysvsem_undo_image spi; ++ err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO_REC, offset, &spi, ctx); ++ if (err) ++ goto out; ++ new = ub_kmalloc(sizeof(struct sem_undo) + ++ sizeof(short)*spi.cpt_nsem, GFP_KERNEL); ++ if (!new) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ 
memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*spi.cpt_nsem); ++ new->semadj = (short *) &new[1]; ++ new->semid = spi.cpt_id; ++ err = ctx->pread(new->semadj, spi.cpt_nsem*sizeof(short), ctx, offset + spi.cpt_hdrlen); ++ if (err) { ++ kfree(new); ++ goto out; ++ } ++ err = attach_undo(new); ++ if (err <= 0) { ++ if (err == 0) ++ err = -ENOENT; ++ kfree(new); ++ goto out; ++ } ++ offset += spi.cpt_next; ++ } while (offset < pos + sui->cpt_next); ++ } ++ err = 0; ++ ++out: ++ return err; ++} ++ ++__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ __u32 flag = 0; ++ ++#if 0 ++ if (ti->cpt_sysvsem_undo == CPT_NULL || ++ lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo)) ++ flag |= CLONE_SYSVSEM; ++#endif ++ return flag; ++} ++ ++int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ int err; ++ struct sem_undo_list *f = current->sysvsem.undo_list; ++ cpt_object_t *obj; ++ struct cpt_object_hdr sui; ++ ++ if (ti->cpt_sysvsem_undo == CPT_NULL) { ++ exit_sem(current); ++ return 0; ++ } ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, ctx); ++ if (obj) { ++ if (obj->o_obj != f) { ++ exit_sem(current); ++ f = obj->o_obj; ++ atomic_inc(&f->refcnt); ++ current->sysvsem.undo_list = f; ++ } ++ return 0; ++ } ++ ++ if ((err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, &sui, ctx)) != 0) ++ goto out; ++ ++ if ((err = do_rst_semundo(&sui, ti->cpt_sysvsem_undo, ctx)) != 0) ++ goto out; ++ ++ err = -ENOMEM; ++ obj = cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, f, ctx); ++ if (obj) { ++ err = 0; ++ cpt_obj_setpos(obj, ti->cpt_sysvsem_undo, ctx); ++ } ++ ++ return 0; ++ ++out: ++ return err; ++} ++ ++struct _sarg { ++ int semid; ++ struct cpt_sysvsem_image *v; ++ __u32 *arr; ++}; ++ ++static int fixup_one_sem(int semid, struct sem_array *sma, void *arg) ++{ ++ struct _sarg *warg = arg; ++ ++ if (semid != warg->semid) ++ return 0; ++ ++ sma->sem_perm.uid = 
warg->v->cpt_uid; ++ sma->sem_perm.gid = warg->v->cpt_gid; ++ sma->sem_perm.cuid = warg->v->cpt_cuid; ++ sma->sem_perm.cgid = warg->v->cpt_cgid; ++ sma->sem_perm.mode = warg->v->cpt_mode; ++ sma->sem_perm.seq = warg->v->cpt_seq; ++ ++ sma->sem_ctime = warg->v->cpt_ctime; ++ sma->sem_otime = warg->v->cpt_otime; ++ memcpy(sma->sem_base, warg->arr, sma->sem_nsems*8); ++ return 1; ++} ++ ++static int fixup_sem(int semid, struct cpt_sysvsem_image *v, __u32 *arr) ++{ ++ struct _sarg warg; ++ ++ warg.semid = semid; ++ warg.v = v; ++ warg.arr = arr; ++ ++ return sysvipc_walk_sem(fixup_one_sem, &warg); ++} ++ ++ ++static int restore_sem(loff_t pos, struct cpt_sysvsem_image *si, ++ struct cpt_context *ctx) ++{ ++ int err; ++ __u32 *arr; ++ int nsems = (si->cpt_next - si->cpt_hdrlen)/8; ++ ++ arr = kmalloc(nsems*8, GFP_KERNEL); ++ if (!arr) ++ return -ENOMEM; ++ ++ err = ctx->pread(arr, nsems*8, ctx, pos+si->cpt_hdrlen); ++ if (err) ++ goto out; ++ err = sysvipc_setup_sem(si->cpt_key, si->cpt_id, nsems, si->cpt_mode); ++ if (err < 0) { ++ eprintk_ctx("SEM 3\n"); ++ goto out; ++ } ++ err = fixup_sem(si->cpt_id, si, arr); ++ if (err == 0) ++ err = -ESRCH; ++ if (err > 0) ++ err = 0; ++out: ++ kfree(arr); ++ return err; ++} ++ ++static int rst_sysv_sem(struct cpt_context *ctx) ++{ ++ int err; ++ loff_t sec = ctx->sections[CPT_SECT_SYSV_SEM]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ struct cpt_sysvsem_image sbuf; ++ ++ if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_SYSV_SEM || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ int err; ++ err = rst_get_object(CPT_OBJ_SYSV_SEM, sec, &sbuf, ctx); ++ if (err) ++ return err; ++ err = restore_sem(sec, &sbuf, ctx); ++ if (err) ++ return err; ++ sec += sbuf.cpt_next; ++ } ++ return 0; ++} ++ ++int rst_sysv_ipc(struct cpt_context *ctx) ++{ ++ return 
rst_sysv_sem(ctx); ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_tty.c linux-2.6.16-026test015/kernel/cpt/rst_tty.c +--- linux-2.6.16.orig/kernel/cpt/rst_tty.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_tty.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,380 @@ ++/* ++ * ++ * kernel/cpt/rst_tty.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/major.h> ++#include <linux/pipe_fs_i.h> ++#include <linux/mman.h> ++#include <linux/mount.h> ++#include <linux/tty.h> ++#include <linux/vmalloc.h> ++#include <asm/unistd.h> ++#include <asm/uaccess.h> ++#include <linux/cpt_image.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_mm.h" ++#include "cpt_files.h" ++#include "cpt_kernel.h" ++ ++static int pty_setup(struct tty_struct *stty, loff_t pos, ++ struct cpt_tty_image *pi, struct cpt_context *ctx) ++{ ++ unsigned long flags; ++ ++ stty->pgrp = -1; ++ stty->session = 0; ++ stty->packet = pi->cpt_packet; ++ stty->stopped = pi->cpt_stopped; ++ stty->hw_stopped = pi->cpt_hw_stopped; ++ stty->flow_stopped = pi->cpt_flow_stopped; ++#define DONOT_CHANGE ((1<<TTY_CHARGED)|(1<<TTY_CLOSING)|(1<<TTY_LDISC)) ++ flags = stty->flags & DONOT_CHANGE; ++ stty->flags = flags | (pi->cpt_flags & ~DONOT_CHANGE); ++ stty->ctrl_status = pi->cpt_ctrl_status; ++ stty->winsize.ws_row = pi->cpt_ws_row; ++ stty->winsize.ws_col = pi->cpt_ws_col; ++ stty->winsize.ws_ypixel = pi->cpt_ws_prow; ++ stty->winsize.ws_xpixel = pi->cpt_ws_pcol; ++ stty->canon_column = pi->cpt_canon_column; ++ stty->column = pi->cpt_column; ++ stty->raw = pi->cpt_raw; ++ stty->real_raw = 
pi->cpt_real_raw; ++ stty->erasing = pi->cpt_erasing; ++ stty->lnext = pi->cpt_lnext; ++ stty->icanon = pi->cpt_icanon; ++ stty->closing = pi->cpt_closing; ++ stty->minimum_to_wake = pi->cpt_minimum_to_wake; ++ ++ stty->termios->c_iflag = pi->cpt_c_iflag; ++ stty->termios->c_oflag = pi->cpt_c_oflag; ++ stty->termios->c_lflag = pi->cpt_c_lflag; ++ stty->termios->c_cflag = pi->cpt_c_cflag; ++ memcpy(&stty->termios->c_cc, &pi->cpt_c_cc, NCCS); ++ memcpy(stty->read_flags, pi->cpt_read_flags, sizeof(stty->read_flags)); ++ ++ if (pi->cpt_next > pi->cpt_hdrlen) { ++ int err; ++ struct cpt_obj_bits b; ++ err = rst_get_object(CPT_OBJ_BITS, pos + pi->cpt_hdrlen, &b, ctx); ++ if (err) ++ return err; ++ if (b.cpt_size == 0) ++ return 0; ++ err = ctx->pread(stty->read_buf, b.cpt_size, ctx, pos + pi->cpt_hdrlen + b.cpt_hdrlen); ++ if (err) ++ return err; ++ ++ spin_lock_irq(&stty->read_lock); ++ stty->read_tail = 0; ++ stty->read_cnt = b.cpt_size; ++ stty->read_head = b.cpt_size; ++ stty->canon_head = stty->read_tail + pi->cpt_canon_head; ++ stty->canon_data = pi->cpt_canon_data; ++ spin_unlock_irq(&stty->read_lock); ++ } ++ ++ return 0; ++} ++ ++/* Find slave/master tty in image, when we already know master/slave. ++ * It might be optimized, of course. 
*/ ++static loff_t find_pty_pair(struct tty_struct *stty, loff_t pos, struct cpt_tty_image *pi, struct cpt_context *ctx) ++{ ++ int err; ++ loff_t sec = ctx->sections[CPT_SECT_TTY]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ struct cpt_tty_image *pibuf; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return CPT_NULL; ++ if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h)) ++ return CPT_NULL; ++ pibuf = kmalloc(sizeof(*pibuf), GFP_KERNEL); ++ if (pibuf == NULL) { ++ eprintk_ctx("cannot allocate buffer\n"); ++ return CPT_NULL; ++ } ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx)) ++ return CPT_NULL; ++ if (pibuf->cpt_index == pi->cpt_index && ++ !((pi->cpt_drv_flags^pibuf->cpt_drv_flags)&TTY_DRIVER_DEVPTS_MEM) && ++ pos != sec) { ++ pty_setup(stty, sec, pibuf, ctx); ++ return sec; ++ } ++ sec += pibuf->cpt_next; ++ } ++ kfree(pibuf); ++ return CPT_NULL; ++} ++ ++static int fixup_tty_attrs(struct cpt_inode_image *ii, struct file *master, ++ struct cpt_context *ctx) ++{ ++ int err; ++ struct iattr newattrs; ++ struct dentry *d = master->f_dentry; ++ ++ newattrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE; ++ newattrs.ia_uid = ii->cpt_uid; ++ newattrs.ia_gid = ii->cpt_gid; ++ newattrs.ia_mode = ii->cpt_mode; ++ ++ mutex_lock(&d->d_inode->i_mutex); ++ err = notify_change(d, &newattrs); ++ mutex_unlock(&d->d_inode->i_mutex); ++ ++ return err; ++} ++ ++/* NOTE: "portable", but ugly thing. To allocate /dev/pts/N, we open ++ * /dev/ptmx until we get pty with desired index. 
++ */ ++ ++struct file *ptmx_open(int index, unsigned int flags) ++{ ++ struct file *file; ++ struct file **stack = NULL; ++ int depth = 0; ++ ++ for (;;) { ++ struct tty_struct *tty; ++ ++ file = filp_open("/dev/ptmx", flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0); ++ if (IS_ERR(file)) ++ break; ++ tty = file->private_data; ++ if (tty->index == index) ++ break; ++ ++ if (depth == PAGE_SIZE/sizeof(struct file *)) { ++ fput(file); ++ file = ERR_PTR(-EBUSY); ++ break; ++ } ++ if (stack == NULL) { ++ stack = (struct file **)__get_free_page(GFP_KERNEL); ++ if (!stack) { ++ fput(file); ++ file = ERR_PTR(-ENOMEM); ++ break; ++ } ++ } ++ stack[depth] = file; ++ depth++; ++ } ++ while (depth > 0) { ++ depth--; ++ fput(stack[depth]); ++ } ++ if (stack) ++ free_page((unsigned long)stack); ++ return file; ++} ++ ++ ++struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii, ++ unsigned flags, struct cpt_context *ctx) ++{ ++ int err; ++ cpt_object_t *obj; ++ struct file *master, *slave; ++ struct tty_struct *stty; ++ struct cpt_tty_image *pi; ++ static char *a = "pqrstuvwxyzabcde"; ++ static char *b = "0123456789abcdef"; ++ char pairname[16]; ++ unsigned master_flags, slave_flags; ++ ++ if (fi->cpt_priv == CPT_NULL) ++ return ERR_PTR(-EINVAL); ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, fi->cpt_priv, ctx); ++ if (obj && obj->o_parent) { ++ dprintk_ctx("obtained pty as pair to existing\n"); ++ master = obj->o_parent; ++ stty = master->private_data; ++ ++ if (stty->driver->subtype == PTY_TYPE_MASTER && ++ (stty->driver->flags&TTY_DRIVER_DEVPTS_MEM)) { ++ wprintk_ctx("cloning ptmx\n"); ++ get_file(master); ++ return master; ++ } ++ ++ master = dentry_open(dget(master->f_dentry), ++ mntget(master->f_vfsmnt), flags); ++ if (!IS_ERR(master)) { ++ stty = master->private_data; ++ if (stty->driver->subtype != PTY_TYPE_MASTER) ++ fixup_tty_attrs(ii, master, ctx); ++ } ++ return master; ++ } ++ ++ pi = cpt_get_buf(ctx); ++ err = rst_get_object(CPT_OBJ_TTY, fi->cpt_priv, 
pi, ctx); ++ if (err) { ++ cpt_release_buf(ctx); ++ return ERR_PTR(err); ++ } ++ ++ master_flags = slave_flags = 0; ++ if (pi->cpt_drv_subtype == PTY_TYPE_MASTER) ++ master_flags = flags; ++ else ++ slave_flags = flags; ++ ++ /* ++ * Open pair master/slave. ++ */ ++ if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM) { ++ master = ptmx_open(pi->cpt_index, master_flags); ++ } else { ++ sprintf(pairname, "/dev/pty%c%c", a[pi->cpt_index/16], b[pi->cpt_index%16]); ++ master = filp_open(pairname, master_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0); ++ } ++ if (IS_ERR(master)) { ++ eprintk_ctx("filp_open master: %Ld %ld\n", fi->cpt_priv, PTR_ERR(master)); ++ cpt_release_buf(ctx); ++ return master; ++ } ++ stty = master->private_data; ++ clear_bit(TTY_PTY_LOCK, &stty->flags); ++ if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM) ++ sprintf(pairname, "/dev/pts/%d", stty->index); ++ else ++ sprintf(pairname, "/dev/tty%c%c", a[stty->index/16], b[stty->index%16]); ++ slave = filp_open(pairname, slave_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0); ++ if (IS_ERR(slave)) { ++ eprintk_ctx("filp_open slave %s: %ld\n", pairname, PTR_ERR(slave)); ++ fput(master); ++ cpt_release_buf(ctx); ++ return slave; ++ } ++ ++ if (pi->cpt_drv_subtype != PTY_TYPE_MASTER) ++ fixup_tty_attrs(ii, slave, ctx); ++ ++ cpt_object_add(CPT_OBJ_TTY, master->private_data, ctx); ++ cpt_object_add(CPT_OBJ_TTY, slave->private_data, ctx); ++ cpt_object_add(CPT_OBJ_FILE, master, ctx); ++ cpt_object_add(CPT_OBJ_FILE, slave, ctx); ++ ++ if (pi->cpt_drv_subtype == PTY_TYPE_MASTER) { ++ loff_t pos; ++ obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx); ++ obj->o_parent = master; ++ cpt_obj_setpos(obj, fi->cpt_priv, ctx); ++ pty_setup(stty, fi->cpt_priv, pi, ctx); ++ ++ obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx); ++ obj->o_parent = slave; ++ pos = find_pty_pair(stty->link, fi->cpt_priv, pi, ctx); ++ cpt_obj_setpos(obj, pos, ctx); ++ ++ obj = lookup_cpt_object(CPT_OBJ_FILE, slave, ctx); ++ cpt_obj_setpos(obj, 
CPT_NULL, ctx); ++ get_file(master); ++ cpt_release_buf(ctx); ++ return master; ++ } else { ++ loff_t pos; ++ obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx); ++ obj->o_parent = slave; ++ cpt_obj_setpos(obj, fi->cpt_priv, ctx); ++ pty_setup(stty->link, fi->cpt_priv, pi, ctx); ++ ++ obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx); ++ obj->o_parent = master; ++ pos = find_pty_pair(stty, fi->cpt_priv, pi, ctx); ++ cpt_obj_setpos(obj, pos, ctx); ++ ++ obj = lookup_cpt_object(CPT_OBJ_FILE, master, ctx); ++ cpt_obj_setpos(obj, CPT_NULL, ctx); ++ get_file(slave); ++ cpt_release_buf(ctx); ++ return slave; ++ } ++} ++ ++int rst_tty_jobcontrol(struct cpt_context *ctx) ++{ ++ int err; ++ loff_t sec = ctx->sections[CPT_SECT_TTY]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ cpt_object_t *obj; ++ struct cpt_tty_image *pibuf = cpt_get_buf(ctx); ++ ++ if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx)) { ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, sec, ctx); ++ if (obj) { ++ struct tty_struct *stty = obj->o_obj; ++ if ((int)pibuf->cpt_pgrp > 0) { ++ stty->pgrp = vpid_to_pid(pibuf->cpt_pgrp); ++ if (stty->pgrp == -1) ++ dprintk_ctx("unknown tty pgrp %d\n", pibuf->cpt_pgrp); ++ } else if (pibuf->cpt_pgrp) { ++ stty->pgrp = alloc_pidmap(); ++ if (stty->pgrp < 0) { ++ eprintk_ctx("cannot allocate stray tty->pgrp"); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ free_pidmap(stty->pgrp); ++ } ++ if ((int)pibuf->cpt_session > 0) { ++ int sess; ++ sess = vpid_to_pid(pibuf->cpt_session); ++ if (sess == -1) { ++ dprintk_ctx("unknown tty session %d\n", pibuf->cpt_session); ++ } else if (stty->session <= 0) { ++ stty->session = sess; ++ } else if (stty->session != 
sess) { ++ wprintk_ctx("tty session mismatch 2\n"); ++ } ++ } ++ } ++ sec += pibuf->cpt_next; ++ cpt_release_buf(ctx); ++ } ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_ubc.c linux-2.6.16-026test015/kernel/cpt/rst_ubc.c +--- linux-2.6.16.orig/kernel/cpt/rst_ubc.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_ubc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,108 @@ ++/* ++ * ++ * kernel/cpt/rst_ubc.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/types.h> ++#include <ub/beancounter.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++ ++struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ obj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, pos, ctx); ++ if (obj == NULL) { ++ printk(KERN_ERR "RST: unknown ub @%Lu\n", pos); ++ return get_beancounter(get_exec_ub()); ++ } ++ return get_beancounter(obj->o_obj); ++} ++ ++static void restore_one_bc_parm(__u64 *dmp, struct ubparm *prm, int held) ++{ ++ prm->barrier = (dmp[0] == CPT_NULL ? UB_MAXVALUE : dmp[0]); ++ prm->limit = (dmp[1] == CPT_NULL ? 
UB_MAXVALUE : dmp[1]); ++ if (held) ++ prm->held = dmp[2]; ++ prm->maxheld = dmp[3]; ++ prm->minheld = dmp[4]; ++ prm->failcnt = dmp[5]; ++} ++ ++static int restore_one_bc(struct cpt_beancounter_image *v, ++ cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ struct user_beancounter *bc; ++ cpt_object_t *pobj; ++ int i; ++ ++ if (v->cpt_parent != CPT_NULL) { ++ pobj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, v->cpt_parent, ctx); ++ if (pobj == NULL) ++ return -ESRCH; ++ bc = get_subbeancounter_byid(pobj->o_obj, v->cpt_id, 1); ++ } else { ++ bc = get_exec_ub(); ++ while (bc->parent) ++ bc = bc->parent; ++ get_beancounter(bc); ++ } ++ if (bc == NULL) ++ return -ENOMEM; ++ obj->o_obj = bc; ++ ++ for (i = 0; i < UB_RESOURCES; i++) ++ restore_one_bc_parm(v->cpt_parms, bc->ub_parms, 0); ++ for (i = 0; i < UB_RESOURCES; i++) ++ restore_one_bc_parm(v->cpt_parms + UB_RESOURCES * 6, ++ bc->ub_store, 1); ++ return 0; ++} ++ ++int rst_undump_ubc(struct cpt_context *ctx) ++{ ++ loff_t start, end; ++ struct cpt_beancounter_image *v; ++ cpt_object_t *obj; ++ int err; ++ ++ err = rst_get_section(CPT_SECT_UBC, ctx, &start, &end); ++ if (err) ++ return err; ++ ++ while (start < end) { ++ v = cpt_get_buf(ctx); ++ err = rst_get_object(CPT_OBJ_UBC, start, v, ctx); ++ if (err) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ ++ obj = alloc_cpt_object(GFP_KERNEL, ctx); ++ cpt_obj_setpos(obj, start, ctx); ++ intern_cpt_object(CPT_OBJ_UBC, obj, ctx); ++ ++ restore_one_bc(v, obj, ctx); ++ ++ cpt_release_buf(ctx); ++ start += v->cpt_next; ++ } ++ return 0; ++} ++ ++void rst_finish_ubc(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ ++ for_each_object(obj, CPT_OBJ_UBC) ++ put_beancounter(obj->o_obj); ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_undump.c linux-2.6.16-026test015/kernel/cpt/rst_undump.c +--- linux-2.6.16.orig/kernel/cpt/rst_undump.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_undump.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,819 @@ 
++/* ++ * ++ * kernel/cpt/rst_undump.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/version.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/errno.h> ++#include <linux/pagemap.h> ++#include <linux/namespace.h> ++#include <linux/personality.h> ++#include <linux/binfmts.h> ++#include <linux/smp_lock.h> ++#include <linux/ve_proto.h> ++#include <linux/virtinfo.h> ++#include <linux/compat.h> ++#include <linux/vzcalluser.h> ++#include <ub/beancounter.h> ++#include <asm/desc.h> ++#include <asm/unistd.h> ++ ++#include "cpt_obj.h" ++#include "cpt_context.h" ++#include "cpt_files.h" ++#include "cpt_mm.h" ++#include "cpt_process.h" ++#include "cpt_socket.h" ++#include "cpt_net.h" ++#include "cpt_ubc.h" ++#include "cpt_kernel.h" ++ ++static int rst_utsname(cpt_context_t *ctx); ++ ++ ++struct thr_context { ++ struct completion init_complete; ++ struct completion task_done; ++ int error; ++ struct cpt_context *ctx; ++ cpt_object_t *tobj; ++}; ++ ++static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx); ++ ++static int vps_rst_veinfo(struct cpt_context *ctx) ++{ ++ int err; ++ struct cpt_veinfo_image *i; ++ struct ve_struct *ve; ++ struct timespec delta; ++ loff_t start, end; ++ ++ err = rst_get_section(CPT_SECT_VEINFO, ctx, &start, &end); ++ if (err) ++ goto out; ++ ++ i = cpt_get_buf(ctx); ++ err = rst_get_object(CPT_OBJ_VEINFO, start, i, ctx); ++ if (err) ++ goto out_rel; ++ ++ ve = get_exec_env(); ++ ve->_shm_ctlall = i->shm_ctl_all; ++ ve->_shm_ctlmax = i->shm_ctl_max; ++ ve->_shm_ctlmni = i->shm_ctl_mni; ++ ++ ve->_msg_ctlmax = i->msg_ctl_max; ++ ve->_msg_ctlmni = i->msg_ctl_mni; ++ ve->_msg_ctlmnb = i->msg_ctl_mnb; ++ ++ BUG_ON(sizeof(ve->_sem_ctls) != 
sizeof(i->sem_ctl_arr)); ++ ve->_sem_ctls[0] = i->sem_ctl_arr[0]; ++ ve->_sem_ctls[1] = i->sem_ctl_arr[1]; ++ ve->_sem_ctls[2] = i->sem_ctl_arr[2]; ++ ve->_sem_ctls[3] = i->sem_ctl_arr[3]; ++ ++ cpt_timespec_import(&delta, i->start_timespec_delta); ++ _set_normalized_timespec(&ve->start_timespec, ++ ve->start_timespec.tv_sec - delta.tv_sec, ++ ve->start_timespec.tv_nsec - delta.tv_nsec); ++ ve->start_jiffies -= i->start_jiffies_delta; ++ // // FIXME: what??? ++ // // ve->start_cycles -= i->start_jiffies_delta * cycles_per_jiffy; ++ ++ err = 0; ++out_rel: ++ cpt_release_buf(ctx); ++out: ++ return err; ++} ++ ++static int vps_rst_reparent_root(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ int err; ++ struct env_create_param2 param; ++ ++ ctx->cpt_jiffies64 = get_jiffies_64(); ++ do_gettimespec(&ctx->delta_time); ++ ++ ctx->delta_time.tv_sec -= ctx->start_time.tv_sec; ++ if (ctx->start_time.tv_nsec > ctx->delta_time.tv_nsec) { ++ ctx->delta_time.tv_sec--; ++ ctx->delta_time.tv_nsec = 1000000000 - (ctx->start_time.tv_nsec - ctx->delta_time.tv_nsec); ++ } else { ++ ctx->delta_time.tv_nsec -= ctx->start_time.tv_nsec; ++ } ++ ++ memset(¶m, 0, sizeof(param)); ++ param.iptables_mask = ctx->iptables_mask; ++ ++ err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK, 2, ¶m, sizeof(param)); ++ if (err < 0) ++ eprintk_ctx("real_env_create: %d\n", err); ++ get_exec_env()->jiffies_fixup = ((ctx->delta_time.tv_sec < 0) ? ++ 0 : timespec_to_jiffies(&ctx->delta_time)) - ++ (unsigned long)(ctx->cpt_jiffies64 - ctx->virt_jiffies64); ++ return err < 0 ? 
err : 0; ++} ++ ++ ++static int hook(void *arg) ++{ ++ struct thr_context *thr_ctx = arg; ++ struct cpt_context *ctx; ++ cpt_object_t *tobj; ++ struct cpt_task_image *ti; ++ int err = 0; ++ ++ current->state = TASK_UNINTERRUPTIBLE; ++ complete(&thr_ctx->init_complete); ++ schedule(); ++ ++ ctx = thr_ctx->ctx; ++ tobj = thr_ctx->tobj; ++ ti = tobj->o_image; ++ ++ current->fs->umask = 0; ++ ++ if (ti->cpt_pid == 1) { ++ err = vps_rst_reparent_root(tobj, ctx); ++ ++ if (err) { ++ rst_report_error(err, ctx); ++ goto out; ++ } ++ ++ memcpy(&get_exec_env()->cap_default, &ti->cpt_ecap, sizeof(kernel_cap_t)); ++ ++ if (ctx->statusfile) { ++ fput(ctx->statusfile); ++ ctx->statusfile = NULL; ++ } ++ ++ if (ctx->lockfile) { ++ mm_segment_t oldfs; ++ ssize_t err = -EINVAL; ++ char b; ++ ++ oldfs = get_fs(); set_fs(KERNEL_DS); ++ if (ctx->lockfile->f_op && ctx->lockfile->f_op->read) ++ err = ctx->lockfile->f_op->read(ctx->lockfile, &b, 1, &ctx->lockfile->f_pos); ++ set_fs(oldfs); ++ fput(ctx->lockfile); ++ ctx->lockfile = NULL; ++ } ++ ++ err = vps_rst_veinfo(ctx); ++ if (err) { ++ eprintk_ctx("rst_veinfo: %d\n", err); ++ goto out; ++ } ++ ++ err = rst_utsname(ctx); ++ if (err) { ++ eprintk_ctx("rst_utsname: %d\n", err); ++ goto out; ++ } ++ ++ err = rst_root_namespace(ctx); ++ if (err) { ++ eprintk_ctx("rst_namespace: %d\n", err); ++ goto out; ++ } ++ ++ if ((err = rst_restore_net(ctx)) != 0) { ++ eprintk_ctx("rst_restore_net: %d\n", err); ++ goto out; ++ } ++ ++ err = rst_sockets(ctx); ++ if (err) { ++ eprintk_ctx("rst_sockets: %d\n", err); ++ goto out; ++ } ++ err = rst_sysv_ipc(ctx); ++ if (err) { ++ eprintk_ctx("rst_sysv_ipc: %d\n", err); ++ goto out; ++ } ++ } ++ ++ do { ++ if (current->user->uid != ti->cpt_user) { ++ struct user_struct *u = alloc_uid(ti->cpt_user); ++ if (!u) { ++ eprintk_ctx("alloc_user\n"); ++ } else { ++ switch_uid(u); ++ } ++ } ++ } while (0); ++ ++ if ((err = rst_mm_complete(ti, ctx)) != 0) { ++ eprintk_ctx("rst_mm: %d\n", err); ++ goto out; ++ } ++ 
++ if ((err = rst_files_complete(ti, ctx)) != 0) { ++ eprintk_ctx("rst_files: %d\n", err); ++ goto out; ++ } ++ ++ if ((err = rst_fs_complete(ti, ctx)) != 0) { ++ eprintk_ctx("rst_fs: %d\n", err); ++ goto out; ++ } ++ ++ if ((err = rst_semundo_complete(ti, ctx)) != 0) { ++ eprintk_ctx("rst_semundo: %d\n", err); ++ goto out; ++ } ++ ++ if ((err = rst_signal_complete(ti, ctx)) != 0) { ++ eprintk_ctx("rst_signal: %d\n", err); ++ goto out; ++ } ++ ++ if (ti->cpt_namespace == CPT_NULL) ++ exit_namespace(current); ++ ++ if (ti->cpt_personality != 0) ++ __set_personality(ti->cpt_personality); ++ ++ current->set_child_tid = NULL; ++ current->clear_child_tid = NULL; ++ current->flags &= ~(PF_FORKNOEXEC|PF_SUPERPRIV); ++ current->flags |= ti->cpt_flags&(PF_FORKNOEXEC|PF_SUPERPRIV); ++ current->exit_code = ti->cpt_exit_code; ++ current->pdeath_signal = ti->cpt_pdeath_signal; ++ ++ if (ti->cpt_restart.fn != CPT_RBL_0) { ++ if (ti->cpt_restart.fn != CPT_RBL_NANOSLEEP ++ && ti->cpt_restart.fn != CPT_RBL_COMPAT_NANOSLEEP ++ ) { ++ eprintk_ctx("unknown restart block\n"); ++ } else { ++ current->thread_info->restart_block.fn = nanosleep_restart; ++#ifdef CONFIG_X86_64 ++ if (!ti->cpt_64bit) ++ current->thread_info->restart_block.fn = compat_nanosleep_restart; ++#endif ++ if (ctx->image_version != 0) { ++ current->thread_info->restart_block.arg0 = ti->cpt_restart.arg0; ++ current->thread_info->restart_block.arg1 = ti->cpt_restart.arg1; ++ current->thread_info->restart_block.arg2 = ti->cpt_restart.arg2; ++ current->thread_info->restart_block.arg3 = ti->cpt_restart.arg3; ++ if (debug_level > 2) { ++ ktime_t e, e1; ++ struct timespec now; ++ ++ do_posix_clock_monotonic_gettime(&now); ++ e = timespec_to_ktime(now); ++ e1.tv64 = ((u64)current->thread_info->restart_block.arg1 << 32) | (u64) current->thread_info->restart_block.arg0; ++ e = ktime_sub(e1, e); ++ dprintk("rst " CPT_FID " RBL %ld/%ld %Ld\n", CPT_TID(current), ++ current->thread_info->restart_block.arg1, ++ 
current->thread_info->restart_block.arg0, e.tv64); ++ } ++ } else { ++ struct timespec now; ++ ktime_t expire; ++ unsigned long val = ti->cpt_restart.arg0 - ++ timespec_to_jiffies(&ctx->delta_time); ++ if ((long)val <= 0) ++ val = 1; ++ do_posix_clock_monotonic_gettime(&now); ++ expire = ktime_add_ns(timespec_to_ktime(now), (u64)val*TICK_NSEC); ++ current->thread_info->restart_block.arg0 = expire.tv64 & 0xFFFFFFFF; ++ current->thread_info->restart_block.arg1 = expire.tv64 >> 32; ++ current->thread_info->restart_block.arg2 = ti->cpt_restart.arg1; ++ current->thread_info->restart_block.arg3 = CLOCK_MONOTONIC; ++ } ++ } ++ } ++ ++ if (thread_group_leader(current)) { ++ current->signal->it_real_incr.tv64 = 0; ++ if (ctx->image_version != 0) { ++ ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr); ++ } else { ++ ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr*TICK_NSEC); ++ } ++ current->signal->it_prof_incr = ti->cpt_it_prof_incr; ++ current->signal->it_virt_incr = ti->cpt_it_virt_incr; ++ current->signal->it_prof_expires = ti->cpt_it_prof_value; ++ current->signal->it_virt_expires = ti->cpt_it_virt_value; ++ } ++ ++ err = rst_clone_children(tobj, ctx); ++ if (err) { ++ eprintk_ctx("rst_clone_children\n"); ++ goto out; ++ } ++ ++ if (ti->cpt_pid == 1) { ++ if ((err = rst_process_linkage(ctx)) != 0) { ++ eprintk_ctx("rst_process_linkage: %d\n", err); ++ goto out; ++ } ++ if ((err = rst_do_filejobs(ctx)) != 0) { ++ eprintk_ctx("rst_do_filejobs: %d\n", err); ++ goto out; ++ } ++ if ((err = rst_eventpoll(ctx)) != 0) { ++ eprintk_ctx("rst_eventpoll: %d\n", err); ++ goto out; ++ } ++ if ((err = rst_sockets_complete(ctx)) != 0) { ++ eprintk_ctx("rst_sockets_complete: %d\n", err); ++ goto out; ++ } ++ if ((err = rst_stray_files(ctx)) != 0) { ++ eprintk_ctx("rst_stray_files: %d\n", err); ++ goto out; ++ } ++ if ((err = rst_posix_locks(ctx)) != 0) { ++ eprintk_ctx("rst_posix_locks: %d\n", err); ++ goto out; ++ } ++ if ((err = 
rst_tty_jobcontrol(ctx)) != 0) { ++ eprintk_ctx("rst_tty_jobcontrol: %d\n", err); ++ goto out; ++ } ++ if ((err = rst_restore_fs(ctx)) != 0) { ++ eprintk_ctx("rst_restore_fs: %d\n", err); ++ goto out; ++ } ++ } ++ ++out: ++ thr_ctx->error = err; ++ lock_kernel(); ++ complete(&thr_ctx->task_done); ++ ++ if (!err && (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) { ++ preempt_disable(); ++ current->exit_state = EXIT_ZOMBIE; ++ write_lock_irq(&tasklist_lock); ++ nr_zombie++; ++ write_unlock_irq(&tasklist_lock); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) ++ atomic_dec(¤t->signal->live); ++#endif ++ current->flags |= PF_DEAD; ++ if (!(ti->cpt_flags&PF_DEAD)) ++ wprintk_ctx("zombie %d,%d(%s) is not pf_dead\n", current->pid, virt_pid(current), current->comm); ++ module_put(current->thread_info->exec_domain->module); ++ if (current->binfmt) ++ module_put(current->binfmt->module); ++ } else { ++ __set_current_state(TASK_UNINTERRUPTIBLE); ++ } ++ ++ schedule(); ++ ++ dprintk_ctx("leaked through %d/%d %p\n", current->pid, virt_pid(current), current->mm); ++ ++ module_put(THIS_MODULE); ++ complete_and_exit(NULL, 0); ++ return 0; ++} ++ ++#if 0 ++static void set_task_ubs(struct cpt_task_image *ti, struct cpt_context *ctx) ++{ ++ struct task_beancounter *tbc; ++ ++ tbc = task_bc(current); ++ ++ put_beancounter(tbc->fork_sub); ++ tbc->fork_sub = rst_lookup_ubc(ti->cpt_task_ub, ctx); ++ if (ti->cpt_mm_ub != CPT_NULL) { ++ put_beancounter(tbc->exec_ub); ++ tbc->exec_ub = rst_lookup_ubc(ti->cpt_mm_ub, ctx); ++ } ++} ++#endif ++ ++static int create_root_task(cpt_object_t *obj, struct cpt_context *ctx, ++ struct thr_context *thr_ctx) ++{ ++ task_t *tsk; ++ int pid; ++ ++ thr_ctx->ctx = ctx; ++ thr_ctx->error = 0; ++ init_completion(&thr_ctx->init_complete); ++ init_completion(&thr_ctx->task_done); ++#if 0 ++ set_task_ubs(obj->o_image, ctx); ++#endif ++ ++ pid = local_kernel_thread(hook, thr_ctx, 0, 0); ++ if (pid < 0) ++ return pid; ++ read_lock(&tasklist_lock); ++ tsk = 
find_task_by_pid_ve(pid); ++ if (tsk) ++ get_task_struct(tsk); ++ read_unlock(&tasklist_lock); ++ if (tsk == NULL) ++ return -ESRCH; ++ cpt_obj_setobj(obj, tsk, ctx); ++ thr_ctx->tobj = obj; ++ return 0; ++} ++ ++static int rst_basic_init_task(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ task_t *tsk = obj->o_obj; ++ struct cpt_task_image *ti = obj->o_image; ++ ++ memcpy(tsk->comm, ti->cpt_comm, sizeof(tsk->comm)); ++ rst_mm_basic(obj, ti, ctx); ++ return 0; ++} ++ ++static int make_baby(cpt_object_t *cobj, ++ struct cpt_task_image *pi, ++ struct cpt_context *ctx) ++{ ++ unsigned long flags; ++ struct cpt_task_image *ci = cobj->o_image; ++ struct thr_context thr_ctx; ++ task_t *tsk; ++ pid_t pid; ++ ++ flags = rst_mm_flag(ci, ctx) | rst_files_flag(ci, ctx) ++ | rst_signal_flag(ci, ctx) | rst_semundo_flag(ci, ctx); ++ if (ci->cpt_rppid != pi->cpt_pid) { ++ flags |= CLONE_THREAD|CLONE_PARENT; ++ if (ci->cpt_signal != pi->cpt_signal || ++ !(flags&CLONE_SIGHAND) || ++ (!(flags&CLONE_VM) && pi->cpt_mm != CPT_NULL)) { ++ eprintk_ctx("something is wrong with threads: %d %d %d %Ld %Ld %08lx\n", ++ (int)ci->cpt_pid, (int)ci->cpt_rppid, (int)pi->cpt_pid, ++ ci->cpt_signal, pi->cpt_signal, flags ++ ); ++ return -EINVAL; ++ } ++ } ++ ++ thr_ctx.ctx = ctx; ++ thr_ctx.error = 0; ++ init_completion(&thr_ctx.init_complete); ++ init_completion(&thr_ctx.task_done); ++ thr_ctx.tobj = cobj; ++ ++#if 0 ++ set_task_ubs(ci, ctx); ++#endif ++ ++ pid = local_kernel_thread(hook, &thr_ctx, flags, ci->cpt_pid); ++ if (pid < 0) ++ return pid; ++ ++ read_lock(&tasklist_lock); ++ tsk = find_task_by_pid_ve(pid); ++ if (tsk) ++ get_task_struct(tsk); ++ read_unlock(&tasklist_lock); ++ if (tsk == NULL) ++ return -ESRCH; ++ cpt_obj_setobj(cobj, tsk, ctx); ++ thr_ctx.tobj = cobj; ++ wait_for_completion(&thr_ctx.init_complete); ++#ifdef CONFIG_SMP ++ wait_task_inactive(cobj->o_obj); ++#endif ++ rst_basic_init_task(cobj, ctx); ++ ++ /* clone() increases group_stop_count if it was not zero and ++ * 
CLONE_THREAD was asked. Undo. ++ */ ++ if (current->signal->group_stop_count && (flags & CLONE_THREAD)) { ++ if (tsk->signal != current->signal) BUG(); ++ current->signal->group_stop_count--; ++ } ++ ++ wake_up_process(tsk); ++ wait_for_completion(&thr_ctx.task_done); ++ wait_task_inactive(tsk); ++ ++ return thr_ctx.error; ++} ++ ++static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx) ++{ ++ int err = 0; ++ struct cpt_task_image *ti = obj->o_image; ++ cpt_object_t *cobj; ++ ++ for_each_object(cobj, CPT_OBJ_TASK) { ++ struct cpt_task_image *ci = cobj->o_image; ++ if (cobj == obj) ++ continue; ++ if ((ci->cpt_rppid == ti->cpt_pid && ci->cpt_tgid == ci->cpt_pid) || ++ (ci->cpt_leader == ti->cpt_pid && ++ ci->cpt_tgid != ci->cpt_pid && ci->cpt_pid != 1)) { ++ err = make_baby(cobj, ti, ctx); ++ if (err) { ++ eprintk_ctx("make_baby: %d\n", err); ++ return err; ++ } ++ } ++ } ++ return 0; ++} ++ ++static int read_task_images(struct cpt_context *ctx) ++{ ++ int err; ++ loff_t start, end; ++ ++ err = rst_get_section(CPT_SECT_TASKS, ctx, &start, &end); ++ if (err) ++ return err; ++ ++ while (start < end) { ++ cpt_object_t *obj; ++ struct cpt_task_image *ti = cpt_get_buf(ctx); ++ ++ err = rst_get_object(CPT_OBJ_TASK, start, ti, ctx); ++ if (err) { ++ cpt_release_buf(ctx); ++ return err; ++ } ++ if (ti->cpt_pid != 1 && !__is_virtual_pid(ti->cpt_pid)) { ++ eprintk_ctx("BUG: pid %d is not virtual\n", ti->cpt_pid); ++ cpt_release_buf(ctx); ++ return -EINVAL; ++ } ++ obj = alloc_cpt_object(GFP_KERNEL, ctx); ++ cpt_obj_setpos(obj, start, ctx); ++ intern_cpt_object(CPT_OBJ_TASK, obj, ctx); ++ obj->o_image = kmalloc(ti->cpt_next, GFP_KERNEL); ++ if (obj->o_image == NULL) { ++ cpt_release_buf(ctx); ++ return -ENOMEM; ++ } ++ memcpy(obj->o_image, ti, sizeof(*ti)); ++ err = ctx->pread(obj->o_image + sizeof(*ti), ++ ti->cpt_next - sizeof(*ti), ctx, start + sizeof(*ti)); ++ cpt_release_buf(ctx); ++ if (err) ++ return err; ++ start += ti->cpt_next; ++ } ++ return 0; ++} 
++ ++ ++static int vps_rst_restore_tree(struct cpt_context *ctx) ++{ ++ int err; ++ cpt_object_t *obj; ++ struct thr_context thr_ctx_root; ++ ++ err = read_task_images(ctx); ++ if (err) ++ return err; ++ ++ err = rst_undump_ubc(ctx); ++ if (err) ++ return err; ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ err = create_root_task(obj, ctx, &thr_ctx_root); ++ if (err) ++ return err; ++ ++ wait_for_completion(&thr_ctx_root.init_complete); ++#ifdef CONFIG_SMP ++ wait_task_inactive(obj->o_obj); ++#endif ++ rst_basic_init_task(obj, ctx); ++ ++ wake_up_process(obj->o_obj); ++ wait_for_completion(&thr_ctx_root.task_done); ++ wait_task_inactive(obj->o_obj); ++ err = thr_ctx_root.error; ++ if (err) ++ return err; ++ break; ++ } ++ ++ return err; ++} ++ ++ ++int vps_rst_undump(struct cpt_context *ctx) ++{ ++ int err; ++ unsigned long umask; ++ ++ err = rst_open_dumpfile(ctx); ++ if (err) ++ return err; ++ ++#ifndef CONFIG_X86_64 ++ if (ctx->tasks64) { ++ eprintk_ctx("Cannot restore 64 bit VE on this architecture\n"); ++ return -EINVAL; ++ } ++#endif ++ ++ umask = current->fs->umask; ++ current->fs->umask = 0; ++ ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ err = rst_setup_pagein(ctx); ++#endif ++ ++ if (err == 0) ++ err = vps_rst_restore_tree(ctx); ++ ++ if (err == 0) ++ err = rst_restore_process(ctx); ++ ++ current->fs->umask = umask; ++ ++ return err; ++} ++ ++static int rst_unlock_ve(struct cpt_context *ctx) ++{ ++ struct ve_struct *env; ++ ++ env = get_ve_by_id(ctx->ve_id); ++ if (!env) ++ return -ESRCH; ++ down_write(&env->op_sem); ++ env->is_locked = 0; ++ up_write(&env->op_sem); ++ put_ve(env); ++ return 0; ++} ++ ++int rst_resume(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ int err = 0; ++ ++ for_each_object(obj, CPT_OBJ_FILE) { ++ struct file *file = obj->o_obj; ++ ++ fput(file); ++ } ++ ++ rst_resume_network(ctx); ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ struct cpt_task_image *ti = obj->o_image; ++ ++ if (!tsk) ++ continue; ++ ++ 
if (ti->cpt_state == TASK_UNINTERRUPTIBLE) { ++ dprintk_ctx("task %d/%d(%s) is started\n", virt_pid(tsk), tsk->pid, tsk->comm); ++ ++ /* Weird... If a signal is sent to stopped task, ++ * nobody makes recalc_sigpending(). We have to do ++ * this by hands after wake_up_process(). ++ * if we did this before a signal could arrive before ++ * wake_up_process() and stall. ++ */ ++ spin_lock_irq(&tsk->sighand->siglock); ++ if (!signal_pending(tsk)) ++ recalc_sigpending_tsk(tsk); ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ wake_up_process(tsk); ++ } else { ++ if (ti->cpt_state == TASK_STOPPED || ++ ti->cpt_state == TASK_TRACED) { ++ set_task_state(tsk, ti->cpt_state); ++ } ++ } ++ put_task_struct(tsk); ++ } ++ ++ rst_unlock_ve(ctx); ++ ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ rst_complete_pagein(ctx, 0); ++#endif ++ ++ rst_finish_ubc(ctx); ++ cpt_object_destroy(ctx); ++ ++ return err; ++} ++ ++int rst_kill(struct cpt_context *ctx) ++{ ++ cpt_object_t *obj; ++ int err = 0; ++ ++ for_each_object(obj, CPT_OBJ_FILE) { ++ struct file *file = obj->o_obj; ++ ++ fput(file); ++ } ++ ++ for_each_object(obj, CPT_OBJ_TASK) { ++ task_t *tsk = obj->o_obj; ++ ++ if (tsk == NULL) ++ continue; ++ ++ if (tsk->exit_state == 0) { ++ send_sig(SIGKILL, tsk, 1); ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ sigfillset(&tsk->blocked); ++ sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); ++ set_tsk_thread_flag(tsk, TIF_SIGPENDING); ++ clear_tsk_thread_flag(tsk, TIF_FREEZE); ++ if (tsk->flags & PF_FROZEN) ++ tsk->flags &= ~PF_FROZEN; ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ wake_up_process(tsk); ++ } ++ ++ put_task_struct(tsk); ++ } ++ ++#ifdef CONFIG_VZ_CHECKPOINT_LAZY ++ rst_complete_pagein(ctx, 1); ++#endif ++ ++ rst_finish_ubc(ctx); ++ cpt_object_destroy(ctx); ++ ++ return err; ++} ++ ++static int rst_utsname(cpt_context_t *ctx) ++{ ++ int err; ++ loff_t sec = ctx->sections[CPT_SECT_UTSNAME]; ++ loff_t endsec; ++ struct cpt_section_hdr h; ++ struct cpt_object_hdr o; ++ int i; ++ ++ 
if (sec == CPT_NULL) ++ return 0; ++ ++ err = ctx->pread(&h, sizeof(h), ctx, sec); ++ if (err) ++ return err; ++ if (h.cpt_section != CPT_SECT_UTSNAME || h.cpt_hdrlen < sizeof(h)) ++ return -EINVAL; ++ ++ i = 0; ++ endsec = sec + h.cpt_next; ++ sec += h.cpt_hdrlen; ++ while (sec < endsec) { ++ int len; ++ char *ptr; ++ err = rst_get_object(CPT_OBJ_NAME, sec, &o, ctx); ++ if (err) ++ return err; ++ len = o.cpt_next - o.cpt_hdrlen; ++ if (len > __NEW_UTS_LEN+1) ++ return -ENAMETOOLONG; ++ switch (i) { ++ case 0: ++ ptr = ve_utsname.nodename; break; ++ case 1: ++ ptr = ve_utsname.domainname; break; ++ default: ++ return -EINVAL; ++ } ++ err = ctx->pread(ptr, len, ctx, sec+o.cpt_hdrlen); ++ if (err) ++ return err; ++ i++; ++ sec += o.cpt_next; ++ } ++ ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/cpt/rst_x8664.S linux-2.6.16-026test015/kernel/cpt/rst_x8664.S +--- linux-2.6.16.orig/kernel/cpt/rst_x8664.S 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/cpt/rst_x8664.S 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,61 @@ ++#define ASSEMBLY 1 ++ ++#include <linux/config.h> ++#include <linux/linkage.h> ++#include <asm/segment.h> ++#include <asm/smp.h> ++#include <asm/cache.h> ++#include <asm/errno.h> ++#include <asm/dwarf2.h> ++#include <asm/calling.h> ++#include <asm/msr.h> ++#include <asm/unistd.h> ++#include <asm/thread_info.h> ++#include <asm/hw_irq.h> ++#include <asm/errno.h> ++#include <asm/asm-offsets.h> ++ ++ .code64 ++ .global schedule_tail_hook, schedule_tail_p ++ .align 8 ++schedule_tail_hook: ++ movq schedule_tail_p(%rip),%r11 ++ call *%r11 ++ GET_THREAD_INFO(%rcx) ++ btr $22,threadinfo_flags(%rcx) /* TIF_RESUME */ ++ jc 1f ++ retq ++ ++ /* If TIF_RESUME is set, (%rsp) is pointer to hook function ++ * the hook will do the work and jump to the next hook, ++ * everything should end at ret_from_fork+5. 
++ */ ++1: addq $8,%rsp ++ retq ++ ++ .align 8 ++ .global ret_from_fork2 ++ret_from_fork2: ++ cmpq $0,ORIG_RAX(%rsp) ++ jge ret_from_fork+5 ++ RESTORE_REST ++ jmp int_ret_from_sys_call ++ ++ .align 8 ++ .global ret_last_siginfo ++ret_last_siginfo: ++ call rlsi ++ movq %rax,%rsp ++ retq ++ ++ .align 8 ++ .global ret_child_tid ++ret_child_tid: ++ movq %rsp,%rdi ++ call rct ++ movq %rax,%rsp ++ retq ++ ++ .data ++schedule_tail_p: ++ .quad 0 +diff -upr linux-2.6.16.orig/kernel/cpu.c linux-2.6.16-026test015/kernel/cpu.c +--- linux-2.6.16.orig/kernel/cpu.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/cpu.c 2006-07-04 14:41:39.000000000 +0400 +@@ -21,6 +21,11 @@ static DECLARE_MUTEX(cpucontrol); + static struct notifier_block *cpu_chain; + + #ifdef CONFIG_HOTPLUG_CPU ++ ++#ifdef CONFIG_SCHED_VCPU ++#error "CONFIG_HOTPLUG_CPU isn't supported with CONFIG_SCHED_VCPU" ++#endif ++ + static struct task_struct *lock_cpu_hotplug_owner; + static int lock_cpu_hotplug_depth; + +@@ -95,8 +100,8 @@ static inline void check_for_tasks(int c + struct task_struct *p; + + write_lock_irq(&tasklist_lock); +- for_each_process(p) { +- if (task_cpu(p) == cpu && ++ for_each_process_all(p) { ++ if (task_pcpu(p) == cpu && + (!cputime_eq(p->utime, cputime_zero) || + !cputime_eq(p->stime, cputime_zero))) + printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\ +@@ -106,6 +111,13 @@ static inline void check_for_tasks(int c + write_unlock_irq(&tasklist_lock); + } + ++#ifdef CONFIG_SCHED_VCPU ++#error VCPU vs. HOTPLUG: fix hotplug code below ++/* ++ * What should be fixed: ++ * - check for if (idle_cpu()) yield() ++ */ ++#endif + /* Take this CPU down. 
*/ + static int take_cpu_down(void *unused) + { +diff -upr linux-2.6.16.orig/kernel/cpuset.c linux-2.6.16-026test015/kernel/cpuset.c +--- linux-2.6.16.orig/kernel/cpuset.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/cpuset.c 2006-07-04 14:41:38.000000000 +0400 +@@ -897,7 +897,7 @@ static int update_nodemask(struct cpuset + n = 0; + + /* Load up mmarray[] with mm reference for each task in cpuset. */ +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + struct mm_struct *mm; + + if (n >= ntasks) { +@@ -911,7 +911,7 @@ static int update_nodemask(struct cpuset + if (!mm) + continue; + mmarray[n++] = mm; +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + write_unlock_irq(&tasklist_lock); + + /* +@@ -1125,7 +1125,7 @@ static int attach_task(struct cpuset *cs + if (pid) { + read_lock(&tasklist_lock); + +- tsk = find_task_by_pid(pid); ++ tsk = find_task_by_pid_all(pid); + if (!tsk || tsk->flags & PF_EXITING) { + read_unlock(&tasklist_lock); + return -ESRCH; +@@ -1561,13 +1561,13 @@ static int pid_array_load(pid_t *pidarra + + read_lock(&tasklist_lock); + +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + if (p->cpuset == cs) { + pidarray[n++] = p->pid; + if (unlikely(n == npids)) + goto array_full; + } +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + + array_full: + read_unlock(&tasklist_lock); +diff -upr linux-2.6.16.orig/kernel/exec_domain.c linux-2.6.16-026test015/kernel/exec_domain.c +--- linux-2.6.16.orig/kernel/exec_domain.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/exec_domain.c 2006-07-04 14:41:36.000000000 +0400 +@@ -140,6 +140,7 @@ __set_personality(u_long personality) + ep = lookup_exec_domain(personality); + if (ep == current_thread_info()->exec_domain) { + current->personality = personality; ++ module_put(ep->module); + return 0; + } + +diff -upr linux-2.6.16.orig/kernel/exit.c linux-2.6.16-026test015/kernel/exit.c +--- linux-2.6.16.orig/kernel/exit.c 
2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/exit.c 2006-07-04 14:41:39.000000000 +0400 +@@ -42,7 +42,7 @@ extern struct task_struct *child_reaper; + + int getrusage(struct task_struct *, int, struct rusage __user *); + +-static void exit_mm(struct task_struct * tsk); ++void exit_mm(struct task_struct * tsk); + + static void __unhash_process(struct task_struct *p) + { +@@ -57,18 +57,19 @@ static void __unhash_process(struct task + } + + REMOVE_LINKS(p); ++ REMOVE_VE_LINKS(p); + } + + void release_task(struct task_struct * p) + { + int zap_leader; + task_t *leader; +- struct dentry *proc_dentry; ++ struct dentry *proc_dentry[2]; + + repeat: + atomic_dec(&p->user->processes); + spin_lock(&p->proc_lock); +- proc_dentry = proc_pid_unhash(p); ++ proc_pid_unhash(p, proc_dentry); + write_lock_irq(&tasklist_lock); + if (unlikely(p->ptrace)) + __ptrace_unlink(p); +@@ -80,6 +81,8 @@ repeat: + * the process by __unhash_process. + */ + __unhash_process(p); ++ nr_zombie--; ++ atomic_inc(&nr_dead); + + /* + * If we are the last non-leader member of the thread +@@ -107,6 +110,10 @@ repeat: + spin_unlock(&p->proc_lock); + proc_pid_flush(proc_dentry); + release_thread(p); ++#ifdef CONFIG_VE ++ if (atomic_dec_and_test(&VE_TASK_INFO(p)->owner_env->pcounter)) ++ do_env_cleanup(VE_TASK_INFO(p)->owner_env); ++#endif + put_task_struct(p); + + p = leader; +@@ -118,10 +125,10 @@ repeat: + + void unhash_process(struct task_struct *p) + { +- struct dentry *proc_dentry; ++ struct dentry *proc_dentry[2]; + + spin_lock(&p->proc_lock); +- proc_dentry = proc_pid_unhash(p); ++ proc_pid_unhash(p, proc_dentry); + write_lock_irq(&tasklist_lock); + __unhash_process(p); + write_unlock_irq(&tasklist_lock); +@@ -139,14 +146,16 @@ int session_of_pgrp(int pgrp) + struct task_struct *p; + int sid = -1; + ++ WARN_ON(is_virtual_pid(pgrp)); ++ + read_lock(&tasklist_lock); +- do_each_task_pid(pgrp, PIDTYPE_PGID, p) { ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) { + if 
(p->signal->session > 0) { + sid = p->signal->session; + goto out; + } +- } while_each_task_pid(pgrp, PIDTYPE_PGID, p); +- p = find_task_by_pid(pgrp); ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p); ++ p = find_task_by_pid_ve(pgrp); + if (p) + sid = p->signal->session; + out: +@@ -168,17 +177,19 @@ static int will_become_orphaned_pgrp(int + struct task_struct *p; + int ret = 1; + +- do_each_task_pid(pgrp, PIDTYPE_PGID, p) { ++ WARN_ON(is_virtual_pid(pgrp)); ++ ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) { + if (p == ignored_task + || p->exit_state +- || p->real_parent->pid == 1) ++ || virt_pid(p->real_parent) == 1) + continue; + if (process_group(p->real_parent) != pgrp + && p->real_parent->signal->session == p->signal->session) { + ret = 0; + break; + } +- } while_each_task_pid(pgrp, PIDTYPE_PGID, p); ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p); + return ret; /* (sighing) "Often!" */ + } + +@@ -186,6 +197,8 @@ int is_orphaned_pgrp(int pgrp) + { + int retval; + ++ WARN_ON(is_virtual_pid(pgrp)); ++ + read_lock(&tasklist_lock); + retval = will_become_orphaned_pgrp(pgrp, NULL); + read_unlock(&tasklist_lock); +@@ -198,7 +211,7 @@ static int has_stopped_jobs(int pgrp) + int retval = 0; + struct task_struct *p; + +- do_each_task_pid(pgrp, PIDTYPE_PGID, p) { ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) { + if (p->state != TASK_STOPPED) + continue; + +@@ -214,7 +227,7 @@ static int has_stopped_jobs(int pgrp) + + retval = 1; + break; +- } while_each_task_pid(pgrp, PIDTYPE_PGID, p); ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p); + return retval; + } + +@@ -263,6 +276,9 @@ void __set_special_pids(pid_t session, p + { + struct task_struct *curr = current->group_leader; + ++ WARN_ON(is_virtual_pid(pgrp)); ++ WARN_ON(is_virtual_pid(session)); ++ + if (curr->signal->session != session) { + detach_pid(curr, PIDTYPE_SID); + curr->signal->session = session; +@@ -281,6 +297,7 @@ void set_special_pids(pid_t session, pid + __set_special_pids(session, pgrp); + 
write_unlock_irq(&tasklist_lock); + } ++EXPORT_SYMBOL(set_special_pids); + + /* + * Let kernel threads use this to say that they +@@ -500,7 +517,7 @@ EXPORT_SYMBOL_GPL(exit_fs); + * Turn us into a lazy TLB process if we + * aren't already.. + */ +-static void exit_mm(struct task_struct * tsk) ++void exit_mm(struct task_struct * tsk) + { + struct mm_struct *mm = tsk->mm; + +@@ -535,6 +552,7 @@ static void exit_mm(struct task_struct * + task_unlock(tsk); + mmput(mm); + } ++EXPORT_SYMBOL_GPL(exit_mm); + + static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper) + { +@@ -613,13 +631,12 @@ static void reparent_thread(task_t *p, t + static void forget_original_parent(struct task_struct * father, + struct list_head *to_release) + { +- struct task_struct *p, *reaper = father; ++ struct task_struct *p, *tsk_reaper, *reaper = father; + struct list_head *_p, *_n; + + do { + reaper = next_thread(reaper); + if (reaper == father) { +- reaper = child_reaper; + break; + } + } while (reaper->exit_state); +@@ -641,9 +658,16 @@ static void forget_original_parent(struc + /* if father isn't the real parent, then ptrace must be enabled */ + BUG_ON(father != p->real_parent && !ptrace); + ++ tsk_reaper = reaper; ++ if (tsk_reaper == father) ++#ifdef CONFIG_VE ++ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry; ++ if (tsk_reaper == p) ++#endif ++ tsk_reaper = child_reaper; + if (father == p->real_parent) { +- /* reparent with a reaper, real father it's us */ +- choose_new_parent(p, reaper, child_reaper); ++ /* reparent with a tsk_reaper, real father it's us */ ++ choose_new_parent(p, tsk_reaper, child_reaper); + reparent_thread(p, father, 0); + } else { + /* reparent ptraced task to its real parent */ +@@ -664,7 +688,15 @@ static void forget_original_parent(struc + } + list_for_each_safe(_p, _n, &father->ptrace_children) { + p = list_entry(_p,struct task_struct,ptrace_list); +- choose_new_parent(p, reaper, child_reaper); ++ ++ tsk_reaper = reaper; ++ if 
(tsk_reaper == father) ++#ifdef CONFIG_VE ++ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry; ++ if (tsk_reaper == p) ++#endif ++ tsk_reaper = child_reaper; ++ choose_new_parent(p, tsk_reaper, child_reaper); + reparent_thread(p, father, 1); + } + } +@@ -760,6 +792,9 @@ static void exit_notify(struct task_stru + && !capable(CAP_KILL)) + tsk->exit_signal = SIGCHLD; + ++ if (tsk->exit_signal != -1 && t == child_reaper) ++ /* We dont want people slaying init. */ ++ tsk->exit_signal = SIGCHLD; + + /* If something other than our normal parent is ptracing us, then + * send it a SIGCHLD instead of honoring exit_signal. exit_signal +@@ -778,6 +813,7 @@ static void exit_notify(struct task_stru + unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT))) + state = EXIT_DEAD; + tsk->exit_state = state; ++ nr_zombie++; + + write_unlock_irq(&tasklist_lock); + +@@ -792,6 +828,82 @@ static void exit_notify(struct task_stru + release_task(tsk); + } + ++#ifdef CONFIG_VE ++/* ++ * Handle exitting of init process, it's a special case for VE. ++ */ ++static void do_initproc_exit(void) ++{ ++ struct task_struct *tsk; ++ struct ve_struct *env; ++ struct siginfo info; ++ struct task_struct *g, *p; ++ long delay = 1L; ++ ++ tsk = current; ++ env = VE_TASK_INFO(current)->owner_env; ++ if (env->init_entry != tsk) ++ return; ++ ++ if (ve_is_super(env) && tsk->pid == 1) ++ panic("Attempted to kill init!"); ++ ++ memset(&info, 0, sizeof(info)); ++ info.si_errno = 0; ++ info.si_code = SI_KERNEL; ++ info.si_pid = virt_pid(tsk); ++ info.si_uid = current->uid; ++ info.si_signo = SIGKILL; ++ ++ /* ++ * Here the VE changes its state into "not running". ++ * op_sem taken for write is a barrier to all VE manipulations from ++ * ioctl: it waits for operations currently in progress and blocks all ++ * subsequent operations until is_running is set to 0 and op_sem is ++ * released. 
++ */ ++ down_write(&env->op_sem); ++ env->is_running = 0; ++ up_write(&env->op_sem); ++ ++ /* send kill to all processes of VE */ ++ read_lock(&tasklist_lock); ++ do_each_thread_ve(g, p) { ++ force_sig_info(SIGKILL, &info, p); ++ } while_each_thread_ve(g, p); ++ read_unlock(&tasklist_lock); ++ ++ /* wait for all init childs exit */ ++ while (atomic_read(&env->pcounter) > 1) { ++ if (sys_wait4(-1, NULL, __WALL | WNOHANG, NULL) > 0) ++ continue; ++ /* it was ENOCHLD or no more children somehow */ ++ if (atomic_read(&env->pcounter) == 1) ++ break; ++ ++ /* clear all signals to avoid wakeups */ ++ if (signal_pending(tsk)) ++ flush_signals(tsk); ++ /* we have child without signal sent */ ++ __set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(delay); ++ delay = (delay < HZ) ? (delay << 1) : HZ; ++ read_lock(&tasklist_lock); ++ do_each_thread_ve(g, p) { ++ if (p != tsk) ++ force_sig_info(SIGKILL, &info, p); ++ } while_each_thread_ve(g, p); ++ read_unlock(&tasklist_lock); ++ } ++ env->init_entry = child_reaper; ++ write_lock_irq(&tasklist_lock); ++ REMOVE_LINKS(tsk); ++ tsk->parent = tsk->real_parent = child_reaper; ++ SET_LINKS(tsk); ++ write_unlock_irq(&tasklist_lock); ++} ++#endif ++ + fastcall NORET_TYPE void do_exit(long code) + { + struct task_struct *tsk = current; +@@ -805,14 +917,20 @@ fastcall NORET_TYPE void do_exit(long co + panic("Aiee, killing interrupt handler!"); + if (unlikely(!tsk->pid)) + panic("Attempted to kill the idle task!"); ++#ifdef CONFIG_VE ++ do_initproc_exit(); ++#else + if (unlikely(tsk->pid == 1)) + panic("Attempted to kill init!"); ++#endif + if (tsk->io_context) + exit_io_context(); + + if (unlikely(current->ptrace & PT_TRACE_EXIT)) { + current->ptrace_message = code; ++ set_pn_state(current, PN_STOP_EXIT); + ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); ++ clear_pn_state(current); + } + + /* +@@ -828,14 +946,6 @@ fastcall NORET_TYPE void do_exit(long co + + tsk->flags |= PF_EXITING; + +- /* +- * Make sure we don't try to 
process any timer firings +- * while we are already exiting. +- */ +- tsk->it_virt_expires = cputime_zero; +- tsk->it_prof_expires = cputime_zero; +- tsk->it_sched_expires = 0; +- + if (unlikely(in_atomic())) + printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", + current->comm, current->pid, +@@ -911,7 +1021,14 @@ asmlinkage long sys_exit(int error_code) + + task_t fastcall *next_thread(const task_t *p) + { +- return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID); ++ task_t *tsk; ++ ++ tsk = pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID); ++#ifdef CONFIG_VE ++ /* all threads should belong to ONE ve! */ ++ BUG_ON(VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(p)->owner_env); ++#endif ++ return tsk; + } + + EXPORT_SYMBOL(next_thread); +@@ -960,14 +1077,19 @@ asmlinkage void sys_exit_group(int error + static int eligible_child(pid_t pid, int options, task_t *p) + { + if (pid > 0) { +- if (p->pid != pid) ++ if ((is_virtual_pid(pid) ? virt_pid(p) : p->pid) != pid) + return 0; + } else if (!pid) { + if (process_group(p) != process_group(current)) + return 0; + } else if (pid != -1) { +- if (process_group(p) != -pid) +- return 0; ++ if (__is_virtual_pid(-pid)) { ++ if (virt_pgid(p) != -pid) ++ return 0; ++ } else { ++ if (process_group(p) != -pid) ++ return 0; ++ } + } + + /* +@@ -1157,7 +1279,7 @@ static int wait_task_zombie(task_t *p, i + p->exit_state = EXIT_ZOMBIE; + return retval; + } +- retval = p->pid; ++ retval = get_task_pid(p); + if (p->real_parent != p->parent) { + write_lock_irq(&tasklist_lock); + /* Double-check with lock held. 
*/ +@@ -1292,7 +1414,7 @@ bail_ref: + if (!retval && infop) + retval = put_user(p->uid, &infop->si_uid); + if (!retval) +- retval = p->pid; ++ retval = get_task_pid(p); + put_task_struct(p); + + BUG_ON(!retval); +@@ -1574,6 +1696,7 @@ asmlinkage long sys_wait4(pid_t pid, int + prevent_tail_call(ret); + return ret; + } ++EXPORT_SYMBOL_GPL(sys_wait4); + + #ifdef __ARCH_WANT_SYS_WAITPID + +diff -upr linux-2.6.16.orig/kernel/fairsched.c linux-2.6.16-026test015/kernel/fairsched.c +--- linux-2.6.16.orig/kernel/fairsched.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/fairsched.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,1288 @@ ++/* ++ * Fair Scheduler ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * Start-tag scheduling follows the theory presented in ++ * http://www.cs.utexas.edu/users/dmcl/papers/ps/SIGCOMM96.ps ++ */ ++ ++#include <linux/config.h> ++#include <linux/kernel.h> ++#include <asm/timex.h> ++#include <asm/atomic.h> ++#include <linux/spinlock.h> ++#include <asm/semaphore.h> ++#include <linux/init.h> ++#include <linux/slab.h> ++#include <linux/proc_fs.h> ++#include <linux/seq_file.h> ++#include <linux/fs.h> ++#include <linux/dcache.h> ++#include <linux/sysctl.h> ++#include <linux/module.h> ++#include <linux/vmalloc.h> ++#include <linux/sched.h> ++#include <linux/fairsched.h> ++#include <linux/vsched.h> ++ ++/* we need it for vsched routines in sched.c */ ++spinlock_t fairsched_lock = SPIN_LOCK_UNLOCKED; ++ ++#ifdef CONFIG_FAIRSCHED ++ ++#define FAIRSHED_DEBUG " debug" ++ ++ ++/*********************************************************************/ ++/* ++ * Special arithmetics ++ */ ++/*********************************************************************/ ++ ++#define CYCLES_SHIFT (8) ++#define SCYCLES_TIME(time) \ ++ ((scycles_t) {((time) + (1 << CYCLES_SHIFT) - 1) >> CYCLES_SHIFT}) ++ ++#define CYCLES_ZERO (0) ++static inline int 
CYCLES_BEFORE(cycles_t x, cycles_t y) ++{ ++ return (__s64)(x-y) < 0; ++} ++static inline int CYCLES_AFTER(cycles_t x, cycles_t y) ++{ ++ return (__s64)(y-x) < 0; ++} ++static inline void CYCLES_DADD(cycles_t *x, fschdur_t y) {*x+=y.d;} ++ ++#define FSCHDUR_ZERO (0) ++#define TICK_DUR ((fschdur_t){cycles_per_jiffy}) ++static inline fschdur_t FSCHDURATION(cycles_t x, cycles_t y) ++{ ++ return (fschdur_t){x - y}; ++} ++static inline int FSCHDUR_CMP(fschdur_t x, fschdur_t y) ++{ ++ if (x.d < y.d) return -1; ++ if (x.d > y.d) return 1; ++ return 0; ++} ++static inline fschdur_t FSCHDUR_SUB(fschdur_t x, fschdur_t y) ++{ ++ return (fschdur_t){x.d - y.d}; ++} ++ ++#define FSCHTAG_ZERO ((fschtag_t){0}) ++static inline int FSCHTAG_CMP(fschtag_t x, fschtag_t y) ++{ ++ if (x.t < y.t) return -1; ++ if (x.t > y.t) return 1; ++ return 0; ++} ++static inline fschtag_t FSCHTAG_MAX(fschtag_t x, fschtag_t y) ++{ ++ return x.t >= y.t ? x : y; ++} ++static inline int FSCHTAG_DADD(fschtag_t *tag, fschdur_t dur, unsigned w) ++{ ++ cycles_t new_tag; ++ new_tag = tag->t + (cycles_t)dur.d * w; ++ if (new_tag < tag->t) ++ return -1; ++ /* DEBUG */ ++ if (new_tag >= (1ULL << 48)) ++ return -1; ++ tag->t = new_tag; ++ return 0; ++} ++static inline int FSCHTAG_ADD(fschtag_t *tag, fschtag_t y) ++{ ++ cycles_t new_tag; ++ new_tag = tag->t + y.t; ++ if (new_tag < tag->t) ++ return -1; ++ tag->t = new_tag; ++ return 0; ++} ++static inline fschtag_t FSCHTAG_SUB(fschtag_t x, fschtag_t y) ++{ ++ return (fschtag_t){x.t - y.t}; ++} ++ ++#define FSCHVALUE_ZERO ((fschvalue_t){0}) ++#define TICK_VALUE ((fschvalue_t){(cycles_t)cycles_per_jiffy << FSCHRATE_SHIFT}) ++static inline fschvalue_t FSCHVALUE(unsigned long t) ++{ ++ return (fschvalue_t){(cycles_t)t << FSCHRATE_SHIFT}; ++} ++static inline int FSCHVALUE_CMP(fschvalue_t x, fschvalue_t y) ++{ ++ if (x.v < y.v) return -1; ++ if (x.v > y.v) return 1; ++ return 0; ++} ++static inline void FSCHVALUE_DADD(fschvalue_t *val, fschdur_t dur, ++ unsigned rate) 
++{ ++ val->v += (cycles_t)dur.d * rate; ++} ++static inline fschvalue_t FSCHVALUE_SUB(fschvalue_t x, fschvalue_t y) ++{ ++ return (fschvalue_t){x.v - y.v}; ++} ++static inline cycles_t FSCHVALUE_TO_DELAY(fschvalue_t val, unsigned rate) ++{ ++ unsigned long t; ++ /* ++ * Here we lose precision to make the division 32-bit on IA-32. ++ * The value is not greater than TICK_VALUE. ++ * (TICK_VALUE >> FSCHRATE_SHIFT) fits unsigned long. ++ */ ++ t = (val.v + (1 << FSCHRATE_SHIFT) - 1) >> FSCHRATE_SHIFT; ++ return (cycles_t)((t + rate - 1) / rate) << FSCHRATE_SHIFT; ++} ++ ++ ++/*********************************************************************/ ++/* ++ * Global data ++ */ ++/*********************************************************************/ ++ ++#define fsch_assert(x) \ ++ do { \ ++ static int count; \ ++ if (!(x) && count++ < 10) \ ++ printk("fsch_assert " #x " failed\n"); \ ++ } while (0) ++ ++/* ++ * Configurable parameters ++ */ ++unsigned fairsched_max_latency = 25; /* jiffies */ ++ ++/* ++ * Parameters initialized at startup ++ */ ++/* Number of online CPUs */ ++unsigned fairsched_nr_cpus; ++/* Token Bucket depth (burst size) */ ++static fschvalue_t max_value; ++ ++struct fairsched_node fairsched_init_node = { ++ .id = INT_MAX, ++#ifdef CONFIG_VE ++ .owner_env = get_ve0(), ++#endif ++ .weight = 1, ++}; ++EXPORT_SYMBOL(fairsched_init_node); ++ ++struct fairsched_node fairsched_idle_node = { ++ .id = -1, ++}; ++ ++static int fairsched_nr_nodes; ++static LIST_HEAD(fairsched_node_head); ++static LIST_HEAD(fairsched_running_head); ++static LIST_HEAD(fairsched_delayed_head); ++ ++DEFINE_PER_CPU(cycles_t, prev_schedule); ++static fschtag_t max_latency; ++ ++static DECLARE_MUTEX(fairsched_mutex); ++ ++/*********************************************************************/ ++/* ++ * Small helper routines ++ */ ++/*********************************************************************/ ++ ++/* this didn't proved to be very valuable statistics... 
*/ ++#define fairsched_inc_ve_strv(node, cycles) do {} while(0) ++#define fairsched_dec_ve_strv(node, cycles) do {} while(0) ++ ++/*********************************************************************/ ++/* ++ * Runlist management ++ */ ++/*********************************************************************/ ++ ++/* ++ * Returns the start_tag of the first runnable node, or 0. ++ */ ++static inline fschtag_t virtual_time(void) ++{ ++ struct fairsched_node *p; ++ ++ if (!list_empty(&fairsched_running_head)) { ++ p = list_first_entry(&fairsched_running_head, ++ struct fairsched_node, runlist); ++ return p->start_tag; ++ } ++ return FSCHTAG_ZERO; ++} ++ ++static void fairsched_recompute_max_latency(void) ++{ ++ struct fairsched_node *p; ++ unsigned w; ++ fschtag_t tag; ++ ++ w = FSCHWEIGHT_MAX; ++ list_for_each_entry(p, &fairsched_node_head, nodelist) { ++ if (p->weight < w) ++ w = p->weight; ++ } ++ tag = FSCHTAG_ZERO; ++ (void) FSCHTAG_DADD(&tag, TICK_DUR, ++ fairsched_nr_cpus * fairsched_max_latency * w); ++ max_latency = tag; ++} ++ ++static void fairsched_reset_start_tags(void) ++{ ++ struct fairsched_node *cnode; ++ fschtag_t min_tag; ++ ++ min_tag = virtual_time(); ++ list_for_each_entry(cnode, &fairsched_node_head, nodelist) { ++ if (FSCHTAG_CMP(cnode->start_tag, min_tag) > 0) ++ cnode->start_tag = FSCHTAG_SUB(cnode->start_tag, ++ min_tag); ++ else ++ cnode->start_tag = FSCHTAG_ZERO; ++ } ++} ++ ++static void fairsched_running_insert(struct fairsched_node *node) ++{ ++ struct list_head *tmp; ++ struct fairsched_node *p; ++ fschtag_t start_tag_max; ++ ++ if (!list_empty(&fairsched_running_head)) { ++ start_tag_max = virtual_time(); ++ if (!FSCHTAG_ADD(&start_tag_max, max_latency) && ++ FSCHTAG_CMP(start_tag_max, node->start_tag) < 0) ++ node->start_tag = start_tag_max; ++ } ++ ++ list_for_each(tmp, &fairsched_running_head) { ++ p = list_entry(tmp, struct fairsched_node, runlist); ++ if (FSCHTAG_CMP(node->start_tag, p->start_tag) <= 0) ++ break; ++ } ++ /* 
insert node just before tmp */ ++ list_add_tail(&node->runlist, tmp); ++} ++ ++static inline void fairsched_running_insert_fromsleep( ++ struct fairsched_node *node) ++{ ++ node->start_tag = FSCHTAG_MAX(node->start_tag, virtual_time()); ++ fairsched_running_insert(node); ++} ++ ++ ++/*********************************************************************/ ++/* ++ * CPU limiting helper functions ++ * ++ * These functions compute rates, delays and manipulate with sleep ++ * lists and so on. ++ */ ++/*********************************************************************/ ++ ++/* ++ * Insert a node into the list of nodes removed from scheduling, ++ * sorted by the time at which the the node is allowed to run, ++ * historically called `delay'. ++ */ ++static void fairsched_delayed_insert(struct fairsched_node *node) ++{ ++ struct fairsched_node *p; ++ struct list_head *tmp; ++ ++ list_for_each(tmp, &fairsched_delayed_head) { ++ p = list_entry(tmp, struct fairsched_node, ++ runlist); ++ if (CYCLES_AFTER(p->delay, node->delay)) ++ break; ++ } ++ /* insert node just before tmp */ ++ list_add_tail(&node->runlist, tmp); ++} ++ ++static inline void nodevalue_add(struct fairsched_node *node, ++ fschdur_t duration, unsigned rate) ++{ ++ FSCHVALUE_DADD(&node->value, duration, rate); ++ if (FSCHVALUE_CMP(node->value, max_value) > 0) ++ node->value = max_value; ++} ++ ++/* ++ * The node has been selected to run. ++ * This function accounts in advance for the time that the node will run. ++ * The advance not used by the node will be credited back. ++ */ ++static void fairsched_ratelimit_charge_advance( ++ struct fairsched_node *node, ++ cycles_t time) ++{ ++ fsch_assert(!node->delayed); ++ fsch_assert(FSCHVALUE_CMP(node->value, TICK_VALUE) >= 0); ++ ++ /* ++ * Account for the time passed since last update. ++ * It might be needed if the node has become runnable because of ++ * a wakeup, but hasn't gone through other functions updating ++ * the bucket value. 
++ */ ++ if (CYCLES_AFTER(time, node->last_updated_at)) { ++ nodevalue_add(node, FSCHDURATION(time, node->last_updated_at), ++ node->rate); ++ node->last_updated_at = time; ++ } ++ ++ /* charge for the full tick the node might be running */ ++ node->value = FSCHVALUE_SUB(node->value, TICK_VALUE); ++ if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) { ++ list_del(&node->runlist); ++ node->delayed = 1; ++ node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY( ++ FSCHVALUE_SUB(TICK_VALUE, node->value), ++ node->rate); ++ node->nr_ready = 0; ++ fairsched_delayed_insert(node); ++ } ++} ++ ++static void fairsched_ratelimit_credit_unused( ++ struct fairsched_node *node, ++ cycles_t time, fschdur_t duration) ++{ ++ /* account for the time passed since last update */ ++ if (CYCLES_AFTER(time, node->last_updated_at)) { ++ nodevalue_add(node, FSCHDURATION(time, node->last_updated_at), ++ node->rate); ++ node->last_updated_at = time; ++ } ++ ++ /* ++ * When the node was given this CPU, it was charged for 1 tick. ++ * Credit back the unused time. ++ */ ++ if (FSCHDUR_CMP(duration, TICK_DUR) < 0) ++ nodevalue_add(node, FSCHDUR_SUB(TICK_DUR, duration), ++ 1 << FSCHRATE_SHIFT); ++ ++ /* check if the node is allowed to run */ ++ if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) { ++ /* ++ * The node was delayed and remain such. ++ * But since the bucket value has been updated, ++ * update the delay time and move the node in the list. ++ */ ++ fsch_assert(node->delayed); ++ node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY( ++ FSCHVALUE_SUB(TICK_VALUE, node->value), ++ node->rate); ++ } else if (node->delayed) { ++ /* ++ * The node was delayed, but now it is allowed to run. ++ * We do not manipulate with lists, it will be done by the ++ * caller. 
++ */ ++ node->nr_ready = node->nr_runnable; ++ node->delayed = 0; ++ } ++} ++ ++static void fairsched_delayed_wake(cycles_t time) ++{ ++ struct fairsched_node *p; ++ ++ while (!list_empty(&fairsched_delayed_head)) { ++ p = list_entry(fairsched_delayed_head.next, ++ struct fairsched_node, ++ runlist); ++ if (CYCLES_AFTER(p->delay, time)) ++ break; ++ ++ /* ok, the delay period is completed */ ++ /* account for the time passed since last update */ ++ if (CYCLES_AFTER(time, p->last_updated_at)) { ++ nodevalue_add(p, FSCHDURATION(time, p->last_updated_at), ++ p->rate); ++ p->last_updated_at = time; ++ } ++ ++ fsch_assert(FSCHVALUE_CMP(p->value, TICK_VALUE) >= 0); ++ p->nr_ready = p->nr_runnable; ++ p->delayed = 0; ++ list_del_init(&p->runlist); ++ if (p->nr_ready) ++ fairsched_running_insert_fromsleep(p); ++ } ++} ++ ++static struct fairsched_node *fairsched_find(unsigned int id); ++ ++void fairsched_cpu_online_map(int id, cpumask_t *mask) ++{ ++ /* FIXME - obtain real map */ ++ *mask = cpu_online_map; ++#if 0 ++ struct fairsched_node *node; ++ ++ down(&fairsched_mutex); ++ node = fairsched_find(id); ++ if (node == NULL) ++ *mask = CPU_MASK_NONE; ++ else ++ vsched_cpu_online_map(node->vsched, mask); ++ up(&fairsched_mutex); ++#endif ++} ++ ++/*********************************************************************/ ++/* ++ * The heart of the algorithm: ++ * fairsched_incrun, fairsched_decrun, fairsched_schedule ++ * ++ * Note: old property nr_ready >= nr_pcpu doesn't hold anymore. ++ * However, nr_runnable, nr_ready and delayed are maintained in sync. ++ */ ++/*********************************************************************/ ++ ++/* ++ * Called on a wakeup inside the node. 
++ */ ++void fairsched_incrun(struct fairsched_node *node) ++{ ++ if (!node->delayed && !node->nr_ready++) ++ /* the node wasn't on the running list, insert */ ++ fairsched_running_insert_fromsleep(node); ++ node->nr_runnable++; ++} ++ ++/* ++ * Called from inside schedule() when a sleeping state is entered. ++ */ ++void fairsched_decrun(struct fairsched_node *node) ++{ ++ if (!node->delayed && !--node->nr_ready) ++ /* nr_ready changed 1->0, remove from the running list */ ++ list_del_init(&node->runlist); ++ --node->nr_runnable; ++} ++ ++void fairsched_inccpu(struct fairsched_node *node) ++{ ++ node->nr_pcpu++; ++ fairsched_dec_ve_strv(node, cycles); ++} ++ ++static inline void __fairsched_deccpu(struct fairsched_node *node) ++{ ++ node->nr_pcpu--; ++ fairsched_inc_ve_strv(node, cycles); ++} ++ ++void fairsched_deccpu(struct fairsched_node *node) ++{ ++ if (node == &fairsched_idle_node) ++ return; ++ ++ __fairsched_deccpu(node); ++} ++ ++static void fairsched_account(struct fairsched_node *node, ++ cycles_t time) ++{ ++ fschdur_t duration; ++ ++ duration = FSCHDURATION(time, __get_cpu_var(prev_schedule)); ++#ifdef CONFIG_VE ++ CYCLES_DADD(&node->owner_env->cpu_used_ve, duration); ++#endif ++ ++ /* ++ * The duration is not greater than TICK_DUR since ++ * task->need_resched is always 1. ++ */ ++ if (FSCHTAG_DADD(&node->start_tag, duration, node->weight)) { ++ fairsched_reset_start_tags(); ++ (void) FSCHTAG_DADD(&node->start_tag, duration, ++ node->weight); ++ } ++ ++ list_del_init(&node->runlist); ++ if (node->rate_limited) ++ fairsched_ratelimit_credit_unused(node, time, duration); ++ if (!node->delayed) { ++ if (node->nr_ready) ++ fairsched_running_insert(node); ++ } else ++ fairsched_delayed_insert(node); ++} ++ ++/* ++ * Scheduling decision ++ * ++ * Updates CPU usage for the node releasing the CPU and selects a new node. 
++ */ ++struct fairsched_node *fairsched_schedule( ++ struct fairsched_node *prev_node, ++ struct fairsched_node *cur_node, ++ int cur_node_active, ++ cycles_t time) ++{ ++ struct fairsched_node *p; ++ ++ if (prev_node != &fairsched_idle_node) ++ fairsched_account(prev_node, time); ++ __get_cpu_var(prev_schedule) = time; ++ ++ fairsched_delayed_wake(time); ++ ++ list_for_each_entry(p, &fairsched_running_head, runlist) { ++ if (p->nr_pcpu < p->nr_ready || ++ (cur_node_active && p == cur_node)) { ++ if (p->rate_limited) ++ fairsched_ratelimit_charge_advance(p, time); ++ return p; ++ } ++ } ++ return NULL; ++} ++ ++ ++/*********************************************************************/ ++/* ++ * System calls ++ * ++ * All do_xxx functions are called under fairsched semaphore and after ++ * capability check. ++ * ++ * The binary interfaces follow some other Fair Scheduler implementations ++ * (although some system call arguments are not needed for our implementation). ++ */ ++/*********************************************************************/ ++ ++static struct fairsched_node *fairsched_find(unsigned int id) ++{ ++ struct fairsched_node *p; ++ ++ list_for_each_entry(p, &fairsched_node_head, nodelist) { ++ if (p->id == id) ++ return p; ++ } ++ return NULL; ++} ++ ++static int do_fairsched_mknod(unsigned int parent, unsigned int weight, ++ unsigned int newid) ++{ ++ struct fairsched_node *node; ++ int retval; ++ ++ retval = -EINVAL; ++ if (weight < 1 || weight > FSCHWEIGHT_MAX) ++ goto out; ++ if (newid < 0 || newid > INT_MAX) ++ goto out; ++ ++ retval = -EBUSY; ++ if (fairsched_find(newid) != NULL) ++ goto out; ++ ++ retval = -ENOMEM; ++ node = kmalloc(sizeof(*node), GFP_KERNEL); ++ if (node == NULL) ++ goto out; ++ ++ memset(node, 0, sizeof(*node)); ++ node->weight = weight; ++ INIT_LIST_HEAD(&node->runlist); ++ node->id = newid; ++#ifdef CONFIG_VE ++ node->owner_env = get_exec_env(); ++#endif ++ ++ spin_lock_irq(&fairsched_lock); ++ list_add(&node->nodelist, 
&fairsched_node_head); ++ fairsched_nr_nodes++; ++ fairsched_recompute_max_latency(); ++ spin_unlock_irq(&fairsched_lock); ++ ++ retval = newid; ++out: ++ return retval; ++} ++ ++asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight, ++ unsigned int newid) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ down(&fairsched_mutex); ++ retval = do_fairsched_mknod(parent, weight, newid); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++EXPORT_SYMBOL(sys_fairsched_mknod); ++ ++static int do_fairsched_rmnod(unsigned int id) ++{ ++ struct fairsched_node *node; ++ int retval; ++ ++ retval = -EINVAL; ++ node = fairsched_find(id); ++ if (node == NULL) ++ goto out; ++ if (node == &fairsched_init_node) ++ goto out; ++ ++ retval = vsched_destroy(node->vsched); ++ if (retval) ++ goto out; ++ ++ spin_lock_irq(&fairsched_lock); ++ list_del(&node->runlist); /* required for delayed nodes */ ++ list_del(&node->nodelist); ++ fairsched_nr_nodes--; ++ fairsched_recompute_max_latency(); ++ spin_unlock_irq(&fairsched_lock); ++ ++ kfree(node); ++ retval = 0; ++out: ++ return retval; ++} ++ ++asmlinkage int sys_fairsched_rmnod(unsigned int id) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ down(&fairsched_mutex); ++ retval = do_fairsched_rmnod(id); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++EXPORT_SYMBOL(sys_fairsched_rmnod); ++ ++int do_fairsched_chwt(unsigned int id, unsigned weight) ++{ ++ struct fairsched_node *node; ++ ++ if (id == 0) ++ return -EINVAL; ++ if (weight < 1 || weight > FSCHWEIGHT_MAX) ++ return -EINVAL; ++ ++ node = fairsched_find(id); ++ if (node == NULL) ++ return -ENOENT; ++ ++ spin_lock_irq(&fairsched_lock); ++ node->weight = weight; ++ fairsched_recompute_max_latency(); ++ spin_unlock_irq(&fairsched_lock); ++ ++ return 0; ++} ++ ++asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned weight) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ 
down(&fairsched_mutex); ++ retval = do_fairsched_chwt(id, weight); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++ ++int do_fairsched_rate(unsigned int id, int op, unsigned rate) ++{ ++ struct fairsched_node *node; ++ cycles_t time; ++ int retval; ++ ++ if (id == 0) ++ return -EINVAL; ++ if (op == 0 && (rate < 1 || rate >= (1UL << 31))) ++ return -EINVAL; ++ ++ node = fairsched_find(id); ++ if (node == NULL) ++ return -ENOENT; ++ ++ retval = -EINVAL; ++ spin_lock_irq(&fairsched_lock); ++ time = get_cycles(); ++ switch (op) { ++ case 0: ++ node->rate = rate; ++ if (node->rate > (fairsched_nr_cpus << FSCHRATE_SHIFT)) ++ node->rate = ++ fairsched_nr_cpus << FSCHRATE_SHIFT; ++ node->rate_limited = 1; ++ node->value = max_value; ++ if (node->delayed) { ++ list_del(&node->runlist); ++ node->delay = time; ++ fairsched_delayed_insert(node); ++ node->last_updated_at = time; ++ fairsched_delayed_wake(time); ++ } ++ retval = node->rate; ++ break; ++ case 1: ++ node->rate = 0; /* This assignment is not needed ++ for the kernel code, and it should ++ not rely on rate being 0 when it's ++ unset. This is a band-aid for some ++ existing tools (don't know which one ++ exactly). --SAW */ ++ node->rate_limited = 0; ++ node->value = max_value; ++ if (node->delayed) { ++ list_del(&node->runlist); ++ node->delay = time; ++ fairsched_delayed_insert(node); ++ node->last_updated_at = time; ++ fairsched_delayed_wake(time); ++ } ++ retval = 0; ++ break; ++ case 2: ++ if (node->rate_limited) ++ retval = node->rate; ++ else ++ retval = -ENODATA; ++ break; ++ } ++ spin_unlock_irq(&fairsched_lock); ++ ++ return retval; ++} ++ ++asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ down(&fairsched_mutex); ++ retval = do_fairsched_rate(id, op, rate); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++ ++/* ++ * Called under fairsched_mutex. 
++ */ ++static int __do_fairsched_mvpr(struct task_struct *p, ++ struct fairsched_node *node) ++{ ++ int retval; ++ ++ if (node->vsched == NULL) { ++ retval = vsched_create(node->id, node); ++ if (retval < 0) ++ return retval; ++ } ++ ++ /* no need to destroy vsched in case of mvpr failure */ ++ return vsched_mvpr(p, node->vsched); ++} ++ ++int do_fairsched_mvpr(pid_t pid, unsigned int nodeid) ++{ ++ struct task_struct *p; ++ struct fairsched_node *node; ++ int retval; ++ ++ retval = -ENOENT; ++ node = fairsched_find(nodeid); ++ if (node == NULL) ++ goto out; ++ ++ read_lock(&tasklist_lock); ++ retval = -ESRCH; ++ p = find_task_by_pid_all(pid); ++ if (p == NULL) ++ goto out_unlock; ++ get_task_struct(p); ++ read_unlock(&tasklist_lock); ++ ++ retval = __do_fairsched_mvpr(p, node); ++ put_task_struct(p); ++ return retval; ++ ++out_unlock: ++ read_unlock(&tasklist_lock); ++out: ++ return retval; ++} ++ ++asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ down(&fairsched_mutex); ++ retval = do_fairsched_mvpr(pid, nodeid); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++EXPORT_SYMBOL(sys_fairsched_mvpr); ++ ++ ++/*********************************************************************/ ++/* ++ * proc interface ++ */ ++/*********************************************************************/ ++ ++struct fairsched_node_dump { ++#ifdef CONFIG_VE ++ envid_t veid; ++#endif ++ int id; ++ unsigned weight; ++ unsigned rate; ++ unsigned rate_limited : 1, ++ delayed : 1; ++ fschtag_t start_tag; ++ fschvalue_t value; ++ cycles_t delay; ++ int nr_ready; ++ int nr_runnable; ++ int nr_pcpu; ++ int nr_tasks, nr_runtasks; ++}; ++ ++struct fairsched_dump { ++ int len, compat; ++ struct fairsched_node_dump nodes[0]; ++}; ++ ++static struct fairsched_dump *fairsched_do_dump(int compat) ++{ ++ int nr_nodes; ++ int len, i; ++ struct fairsched_dump *dump; ++ struct fairsched_node *node; ++ struct 
fairsched_node_dump *p; ++ unsigned long flags; ++ ++start: ++ nr_nodes = (ve_is_super(get_exec_env()) ? fairsched_nr_nodes + 16 : 1); ++ len = sizeof(*dump) + nr_nodes * sizeof(dump->nodes[0]); ++ dump = ub_vmalloc(len); ++ if (dump == NULL) ++ goto out; ++ ++ spin_lock_irqsave(&fairsched_lock, flags); ++ if (ve_is_super(get_exec_env()) && nr_nodes < fairsched_nr_nodes) ++ goto repeat; ++ p = dump->nodes; ++ list_for_each_entry_reverse(node, &fairsched_node_head, nodelist) { ++ if ((char *)p - (char *)dump >= len) ++ break; ++ p->nr_tasks = 0; ++ p->nr_runtasks = 0; ++#ifdef CONFIG_VE ++ if (!ve_accessible(node->owner_env, get_exec_env())) ++ continue; ++ p->veid = node->owner_env->veid; ++ if (compat) { ++ p->nr_tasks = atomic_read(&node->owner_env->pcounter); ++ for (i = 0; i < NR_CPUS; i++) ++ p->nr_runtasks += ++ VE_CPU_STATS(node->owner_env, i) ++ ->nr_running; ++ if (p->nr_runtasks < 0) ++ p->nr_runtasks = 0; ++ } ++#endif ++ p->id = node->id; ++ p->weight = node->weight; ++ p->rate = node->rate; ++ p->rate_limited = node->rate_limited; ++ p->delayed = node->delayed; ++ p->start_tag = node->start_tag; ++ p->value = node->value; ++ p->delay = node->delay; ++ p->nr_ready = node->nr_ready; ++ p->nr_runnable = node->nr_runnable; ++ p->nr_pcpu = node->nr_pcpu; ++ p++; ++ } ++ dump->len = p - dump->nodes; ++ dump->compat = compat; ++ spin_unlock_irqrestore(&fairsched_lock, flags); ++ ++out: ++ return dump; ++ ++repeat: ++ spin_unlock_irqrestore(&fairsched_lock, flags); ++ vfree(dump); ++ goto start; ++} ++ ++#define FAIRSCHED_PROC_HEADLINES 2 ++ ++#if defined(CONFIG_VE) ++/* ++ * File format is dictated by compatibility reasons. 
++ */ ++static int fairsched_seq_show(struct seq_file *m, void *v) ++{ ++ struct fairsched_dump *dump; ++ struct fairsched_node_dump *p; ++ unsigned vid, nid, pid, r; ++ ++ dump = m->private; ++ p = (struct fairsched_node_dump *)((unsigned long)v & ~3UL); ++ if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) { ++ if (p == dump->nodes) ++ seq_printf(m, "Version: 2.6 debug\n"); ++ else if (p == dump->nodes + 1) ++ seq_printf(m, ++ " veid " ++ " id " ++ " parent " ++ "weight " ++ " rate " ++ "tasks " ++ " run " ++ "cpus" ++ " " ++ "flg " ++ "ready " ++ " start_tag " ++ " value " ++ " delay" ++ "\n"); ++ } else { ++ p -= FAIRSCHED_PROC_HEADLINES; ++ vid = nid = pid = 0; ++ r = (unsigned long)v & 3; ++ if (p == dump->nodes) { ++ if (r == 2) ++ nid = p->id; ++ } else { ++ if (!r) ++ nid = p->id; ++ else if (r == 1) ++ vid = pid = p->id; ++ else ++ vid = p->id, nid = 1; ++ } ++ seq_printf(m, ++ "%10u " ++ "%10u %10u %6u %5u %5u %5u %4u" ++ " " ++ " %c%c %5u %20Lu %20Lu %20Lu" ++ "\n", ++ vid, ++ nid, ++ pid, ++ p->weight, ++ p->rate, ++ p->nr_tasks, ++ p->nr_runtasks, ++ p->nr_pcpu, ++ p->rate_limited ? 'L' : '.', ++ p->delayed ? 'D' : '.', ++ p->nr_ready, ++ p->start_tag.t, ++ p->value.v, ++ p->delay ++ ); ++ } ++ ++ return 0; ++} ++ ++static void *fairsched_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct fairsched_dump *dump; ++ unsigned long l; ++ ++ dump = m->private; ++ if (*pos >= dump->len * 3 - 1 + FAIRSCHED_PROC_HEADLINES) ++ return NULL; ++ if (*pos < FAIRSCHED_PROC_HEADLINES) ++ return dump->nodes + *pos; ++ /* guess why... 
*/ ++ l = (unsigned long)(dump->nodes + ++ ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) / 3); ++ l |= ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) % 3; ++ return (void *)l; ++} ++static void *fairsched_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ ++*pos; ++ return fairsched_seq_start(m, pos); ++} ++#endif ++ ++static int fairsched2_seq_show(struct seq_file *m, void *v) ++{ ++ struct fairsched_dump *dump; ++ struct fairsched_node_dump *p; ++ ++ dump = m->private; ++ p = v; ++ if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) { ++ if (p == dump->nodes) ++ seq_printf(m, "Version: 2.7" FAIRSHED_DEBUG "\n"); ++ else if (p == dump->nodes + 1) ++ seq_printf(m, ++ " id " ++ "weight " ++ " rate " ++ " run " ++ "cpus" ++#ifdef FAIRSHED_DEBUG ++ " " ++ "flg " ++ "ready " ++ " start_tag " ++ " value " ++ " delay" ++#endif ++ "\n"); ++ } else { ++ p -= FAIRSCHED_PROC_HEADLINES; ++ seq_printf(m, ++ "%10u %6u %5u %5u %4u" ++#ifdef FAIRSHED_DEBUG ++ " " ++ " %c%c %5u %20Lu %20Lu %20Lu" ++#endif ++ "\n", ++ p->id, ++ p->weight, ++ p->rate, ++ p->nr_runnable, ++ p->nr_pcpu ++#ifdef FAIRSHED_DEBUG ++ , ++ p->rate_limited ? 'L' : '.', ++ p->delayed ? 
'D' : '.', ++ p->nr_ready, ++ p->start_tag.t, ++ p->value.v, ++ p->delay ++#endif ++ ); ++ } ++ ++ return 0; ++} ++ ++static void *fairsched2_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct fairsched_dump *dump; ++ ++ dump = m->private; ++ if (*pos >= dump->len + FAIRSCHED_PROC_HEADLINES) ++ return NULL; ++ return dump->nodes + *pos; ++} ++static void *fairsched2_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ ++*pos; ++ return fairsched2_seq_start(m, pos); ++} ++static void fairsched2_seq_stop(struct seq_file *m, void *v) ++{ ++} ++ ++#ifdef CONFIG_VE ++static struct seq_operations fairsched_seq_op = { ++ .start = fairsched_seq_start, ++ .next = fairsched_seq_next, ++ .stop = fairsched2_seq_stop, ++ .show = fairsched_seq_show ++}; ++#endif ++static struct seq_operations fairsched2_seq_op = { ++ .start = fairsched2_seq_start, ++ .next = fairsched2_seq_next, ++ .stop = fairsched2_seq_stop, ++ .show = fairsched2_seq_show ++}; ++static int fairsched_seq_open(struct inode *inode, struct file *file) ++{ ++ int ret; ++ struct seq_file *m; ++ int compat; ++ ++#ifdef CONFIG_VE ++ compat = (file->f_dentry->d_name.len == sizeof("fairsched") - 1); ++ ret = seq_open(file, compat ? 
&fairsched_seq_op : &fairsched2_seq_op); ++#else ++ compat = 0; ++ ret = seq_open(file, fairsched2_seq_op); ++#endif ++ if (ret) ++ return ret; ++ m = file->private_data; ++ m->private = fairsched_do_dump(compat); ++ if (m->private == NULL) { ++ seq_release(inode, file); ++ ret = -ENOMEM; ++ } ++ return ret; ++} ++static int fairsched_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *m; ++ struct fairsched_dump *dump; ++ ++ m = file->private_data; ++ dump = m->private; ++ m->private = NULL; ++ vfree(dump); ++ seq_release(inode, file); ++ return 0; ++} ++static struct file_operations proc_fairsched_operations = { ++ .open = fairsched_seq_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = fairsched_seq_release ++}; ++ ++ ++/*********************************************************************/ ++/* ++ * Fairsched initialization ++ */ ++/*********************************************************************/ ++ ++int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp, ++ void *buffer, size_t *lenp, loff_t *ppos) ++{ ++ int *valp = ctl->data; ++ int val = *valp; ++ int ret; ++ ++ ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); ++ ++ if (!write || *valp == val) ++ return ret; ++ ++ spin_lock_irq(&fairsched_lock); ++ fairsched_recompute_max_latency(); ++ spin_unlock_irq(&fairsched_lock); ++ return ret; ++} ++ ++static void fairsched_calibrate(void) ++{ ++ fairsched_nr_cpus = num_online_cpus(); ++ max_value = FSCHVALUE(cycles_per_jiffy * (fairsched_nr_cpus + 1)); ++} ++ ++void __init fairsched_init_early(void) ++{ ++ list_add(&fairsched_init_node.nodelist, &fairsched_node_head); ++ fairsched_nr_nodes++; ++} ++ ++/* ++ * Note: this function is execute late in the initialization sequence. ++ * We ourselves need calibrated cycles and initialized procfs... ++ * The consequence of this late initialization is that start tags are ++ * efficiently ignored and each node preempts others on insertion. 
++ * But it isn't a problem (only init node can be runnable). ++ */ ++void __init fairsched_init_late(void) ++{ ++ struct proc_dir_entry *entry; ++ ++ if (get_cycles() == 0) ++ panic("FAIRSCHED: no TSC!\n"); ++ fairsched_calibrate(); ++ fairsched_recompute_max_latency(); ++ ++ entry = create_proc_glob_entry("fairsched", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &proc_fairsched_operations; ++ entry = create_proc_glob_entry("fairsched2", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &proc_fairsched_operations; ++} ++ ++ ++#else /* CONFIG_FAIRSCHED */ ++ ++ ++/*********************************************************************/ ++/* ++ * No Fairsched ++ */ ++/*********************************************************************/ ++ ++asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight, ++ unsigned int newid) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage int sys_fairsched_rmnod(unsigned int id) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned int weight) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate) ++{ ++ return -ENOSYS; ++} ++ ++void __init fairsched_init_late(void) ++{ ++} ++ ++#endif /* CONFIG_FAIRSCHED */ +diff -upr linux-2.6.16.orig/kernel/fork.c linux-2.6.16-026test015/kernel/fork.c +--- linux-2.6.16.orig/kernel/fork.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/fork.c 2006-07-04 14:41:39.000000000 +0400 +@@ -20,6 +20,7 @@ + #include <linux/vmalloc.h> + #include <linux/completion.h> + #include <linux/namespace.h> ++#include <linux/file.h> + #include <linux/personality.h> + #include <linux/mempolicy.h> + #include <linux/sem.h> +@@ -52,11 +53,15 @@ + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++#include <ub/ub_misc.h> ++ + /* + * Protected counters by 
write_lock_irq(&tasklist_lock) + */ + unsigned long total_forks; /* Handle normal Linux uptimes. */ + int nr_threads; /* The idle threads do not count.. */ ++EXPORT_SYMBOL(nr_threads); + + int max_threads; /* tunable limit on nr_threads */ + +@@ -103,6 +108,7 @@ static kmem_cache_t *mm_cachep; + + void free_task(struct task_struct *tsk) + { ++ ub_task_uncharge(tsk); + free_thread_info(tsk->thread_info); + free_task_struct(tsk); + } +@@ -122,9 +128,14 @@ void __put_task_struct_cb(struct rcu_hea + free_uid(tsk->user); + put_group_info(tsk->group_info); + ++#ifdef CONFIG_VE ++ put_ve(VE_TASK_INFO(tsk)->owner_env); ++ atomic_dec(&nr_dead); ++#endif + if (!profile_handoff_task(tsk)) + free_task(tsk); + } ++EXPORT_SYMBOL_GPL(__put_task_struct_cb); + + void __init fork_init(unsigned long mempages) + { +@@ -135,7 +146,7 @@ void __init fork_init(unsigned long memp + /* create a slab on which task_structs can be allocated */ + task_struct_cachep = + kmem_cache_create("task_struct", sizeof(struct task_struct), +- ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL); ++ ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_UBC, NULL, NULL); + #endif + + /* +@@ -166,22 +177,30 @@ static struct task_struct *dup_task_stru + + tsk = alloc_task_struct(); + if (!tsk) +- return NULL; ++ goto out; + + ti = alloc_thread_info(tsk); +- if (!ti) { +- free_task_struct(tsk); +- return NULL; +- } ++ if (!ti) ++ goto out_tsk; + + *tsk = *orig; + tsk->thread_info = ti; + setup_thread_stack(tsk, orig); + ++ if (ub_task_charge(orig, tsk)) ++ goto out_ti; ++ + /* One for us, one for whoever does the "release_task()" (usually parent) */ + atomic_set(&tsk->usage,2); + atomic_set(&tsk->fs_excl, 0); + return tsk; ++ ++out_ti: ++ free_thread_info(ti); ++out_tsk: ++ free_task_struct(tsk); ++out: ++ return NULL; + } + + #ifdef CONFIG_MMU +@@ -219,7 +238,12 @@ static inline int dup_mmap(struct mm_str + -pages); + continue; + } ++ + charge = 0; ++ if (ub_memory_charge(mm, mpnt->vm_end - mpnt->vm_start, ++ mpnt->vm_flags & 
~VM_LOCKED, ++ mpnt->vm_file, UB_HARD)) ++ goto fail_noch; + if (mpnt->vm_flags & VM_ACCOUNT) { + unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + if (security_vm_enough_memory(len)) +@@ -238,6 +262,7 @@ static inline int dup_mmap(struct mm_str + tmp->vm_flags &= ~VM_LOCKED; + tmp->vm_mm = mm; + tmp->vm_next = NULL; ++ set_vma_rss(tmp, 0); + anon_vma_link(tmp); + file = tmp->vm_file; + if (file) { +@@ -266,7 +291,7 @@ static inline int dup_mmap(struct mm_str + rb_parent = &tmp->vm_rb; + + mm->map_count++; +- retval = copy_page_range(mm, oldmm, mpnt); ++ retval = copy_page_range(mm, oldmm, tmp, mpnt); + + if (tmp->vm_ops && tmp->vm_ops->open) + tmp->vm_ops->open(tmp); +@@ -283,6 +308,9 @@ out: + fail_nomem_policy: + kmem_cache_free(vm_area_cachep, tmp); + fail_nomem: ++ ub_memory_uncharge(mm, mpnt->vm_end - mpnt->vm_start, ++ mpnt->vm_flags & ~VM_LOCKED, mpnt->vm_file); ++fail_noch: + retval = -ENOMEM; + vm_unacct_memory(charge); + goto out; +@@ -313,7 +341,8 @@ static inline void mm_free_pgd(struct mm + + #include <linux/init_task.h> + +-static struct mm_struct * mm_init(struct mm_struct * mm) ++static struct mm_struct * mm_init(struct mm_struct * mm, ++ struct task_struct *tsk) + { + atomic_set(&mm->mm_users, 1); + atomic_set(&mm->mm_count, 1); +@@ -328,11 +357,14 @@ static struct mm_struct * mm_init(struct + mm->ioctx_list = NULL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + mm->cached_hole_size = ~0UL; ++ set_mm_ub(mm, tsk); + + if (likely(!mm_alloc_pgd(mm))) { + mm->def_flags = 0; + return mm; + } ++ ++ put_mm_ub(mm); + free_mm(mm); + return NULL; + } +@@ -347,10 +379,11 @@ struct mm_struct * mm_alloc(void) + mm = allocate_mm(); + if (mm) { + memset(mm, 0, sizeof(*mm)); +- mm = mm_init(mm); ++ mm = mm_init(mm, NULL); + } + return mm; + } ++EXPORT_SYMBOL_GPL(mm_alloc); + + /* + * Called when the last reference to the mm +@@ -362,8 +395,10 @@ void fastcall __mmdrop(struct mm_struct + BUG_ON(mm == &init_mm); + mm_free_pgd(mm); + 
destroy_context(mm); ++ put_mm_ub(mm); + free_mm(mm); + } ++EXPORT_SYMBOL_GPL(__mmdrop); + + /* + * Decrement the use count and release all resources for an mm. +@@ -466,7 +501,7 @@ static struct mm_struct *dup_mm(struct t + + memcpy(mm, oldmm, sizeof(*mm)); + +- if (!mm_init(mm)) ++ if (!mm_init(mm, tsk)) + goto fail_nomem; + + if (init_new_context(tsk, mm)) +@@ -720,7 +755,7 @@ out_release: + free_fdset (new_fdt->open_fds, new_fdt->max_fdset); + free_fd_array(new_fdt->fd, new_fdt->max_fds); + kmem_cache_free(files_cachep, newf); +- goto out; ++ return NULL; + } + + static int copy_files(unsigned long clone_flags, struct task_struct * tsk) +@@ -896,7 +931,7 @@ asmlinkage long sys_set_tid_address(int + { + current->clear_child_tid = tidptr; + +- return current->pid; ++ return virt_pid(current); + } + + /* +@@ -913,7 +948,7 @@ static task_t *copy_process(unsigned lon + unsigned long stack_size, + int __user *parent_tidptr, + int __user *child_tidptr, +- int pid) ++ int pid, long pid0) + { + int retval; + struct task_struct *p = NULL; +@@ -974,12 +1009,20 @@ static task_t *copy_process(unsigned lon + p->did_exec = 0; + copy_flags(clone_flags, p); + p->pid = pid; ++#ifdef CONFIG_VE ++ set_virt_pid(p, alloc_vpid(p->pid, pid0 ? 
: -1)); ++ if (virt_pid(p) < 0) ++ goto bad_fork_cleanup_module; ++#endif + retval = -EFAULT; + if (clone_flags & CLONE_PARENT_SETTID) +- if (put_user(p->pid, parent_tidptr)) ++ if (put_user(virt_pid(p), parent_tidptr)) + goto bad_fork_cleanup; + + p->proc_dentry = NULL; ++#ifdef CONFIG_VE ++ p->ve_task_info.glob_proc_dentry = NULL; ++#endif + + INIT_LIST_HEAD(&p->children); + INIT_LIST_HEAD(&p->sibling); +@@ -1027,8 +1070,13 @@ static task_t *copy_process(unsigned lon + #endif + + p->tgid = p->pid; +- if (clone_flags & CLONE_THREAD) ++ set_virt_tgid(p, virt_pid(p)); ++ set_virt_pgid(p, virt_pgid(current)); ++ set_virt_sid(p, virt_sid(current)); ++ if (clone_flags & CLONE_THREAD) { + p->tgid = current->tgid; ++ set_virt_tgid(p, virt_tgid(current)); ++ } + + if ((retval = security_task_alloc(p))) + goto bad_fork_cleanup_policy; +@@ -1111,8 +1159,8 @@ static task_t *copy_process(unsigned lon + */ + p->cpus_allowed = current->cpus_allowed; + if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || +- !cpu_online(task_cpu(p)))) +- set_task_cpu(p, smp_processor_id()); ++ !vcpu_online(task_cpu(p)))) ++ set_task_cpu(p, task_cpu(current)); + + /* + * Check for pending SIGKILL! 
The new thread should not be allowed +@@ -1181,6 +1229,12 @@ static task_t *copy_process(unsigned lon + if (unlikely(p->ptrace & PT_PTRACED)) + __ptrace_link(p, current->parent); + ++#ifdef CONFIG_VE ++ SET_VE_LINKS(p); ++ atomic_inc(&p->ve_task_info.owner_env->pcounter); ++ get_ve(p->ve_task_info.owner_env); ++ seqcount_init(&p->ve_task_info.wakeup_lock); ++#endif + if (thread_group_leader(p)) { + p->signal->tty = current->signal->tty; + p->signal->pgrp = process_group(current); +@@ -1228,6 +1282,11 @@ bad_fork_cleanup_cpuset: + #endif + cpuset_exit(p); + bad_fork_cleanup: ++#ifdef CONFIG_VE ++ if (virt_pid(p) != p->pid && virt_pid(p) > 0) ++ free_vpid(virt_pid(p), get_exec_env()); ++bad_fork_cleanup_module: ++#endif + if (p->binfmt) + module_put(p->binfmt->module); + bad_fork_cleanup_put_domain: +@@ -1253,7 +1312,7 @@ task_t * __devinit fork_idle(int cpu) + task_t *task; + struct pt_regs regs; + +- task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, 0); ++ task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, 0, 0); + if (!task) + return ERR_PTR(-ENOMEM); + init_idle(task, cpu); +@@ -1283,12 +1342,13 @@ static inline int fork_traceflag (unsign + * It copies the process, and if successful kick-starts + * it and waits for it to finish using the VM if required. 
+ */ +-long do_fork(unsigned long clone_flags, ++long do_fork_pid(unsigned long clone_flags, + unsigned long stack_start, + struct pt_regs *regs, + unsigned long stack_size, + int __user *parent_tidptr, +- int __user *child_tidptr) ++ int __user *child_tidptr, ++ long pid0) + { + struct task_struct *p; + int trace = 0; +@@ -1302,7 +1362,8 @@ long do_fork(unsigned long clone_flags, + clone_flags |= CLONE_PTRACE; + } + +- p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); ++ p = copy_process(clone_flags, stack_start, regs, stack_size, ++ parent_tidptr, child_tidptr, pid, pid0); + /* + * Do this prior waking up the new thread - the thread pointer + * might get invalid after that point, if the thread exits quickly. +@@ -1310,6 +1371,7 @@ long do_fork(unsigned long clone_flags, + if (!IS_ERR(p)) { + struct completion vfork; + ++ pid = virt_pid(p); + if (clone_flags & CLONE_VFORK) { + p->vfork_done = &vfork; + init_completion(&vfork); +@@ -1330,13 +1392,18 @@ long do_fork(unsigned long clone_flags, + + if (unlikely (trace)) { + current->ptrace_message = pid; ++ set_pn_state(current, PN_STOP_FORK); + ptrace_notify ((trace << 8) | SIGTRAP); ++ clear_pn_state(current); + } + + if (clone_flags & CLONE_VFORK) { + wait_for_completion(&vfork); +- if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) ++ if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { ++ set_pn_state(current, PN_STOP_VFORK); + ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); ++ clear_pn_state(current); ++ } + } + } else { + free_pidmap(pid); +@@ -1349,26 +1416,39 @@ long do_fork(unsigned long clone_flags, + #define ARCH_MIN_MMSTRUCT_ALIGN 0 + #endif + ++EXPORT_SYMBOL(do_fork_pid); ++ ++long do_fork(unsigned long clone_flags, ++ unsigned long stack_start, ++ struct pt_regs *regs, ++ unsigned long stack_size, ++ int __user *parent_tidptr, ++ int __user *child_tidptr) ++{ ++ return do_fork_pid(clone_flags, stack_start, regs, stack_size, ++ parent_tidptr, 
child_tidptr, 0); ++} ++ + void __init proc_caches_init(void) + { + sighand_cachep = kmem_cache_create("sighand_cache", + sizeof(struct sighand_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + signal_cachep = kmem_cache_create("signal_cache", + sizeof(struct signal_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + files_cachep = kmem_cache_create("files_cache", + sizeof(struct files_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + fs_cachep = kmem_cache_create("fs_cache", + sizeof(struct fs_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + vm_area_cachep = kmem_cache_create("vm_area_struct", + sizeof(struct vm_area_struct), 0, + SLAB_PANIC, NULL, NULL); + mm_cachep = kmem_cache_create("mm_struct", + sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + } + + +diff -upr linux-2.6.16.orig/kernel/hrtimer.c linux-2.6.16-026test015/kernel/hrtimer.c +--- linux-2.6.16.orig/kernel/hrtimer.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/hrtimer.c 2006-07-04 14:41:39.000000000 +0400 +@@ -439,6 +439,7 @@ hrtimer_start(struct hrtimer *timer, kti + + return ret; + } ++EXPORT_SYMBOL_GPL(hrtimer_start); + + /** + * hrtimer_try_to_cancel - try to deactivate a timer +@@ -467,6 +468,7 @@ int hrtimer_try_to_cancel(struct hrtimer + return ret; + + } ++EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); + + /** + * hrtimer_cancel - cancel a timer and wait for the handler to finish. 
+@@ -504,6 +506,7 @@ ktime_t hrtimer_get_remaining(const stru + + return rem; + } ++EXPORT_SYMBOL_GPL(hrtimer_get_remaining); + + #ifdef CONFIG_NO_IDLE_HZ + /** +@@ -670,7 +673,7 @@ void hrtimer_run_queues(void) + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + */ +-static ktime_t __sched ++ktime_t __sched + schedule_hrtimer(struct hrtimer *timer, const enum hrtimer_mode mode) + { + /* fn stays NULL, meaning single-shot wakeup: */ +@@ -697,7 +700,7 @@ schedule_hrtimer_interruptible(struct hr + return schedule_hrtimer(timer, mode); + } + +-static long __sched nanosleep_restart(struct restart_block *restart) ++long __sched nanosleep_restart(struct restart_block *restart) + { + struct timespec __user *rmtp; + struct timespec tu; +@@ -726,6 +729,7 @@ static long __sched nanosleep_restart(st + /* The other values in restart are already filled in */ + return -ERESTART_RESTARTBLOCK; + } ++EXPORT_SYMBOL_GPL(nanosleep_restart); + + long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, + const enum hrtimer_mode mode, const clockid_t clockid) +diff -upr linux-2.6.16.orig/kernel/irq/handle.c linux-2.6.16-026test015/kernel/irq/handle.c +--- linux-2.6.16.orig/kernel/irq/handle.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/irq/handle.c 2006-07-04 14:41:37.000000000 +0400 +@@ -14,6 +14,8 @@ + + #include "internals.h" + ++#include <ub/beancounter.h> ++ + /* + * Linux has a controller-independent interrupt architecture. 
+ * Every controller has a 'controller-template', that is used +@@ -80,10 +82,12 @@ fastcall int handle_IRQ_event(unsigned i + struct irqaction *action) + { + int ret, retval = 0, status = 0; ++ struct user_beancounter *ub; + + if (!(action->flags & SA_INTERRUPT)) + local_irq_enable(); + ++ ub = set_exec_ub(get_ub0()); + do { + ret = action->handler(irq, action->dev_id, regs); + if (ret == IRQ_HANDLED) +@@ -91,6 +95,7 @@ fastcall int handle_IRQ_event(unsigned i + retval |= ret; + action = action->next; + } while (action); ++ (void)set_exec_ub(ub); + + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); +diff -upr linux-2.6.16.orig/kernel/kmod.c linux-2.6.16-026test015/kernel/kmod.c +--- linux-2.6.16.orig/kernel/kmod.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/kmod.c 2006-07-04 14:41:38.000000000 +0400 +@@ -78,6 +78,10 @@ int request_module(const char *fmt, ...) + #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ + static int kmod_loop_msg; + ++ /* Don't allow request_module() inside VE. 
*/ ++ if (!ve_is_super(get_exec_env())) ++ return -EPERM; ++ + va_start(args, fmt); + ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); + va_end(args); +@@ -246,6 +250,9 @@ int call_usermodehelper_keys(char *path, + }; + DECLARE_WORK(work, __call_usermodehelper, &sub_info); + ++ if (!ve_is_super(get_exec_env())) ++ return -EPERM; ++ + if (!khelper_wq) + return -EBUSY; + +diff -upr linux-2.6.16.orig/kernel/kthread.c linux-2.6.16-026test015/kernel/kthread.c +--- linux-2.6.16.orig/kernel/kthread.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/kthread.c 2006-07-04 14:41:38.000000000 +0400 +@@ -114,7 +114,7 @@ static void keventd_create_kthread(void + create->result = ERR_PTR(pid); + } else { + wait_for_completion(&create->started); +- create->result = find_task_by_pid(pid); ++ create->result = find_task_by_pid_all(pid); + } + complete(&create->done); + } +diff -upr linux-2.6.16.orig/kernel/module.c linux-2.6.16-026test015/kernel/module.c +--- linux-2.6.16.orig/kernel/module.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/module.c 2006-07-04 14:41:38.000000000 +0400 +@@ -2130,6 +2130,8 @@ static void *m_start(struct seq_file *m, + loff_t n = 0; + + down(&module_mutex); ++ if (!ve_is_super(get_exec_env())) ++ return NULL; + list_for_each(i, &modules) { + if (n++ == *pos) + break; +diff -upr linux-2.6.16.orig/kernel/mutex-debug.c linux-2.6.16-026test015/kernel/mutex-debug.c +--- linux-2.6.16.orig/kernel/mutex-debug.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/mutex-debug.c 2006-07-04 14:41:38.000000000 +0400 +@@ -193,12 +193,12 @@ retry: + if (count != 10) + printk(" locked it.\n"); + +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + show_task_locks(p); + if (!unlock) + if (read_trylock(&tasklist_lock)) + unlock = 1; +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + + printk("\n"); + show_held_locks(NULL); +diff -upr linux-2.6.16.orig/kernel/panic.c 
linux-2.6.16-026test015/kernel/panic.c +--- linux-2.6.16.orig/kernel/panic.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/panic.c 2006-07-04 14:41:38.000000000 +0400 +@@ -23,6 +23,8 @@ + int panic_timeout; + int panic_on_oops; + int tainted; ++int kernel_text_csum_broken; ++EXPORT_SYMBOL(kernel_text_csum_broken); + + EXPORT_SYMBOL(panic_timeout); + +@@ -156,7 +158,8 @@ const char *print_tainted(void) + { + static char buf[20]; + if (tainted) { +- snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c", ++ snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c", ++ kernel_text_csum_broken ? 'B' : ' ', + tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', + tainted & TAINT_FORCED_MODULE ? 'F' : ' ', + tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', +diff -upr linux-2.6.16.orig/kernel/pid.c linux-2.6.16-026test015/kernel/pid.c +--- linux-2.6.16.orig/kernel/pid.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/pid.c 2006-07-04 14:41:39.000000000 +0400 +@@ -27,6 +27,10 @@ + #include <linux/bootmem.h> + #include <linux/hash.h> + ++#ifdef CONFIG_VE ++static void __free_vpid(int vpid, struct ve_struct *ve); ++#endif ++ + #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) + static struct hlist_head *pid_hash[PIDTYPE_MAX]; + static int pidhash_shift; +@@ -57,8 +61,14 @@ typedef struct pidmap { + void *page; + } pidmap_t; + ++#ifdef CONFIG_VE ++#define PIDMAP_NRFREE (BITS_PER_PAGE/2) ++#else ++#define PIDMAP_NRFREE BITS_PER_PAGE ++#endif ++ + static pidmap_t pidmap_array[PIDMAP_ENTRIES] = +- { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }; ++ { [ 0 ... 
PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(PIDMAP_NRFREE), NULL } }; + + static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); + +@@ -67,9 +77,13 @@ fastcall void free_pidmap(int pid) + pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; + int offset = pid & BITS_PER_PAGE_MASK; + +- clear_bit(offset, map->page); ++ BUG_ON(__is_virtual_pid(pid) || pid == 1); ++ ++ if (test_and_clear_bit(offset, map->page) == 0) ++ BUG(); + atomic_inc(&map->nr_free); + } ++EXPORT_SYMBOL_GPL(free_pidmap); + + int alloc_pidmap(void) + { +@@ -77,6 +91,8 @@ int alloc_pidmap(void) + pidmap_t *map; + + pid = last + 1; ++ if (__is_virtual_pid(pid)) ++ pid += VPID_DIV; + if (pid >= pid_max) + pid = RESERVED_PIDS; + offset = pid & BITS_PER_PAGE_MASK; +@@ -106,6 +122,8 @@ int alloc_pidmap(void) + return pid; + } + offset = find_next_offset(map, offset); ++ if (__is_virtual_pid(offset)) ++ offset += VPID_DIV; + pid = mk_pid(map, offset); + /* + * find_next_offset() found a bit, the pid from it +@@ -130,6 +148,7 @@ int alloc_pidmap(void) + } + return -1; + } ++EXPORT_SYMBOL_GPL(alloc_pidmap); + + struct pid * fastcall find_pid(enum pid_type type, int nr) + { +@@ -143,6 +162,7 @@ struct pid * fastcall find_pid(enum pid_ + } + return NULL; + } ++EXPORT_SYMBOL(find_pid); + + int fastcall attach_pid(task_t *task, enum pid_type type, int nr) + { +@@ -162,6 +182,7 @@ int fastcall attach_pid(task_t *task, en + + return 0; + } ++EXPORT_SYMBOL_GPL(attach_pid); + + static fastcall int __detach_pid(task_t *task, enum pid_type type) + { +@@ -201,13 +222,27 @@ void fastcall detach_pid(task_t *task, e + if (tmp != type && find_pid(tmp, nr)) + return; + ++#ifdef CONFIG_VE ++ __free_vpid(task->pids[type].vnr, VE_TASK_INFO(task)->owner_env); ++#endif + free_pidmap(nr); + } ++EXPORT_SYMBOL_GPL(detach_pid); + + task_t *find_task_by_pid_type(int type, int nr) + { ++ BUG(); ++ return NULL; ++} ++ ++EXPORT_SYMBOL(find_task_by_pid_type); ++ ++task_t *find_task_by_pid_type_all(int type, int nr) ++{ + struct pid *pid; 
+ ++ BUG_ON(nr != -1 && is_virtual_pid(nr)); ++ + pid = find_pid(type, nr); + if (!pid) + return NULL; +@@ -215,7 +250,35 @@ task_t *find_task_by_pid_type(int type, + return pid_task(&pid->pid_list, type); + } + +-EXPORT_SYMBOL(find_task_by_pid_type); ++EXPORT_SYMBOL(find_task_by_pid_type_all); ++ ++#ifdef CONFIG_VE ++ ++task_t *find_task_by_pid_type_ve(int type, int nr) ++{ ++ task_t *tsk; ++ int gnr = nr; ++ struct pid *pid; ++ ++ if (is_virtual_pid(nr)) { ++ gnr = __vpid_to_pid(nr); ++ if (unlikely(gnr == -1)) ++ return NULL; ++ } ++ ++ pid = find_pid(type, gnr); ++ if (!pid) ++ return NULL; ++ ++ tsk = pid_task(&pid->pid_list, type); ++ if (!ve_accessible(VE_TASK_INFO(tsk)->owner_env, get_exec_env())) ++ return NULL; ++ return tsk; ++} ++ ++EXPORT_SYMBOL(find_task_by_pid_type_ve); ++ ++#endif + + /* + * This function switches the PIDs if a non-leader thread calls +@@ -234,12 +297,16 @@ void switch_exec_pids(task_t *leader, ta + + leader->pid = leader->tgid = thread->pid; + thread->pid = thread->tgid; ++ set_virt_tgid(leader, virt_pid(thread)); ++ set_virt_pid(leader, virt_pid(thread)); ++ set_virt_pid(thread, virt_tgid(thread)); + + attach_pid(thread, PIDTYPE_PID, thread->pid); + attach_pid(thread, PIDTYPE_TGID, thread->tgid); + attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp); + attach_pid(thread, PIDTYPE_SID, thread->signal->session); + list_add_tail(&thread->tasks, &init_task.tasks); ++ SET_VE_LINKS(thread); + + attach_pid(leader, PIDTYPE_PID, leader->pid); + attach_pid(leader, PIDTYPE_TGID, leader->tgid); +@@ -247,6 +314,362 @@ void switch_exec_pids(task_t *leader, ta + attach_pid(leader, PIDTYPE_SID, leader->signal->session); + } + ++#ifdef CONFIG_VE ++ ++/* Virtual PID bits. ++ * ++ * At the moment all internal structures in kernel store real global pid. ++ * The only place, where virtual PID is used, is at user frontend. 
We ++ * remap virtual pids obtained from user to global ones (vpid_to_pid) and ++ * map globals to virtuals before showing them to user (virt_pid_type). ++ * ++ * We hold virtual PIDs inside struct pid, so map global -> virtual is easy. ++ */ ++ ++pid_t _pid_type_to_vpid(int type, pid_t pid) ++{ ++ struct pid * p; ++ ++ if (unlikely(is_virtual_pid(pid))) ++ return -1; ++ ++ read_lock(&tasklist_lock); ++ p = find_pid(type, pid); ++ if (p) { ++ pid = p->vnr; ++ } else { ++ pid = -1; ++ } ++ read_unlock(&tasklist_lock); ++ return pid; ++} ++EXPORT_SYMBOL_GPL(_pid_type_to_vpid); ++ ++pid_t pid_type_to_vpid(int type, pid_t pid) ++{ ++ int vpid; ++ ++ if (unlikely(pid <= 0)) ++ return pid; ++ ++ BUG_ON(is_virtual_pid(pid)); ++ ++ if (ve_is_super(get_exec_env())) ++ return pid; ++ ++ vpid = _pid_type_to_vpid(type, pid); ++ if (unlikely(vpid == -1)) { ++ /* It is allowed: global pid can be used everywhere. ++ * This can happen, when kernel remembers stray pids: ++ * signal queues, locks etc. ++ */ ++ vpid = pid; ++ } ++ return vpid; ++} ++EXPORT_SYMBOL_GPL(pid_type_to_vpid); ++ ++/* To map virtual pids to global we maintain special hash table. ++ * ++ * Mapping entries are allocated when a process with non-trivial ++ * mapping is forked, which is possible only after VE migrated. ++ * Mappings are destroyed, when a global pid is removed from global ++ * pidmap, which means we do not need to refcount mappings. 
++ */ ++ ++static struct hlist_head *vpid_hash; ++ ++struct vpid_mapping ++{ ++ int vpid; ++ int veid; ++ int pid; ++ struct hlist_node link; ++ struct rcu_head rcu; ++}; ++ ++static kmem_cache_t *vpid_mapping_cachep; ++ ++static inline int vpid_hashfn(int vnr, int veid) ++{ ++ return hash_long((unsigned long)(vnr+(veid<<16)), pidhash_shift); ++} ++ ++struct vpid_mapping *__lookup_vpid_mapping(int vnr, int veid) ++{ ++ struct hlist_node *elem; ++ struct vpid_mapping *map; ++ ++ hlist_for_each_entry_rcu(map, elem, ++ &vpid_hash[vpid_hashfn(vnr, veid)], link) { ++ if (map->vpid == vnr && map->veid == veid) ++ return map; ++ } ++ return NULL; ++} ++ ++/* __vpid_to_pid() is raw version of vpid_to_pid(). It is to be used ++ * only under tasklist_lock. In some places we must use only this version ++ * (f.e. __kill_pg_info is called under write lock!) ++ * ++ * Caller should pass virtual pid. This function returns an error, when ++ * seeing a global pid. ++ */ ++int __vpid_to_pid(int pid) ++{ ++ struct vpid_mapping *map; ++ ++ if (unlikely(!is_virtual_pid(pid) || ve_is_super(get_exec_env()))) ++ return -1; ++ ++ if (!get_exec_env()->sparse_vpid) { ++ if (pid != 1) ++ return pid - VPID_DIV; ++ return get_exec_env()->init_entry->pid; ++ } ++ ++ map = __lookup_vpid_mapping(pid, VEID(get_exec_env())); ++ if (map) ++ return map->pid; ++ return -1; ++} ++EXPORT_SYMBOL_GPL(__vpid_to_pid); ++ ++int vpid_to_pid(int pid) ++{ ++ /* User gave bad pid. It is his problem. */ ++ if (unlikely(pid <= 0)) ++ return pid; ++ ++ if (!is_virtual_pid(pid)) ++ return pid; ++ ++ read_lock(&tasklist_lock); ++ pid = __vpid_to_pid(pid); ++ read_unlock(&tasklist_lock); ++ return pid; ++} ++EXPORT_SYMBOL_GPL(vpid_to_pid); ++ ++/* VEs which never migrated have trivial "arithmetic" mapping pid <-> vpid: ++ * ++ * vpid == 1 -> ve->init_task->pid ++ * else pid & ~VPID_DIV ++ * ++ * In this case VE has ve->sparse_vpid = 0 and we do not use vpid hash table. 
++ * ++ * When VE migrates and we see non-trivial mapping the first time, we ++ * scan process table and populate mapping hash table. ++ */ ++ ++static int add_mapping(int pid, int vpid, int veid, struct hlist_head *cache) ++{ ++ if (unlikely(pid <= 0 || vpid <= 0)) ++ return 0; ++ ++ /* VE can contain non-virtual (VE_ENTER'ed) processes when ++ * switching to sparse mapping. We should not create mappings ++ * for them. */ ++ if (unlikely(!__is_virtual_pid(vpid) && vpid != 1)) { ++ printk("DEBUG (do not worry, but report): non-virtual pid while switching mode %d %d\n", pid, vpid); ++ return 0; ++ } ++ ++ if (!__lookup_vpid_mapping(vpid, veid)) { ++ struct vpid_mapping *m; ++ if (hlist_empty(cache)) { ++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_ATOMIC); ++ if (unlikely(m == NULL)) ++ return -ENOMEM; ++ } else { ++ m = hlist_entry(cache->first, struct vpid_mapping, link); ++ hlist_del_rcu(&m->link); ++ } ++ m->pid = pid; ++ m->vpid = vpid; ++ m->veid = veid; ++ hlist_add_head_rcu(&m->link, ++ &vpid_hash[vpid_hashfn(vpid, veid)]); ++ } ++ return 0; ++} ++ ++static int switch_to_sparse_mapping(int pid) ++{ ++ struct ve_struct *env = get_exec_env(); ++ struct hlist_head cache; ++ task_t *g, *t; ++ int pcount; ++ int err; ++ ++ /* Transition happens under write_lock_irq, so we try to make ++ * it more reliable and fast preallocating mapping entries. ++ * pcounter may be not enough, we could have lots of orphaned ++ * process groups and sessions, which also require mappings. 
++ */ ++ INIT_HLIST_HEAD(&cache); ++ pcount = atomic_read(&env->pcounter); ++ err = -ENOMEM; ++ while (pcount > 0) { ++ struct vpid_mapping *m; ++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL); ++ if (!m) ++ goto out; ++ hlist_add_head(&m->link, &cache); ++ pcount--; ++ } ++ ++ write_lock_irq(&tasklist_lock); ++ err = 0; ++ if (env->sparse_vpid) ++ goto out_unlock; ++ ++ err = -ENOMEM; ++ do_each_thread_ve(g, t) { ++ if (t->pid == pid) ++ continue; ++ if (add_mapping(t->pid, virt_pid(t), VEID(env), &cache)) ++ goto out_unlock; ++ } while_each_thread_ve(g, t); ++ ++ for_each_process_ve(t) { ++ if (t->pid == pid) ++ continue; ++ ++ if (add_mapping(t->tgid, virt_tgid(t), VEID(env), &cache)) ++ goto out_unlock; ++ if (add_mapping(t->signal->pgrp, virt_pgid(t), VEID(env), &cache)) ++ goto out_unlock; ++ if (add_mapping(t->signal->session, virt_sid(t), VEID(env), &cache)) ++ goto out_unlock; ++ } ++ env->sparse_vpid = 1; ++ err = 0; ++ ++out_unlock: ++ if (err) { ++ int i; ++ ++ for (i=0; i<(1<<pidhash_shift); i++) { ++ struct hlist_node *elem, *next; ++ struct vpid_mapping *map; ++ ++ hlist_for_each_entry_safe(map, elem, next, &vpid_hash[i], link) { ++ if (map->veid == VEID(env)) { ++ hlist_del(elem); ++ hlist_add_head(elem, &cache); ++ } ++ } ++ } ++ } ++ write_unlock_irq(&tasklist_lock); ++ ++out: ++ while (!hlist_empty(&cache)) { ++ struct vpid_mapping *m; ++ m = hlist_entry(cache.first, struct vpid_mapping, link); ++ hlist_del_rcu(&m->link); ++ kmem_cache_free(vpid_mapping_cachep, m); ++ } ++ return err; ++} ++ ++int alloc_vpid(int pid, int virt_pid) ++{ ++ int result; ++ struct vpid_mapping *m; ++ struct ve_struct *env = get_exec_env(); ++ ++ if (ve_is_super(env) || !env->virt_pids) ++ return pid; ++ ++ if (!env->sparse_vpid) { ++ if (virt_pid == -1) ++ return pid + VPID_DIV; ++ ++ if (virt_pid == 1 || virt_pid == pid + VPID_DIV) ++ return virt_pid; ++ ++ if ((result = switch_to_sparse_mapping(pid)) < 0) ++ return result; ++ } ++ ++ m = 
kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL); ++ if (!m) ++ return -ENOMEM; ++ ++ m->pid = pid; ++ m->veid = VEID(env); ++ ++ result = (virt_pid == -1) ? pid + VPID_DIV : virt_pid; ++ ++ write_lock_irq(&tasklist_lock); ++ if (unlikely(__lookup_vpid_mapping(result, m->veid))) { ++ if (virt_pid > 0) { ++ result = -EEXIST; ++ goto out; ++ } ++ ++ /* No luck. Now we search for some not-existing vpid. ++ * It is weak place. We do linear search. */ ++ do { ++ result++; ++ if (!__is_virtual_pid(result)) ++ result += VPID_DIV; ++ if (result >= pid_max) ++ result = RESERVED_PIDS + VPID_DIV; ++ } while (__lookup_vpid_mapping(result, m->veid) != NULL); ++ ++ /* And set last_pid in hope future alloc_pidmap to avoid ++ * collisions after future alloc_pidmap() */ ++ last_pid = result - VPID_DIV; ++ } ++ if (result > 0) { ++ m->vpid = result; ++ hlist_add_head_rcu(&m->link, ++ &vpid_hash[vpid_hashfn(result, m->veid)]); ++ } ++out: ++ write_unlock_irq(&tasklist_lock); ++ if (result < 0) ++ kmem_cache_free(vpid_mapping_cachep, m); ++ return result; ++} ++EXPORT_SYMBOL(alloc_vpid); ++ ++static void vpid_free_cb(struct rcu_head *rhp) ++{ ++ struct vpid_mapping *m; ++ ++ m = container_of(rhp, struct vpid_mapping, rcu); ++ kmem_cache_free(vpid_mapping_cachep, m); ++} ++ ++static void __free_vpid(int vpid, struct ve_struct *ve) ++{ ++ struct vpid_mapping *m; ++ ++ if (!ve->sparse_vpid) ++ return; ++ ++ if (!__is_virtual_pid(vpid) && (vpid != 1 || ve_is_super(ve))) ++ return; ++ ++ m = __lookup_vpid_mapping(vpid, ve->veid); ++ BUG_ON(m == NULL); ++ hlist_del_rcu(&m->link); ++ call_rcu(&m->rcu, vpid_free_cb); ++} ++ ++void free_vpid(int vpid, struct ve_struct *ve) ++{ ++ write_lock_irq(&tasklist_lock); ++ __free_vpid(vpid, ve); ++ write_unlock_irq(&tasklist_lock); ++} ++EXPORT_SYMBOL(free_vpid); ++#endif ++ + /* + * The pid hash table is scaled according to the amount of memory in the + * machine. 
From a minimum of 16 slots up to 4096 slots at one gigabyte or +@@ -273,6 +696,14 @@ void __init pidhash_init(void) + for (j = 0; j < pidhash_size; j++) + INIT_HLIST_HEAD(&pid_hash[i][j]); + } ++ ++#ifdef CONFIG_VE ++ vpid_hash = alloc_bootmem(pidhash_size * sizeof(struct hlist_head)); ++ if (!vpid_hash) ++ panic("Could not alloc vpid_hash!\n"); ++ for (j = 0; j < pidhash_size; j++) ++ INIT_HLIST_HEAD(&vpid_hash[j]); ++#endif + } + + void __init pidmap_init(void) +@@ -289,4 +720,12 @@ void __init pidmap_init(void) + + for (i = 0; i < PIDTYPE_MAX; i++) + attach_pid(current, i, 0); ++ ++#ifdef CONFIG_VE ++ vpid_mapping_cachep = ++ kmem_cache_create("vpid_mapping", ++ sizeof(struct vpid_mapping), ++ __alignof__(struct vpid_mapping), ++ SLAB_PANIC|SLAB_UBC, NULL, NULL); ++#endif + } +diff -upr linux-2.6.16.orig/kernel/posix-cpu-timers.c linux-2.6.16-026test015/kernel/posix-cpu-timers.c +--- linux-2.6.16.orig/kernel/posix-cpu-timers.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/posix-cpu-timers.c 2006-07-04 14:41:38.000000000 +0400 +@@ -20,7 +20,7 @@ static int check_clock(const clockid_t w + return 0; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + if (!p || (CPUCLOCK_PERTHREAD(which_clock) ? 
+ p->tgid != current->tgid : p->tgid != pid)) { + error = -EINVAL; +@@ -292,7 +292,7 @@ int posix_cpu_clock_get(const clockid_t + */ + struct task_struct *p; + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + if (p) { + if (CPUCLOCK_PERTHREAD(which_clock)) { + if (p->tgid == current->tgid) { +@@ -336,7 +336,7 @@ int posix_cpu_timer_create(struct k_itim + if (pid == 0) { + p = current; + } else { +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + if (p && p->tgid != current->tgid) + p = NULL; + } +@@ -344,7 +344,7 @@ int posix_cpu_timer_create(struct k_itim + if (pid == 0) { + p = current->group_leader; + } else { +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + if (p && p->tgid != pid) + p = NULL; + } +@@ -1173,6 +1173,9 @@ static void check_process_timers(struct + } + t = tsk; + do { ++ if (unlikely(t->flags & PF_EXITING)) ++ continue; ++ + ticks = cputime_add(cputime_add(t->utime, t->stime), + prof_left); + if (!cputime_eq(prof_expires, cputime_zero) && +@@ -1193,11 +1196,7 @@ static void check_process_timers(struct + t->it_sched_expires > sched)) { + t->it_sched_expires = sched; + } +- +- do { +- t = next_thread(t); +- } while (unlikely(t->flags & PF_EXITING)); +- } while (t != tsk); ++ } while ((t = next_thread(t)) != tsk); + } + } + +@@ -1289,30 +1288,30 @@ void run_posix_cpu_timers(struct task_st + + #undef UNEXPIRED + +- BUG_ON(tsk->exit_state); +- + /* + * Double-check with locks held. + */ + read_lock(&tasklist_lock); +- spin_lock(&tsk->sighand->siglock); ++ if (likely(tsk->signal != NULL)) { ++ spin_lock(&tsk->sighand->siglock); + +- /* +- * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] +- * all the timers that are firing, and put them on the firing list. 
+- */ +- check_thread_timers(tsk, &firing); +- check_process_timers(tsk, &firing); ++ /* ++ * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] ++ * all the timers that are firing, and put them on the firing list. ++ */ ++ check_thread_timers(tsk, &firing); ++ check_process_timers(tsk, &firing); + +- /* +- * We must release these locks before taking any timer's lock. +- * There is a potential race with timer deletion here, as the +- * siglock now protects our private firing list. We have set +- * the firing flag in each timer, so that a deletion attempt +- * that gets the timer lock before we do will give it up and +- * spin until we've taken care of that timer below. +- */ +- spin_unlock(&tsk->sighand->siglock); ++ /* ++ * We must release these locks before taking any timer's lock. ++ * There is a potential race with timer deletion here, as the ++ * siglock now protects our private firing list. We have set ++ * the firing flag in each timer, so that a deletion attempt ++ * that gets the timer lock before we do will give it up and ++ * spin until we've taken care of that timer below. ++ */ ++ spin_unlock(&tsk->sighand->siglock); ++ } + read_unlock(&tasklist_lock); + + /* +diff -upr linux-2.6.16.orig/kernel/posix-timers.c linux-2.6.16-026test015/kernel/posix-timers.c +--- linux-2.6.16.orig/kernel/posix-timers.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/posix-timers.c 2006-07-04 14:41:38.000000000 +0400 +@@ -31,6 +31,7 @@ + * POSIX clocks & timers + */ + #include <linux/mm.h> ++#include <linux/module.h> + #include <linux/smp_lock.h> + #include <linux/interrupt.h> + #include <linux/slab.h> +@@ -48,6 +49,8 @@ + #include <linux/workqueue.h> + #include <linux/module.h> + ++#include <ub/beancounter.h> ++ + /* + * Management arrays for POSIX timers. 
Timers are kept in slab memory + * Timer ids are allocated by an external routine that keeps track of the +@@ -241,7 +244,8 @@ static __init int init_posix_timers(void + register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); + + posix_timers_cache = kmem_cache_create("posix_timers_cache", +- sizeof (struct k_itimer), 0, 0, NULL, NULL); ++ sizeof (struct k_itimer), 0, ++ SLAB_UBC, NULL, NULL); + idr_init(&posix_timers_id); + return 0; + } +@@ -294,6 +298,13 @@ void do_schedule_next_timer(struct sigin + + int posix_timer_event(struct k_itimer *timr,int si_private) + { ++ int ret; ++ struct ve_struct *ve; ++ struct user_beancounter *ub; ++ ++ ve = set_exec_env(timr->it_process->ve_task_info.owner_env); ++ ub = set_exec_ub(timr->it_process->task_bc.task_ub); ++ + memset(&timr->sigq->info, 0, sizeof(siginfo_t)); + timr->sigq->info.si_sys_private = si_private; + /* Send signal to the process that owns this timer.*/ +@@ -306,11 +317,11 @@ int posix_timer_event(struct k_itimer *t + + if (timr->it_sigev_notify & SIGEV_THREAD_ID) { + struct task_struct *leader; +- int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, ++ ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, + timr->it_process); + + if (likely(ret >= 0)) +- return ret; ++ goto out; + + timr->it_sigev_notify = SIGEV_SIGNAL; + leader = timr->it_process->group_leader; +@@ -318,8 +329,12 @@ int posix_timer_event(struct k_itimer *t + timr->it_process = leader; + } + +- return send_group_sigqueue(timr->it_sigev_signo, timr->sigq, ++ ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq, + timr->it_process); ++out: ++ (void)set_exec_ub(ub); ++ (void)set_exec_env(ve); ++ return ret; + } + EXPORT_SYMBOL_GPL(posix_timer_event); + +@@ -366,7 +381,7 @@ static struct task_struct * good_sigeven + struct task_struct *rtn = current->group_leader; + + if ((event->sigev_notify & SIGEV_THREAD_ID ) && +- (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || ++ (!(rtn = 
find_task_by_pid_ve(event->sigev_notify_thread_id)) || + rtn->tgid != current->tgid || + (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) + return NULL; +diff -upr linux-2.6.16.orig/kernel/power/Kconfig linux-2.6.16-026test015/kernel/power/Kconfig +--- linux-2.6.16.orig/kernel/power/Kconfig 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/power/Kconfig 2006-07-04 14:41:39.000000000 +0400 +@@ -38,7 +38,7 @@ config PM_DEBUG + + config SOFTWARE_SUSPEND + bool "Software Suspend" +- depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP) ++ depends on PM && SWAP && X86 || ((FRV || PPC32) && !SMP) + ---help--- + Enable the possibility of suspending the machine. + It doesn't need APM. +diff -upr linux-2.6.16.orig/kernel/power/process.c linux-2.6.16-026test015/kernel/power/process.c +--- linux-2.6.16.orig/kernel/power/process.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/power/process.c 2006-07-04 14:41:39.000000000 +0400 +@@ -38,18 +38,23 @@ void refrigerator(void) + processes around? */ + long save; + save = current->state; ++ current->state = TASK_UNINTERRUPTIBLE; + pr_debug("%s entered refrigerator\n", current->comm); +- printk("="); ++ /* printk("="); */ + +- frozen_process(current); + spin_lock_irq(&current->sighand->siglock); +- recalc_sigpending(); /* We sent fake signal, clean it up */ ++ if (test_and_clear_thread_flag(TIF_FREEZE)) { ++ recalc_sigpending(); /* We sent fake signal, clean it up */ ++ current->flags |= PF_FROZEN; ++ } else { ++ /* Freeze request could be canceled before we entered ++ * refrigerator(). In this case we do nothing. 
*/ ++ current->state = save; ++ } + spin_unlock_irq(&current->sighand->siglock); + +- while (frozen(current)) { +- current->state = TASK_UNINTERRUPTIBLE; ++ while (current->flags & PF_FROZEN) + schedule(); +- } + pr_debug("%s left refrigerator\n", current->comm); + current->state = save; + } +@@ -67,7 +72,7 @@ int freeze_processes(void) + do { + todo = 0; + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + if (!freezeable(p)) + continue; + if (frozen(p)) +@@ -78,7 +83,7 @@ int freeze_processes(void) + signal_wake_up(p, 0); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + todo++; +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + read_unlock(&tasklist_lock); + yield(); /* Yield is okay here */ + if (todo && time_after(jiffies, start_time + TIMEOUT)) { +@@ -95,15 +100,15 @@ int freeze_processes(void) + */ + if (todo) { + read_lock(&tasklist_lock); +- do_each_thread(g, p) ++ do_each_thread_all(g, p) + if (freezing(p)) { + pr_debug(" clean up: %s\n", p->comm); +- p->flags &= ~PF_FREEZE; + spin_lock_irqsave(&p->sighand->siglock, flags); ++ clear_tsk_thread_flag(p, TIF_FREEZE); + recalc_sigpending_tsk(p); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + } +- while_each_thread(g, p); ++ while_each_thread_all(g, p); + read_unlock(&tasklist_lock); + return todo; + } +@@ -119,12 +124,12 @@ void thaw_processes(void) + + printk( "Restarting tasks..." 
); + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + if (!freezeable(p)) + continue; + if (!thaw_process(p)) + printk(KERN_INFO " Strange, %s not stopped\n", p->comm ); +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + + read_unlock(&tasklist_lock); + schedule(); +diff -upr linux-2.6.16.orig/kernel/printk.c linux-2.6.16-026test015/kernel/printk.c +--- linux-2.6.16.orig/kernel/printk.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/printk.c 2006-07-04 14:41:38.000000000 +0400 +@@ -30,7 +30,9 @@ + #include <linux/smp.h> + #include <linux/security.h> + #include <linux/bootmem.h> ++#include <linux/vzratelimit.h> + #include <linux/syscalls.h> ++#include <linux/veprintk.h> + + #include <asm/uaccess.h> + +@@ -83,7 +85,7 @@ static int console_locked; + * It is also used in interesting ways to provide interlocking in + * release_console_sem(). + */ +-static DEFINE_SPINLOCK(logbuf_lock); ++DEFINE_SPINLOCK(logbuf_lock); + + #define LOG_BUF_MASK (log_buf_len-1) + #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) +@@ -114,6 +116,7 @@ static int preferred_console = -1; + + /* Flag: console code may call schedule() */ + static int console_may_schedule; ++int console_silence_loglevel; + + #ifdef CONFIG_PRINTK + +@@ -160,6 +163,19 @@ static int __init console_setup(char *st + + __setup("console=", console_setup); + ++static int __init setup_console_silencelevel(char *str) ++{ ++ int level; ++ ++ if (get_option(&str, &level) != 1) ++ return 0; ++ ++ console_silence_loglevel = level; ++ return 1; ++} ++ ++__setup("silencelevel=", setup_console_silencelevel); ++ + static int __init log_buf_len_setup(char *str) + { + unsigned long size = memparse(str, &str); +@@ -223,6 +239,10 @@ int do_syslog(int type, char __user *buf + char c; + int error = 0; + ++ if (!ve_is_super(get_exec_env()) && ++ (type == 6 || type == 7 || type == 8)) ++ goto out; ++ + error = security_syslog(type); + if (error) + return error; 
+@@ -243,15 +263,15 @@ int do_syslog(int type, char __user *buf + error = -EFAULT; + goto out; + } +- error = wait_event_interruptible(log_wait, +- (log_start - log_end)); ++ error = wait_event_interruptible(ve_log_wait, ++ (ve_log_start - ve_log_end)); + if (error) + goto out; + i = 0; + spin_lock_irq(&logbuf_lock); +- while (!error && (log_start != log_end) && i < len) { +- c = LOG_BUF(log_start); +- log_start++; ++ while (!error && (ve_log_start != ve_log_end) && i < len) { ++ c = VE_LOG_BUF(ve_log_start); ++ ve_log_start++; + spin_unlock_irq(&logbuf_lock); + error = __put_user(c,buf); + buf++; +@@ -277,15 +297,17 @@ int do_syslog(int type, char __user *buf + error = -EFAULT; + goto out; + } ++ if (ve_log_buf == NULL) ++ goto out; + count = len; +- if (count > log_buf_len) +- count = log_buf_len; ++ if (count > ve_log_buf_len) ++ count = ve_log_buf_len; + spin_lock_irq(&logbuf_lock); +- if (count > logged_chars) +- count = logged_chars; ++ if (count > ve_logged_chars) ++ count = ve_logged_chars; + if (do_clear) +- logged_chars = 0; +- limit = log_end; ++ ve_logged_chars = 0; ++ limit = ve_log_end; + /* + * __put_user() could sleep, and while we sleep + * printk() could overwrite the messages +@@ -294,9 +316,9 @@ int do_syslog(int type, char __user *buf + */ + for (i = 0; i < count && !error; i++) { + j = limit-1-i; +- if (j + log_buf_len < log_end) ++ if (j + ve_log_buf_len < ve_log_end) + break; +- c = LOG_BUF(j); ++ c = VE_LOG_BUF(j); + spin_unlock_irq(&logbuf_lock); + error = __put_user(c,&buf[count-1-i]); + cond_resched(); +@@ -320,7 +342,7 @@ int do_syslog(int type, char __user *buf + } + break; + case 5: /* Clear ring buffer */ +- logged_chars = 0; ++ ve_logged_chars = 0; + break; + case 6: /* Disable logging to console */ + console_loglevel = minimum_console_loglevel; +@@ -338,10 +360,10 @@ int do_syslog(int type, char __user *buf + error = 0; + break; + case 9: /* Number of chars in the log buffer */ +- error = log_end - log_start; ++ error = ve_log_end 
- ve_log_start; + break; + case 10: /* Size of the log buffer */ +- error = log_buf_len; ++ error = ve_log_buf_len; + break; + default: + error = -EINVAL; +@@ -439,14 +461,14 @@ static void call_console_drivers(unsigne + + static void emit_log_char(char c) + { +- LOG_BUF(log_end) = c; +- log_end++; +- if (log_end - log_start > log_buf_len) +- log_start = log_end - log_buf_len; +- if (log_end - con_start > log_buf_len) +- con_start = log_end - log_buf_len; +- if (logged_chars < log_buf_len) +- logged_chars++; ++ VE_LOG_BUF(ve_log_end) = c; ++ ve_log_end++; ++ if (ve_log_end - ve_log_start > ve_log_buf_len) ++ ve_log_start = ve_log_end - ve_log_buf_len; ++ if (ve_is_super(get_exec_env()) && ve_log_end - con_start > ve_log_buf_len) ++ con_start = ve_log_end - ve_log_buf_len; ++ if (ve_logged_chars < ve_log_buf_len) ++ ve_logged_chars++; + } + + /* +@@ -511,6 +533,30 @@ __attribute__((weak)) unsigned long long + * printf(3) + */ + ++static inline int ve_log_init(void) ++{ ++#ifdef CONFIG_VE ++ if (ve_log_buf != NULL) ++ return 0; ++ ++ if (ve_is_super(get_exec_env())) { ++ ve0._log_wait = &log_wait; ++ ve0._log_start = &log_start; ++ ve0._log_end = &log_end; ++ ve0._logged_chars = &logged_chars; ++ ve0.log_buf = log_buf; ++ return 0; ++ } ++ ++ ve_log_buf = kmalloc(ve_log_buf_len, GFP_ATOMIC); ++ if (!ve_log_buf) ++ return -ENOMEM; ++ ++ memset(ve_log_buf, 0, ve_log_buf_len); ++#endif ++ return 0; ++} ++ + asmlinkage int printk(const char *fmt, ...) + { + va_list args; +@@ -526,13 +572,14 @@ asmlinkage int printk(const char *fmt, . 
+ /* cpu currently holding logbuf_lock */ + static volatile unsigned int printk_cpu = UINT_MAX; + +-asmlinkage int vprintk(const char *fmt, va_list args) ++asmlinkage int __vprintk(const char *fmt, va_list args) + { + unsigned long flags; + int printed_len; + char *p; + static char printk_buf[1024]; + static int log_level_unknown = 1; ++ int err, need_wake; + + preempt_disable(); + if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id()) +@@ -544,6 +591,12 @@ asmlinkage int vprintk(const char *fmt, + spin_lock_irqsave(&logbuf_lock, flags); + printk_cpu = smp_processor_id(); + ++ err = ve_log_init(); ++ if (err) { ++ spin_unlock_irqrestore(&logbuf_lock, flags); ++ return err; ++ } ++ + /* Emit the output into the temporary buffer */ + printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args); + +@@ -615,7 +668,12 @@ asmlinkage int vprintk(const char *fmt, + spin_unlock_irqrestore(&logbuf_lock, flags); + goto out; + } +- if (!down_trylock(&console_sem)) { ++ if (!ve_is_super(get_exec_env())) { ++ need_wake = (ve_log_start != ve_log_end); ++ spin_unlock_irqrestore(&logbuf_lock, flags); ++ if (!oops_in_progress && need_wake) ++ wake_up_interruptible(&ve_log_wait); ++ } else if (!down_trylock(&console_sem)) { + console_locked = 1; + /* + * We own the drivers. We can drop the spinlock and let +@@ -641,6 +699,38 @@ out: + EXPORT_SYMBOL(printk); + EXPORT_SYMBOL(vprintk); + ++asmlinkage int vprintk(const char *fmt, va_list args) ++{ ++ int i; ++ struct ve_struct *env; ++ ++ env = set_exec_env(get_ve0()); ++ i = __vprintk(fmt, args); ++ set_exec_env(env); ++ return i; ++} ++ ++asmlinkage int ve_printk(int dst, const char *fmt, ...) 
++{ ++ va_list args; ++ int printed_len; ++ ++ printed_len = 0; ++ if (ve_is_super(get_exec_env()) || (dst & VE0_LOG)) { ++ va_start(args, fmt); ++ printed_len = vprintk(fmt, args); ++ va_end(args); ++ } ++ if (!ve_is_super(get_exec_env()) && (dst & VE_LOG)) { ++ va_start(args, fmt); ++ printed_len = __vprintk(fmt, args); ++ va_end(args); ++ } ++ return printed_len; ++} ++EXPORT_SYMBOL(ve_printk); ++ ++ + #else + + asmlinkage long sys_syslog(int type, char __user *buf, int len) +@@ -732,6 +822,12 @@ int is_console_locked(void) + } + EXPORT_SYMBOL(is_console_locked); + ++void wake_up_klogd(void) ++{ ++ if (!oops_in_progress && waitqueue_active(&log_wait)) ++ wake_up_interruptible(&log_wait); ++} ++ + /** + * release_console_sem - unlock the console system + * +@@ -768,8 +864,8 @@ void release_console_sem(void) + console_may_schedule = 0; + up(&console_sem); + spin_unlock_irqrestore(&logbuf_lock, flags); +- if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) +- wake_up_interruptible(&log_wait); ++ if (wake_klogd) ++ wake_up_klogd(); + } + EXPORT_SYMBOL(release_console_sem); + +@@ -1049,3 +1145,33 @@ int printk_ratelimit(void) + printk_ratelimit_burst); + } + EXPORT_SYMBOL(printk_ratelimit); ++ ++/* ++ * Rate limiting stuff. 
++ */ ++int vz_ratelimit(struct vz_rate_info *p) ++{ ++ unsigned long cjif, djif; ++ unsigned long flags; ++ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; ++ long new_bucket; ++ ++ spin_lock_irqsave(&ratelimit_lock, flags); ++ cjif = jiffies; ++ djif = cjif - p->last; ++ if (djif < p->interval) { ++ if (p->bucket >= p->burst) { ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ return 0; ++ } ++ p->bucket++; ++ } else { ++ new_bucket = p->bucket - (djif / (unsigned)p->interval); ++ if (new_bucket < 0) ++ new_bucket = 0; ++ p->bucket = new_bucket + 1; ++ } ++ p->last = cjif; ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ return 1; ++} +diff -upr linux-2.6.16.orig/kernel/ptrace.c linux-2.6.16-026test015/kernel/ptrace.c +--- linux-2.6.16.orig/kernel/ptrace.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/ptrace.c 2006-07-04 14:41:39.000000000 +0400 +@@ -57,10 +57,6 @@ void ptrace_untrace(task_t *child) + signal_wake_up(child, 1); + } + } +- if (child->signal->flags & SIGNAL_GROUP_EXIT) { +- sigaddset(&child->pending.signal, SIGKILL); +- signal_wake_up(child, 1); +- } + spin_unlock(&child->sighand->siglock); + } + +@@ -82,7 +78,8 @@ void __ptrace_unlink(task_t *child) + SET_LINKS(child); + } + +- ptrace_untrace(child); ++ if (child->state == TASK_TRACED) ++ ptrace_untrace(child); + } + + /* +@@ -136,7 +133,10 @@ static int may_attach(struct task_struct + smp_rmb(); + if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + return -EPERM; +- ++ if (!task->mm->vps_dumpable && !ve_is_super(get_exec_env())) ++ return -EPERM; ++ if (!ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env())) ++ return -EPERM; + return security_ptrace(current, task); + } + +@@ -152,12 +152,34 @@ int ptrace_may_attach(struct task_struct + int ptrace_attach(struct task_struct *task) + { + int retval; +- task_lock(task); ++ + retval = -EPERM; + if (task->pid <= 1) +- goto bad; ++ goto out; + if (task->tgid == current->tgid) +- goto bad; ++ goto out; 
++ ++repeat: ++ /* ++ * Nasty, nasty. ++ * ++ * We want to hold both the task-lock and the ++ * tasklist_lock for writing at the same time. ++ * But that's against the rules (tasklist_lock ++ * is taken for reading by interrupts on other ++ * cpu's that may have task_lock). ++ */ ++ task_lock(task); ++ local_irq_disable(); ++ if (!write_trylock(&tasklist_lock)) { ++ local_irq_enable(); ++ task_unlock(task); ++ do { ++ cpu_relax(); ++ } while (!write_can_lock(&tasklist_lock)); ++ goto repeat; ++ } ++ + /* the same process cannot be attached many times */ + if (task->ptrace & PT_PTRACED) + goto bad; +@@ -170,17 +192,15 @@ int ptrace_attach(struct task_struct *ta + ? PT_ATTACHED : 0); + if (capable(CAP_SYS_PTRACE)) + task->ptrace |= PT_PTRACE_CAP; +- task_unlock(task); + +- write_lock_irq(&tasklist_lock); + __ptrace_link(task, current); +- write_unlock_irq(&tasklist_lock); + + force_sig_specific(SIGSTOP, task); +- return 0; + + bad: ++ write_unlock_irq(&tasklist_lock); + task_unlock(task); ++out: + return retval; + } + +@@ -263,6 +283,7 @@ int access_process_vm(struct task_struct + + return buf - old_buf; + } ++EXPORT_SYMBOL_GPL(access_process_vm); + + int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) + { +@@ -421,21 +442,22 @@ int ptrace_request(struct task_struct *c + */ + int ptrace_traceme(void) + { +- int ret; ++ int ret = -EPERM; + + /* + * Are we already being traced? + */ +- if (current->ptrace & PT_PTRACED) +- return -EPERM; +- ret = security_ptrace(current->parent, current); +- if (ret) +- return -EPERM; +- /* +- * Set the ptrace bit in the process ptrace flags. +- */ +- current->ptrace |= PT_PTRACED; +- return 0; ++ task_lock(current); ++ if (!(current->ptrace & PT_PTRACED)) { ++ ret = security_ptrace(current->parent, current); ++ /* ++ * Set the ptrace bit in the process ptrace flags. 
++ */ ++ if (!ret) ++ current->ptrace |= PT_PTRACED; ++ } ++ task_unlock(current); ++ return ret; + } + + /** +@@ -459,7 +481,7 @@ struct task_struct *ptrace_get_task_stru + return ERR_PTR(-EPERM); + + read_lock(&tasklist_lock); +- child = find_task_by_pid(pid); ++ child = find_task_by_pid_ve(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); +diff -upr linux-2.6.16.orig/kernel/sched.c linux-2.6.16-026test015/kernel/sched.c +--- linux-2.6.16.orig/kernel/sched.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/sched.c 2006-07-04 14:41:39.000000000 +0400 +@@ -49,6 +49,8 @@ + #include <linux/syscalls.h> + #include <linux/times.h> + #include <linux/acct.h> ++#include <linux/vsched.h> ++#include <linux/fairsched.h> + #include <asm/tlb.h> + + #include <asm/unistd.h> +@@ -134,7 +136,7 @@ + #ifdef CONFIG_SMP + #define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ + (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \ +- num_online_cpus()) ++ vsched_num_online_vcpus(task_vsched(p))) + #else + #define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ + (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1))) +@@ -199,6 +201,7 @@ struct prio_array { + * (such as the load balancing or the thread migration code), lock + * acquire operations must be ordered by ascending &runqueue. 
+ */ ++typedef struct vcpu_info *vcpu_t; + struct runqueue { + spinlock_t lock; + +@@ -220,9 +223,12 @@ struct runqueue { + */ + unsigned long nr_uninterruptible; + ++ unsigned long nr_sleeping; ++ unsigned long nr_stopped; ++ + unsigned long expired_timestamp; + unsigned long long timestamp_last_tick; +- task_t *curr, *idle; ++ task_t *curr; + struct mm_struct *prev_mm; + prio_array_t *active, *expired, arrays[2]; + int best_expired_prio; +@@ -233,11 +239,12 @@ struct runqueue { + + /* For active balancing */ + int active_balance; +- int push_cpu; ++#endif ++ vcpu_t push_cpu; + + task_t *migration_thread; + struct list_head migration_queue; +-#endif ++ int cpu; + + #ifdef CONFIG_SCHEDSTATS + /* latency stats */ +@@ -260,7 +267,51 @@ struct runqueue { + #endif + }; + +-static DEFINE_PER_CPU(struct runqueue, runqueues); ++/* VCPU scheduler state description */ ++struct vcpu_info; ++struct vcpu_scheduler { ++ struct list_head idle_list; ++ struct list_head active_list; ++ struct list_head running_list; ++#ifdef CONFIG_FAIRSCHED ++ struct fairsched_node *node; ++#endif ++ struct vcpu_info *vcpu[NR_CPUS]; ++ int id; ++ cpumask_t vcpu_online_map, vcpu_running_map; ++ cpumask_t pcpu_running_map; ++ int num_online_vcpus; ++} ____cacheline_internodealigned_in_smp; ++ ++/* virtual CPU description */ ++struct vcpu_info { ++ struct runqueue rq; ++#ifdef CONFIG_SCHED_VCPU ++ unsigned active : 1, ++ running : 1; ++ struct list_head list; ++ struct vcpu_scheduler *vsched; ++ int last_pcpu; ++ u32 start_time; ++#endif ++ int id; ++} ____cacheline_internodealigned_in_smp; ++ ++/* physical CPU description */ ++struct pcpu_info { ++ struct vcpu_scheduler *vsched; ++ struct vcpu_info *vcpu; ++ task_t *idle; ++#ifdef CONFIG_SMP ++ struct sched_domain *sd; ++#endif ++ int id; ++} ____cacheline_internodealigned_in_smp; ++ ++struct pcpu_info pcpu_info[NR_CPUS]; ++ ++#define pcpu(nr) (&pcpu_info[nr]) ++#define this_pcpu() (pcpu(smp_processor_id())) + + /* + * The domain tree (rq->sd) is 
protected by RCU's quiescent state transition. +@@ -269,13 +320,399 @@ static DEFINE_PER_CPU(struct runqueue, r + * The domain tree of any CPU may only be accessed from within + * preempt-disabled sections. + */ ++#define for_each_pdomain(sd, domain) \ ++for (domain = rcu_dereference(sd); domain; domain = domain->parent) ++ + #define for_each_domain(cpu, domain) \ +-for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) ++ for_each_pdomain(vcpu_rq(cpu)->sd, domain) ++ ++#ifdef CONFIG_SCHED_VCPU ++ ++u32 vcpu_sched_timeslice = 5; ++u32 vcpu_timeslice = 0; ++EXPORT_SYMBOL(vcpu_sched_timeslice); ++EXPORT_SYMBOL(vcpu_timeslice); ++ ++extern spinlock_t fairsched_lock; ++static struct vcpu_scheduler default_vsched, idle_vsched; ++static struct vcpu_info boot_vcpu, boot_idle_vcpu; ++ ++#define vsched_default_vsched() (&default_vsched) ++#define vsched_default_vcpu(id) (default_vsched.vcpu[id]) ++ ++/* ++ * All macroses below could be used without locks, if there is no ++ * strict ordering requirements, because we assume, that: ++ * ++ * 1. VCPU could not disappear "on the fly" (FIXME) ++ * ++ * 2. p->vsched access is atomic. 
++ */ ++ ++#define task_vsched(tsk) ((tsk)->vsched) ++#define this_vsched() (task_vsched(current)) ++ ++#define vsched_vcpu(vsched, id) ((vsched)->vcpu[id]) ++#define this_vcpu() (task_vcpu(current)) ++#define task_vcpu(p) ((p)->vcpu) ++ ++#define vsched_id(vsched) ((vsched)->id) ++#define vsched_vcpu_online_map(vsched) ((vsched)->vcpu_online_map) ++#define vsched_num_online_vcpus(vsched) ((vsched)->num_online_vcpus) ++#define vsched_pcpu_running_map(vsched) ((vsched)->pcpu_running_map) ++ ++#define vcpu_vsched(vcpu) ((vcpu)->vsched) ++#define vcpu_last_pcpu(vcpu) ((vcpu)->last_pcpu) ++#define vcpu_isset(vcpu, mask) (cpu_isset((vcpu)->id, mask)) ++#define vcpu_is_offline(vcpu) (!vcpu_isset(vcpu, \ ++ vcpu_vsched(vcpu)->vcpu_online_map)) ++ ++static int __add_vcpu(struct vcpu_scheduler *vsched, int id); ++ ++#else /* CONFIG_SCHED_VCPU */ ++ ++static DEFINE_PER_CPU(struct vcpu_info, vcpu_info); ++ ++#define task_vsched(p) NULL ++#define this_vcpu() (task_vcpu(current)) ++#define task_vcpu(p) (vcpu(task_cpu(p))) ++ ++#define vsched_vcpu(sched, id) (vcpu(id)) ++#define vsched_id(vsched) 0 ++#define vsched_default_vsched() NULL ++#define vsched_default_vcpu(id) (vcpu(id)) ++ ++#define vsched_vcpu_online_map(vsched) (cpu_online_map) ++#define vsched_num_online_vcpus(vsched) (num_online_cpus()) ++#define vsched_pcpu_running_map(vsched) (cpu_online_map) ++ ++#define vcpu(id) (&per_cpu(vcpu_info, id)) ++ ++#define vcpu_vsched(vcpu) NULL ++#define vcpu_last_pcpu(vcpu) ((vcpu)->id) ++#define vcpu_isset(vcpu, mask) (cpu_isset((vcpu)->id, mask)) ++#define vcpu_is_offline(vcpu) (cpu_is_offline((vcpu)->id)) ++ ++#endif /* CONFIG_SCHED_VCPU */ ++ ++#define this_rq() (vcpu_rq(this_vcpu())) ++#define task_rq(p) (vcpu_rq(task_vcpu(p))) ++#define vcpu_rq(vcpu) (&(vcpu)->rq) ++#define get_vcpu() ({ preempt_disable(); this_vcpu(); }) ++#define put_vcpu() ({ put_cpu(); }) ++#define rq_vcpu(__rq) (container_of((__rq), struct vcpu_info, rq)) ++ ++/** ++ * idle_task - return the idle task 
for a given cpu. ++ * @cpu: the processor in question. ++ */ ++task_t *idle_task(int cpu) ++{ ++ return pcpu(cpu)->idle; ++} ++ ++#ifdef CONFIG_SMP ++static inline void update_rq_cpu_load(runqueue_t *this_rq) ++{ ++ unsigned long old_load, this_load; ++ int i; ++ ++ if (unlikely(this_rq->nr_running == 0)) { ++ for (i = 0; i < 3; i++) ++ this_rq->cpu_load[i] = 0; ++ return; ++ } ++ ++ this_load = this_rq->nr_running * SCHED_LOAD_SCALE; ++ for (i = 0; i < 3; i++) { ++ unsigned long new_load = this_load; ++ int scale = 1 << i; ++ old_load = this_rq->cpu_load[i]; ++ /* ++ * Round up the averaging division if load is increasing. This ++ * prevents us from getting stuck on 9 if the load is 10, for ++ * example. ++ */ ++ if (new_load > old_load) ++ new_load += scale-1; ++ this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; ++ } ++} ++#else /* CONFIG_SMP */ ++static inline void update_rq_cpu_load(runqueue_t *this_rq) ++{ ++} ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_SCHED_VCPU ++ ++void fastcall vsched_cpu_online_map(struct vcpu_scheduler *vsched, ++ cpumask_t *mask) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&fairsched_lock, flags); ++ *mask = vsched->vcpu_online_map; ++ spin_unlock_irqrestore(&fairsched_lock, flags); ++} ++ ++static inline void set_task_vsched(task_t *p, struct vcpu_scheduler *vsched) ++{ ++ /* NOTE: set_task_cpu() is required after every set_task_vsched()! 
*/ ++ p->vsched = vsched; ++ p->vsched_id = vsched_id(vsched); ++} ++ ++inline void set_task_cpu(struct task_struct *p, unsigned int vcpu_id) ++{ ++ p->vcpu = vsched_vcpu(task_vsched(p), vcpu_id); ++ p->vcpu_id = vcpu_id; ++} ++ ++static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu) ++{ ++ p->vcpu = vcpu; ++ p->vcpu_id = vcpu->id; ++} ++ ++/* this is called when rq->nr_running changes from 0 to 1 */ ++static void vcpu_attach(runqueue_t *rq) ++{ ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ ++ vcpu = rq_vcpu(rq); ++ vsched = vcpu_vsched(vcpu); ++ ++ BUG_ON(vcpu->active); ++ spin_lock(&fairsched_lock); ++ vcpu->active = 1; ++ if (!vcpu->running) ++ list_move_tail(&vcpu->list, &vsched->active_list); ++ ++ fairsched_incrun(vsched->node); ++ spin_unlock(&fairsched_lock); ++} ++ ++/* this is called when rq->nr_running changes from 1 to 0 */ ++static void vcpu_detach(runqueue_t *rq) ++{ ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ ++ vcpu = rq_vcpu(rq); ++ vsched = vcpu_vsched(vcpu); ++ BUG_ON(!vcpu->active); ++ ++ spin_lock(&fairsched_lock); ++ fairsched_decrun(vsched->node); ++ ++ vcpu->active = 0; ++ if (!vcpu->running) ++ list_move_tail(&vcpu->list, &vsched->idle_list); ++ spin_unlock(&fairsched_lock); ++} ++ ++static inline void __vcpu_get(vcpu_t vcpu) ++{ ++ struct pcpu_info *pcpu; ++ struct vcpu_scheduler *vsched; ++ ++ BUG_ON(!this_vcpu()->running); ++ ++ pcpu = this_pcpu(); ++ vsched = vcpu_vsched(vcpu); ++ ++ pcpu->vcpu = vcpu; ++ pcpu->vsched = vsched; ++ ++ fairsched_inccpu(vsched->node); ++ ++ list_move_tail(&vcpu->list, &vsched->running_list); ++ vcpu->start_time = jiffies; ++ vcpu->last_pcpu = pcpu->id; ++ vcpu->running = 1; ++ __set_bit(vcpu->id, vsched->vcpu_running_map.bits); ++ __set_bit(pcpu->id, vsched->pcpu_running_map.bits); ++#ifdef CONFIG_SMP ++ vcpu_rq(vcpu)->sd = pcpu->sd; ++#endif ++} ++ ++static void vcpu_put(vcpu_t vcpu) ++{ ++ struct vcpu_scheduler *vsched; ++ struct pcpu_info *cur_pcpu; ++ runqueue_t *rq; ++ 
++ vsched = vcpu_vsched(vcpu); ++ rq = vcpu_rq(vcpu); ++ cur_pcpu = this_pcpu(); ++ ++ BUG_ON(!vcpu->running); ++ ++ spin_lock(&fairsched_lock); ++ vcpu->running = 0; ++ list_move_tail(&vcpu->list, ++ vcpu->active ? &vsched->active_list : &vsched->idle_list); ++ fairsched_deccpu(vsched->node); ++ __clear_bit(vcpu->id, vsched->vcpu_running_map.bits); ++ if (vsched != this_vsched()) ++ __clear_bit(cur_pcpu->id, vsched->pcpu_running_map.bits); ++ ++ if (!rq->nr_running) ++ rq->expired_timestamp = 0; ++ /* from this point task_running(prev_rq, prev) will be 0 */ ++ rq->curr = cur_pcpu->idle; ++ update_rq_cpu_load(rq); ++ spin_unlock(&fairsched_lock); ++} ++ ++static vcpu_t schedule_vcpu(vcpu_t cur_vcpu, cycles_t cycles) ++{ ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ runqueue_t *rq; ++#ifdef CONFIG_FAIRSCHED ++ struct fairsched_node *node, *nodec; ++ ++ nodec = vcpu_vsched(cur_vcpu)->node; ++ node = nodec; ++#endif ++ ++ BUG_ON(!cur_vcpu->running); ++restart: ++ if (unlikely(system_state == SYSTEM_BOOTING)) ++ goto affine; ++ ++ spin_lock(&fairsched_lock); ++#ifdef CONFIG_FAIRSCHED ++ node = fairsched_schedule(node, nodec, ++ cur_vcpu->active, ++ cycles); ++ if (unlikely(node == NULL)) ++ goto idle; ++ ++ vsched = node->vsched; ++#else ++ vsched = &default_vsched; ++#endif ++ /* FIXME: optimize vcpu switching, maybe we do not need to call ++ fairsched_schedule() at all if vcpu is still active and too ++ little time have passed so far */ ++ if (cur_vcpu->vsched == vsched && cur_vcpu->active && ++ jiffies - cur_vcpu->start_time < msecs_to_jiffies(vcpu_sched_timeslice)) { ++ vcpu = cur_vcpu; ++ goto done; ++ } ++ ++ if (list_empty(&vsched->active_list)) { ++ /* nothing except for this cpu can be scheduled */ ++ if (likely(cur_vcpu->vsched == vsched && cur_vcpu->active)) { ++ /* ++ * Current vcpu is the one we need. We have not ++ * put it yet, so it's not on the active_list. 
++ */ ++ vcpu = cur_vcpu; ++ goto done; ++ } else ++ goto none; ++ } ++ ++ /* select vcpu and add to running list */ ++ vcpu = list_entry(vsched->active_list.next, struct vcpu_info, list); ++ __vcpu_get(vcpu); ++done: ++ spin_unlock(&fairsched_lock); ++ ++ rq = vcpu_rq(vcpu); ++ if (unlikely(vcpu != cur_vcpu)) { ++ spin_unlock(&vcpu_rq(cur_vcpu)->lock); ++ spin_lock(&rq->lock); ++ if (unlikely(!rq->nr_running)) { ++ /* race with balancing? */ ++ spin_unlock(&rq->lock); ++ vcpu_put(vcpu); ++ spin_lock(&vcpu_rq(cur_vcpu)->lock); ++ goto restart; ++ } ++ } ++ BUG_ON(!rq->nr_running); ++ return vcpu; ++ ++none: ++#ifdef CONFIG_FAIRSCHED ++ spin_unlock(&fairsched_lock); ++ ++ /* fairsched doesn't schedule more CPUs than we have active */ ++ BUG_ON(1); ++#else ++ goto idle; ++#endif ++ ++idle: ++ vcpu = task_vcpu(this_pcpu()->idle); ++ __vcpu_get(vcpu); ++ spin_unlock(&fairsched_lock); ++ spin_unlock(&vcpu_rq(cur_vcpu)->lock); ++ ++ spin_lock(&vcpu_rq(vcpu)->lock); ++ return vcpu; ++ ++affine: ++ vcpu = vsched_vcpu(&default_vsched, raw_smp_processor_id()); ++ /* current VCPU busy, continue */ ++ if (cur_vcpu == vcpu && vcpu->active) ++ return cur_vcpu; ++ /* current is idle and nothing to run, keep idle */ ++ if (vcpu_vsched(cur_vcpu) == &idle_vsched && !vcpu->active) ++ return cur_vcpu; ++ ++ /* need to switch to idle... */ ++ if (cur_vcpu == vcpu) { ++ spin_lock(&fairsched_lock); ++ goto idle; ++ } ++ ++ /* ... 
and from idle */ ++ spin_lock(&fairsched_lock); ++ __vcpu_get(vcpu); ++ goto done; ++} ++ ++#else /* CONFIG_SCHED_VCPU */ ++ ++#define set_task_vsched(task, vsched) do { } while (0) ++ ++static inline void vcpu_attach(runqueue_t *rq) ++{ ++} ++ ++static inline void vcpu_detach(runqueue_t *rq) ++{ ++} ++ ++static inline void vcpu_put(vcpu_t vcpu) ++{ ++} ++ ++static inline vcpu_t schedule_vcpu(vcpu_t prev_vcpu, cycles_t cycles) ++{ ++ return prev_vcpu; ++} ++ ++static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu) ++{ ++ set_task_pcpu(p, vcpu->id); ++} ++ ++#endif /* CONFIG_SCHED_VCPU */ ++ ++int vcpu_online(int cpu) ++{ ++ return cpu_isset(cpu, vsched_vcpu_online_map(this_vsched())); ++} + +-#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) +-#define this_rq() (&__get_cpu_var(runqueues)) +-#define task_rq(p) cpu_rq(task_cpu(p)) +-#define cpu_curr(cpu) (cpu_rq(cpu)->curr) + + #ifndef prepare_arch_switch + # define prepare_arch_switch(next) do { } while (0) +@@ -284,6 +721,11 @@ for (domain = rcu_dereference(cpu_rq(cpu + # define finish_arch_switch(prev) do { } while (0) + #endif + ++struct kernel_stat_glob kstat_glob; ++spinlock_t kstat_glb_lock = SPIN_LOCK_UNLOCKED; ++EXPORT_SYMBOL(kstat_glob); ++EXPORT_SYMBOL(kstat_glb_lock); ++ + #ifndef __ARCH_WANT_UNLOCKED_CTXSW + static inline int task_running(runqueue_t *rq, task_t *p) + { +@@ -300,7 +742,7 @@ static inline void finish_lock_switch(ru + /* this is a valid case when another task releases the spinlock */ + rq->lock.owner = current; + #endif +- spin_unlock_irq(&rq->lock); ++ spin_unlock(&rq->lock); + } + + #else /* __ARCH_WANT_UNLOCKED_CTXSW */ +@@ -374,6 +816,208 @@ static inline void task_rq_unlock(runque + spin_unlock_irqrestore(&rq->lock, *flags); + } + ++#ifdef CONFIG_VE ++#define ve_nr_iowait_inc(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_iowait++; \ ++ } while(0) ++#define ve_nr_iowait_dec(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_iowait--; \ ++ } while(0) ++#define 
ve_nr_unint_inc(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_unint++; \ ++ } while(0) ++#define ve_nr_unint_dec(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_unint--; \ ++ } while(0) ++ ++#define cycles_after(a, b) ((long long)(b) - (long long)(a) < 0) ++ ++cycles_t ve_sched_get_idle_time(struct ve_struct *ve, int cpu) ++{ ++ struct ve_cpu_stats *ve_stat; ++ unsigned v; ++ cycles_t strt, ret, cycles; ++ ++ ve_stat = VE_CPU_STATS(ve, cpu); ++ do { ++ v = read_seqcount_begin(&ve_stat->stat_lock); ++ ret = ve_stat->idle_time; ++ strt = ve_stat->strt_idle_time; ++ if (strt && nr_uninterruptible_ve(ve) == 0) { ++ cycles = get_cycles(); ++ if (cycles_after(cycles, strt)) ++ ret += cycles - strt; ++ } ++ } while (read_seqcount_retry(&ve_stat->stat_lock, v)); ++ return ret; ++} ++EXPORT_SYMBOL(ve_sched_get_idle_time); ++ ++cycles_t ve_sched_get_iowait_time(struct ve_struct *ve, int cpu) ++{ ++ struct ve_cpu_stats *ve_stat; ++ unsigned v; ++ cycles_t strt, ret, cycles; ++ ++ ve_stat = VE_CPU_STATS(ve, cpu); ++ do { ++ v = read_seqcount_begin(&ve_stat->stat_lock); ++ ret = ve_stat->iowait_time; ++ strt = ve_stat->strt_idle_time; ++ if (strt && nr_iowait_ve(ve) > 0) { ++ cycles = get_cycles(); ++ if (cycles_after(cycles, strt)) ++ ret += cycles - strt; ++ } ++ } while (read_seqcount_retry(&ve_stat->stat_lock, v)); ++ return ret; ++} ++ ++EXPORT_SYMBOL(ve_sched_get_iowait_time); ++ ++static inline void ve_stop_idle(struct ve_struct *ve, ++ unsigned int cpu, cycles_t cycles) ++{ ++ struct ve_cpu_stats *ve_stat; ++ ++ ve_stat = VE_CPU_STATS(ve, cpu); ++ ++ write_seqcount_begin(&ve_stat->stat_lock); ++ if (ve_stat->strt_idle_time) { ++ if (cycles_after(cycles, ve_stat->strt_idle_time)) { ++ if (nr_iowait_ve(ve) == 0) ++ ve_stat->idle_time += cycles - ++ ve_stat->strt_idle_time; ++ else ++ ve_stat->iowait_time += cycles - ++ ve_stat->strt_idle_time; ++ } ++ ve_stat->strt_idle_time = 0; ++ } ++ write_seqcount_end(&ve_stat->stat_lock); ++} ++ ++static inline 
void ve_strt_idle(struct ve_struct *ve, ++ unsigned int cpu, cycles_t cycles) ++{ ++ struct ve_cpu_stats *ve_stat; ++ ++ ve_stat = VE_CPU_STATS(ve, cpu); ++ ++ write_seqcount_begin(&ve_stat->stat_lock); ++ ve_stat->strt_idle_time = cycles; ++ write_seqcount_end(&ve_stat->stat_lock); ++} ++ ++#define ve_nr_running_inc(env, cpu, cycles) do { \ ++ if (++VE_CPU_STATS((env), (cpu))->nr_running == 1) \ ++ ve_stop_idle(env, cpu, cycles); \ ++ } while (0) ++#define ve_nr_running_dec(env, cpu, cyclses) do { \ ++ if (--VE_CPU_STATS((env), (cpu))->nr_running == 0) \ ++ ve_strt_idle(env, cpu, cycles); \ ++ } while (0) ++ ++void ve_sched_attach(struct ve_struct *envid) ++{ ++ struct task_struct *tsk; ++ unsigned int cpu; ++ cycles_t cycles; ++ ++ tsk = current; ++ preempt_disable(); ++ cycles = get_cycles(); ++ cpu = task_cpu(tsk); ++ ve_nr_running_dec(VE_TASK_INFO(tsk)->owner_env, cpu, cycles); ++ ve_nr_running_inc(envid, cpu, cycles); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(ve_sched_attach); ++ ++static inline void write_wakeup_stamp(struct task_struct *p, cycles_t cyc) ++{ ++ struct ve_task_info *ti; ++ ++ ti = VE_TASK_INFO(p); ++ write_seqcount_begin(&ti->wakeup_lock); ++ ti->wakeup_stamp = cyc; ++ write_seqcount_end(&ti->wakeup_lock); ++} ++ ++static inline void update_sched_lat(struct task_struct *t, cycles_t cycles) ++{ ++ int cpu; ++ cycles_t ve_wstamp; ++ ++ /* safe due to runqueue lock */ ++ cpu = smp_processor_id(); ++ ve_wstamp = t->ve_task_info.wakeup_stamp; ++ ++ if (ve_wstamp && cycles > ve_wstamp) { ++ KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat, ++ cpu, cycles - ve_wstamp); ++ KSTAT_LAT_PCPU_ADD(&t->ve_task_info.exec_env->sched_lat_ve, ++ cpu, cycles - ve_wstamp); ++ } ++} ++ ++static inline void update_ve_task_info(task_t *prev, cycles_t cycles) ++{ ++#ifdef CONFIG_FAIRSCHED ++ if (prev != this_pcpu()->idle) { ++#else ++ if (prev != this_rq()->idle) { ++#endif ++ VE_CPU_STATS(prev->ve_task_info.owner_env, ++ smp_processor_id())->used_time += ++ cycles - 
prev->ve_task_info.sched_time; ++ ++ prev->ve_task_info.sched_time = cycles; ++ } ++} ++ ++#else ++#define ve_nr_running_inc(env, cpu, cycles) do { } while(0) ++#define ve_nr_running_dec(env, cpu, cycles) do { } while(0) ++#define ve_nr_iowait_inc(env, cpu) do { } while(0) ++#define ve_nr_iowait_dec(env, cpu) do { } while(0) ++#define ve_nr_unint_inc(env, cpu) do { } while(0) ++#define ve_nr_unint_dec(env, cpu) do { } while(0) ++#define update_ve_task_info(prev, cycles) do { } while (0) ++#endif ++ ++struct task_nrs_struct { ++ long nr_running; ++ long nr_unint; ++ long nr_stopped; ++ long nr_sleeping; ++ long nr_iowait; ++ long long nr_switches; ++} ____cacheline_aligned_in_smp; ++ ++static struct task_nrs_struct glob_task_nrs[NR_CPUS]; ++#define nr_running_inc(cpu) do { glob_task_nrs[cpu].nr_running++; } while (0) ++#define nr_running_dec(cpu) do { glob_task_nrs[cpu].nr_running--; } while (0) ++#define nr_unint_inc(cpu) do { glob_task_nrs[cpu].nr_unint++; } while (0) ++#define nr_unint_dec(cpu) do { glob_task_nrs[cpu].nr_unint--; } while (0) ++#define nr_stopped_inc(cpu) do { glob_task_nrs[cpu].nr_stopped++; } while (0) ++#define nr_stopped_dec(cpu) do { glob_task_nrs[cpu].nr_stopped--; } while (0) ++#define nr_sleeping_inc(cpu) do { glob_task_nrs[cpu].nr_sleeping++; } while (0) ++#define nr_sleeping_dec(cpu) do { glob_task_nrs[cpu].nr_sleeping--; } while (0) ++#define nr_iowait_inc(cpu) do { glob_task_nrs[cpu].nr_iowait++; } while (0) ++#define nr_iowait_dec(cpu) do { glob_task_nrs[cpu].nr_iowait--; } while (0) ++ ++ ++unsigned long nr_zombie = 0; /* protected by tasklist_lock */ ++EXPORT_SYMBOL(nr_zombie); ++ ++atomic_t nr_dead = ATOMIC_INIT(0); ++EXPORT_SYMBOL(nr_dead); ++ + #ifdef CONFIG_SCHEDSTATS + /* + * bump this up when changing the output format or the meaning of an existing +@@ -666,8 +1310,19 @@ static int effective_prio(task_t *p) + */ + static inline void __activate_task(task_t *p, runqueue_t *rq) + { ++ cycles_t cycles; ++ ++#ifdef CONFIG_VE ++ 
cycles = get_cycles(); ++ write_wakeup_stamp(p, cycles); ++ p->ve_task_info.sleep_time += cycles; ++#endif + enqueue_task(p, rq->active); + rq->nr_running++; ++ ve_nr_running_inc(VE_TASK_INFO(p)->owner_env, task_cpu(p), cycles); ++ nr_running_inc(smp_processor_id()); ++ if (rq->nr_running == 1) ++ vcpu_attach(rq); + } + + /* +@@ -800,9 +1455,38 @@ static void activate_task(task_t *p, run + */ + static void deactivate_task(struct task_struct *p, runqueue_t *rq) + { ++ cycles_t cycles; ++#ifdef CONFIG_VE ++ unsigned int cpu, pcpu; ++ struct ve_struct *ve; ++ ++ cycles = get_cycles(); ++ cpu = task_cpu(p); ++ pcpu = smp_processor_id(); ++ ve = p->ve_task_info.owner_env; ++ ++ p->ve_task_info.sleep_time -= cycles; ++#endif ++ if (p->state == TASK_UNINTERRUPTIBLE) { ++ ve_nr_unint_inc(ve, cpu); ++ nr_unint_inc(pcpu); ++ } ++ if (p->state == TASK_INTERRUPTIBLE) { ++ rq->nr_sleeping++; ++ nr_sleeping_inc(pcpu); ++ } ++ if (p->state == TASK_STOPPED) { ++ rq->nr_stopped++; ++ nr_stopped_inc(pcpu); ++ } ++ ++ ve_nr_running_dec(VE_TASK_INFO(p)->owner_env, cpu, cycles); ++ nr_running_dec(pcpu); + rq->nr_running--; + dequeue_task(p, p->array); + p->array = NULL; ++ if (rq->nr_running == 0) ++ vcpu_detach(rq); + } + + /* +@@ -813,18 +1497,22 @@ static void deactivate_task(struct task_ + * the target CPU. 
+ */ + #ifdef CONFIG_SMP ++/* FIXME: need to add vsched arg */ + static void resched_task(task_t *p) + { + int cpu; + ++#if 0 ++ /* FIXME: this fails due to idle rq->curre == idle */ + assert_spin_locked(&task_rq(p)->lock); ++#endif + + if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) + return; + + set_tsk_thread_flag(p, TIF_NEED_RESCHED); + +- cpu = task_cpu(p); ++ cpu = task_pcpu(p); + if (cpu == smp_processor_id()) + return; + +@@ -847,15 +1535,35 @@ static inline void resched_task(task_t * + */ + inline int task_curr(const task_t *p) + { +- return cpu_curr(task_cpu(p)) == p; ++ return task_rq(p)->curr == p; + } + +-#ifdef CONFIG_SMP ++/** ++ * idle_cpu - is a given cpu idle currently? ++ * @cpu: the processor in question. ++ */ ++inline int idle_cpu(int cpu) ++{ ++ return pcpu(cpu)->vsched == &idle_vsched; ++} ++ ++EXPORT_SYMBOL_GPL(idle_cpu); ++ ++static inline int idle_vcpu(vcpu_t cpu) ++{ ++#ifdef CONFIG_SCHED_VCPU ++ return !cpu->active; ++#else ++ return idle_cpu(cpu->id); ++#endif ++} ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU) + typedef struct { + struct list_head list; + + task_t *task; +- int dest_cpu; ++ vcpu_t dest_cpu; + + struct completion done; + } migration_req_t; +@@ -864,7 +1572,7 @@ typedef struct { + * The task's runqueue lock must be held. + * Returns true if you have to wait for migration thread. + */ +-static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) ++static int migrate_task(task_t *p, vcpu_t dest_cpu, migration_req_t *req) + { + runqueue_t *rq = task_rq(p); + +@@ -872,8 +1580,13 @@ static int migrate_task(task_t *p, int d + * If the task is not on a runqueue (and not running), then + * it is sufficient to simply update the task's cpu field. 
+ */ ++#ifdef CONFIG_SCHED_VCPU ++ BUG_ON(task_vsched(p) == &idle_vsched); ++ BUG_ON(vcpu_vsched(dest_cpu) == &idle_vsched); ++#endif + if (!p->array && !task_running(rq, p)) { +- set_task_cpu(p, dest_cpu); ++ set_task_vsched(p, vcpu_vsched(dest_cpu)); ++ set_task_vcpu(p, dest_cpu); + return 0; + } + +@@ -913,6 +1626,7 @@ repeat: + } + task_rq_unlock(rq, &flags); + } ++EXPORT_SYMBOL_GPL(wait_task_inactive); + + /*** + * kick_process - kick a running thread to enter/exit the kernel +@@ -932,21 +1646,26 @@ void kick_process(task_t *p) + int cpu; + + preempt_disable(); +- cpu = task_cpu(p); ++ cpu = task_pcpu(p); + if ((cpu != smp_processor_id()) && task_curr(p)) ++ /* FIXME: ??? think over */ ++ /* should add something like get_pcpu(cpu)->vcpu->id == task_cpu(p), ++ but with serialization of vcpu access... */ + smp_send_reschedule(cpu); + preempt_enable(); + } ++#endif + ++#ifdef CONFIG_SMP + /* + * Return a low guess at the load of a migration-source cpu. + * + * We want to under-estimate the load of migration sources, to + * balance conservatively. + */ +-static inline unsigned long source_load(int cpu, int type) ++static inline unsigned long source_load(vcpu_t cpu, int type) + { +- runqueue_t *rq = cpu_rq(cpu); ++ runqueue_t *rq = vcpu_rq(cpu); + unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + if (type == 0) + return load_now; +@@ -957,9 +1676,9 @@ static inline unsigned long source_load( + /* + * Return a high guess at the load of a migration-target cpu + */ +-static inline unsigned long target_load(int cpu, int type) ++static inline unsigned long target_load(vcpu_t cpu, int type) + { +- runqueue_t *rq = cpu_rq(cpu); ++ runqueue_t *rq = vcpu_rq(cpu); + unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + if (type == 0) + return load_now; +@@ -972,33 +1691,35 @@ static inline unsigned long target_load( + * domain. 
+ */ + static struct sched_group * +-find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) ++find_idlest_group(struct sched_domain *sd, struct task_struct *p, vcpu_t this_cpu) + { + struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; + unsigned long min_load = ULONG_MAX, this_load = 0; + int load_idx = sd->forkexec_idx; + int imbalance = 100 + (sd->imbalance_pct-100)/2; ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ int this_pcpu; + ++ vsched = vcpu_vsched(this_cpu); ++ this_pcpu = vcpu_last_pcpu(this_cpu); + do { + unsigned long load, avg_load; + int local_group; + int i; + +- /* Skip over this group if it has no CPUs allowed */ +- if (!cpus_intersects(group->cpumask, p->cpus_allowed)) +- goto nextgroup; +- +- local_group = cpu_isset(this_cpu, group->cpumask); ++ local_group = cpu_isset(this_pcpu, group->cpumask); + + /* Tally up the load of all CPUs in the group */ + avg_load = 0; + + for_each_cpu_mask(i, group->cpumask) { ++ vcpu = pcpu(i)->vcpu; + /* Bias balancing toward cpus of our domain */ + if (local_group) +- load = source_load(i, load_idx); ++ load = source_load(vcpu, load_idx); + else +- load = target_load(i, load_idx); ++ load = target_load(vcpu, load_idx); + + avg_load += load; + } +@@ -1013,7 +1734,6 @@ find_idlest_group(struct sched_domain *s + min_load = avg_load; + idlest = group; + } +-nextgroup: + group = group->next; + } while (group != sd->groups); + +@@ -1025,23 +1745,31 @@ nextgroup: + /* + * find_idlest_queue - find the idlest runqueue among the cpus in group. 
+ */ +-static int +-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) ++static vcpu_t ++find_idlest_cpu(struct sched_group *group, struct task_struct *p, vcpu_t this_cpu) + { +- cpumask_t tmp; + unsigned long load, min_load = ULONG_MAX; +- int idlest = -1; ++ cpumask_t vmask; ++ struct vcpu_scheduler *vsched; ++ vcpu_t idlest = (vcpu_t)-1; ++ vcpu_t vcpu; + int i; + +- /* Traverse only the allowed CPUs */ +- cpus_and(tmp, group->cpumask, p->cpus_allowed); ++ vsched = vcpu_vsched(this_cpu); ++ BUG_ON(vsched != task_vsched(p)); + +- for_each_cpu_mask(i, tmp) { +- load = source_load(i, 0); ++ cpus_and(vmask, vsched_vcpu_online_map(vsched), p->cpus_allowed); ++ for_each_cpu_mask(i, vmask) { ++ vcpu = vsched_vcpu(vsched, i); + +- if (load < min_load || (load == min_load && i == this_cpu)) { ++ if (!cpu_isset(vcpu_last_pcpu(vcpu), group->cpumask)) ++ continue; ++ ++ load = source_load(vcpu, 0); ++ ++ if (load < min_load || (load == min_load && vcpu == this_cpu)) { + min_load = load; +- idlest = i; ++ idlest = vcpu; + } + } + +@@ -1059,7 +1787,7 @@ find_idlest_cpu(struct sched_group *grou + * + * preempt must be disabled. + */ +-static int sched_balance_self(int cpu, int flag) ++static vcpu_t sched_balance_self(vcpu_t cpu, int flag) + { + struct task_struct *t = current; + struct sched_domain *tmp, *sd = NULL; +@@ -1071,7 +1799,7 @@ static int sched_balance_self(int cpu, i + while (sd) { + cpumask_t span; + struct sched_group *group; +- int new_cpu; ++ vcpu_t new_cpu; + int weight; + + span = sd->span; +@@ -1080,7 +1808,7 @@ static int sched_balance_self(int cpu, i + goto nextlevel; + + new_cpu = find_idlest_cpu(group, t, cpu); +- if (new_cpu == -1 || new_cpu == cpu) ++ if (new_cpu == (vcpu_t)(-1) || new_cpu == cpu) + goto nextlevel; + + /* Now try balancing at a lower domain level */ +@@ -1111,21 +1839,27 @@ nextlevel: + * Returns the CPU we should wake onto. 
+ */ + #if defined(ARCH_HAS_SCHED_WAKE_IDLE) +-static int wake_idle(int cpu, task_t *p) ++static vcpu_t wake_idle(vcpu_t cpu, task_t *p) + { +- cpumask_t tmp; ++ cpumask_t vtmp; + struct sched_domain *sd; ++ struct vcpu_scheduler *vsched; + int i; + +- if (idle_cpu(cpu)) ++ if (idle_vcpu(cpu)) + return cpu; + ++ vsched = vcpu_vsched(cpu); ++ cpus_and(vtmp, vsched_vcpu_online_map(vsched), p->cpus_allowed); + for_each_domain(cpu, sd) { + if (sd->flags & SD_WAKE_IDLE) { +- cpus_and(tmp, sd->span, p->cpus_allowed); +- for_each_cpu_mask(i, tmp) { +- if (idle_cpu(i)) +- return i; ++ for_each_cpu_mask(i, vtmp) { ++ vcpu_t vcpu; ++ vcpu = vsched_vcpu(vsched, i); ++ if (!cpu_isset(vcpu_last_pcpu(vcpu), sd->span)) ++ continue; ++ if (idle_vcpu(vcpu)) ++ return vcpu; + } + } + else +@@ -1134,7 +1868,7 @@ static int wake_idle(int cpu, task_t *p) + return cpu; + } + #else +-static inline int wake_idle(int cpu, task_t *p) ++static inline vcpu_t wake_idle(vcpu_t cpu, task_t *p) + { + return cpu; + } +@@ -1156,15 +1890,17 @@ static inline int wake_idle(int cpu, tas + */ + static int try_to_wake_up(task_t *p, unsigned int state, int sync) + { +- int cpu, this_cpu, success = 0; ++ vcpu_t cpu, this_cpu; ++ int success = 0; + unsigned long flags; + long old_state; + runqueue_t *rq; + #ifdef CONFIG_SMP + unsigned long load, this_load; + struct sched_domain *sd, *this_sd = NULL; +- int new_cpu; ++ vcpu_t new_cpu; + #endif ++ cpu = NULL; + + rq = task_rq_lock(p, &flags); + old_state = p->state; +@@ -1174,8 +1910,8 @@ static int try_to_wake_up(task_t *p, uns + if (p->array) + goto out_running; + +- cpu = task_cpu(p); +- this_cpu = smp_processor_id(); ++ cpu = task_vcpu(p); ++ this_cpu = this_vcpu(); + + #ifdef CONFIG_SMP + if (unlikely(task_running(rq, p))) +@@ -1184,20 +1920,23 @@ static int try_to_wake_up(task_t *p, uns + new_cpu = cpu; + + schedstat_inc(rq, ttwu_cnt); ++ /* FIXME: add vsched->last_vcpu array to optimize wakeups in different vsched */ ++ if (vcpu_vsched(cpu) != 
vcpu_vsched(this_cpu)) ++ goto out_set_cpu; + if (cpu == this_cpu) { + schedstat_inc(rq, ttwu_local); + goto out_set_cpu; + } + + for_each_domain(this_cpu, sd) { +- if (cpu_isset(cpu, sd->span)) { ++ if (cpu_isset(vcpu_last_pcpu(cpu), sd->span)) { + schedstat_inc(sd, ttwu_wake_remote); + this_sd = sd; + break; + } + } + +- if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) ++ if (unlikely(!vcpu_isset(this_cpu, p->cpus_allowed))) + goto out_set_cpu; + + /* +@@ -1253,7 +1992,7 @@ static int try_to_wake_up(task_t *p, uns + out_set_cpu: + new_cpu = wake_idle(new_cpu, p); + if (new_cpu != cpu) { +- set_task_cpu(p, new_cpu); ++ set_task_vcpu(p, new_cpu); + task_rq_unlock(rq, &flags); + /* might preempt at this point */ + rq = task_rq_lock(p, &flags); +@@ -1263,13 +2002,21 @@ out_set_cpu: + if (p->array) + goto out_running; + +- this_cpu = smp_processor_id(); +- cpu = task_cpu(p); ++ this_cpu = this_vcpu(); ++ cpu = task_vcpu(p); + } + + out_activate: + #endif /* CONFIG_SMP */ +- if (old_state == TASK_UNINTERRUPTIBLE) { ++ if (old_state == TASK_INTERRUPTIBLE) { ++ nr_sleeping_dec(smp_processor_id()); ++ rq->nr_sleeping--; ++ } else if (old_state == TASK_STOPPED) { ++ nr_stopped_dec(smp_processor_id()); ++ rq->nr_stopped--; ++ } else if (old_state == TASK_UNINTERRUPTIBLE) { ++ nr_unint_dec(smp_processor_id()); ++ ve_nr_unint_dec(p->ve_task_info.owner_env, task_cpu(p)); + rq->nr_uninterruptible--; + /* + * Tasks on involuntary sleep don't earn +@@ -1324,17 +2071,45 @@ int fastcall wake_up_state(task_t *p, un + } + + /* ++ * init is special, it is forked from swapper (idle_vsched) and should ++ * belong to default_vsched, so we have to change it's vsched/fairsched manually ++ */ ++static void wake_up_init(task_t *p) ++{ ++ runqueue_t *rq; ++ unsigned long flags; ++ ++ /* we should change both fairsched node and vsched here */ ++ set_task_vsched(p, &default_vsched); ++ set_task_cpu(p, 0); ++ ++ /* ++ * can't call wake_up_new_task() directly here, ++ * since it assumes that 
a child belongs to the same vsched ++ */ ++ p->state = TASK_RUNNING; ++ p->sleep_avg = 0; ++ p->prio = effective_prio(p); ++ ++ rq = task_rq_lock(p, &flags); ++ __activate_task(p, rq); ++ task_rq_unlock(rq, &flags); ++} ++ ++/* + * Perform scheduler related setup for a newly forked process p. + * p is forked by current. + */ + void fastcall sched_fork(task_t *p, int clone_flags) + { +- int cpu = get_cpu(); +- ++ vcpu_t cpu; ++ ++ preempt_disable(); ++ cpu = this_vcpu(); + #ifdef CONFIG_SMP + cpu = sched_balance_self(cpu, SD_BALANCE_FORK); + #endif +- set_task_cpu(p, cpu); ++ set_task_vcpu(p, cpu); + + /* + * We mark the process as running here, but have not actually +@@ -1369,6 +2144,10 @@ void fastcall sched_fork(task_t *p, int + p->first_time_slice = 1; + current->time_slice >>= 1; + p->timestamp = sched_clock(); ++#ifdef CONFIG_VE ++ /*cosmetic: sleep till wakeup below*/ ++ p->ve_task_info.sleep_time -= get_cycles(); ++#endif + if (unlikely(!current->time_slice)) { + /* + * This case is rare, it happens when the parent has only +@@ -1379,7 +2158,7 @@ void fastcall sched_fork(task_t *p, int + scheduler_tick(); + } + local_irq_enable(); +- put_cpu(); ++ preempt_enable(); + } + + /* +@@ -1392,13 +2171,19 @@ void fastcall sched_fork(task_t *p, int + void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) + { + unsigned long flags; +- int this_cpu, cpu; ++ vcpu_t this_cpu, cpu; + runqueue_t *rq, *this_rq; + ++ if (unlikely(p->pid == 1)) { ++ wake_up_init(p); ++ return; ++ } ++ + rq = task_rq_lock(p, &flags); + BUG_ON(p->state != TASK_RUNNING); +- this_cpu = smp_processor_id(); +- cpu = task_cpu(p); ++ BUG_ON(task_vsched(current) != task_vsched(p)); ++ this_cpu = this_vcpu(); ++ cpu = task_vcpu(p); + + /* + * We decrease the sleep average of forking parents +@@ -1426,6 +2211,9 @@ void fastcall wake_up_new_task(task_t *p + p->array = current->array; + p->array->nr_active++; + rq->nr_running++; ++ ve_nr_running_inc(VE_TASK_INFO(p)->owner_env, ++ 
task_cpu(p), get_cycles()); ++ nr_running_inc(smp_processor_id()); + } + set_need_resched(); + } else +@@ -1439,7 +2227,7 @@ void fastcall wake_up_new_task(task_t *p + */ + this_rq = rq; + } else { +- this_rq = cpu_rq(this_cpu); ++ this_rq = vcpu_rq(this_cpu); + + /* + * Not the local CPU - must adjust timestamp. This should +@@ -1482,7 +2270,7 @@ void fastcall sched_exit(task_t *p) + * the sleep_avg of the parent as well. + */ + rq = task_rq_lock(p->parent, &flags); +- if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) { ++ if (p->first_time_slice && task_vcpu(p) == task_vcpu(p->parent)) { + p->parent->time_slice += p->time_slice; + if (unlikely(p->parent->time_slice > task_timeslice(p))) + p->parent->time_slice = task_timeslice(p); +@@ -1532,7 +2320,10 @@ static inline void finish_task_switch(ru + { + struct mm_struct *mm = rq->prev_mm; + unsigned long prev_task_flags; ++ vcpu_t prev_vcpu, vcpu; + ++ prev_vcpu = task_vcpu(prev); ++ vcpu = rq_vcpu(rq); + rq->prev_mm = NULL; + + /* +@@ -1549,6 +2340,10 @@ static inline void finish_task_switch(ru + prev_task_flags = prev->flags; + finish_arch_switch(prev); + finish_lock_switch(rq, prev); ++ if (prev_vcpu != vcpu) ++ vcpu_put(prev_vcpu); ++ local_irq_enable(); ++ + if (mm) + mmdrop(mm); + if (unlikely(prev_task_flags & PF_DEAD)) +@@ -1569,8 +2364,9 @@ asmlinkage void schedule_tail(task_t *pr + preempt_enable(); + #endif + if (current->set_child_tid) +- put_user(current->pid, current->set_child_tid); ++ put_user(virt_pid(current), current->set_child_tid); + } ++EXPORT_SYMBOL_GPL(schedule_tail); + + /* + * context_switch - switch to the new MM and the new +@@ -1610,20 +2406,26 @@ task_t * context_switch(runqueue_t *rq, + */ + unsigned long nr_running(void) + { +- unsigned long i, sum = 0; ++ unsigned long i, sum; + ++ sum = 0; + for_each_online_cpu(i) +- sum += cpu_rq(i)->nr_running; ++ sum += glob_task_nrs[i].nr_running; ++ ++ if (unlikely((long)sum < 0)) ++ sum = 0; + + return sum; + } 
++EXPORT_SYMBOL(nr_running); + + unsigned long nr_uninterruptible(void) + { +- unsigned long i, sum = 0; ++ unsigned long i, sum; + ++ sum = 0; + for_each_cpu(i) +- sum += cpu_rq(i)->nr_uninterruptible; ++ sum += glob_task_nrs[i].nr_unint; + + /* + * Since we read the counters lockless, it might be slightly +@@ -1635,31 +2437,133 @@ unsigned long nr_uninterruptible(void) + return sum; + } + ++EXPORT_SYMBOL(nr_uninterruptible); ++ + unsigned long long nr_context_switches(void) + { +- unsigned long long i, sum = 0; ++ unsigned long long i, sum; + ++ sum = 0; + for_each_cpu(i) +- sum += cpu_rq(i)->nr_switches; ++ sum += glob_task_nrs[i].nr_switches; ++ ++ if (unlikely((long)sum < 0)) ++ sum = 0; + + return sum; + } + ++EXPORT_SYMBOL(nr_context_switches); ++ + unsigned long nr_iowait(void) + { +- unsigned long i, sum = 0; ++ unsigned long i, sum; + ++ sum = 0; + for_each_cpu(i) +- sum += atomic_read(&cpu_rq(i)->nr_iowait); ++ sum += glob_task_nrs[i].nr_iowait; ++ ++ if (unlikely((long)sum < 0)) ++ sum = 0; + + return sum; + } + +-#ifdef CONFIG_SMP ++EXPORT_SYMBOL(nr_iowait); ++ ++unsigned long nr_stopped(void) ++{ ++ unsigned long i, sum; ++ ++ sum = 0; ++ for_each_cpu(i) ++ sum += glob_task_nrs[i].nr_stopped; ++ ++ if (unlikely((long)sum < 0)) ++ sum = 0; ++ ++ return sum; ++} ++ ++EXPORT_SYMBOL(nr_stopped); ++ ++unsigned long nr_sleeping(void) ++{ ++ unsigned long i, sum; ++ ++ sum = 0; ++ for_each_cpu(i) ++ sum += glob_task_nrs[i].nr_sleeping; ++ ++ if (unlikely((long)sum < 0)) ++ sum = 0; ++ ++ return sum; ++} ++ ++EXPORT_SYMBOL(nr_sleeping); ++ ++#ifdef CONFIG_VE ++unsigned long nr_running_ve(struct ve_struct *ve) ++{ ++ int i; ++ long sum; ++ cpumask_t ve_cpus; ++ ++ sum = 0; ++ ve_cpu_online_map(ve, &ve_cpus); ++ for_each_cpu_mask(i, ve_cpus) ++ sum += VE_CPU_STATS(ve, i)->nr_running; ++ return (unsigned long)(sum < 0 ? 
0 : sum); ++} ++ ++EXPORT_SYMBOL(nr_running_ve); ++ ++unsigned long nr_uninterruptible_ve(struct ve_struct *ve) ++{ ++ int i; ++ long sum; ++ cpumask_t ve_cpus; ++ ++ sum = 0; ++ ve_cpu_online_map(ve, &ve_cpus); ++ for_each_cpu_mask(i, ve_cpus) ++ sum += VE_CPU_STATS(ve, i)->nr_unint; ++ return (unsigned long)(sum < 0 ? 0 : sum); ++} ++ ++EXPORT_SYMBOL(nr_uninterruptible_ve); ++ ++unsigned long nr_iowait_ve(struct ve_struct *ve) ++{ ++ int i; ++ long sum; ++ cpumask_t ve_cpus; ++ ++ sum = 0; ++ ve_cpu_online_map(ve, &ve_cpus); ++ for_each_cpu_mask(i, ve_cpus) ++ sum += VE_CPU_STATS(ve, i)->nr_iowait; ++ return (unsigned long)(sum < 0 ? 0 : sum); ++} ++ ++EXPORT_SYMBOL(nr_iowait_ve); ++#endif ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU) ++ ++#ifdef CONFIG_SCHED_VCPU ++#define rq_compare(rq1, rq2) (rq1 < rq2) ++#else ++#define rq_compare(rq1, rq2) (rq1->cpu < rq2->cpu) ++#endif + + /* + * double_rq_lock - safely lock two runqueues + * ++ * We must take them in cpu order to match code in ++ * dependent_sleeper and wake_dependent_sleeper. ++ * + * Note this does not disable interrupts like task_rq_lock, + * you need to do so manually before calling. + */ +@@ -1671,7 +2575,7 @@ static void double_rq_lock(runqueue_t *r + spin_lock(&rq1->lock); + __acquire(rq2->lock); /* Fake it out ;) */ + } else { +- if (rq1 < rq2) { ++ if (rq_compare(rq1, rq2)) { + spin_lock(&rq1->lock); + spin_lock(&rq2->lock); + } else { +@@ -1699,38 +2603,20 @@ static void double_rq_unlock(runqueue_t + } + + /* +- * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 
+- */ +-static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) +- __releases(this_rq->lock) +- __acquires(busiest->lock) +- __acquires(this_rq->lock) +-{ +- if (unlikely(!spin_trylock(&busiest->lock))) { +- if (busiest < this_rq) { +- spin_unlock(&this_rq->lock); +- spin_lock(&busiest->lock); +- spin_lock(&this_rq->lock); +- } else +- spin_lock(&busiest->lock); +- } +-} +- +-/* + * If dest_cpu is allowed for this process, migrate the task to it. + * This is accomplished by forcing the cpu_allowed mask to only + * allow dest_cpu, which will force the cpu onto dest_cpu. Then + * the cpu_allowed mask is restored. + */ +-static void sched_migrate_task(task_t *p, int dest_cpu) ++static void sched_migrate_task(task_t *p, vcpu_t dest_cpu) + { + migration_req_t req; + runqueue_t *rq; + unsigned long flags; + + rq = task_rq_lock(p, &flags); +- if (!cpu_isset(dest_cpu, p->cpus_allowed) +- || unlikely(cpu_is_offline(dest_cpu))) ++ if (!vcpu_isset(dest_cpu, p->cpus_allowed) ++ || unlikely(vcpu_is_offline(dest_cpu))) + goto out; + + /* force the process onto the specified CPU */ +@@ -1747,6 +2633,26 @@ static void sched_migrate_task(task_t *p + out: + task_rq_unlock(rq, &flags); + } ++#endif ++ ++#ifdef CONFIG_SMP ++/* ++ * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 
++ */ ++static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) ++ __releases(this_rq->lock) ++ __acquires(busiest->lock) ++ __acquires(this_rq->lock) ++{ ++ if (unlikely(!spin_trylock(&busiest->lock))) { ++ if (rq_compare(busiest, this_rq)) { ++ spin_unlock(&this_rq->lock); ++ spin_lock(&busiest->lock); ++ spin_lock(&this_rq->lock); ++ } else ++ spin_lock(&busiest->lock); ++ } ++} + + /* + * sched_exec - execve() is a valuable balancing opportunity, because at +@@ -1754,9 +2660,12 @@ out: + */ + void sched_exec(void) + { +- int new_cpu, this_cpu = get_cpu(); ++ vcpu_t new_cpu, this_cpu; ++ ++ preempt_disable(); ++ this_cpu = this_vcpu(); + new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC); +- put_cpu(); ++ preempt_enable(); + if (new_cpu != this_cpu) + sched_migrate_task(current, new_cpu); + } +@@ -1767,12 +2676,24 @@ void sched_exec(void) + */ + static + void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, +- runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) ++ runqueue_t *this_rq, prio_array_t *this_array, vcpu_t this_cpu) + { ++ struct ve_struct *ve; ++ cycles_t cycles; ++ ++ cycles = get_cycles(); ++ ve = VE_TASK_INFO(p)->owner_env; ++ + dequeue_task(p, src_array); + src_rq->nr_running--; +- set_task_cpu(p, this_cpu); ++ ve_nr_running_dec(ve, task_cpu(p), cycles); ++ if (src_rq->nr_running == 0) ++ vcpu_detach(src_rq); ++ set_task_vcpu(p, this_cpu); ++ if (this_rq->nr_running == 0) ++ vcpu_attach(this_rq); + this_rq->nr_running++; ++ ve_nr_running_inc(ve, task_cpu(p), cycles); + enqueue_task(p, this_array); + p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) + + this_rq->timestamp_last_tick; +@@ -1788,7 +2709,7 @@ void pull_task(runqueue_t *src_rq, prio_ + * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? 
+ */ + static +-int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, ++int can_migrate_task(task_t *p, runqueue_t *rq, vcpu_t this_cpu, + struct sched_domain *sd, enum idle_type idle, + int *all_pinned) + { +@@ -1798,7 +2719,7 @@ int can_migrate_task(task_t *p, runqueue + * 2) cannot be migrated to this CPU due to cpus_allowed, or + * 3) are cache-hot on their current CPU. + */ +- if (!cpu_isset(this_cpu, p->cpus_allowed)) ++ if (!vcpu_isset(this_cpu, p->cpus_allowed)) + return 0; + *all_pinned = 0; + +@@ -1826,7 +2747,7 @@ int can_migrate_task(task_t *p, runqueue + * + * Called with both runqueues locked. + */ +-static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, ++static int move_tasks(runqueue_t *this_rq, vcpu_t this_cpu, runqueue_t *busiest, + unsigned long max_nr_move, struct sched_domain *sd, + enum idle_type idle, int *all_pinned) + { +@@ -1919,13 +2840,19 @@ out: + * moved to restore balance via the imbalance parameter. + */ + static struct sched_group * +-find_busiest_group(struct sched_domain *sd, int this_cpu, ++find_busiest_group(struct sched_domain *sd, vcpu_t this_cpu, + unsigned long *imbalance, enum idle_type idle, int *sd_idle) + { + struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; + unsigned long max_load, avg_load, total_load, this_load, total_pwr; + unsigned long max_pull; + int load_idx; ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ int this_pcpu; ++ ++ vsched = vcpu_vsched(this_cpu); ++ this_pcpu = vcpu_last_pcpu(this_cpu); + + max_load = this_load = total_load = total_pwr = 0; + if (idle == NOT_IDLE) +@@ -1936,24 +2863,27 @@ find_busiest_group(struct sched_domain * + load_idx = sd->idle_idx; + + do { ++ cpumask_t tmp; + unsigned long load; + int local_group; + int i; + +- local_group = cpu_isset(this_cpu, group->cpumask); ++ local_group = cpu_isset(this_pcpu, group->cpumask); + + /* Tally up the load of all CPUs in the group */ + avg_load = 0; ++ cpus_and(tmp, group->cpumask, 
vsched_pcpu_running_map(vsched)); + +- for_each_cpu_mask(i, group->cpumask) { ++ for_each_cpu_mask(i, tmp) { ++ vcpu = pcpu(i)->vcpu; + if (*sd_idle && !idle_cpu(i)) + *sd_idle = 0; + + /* Bias balancing toward cpus of our domain */ + if (local_group) +- load = target_load(i, load_idx); ++ load = target_load(vcpu, load_idx); + else +- load = source_load(i, load_idx); ++ load = source_load(vcpu, load_idx); + + avg_load += load; + } +@@ -1976,6 +2906,8 @@ find_busiest_group(struct sched_domain * + + if (!busiest || this_load >= max_load || max_load <= SCHED_LOAD_SCALE) + goto out_balanced; ++ if (!this) ++ this = busiest; /* this->cpu_power is needed below */ + + avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; + +@@ -2058,25 +2990,57 @@ out_balanced: + /* + * find_busiest_queue - find the busiest runqueue among the cpus in group. + */ +-static runqueue_t *find_busiest_queue(struct sched_group *group, ++static vcpu_t find_busiest_queue(vcpu_t this_cpu, struct sched_group *group, + enum idle_type idle) + { + unsigned long load, max_load = 0; +- runqueue_t *busiest = NULL; ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu, busiest = NULL; ++ cpumask_t tmp; + int i; + ++ vsched = vcpu_vsched(this_cpu); + for_each_cpu_mask(i, group->cpumask) { +- load = source_load(i, 0); ++ vcpu = pcpu(i)->vcpu; ++ if (vcpu_vsched(vcpu) != vsched && idle != SCHED_IDLE) ++ continue; ++ load = source_load(vcpu, 0); ++ if (load > max_load) { ++ max_load = load; ++ busiest = vcpu; ++ } ++ } + ++#ifdef CONFIG_SCHED_VCPU ++ cpus_andnot(tmp, vsched->vcpu_online_map, vsched->vcpu_running_map); ++ for_each_cpu_mask(i, tmp) { ++ vcpu = vsched_vcpu(vsched, i); ++ load = source_load(vcpu, 0); + if (load > max_load) { + max_load = load; +- busiest = cpu_rq(i); ++ busiest = vcpu; + } + } ++#endif + + return busiest; + } + ++#ifdef CONFIG_SCHED_VCPU ++vcpu_t find_idle_vcpu(struct vcpu_scheduler *vsched) ++{ ++ vcpu_t vcpu; ++ ++ vcpu = NULL; ++ spin_lock(&fairsched_lock); ++ if 
(!list_empty(&vsched->idle_list)) ++ vcpu = list_entry(vsched->idle_list.next, ++ struct vcpu_info, list); ++ spin_unlock(&fairsched_lock); ++ return vcpu; ++} ++#endif ++ + /* + * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but + * so long as it is large enough. +@@ -2089,10 +3053,11 @@ static runqueue_t *find_busiest_queue(st + * + * Called with this_rq unlocked. + */ +-static int load_balance(int this_cpu, runqueue_t *this_rq, ++static int load_balance(vcpu_t this_cpu, runqueue_t *this_rq, + struct sched_domain *sd, enum idle_type idle) + { + struct sched_group *group; ++ vcpu_t busiest_vcpu; + runqueue_t *busiest; + unsigned long imbalance; + int nr_moved, all_pinned = 0; +@@ -2110,13 +3075,24 @@ static int load_balance(int this_cpu, ru + goto out_balanced; + } + +- busiest = find_busiest_queue(group, idle); +- if (!busiest) { ++ busiest_vcpu = find_busiest_queue(this_cpu, group, idle); ++ if (!busiest_vcpu) { + schedstat_inc(sd, lb_nobusyq[idle]); + goto out_balanced; + } + +- BUG_ON(busiest == this_rq); ++#ifdef CONFIG_SCHED_VCPU ++ if (vcpu_vsched(this_cpu) != vcpu_vsched(busiest_vcpu)) { ++ this_cpu = find_idle_vcpu(vcpu_vsched(busiest_vcpu)); ++ if (!this_cpu) ++ goto out_one_pinned; ++ this_rq = vcpu_rq(this_cpu); ++ } ++#endif ++ busiest = vcpu_rq(busiest_vcpu); ++ ++ if (unlikely(busiest == this_rq)) ++ goto out_balanced; + + schedstat_add(sd, lb_imbalance[idle], imbalance); + +@@ -2149,7 +3125,7 @@ static int load_balance(int this_cpu, ru + /* don't kick the migration_thread, if the curr + * task on busiest cpu can't be moved to this_cpu + */ +- if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { ++ if (!vcpu_isset(this_cpu, busiest->curr->cpus_allowed)) { + spin_unlock(&busiest->lock); + all_pinned = 1; + goto out_one_pinned; +@@ -2214,11 +3190,12 @@ out_one_pinned: + * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). + * this_rq is locked. 
+ */ +-static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, ++static int load_balance_newidle(vcpu_t this_cpu, runqueue_t *this_rq, + struct sched_domain *sd) + { + struct sched_group *group; +- runqueue_t *busiest = NULL; ++ runqueue_t *busiest; ++ vcpu_t busiest_vcpu; + unsigned long imbalance; + int nr_moved = 0; + int sd_idle = 0; +@@ -2233,13 +3210,12 @@ static int load_balance_newidle(int this + goto out_balanced; + } + +- busiest = find_busiest_queue(group, NEWLY_IDLE); +- if (!busiest) { ++ busiest_vcpu = find_busiest_queue(this_cpu, group, NEWLY_IDLE); ++ if (!busiest_vcpu || busiest_vcpu == this_cpu) { + schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); + goto out_balanced; + } +- +- BUG_ON(busiest == this_rq); ++ busiest = vcpu_rq(busiest_vcpu); + + schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance); + +@@ -2272,8 +3248,11 @@ out_balanced: + /* + * idle_balance is called by schedule() if this_cpu is about to become + * idle. Attempts to pull tasks from other CPUs. ++ * ++ * Returns whether to continue with another runqueue ++ * instead of switching to idle. + */ +-static void idle_balance(int this_cpu, runqueue_t *this_rq) ++static int idle_balance(vcpu_t this_cpu, runqueue_t *this_rq) + { + struct sched_domain *sd; + +@@ -2281,10 +3260,11 @@ static void idle_balance(int this_cpu, r + if (sd->flags & SD_BALANCE_NEWIDLE) { + if (load_balance_newidle(this_cpu, this_rq, sd)) { + /* We've pulled tasks over so stop searching */ +- break; ++ return 1; + } + } + } ++ return 0; + } + + /* +@@ -2294,18 +3274,26 @@ static void idle_balance(int this_cpu, r + * logical imbalances. + * + * Called with busiest_rq locked. ++ * ++ * In human terms: balancing of CPU load by moving tasks between CPUs is ++ * performed by 2 methods, push and pull. ++ * In certain places when CPU is found to be idle, it performs pull from busy ++ * CPU to current (idle) CPU. 
++ * active_load_balance implements push method, with migration thread getting ++ * scheduled on a busy CPU (hence, making all running processes on this CPU sit ++ * in the queue) and selecting where to push and which task. + */ +-static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) ++static void active_load_balance(runqueue_t *busiest_rq, vcpu_t busiest_cpu) + { + struct sched_domain *sd; + runqueue_t *target_rq; +- int target_cpu = busiest_rq->push_cpu; ++ vcpu_t target_cpu = busiest_rq->push_cpu; + + if (busiest_rq->nr_running <= 1) + /* no task to move */ + return; + +- target_rq = cpu_rq(target_cpu); ++ target_rq = vcpu_rq(target_cpu); + + /* + * This condition is "impossible", if it occurs +@@ -2317,10 +3305,17 @@ static void active_load_balance(runqueue + /* move a task from busiest_rq to target_rq */ + double_lock_balance(busiest_rq, target_rq); + ++ /* ++ * Our main candidate where to push our tasks is busiest->push_cpu. ++ * First, find the domain that spans over both that candidate CPU and ++ * the current one. ++ * ++ * FIXME: make sure that push_cpu doesn't disappear before we get here. ++ */ + /* Search for an sd spanning us and the target CPU. 
*/ + for_each_domain(target_cpu, sd) + if ((sd->flags & SD_LOAD_BALANCE) && +- cpu_isset(busiest_cpu, sd->span)) ++ cpu_isset(vcpu_last_pcpu(busiest_cpu), sd->span)) + break; + + if (unlikely(sd == NULL)) +@@ -2346,31 +3341,17 @@ out: + */ + + /* Don't have all balancing operations going off at once */ +-#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) ++#define CPU_OFFSET(cpu) (HZ * (cpu) / NR_CPUS) + +-static void rebalance_tick(int this_cpu, runqueue_t *this_rq, ++static void rebalance_tick(vcpu_t this_cpu, runqueue_t *this_rq, + enum idle_type idle) + { +- unsigned long old_load, this_load; +- unsigned long j = jiffies + CPU_OFFSET(this_cpu); ++ unsigned long j; + struct sched_domain *sd; +- int i; + +- this_load = this_rq->nr_running * SCHED_LOAD_SCALE; + /* Update our load */ +- for (i = 0; i < 3; i++) { +- unsigned long new_load = this_load; +- int scale = 1 << i; +- old_load = this_rq->cpu_load[i]; +- /* +- * Round up the averaging division if load is increasing. This +- * prevents us from getting stuck on 9 if the load is 10, for +- * example. 
+- */ +- if (new_load > old_load) +- new_load += scale-1; +- this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; +- } ++ update_rq_cpu_load(this_rq); ++ j = jiffies + CPU_OFFSET(smp_processor_id()); + + for_each_domain(this_cpu, sd) { + unsigned long interval; +@@ -2404,17 +3385,19 @@ static void rebalance_tick(int this_cpu, + /* + * on UP we do not need to balance between CPUs: + */ +-static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) ++static inline void rebalance_tick(vcpu_t cpu, runqueue_t *rq, enum idle_type idle) + { + } +-static inline void idle_balance(int cpu, runqueue_t *rq) ++static inline void idle_balance(vcpu_t cpu, runqueue_t *rq) + { + } + #endif + +-static inline int wake_priority_sleeper(runqueue_t *rq) ++static inline int wake_priority_sleeper(runqueue_t *rq, task_t *idle) + { + int ret = 0; ++#ifndef CONFIG_SCHED_VCPU ++ /* FIXME: can we implement SMT priority sleeping for this? */ + #ifdef CONFIG_SCHED_SMT + spin_lock(&rq->lock); + /* +@@ -2422,11 +3405,13 @@ static inline int wake_priority_sleeper( + * reasons reschedule the idle task to see if it can now run. + */ + if (rq->nr_running) { +- resched_task(rq->idle); ++ /* FIXME */ ++ resched_task(idle); + ret = 1; + } + spin_unlock(&rq->lock); + #endif ++#endif + return ret; + } + +@@ -2476,6 +3461,15 @@ unsigned long long current_sched_time(co + STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ + ((rq)->curr->static_prio > (rq)->best_expired_prio)) + ++#ifdef CONFIG_VE ++#define update_ve_cpu_time(p, time, tick) do { \ ++ VE_CPU_STATS((p)->ve_task_info.owner_env, \ ++ task_cpu(p))->time += tick; \ ++ } while (0) ++#else ++#define update_ve_cpu_time(p, time, tick) do { } while (0) ++#endif ++ + /* + * Account user cpu time to a process. + * @p: the process that the cpu time gets accounted to +@@ -2491,10 +3485,13 @@ void account_user_time(struct task_struc + + /* Add user time to cpustat. 
*/ + tmp = cputime_to_cputime64(cputime); +- if (TASK_NICE(p) > 0) ++ if (TASK_NICE(p) > 0) { + cpustat->nice = cputime64_add(cpustat->nice, tmp); +- else ++ update_ve_cpu_time(p, nice, tmp); ++ } else { + cpustat->user = cputime64_add(cpustat->user, tmp); ++ update_ve_cpu_time(p, user, tmp); ++ } + } + + /* +@@ -2511,14 +3508,16 @@ void account_system_time(struct task_str + cputime64_t tmp; + + p->stime = cputime_add(p->stime, cputime); ++ tmp = cputime_to_cputime64(cputime); ++ ++ update_ve_cpu_time(p, system, tmp); + + /* Add system time to cpustat. */ +- tmp = cputime_to_cputime64(cputime); + if (hardirq_count() - hardirq_offset) + cpustat->irq = cputime64_add(cpustat->irq, tmp); + else if (softirq_count()) + cpustat->softirq = cputime64_add(cpustat->softirq, tmp); +- else if (p != rq->idle) ++ else if (p != this_pcpu()->idle) + cpustat->system = cputime64_add(cpustat->system, tmp); + else if (atomic_read(&rq->nr_iowait) > 0) + cpustat->iowait = cputime64_add(cpustat->iowait, tmp); +@@ -2539,7 +3538,7 @@ void account_steal_time(struct task_stru + cputime64_t tmp = cputime_to_cputime64(steal); + runqueue_t *rq = this_rq(); + +- if (p == rq->idle) { ++ if (p == this_pcpu()->idle) { + p->stime = cputime_add(p->stime, steal); + if (atomic_read(&rq->nr_iowait) > 0) + cpustat->iowait = cputime64_add(cpustat->iowait, tmp); +@@ -2559,18 +3558,23 @@ void account_steal_time(struct task_stru + void scheduler_tick(void) + { + int cpu = smp_processor_id(); +- runqueue_t *rq = this_rq(); ++ vcpu_t vcpu; ++ runqueue_t *rq; + task_t *p = current; + unsigned long long now = sched_clock(); + ++ vcpu = this_vcpu(); ++ rq = vcpu_rq(vcpu); + update_cpu_clock(p, rq, now); + + rq->timestamp_last_tick = now; + +- if (p == rq->idle) { +- if (wake_priority_sleeper(rq)) ++ set_tsk_need_resched(p); //FIXME ++ ++ if (p == pcpu(cpu)->idle) { ++ if (wake_priority_sleeper(rq, pcpu(cpu)->idle)) + goto out; +- rebalance_tick(cpu, rq, SCHED_IDLE); ++ rebalance_tick(vcpu, rq, SCHED_IDLE); + 
return; + } + +@@ -2646,10 +3650,14 @@ void scheduler_tick(void) + out_unlock: + spin_unlock(&rq->lock); + out: +- rebalance_tick(cpu, rq, NOT_IDLE); ++ rebalance_tick(vcpu, rq, NOT_IDLE); + } + +-#ifdef CONFIG_SCHED_SMT ++#if defined(CONFIG_SCHED_SMT) && !defined(CONFIG_SCHED_VCPU) ++/* FIXME: SMT scheduling ++ * rq->cpu is initialized with rq address if FAIRSCED is on ++ * this is not correct for SMT case ++ */ + static inline void wakeup_busy_runqueue(runqueue_t *rq) + { + /* If an SMT runqueue is sleeping due to priority reasons wake it up */ +@@ -2657,7 +3665,7 @@ static inline void wakeup_busy_runqueue( + resched_task(rq->idle); + } + +-static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) ++static void wake_sleeping_dependent(vcpu_t this_cpu) + { + struct sched_domain *tmp, *sd = NULL; + cpumask_t sibling_map; +@@ -2711,7 +3719,7 @@ static inline unsigned long smt_slice(ta + return p->time_slice * (100 - sd->per_cpu_gain) / 100; + } + +-static int dependent_sleeper(int this_cpu, runqueue_t *this_rq) ++static int dependent_sleeper(vcpu_t this_cpu) + { + struct sched_domain *tmp, *sd = NULL; + cpumask_t sibling_map; +@@ -2812,11 +3820,11 @@ out_unlock: + return ret; + } + #else +-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) ++static inline void wake_sleeping_dependent(vcpu_t this_cpu) + { + } + +-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) ++static inline int dependent_sleeper(vcpu_t this_cpu) + { + return 0; + } +@@ -2866,7 +3874,9 @@ asmlinkage void __sched schedule(void) + struct list_head *queue; + unsigned long long now; + unsigned long run_time; +- int cpu, idx, new_prio; ++ int idx, new_prio; ++ vcpu_t vcpu; ++ cycles_t cycles; + + /* + * Test if we are atomic. 
Since do_exit() needs to call into +@@ -2888,13 +3898,14 @@ need_resched: + prev = current; + release_kernel_lock(prev); + need_resched_nonpreemptible: ++ cycles = get_cycles(); + rq = this_rq(); + + /* + * The idle thread is not allowed to schedule! + * Remove this check after it has been exercised a bit. + */ +- if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) { ++ if (unlikely(prev == this_pcpu()->idle) && prev->state != TASK_RUNNING) { + printk(KERN_ERR "bad: scheduling from the idle thread!\n"); + dump_stack(); + } +@@ -2932,25 +3943,35 @@ need_resched_nonpreemptible: + } + } + +- cpu = smp_processor_id(); ++ prev->sleep_avg -= run_time; ++ if ((long)prev->sleep_avg <= 0) ++ prev->sleep_avg = 0; ++ ++ vcpu = rq_vcpu(rq); ++ if (rq->nr_running && ++ jiffies - vcpu->start_time < msecs_to_jiffies(vcpu_timeslice)) ++ goto same_vcpu; ++ ++ if (unlikely(!rq->nr_running)) ++ idle_balance(vcpu, rq); ++ vcpu = schedule_vcpu(vcpu, cycles); ++ rq = vcpu_rq(vcpu); ++ + if (unlikely(!rq->nr_running)) { + go_idle: +- idle_balance(cpu, rq); +- if (!rq->nr_running) { +- next = rq->idle; +- rq->expired_timestamp = 0; +- wake_sleeping_dependent(cpu, rq); +- /* +- * wake_sleeping_dependent() might have released +- * the runqueue, so break out if we got new +- * tasks meanwhile: +- */ +- if (!rq->nr_running) +- goto switch_tasks; +- } ++ next = this_pcpu()->idle; ++ rq->expired_timestamp = 0; ++ wake_sleeping_dependent(vcpu); ++ /* ++ * wake_sleeping_dependent() might have released ++ * the runqueue, so break out if we got new ++ * tasks meanwhile: ++ */ ++ if (!rq->nr_running) ++ goto switch_tasks; + } else { +- if (dependent_sleeper(cpu, rq)) { +- next = rq->idle; ++ if (dependent_sleeper(vcpu)) { ++ next = this_pcpu()->idle; + goto switch_tasks; + } + /* +@@ -2962,6 +3983,7 @@ go_idle: + goto go_idle; + } + ++same_vcpu: + array = rq->active; + if (unlikely(!array->nr_active)) { + /* +@@ -2998,28 +4020,50 @@ go_idle: + requeue_task(next, array); + } + 
next->activated = 0; ++ + switch_tasks: +- if (next == rq->idle) ++ if (next == this_pcpu()->idle) + schedstat_inc(rq, sched_goidle); + prefetch(next); + prefetch_stack(next); + clear_tsk_need_resched(prev); +- rcu_qsctr_inc(task_cpu(prev)); ++ rcu_qsctr_inc(task_pcpu(prev)); + + update_cpu_clock(prev, rq, now); + +- prev->sleep_avg -= run_time; +- if ((long)prev->sleep_avg <= 0) +- prev->sleep_avg = 0; ++ /* updated w/o rq->lock, which is ok due to after-read-checks */ + prev->timestamp = prev->last_ran = now; + + sched_info_switch(prev, next); + if (likely(prev != next)) { ++ cycles_t cycles; ++ ++ /* current physical CPU id should be valid after switch */ ++ set_task_vcpu(next, vcpu); ++ set_task_pcpu(next, task_pcpu(prev)); ++ cycles = get_cycles(); + next->timestamp = now; + rq->nr_switches++; ++ glob_task_nrs[smp_processor_id()].nr_switches++; + rq->curr = next; + ++*switch_count; + ++#ifdef CONFIG_VE ++ prev->ve_task_info.sleep_stamp = cycles; ++ if (prev->state == TASK_RUNNING && prev != this_pcpu()->idle) ++ write_wakeup_stamp(prev, cycles); ++ update_sched_lat(next, cycles); ++ ++ /* because next & prev are protected with ++ * runqueue lock we may not worry about ++ * wakeup_stamp and sched_time protection ++ * (same thing in 'else' branch below) ++ */ ++ update_ve_task_info(prev, cycles); ++ next->ve_task_info.sched_time = cycles; ++ write_wakeup_stamp(next, 0); ++#endif ++ + prepare_task_switch(rq, next); + prev = context_switch(rq, prev, next); + barrier(); +@@ -3029,8 +4073,10 @@ switch_tasks: + * frame will be invalid. + */ + finish_task_switch(this_rq(), prev); +- } else ++ } else { ++ update_ve_task_info(prev, get_cycles()); + spin_unlock_irq(&rq->lock); ++ } + + prev = current; + if (unlikely(reacquire_kernel_lock(prev) < 0)) +@@ -3565,27 +4611,9 @@ int task_prio(const task_t *p) + */ + int task_nice(const task_t *p) + { +- return TASK_NICE(p); +-} +-EXPORT_SYMBOL_GPL(task_nice); +- +-/** +- * idle_cpu - is a given cpu idle currently? 
+- * @cpu: the processor in question. +- */ +-int idle_cpu(int cpu) +-{ +- return cpu_curr(cpu) == cpu_rq(cpu)->idle; +-} +- +-/** +- * idle_task - return the idle task for a given cpu. +- * @cpu: the processor in question. +- */ +-task_t *idle_task(int cpu) +-{ +- return cpu_rq(cpu)->idle; ++ return TASK_NICE(p); + } ++EXPORT_SYMBOL_GPL(task_nice); + + /** + * find_process_by_pid - find a process with a matching PID value. +@@ -3593,7 +4621,7 @@ task_t *idle_task(int cpu) + */ + static inline task_t *find_process_by_pid(pid_t pid) + { +- return pid ? find_task_by_pid(pid) : current; ++ return pid ? find_task_by_pid_ve(pid) : current; + } + + /* Actually do priority change: must hold rq lock. */ +@@ -3653,7 +4681,7 @@ recheck: + /* + * Allow unprivileged RT tasks to decrease priority: + */ +- if (!capable(CAP_SYS_NICE)) { ++ if (!capable(CAP_SYS_ADMIN)) { + /* + * can't change policy, except between SCHED_NORMAL + * and SCHED_BATCH: +@@ -4110,10 +5138,19 @@ EXPORT_SYMBOL(yield); + */ + void __sched io_schedule(void) + { +- struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id()); ++ struct runqueue *rq = this_rq(); ++ ++#ifdef CONFIG_VE ++ struct ve_struct *ve; ++ ve = current->ve_task_info.owner_env; ++#endif + + atomic_inc(&rq->nr_iowait); ++ ve_nr_iowait_inc(ve, task_cpu(current)); ++ nr_iowait_inc(smp_processor_id()); + schedule(); ++ nr_iowait_dec(smp_processor_id()); ++ ve_nr_iowait_dec(ve, task_cpu(current)); + atomic_dec(&rq->nr_iowait); + } + +@@ -4121,11 +5158,20 @@ EXPORT_SYMBOL(io_schedule); + + long __sched io_schedule_timeout(long timeout) + { +- struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id()); ++ struct runqueue *rq = this_rq(); + long ret; + ++#ifdef CONFIG_VE ++ struct ve_struct *ve; ++ ve = current->ve_task_info.owner_env; ++#endif ++ + atomic_inc(&rq->nr_iowait); ++ ve_nr_iowait_inc(ve, task_cpu(current)); ++ nr_iowait_inc(smp_processor_id()); + ret = schedule_timeout(timeout); ++ nr_iowait_dec(smp_processor_id()); ++ 
ve_nr_iowait_dec(ve, task_cpu(current)); + atomic_dec(&rq->nr_iowait); + return ret; + } +@@ -4248,15 +5294,9 @@ static void show_task(task_t *p) + else + printk("?"); + #if (BITS_PER_LONG == 32) +- if (state == TASK_RUNNING) +- printk(" running "); +- else +- printk(" %08lX ", thread_saved_pc(p)); ++ printk(" %08lX ", (unsigned long)p); + #else +- if (state == TASK_RUNNING) +- printk(" running task "); +- else +- printk(" %016lx ", thread_saved_pc(p)); ++ printk(" %016lx ", (unsigned long)p); + #endif + #ifdef CONFIG_DEBUG_STACK_USAGE + { +@@ -4295,26 +5335,41 @@ void show_state(void) + #if (BITS_PER_LONG == 32) + printk("\n" + " sibling\n"); +- printk(" task PC pid father child younger older\n"); ++ printk(" task taskaddr pid father child younger older\n"); + #else + printk("\n" + " sibling\n"); +- printk(" task PC pid father child younger older\n"); ++ printk(" task taskaddr pid father child younger older\n"); + #endif + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + /* + * reset the NMI-timeout, listing all files on a slow + * console might take alot of time: + */ + touch_nmi_watchdog(); + show_task(p); +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + + read_unlock(&tasklist_lock); + mutex_debug_show_all_locks(); + } + ++static void init_boot_vcpus(long cpu) ++{ ++ if (vsched_vcpu(&idle_vsched, cpu) != NULL) ++ return; ++ ++ if (__add_vcpu(&idle_vsched, cpu) != 0) ++ panic("Can't create idle vcpu %ld\n", cpu); ++ ++ /* Also create vcpu for default_vsched */ ++ if (__add_vcpu(&default_vsched, cpu) != 0) ++ panic("Can't create default vcpu %ld\n", cpu); ++ ++ cpu_set(cpu, idle_vsched.pcpu_running_map); ++} ++ + /** + * init_idle - set up an idle thread for a given CPU + * @idle: task in question +@@ -4325,22 +5380,47 @@ void show_state(void) + */ + void __devinit init_idle(task_t *idle, int cpu) + { +- runqueue_t *rq = cpu_rq(cpu); ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ runqueue_t *rq; + 
unsigned long flags; + ++#ifdef CONFIG_SCHED_VCPU ++ init_boot_vcpus(cpu); ++#endif ++ vsched = &idle_vsched; ++ vcpu = vsched_vcpu(vsched, cpu); ++ rq = vcpu_rq(vcpu); ++ + idle->timestamp = sched_clock(); + idle->sleep_avg = 0; + idle->array = NULL; + idle->prio = MAX_PRIO; + idle->state = TASK_RUNNING; + idle->cpus_allowed = cpumask_of_cpu(cpu); ++ set_task_vsched(idle, &idle_vsched); + set_task_cpu(idle, cpu); + + spin_lock_irqsave(&rq->lock, flags); +- rq->curr = rq->idle = idle; ++ pcpu(cpu)->idle = idle; ++ rq->curr = idle; + #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + idle->oncpu = 1; + #endif ++ set_task_pcpu(idle, cpu); ++ set_task_vsched(idle, vsched); ++ set_task_vcpu(idle, vcpu); ++#ifdef CONFIG_SCHED_VCPU ++ /* the following code is very close to vcpu_get */ ++ spin_lock(&fairsched_lock); ++ pcpu(cpu)->vcpu = vcpu; ++ pcpu(cpu)->vsched = vcpu->vsched; ++ list_move_tail(&vcpu->list, &vsched->running_list); ++ __set_bit(cpu, vsched->vcpu_running_map.bits); ++ __set_bit(cpu, vsched->pcpu_running_map.bits); ++ vcpu->running = 1; ++ spin_unlock(&fairsched_lock); ++#endif + spin_unlock_irqrestore(&rq->lock, flags); + + /* Set the preempt count _outside_ the spinlocks! */ +@@ -4360,7 +5440,6 @@ void __devinit init_idle(task_t *idle, i + */ + cpumask_t nohz_cpu_mask = CPU_MASK_NONE; + +-#ifdef CONFIG_SMP + /* + * This is how migration works: + * +@@ -4377,6 +5456,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE; + * 7) we wake up and the migration is done. + */ + ++#ifdef CONFIG_SMP + /* + * Change a given task's CPU affinity. 
Migrate the thread to a + * proper CPU and schedule it away if the CPU it's executing on +@@ -4392,9 +5472,11 @@ int set_cpus_allowed(task_t *p, cpumask_ + int ret = 0; + migration_req_t req; + runqueue_t *rq; ++ struct vcpu_scheduler *vsched; + ++ vsched = task_vsched(p); + rq = task_rq_lock(p, &flags); +- if (!cpus_intersects(new_mask, cpu_online_map)) { ++ if (!cpus_intersects(new_mask, vsched_vcpu_online_map(vsched))) { + ret = -EINVAL; + goto out; + } +@@ -4404,7 +5486,8 @@ int set_cpus_allowed(task_t *p, cpumask_ + if (cpu_isset(task_cpu(p), new_mask)) + goto out; + +- if (migrate_task(p, any_online_cpu(new_mask), &req)) { ++ if (migrate_task(p, vsched_vcpu(vsched, any_online_cpu(new_mask)), ++ &req)) { + /* Need help from migration thread: drop lock and wait. */ + task_rq_unlock(rq, &flags); + wake_up_process(rq->migration_thread); +@@ -4418,6 +5501,7 @@ out: + } + + EXPORT_SYMBOL_GPL(set_cpus_allowed); ++#endif + + /* + * Move (not current) task off this cpu, onto dest cpu. We're doing +@@ -4428,25 +5512,30 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed); + * So we race with normal scheduler movements, but that's OK, as long + * as the task is no longer on this CPU. + */ +-static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) ++static void __migrate_task(struct task_struct *p, vcpu_t src_cpu, vcpu_t dest_cpu) + { + runqueue_t *rq_dest, *rq_src; + +- if (unlikely(cpu_is_offline(dest_cpu))) ++ if (unlikely(vcpu_is_offline(dest_cpu))) + return; + +- rq_src = cpu_rq(src_cpu); +- rq_dest = cpu_rq(dest_cpu); ++#ifdef CONFIG_SCHED_VCPU ++ BUG_ON(vcpu_vsched(src_cpu) == &idle_vsched); ++#endif ++ rq_src = vcpu_rq(src_cpu); ++ rq_dest = vcpu_rq(dest_cpu); + + double_rq_lock(rq_src, rq_dest); + /* Already moved. */ +- if (task_cpu(p) != src_cpu) ++ if (task_vcpu(p) != src_cpu) + goto out; + /* Affinity changed (again). 
*/ +- if (!cpu_isset(dest_cpu, p->cpus_allowed)) ++ if (!vcpu_isset(dest_cpu, p->cpus_allowed)) + goto out; + +- set_task_cpu(p, dest_cpu); ++ BUG_ON(task_running(rq_src, p)); ++ set_task_vsched(p, vcpu_vsched(dest_cpu)); ++ set_task_vcpu(p, dest_cpu); + if (p->array) { + /* + * Sync timestamp with rq_dest's before activating. +@@ -4474,9 +5563,9 @@ out: + static int migration_thread(void *data) + { + runqueue_t *rq; +- int cpu = (long)data; ++ vcpu_t cpu = (vcpu_t)data; + +- rq = cpu_rq(cpu); ++ rq = vcpu_rq(cpu); + BUG_ON(rq->migration_thread != current); + + set_current_state(TASK_INTERRUPTIBLE); +@@ -4488,15 +5577,17 @@ static int migration_thread(void *data) + + spin_lock_irq(&rq->lock); + +- if (cpu_is_offline(cpu)) { ++ if (vcpu_is_offline(cpu)) { + spin_unlock_irq(&rq->lock); + goto wait_to_die; + } + ++#ifdef CONFIG_SMP + if (rq->active_balance) { + active_load_balance(rq, cpu); + rq->active_balance = 0; + } ++#endif + + head = &rq->migration_queue; + +@@ -4529,14 +5620,16 @@ wait_to_die: + return 0; + } + +-#ifdef CONFIG_HOTPLUG_CPU + /* Figure out where task on dead CPU should go, use force if neccessary. */ +-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) ++static void move_task_off_dead_cpu(vcpu_t dead_cpu, struct task_struct *tsk) + { + int dest_cpu; ++ struct vcpu_scheduler *vsched; + cpumask_t mask; + ++#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_SCHED_VCPU) + /* On same node? 
*/ ++#error FIXME: wrong code + mask = node_to_cpumask(cpu_to_node(dead_cpu)); + cpus_and(mask, mask, tsk->cpus_allowed); + dest_cpu = any_online_cpu(mask); +@@ -4560,9 +5653,20 @@ static void move_task_off_dead_cpu(int d + "longer affine to cpu%d\n", + tsk->pid, tsk->comm, dead_cpu); + } +- __migrate_task(tsk, dead_cpu, dest_cpu); ++#elif defined(CONFIG_SCHED_VCPU) ++ vsched = vcpu_vsched(dead_cpu); ++ mask = vsched_vcpu_online_map(vsched); ++ cpus_and(mask, mask, tsk->cpus_allowed); ++ dest_cpu = any_online_cpu(mask); ++ ++ /* On any allowed CPU? */ ++ if (dest_cpu == NR_CPUS) ++ dest_cpu = any_online_cpu(vsched_vcpu_online_map(vsched)); ++#endif ++ __migrate_task(tsk, dead_cpu, vsched_vcpu(vsched, dest_cpu)); + } + ++#ifdef CONFIG_HOTPLUG_CPU + /* + * While a dead CPU has no uninterruptible tasks queued at this point, + * it might still have a nonzero ->nr_uninterruptible counter, because +@@ -4582,25 +5686,30 @@ static void migrate_nr_uninterruptible(r + double_rq_unlock(rq_src, rq_dest); + local_irq_restore(flags); + } ++#endif + + /* Run through task list and migrate tasks from the dead cpu. */ +-static void migrate_live_tasks(int src_cpu) ++static void migrate_live_tasks(vcpu_t src_cpu) + { + struct task_struct *tsk, *t; + ++ BUG_ON(vcpu_isset(src_cpu, vsched_vcpu_online_map(vcpu_vsched(src_cpu)))); + write_lock_irq(&tasklist_lock); + +- do_each_thread(t, tsk) { ++ do_each_thread_all(t, tsk) { + if (tsk == current) + continue; ++ if (tsk == vcpu_rq(src_cpu)->migration_thread) ++ continue; + +- if (task_cpu(tsk) == src_cpu) ++ if (task_vcpu(tsk) == src_cpu) + move_task_off_dead_cpu(src_cpu, tsk); +- } while_each_thread(t, tsk); ++ } while_each_thread_all(t, tsk); + + write_unlock_irq(&tasklist_lock); + } + ++#ifdef CONFIG_HOTPLUG_CPU + /* Schedules idle task to be the next runnable task on current CPU. + * It does so by boosting its priority to highest possible and adding it to + * the _front_ of runqueue. Used by CPU offline code. 
+@@ -4622,6 +5731,9 @@ void sched_idle_next(void) + + __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); + /* Add idle task to _front_ of it's priority queue */ ++#ifdef CONFIG_SCHED_VCPU ++#error "FIXME: VCPU vs. HOTPLUG: fix the code below" ++#endif + __activate_idle_task(p, rq); + + spin_unlock_irqrestore(&rq->lock, flags); +@@ -4683,48 +5795,83 @@ static void migrate_dead_tasks(unsigned + } + #endif /* CONFIG_HOTPLUG_CPU */ + ++static void migration_thread_bind(struct task_struct *k, vcpu_t cpu) ++{ ++ BUG_ON(k->state != TASK_INTERRUPTIBLE); ++ /* Must have done schedule() in kthread() before we set_task_cpu */ ++ wait_task_inactive(k); ++ ++ set_task_vsched(k, vcpu_vsched(cpu)); ++ set_task_vcpu(k, cpu); ++ k->cpus_allowed = cpumask_of_cpu(cpu->id); ++} ++ ++static void migration_thread_stop(runqueue_t *rq) ++{ ++ struct task_struct *thread; ++ ++ thread = rq->migration_thread; ++ if (thread == NULL) ++ return; ++ ++ get_task_struct(thread); ++ kthread_stop(thread); ++ ++ /* We MUST ensure, that the do_exit of the migration thread is ++ * completed and it will never scheduled again before vsched_destroy. ++ * The task with flag PF_DEAD if unscheduled will never receive ++ * CPU again. */ ++ while (!(thread->flags & PF_DEAD) || task_running(rq, thread)) ++ yield(); ++ put_task_struct(thread); ++ ++ rq->migration_thread = NULL; ++} ++ + /* + * migration_call - callback that gets triggered when a CPU is added. + * Here we can start up the necessary migration thread for the new CPU. 
+ */ +-static int migration_call(struct notifier_block *nfb, unsigned long action, ++static int vmigration_call(struct notifier_block *nfb, unsigned long action, + void *hcpu) + { +- int cpu = (long)hcpu; ++ vcpu_t cpu = (vcpu_t)hcpu; + struct task_struct *p; + struct runqueue *rq; + unsigned long flags; + + switch (action) { + case CPU_UP_PREPARE: +- p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); ++ p = kthread_create(migration_thread, hcpu, "migration/%d/%d", ++ vsched_id(vcpu_vsched(cpu)), cpu->id); + if (IS_ERR(p)) + return NOTIFY_BAD; + p->flags |= PF_NOFREEZE; +- kthread_bind(p, cpu); +- /* Must be high prio: stop_machine expects to yield to it. */ ++ ++ migration_thread_bind(p, cpu); + rq = task_rq_lock(p, &flags); ++ /* Must be high prio: stop_machine expects to yield to it. */ + __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); + task_rq_unlock(rq, &flags); +- cpu_rq(cpu)->migration_thread = p; ++ vcpu_rq(cpu)->migration_thread = p; + break; + case CPU_ONLINE: + /* Strictly unneccessary, as first user will wake it. */ +- wake_up_process(cpu_rq(cpu)->migration_thread); ++ wake_up_process(vcpu_rq(cpu)->migration_thread); + break; +-#ifdef CONFIG_HOTPLUG_CPU ++#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SCHED_VCPU) ++#error "FIXME: CPU down code doesn't work yet with VCPUs" ++#endif + case CPU_UP_CANCELED: + /* Unbind it from offline cpu so it can run. Fall thru. 
*/ +- kthread_bind(cpu_rq(cpu)->migration_thread, +- any_online_cpu(cpu_online_map)); +- kthread_stop(cpu_rq(cpu)->migration_thread); +- cpu_rq(cpu)->migration_thread = NULL; ++ migration_thread_bind(vcpu_rq(cpu)->migration_thread, this_vcpu()); ++ migration_thread_stop(vcpu_rq(cpu)); + break; + case CPU_DEAD: + migrate_live_tasks(cpu); +- rq = cpu_rq(cpu); +- kthread_stop(rq->migration_thread); +- rq->migration_thread = NULL; ++ rq = vcpu_rq(cpu); ++ migration_thread_stop(rq); ++#ifdef CONFIG_HOTPLUG_CPU + /* Idle task back to normal (off runqueue, low prio) */ + rq = task_rq_lock(rq->idle, &flags); + deactivate_task(rq->idle, rq); +@@ -4734,6 +5881,7 @@ static int migration_call(struct notifie + task_rq_unlock(rq, &flags); + migrate_nr_uninterruptible(rq); + BUG_ON(rq->nr_running != 0); ++#endif + + /* No need to migrate the tasks: it was best-effort if + * they didn't do lock_cpu_hotplug(). Just wake up +@@ -4748,11 +5896,19 @@ static int migration_call(struct notifie + } + spin_unlock_irq(&rq->lock); + break; +-#endif + } + return NOTIFY_OK; + } + ++static int migration_call(struct notifier_block *nfb, unsigned long action, ++ void *hcpu) ++{ ++ if (action == CPU_UP_PREPARE) ++ init_boot_vcpus((long)hcpu); ++ /* we need to translate pcpu to vcpu */ ++ return vmigration_call(nfb, action, vsched_default_vcpu((long)hcpu)); ++} ++ + /* Register at highest priority so that task migration (migrate_all_tasks) + * happens before everything else. 
+ */ +@@ -4770,7 +5926,6 @@ int __init migration_init(void) + register_cpu_notifier(&migration_notifier); + return 0; + } +-#endif + + #ifdef CONFIG_SMP + #undef SCHED_DOMAIN_DEBUG +@@ -4798,7 +5953,7 @@ static void sched_domain_debug(struct sc + printk(KERN_DEBUG); + for (i = 0; i < level + 1; i++) + printk(" "); +- printk("domain %d: ", level); ++ printk("domain %d, flags %x: ", level, sd->flags); + + if (!(sd->flags & SD_LOAD_BALANCE)) { + printk("does not load-balance\n"); +@@ -4923,7 +6078,7 @@ static int sd_parent_degenerate(struct s + */ + static void cpu_attach_domain(struct sched_domain *sd, int cpu) + { +- runqueue_t *rq = cpu_rq(cpu); ++ runqueue_t *rq = vcpu_rq(vsched_default_vcpu(cpu)); + struct sched_domain *tmp; + + /* Remove the sched domains which do not contribute to scheduling. */ +@@ -4940,6 +6095,7 @@ static void cpu_attach_domain(struct sch + + sched_domain_debug(sd, cpu); + ++ rcu_assign_pointer(pcpu(cpu)->sd, sd); + rcu_assign_pointer(rq->sd, sd); + } + +@@ -5118,7 +6274,7 @@ static unsigned long domain_distance(int + unsigned long distance = 0; + struct sched_domain *sd; + +- for_each_domain(cpu1, sd) { ++ for_each_pdomain(pcpu(cpu1)->sd, sd) { + WARN_ON(!cpu_isset(cpu1, sd->span)); + if (cpu_isset(cpu2, sd->span)) + return distance; +@@ -5440,7 +6596,7 @@ static void calibrate_migration_costs(co + */ + for_each_cpu_mask(cpu, *cpu_map) { + distance = 0; +- for_each_domain(cpu, sd) { ++ for_each_pdomain(pcpu(cpu)->sd, sd) { + sd->cache_hot_time = migration_cost[distance]; + distance++; + } +@@ -6012,42 +7168,398 @@ int in_sched_functions(unsigned long add + && addr < (unsigned long)__sched_text_end); + } + +-void __init sched_init(void) ++static void init_rq(struct runqueue *rq, int cpu) ++{ ++ int j, k; ++ prio_array_t *array; ++ ++ spin_lock_init(&rq->lock); ++ rq->nr_running = 0; ++ rq->active = rq->arrays; ++ rq->expired = rq->arrays + 1; ++ rq->best_expired_prio = MAX_PRIO; ++ ++#ifdef CONFIG_SMP ++ rq->sd = NULL; ++ for (j = 0; j < 3; 
j++) ++ rq->cpu_load[j] = 0; ++ rq->active_balance = 0; ++#endif ++ rq->push_cpu = 0; ++ rq->migration_thread = NULL; ++ INIT_LIST_HEAD(&rq->migration_queue); ++ rq->cpu = cpu; ++ atomic_set(&rq->nr_iowait, 0); ++ ++ for (j = 0; j < 2; j++) { ++ array = rq->arrays + j; ++ for (k = 0; k < MAX_PRIO; k++) { ++ INIT_LIST_HEAD(array->queue + k); ++ __clear_bit(k, array->bitmap); ++ } ++ // delimiter for bitsearch ++ __set_bit(MAX_PRIO, array->bitmap); ++ } ++} ++ ++#if defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED) ++static void init_vcpu(vcpu_t vcpu, int id) ++{ ++ memset(vcpu, 0, sizeof(struct vcpu_info)); ++ vcpu->id = id; ++#ifdef CONFIG_SCHED_VCPU ++ vcpu->last_pcpu = id; ++#endif ++ init_rq(vcpu_rq(vcpu), id); ++} ++ ++/* both rq and vsched lock should be taken */ ++static void __install_vcpu(struct vcpu_scheduler *vsched, vcpu_t vcpu) ++{ ++ int id; ++ ++ id = vcpu->id; ++ vcpu->vsched = vsched; ++ vsched->vcpu[id] = vcpu; ++ vcpu->last_pcpu = id; ++ wmb(); ++ /* FIXME: probably locking should be reworked, e.g. 
++ we don't have corresponding rmb(), so we need to update mask ++ only after quiscent state */ ++ /* init_boot_vcpu() should be remade if RCU is used here */ ++ list_add(&vcpu->list, &vsched->idle_list); ++ cpu_set(id, vsched->vcpu_online_map); ++ vsched->num_online_vcpus++; ++} ++ ++static int install_vcpu(vcpu_t vcpu, struct vcpu_scheduler *vsched) + { + runqueue_t *rq; +- int i, j, k; ++ unsigned long flags; ++ int res = 0; + +- for_each_cpu(i) { +- prio_array_t *array; ++ rq = vcpu_rq(vcpu); ++ spin_lock_irqsave(&rq->lock, flags); ++ spin_lock(&fairsched_lock); + +- rq = cpu_rq(i); +- spin_lock_init(&rq->lock); +- rq->nr_running = 0; +- rq->active = rq->arrays; +- rq->expired = rq->arrays + 1; +- rq->best_expired_prio = MAX_PRIO; ++ if (vsched->vcpu[vcpu->id] != NULL) ++ res = -EBUSY; ++ else ++ __install_vcpu(vsched, vcpu); + +-#ifdef CONFIG_SMP +- rq->sd = NULL; +- for (j = 1; j < 3; j++) +- rq->cpu_load[j] = 0; +- rq->active_balance = 0; +- rq->push_cpu = 0; +- rq->migration_thread = NULL; +- INIT_LIST_HEAD(&rq->migration_queue); +-#endif +- atomic_set(&rq->nr_iowait, 0); +- +- for (j = 0; j < 2; j++) { +- array = rq->arrays + j; +- for (k = 0; k < MAX_PRIO; k++) { +- INIT_LIST_HEAD(array->queue + k); +- __clear_bit(k, array->bitmap); +- } +- // delimiter for bitsearch +- __set_bit(MAX_PRIO, array->bitmap); ++ spin_unlock(&fairsched_lock); ++ spin_unlock_irqrestore(&rq->lock, flags); ++ return res; ++} ++ ++static int __add_vcpu(struct vcpu_scheduler *vsched, int id) ++{ ++ vcpu_t vcpu; ++ int res; ++ ++ res = -ENOMEM; ++ vcpu = kmalloc(sizeof(struct vcpu_info), GFP_KERNEL); ++ if (vcpu == NULL) ++ goto out; ++ ++ init_vcpu(vcpu, id); ++ vcpu_rq(vcpu)->curr = this_pcpu()->idle; ++ res = install_vcpu(vcpu, vsched); ++ if (res < 0) ++ goto out_free; ++ return 0; ++ ++out_free: ++ kfree(vcpu); ++out: ++ return res; ++} ++ ++void vsched_init(struct vcpu_scheduler *vsched, int id) ++{ ++ memset(vsched, 0, sizeof(*vsched)); ++ ++ 
INIT_LIST_HEAD(&vsched->idle_list); ++ INIT_LIST_HEAD(&vsched->active_list); ++ INIT_LIST_HEAD(&vsched->running_list); ++ vsched->num_online_vcpus = 0; ++ vsched->vcpu_online_map = CPU_MASK_NONE; ++ vsched->vcpu_running_map = CPU_MASK_NONE; ++ vsched->pcpu_running_map = CPU_MASK_NONE; ++ vsched->id = id; ++} ++ ++#ifdef CONFIG_FAIRSCHED ++ ++/* No locks supposed to be held */ ++static void vsched_del_vcpu(vcpu_t vcpu); ++static int vsched_add_vcpu(struct vcpu_scheduler *vsched) ++{ ++ int res, err; ++ vcpu_t vcpu; ++ int id; ++ static DECLARE_MUTEX(id_mutex); ++ ++ down(&id_mutex); ++ id = find_first_zero_bit(vsched->vcpu_online_map.bits, NR_CPUS); ++ if (id >= NR_CPUS) { ++ err = -EBUSY; ++ goto out_up; ++ } ++ ++ err = __add_vcpu(vsched, id); ++ if (err < 0) ++ goto out_up; ++ ++ vcpu = vsched_vcpu(vsched, id); ++ err = -ENOMEM; ++ ++ res = vmigration_call(&migration_notifier, CPU_UP_PREPARE, vcpu); ++ if (res != NOTIFY_OK) ++ goto out_del_up; ++ ++ res = vmigration_call(&migration_notifier, CPU_ONLINE, vcpu); ++ if (res != NOTIFY_OK) ++ goto out_cancel_del_up; ++ ++ err = 0; ++ ++out_up: ++ up(&id_mutex); ++ return err; ++ ++out_cancel_del_up: ++ vmigration_call(&migration_notifier, CPU_UP_CANCELED, vcpu); ++out_del_up: ++ vsched_del_vcpu(vcpu); ++ goto out_up; ++} ++ ++static void vsched_del_vcpu(vcpu_t vcpu) ++{ ++ struct vcpu_scheduler *vsched; ++ runqueue_t *rq; ++ ++ vsched = vcpu_vsched(vcpu); ++ rq = vcpu_rq(vcpu); ++ ++ spin_lock_irq(&rq->lock); ++ spin_lock(&fairsched_lock); ++ cpu_clear(vcpu->id, vsched->vcpu_online_map); ++ vsched->num_online_vcpus--; ++ spin_unlock(&fairsched_lock); ++ spin_unlock_irq(&rq->lock); ++ ++ /* ++ * FIXME: ideas for VCPU hotplug: ++ * ++ * - push_cpu should be checked/cleanuped ++ * - serialization ++ */ ++ ++ /* ++ * all tasks should migrate from this VCPU somewhere, ++ * also, since this moment VCPU is offline, so migration_thread ++ * won't accept any new tasks... 
++ */ ++ vmigration_call(&migration_notifier, CPU_DEAD, vcpu); ++ BUG_ON(rq->nr_running != 0); ++ ++ /* vcpu_put() is called after deactivate_task. This loop makes sure ++ * that vcpu_put() was finished and vcpu can be freed */ ++ while ((volatile int)vcpu->running) ++ yield(); ++ ++ BUG_ON(vcpu->active); /* should be in idle_list */ ++ BUG_ON(vcpu_rq(vcpu)->prev_mm != NULL); ++ ++ spin_lock_irq(&fairsched_lock); ++ list_del(&vcpu->list); ++ vsched_vcpu(vsched, vcpu->id) = NULL; ++ spin_unlock_irq(&fairsched_lock); ++ ++ kfree(vcpu); ++} ++ ++int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched) ++{ ++ vcpu_t dest_vcpu; ++ int id; ++ int res; ++ ++ res = 0; ++ while(1) { ++ /* FIXME: we suppose here that vcpu can't dissapear on the fly */ ++ for(id = first_cpu(vsched->vcpu_online_map); id < NR_CPUS; ++ id++) { ++ if ((vsched->vcpu[id] != NULL) && ++ !vcpu_isset(vsched->vcpu[id], p->cpus_allowed)) ++ continue; ++ else ++ break; ++ } ++ if (id >= NR_CPUS) { ++ res = -EINVAL; ++ goto out; ++ } ++ ++ dest_vcpu = vsched_vcpu(vsched, id); ++ while(1) { ++ sched_migrate_task(p, dest_vcpu); ++ if (task_vsched_id(p) == vsched_id(vsched)) ++ goto out; ++ if (!vcpu_isset(vsched->vcpu[id], p->cpus_allowed)) ++ break; + } + } ++out: ++ return res; ++} ++ ++void vsched_fairsched_link(struct vcpu_scheduler *vsched, ++ struct fairsched_node *node) ++{ ++ vsched->node = node; ++ node->vsched = vsched; ++} ++ ++void vsched_fairsched_unlink(struct vcpu_scheduler *vsched, ++ struct fairsched_node *node) ++{ ++ vsched->node = NULL; ++ node->vsched = NULL; ++} ++ ++int vsched_create(int id, struct fairsched_node *node) ++{ ++ struct vcpu_scheduler *vsched; ++ int i, res; ++ ++ vsched = kmalloc(sizeof(*vsched), GFP_KERNEL); ++ if (vsched == NULL) ++ return -ENOMEM; ++ ++ vsched_init(vsched, node->id); ++ vsched_fairsched_link(vsched, node); ++ ++ for(i = 0; i < num_online_cpus(); i++) { ++ res = vsched_add_vcpu(vsched); ++ if (res < 0) ++ goto err_add; ++ } ++ return 0; 
++ ++err_add: ++ vsched_destroy(vsched); ++ return res; ++} ++ ++int vsched_destroy(struct vcpu_scheduler *vsched) ++{ ++ vcpu_t vcpu; ++ ++ if (vsched == NULL) ++ return 0; ++ ++ spin_lock_irq(&fairsched_lock); ++ while(1) { ++ if (!list_empty(&vsched->running_list)) ++ vcpu = list_entry(vsched->running_list.next, ++ struct vcpu_info, list); ++ else if (!list_empty(&vsched->active_list)) ++ vcpu = list_entry(vsched->active_list.next, ++ struct vcpu_info, list); ++ else if (!list_empty(&vsched->idle_list)) ++ vcpu = list_entry(vsched->idle_list.next, ++ struct vcpu_info, list); ++ else ++ break; ++ spin_unlock_irq(&fairsched_lock); ++ vsched_del_vcpu(vcpu); ++ spin_lock_irq(&fairsched_lock); ++ } ++ if (vsched->num_online_vcpus) ++ goto err_busy; ++ spin_unlock_irq(&fairsched_lock); ++ ++ vsched_fairsched_unlink(vsched, vsched->node); ++ kfree(vsched); ++ return 0; ++ ++err_busy: ++ printk(KERN_ERR "BUG in vsched_destroy, vsched id %d\n", ++ vsched->id); ++ spin_unlock_irq(&fairsched_lock); ++ return -EBUSY; ++ ++} ++#endif /* defined(CONFIG_FAIRSCHED) */ ++#endif /* defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED) */ ++ ++static void init_boot_vcpu(void) ++{ ++ int res; ++ ++ /* ++ * We setup boot_vcpu and it's runqueue until init_idle() happens ++ * on cpu0. This is required since timer interrupts can happen ++ * between sched_init() and init_idle(). 
++ */ ++ init_vcpu(&boot_idle_vcpu, 0); ++ vcpu_rq(&boot_idle_vcpu)->curr = current; ++ res = install_vcpu(&boot_idle_vcpu, &idle_vsched); ++ if (res < 0) ++ panic("Can't install boot idle vcpu"); ++ ++ init_vcpu(&boot_vcpu, 0); ++ vcpu_rq(&boot_vcpu)->curr = current; ++ res = install_vcpu(&boot_vcpu, &default_vsched); ++ if (res < 0) ++ panic("Can't install boot vcpu"); ++ ++ this_pcpu()->vcpu = &boot_idle_vcpu; ++ this_pcpu()->vsched = &idle_vsched; ++} ++ ++static void init_pcpu(int id) ++{ ++ struct pcpu_info *pcpu; ++ ++ pcpu = pcpu(id); ++ pcpu->id = id; ++#ifdef CONFIG_SMP ++ pcpu->sd = NULL; ++#endif ++ ++#ifndef CONFIG_SCHED_VCPU ++ init_vcpu(vcpu(id), id); ++#endif ++} ++ ++static void init_pcpus(void) ++{ ++ int i; ++ for (i = 0; i < NR_CPUS; i++) ++ init_pcpu(i); ++} ++ ++void __init sched_init(void) ++{ ++ init_pcpus(); ++#if defined(CONFIG_SCHED_VCPU) ++ vsched_init(&idle_vsched, -1); ++ vsched_init(&default_vsched, 0); ++#if defined(CONFIG_FAIRSCHED) ++ fairsched_init_early(); ++ vsched_fairsched_link(&idle_vsched, &fairsched_idle_node); ++ vsched_fairsched_link(&default_vsched, &fairsched_init_node); ++#endif ++ init_boot_vcpu(); ++#else ++#if defined(CONFIG_FAIRSCHED) ++ fairsched_init_early(); ++#endif ++#endif + + /* + * The boot idle thread does lazy MMU switching as well: +@@ -6064,6 +7576,149 @@ void __init sched_init(void) + init_idle(current, smp_processor_id()); + } + ++#ifdef CONFIG_SCHED_VCPU ++static void show_vcpu_list(struct vcpu_scheduler *vsched, struct list_head *lh) ++{ ++ cpumask_t m; ++ vcpu_t vcpu; ++ int i; ++ ++ cpus_clear(m); ++ list_for_each_entry(vcpu, lh, list) ++ cpu_set(vcpu->id, m); ++ ++ for (i = 0; i < NR_CPUS; i++) ++ if (cpu_isset(i, m)) ++ printk("%d ", i); ++} ++ ++#define PRINT(s, sz, fmt...) 
\ ++ do { \ ++ int __out; \ ++ __out = scnprintf(*s, *sz, fmt); \ ++ *s += __out; \ ++ *sz -= __out; \ ++ } while(0) ++ ++static void show_rq_array(prio_array_t *array, char *header, char **s, int *sz) ++{ ++ struct list_head *list; ++ task_t *p; ++ int k, h; ++ ++ h = 0; ++ for (k = 0; k < MAX_PRIO; k++) { ++ list = array->queue + k; ++ if (list_empty(list)) ++ continue; ++ ++ if (!h) { ++ PRINT(s, sz, header); ++ h = 1; ++ } ++ ++ PRINT(s, sz, " prio %d (", k); ++ list_for_each_entry(p, list, run_list) ++ PRINT(s, sz, "%s[%d] ", p->comm, p->pid); ++ PRINT(s, sz, ")"); ++ } ++ if (h) ++ PRINT(s, sz, "\n"); ++} ++ ++static void show_vcpu(vcpu_t vcpu) ++{ ++ runqueue_t *rq; ++ char buf[1024], *s; ++ unsigned long flags; ++ int sz; ++ ++ if (vcpu == NULL) ++ return; ++ ++ rq = vcpu_rq(vcpu); ++ spin_lock_irqsave(&rq->lock, flags); ++ printk(" vcpu %d: last_pcpu %d, state %s%s\n", ++ vcpu->id, vcpu->last_pcpu, ++ vcpu->active ? "A" : "", ++ vcpu->running ? "R" : ""); ++ ++ printk(" rq: running %lu, load {%lu,%lu,%lu}, sw %Lu, sd %p, curr %p\n", ++ rq->nr_running, ++#ifdef CONFIG_SMP ++ rq->cpu_load[0], rq->cpu_load[1], rq->cpu_load[2], ++#else ++ 0LU, 0LU, 0LU, ++#endif ++ rq->nr_switches, ++#ifdef CONFIG_SMP ++ rq->sd, ++#else ++ NULL, ++#endif ++ rq->curr ++ ); ++ ++ s = buf; ++ sz = sizeof(buf) - 1; ++ ++ show_rq_array(rq->active, " active:", &s, &sz); ++ show_rq_array(rq->expired, " expired:", &s, &sz); ++ spin_unlock_irqrestore(&rq->lock, flags); ++ ++ *s = 0; ++ printk(buf); ++} ++ ++static inline void fairsched_show_node(struct vcpu_scheduler *vsched) ++{ ++#ifdef CONFIG_FAIRSCHED ++ struct fairsched_node *node; ++ ++ node = vsched->node; ++ printk("fsnode: ready %d run %d cpu %d vsched %p, pcpu %d\n", ++ node->nr_ready, node->nr_runnable, node->nr_pcpu, ++ node->vsched, smp_processor_id()); ++#endif ++} ++ ++static void __show_vsched(struct vcpu_scheduler *vsched) ++{ ++ char mask[NR_CPUS + 1]; ++ int i; ++ unsigned long flags; ++ ++ 
spin_lock_irqsave(&fairsched_lock, flags); ++ printk("vsched id=%d\n", vsched_id(vsched)); ++ fairsched_show_node(vsched); ++ ++ printk(" idle cpus "); ++ show_vcpu_list(vsched, &vsched->idle_list); ++ printk("; active cpus "); ++ show_vcpu_list(vsched, &vsched->active_list); ++ printk("; running cpus "); ++ show_vcpu_list(vsched, &vsched->running_list); ++ printk("\n"); ++ ++ cpumask_scnprintf(mask, NR_CPUS, vsched->vcpu_online_map); ++ printk(" num_online_cpus=%d, mask=%s (w=%d)\n", ++ vsched->num_online_vcpus, mask, ++ cpus_weight(vsched->vcpu_online_map)); ++ spin_unlock_irqrestore(&fairsched_lock, flags); ++ ++ for (i = 0; i < NR_CPUS; i++) ++ show_vcpu(vsched->vcpu[i]); ++} ++ ++void show_vsched(void) ++{ ++ oops_in_progress = 1; ++ __show_vsched(&idle_vsched); ++ __show_vsched(&default_vsched); ++ oops_in_progress = 0; ++} ++#endif /* CONFIG_SCHED_VCPU */ ++ + #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP + void __might_sleep(char *file, int line) + { +@@ -6095,7 +7750,7 @@ void normalize_rt_tasks(void) + runqueue_t *rq; + + read_lock_irq(&tasklist_lock); +- for_each_process (p) { ++ for_each_process_all (p) { + if (!rt_task(p)) + continue; + +@@ -6136,7 +7791,7 @@ void normalize_rt_tasks(void) + */ + task_t *curr_task(int cpu) + { +- return cpu_curr(cpu); ++ return vcpu_rq(pcpu(cpu)->vcpu)->curr; + } + + /** +@@ -6156,7 +7811,7 @@ task_t *curr_task(int cpu) + */ + void set_curr_task(int cpu, task_t *p) + { +- cpu_curr(cpu) = p; ++ vcpu_rq(pcpu(cpu)->vcpu)->curr = p; + } + + #endif +diff -upr linux-2.6.16.orig/kernel/signal.c linux-2.6.16-026test015/kernel/signal.c +--- linux-2.6.16.orig/kernel/signal.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/signal.c 2006-07-04 14:41:39.000000000 +0400 +@@ -25,17 +25,20 @@ + #include <linux/posix-timers.h> + #include <linux/signal.h> + #include <linux/audit.h> ++#include <linux/kmem_cache.h> + #include <linux/capability.h> + #include <asm/param.h> + #include <asm/uaccess.h> + #include <asm/unistd.h> + 
#include <asm/siginfo.h> ++#include <ub/ub_misc.h> + + /* + * SLAB caches for signal bits. + */ + +-static kmem_cache_t *sigqueue_cachep; ++kmem_cache_t *sigqueue_cachep; ++EXPORT_SYMBOL_GPL(sigqueue_cachep); + + /* + * In POSIX a signal is sent either to a specific thread (Linux task) +@@ -221,6 +224,7 @@ fastcall void recalc_sigpending_tsk(stru + else + clear_tsk_thread_flag(t, TIF_SIGPENDING); + } ++EXPORT_SYMBOL_GPL(recalc_sigpending_tsk); + + void recalc_sigpending(void) + { +@@ -271,8 +275,13 @@ static struct sigqueue *__sigqueue_alloc + atomic_inc(&t->user->sigpending); + if (override_rlimit || + atomic_read(&t->user->sigpending) <= +- t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) ++ t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) { + q = kmem_cache_alloc(sigqueue_cachep, flags); ++ if (q && ub_siginfo_charge(q, get_task_ub(t))) { ++ kmem_cache_free(sigqueue_cachep, q); ++ q = NULL; ++ } ++ } + if (unlikely(q == NULL)) { + atomic_dec(&t->user->sigpending); + } else { +@@ -289,6 +298,7 @@ static void __sigqueue_free(struct sigqu + return; + atomic_dec(&q->user->sigpending); + free_uid(q->user); ++ ub_siginfo_uncharge(q); + kmem_cache_free(sigqueue_cachep, q); + } + +@@ -378,8 +388,11 @@ void __exit_signal(struct task_struct *t + wake_up_process(sig->group_exit_task); + sig->group_exit_task = NULL; + } +- if (tsk == sig->curr_target) ++ if (tsk == sig->curr_target) { + sig->curr_target = next_thread(tsk); ++ if (tsk == sig->curr_target) ++ sig->curr_target = NULL; ++ } + tsk->signal = NULL; + /* + * Accumulate here the counters for all threads but the +@@ -524,7 +537,16 @@ static int __dequeue_signal(struct sigpe + { + int sig = 0; + +- sig = next_signal(pending, mask); ++ /* SIGKILL must have priority, otherwise it is quite easy ++ * to create an unkillable process, sending sig < SIGKILL ++ * to self */ ++ if (unlikely(sigismember(&pending->signal, SIGKILL))) { ++ if (!sigismember(mask, SIGKILL)) ++ sig = SIGKILL; ++ } ++ ++ if (likely(!sig)) ++ sig = 
next_signal(pending, mask); + if (sig) { + if (current->notifier) { + if (sigismember(current->notifier_mask, sig)) { +@@ -618,6 +640,7 @@ void signal_wake_up(struct task_struct * + if (!wake_up_state(t, mask)) + kick_process(t); + } ++EXPORT_SYMBOL_GPL(signal_wake_up); + + /* + * Remove signals in mask from the pending set and queue. +@@ -838,7 +861,7 @@ static int send_signal(int sig, struct s + q->info.si_signo = sig; + q->info.si_errno = 0; + q->info.si_code = SI_USER; +- q->info.si_pid = current->pid; ++ q->info.si_pid = virt_pid(current); + q->info.si_uid = current->uid; + break; + case (unsigned long) SEND_SIG_PRIV: +@@ -975,7 +998,6 @@ __group_complete_signal(int sig, struct + if (t == NULL) + /* restart balancing at this thread */ + t = p->signal->curr_target = p; +- BUG_ON(t->tgid != p->tgid); + + while (!wants_signal(sig, t)) { + t = next_thread(t); +@@ -1159,13 +1181,18 @@ int __kill_pg_info(int sig, struct sigin + if (pgrp <= 0) + return -EINVAL; + ++ /* Use __vpid_to_pid(). This function is used under write_lock ++ * tasklist_lock. */ ++ if (is_virtual_pid(pgrp)) ++ pgrp = __vpid_to_pid(pgrp); ++ + success = 0; + retval = -ESRCH; +- do_each_task_pid(pgrp, PIDTYPE_PGID, p) { ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) { + int err = group_send_sig_info(sig, info, p); + success |= !err; + retval = err; +- } while_each_task_pid(pgrp, PIDTYPE_PGID, p); ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p); + return success ? 
0 : retval; + } + +@@ -1193,7 +1220,7 @@ kill_proc_info(int sig, struct siginfo * + read_lock(&tasklist_lock); + acquired_tasklist_lock = 1; + } +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + error = -ESRCH; + if (p) + error = group_send_sig_info(sig, info, p); +@@ -1214,7 +1241,7 @@ int kill_proc_info_as_uid(int sig, struc + return ret; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + if (!p) { + ret = -ESRCH; + goto out_unlock; +@@ -1253,8 +1280,8 @@ static int kill_something_info(int sig, + struct task_struct * p; + + read_lock(&tasklist_lock); +- for_each_process(p) { +- if (p->pid > 1 && p->tgid != current->tgid) { ++ for_each_process_ve(p) { ++ if (virt_pid(p) > 1 && p->tgid != current->tgid) { + int err = group_send_sig_info(sig, info, p); + ++count; + if (err != -EPERM) +@@ -1562,9 +1589,17 @@ void do_notify_parent(struct task_struct + BUG_ON(!tsk->ptrace && + (tsk->group_leader != tsk || !thread_group_empty(tsk))); + ++#ifdef CONFIG_VE ++ /* Allow to send only SIGCHLD from VE */ ++ if (sig != SIGCHLD && ++ tsk->ve_task_info.owner_env != ++ tsk->parent->ve_task_info.owner_env) ++ sig = SIGCHLD; ++#endif ++ + info.si_signo = sig; + info.si_errno = 0; +- info.si_pid = tsk->pid; ++ info.si_pid = get_task_pid_ve(tsk, tsk->parent->ve_task_info.owner_env); + info.si_uid = tsk->uid; + + /* FIXME: find out whether or not this is supposed to be c*time. */ +@@ -1629,7 +1664,7 @@ static void do_notify_parent_cldstop(str + + info.si_signo = SIGCHLD; + info.si_errno = 0; +- info.si_pid = tsk->pid; ++ info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(parent)->owner_env); + info.si_uid = tsk->uid; + + /* FIXME: find out whether or not this is supposed to be c*time. */ +@@ -1763,7 +1798,9 @@ finish_stop(int stop_count) + read_unlock(&tasklist_lock); + + out: ++ set_stop_state(current); + schedule(); ++ clear_stop_state(current); + /* + * Now we don't run again until continued. 
+ */ +@@ -1940,11 +1977,13 @@ relock: + ptrace_signal_deliver(regs, cookie); + + /* Let the debugger run. */ ++ set_pn_state(current, PN_STOP_SIGNAL); + ptrace_stop(signr, signr, info); ++ clear_pn_state(current); + +- /* We're back. Did the debugger cancel the sig or group_exit? */ ++ /* We're back. Did the debugger cancel the sig? */ + signr = current->exit_code; +- if (signr == 0 || current->signal->flags & SIGNAL_GROUP_EXIT) ++ if (signr == 0) + continue; + + current->exit_code = 0; +@@ -1957,7 +1996,7 @@ relock: + info->si_signo = signr; + info->si_errno = 0; + info->si_code = SI_USER; +- info->si_pid = current->parent->pid; ++ info->si_pid = virt_pid(current->parent); + info->si_uid = current->parent->uid; + } + +@@ -1988,8 +2027,14 @@ relock: + continue; + + /* Init gets no signals it doesn't want. */ +- if (current->pid == 1) ++ if (virt_pid(current) == 1) { ++ /* Allow SIGKILL for non-root VE */ ++#ifdef CONFIG_VE ++ if (current->pid == 1 || ++ signr != SIGKILL) ++#endif + continue; ++ } + + if (sig_kernel_stop(signr)) { + /* +@@ -2307,7 +2352,6 @@ sys_rt_sigtimedwait(const sigset_t __use + + timeout = schedule_timeout_interruptible(timeout); + +- try_to_freeze(); + spin_lock_irq(¤t->sighand->siglock); + sig = dequeue_signal(current, &these, &info); + current->blocked = current->real_blocked; +@@ -2340,7 +2384,7 @@ sys_kill(int pid, int sig) + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_USER; +- info.si_pid = current->tgid; ++ info.si_pid = virt_tgid(current); + info.si_uid = current->uid; + + return kill_something_info(sig, &info, pid); +@@ -2356,12 +2400,12 @@ static int do_tkill(int tgid, int pid, i + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; +- info.si_pid = current->tgid; ++ info.si_pid = virt_tgid(current); + info.si_uid = current->uid; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); +- if (p && (tgid <= 0 || p->tgid == tgid)) { ++ p = find_task_by_pid_ve(pid); ++ if (p && (tgid <= 0 || 
virt_tgid(p) == tgid)) { + error = check_kill_permission(sig, &info, p); + /* + * The null signal is a permissions and process existence +diff -upr linux-2.6.16.orig/kernel/softirq.c linux-2.6.16-026test015/kernel/softirq.c +--- linux-2.6.16.orig/kernel/softirq.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/softirq.c 2006-07-04 14:41:38.000000000 +0400 +@@ -13,10 +13,13 @@ + #include <linux/mm.h> + #include <linux/notifier.h> + #include <linux/percpu.h> ++#include <linux/sysctl.h> + #include <linux/cpu.h> + #include <linux/kthread.h> + #include <linux/rcupdate.h> + ++#include <ub/beancounter.h> ++ + #include <asm/irq.h> + /* + - No shared variables, all the data are CPU local. +@@ -44,6 +47,8 @@ EXPORT_SYMBOL(irq_stat); + static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; + + static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); ++static DEFINE_PER_CPU(struct task_struct *, ksoftirqd_wakeup); ++static int ksoftirqd_stat[NR_CPUS]; + + /* + * we cannot loop indefinitely here to avoid userspace starvation, +@@ -54,7 +59,7 @@ static DEFINE_PER_CPU(struct task_struct + static inline void wakeup_softirqd(void) + { + /* Interrupts are disabled: no need to stop preemption */ +- struct task_struct *tsk = __get_cpu_var(ksoftirqd); ++ struct task_struct *tsk = __get_cpu_var(ksoftirqd_wakeup); + + if (tsk && tsk->state != TASK_RUNNING) + wake_up_process(tsk); +@@ -73,10 +78,14 @@ static inline void wakeup_softirqd(void) + + asmlinkage void __do_softirq(void) + { ++ struct user_beancounter *ub; + struct softirq_action *h; + __u32 pending; + int max_restart = MAX_SOFTIRQ_RESTART; + int cpu; ++ struct ve_struct *envid; ++ ++ envid = set_exec_env(get_ve0()); + + pending = local_softirq_pending(); + +@@ -90,6 +99,7 @@ restart: + + h = softirq_vec; + ++ ub = set_exec_ub(get_ub0()); + do { + if (pending & 1) { + h->action(h); +@@ -98,6 +108,7 @@ restart: + h++; + pending >>= 1; + } while (pending); ++ (void)set_exec_ub(ub); + + 
local_irq_disable(); + +@@ -108,6 +119,7 @@ restart: + if (pending) + wakeup_softirqd(); + ++ (void)set_exec_env(envid); + __local_bh_enable(); + } + +@@ -483,6 +495,52 @@ static int __devinit cpu_callback(struct + return NOTIFY_OK; + } + ++static int proc_ksoftirqd(ctl_table *ctl, int write, struct file *filp, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ int ret, cpu; ++ ++ ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); ++ if (!write) ++ return ret; ++ ++ for_each_online_cpu(cpu) { ++ per_cpu(ksoftirqd_wakeup, cpu) = ++ ksoftirqd_stat[cpu] ? per_cpu(ksoftirqd, cpu) : NULL; ++ } ++ return ret; ++} ++ ++static int sysctl_ksoftirqd(ctl_table *table, int *name, int nlen, ++ void *oldval, size_t *oldlenp, void *newval, size_t newlen, ++ void **context) ++{ ++ return -EINVAL; ++} ++ ++static ctl_table debug_table[] = { ++ { ++ .ctl_name = 1246, ++ .procname = "ksoftirqd", ++ .data = ksoftirqd_stat, ++ .maxlen = sizeof(ksoftirqd_stat), ++ .mode = 0644, ++ .proc_handler = &proc_ksoftirqd, ++ .strategy = &sysctl_ksoftirqd ++ }, ++ {0} ++}; ++ ++static ctl_table root_table[] = { ++ { ++ .ctl_name = CTL_DEBUG, ++ .procname = "debug", ++ .mode = 0555, ++ .child = debug_table ++ }, ++ {0} ++}; ++ + static struct notifier_block __devinitdata cpu_nfb = { + .notifier_call = cpu_callback + }; +@@ -493,5 +551,6 @@ __init int spawn_ksoftirqd(void) + cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); + register_cpu_notifier(&cpu_nfb); ++ register_sysctl_table(root_table, 0); + return 0; + } +diff -upr linux-2.6.16.orig/kernel/stop_machine.c linux-2.6.16-026test015/kernel/stop_machine.c +--- linux-2.6.16.orig/kernel/stop_machine.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/stop_machine.c 2006-07-04 14:41:39.000000000 +0400 +@@ -96,7 +96,7 @@ static int stop_machine(void) + stopmachine_state = STOPMACHINE_WAIT; + + for_each_online_cpu(i) { +- if (i == raw_smp_processor_id()) ++ if (i == 
task_cpu(current)) + continue; + ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL); + if (ret < 0) +@@ -178,7 +178,7 @@ struct task_struct *__stop_machine_run(i + + /* If they don't care which CPU fn runs on, bind to any online one. */ + if (cpu == NR_CPUS) +- cpu = raw_smp_processor_id(); ++ cpu = task_cpu(current); + + p = kthread_create(do_stop, &smdata, "kstopmachine"); + if (!IS_ERR(p)) { +diff -upr linux-2.6.16.orig/kernel/sys.c linux-2.6.16-026test015/kernel/sys.c +--- linux-2.6.16.orig/kernel/sys.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/sys.c 2006-07-04 14:41:39.000000000 +0400 +@@ -11,6 +11,7 @@ + #include <linux/mman.h> + #include <linux/smp_lock.h> + #include <linux/notifier.h> ++#include <linux/virtinfo.h> + #include <linux/reboot.h> + #include <linux/prctl.h> + #include <linux/init.h> +@@ -236,6 +237,94 @@ int capable(int cap) + EXPORT_SYMBOL(capable); + #endif + ++static DECLARE_MUTEX(virtinfo_sem); ++static struct vnotifier_block *virtinfo_chain[VIRT_TYPES]; ++ ++void virtinfo_notifier_register(int type, struct vnotifier_block *nb) ++{ ++ struct vnotifier_block **p; ++ ++ down(&virtinfo_sem); ++ for (p = &virtinfo_chain[type]; ++ *p != NULL && nb->priority < (*p)->priority; ++ p = &(*p)->next); ++ nb->next = *p; ++ smp_wmb(); ++ *p = nb; ++ up(&virtinfo_sem); ++} ++ ++EXPORT_SYMBOL(virtinfo_notifier_register); ++ ++struct virtinfo_cnt_struct { ++ volatile unsigned long exit[NR_CPUS]; ++ volatile unsigned long entry; ++}; ++static DEFINE_PER_CPU(struct virtinfo_cnt_struct, virtcnt); ++ ++void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb) ++{ ++ struct vnotifier_block **p; ++ int entry_cpu, exit_cpu; ++ unsigned long cnt, ent; ++ ++ down(&virtinfo_sem); ++ for (p = &virtinfo_chain[type]; *p != nb; p = &(*p)->next); ++ *p = nb->next; ++ smp_mb(); ++ ++ for_each_cpu_mask(entry_cpu, cpu_possible_map) { ++ while (1) { ++ cnt = 0; ++ for_each_cpu_mask(exit_cpu, cpu_possible_map) ++ cnt += ++ 
per_cpu(virtcnt, entry_cpu).exit[exit_cpu]; ++ smp_rmb(); ++ ent = per_cpu(virtcnt, entry_cpu).entry; ++ if (cnt == ent) ++ break; ++ __set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(HZ / 100); ++ } ++ } ++ up(&virtinfo_sem); ++} ++ ++EXPORT_SYMBOL(virtinfo_notifier_unregister); ++ ++int virtinfo_notifier_call(int type, unsigned long n, void *data) ++{ ++ int ret; ++ int entry_cpu, exit_cpu; ++ struct vnotifier_block *nb; ++ ++ entry_cpu = get_cpu(); ++ per_cpu(virtcnt, entry_cpu).entry++; ++ smp_wmb(); ++ put_cpu(); ++ ++ nb = virtinfo_chain[type]; ++ ret = NOTIFY_DONE; ++ while (nb) ++ { ++ ret = nb->notifier_call(nb, n, data, ret); ++ if(ret & NOTIFY_STOP_MASK) { ++ ret &= ~NOTIFY_STOP_MASK; ++ break; ++ } ++ nb = nb->next; ++ } ++ ++ exit_cpu = get_cpu(); ++ smp_wmb(); ++ per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++; ++ put_cpu(); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL(virtinfo_notifier_call); ++ + static int set_one_prio(struct task_struct *p, int niceval, int error) + { + int no_nice; +@@ -281,17 +370,19 @@ asmlinkage long sys_setpriority(int whic + switch (which) { + case PRIO_PROCESS: + if (!who) +- who = current->pid; +- p = find_task_by_pid(who); ++ who = virt_pid(current); ++ p = find_task_by_pid_ve(who); + if (p) + error = set_one_prio(p, niceval, error); + break; + case PRIO_PGRP: + if (!who) + who = process_group(current); +- do_each_task_pid(who, PIDTYPE_PGID, p) { ++ else ++ who = vpid_to_pid(who); ++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) { + error = set_one_prio(p, niceval, error); +- } while_each_task_pid(who, PIDTYPE_PGID, p); ++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p); + break; + case PRIO_USER: + user = current->user; +@@ -301,10 +392,10 @@ asmlinkage long sys_setpriority(int whic + if ((who != current->uid) && !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ + +- do_each_thread(g, p) ++ do_each_thread_ve(g, p) + if (p->uid == who) + error = set_one_prio(p, niceval, error); +- 
while_each_thread(g, p); ++ while_each_thread_ve(g, p); + if (who != current->uid) + free_uid(user); /* For find_user() */ + break; +@@ -334,8 +425,8 @@ asmlinkage long sys_getpriority(int whic + switch (which) { + case PRIO_PROCESS: + if (!who) +- who = current->pid; +- p = find_task_by_pid(who); ++ who = virt_pid(current); ++ p = find_task_by_pid_ve(who); + if (p) { + niceval = 20 - task_nice(p); + if (niceval > retval) +@@ -345,11 +436,13 @@ asmlinkage long sys_getpriority(int whic + case PRIO_PGRP: + if (!who) + who = process_group(current); +- do_each_task_pid(who, PIDTYPE_PGID, p) { ++ else ++ who = vpid_to_pid(who); ++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) { + niceval = 20 - task_nice(p); + if (niceval > retval) + retval = niceval; +- } while_each_task_pid(who, PIDTYPE_PGID, p); ++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p); + break; + case PRIO_USER: + user = current->user; +@@ -359,13 +452,13 @@ asmlinkage long sys_getpriority(int whic + if ((who != current->uid) && !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ + +- do_each_thread(g, p) ++ do_each_thread_ve(g, p) + if (p->uid == who) { + niceval = 20 - task_nice(p); + if (niceval > retval) + retval = niceval; + } +- while_each_thread(g, p); ++ while_each_thread_ve(g, p); + if (who != current->uid) + free_uid(user); /* for find_user() */ + break; +@@ -497,6 +590,35 @@ asmlinkage long sys_reboot(int magic1, i + magic2 != LINUX_REBOOT_MAGIC2C)) + return -EINVAL; + ++#ifdef CONFIG_VE ++ if (!ve_is_super(get_exec_env())) ++ switch (cmd) { ++ case LINUX_REBOOT_CMD_RESTART: ++ case LINUX_REBOOT_CMD_HALT: ++ case LINUX_REBOOT_CMD_POWER_OFF: ++ case LINUX_REBOOT_CMD_RESTART2: { ++ struct siginfo info; ++ ++ info.si_errno = 0; ++ info.si_code = SI_KERNEL; ++ info.si_pid = virt_pid(current); ++ info.si_uid = current->uid; ++ info.si_signo = SIGKILL; ++ ++ /* Sending to real init is safe */ ++ send_sig_info(SIGKILL, &info, ++ get_exec_env()->init_entry); ++ } ++ ++ case 
LINUX_REBOOT_CMD_CAD_ON: ++ case LINUX_REBOOT_CMD_CAD_OFF: ++ return 0; ++ ++ default: ++ return -EINVAL; ++ } ++#endif ++ + /* Instead of trying to make the power_off code look like + * halt when pm_power_off is not set do it the easy way. + */ +@@ -686,7 +808,7 @@ asmlinkage long sys_setgid(gid_t gid) + return 0; + } + +-static int set_user(uid_t new_ruid, int dumpclear) ++int set_user(uid_t new_ruid, int dumpclear) + { + struct user_struct *new_user; + +@@ -711,6 +833,7 @@ static int set_user(uid_t new_ruid, int + current->uid = new_ruid; + return 0; + } ++EXPORT_SYMBOL(set_user); + + /* + * Unprivileged users may change the real uid to the effective uid +@@ -1079,7 +1202,12 @@ asmlinkage long sys_times(struct tms __u + if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) + return -EFAULT; + } ++#ifndef CONFIG_VE + return (long) jiffies_64_to_clock_t(get_jiffies_64()); ++#else ++ return (long) jiffies_64_to_clock_t(get_jiffies_64() - ++ get_exec_env()->start_jiffies); ++#endif + } + + /* +@@ -1100,21 +1228,24 @@ asmlinkage long sys_setpgid(pid_t pid, p + struct task_struct *p; + struct task_struct *group_leader = current->group_leader; + int err = -EINVAL; ++ int _pgid; + + if (!pid) +- pid = group_leader->pid; ++ pid = virt_pid(group_leader); + if (!pgid) + pgid = pid; + if (pgid < 0) + return -EINVAL; + ++ _pgid = vpid_to_pid(pgid); ++ + /* From this point forward we keep holding onto the tasklist lock + * so that our parent does not change from under us. 
-DaveM + */ + write_lock_irq(&tasklist_lock); + + err = -ESRCH; +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + if (!p) + goto out; + +@@ -1139,25 +1270,35 @@ asmlinkage long sys_setpgid(pid_t pid, p + if (p->signal->leader) + goto out; + +- if (pgid != pid) { ++ pgid = virt_pid(p); ++ if (_pgid != p->pid) { + struct task_struct *p; + +- do_each_task_pid(pgid, PIDTYPE_PGID, p) { +- if (p->signal->session == group_leader->signal->session) ++ do_each_task_pid_ve(_pgid, PIDTYPE_PGID, p) { ++ if (p->signal->session == group_leader->signal->session) { ++ pgid = virt_pgid(p); + goto ok_pgid; +- } while_each_task_pid(pgid, PIDTYPE_PGID, p); ++ } ++ } while_each_task_pid_ve(_pgid, PIDTYPE_PGID, p); + goto out; + } + + ok_pgid: +- err = security_task_setpgid(p, pgid); ++ err = security_task_setpgid(p, _pgid); + if (err) + goto out; + +- if (process_group(p) != pgid) { ++ if (process_group(p) != _pgid) { + detach_pid(p, PIDTYPE_PGID); +- p->signal->pgrp = pgid; +- attach_pid(p, PIDTYPE_PGID, pgid); ++ p->signal->pgrp = _pgid; ++ set_virt_pgid(p, pgid); ++ attach_pid(p, PIDTYPE_PGID, _pgid); ++ if (atomic_read(&p->signal->count) != 1) { ++ task_t *t; ++ for (t = next_thread(p); t != p; t = next_thread(t)) { ++ set_virt_pgid(t, pgid); ++ } ++ } + } + + err = 0; +@@ -1170,19 +1311,19 @@ out: + asmlinkage long sys_getpgid(pid_t pid) + { + if (!pid) { +- return process_group(current); ++ return virt_pgid(current); + } else { + int retval; + struct task_struct *p; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + + retval = -ESRCH; + if (p) { + retval = security_task_getpgid(p); + if (!retval) +- retval = process_group(p); ++ retval = virt_pgid(p); + } + read_unlock(&tasklist_lock); + return retval; +@@ -1194,7 +1335,7 @@ asmlinkage long sys_getpgid(pid_t pid) + asmlinkage long sys_getpgrp(void) + { + /* SMP - assuming writes are word atomic this is fine */ +- return process_group(current); ++ return 
virt_pgid(current); + } + + #endif +@@ -1202,19 +1343,19 @@ asmlinkage long sys_getpgrp(void) + asmlinkage long sys_getsid(pid_t pid) + { + if (!pid) { +- return current->signal->session; ++ return virt_sid(current); + } else { + int retval; + struct task_struct *p; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + + retval = -ESRCH; + if(p) { + retval = security_task_getsid(p); + if (!retval) +- retval = p->signal->session; ++ retval = virt_sid(p); + } + read_unlock(&tasklist_lock); + return retval; +@@ -1236,9 +1377,20 @@ asmlinkage long sys_setsid(void) + + group_leader->signal->leader = 1; + __set_special_pids(group_leader->pid, group_leader->pid); ++ set_virt_pgid(group_leader, virt_pid(group_leader)); ++ set_virt_sid(group_leader, virt_pid(group_leader)); + group_leader->signal->tty = NULL; + group_leader->signal->tty_old_pgrp = 0; +- err = process_group(group_leader); ++ if (atomic_read(&group_leader->signal->count) != 1) { ++ task_t *t; ++ for (t = next_thread(group_leader); t != group_leader; ++ t = next_thread(t)) { ++ set_virt_pgid(t, virt_pid(group_leader)); ++ set_virt_sid(t, virt_pid(group_leader)); ++ } ++ } ++ ++ err = virt_pgid(group_leader); + out: + write_unlock_irq(&tasklist_lock); + up(&tty_sem); +@@ -1518,7 +1670,7 @@ asmlinkage long sys_newuname(struct new_ + int errno = 0; + + down_read(&uts_sem); +- if (copy_to_user(name,&system_utsname,sizeof *name)) ++ if (copy_to_user(name,&ve_utsname,sizeof *name)) + errno = -EFAULT; + up_read(&uts_sem); + return errno; +@@ -1529,15 +1681,15 @@ asmlinkage long sys_sethostname(char __u + int errno; + char tmp[__NEW_UTS_LEN]; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + if (len < 0 || len > __NEW_UTS_LEN) + return -EINVAL; + down_write(&uts_sem); + errno = -EFAULT; + if (!copy_from_user(tmp, name, len)) { +- memcpy(system_utsname.nodename, tmp, len); +- system_utsname.nodename[len] = 0; ++ memcpy(ve_utsname.nodename, 
tmp, len); ++ ve_utsname.nodename[len] = 0; + errno = 0; + } + up_write(&uts_sem); +@@ -1553,11 +1705,11 @@ asmlinkage long sys_gethostname(char __u + if (len < 0) + return -EINVAL; + down_read(&uts_sem); +- i = 1 + strlen(system_utsname.nodename); ++ i = 1 + strlen(ve_utsname.nodename); + if (i > len) + i = len; + errno = 0; +- if (copy_to_user(name, system_utsname.nodename, i)) ++ if (copy_to_user(name, ve_utsname.nodename, i)) + errno = -EFAULT; + up_read(&uts_sem); + return errno; +@@ -1574,7 +1726,7 @@ asmlinkage long sys_setdomainname(char _ + int errno; + char tmp[__NEW_UTS_LEN]; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + if (len < 0 || len > __NEW_UTS_LEN) + return -EINVAL; +@@ -1582,8 +1734,8 @@ asmlinkage long sys_setdomainname(char _ + down_write(&uts_sem); + errno = -EFAULT; + if (!copy_from_user(tmp, name, len)) { +- memcpy(system_utsname.domainname, tmp, len); +- system_utsname.domainname[len] = 0; ++ memcpy(ve_utsname.domainname, tmp, len); ++ ve_utsname.domainname[len] = 0; + errno = 0; + } + up_write(&uts_sem); +@@ -1657,7 +1809,19 @@ asmlinkage long sys_setrlimit(unsigned i + (cputime_eq(current->signal->it_prof_expires, cputime_zero) || + new_rlim.rlim_cur <= cputime_to_secs( + current->signal->it_prof_expires))) { +- cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur); ++ unsigned long rlim_cur = new_rlim.rlim_cur; ++ cputime_t cputime; ++ ++ if (rlim_cur == 0) { ++ /* ++ * The caller is asking for an immediate RLIMIT_CPU ++ * expiry. But we use the zero value to mean "it was ++ * never set". 
So let's cheat and make it one second ++ * instead ++ */ ++ rlim_cur = 1; ++ } ++ cputime = secs_to_cputime(rlim_cur); + read_lock(&tasklist_lock); + spin_lock_irq(¤t->sighand->siglock); + set_process_cpu_timer(current, CPUCLOCK_PROF, +diff -upr linux-2.6.16.orig/kernel/sysctl.c linux-2.6.16-026test015/kernel/sysctl.c +--- linux-2.6.16.orig/kernel/sysctl.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/sysctl.c 2006-07-04 14:41:39.000000000 +0400 +@@ -25,6 +25,8 @@ + #include <linux/slab.h> + #include <linux/sysctl.h> + #include <linux/proc_fs.h> ++#include <linux/ve_owner.h> ++#include <linux/ve.h> + #include <linux/capability.h> + #include <linux/ctype.h> + #include <linux/utsname.h> +@@ -63,6 +65,7 @@ extern int max_threads; + extern int sysrq_enabled; + extern int core_uses_pid; + extern int suid_dumpable; ++extern int sysctl_at_vsyscall; + extern char core_pattern[]; + extern int cad_pid; + extern int pid_max; +@@ -72,6 +75,12 @@ extern int printk_ratelimit_burst; + extern int pid_max_min, pid_max_max; + extern int sysctl_drop_caches; + extern int percpu_pagelist_fraction; ++#ifdef CONFIG_VE ++int glob_virt_pids = 1; ++EXPORT_SYMBOL(glob_virt_pids); ++#endif ++ ++extern int ve_area_access_check; /* fs/namei.c */ + + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) + int unknown_nmi_panic; +@@ -101,6 +110,10 @@ extern int msg_ctlmnb; + extern int msg_ctlmni; + extern int sem_ctls[]; + #endif ++#ifdef CONFIG_SCHED_VCPU ++extern u32 vcpu_sched_timeslice; ++extern u32 vcpu_timeslice; ++#endif + + #ifdef __sparc__ + extern char reboot_command []; +@@ -108,6 +121,8 @@ extern int stop_a_enabled; + extern int scons_pwroff; + #endif + ++extern int alloc_fail_warn; ++ + #ifdef __hppa__ + extern int pwrsw_enabled; + extern int unaligned_enabled; +@@ -122,6 +137,7 @@ extern int spin_retry; + #endif + + extern int sysctl_hz_timer; ++int decode_call_traces = 1; + + #ifdef CONFIG_BSD_PROCESS_ACCT + extern int acct_parm[]; +@@ -131,10 +147,14 
@@ extern int acct_parm[]; + extern int no_unaligned_warning; + #endif + ++#ifdef CONFIG_FAIRSCHED ++extern int fairsched_max_latency; ++int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp, ++ void __user *buffer, size_t *lenp, loff_t *ppos); ++#endif ++ + static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, + ctl_table *, void **); +-static int proc_doutsstring(ctl_table *table, int write, struct file *filp, +- void __user *buffer, size_t *lenp, loff_t *ppos); + + static ctl_table root_table[]; + static struct ctl_table_header root_table_header = +@@ -178,6 +198,8 @@ static void register_proc_table(ctl_tabl + static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); + #endif + ++extern struct new_utsname virt_utsname; ++ + /* The default sysctl tables: */ + + static ctl_table root_table[] = { +@@ -276,6 +298,15 @@ static ctl_table kern_table[] = { + .strategy = &sysctl_string, + }, + { ++ .ctl_name = KERN_VIRT_OSRELEASE, ++ .procname = "virt_osrelease", ++ .data = virt_utsname.release, ++ .maxlen = sizeof(virt_utsname.release), ++ .mode = 0644, ++ .proc_handler = &proc_doutsstring, ++ .strategy = &sysctl_string, ++ }, ++ { + .ctl_name = KERN_PANIC, + .procname = "panic", + .data = &panic_timeout, +@@ -353,6 +384,22 @@ static ctl_table kern_table[] = { + .proc_handler = &proc_dointvec, + }, + #endif ++ { ++ .ctl_name = KERN_SILENCE_LEVEL, ++ .procname = "silence-level", ++ .data = &console_silence_loglevel, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ .ctl_name = KERN_ALLOC_FAIL_WARN, ++ .procname = "alloc_fail_warn", ++ .data = &alloc_fail_warn, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, + #ifdef __hppa__ + { + .ctl_name = KERN_HPPA_PWRSW, +@@ -579,6 +626,24 @@ static ctl_table kern_table[] = { + .proc_handler = &proc_dointvec, + }, + #endif ++#ifdef CONFIG_SCHED_VCPU ++ { ++ .ctl_name = 
KERN_VCPU_SCHED_TIMESLICE, ++ .procname = "vcpu_sched_timeslice", ++ .data = &vcpu_sched_timeslice, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_VCPU_TIMESLICE, ++ .procname = "vcpu_timeslice", ++ .data = &vcpu_timeslice, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif + { + .ctl_name = KERN_PIDMAX, + .procname = "pid_max", +@@ -590,6 +655,16 @@ static ctl_table kern_table[] = { + .extra1 = &pid_max_min, + .extra2 = &pid_max_max, + }, ++#ifdef CONFIG_VE ++ { ++ .ctl_name = KERN_VIRT_PIDS, ++ .procname = "virt_pids", ++ .data = &glob_virt_pids, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif + { + .ctl_name = KERN_PANIC_ON_OOPS, + .procname = "panic_on_oops", +@@ -683,6 +758,16 @@ static ctl_table kern_table[] = { + .proc_handler = &proc_dointvec, + }, + #endif ++#ifdef CONFIG_FAIRSCHED ++ { ++ .ctl_name = KERN_FAIRSCHED_MAX_LATENCY, ++ .procname = "fairsched-max-latency", ++ .data = &fairsched_max_latency, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &fsch_sysctl_latency ++ }, ++#endif + { .ctl_name = 0 } + }; + +@@ -1046,10 +1131,26 @@ static ctl_table fs_table[] = { + .mode = 0644, + .proc_handler = &proc_dointvec, + }, ++ { ++ .ctl_name = FS_AT_VSYSCALL, ++ .procname = "vsyscall", ++ .data = &sysctl_at_vsyscall, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, + { .ctl_name = 0 } + }; + + static ctl_table debug_table[] = { ++ { ++ .ctl_name = DBG_DECODE_CALLTRACES, ++ .procname = "decode_call_traces", ++ .data = &decode_call_traces, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, + { .ctl_name = 0 } + }; + +@@ -1113,6 +1214,7 @@ int do_sysctl(int __user *name, int nlen + { + struct list_head *tmp; + int error = -ENOTDIR; ++ struct ve_struct *ve; + + if (nlen <= 0 || nlen >= CTL_MAXNAME) + return -ENOTDIR; +@@ -1121,13 +1223,24 
@@ int do_sysctl(int __user *name, int nlen + if (!oldlenp || get_user(old_len, oldlenp)) + return -EFAULT; + } ++ ve = get_exec_env(); + spin_lock(&sysctl_lock); ++#ifdef CONFIG_VE ++ tmp = ve->sysctl_lh.next; ++#else + tmp = &root_table_header.ctl_entry; ++#endif + do { +- struct ctl_table_header *head = +- list_entry(tmp, struct ctl_table_header, ctl_entry); ++ struct ctl_table_header *head; + void *context = NULL; + ++#ifdef CONFIG_VE ++ if (tmp == &ve->sysctl_lh) ++ /* second pass over global variables */ ++ tmp = &root_table_header.ctl_entry; ++#endif ++ ++ head = list_entry(tmp, struct ctl_table_header, ctl_entry); + if (!use_table(head)) + continue; + +@@ -1181,10 +1294,14 @@ static int test_perm(int mode, int op) + static inline int ctl_perm(ctl_table *table, int op) + { + int error; ++ int mode = table->mode; ++ + error = security_sysctl(table, op); + if (error) + return error; +- return test_perm(table->mode, op); ++ if (!ve_accessible(table->owner_env, get_exec_env())) ++ mode &= ~0222; /* disable write access */ ++ return test_perm(mode, op); + } + + static int parse_table(int __user *name, int nlen, +@@ -1350,6 +1467,8 @@ struct ctl_table_header *register_sysctl + int insert_at_head) + { + struct ctl_table_header *tmp; ++ struct list_head *lh; ++ + tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); + if (!tmp) + return NULL; +@@ -1358,17 +1477,52 @@ struct ctl_table_header *register_sysctl + tmp->used = 0; + tmp->unregistering = NULL; + spin_lock(&sysctl_lock); ++#ifdef CONFIG_VE ++ lh = &get_exec_env()->sysctl_lh; ++#else ++ lh = &root_table_header.ctl_entry; ++#endif + if (insert_at_head) +- list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); ++ list_add(&tmp->ctl_entry, lh); + else +- list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); ++ list_add_tail(&tmp->ctl_entry, lh); + spin_unlock(&sysctl_lock); + #ifdef CONFIG_PROC_FS ++#ifdef CONFIG_VE ++ register_proc_table(table, get_exec_env()->proc_sys_root, tmp); ++#else + 
register_proc_table(table, proc_sys_root, tmp); + #endif ++#endif + return tmp; + } + ++void free_sysctl_clone(ctl_table *clone) ++{ ++ kfree(clone); ++} ++ ++ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr) ++{ ++ int i; ++ ctl_table *clone; ++ ++ clone = kmalloc(nr * sizeof(ctl_table), GFP_KERNEL); ++ if (clone == NULL) ++ return NULL; ++ ++ memcpy(clone, tmpl, nr * sizeof(ctl_table)); ++ for (i = 0; i < nr; i++) { ++ if (tmpl[i].ctl_name == 0) ++ continue; ++ clone[i].owner_env = get_exec_env(); ++ if (tmpl[i].child == NULL) ++ continue; ++ clone[i].child = clone + (tmpl[i].child - tmpl); ++ } ++ return clone; ++} ++ + /** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table +@@ -1382,8 +1536,12 @@ void unregister_sysctl_table(struct ctl_ + spin_lock(&sysctl_lock); + start_unregistering(header); + #ifdef CONFIG_PROC_FS ++#ifdef CONFIG_VE ++ unregister_proc_table(header->ctl_table, get_exec_env()->proc_sys_root); ++#else + unregister_proc_table(header->ctl_table, proc_sys_root); + #endif ++#endif + spin_unlock(&sysctl_lock); + kfree(header); + } +@@ -1469,11 +1627,6 @@ static void unregister_proc_table(ctl_ta + * its fields. We are under sysctl_lock here. + */ + de->data = NULL; +- +- /* Don't unregister proc entries that are still being used.. */ +- if (atomic_read(&de->count)) +- continue; +- + table->de = NULL; + remove_proc_entry(table->procname, root); + } +@@ -1615,7 +1768,7 @@ int proc_dostring(ctl_table *table, int + * to observe. Should this be in kernel/sys.c ???? 
+ */ + +-static int proc_doutsstring(ctl_table *table, int write, struct file *filp, ++int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + int r; +@@ -2190,7 +2343,7 @@ int proc_dostring(ctl_table *table, int + return -ENOSYS; + } + +-static int proc_doutsstring(ctl_table *table, int write, struct file *filp, ++int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + return -ENOSYS; +@@ -2494,6 +2647,14 @@ void unregister_sysctl_table(struct ctl_ + { + } + ++ctl_table * clone_sysctl_template(ctl_table *tmpl, int nr) ++{ ++ return NULL; ++} ++ ++void free_sysctl_clone(ctl_table *tmpl) ++{ ++} + #endif /* CONFIG_SYSCTL */ + + /* +@@ -2506,6 +2667,7 @@ EXPORT_SYMBOL(proc_dointvec_minmax); + EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); + EXPORT_SYMBOL(proc_dointvec_ms_jiffies); + EXPORT_SYMBOL(proc_dostring); ++EXPORT_SYMBOL(proc_doutsstring); + EXPORT_SYMBOL(proc_doulongvec_minmax); + EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); + EXPORT_SYMBOL(register_sysctl_table); +@@ -2514,3 +2676,5 @@ EXPORT_SYMBOL(sysctl_jiffies); + EXPORT_SYMBOL(sysctl_ms_jiffies); + EXPORT_SYMBOL(sysctl_string); + EXPORT_SYMBOL(unregister_sysctl_table); ++EXPORT_SYMBOL(clone_sysctl_template); ++EXPORT_SYMBOL(free_sysctl_clone); +diff -upr linux-2.6.16.orig/kernel/timer.c linux-2.6.16-026test015/kernel/timer.c +--- linux-2.6.16.orig/kernel/timer.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/timer.c 2006-07-04 14:41:38.000000000 +0400 +@@ -460,7 +460,11 @@ static inline void __run_timers(tvec_bas + spin_unlock_irq(&base->t_base.lock); + { + int preempt_count = preempt_count(); ++ struct ve_struct *ve; ++ ++ ve = set_exec_env(get_ve0()); + fn(data); ++ (void)set_exec_env(ve); + if (preempt_count != preempt_count()) { + printk(KERN_WARNING "huh, entered %p " + "with preempt_count %08x, exited" +@@ -868,6 +872,23 @@ 
EXPORT_SYMBOL(avenrun); + * calc_load - given tick count, update the avenrun load estimates. + * This is called while holding a write_lock on xtime_lock. + */ ++ ++static void calc_load_ve(void) ++{ ++ unsigned long flags, nr_unint; ++ ++ nr_unint = nr_uninterruptible() * FIXED_1; ++ spin_lock_irqsave(&kstat_glb_lock, flags); ++ CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint); ++ CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint); ++ CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint); ++ spin_unlock_irqrestore(&kstat_glb_lock, flags); ++ ++#ifdef CONFIG_VE ++ do_update_load_avg_ve(); ++#endif ++} ++ + static inline void calc_load(unsigned long ticks) + { + unsigned long active_tasks; /* fixed-point */ +@@ -880,6 +901,7 @@ static inline void calc_load(unsigned lo + CALC_LOAD(avenrun[0], EXP_1, active_tasks); + CALC_LOAD(avenrun[1], EXP_5, active_tasks); + CALC_LOAD(avenrun[2], EXP_15, active_tasks); ++ calc_load_ve(); + } + } + +@@ -990,7 +1012,7 @@ asmlinkage unsigned long sys_alarm(unsig + */ + asmlinkage long sys_getpid(void) + { +- return current->tgid; ++ return virt_tgid(current); + } + + /* +@@ -1012,12 +1034,13 @@ asmlinkage long sys_getpid(void) + asmlinkage long sys_getppid(void) + { + int pid; ++#ifndef CONFIG_DEBUG_SLAB + struct task_struct *me = current; + struct task_struct *parent; + + parent = me->group_leader->real_parent; + for (;;) { +- pid = parent->tgid; ++ pid = virt_tgid(parent); + #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) + { + struct task_struct *old = parent; +@@ -1034,6 +1057,16 @@ asmlinkage long sys_getppid(void) + #endif + break; + } ++#else ++ /* ++ * ->real_parent could be released before dereference and ++ * we accessed freed kernel memory, which faults with debugging on. ++ * Keep it simple and stupid. 
++ */ ++ read_lock(&tasklist_lock); ++ pid = virt_tgid(current->group_leader->real_parent); ++ read_unlock(&tasklist_lock); ++#endif + return pid; + } + +@@ -1164,7 +1197,7 @@ EXPORT_SYMBOL(schedule_timeout_uninterru + /* Thread ID - the internal kernel "pid" */ + asmlinkage long sys_gettid(void) + { +- return current->pid; ++ return virt_pid(current); + } + + /* +@@ -1176,11 +1209,12 @@ asmlinkage long sys_sysinfo(struct sysin + unsigned long mem_total, sav_total; + unsigned int mem_unit, bitcount; + unsigned long seq; ++ unsigned long *__avenrun; ++ struct timespec tp; + + memset((char *)&val, 0, sizeof(struct sysinfo)); + + do { +- struct timespec tp; + seq = read_seqbegin(&xtime_lock); + + /* +@@ -1197,14 +1231,25 @@ asmlinkage long sys_sysinfo(struct sysin + tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; + tp.tv_sec++; + } +- val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); +- +- val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); +- val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); +- val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); ++ } while (read_seqretry(&xtime_lock, seq)); + ++ if (ve_is_super(get_exec_env())) { ++ val.uptime = tp.tv_sec + (tp.tv_nsec ? 
1 : 0); ++ __avenrun = &avenrun[0]; + val.procs = nr_threads; +- } while (read_seqretry(&xtime_lock, seq)); ++ } ++#ifdef CONFIG_VE ++ else { ++ struct ve_struct *ve; ++ ve = get_exec_env(); ++ __avenrun = &ve->avenrun[0]; ++ val.procs = atomic_read(&ve->pcounter); ++ val.uptime = tp.tv_sec - ve->start_timespec.tv_sec; ++ } ++#endif ++ val.loads[0] = __avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); ++ val.loads[1] = __avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); ++ val.loads[2] = __avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); + + si_meminfo(&val); + si_swapinfo(&val); +diff -upr linux-2.6.16.orig/kernel/ub/Kconfig linux-2.6.16-026test015/kernel/ub/Kconfig +--- linux-2.6.16.orig/kernel/ub/Kconfig 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/Kconfig 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,89 @@ ++# ++# User resources part (UBC) ++# ++# Copyright (C) 2005 SWsoft ++# All rights reserved. ++# ++# Licensing governed by "linux/COPYING.SWsoft" file. ++ ++menu "User resources" ++ ++config USER_RESOURCE ++ bool "Enable user resource accounting" ++ default y ++ help ++ This patch provides accounting and allows to configure ++ limits for user's consumption of exhaustible system resources. ++ The most important resource controlled by this patch is unswappable ++ memory (either mlock'ed or used by internal kernel structures and ++ buffers). The main goal of this patch is to protect processes ++ from running short of important resources because of an accidental ++ misbehavior of processes or malicious activity aiming to ``kill'' ++ the system. It's worth to mention that resource limits configured ++ by setrlimit(2) do not give an acceptable level of protection ++ because they cover only small fraction of resources and work on a ++ per-process basis. Per-process accounting doesn't prevent malicious ++ users from spawning a lot of resource-consuming processes. 
++ ++config USER_RSS_ACCOUNTING ++ bool "Account physical memory usage" ++ default y ++ depends on USER_RESOURCE ++ help ++ This allows to estimate per beancounter physical memory usage. ++ Implemented alghorithm accounts shared pages of memory as well, ++ dividing them by number of beancounter which use the page. ++ ++config USER_SWAP_ACCOUNTING ++ bool "Account swap usage" ++ default y ++ depends on USER_RESOURCE ++ help ++ This allows accounting of swap usage. ++ ++config USER_RESOURCE_PROC ++ bool "Report resource usage in /proc" ++ default y ++ depends on USER_RESOURCE ++ help ++ Allows a system administrator to inspect resource accounts and limits. ++ ++config UBC_DEBUG ++ bool "User resources debug features" ++ default n ++ depends on USER_RESOURCE ++ help ++ Enables to setup debug features for user resource accounting ++ ++config UBC_DEBUG_KMEM ++ bool "Debug kmemsize with cache counters" ++ default n ++ depends on UBC_DEBUG ++ help ++ Adds /proc/user_beancounters_debug entry to get statistics ++ about cache usage of each beancounter ++ ++config UBC_KEEP_UNUSED ++ bool "Keep unused beancounter alive" ++ default y ++ depends on UBC_DEBUG ++ help ++ If on, unused beancounters are kept on the hash and maxheld value ++ can be looked through. ++ ++config UBC_DEBUG_ITEMS ++ bool "Account resources in items rather than in bytes" ++ default y ++ depends on UBC_DEBUG ++ help ++ When true some of the resources (e.g. kmemsize) are accounted ++ in items instead of bytes. ++ ++config UBC_UNLIMITED ++ bool "Use unlimited ubc settings" ++ default y ++ depends on UBC_DEBUG ++ help ++ When ON all limits and barriers are set to max values. 
++ ++endmenu +diff -upr linux-2.6.16.orig/kernel/ub/Makefile linux-2.6.16-026test015/kernel/ub/Makefile +--- linux-2.6.16.orig/kernel/ub/Makefile 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/Makefile 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,20 @@ ++# ++# User resources part (UBC) ++# ++# Copyright (C) 2005 SWsoft ++# All rights reserved. ++# ++# Licensing governed by "linux/COPYING.SWsoft" file. ++ ++obj-y := ub_sys.o ++obj-$(CONFIG_USER_RESOURCE) += beancounter.o ++obj-$(CONFIG_USER_RESOURCE) += ub_dcache.o ++obj-$(CONFIG_USER_RESOURCE) += ub_mem.o ++obj-$(CONFIG_USER_RESOURCE) += ub_misc.o ++obj-$(CONFIG_USER_RESOURCE) += ub_net.o ++obj-$(CONFIG_USER_RESOURCE) += ub_pages.o ++obj-$(CONFIG_USER_RESOURCE) += ub_stat.o ++# obj-$(CONFIG_USER_RESOURCE) += ub_oom.o ++ ++obj-$(CONFIG_USER_RSS_ACCOUNTING) += ub_page_bc.o ++obj-$(CONFIG_USER_RESOURCE_PROC) += ub_proc.o +diff -upr linux-2.6.16.orig/kernel/ub/beancounter.c linux-2.6.16-026test015/kernel/ub/beancounter.c +--- linux-2.6.16.orig/kernel/ub/beancounter.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/beancounter.c 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,675 @@ ++/* ++ * linux/kernel/ub/beancounter.c ++ * ++ * Copyright (C) 1998 Alan Cox ++ * 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg> ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * TODO: ++ * - more intelligent limit check in mremap(): currently the new size is ++ * charged and _then_ old size is uncharged ++ * (almost done: !move_vma case is completely done, ++ * move_vma in its current implementation requires too many conditions to ++ * do things right, because it may be not only expansion, but shrinking ++ * also, plus do_munmap will require an additional parameter...) 
++ * - problem: bad pmd page handling ++ * - consider /proc redesign ++ * - TCP/UDP ports ++ * + consider whether __charge_beancounter_locked should be inline ++ * ++ * Changes: ++ * 1999/08/17 Marcelo Tosatti <marcelo@conectiva.com.br> ++ * - Set "barrier" and "limit" parts of limits atomically. ++ * 1999/10/06 Marcelo Tosatti <marcelo@conectiva.com.br> ++ * - setublimit system call. ++ */ ++ ++#include <linux/slab.h> ++#include <linux/module.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_hash.h> ++#include <ub/ub_vmpages.h> ++ ++static kmem_cache_t *ub_cachep; ++static struct user_beancounter default_beancounter; ++struct user_beancounter ub0; ++ ++const char *ub_rnames[] = { ++ "kmemsize", /* 0 */ ++ "lockedpages", ++ "privvmpages", ++ "shmpages", ++ "dummy", ++ "numproc", /* 5 */ ++ "physpages", ++ "vmguarpages", ++ "oomguarpages", ++ "numtcpsock", ++ "numflock", /* 10 */ ++ "numpty", ++ "numsiginfo", ++ "tcpsndbuf", ++ "tcprcvbuf", ++ "othersockbuf", /* 15 */ ++ "dgramrcvbuf", ++ "numothersock", ++ "dcachesize", ++ "numfile", ++ "dummy", /* 20 */ ++ "dummy", ++ "dummy", ++ "numiptent", ++ "unused_privvmpages", /* UB_RESOURCES */ ++ "tmpfs_respages", ++ "swap_pages", ++ "held_pages", ++}; ++ ++static void init_beancounter_struct(struct user_beancounter *ub); ++static void init_beancounter_store(struct user_beancounter *ub); ++static void init_beancounter_nolimits(struct user_beancounter *ub); ++ ++void print_ub_uid(struct user_beancounter *ub, char *buf, int size) ++{ ++ if (ub->parent != NULL) ++ snprintf(buf, size, "%u.%u", ub->parent->ub_uid, ub->ub_uid); ++ else ++ snprintf(buf, size, "%u", ub->ub_uid); ++} ++EXPORT_SYMBOL(print_ub_uid); ++ ++#define ub_hash_fun(x) ((((x) >> 8) ^ (x)) & (UB_HASH_SIZE - 1)) ++#define ub_subhash_fun(p, id) ub_hash_fun((p)->ub_uid + (id) * 17) ++struct ub_hash_slot ub_hash[UB_HASH_SIZE]; ++spinlock_t ub_hash_lock; ++EXPORT_SYMBOL(ub_hash); ++EXPORT_SYMBOL(ub_hash_lock); ++ ++/* ++ * Per user resource beancounting. 
Resources are tied to their luid. ++ * The resource structure itself is tagged both to the process and ++ * the charging resources (a socket doesn't want to have to search for ++ * things at irq time for example). Reference counters keep things in ++ * hand. ++ * ++ * The case where a user creates resource, kills all his processes and ++ * then starts new ones is correctly handled this way. The refcounters ++ * will mean the old entry is still around with resource tied to it. ++ */ ++struct user_beancounter *get_beancounter_byuid(uid_t uid, int create) ++{ ++ struct user_beancounter *new_ub, *ub; ++ unsigned long flags; ++ struct ub_hash_slot *slot; ++ ++ slot = &ub_hash[ub_hash_fun(uid)]; ++ new_ub = NULL; ++ ++retry: ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ ub = slot->ubh_beans; ++ while (ub != NULL && (ub->ub_uid != uid || ub->parent != NULL)) ++ ub = ub->ub_next; ++ ++ if (ub != NULL) { ++ /* found */ ++ get_beancounter(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ if (new_ub != NULL) ++ kmem_cache_free(ub_cachep, new_ub); ++ return ub; ++ } ++ ++ if (!create) { ++ /* no ub found */ ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return NULL; ++ } ++ ++ if (new_ub != NULL) { ++ /* install new ub */ ++ new_ub->ub_next = slot->ubh_beans; ++ slot->ubh_beans = new_ub; ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return new_ub; ++ } ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ++ /* alloc new ub */ ++ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, ++ GFP_KERNEL); ++ if (new_ub == NULL) ++ return NULL; ++ ++ ub_debug(UBD_ALLOC, "Creating ub %p in slot %p\n", new_ub, slot); ++ memcpy(new_ub, &default_beancounter, sizeof(*new_ub)); ++ init_beancounter_struct(new_ub); ++ new_ub->ub_uid = uid; ++ goto retry; ++} ++EXPORT_SYMBOL(get_beancounter_byuid); ++ ++struct user_beancounter *get_subbeancounter_byid(struct user_beancounter *p, ++ int id, int create) ++{ ++ struct user_beancounter *new_ub, *ub; ++ unsigned long 
flags; ++ struct ub_hash_slot *slot; ++ ++ slot = &ub_hash[ub_subhash_fun(p, id)]; ++ new_ub = NULL; ++ ++retry: ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ ub = slot->ubh_beans; ++ while (ub != NULL && (ub->parent != p || ub->ub_uid != id)) ++ ub = ub->ub_next; ++ ++ if (ub != NULL) { ++ /* found */ ++ get_beancounter(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ if (new_ub != NULL) { ++ put_beancounter(new_ub->parent); ++ kmem_cache_free(ub_cachep, new_ub); ++ } ++ return ub; ++ } ++ ++ if (!create) { ++ /* no ub found */ ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return NULL; ++ } ++ ++ if (new_ub != NULL) { ++ /* install new ub */ ++ get_beancounter(new_ub); ++ new_ub->ub_next = slot->ubh_beans; ++ slot->ubh_beans = new_ub; ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return new_ub; ++ } ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ++ /* alloc new ub */ ++ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, ++ GFP_KERNEL); ++ if (new_ub == NULL) ++ return NULL; ++ ++ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", new_ub, slot); ++ memset(new_ub, 0, sizeof(*new_ub)); ++ init_beancounter_nolimits(new_ub); ++ init_beancounter_store(new_ub); ++ init_beancounter_struct(new_ub); ++ atomic_set(&new_ub->ub_refcount, 0); ++ new_ub->ub_uid = id; ++ new_ub->parent = get_beancounter(p); ++ goto retry; ++} ++EXPORT_SYMBOL(get_subbeancounter_byid); ++ ++struct user_beancounter *subbeancounter_findcreate(struct user_beancounter *p, ++ int id) ++{ ++ struct user_beancounter *ub; ++ unsigned long flags; ++ struct ub_hash_slot *slot; ++ ++ slot = &ub_hash[ub_subhash_fun(p, id)]; ++ ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ ub = slot->ubh_beans; ++ while (ub != NULL && (ub->parent != p || ub->ub_uid != id)) ++ ub = ub->ub_next; ++ ++ if (ub != NULL) { ++ /* found */ ++ get_beancounter(ub); ++ goto done; ++ } ++ ++ /* alloc new ub */ ++ /* Can be called from non-atomic contexts. 
Den */ ++ ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, GFP_ATOMIC); ++ if (ub == NULL) ++ goto done; ++ ++ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", ub, slot); ++ memset(ub, 0, sizeof(*ub)); ++ init_beancounter_nolimits(ub); ++ init_beancounter_store(ub); ++ init_beancounter_struct(ub); ++ atomic_set(&ub->ub_refcount, 0); ++ ub->ub_uid = id; ++ ub->parent = get_beancounter(p); ++ ++ /* install new ub */ ++ get_beancounter(ub); ++ ub->ub_next = slot->ubh_beans; ++ slot->ubh_beans = ub; ++ ++done: ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return ub; ++} ++EXPORT_SYMBOL(subbeancounter_findcreate); ++#ifndef CONFIG_UBC_KEEP_UNUSED ++ ++static int verify_res(struct user_beancounter *ub, int resource, ++ unsigned long held) ++{ ++ char id[64]; ++ ++ if (likely(held == 0)) ++ return 1; ++ ++ print_ub_uid(ub, id, sizeof(id)); ++ printk(KERN_WARNING "Ub %s helds %lu in %s on put\n", ++ id, held, ub_rnames[resource]); ++ return 0; ++} ++ ++static inline void verify_held(struct user_beancounter *ub) ++{ ++ int i, clean; ++ ++ clean = 1; ++ for (i = 0; i < UB_RESOURCES; i++) ++ clean &= verify_res(ub, i, ub->ub_parms[i].held); ++ ++ clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages); ++ clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages); ++ clean &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages); ++ clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages); ++ ++ ub_debug_trace(!clean, 5, 60*HZ); ++} ++ ++static void __unhash_beancounter(struct user_beancounter *ub) ++{ ++ struct user_beancounter **ubptr; ++ struct ub_hash_slot *slot; ++ ++ if (ub->parent != NULL) ++ slot = &ub_hash[ub_subhash_fun(ub->parent, ub->ub_uid)]; ++ else ++ slot = &ub_hash[ub_hash_fun(ub->ub_uid)]; ++ ubptr = &slot->ubh_beans; ++ ++ while (*ubptr != NULL) { ++ if (*ubptr == ub) { ++ verify_held(ub); ++ *ubptr = ub->ub_next; ++ return; ++ } ++ ubptr = &((*ubptr)->ub_next); ++ } ++ printk(KERN_ERR "Invalid beancounter %p, 
luid=%d on free, slot %p\n", ++ ub, ub->ub_uid, slot); ++} ++#endif ++ ++void __put_beancounter(struct user_beancounter *ub) ++{ ++ unsigned long flags; ++ struct user_beancounter *parent; ++ ++again: ++ parent = ub->parent; ++ ub_debug(UBD_ALLOC, "__put bc %p (cnt %d) for %.20s pid %d " ++ "cur %08lx cpu %d.\n", ++ ub, atomic_read(&ub->ub_refcount), ++ current->comm, current->pid, ++ (unsigned long)current, smp_processor_id()); ++ ++ /* equevalent to atomic_dec_and_lock_irqsave() */ ++ local_irq_save(flags); ++ if (likely(!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock))) { ++ if (unlikely(atomic_read(&ub->ub_refcount) < 0)) ++ printk(KERN_ERR "UB: Bad ub refcount: ub=%p, " ++ "luid=%d, ref=%d\n", ++ ub, ub->ub_uid, ++ atomic_read(&ub->ub_refcount)); ++ local_irq_restore(flags); ++ return; ++ } ++ ++ if (unlikely(ub == get_ub0())) { ++ printk(KERN_ERR "Trying to put ub0\n"); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return; ++ } ++ ++#ifndef CONFIG_UBC_KEEP_UNUSED ++ __unhash_beancounter(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ub_free_counters(ub); ++ kmem_cache_free(ub_cachep, ub); ++#else ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++#endif ++ ub = parent; ++ if (ub != NULL) ++ goto again; ++} ++EXPORT_SYMBOL(__put_beancounter); ++ ++/* ++ * Generic resource charging stuff ++ */ ++ ++int __charge_beancounter_locked(struct user_beancounter *ub, ++ int resource, unsigned long val, enum severity strict) ++{ ++ ub_debug_resource(resource, "Charging %lu for %d of %p with %lu\n", ++ val, resource, ub, ub->ub_parms[resource].held); ++ /* ++ * ub_value <= UB_MAXVALUE, value <= UB_MAXVALUE, and only one addition ++ * at the moment is possible so an overflow is impossible. 
++ */ ++ ub->ub_parms[resource].held += val; ++ ++ switch (strict) { ++ case UB_HARD: ++ if (ub->ub_parms[resource].held > ++ ub->ub_parms[resource].barrier) ++ break; ++ case UB_SOFT: ++ if (ub->ub_parms[resource].held > ++ ub->ub_parms[resource].limit) ++ break; ++ case UB_FORCE: ++ ub_adjust_maxheld(ub, resource); ++ return 0; ++ default: ++ BUG(); ++ } ++ ++ if (strict == UB_SOFT && ub_ratelimit(&ub->ub_limit_rl)) ++ printk(KERN_INFO "Fatal resource shortage: %s, UB %d.\n", ++ ub_rnames[resource], ub->ub_uid); ++ ub->ub_parms[resource].failcnt++; ++ ub->ub_parms[resource].held -= val; ++ return -ENOMEM; ++} ++ ++int charge_beancounter(struct user_beancounter *ub, ++ int resource, unsigned long val, enum severity strict) ++{ ++ int retval; ++ struct user_beancounter *p, *q; ++ unsigned long flags; ++ ++ retval = -EINVAL; ++ if (val > UB_MAXVALUE) ++ goto out; ++ ++ local_irq_save(flags); ++ for (p = ub; p != NULL; p = p->parent) { ++ spin_lock(&p->ub_lock); ++ retval = __charge_beancounter_locked(p, resource, val, strict); ++ spin_unlock(&p->ub_lock); ++ if (retval) ++ goto unroll; ++ } ++out_restore: ++ local_irq_restore(flags); ++out: ++ return retval; ++ ++unroll: ++ for (q = ub; q != p; q = q->parent) { ++ spin_lock(&q->ub_lock); ++ __uncharge_beancounter_locked(q, resource, val); ++ spin_unlock(&q->ub_lock); ++ } ++ goto out_restore; ++} ++ ++EXPORT_SYMBOL(charge_beancounter); ++ ++void charge_beancounter_notop(struct user_beancounter *ub, ++ int resource, unsigned long val) ++{ ++ struct user_beancounter *p; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ for (p = ub; p->parent != NULL; p = p->parent) { ++ spin_lock(&p->ub_lock); ++ __charge_beancounter_locked(p, resource, val, UB_FORCE); ++ spin_unlock(&p->ub_lock); ++ } ++ local_irq_restore(flags); ++} ++ ++EXPORT_SYMBOL(charge_beancounter_notop); ++ ++void uncharge_warn(struct user_beancounter *ub, int resource, ++ unsigned long val, unsigned long held) ++{ ++ char id[64]; ++ ++ print_ub_uid(ub, 
id, sizeof(id)); ++ printk(KERN_ERR "Uncharging too much %lu h %lu, res %s ub %s\n", ++ val, held, ub_rnames[resource], id); ++ ub_debug_trace(1, 10, 10*HZ); ++} ++ ++void __uncharge_beancounter_locked(struct user_beancounter *ub, ++ int resource, unsigned long val) ++{ ++ ub_debug_resource(resource, "Uncharging %lu for %d of %p with %lu\n", ++ val, resource, ub, ub->ub_parms[resource].held); ++ if (ub->ub_parms[resource].held < val) { ++ uncharge_warn(ub, resource, ++ val, ub->ub_parms[resource].held); ++ val = ub->ub_parms[resource].held; ++ } ++ ub->ub_parms[resource].held -= val; ++} ++ ++void uncharge_beancounter(struct user_beancounter *ub, ++ int resource, unsigned long val) ++{ ++ unsigned long flags; ++ struct user_beancounter *p; ++ ++ for (p = ub; p != NULL; p = p->parent) { ++ spin_lock_irqsave(&p->ub_lock, flags); ++ __uncharge_beancounter_locked(p, resource, val); ++ spin_unlock_irqrestore(&p->ub_lock, flags); ++ } ++} ++ ++EXPORT_SYMBOL(uncharge_beancounter); ++ ++void uncharge_beancounter_notop(struct user_beancounter *ub, ++ int resource, unsigned long val) ++{ ++ struct user_beancounter *p; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ for (p = ub; p->parent != NULL; p = p->parent) { ++ spin_lock(&p->ub_lock); ++ __uncharge_beancounter_locked(p, resource, val); ++ spin_unlock(&p->ub_lock); ++ } ++ local_irq_restore(flags); ++} ++ ++EXPORT_SYMBOL(uncharge_beancounter_notop); ++ ++ ++/* ++ * Rate limiting stuff. 
++ */ ++int ub_ratelimit(struct ub_rate_info *p) ++{ ++ unsigned long cjif, djif; ++ unsigned long flags; ++ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; ++ long new_bucket; ++ ++ spin_lock_irqsave(&ratelimit_lock, flags); ++ cjif = jiffies; ++ djif = cjif - p->last; ++ if (djif < p->interval) { ++ if (p->bucket >= p->burst) { ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ return 0; ++ } ++ p->bucket++; ++ } else { ++ new_bucket = p->bucket - (djif / (unsigned)p->interval); ++ if (new_bucket < 0) ++ new_bucket = 0; ++ p->bucket = new_bucket + 1; ++ } ++ p->last = cjif; ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ return 1; ++} ++EXPORT_SYMBOL(ub_ratelimit); ++ ++ ++/* ++ * Initialization ++ * ++ * struct user_beancounter contains ++ * - limits and other configuration settings, ++ * with a copy stored for accounting purposes, ++ * - structural fields: lists, spinlocks and so on. ++ * ++ * Before these parts are initialized, the structure should be memset ++ * to 0 or copied from a known clean structure. That takes care of a lot ++ * of fields not initialized explicitly. ++ */ ++ ++static void init_beancounter_struct(struct user_beancounter *ub) ++{ ++ ub->ub_magic = UB_MAGIC; ++ atomic_set(&ub->ub_refcount, 1); ++ spin_lock_init(&ub->ub_lock); ++ INIT_LIST_HEAD(&ub->ub_tcp_sk_list); ++ INIT_LIST_HEAD(&ub->ub_other_sk_list); ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ INIT_LIST_HEAD(&ub->ub_cclist); ++#endif ++} ++ ++static void init_beancounter_store(struct user_beancounter *ub) ++{ ++ int k; ++ ++ for (k = 0; k < UB_RESOURCES; k++) { ++ memcpy(&ub->ub_store[k], &ub->ub_parms[k], ++ sizeof(struct ubparm)); ++ } ++} ++ ++static void init_beancounter_nolimits(struct user_beancounter *ub) ++{ ++ int k; ++ ++ for (k = 0; k < UB_RESOURCES; k++) { ++ ub->ub_parms[k].limit = UB_MAXVALUE; ++ /* FIXME: whether this is right for physpages and guarantees? */ ++ ub->ub_parms[k].barrier = UB_MAXVALUE; ++ } ++ ++ /* FIXME: set unlimited rate? 
*/ ++ ub->ub_limit_rl.burst = 4; ++ ub->ub_limit_rl.interval = 300*HZ; ++} ++ ++static void init_beancounter_syslimits(struct user_beancounter *ub, ++ unsigned long mp) ++{ ++ extern int max_threads; ++ int k; ++ ++ ub->ub_parms[UB_KMEMSIZE].limit = ++ mp > (192*1024*1024 >> PAGE_SHIFT) ? ++ 32*1024*1024 : (mp << PAGE_SHIFT) / 6; ++ ub->ub_parms[UB_LOCKEDPAGES].limit = 8; ++ ub->ub_parms[UB_PRIVVMPAGES].limit = UB_MAXVALUE; ++ ub->ub_parms[UB_SHMPAGES].limit = 64; ++ ub->ub_parms[UB_NUMPROC].limit = max_threads / 2; ++ ub->ub_parms[UB_NUMTCPSOCK].limit = 1024; ++ ub->ub_parms[UB_TCPSNDBUF].limit = 1024*4*1024; /* 4k per socket */ ++ ub->ub_parms[UB_TCPRCVBUF].limit = 1024*6*1024; /* 6k per socket */ ++ ub->ub_parms[UB_NUMOTHERSOCK].limit = 256; ++ ub->ub_parms[UB_DGRAMRCVBUF].limit = 256*4*1024; /* 4k per socket */ ++ ub->ub_parms[UB_OTHERSOCKBUF].limit = 256*8*1024; /* 8k per socket */ ++ ub->ub_parms[UB_NUMFLOCK].limit = 1024; ++ ub->ub_parms[UB_NUMPTY].limit = 16; ++ ub->ub_parms[UB_NUMSIGINFO].limit = 1024; ++ ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024; ++ ub->ub_parms[UB_NUMFILE].limit = 1024; ++ ++ for (k = 0; k < UB_RESOURCES; k++) ++ ub->ub_parms[k].barrier = ub->ub_parms[k].limit; ++ ++ ub->ub_limit_rl.burst = 4; ++ ub->ub_limit_rl.interval = 300*HZ; ++} ++ ++void __init ub_init_ub0(void) ++{ ++ struct user_beancounter *ub; ++ ++ init_cache_counters(); ++ ub = get_ub0(); ++ memset(ub, 0, sizeof(*ub)); ++ ub->ub_uid = 0; ++ init_beancounter_nolimits(ub); ++ init_beancounter_store(ub); ++ init_beancounter_struct(ub); ++ ++ memset(¤t->task_bc, 0, sizeof(struct task_beancounter)); ++ (void)set_exec_ub(get_ub0()); ++ current->task_bc.fork_sub = get_beancounter(get_ub0()); ++ init_mm.mm_ub = get_beancounter(ub); ++} ++ ++void __init ub_hash_init(void) ++{ ++ struct ub_hash_slot *slot; ++ ++ spin_lock_init(&ub_hash_lock); ++ /* insert ub0 into the hash */ ++ slot = &ub_hash[ub_hash_fun(get_ub0()->ub_uid)]; ++ slot->ubh_beans = get_ub0(); ++} ++ ++void __init 
ub_init_cache(unsigned long mempages) ++{ ++ extern int skbc_cache_init(void); ++ int res; ++ ++ res = 0; /* skbc_cache_init(); */ ++ ub_cachep = kmem_cache_create("user_beancounters", ++ sizeof(struct user_beancounter), ++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL); ++ if (res < 0 || ub_cachep == NULL) ++ panic("Can't create ubc caches\n"); ++ ++ memset(&default_beancounter, 0, sizeof(default_beancounter)); ++#ifdef CONFIG_UBC_UNLIMITED ++ init_beancounter_nolimits(&default_beancounter); ++#else ++ init_beancounter_syslimits(&default_beancounter, mempages); ++#endif ++ init_beancounter_store(&default_beancounter); ++ init_beancounter_struct(&default_beancounter); ++ ++ ub_hash_init(); ++} +diff -upr linux-2.6.16.orig/kernel/ub/ub_dcache.c linux-2.6.16-026test015/kernel/ub/ub_dcache.c +--- linux-2.6.16.orig/kernel/ub/ub_dcache.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/ub_dcache.c 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,325 @@ ++/* ++ * kernel/ub/ub_dcache.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/dcache.h> ++#include <linux/slab.h> ++#include <linux/kmem_cache.h> ++#include <linux/fs.h> ++#include <linux/err.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> ++#include <ub/ub_dcache.h> ++ ++/* ++ * Locking ++ * traverse dcache_lock d_lock ++ * ub_dentry_charge + + + ++ * ub_dentry_uncharge + - + ++ * ub_dentry_charge_nofail + + - ++ * ++ * d_inuse is atomic so that we can inc dentry's parent d_inuse in ++ * ub_dentry_charhe with the only dentry's d_lock held. ++ * ++ * Race in uncharge vs charge_nofail is handled with dcache_lock. ++ * Race in charge vs charge_nofail is inessential since they both inc d_inuse. ++ * Race in uncharge vs charge is handled by altering d_inuse under d_lock. 
++ * ++ * Race with d_move is handled this way: ++ * - charge_nofail and uncharge are protected by dcache_lock; ++ * - charge works only with dentry and dentry->d_parent->d_inuse, so ++ * it's enough to lock only the dentry. ++ */ ++ ++/* ++ * Beancounting ++ * UB argument must NOT be NULL ++ */ ++ ++static int do_charge_dcache(struct user_beancounter *ub, unsigned long size, ++ enum severity sv) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (__charge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size), sv)) ++ goto out_mem; ++ if (__charge_beancounter_locked(ub, UB_DCACHESIZE, size, sv)) ++ goto out_dcache; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return 0; ++ ++out_dcache: ++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size)); ++out_mem: ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return -ENOMEM; ++} ++ ++static void do_uncharge_dcache(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size)); ++ __uncharge_beancounter_locked(ub, UB_DCACHESIZE, size); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++static int charge_dcache(struct user_beancounter *ub, unsigned long size, ++ enum severity sv) ++{ ++ struct user_beancounter *p, *q; ++ ++ for (p = ub; p != NULL; p = p->parent) { ++ if (do_charge_dcache(p, size, sv)) ++ goto unroll; ++ } ++ return 0; ++ ++unroll: ++ for (q = ub; q != p; q = q->parent) ++ do_uncharge_dcache(q, size); ++ return -ENOMEM; ++} ++ ++void uncharge_dcache(struct user_beancounter *ub, unsigned long size) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_uncharge_dcache(ub, size); ++} ++ ++static inline void charge_dcache_forced(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ charge_dcache(ub, size, UB_FORCE); ++} ++ ++static inline void d_forced_charge(struct dentry_beancounter *d_bc) ++{ ++ d_bc->d_ub = 
get_beancounter(get_exec_ub()); ++ if (d_bc->d_ub == NULL) ++ return; ++ ++ charge_dcache_forced(d_bc->d_ub, d_bc->d_ubsize); ++} ++ ++static inline void d_uncharge(struct dentry_beancounter *d_bc) ++{ ++ if (d_bc->d_ub == NULL) ++ return; ++ ++ uncharge_dcache(d_bc->d_ub, d_bc->d_ubsize); ++ put_beancounter(d_bc->d_ub); ++ d_bc->d_ub = NULL; ++} ++ ++/* ++ * Alloc / free dentry_beancounter ++ */ ++ ++static inline int d_alloc_beancounter(struct dentry *d) ++{ ++ return 0; ++} ++ ++static inline void d_free_beancounter(struct dentry_beancounter *d_bc) ++{ ++} ++ ++static inline unsigned long d_charge_size(struct dentry *dentry) ++{ ++ /* dentry's d_name is already set to appropriate value (see d_alloc) */ ++ return inode_cachep->objuse + dentry_cache->objuse + ++ (dname_external(dentry) ? ++ kmem_obj_memusage((void *)dentry->d_name.name) : 0); ++} ++ ++/* ++ * dentry mark in use operation ++ * d_lock is held ++ */ ++ ++static int d_inc_inuse(struct dentry *dentry) ++{ ++ struct user_beancounter *ub; ++ struct dentry_beancounter *d_bc; ++ ++ if (dentry != dentry->d_parent) { ++ struct dentry *parent; ++ ++ /* ++ * Increment d_inuse of parent. ++ * It can't change since dentry->d_lock is held. 
++ */ ++ parent = dentry->d_parent; ++ if (ub_dget_testone(parent)) ++ BUG(); ++ } ++ ++ d_bc = &dentry->dentry_bc; ++ ub = get_beancounter(get_exec_ub()); ++ ++ if (ub != NULL && charge_dcache(ub, d_bc->d_ubsize, UB_SOFT)) ++ goto out_err; ++ ++ d_bc->d_ub = ub; ++ return 0; ++ ++out_err: ++ put_beancounter(ub); ++ d_bc->d_ub = NULL; ++ return -ENOMEM; ++} ++ ++/* ++ * no locks ++ */ ++int ub_dentry_alloc(struct dentry *dentry) ++{ ++ int err; ++ struct dentry_beancounter *d_bc; ++ ++ err = d_alloc_beancounter(dentry); ++ if (err < 0) ++ return err; ++ ++ d_bc = &dentry->dentry_bc; ++ d_bc->d_ub = get_beancounter(get_exec_ub()); ++ atomic_set(&d_bc->d_inuse, INUSE_INIT); /* see comment in ub_dcache.h */ ++ d_bc->d_ubsize = d_charge_size(dentry); ++ ++ err = 0; ++ if (d_bc->d_ub != NULL && ++ charge_dcache(d_bc->d_ub, d_bc->d_ubsize, UB_HARD)) { ++ put_beancounter(d_bc->d_ub); ++ d_free_beancounter(d_bc); ++ err = -ENOMEM; ++ } ++ ++ return err; ++} ++ ++/* ++ * Charge / uncharge functions. ++ * ++ * We take d_lock to protect dentry_bc from concurrent acces ++ * when simultaneous __d_lookup and d_put happens on one dentry. ++ */ ++ ++/* ++ * no dcache_lock, d_lock and rcu_read_lock are held ++ * drops d_lock, rcu_read_lock and returns error if any ++ */ ++int ub_dentry_charge(struct dentry *dentry) ++{ ++ int err; ++ ++ err = 0; ++ if (ub_dget_testone(dentry)) ++ err = d_inc_inuse(dentry); ++ ++ /* ++ * d_lock and rcu_read_lock are dropped here ++ * (see also __d_lookup) ++ */ ++ spin_unlock(&dentry->d_lock); ++ rcu_read_unlock(); ++ ++ if (!err) ++ return 0; ++ ++ /* ++ * d_invlaidate is required for real_lookup ++ * since it tries to create new dentry on ++ * d_lookup failure. 
++ */ ++ if (!d_invalidate(dentry)) ++ return err; ++ ++ /* didn't succeeded, force dentry to be charged */ ++ d_forced_charge(&dentry->dentry_bc); ++ return 0; ++} ++ ++/* ++ * dcache_lock is held ++ * no d_locks, sequentaly takes and drops from dentry upward ++ */ ++void ub_dentry_uncharge(struct dentry *dentry) ++{ ++ struct dentry *parent; ++ ++ /* go up until status is changed and root is not reached */ ++ while (1) { ++ /* ++ * We need d_lock here to handle ++ * the race with ub_dentry_charge ++ */ ++ spin_lock(&dentry->d_lock); ++ if (!ub_dput_testzero(dentry)) { ++ spin_unlock(&dentry->d_lock); ++ break; ++ } ++ ++ /* state transition 0 => -1 */ ++ d_uncharge(&dentry->dentry_bc); ++ parent = dentry->d_parent; ++ spin_unlock(&dentry->d_lock); ++ ++ /* ++ * dcache_lock is held (see comment in __dget_locked) ++ * so we can safely move upwards. ++ */ ++ if (dentry == parent) ++ break; ++ dentry = parent; ++ } ++} ++ ++/* ++ * forced version. for dget in clean cache, when error is not an option ++ * ++ * dcache_lock is held ++ * no d_locks ++ */ ++void ub_dentry_charge_nofail(struct dentry *dentry) ++{ ++ struct dentry *parent; ++ ++ /* go up until status is changed and root is not reached */ ++ while (1) { ++ if (!ub_dget_testone(dentry)) ++ break; ++ ++ /* ++ * state transition -1 => 0 ++ * ++ * No need to lock dentry before atomic_inc ++ * like we do in ub_dentry_uncharge. ++ * We can't race with ub_dentry_uncharge due ++ * to dcache_lock. The only possible race with ++ * ub_dentry_charge is OK since they both ++ * do atomic_inc. ++ */ ++ d_forced_charge(&dentry->dentry_bc); ++ /* ++ * dcache_lock is held (see comment in __dget_locked) ++ * so we can safely move upwards. 
++ */ ++ parent = dentry->d_parent; ++ ++ if (dentry == parent) ++ break; ++ dentry = parent; ++ } ++} +diff -upr linux-2.6.16.orig/kernel/ub/ub_mem.c linux-2.6.16-026test015/kernel/ub/ub_mem.c +--- linux-2.6.16.orig/kernel/ub/ub_mem.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/ub_mem.c 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,384 @@ ++/* ++ * kernel/ub/ub_mem.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/slab.h> ++#include <linux/kmem_cache.h> ++#include <linux/kmem_slab.h> ++#include <linux/highmem.h> ++#include <linux/vmalloc.h> ++#include <linux/mm.h> ++#include <linux/gfp.h> ++#include <linux/swap.h> ++#include <linux/spinlock.h> ++#include <linux/sched.h> ++#include <linux/module.h> ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> ++#include <ub/ub_hash.h> ++ ++/* ++ * Initialization ++ */ ++ ++/* ++ * Slab accounting ++ */ ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ ++#define CC_HASH_SIZE 1024 ++static struct ub_cache_counter *cc_hash[CC_HASH_SIZE]; ++spinlock_t cc_lock; ++ ++static void __free_cache_counters(struct user_beancounter *ub, ++ kmem_cache_t *cachep) ++{ ++ struct ub_cache_counter *cc, **pprev, *del; ++ int i; ++ unsigned long flags; ++ ++ del = NULL; ++ spin_lock_irqsave(&cc_lock, flags); ++ for (i = 0; i < CC_HASH_SIZE; i++) { ++ pprev = &cc_hash[i]; ++ cc = cc_hash[i]; ++ while (cc != NULL) { ++ if (cc->ub != ub && cc->cachep != cachep) { ++ pprev = &cc->next; ++ cc = cc->next; ++ continue; ++ } ++ ++ list_del(&cc->ulist); ++ *pprev = cc->next; ++ cc->next = del; ++ del = cc; ++ cc = *pprev; ++ } ++ } ++ spin_unlock_irqrestore(&cc_lock, flags); ++ ++ while (del != NULL) { ++ cc = del->next; ++ kfree(del); ++ del = cc; ++ } ++} ++ ++void ub_free_counters(struct user_beancounter *ub) ++{ ++ __free_cache_counters(ub, NULL); ++} ++ ++void ub_kmemcache_free(kmem_cache_t *cachep) ++{ ++ 
__free_cache_counters(NULL, cachep); ++} ++ ++void __init init_cache_counters(void) ++{ ++ memset(cc_hash, 0, CC_HASH_SIZE * sizeof(cc_hash[0])); ++ spin_lock_init(&cc_lock); ++} ++ ++#define cc_hash_fun(ub, cachep) ( \ ++ (((unsigned long)(ub) >> L1_CACHE_SHIFT) ^ \ ++ ((unsigned long)(ub) >> (BITS_PER_LONG / 2)) ^ \ ++ ((unsigned long)(cachep) >> L1_CACHE_SHIFT) ^ \ ++ ((unsigned long)(cachep) >> (BITS_PER_LONG / 2)) \ ++ ) & (CC_HASH_SIZE - 1)) ++ ++static int change_slab_charged(struct user_beancounter *ub, void *objp, ++ unsigned long val, int mask) ++{ ++ struct ub_cache_counter *cc, *new_cnt, **pprev; ++ kmem_cache_t *cachep; ++ unsigned long flags; ++ ++ cachep = virt_to_cache(objp); ++ new_cnt = NULL; ++ ++again: ++ spin_lock_irqsave(&cc_lock, flags); ++ cc = cc_hash[cc_hash_fun(ub, cachep)]; ++ while (cc) { ++ if (cc->ub == ub && cc->cachep == cachep) ++ goto found; ++ cc = cc->next; ++ } ++ ++ if (new_cnt != NULL) ++ goto insert; ++ ++ spin_unlock_irqrestore(&cc_lock, flags); ++ ++ new_cnt = kmalloc(sizeof(*new_cnt), mask & ~__GFP_UBC); ++ if (new_cnt == NULL) ++ return -ENOMEM; ++ ++ new_cnt->counter = 0; ++ new_cnt->ub = ub; ++ new_cnt->cachep = cachep; ++ goto again; ++ ++insert: ++ pprev = &cc_hash[cc_hash_fun(ub, cachep)]; ++ new_cnt->next = *pprev; ++ *pprev = new_cnt; ++ list_add(&new_cnt->ulist, &ub->ub_cclist); ++ cc = new_cnt; ++ new_cnt = NULL; ++ ++found: ++ cc->counter += val; ++ spin_unlock_irqrestore(&cc_lock, flags); ++ if (new_cnt) ++ kfree(new_cnt); ++ return 0; ++} ++ ++static inline int inc_slab_charged(struct user_beancounter *ub, ++ void *objp, int mask) ++{ ++ return change_slab_charged(ub, objp, 1, mask); ++} ++ ++static inline void dec_slab_charged(struct user_beancounter *ub, void *objp) ++{ ++ if (change_slab_charged(ub, objp, -1, 0) < 0) ++ BUG(); ++} ++ ++#include <linux/vmalloc.h> ++ ++static inline int inc_pages_charged(struct user_beancounter *ub, ++ struct page *pg, int order) ++{ ++ int cpu; ++ ++ cpu = get_cpu(); ++ 
ub->ub_stat[cpu].pages_charged += (1 << order); ++ put_cpu(); ++ return 0; ++} ++ ++static inline void dec_pages_charged(struct user_beancounter *ub, ++ struct page *pg, int order) ++{ ++ int cpu; ++ ++ cpu = get_cpu(); ++ ub->ub_stat[cpu].pages_charged -= (1 << order); ++ put_cpu(); ++} ++ ++void inc_vmalloc_charged(struct vm_struct *vm, int flags) ++{ ++ int cpu; ++ struct user_beancounter *ub; ++ ++ if (!(flags & __GFP_UBC)) ++ return; ++ ++ ub = get_exec_ub(); ++ if (ub == NULL) ++ return; ++ ++ cpu = get_cpu(); ++ ub->ub_stat[cpu].vmalloc_charged += vm->nr_pages; ++ put_cpu(); ++} ++ ++void dec_vmalloc_charged(struct vm_struct *vm) ++{ ++ int cpu; ++ struct user_beancounter *ub; ++ ++ ub = page_ub(vm->pages[0]); ++ if (ub == NULL) ++ return; ++ ++ cpu = get_cpu(); ++ ub->ub_stat[cpu].vmalloc_charged -= vm->nr_pages; ++ put_cpu(); ++} ++ ++#else ++#define inc_slab_charged(ub, o, m) (0) ++#define dec_slab_charged(ub, o) do { } while (0) ++#define inc_pages_charged(ub, pg, o) (0) ++#define dec_pages_charged(ub, pg, o) do { } while (0) ++#endif ++ ++static inline struct user_beancounter **slab_ub_ref(void *objp) ++{ ++ kmem_cache_t *cachep; ++ struct slab *slabp; ++ int objnr; ++ ++ cachep = virt_to_cache(objp); ++ BUG_ON(!(cachep->flags & SLAB_UBC)); ++ slabp = virt_to_slab(objp); ++ objnr = (objp - slabp->s_mem) / cachep->buffer_size; ++ return slab_ubcs(cachep, slabp) + objnr; ++} ++ ++struct user_beancounter *slab_ub(void *objp) ++{ ++ struct user_beancounter **ub_ref; ++ ++ ub_ref = slab_ub_ref(objp); ++ return *ub_ref; ++} ++ ++EXPORT_SYMBOL(slab_ub); ++ ++static inline int should_charge(void *objp, int flags) ++{ ++ kmem_cache_t *cachep; ++ ++ cachep = virt_to_cache(objp); ++ if (!(cachep->flags & SLAB_UBC)) ++ return 0; ++ if ((cachep->flags & SLAB_NO_CHARGE) && !(flags & __GFP_UBC)) ++ return 0; ++ return 1; ++} ++ ++#define should_uncharge(objp) should_charge(objp, __GFP_UBC) ++ ++int ub_slab_charge(void *objp, int flags) ++{ ++ unsigned int size; ++ 
struct user_beancounter *ub; ++ ++ if (!should_charge(objp, flags)) ++ return 0; ++ ++ ub = get_beancounter(get_exec_ub()); ++ if (ub == NULL) ++ return 0; ++ ++ size = CHARGE_SIZE(kmem_obj_memusage(objp)); ++ if (charge_beancounter(ub, UB_KMEMSIZE, size, ++ (flags & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD))) ++ goto out_err; ++ ++ if (inc_slab_charged(ub, objp, flags) < 0) { ++ uncharge_beancounter(ub, UB_KMEMSIZE, size); ++ goto out_err; ++ } ++ *slab_ub_ref(objp) = ub; ++ return 0; ++ ++out_err: ++ put_beancounter(ub); ++ return -ENOMEM; ++} ++ ++void ub_slab_uncharge(void *objp) ++{ ++ unsigned int size; ++ struct user_beancounter **ub_ref; ++ ++ if (!should_uncharge(objp)) ++ return; ++ ++ ub_ref = slab_ub_ref(objp); ++ if (*ub_ref == NULL) ++ return; ++ ++ dec_slab_charged(*ub_ref, objp); ++ size = CHARGE_SIZE(kmem_obj_memusage(objp)); ++ uncharge_beancounter(*ub_ref, UB_KMEMSIZE, size); ++ put_beancounter(*ub_ref); ++ *ub_ref = NULL; ++} ++ ++/* ++ * Pages accounting ++ */ ++ ++inline int ub_page_charge(struct page *page, int order, int mask) ++{ ++ struct user_beancounter *ub; ++ ++ ub = NULL; ++ if (!(mask & __GFP_UBC)) ++ goto out; ++ ++ ub = get_beancounter(get_exec_ub()); ++ if (ub == NULL) ++ goto out; ++ ++ if (charge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order), ++ (mask & __GFP_SOFT_UBC ? 
UB_SOFT : UB_HARD))) ++ goto err; ++ if (inc_pages_charged(ub, page, order) < 0) { ++ uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order)); ++ goto err; ++ } ++out: ++ BUG_ON(page_ub(page) != NULL); ++ page_ub(page) = ub; ++ return 0; ++ ++err: ++ BUG_ON(page_ub(page) != NULL); ++ put_beancounter(ub); ++ return -ENOMEM; ++} ++ ++inline void ub_page_uncharge(struct page *page, int order) ++{ ++ struct user_beancounter *ub; ++ ++ ub = page_ub(page); ++ if (ub == NULL) ++ return; ++ ++ dec_pages_charged(ub, page, order); ++ BUG_ON(ub->ub_magic != UB_MAGIC); ++ uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order)); ++ put_beancounter(ub); ++ page_ub(page) = NULL; ++} ++ ++/* ++ * takes init_mm.page_table_lock ++ * some outer lock to protect pages from vmalloced area must be held ++ */ ++struct user_beancounter *vmalloc_ub(void *obj) ++{ ++ struct page *pg; ++ ++ pg = vmalloc_to_page(obj); ++ if (pg == NULL) ++ return NULL; ++ ++ return page_ub(pg); ++} ++ ++EXPORT_SYMBOL(vmalloc_ub); ++ ++struct user_beancounter *mem_ub(void *obj) ++{ ++ struct user_beancounter *ub; ++ ++ if ((unsigned long)obj >= VMALLOC_START && ++ (unsigned long)obj < VMALLOC_END) ++ ub = vmalloc_ub(obj); ++ else ++ ub = slab_ub(obj); ++ ++ return ub; ++} ++ ++EXPORT_SYMBOL(mem_ub); +diff -upr linux-2.6.16.orig/kernel/ub/ub_misc.c linux-2.6.16-026test015/kernel/ub/ub_misc.c +--- linux-2.6.16.orig/kernel/ub/ub_misc.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/ub_misc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,244 @@ ++/* ++ * kernel/ub/ub_misc.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/tty.h> ++#include <linux/tty_driver.h> ++#include <linux/signal.h> ++#include <linux/slab.h> ++#include <linux/fs.h> ++#include <linux/sched.h> ++#include <linux/kmem_cache.h> ++#include <linux/module.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> ++ ++/* ++ * Task staff ++ */ ++ ++static void init_task_sub(struct task_struct *tsk, ++ struct task_beancounter *old_bc) ++{ ++ struct task_beancounter *new_bc; ++ struct user_beancounter *sub; ++ ++ new_bc = &tsk->task_bc; ++ sub = old_bc->fork_sub; ++ new_bc->fork_sub = get_beancounter(sub); ++ new_bc->task_fnode = NULL; ++ new_bc->task_freserv = old_bc->task_freserv; ++ old_bc->task_freserv = NULL; ++ memset(&new_bc->task_data, 0, sizeof(new_bc->task_data)); ++} ++ ++int ub_task_charge(struct task_struct *parent, struct task_struct *task) ++{ ++ struct task_beancounter *old_bc; ++ struct task_beancounter *new_bc; ++ struct user_beancounter *ub; ++ ++ old_bc = &parent->task_bc; ++#if 0 ++ if (old_bc->exec_ub == NULL) { ++ /* FIXME: this won't work if task_bc is outside task_struct */ ++ init_task_sub(task, old_bc); ++ return 0; ++ } ++#endif ++ ub = old_bc->fork_sub; ++ ++ if (charge_beancounter(ub, UB_NUMPROC, 1, UB_HARD) < 0) ++ return -ENOMEM; ++ ++ new_bc = &task->task_bc; ++ new_bc->task_ub = get_beancounter(ub); ++ new_bc->exec_ub = get_beancounter(ub); ++ init_task_sub(task, old_bc); ++ return 0; ++} ++ ++void ub_task_uncharge(struct task_struct *task) ++{ ++ struct task_beancounter *task_bc; ++ ++ task_bc = &task->task_bc; ++ if (task_bc->task_ub != NULL) ++ uncharge_beancounter(task_bc->task_ub, UB_NUMPROC, 1); ++ ++ put_beancounter(task_bc->exec_ub); ++ put_beancounter(task_bc->task_ub); ++ put_beancounter(task_bc->fork_sub); ++ /* can't be freed elsewhere, failures possible in the middle of fork */ ++ if (task_bc->task_freserv != NULL) ++ kfree(task_bc->task_freserv); ++ ++ task_bc->exec_ub = (struct user_beancounter *)0xdeadbcbc; ++} ++ ++/* ++ * Files and file 
locks. ++ */ ++ ++int ub_file_charge(struct file *f) ++{ ++ struct user_beancounter *ub; ++ ++ /* No need to get_beancounter here since it's already got in slab */ ++ ub = slab_ub(f); ++ if (ub == NULL) ++ return 0; ++ ++ return charge_beancounter(ub, UB_NUMFILE, 1, UB_HARD); ++} ++ ++void ub_file_uncharge(struct file *f) ++{ ++ struct user_beancounter *ub; ++ ++ /* Ub will be put in slab */ ++ ub = slab_ub(f); ++ if (ub == NULL) ++ return; ++ ++ uncharge_beancounter(ub, UB_NUMFILE, 1); ++} ++ ++int ub_flock_charge(struct file_lock *fl, int hard) ++{ ++ struct user_beancounter *ub; ++ int err; ++ ++ /* No need to get_beancounter here since it's already got in slab */ ++ ub = slab_ub(fl); ++ if (ub == NULL) ++ return 0; ++ ++ err = charge_beancounter(ub, UB_NUMFLOCK, 1, hard ? UB_HARD : UB_SOFT); ++ if (!err) ++ fl->fl_charged = 1; ++ return err; ++} ++ ++void ub_flock_uncharge(struct file_lock *fl) ++{ ++ struct user_beancounter *ub; ++ ++ /* Ub will be put in slab */ ++ ub = slab_ub(fl); ++ if (ub == NULL || !fl->fl_charged) ++ return; ++ ++ uncharge_beancounter(ub, UB_NUMFLOCK, 1); ++ fl->fl_charged = 0; ++} ++ ++/* ++ * Signal handling ++ */ ++ ++static int do_ub_siginfo_charge(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (__charge_beancounter_locked(ub, UB_KMEMSIZE, size, UB_HARD)) ++ goto out_kmem; ++ ++ if (__charge_beancounter_locked(ub, UB_NUMSIGINFO, 1, UB_HARD)) ++ goto out_num; ++ ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return 0; ++ ++out_num: ++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, size); ++out_kmem: ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return -ENOMEM; ++} ++ ++static void do_ub_siginfo_uncharge(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, size); ++ __uncharge_beancounter_locked(ub, UB_NUMSIGINFO, 1); ++ 
spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++int ub_siginfo_charge(struct sigqueue *sq, struct user_beancounter *ub) ++{ ++ unsigned long size; ++ struct user_beancounter *p, *q; ++ ++ size = CHARGE_SIZE(kmem_obj_memusage(sq)); ++ for (p = ub; p != NULL; p = p->parent) { ++ if (do_ub_siginfo_charge(p, size)) ++ goto unroll; ++ } ++ ++ sq->sig_ub = get_beancounter(ub); ++ return 0; ++ ++unroll: ++ for (q = ub; q != p; q = q->parent) ++ do_ub_siginfo_uncharge(q, size); ++ return -ENOMEM; ++} ++EXPORT_SYMBOL(ub_siginfo_charge); ++ ++void ub_siginfo_uncharge(struct sigqueue *sq) ++{ ++ unsigned long size; ++ struct user_beancounter *ub, *p; ++ ++ p = ub = sq->sig_ub; ++ sq->sig_ub = NULL; ++ size = CHARGE_SIZE(kmem_obj_memusage(sq)); ++ for (; ub != NULL; ub = ub->parent) ++ do_ub_siginfo_uncharge(ub, size); ++ put_beancounter(p); ++} ++ ++/* ++ * PTYs ++ */ ++ ++int ub_pty_charge(struct tty_struct *tty) ++{ ++ struct user_beancounter *ub; ++ int retval; ++ ++ ub = slab_ub(tty); ++ retval = 0; ++ if (ub && tty->driver->subtype == PTY_TYPE_MASTER && ++ !test_bit(TTY_CHARGED, &tty->flags)) { ++ retval = charge_beancounter(ub, UB_NUMPTY, 1, UB_HARD); ++ if (!retval) ++ set_bit(TTY_CHARGED, &tty->flags); ++ } ++ return retval; ++} ++ ++void ub_pty_uncharge(struct tty_struct *tty) ++{ ++ struct user_beancounter *ub; ++ ++ ub = slab_ub(tty); ++ if (ub && tty->driver->subtype == PTY_TYPE_MASTER && ++ test_bit(TTY_CHARGED, &tty->flags)) { ++ uncharge_beancounter(ub, UB_NUMPTY, 1); ++ clear_bit(TTY_CHARGED, &tty->flags); ++ } ++} +diff -upr linux-2.6.16.orig/kernel/ub/ub_net.c linux-2.6.16-026test015/kernel/ub/ub_net.c +--- linux-2.6.16.orig/kernel/ub/ub_net.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/ub_net.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,1044 @@ ++/* ++ * linux/kernel/ub/ub_net.c ++ * ++ * Copyright (C) 1998-2004 Andrey V. Savochkin <saw@saw.sw.com.sg> ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. 
++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * TODO: ++ * - sizeof(struct inode) charge ++ * = tcp_mem_schedule() feedback based on ub limits ++ * + measures so that one socket won't exhaust all send buffers, ++ * see bug in bugzilla ++ * = sk->socket check for NULL in snd_wakeups ++ * (tcp_write_space checks for NULL itself) ++ * + in tcp_close(), orphaned socket abortion should be based on ubc ++ * resources (same in tcp_out_of_resources) ++ * Beancounter should also have separate orphaned socket counter... ++ * + for rcv, in-order segment should be accepted ++ * if only barrier is exceeded ++ * = tcp_rmem_schedule() feedback based on ub limits ++ * - repair forward_alloc mechanism for receive buffers ++ * It's idea is that some buffer space is pre-charged so that receive fast ++ * path doesn't need to take spinlocks and do other heavy stuff ++ * + tcp_prune_queue actions based on ub limits ++ * + window adjustments depending on available buffers for receive ++ * - window adjustments depending on available buffers for send ++ * + race around usewreserv ++ * + avoid allocating new page for each tiny-gram, see letter from ANK ++ * + rename ub_sock_lock ++ * + sk->sleep wait queue probably can be used for all wakeups, and ++ * sk->ub_wait is unnecessary ++ * + for UNIX sockets, the current algorithm will lead to ++ * UB_UNIX_MINBUF-sized messages only for non-blocking case ++ * - charge for af_packet sockets ++ * + all datagram sockets should be charged to NUMUNIXSOCK ++ * - we do not charge for skb copies and clones staying in device queues ++ * + live-lock if number of sockets is big and buffer limits are small ++ * [diff-ubc-dbllim3] ++ * - check that multiple readers/writers on the same socket won't cause fatal ++ * consequences ++ * - check allocation/charge orders ++ * + There is potential problem with callback_lock. In *snd_wakeup we take ++ * beancounter first, in sock_def_error_report - callback_lock first. ++ * then beancounter. 
This is not a problem if callback_lock taken ++ * readonly, but anyway... ++ * - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator ++ * General kernel problems: ++ * - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC ++ * notification won't get signals ++ * - datagram_poll looks racy ++ * ++ */ ++ ++#include <linux/net.h> ++#include <linux/slab.h> ++#include <linux/kmem_cache.h> ++#include <linux/gfp.h> ++#include <linux/err.h> ++#include <linux/socket.h> ++#include <linux/module.h> ++#include <linux/sched.h> ++ ++#include <net/sock.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_net.h> ++#include <ub/ub_debug.h> ++ ++ ++/* Skb truesize definition. Bad place. Den */ ++ ++static inline int skb_chargesize_head(struct sk_buff *skb) ++{ ++ return skb_charge_size(skb->end - skb->head + ++ sizeof(struct skb_shared_info)); ++} ++ ++int skb_charge_fullsize(struct sk_buff *skb) ++{ ++ int chargesize; ++ struct sk_buff *skbfrag; ++ ++ chargesize = skb_chargesize_head(skb) + ++ PAGE_SIZE * skb_shinfo(skb)->nr_frags; ++ if (likely(skb_shinfo(skb)->frag_list == NULL)) ++ return chargesize; ++ for (skbfrag = skb_shinfo(skb)->frag_list; ++ skbfrag != NULL; ++ skbfrag = skbfrag->next) { ++ chargesize += skb_charge_fullsize(skbfrag); ++ } ++ return chargesize; ++} ++EXPORT_SYMBOL(skb_charge_fullsize); ++ ++static int ub_sock_makewreserv_locked(struct sock *sk, ++ int bufid, int sockid, unsigned long size); ++ ++int __ub_too_many_orphans(struct sock *sk, int count) ++{ ++ struct user_beancounter *ub; ++ ++ if (sock_has_ubc(sk)) { ++ for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent); ++ if (count >= ub->ub_parms[UB_NUMTCPSOCK].barrier >> 2) ++ return 1; ++ } ++ return 0; ++} ++ ++/* ++ * Queueing ++ */ ++ ++static void ub_sock_snd_wakeup(struct user_beancounter *ub) ++{ ++ struct list_head *p; ++ struct sock_beancounter *skbc; ++ struct sock *sk; ++ struct user_beancounter *cub; ++ unsigned long added; ++ ++ while 
(!list_empty(&ub->ub_other_sk_list)) { ++ p = ub->ub_other_sk_list.next; ++ skbc = list_entry(p, struct sock_beancounter, ub_sock_list); ++ sk = skbc_sock(skbc); ++ ub_debug(UBD_NET_SLEEP, "Found sock to wake up\n"); ++ added = -skbc->poll_reserv; ++ if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF, ++ UB_NUMOTHERSOCK, skbc->ub_waitspc)) ++ break; ++ added += skbc->poll_reserv; ++ ++ /* ++ * See comments in ub_tcp_snd_wakeup. ++ * Locking note: both unix_write_space and ++ * sock_def_write_space take callback_lock themselves. ++ * We take it here just to be on the safe side and to ++ * act the same way as ub_tcp_snd_wakeup does. ++ */ ++ sk->sk_write_space(sk); ++ ++ list_del_init(&skbc->ub_sock_list); ++ ++ if (skbc->ub != ub && added) { ++ cub = get_beancounter(skbc->ub); ++ spin_unlock(&ub->ub_lock); ++ charge_beancounter_notop(cub, UB_OTHERSOCKBUF, added); ++ put_beancounter(cub); ++ spin_lock(&ub->ub_lock); ++ } ++ } ++} ++ ++static void ub_tcp_snd_wakeup(struct user_beancounter *ub) ++{ ++ struct list_head *p; ++ struct sock *sk; ++ struct sock_beancounter *skbc; ++ struct socket *sock; ++ struct user_beancounter *cub; ++ unsigned long added; ++ ++ while (!list_empty(&ub->ub_tcp_sk_list)) { ++ p = ub->ub_tcp_sk_list.next; ++ skbc = list_entry(p, struct sock_beancounter, ub_sock_list); ++ sk = skbc_sock(skbc); ++ ++ added = 0; ++ sock = sk->sk_socket; ++ if (sock == NULL) ++ /* sk being destroyed */ ++ goto cont; ++ ++ ub_debug(UBD_NET_SLEEP, ++ "Checking queue, waiting %lu, reserv %lu\n", ++ skbc->ub_waitspc, skbc->poll_reserv); ++ added = -skbc->poll_reserv; ++ if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, ++ UB_NUMTCPSOCK, skbc->ub_waitspc)) ++ break; ++ added += skbc->poll_reserv; ++ ++ /* ++ * Send async notifications and wake up. ++ * Locking note: we get callback_lock here because ++ * tcp_write_space is over-optimistic about calling context ++ * (socket lock is presumed). So we get the lock here although ++ * it belongs to the callback. 
++ */ ++ sk->sk_write_space(sk); ++ ++cont: ++ list_del_init(&skbc->ub_sock_list); ++ ++ if (skbc->ub != ub && added) { ++ cub = get_beancounter(skbc->ub); ++ spin_unlock(&ub->ub_lock); ++ charge_beancounter_notop(cub, UB_TCPSNDBUF, added); ++ put_beancounter(cub); ++ spin_lock(&ub->ub_lock); ++ } ++ } ++} ++ ++void ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size) ++{ ++ unsigned long flags; ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long added_reserv; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ skbc = sock_bc(sk); ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size); ++ added_reserv = -skbc->poll_reserv; ++ if (!ub_sock_makewreserv_locked(sk, res, bid2sid(res), size)) { ++ /* ++ * It looks a bit hackish, but it is compatible with both ++ * wait_for_xx_ubspace and poll. ++ * This __set_current_state is equivalent to a wakeup event ++ * right after spin_unlock_irqrestore. 
++ */ ++ __set_current_state(TASK_RUNNING); ++ added_reserv += skbc->poll_reserv; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ if (added_reserv) ++ charge_beancounter_notop(skbc->ub, res, added_reserv); ++ return; ++ } ++ ++ ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n"); ++ skbc->ub_waitspc = size; ++ if (!list_empty(&skbc->ub_sock_list)) { ++ ub_debug(UBD_NET_SOCKET, ++ "re-adding socket to beancounter %p.\n", ub); ++ goto out; ++ } ++ ++ switch (res) { ++ case UB_TCPSNDBUF: ++ list_add_tail(&skbc->ub_sock_list, ++ &ub->ub_tcp_sk_list); ++ break; ++ case UB_OTHERSOCKBUF: ++ list_add_tail(&skbc->ub_sock_list, ++ &ub->ub_other_sk_list); ++ break; ++ default: ++ BUG(); ++ } ++out: ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++ ++/* ++ * Helpers ++ */ ++ ++void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk, ++ unsigned long size, int resource) ++{ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ if (sock_bc(sk)->ub == NULL) ++ BUG(); ++ skb_bc(skb)->ub = sock_bc(sk)->ub; ++ skb_bc(skb)->charged = size; ++ skb_bc(skb)->resource = resource; ++ ++ /* Ugly. Ugly. 
Skb in sk writequeue can live without ref to sk */ ++ if (skb->sk == NULL) ++ skb->sk = sk; ++} ++ ++static inline void ub_skb_set_uncharge(struct sk_buff *skb) ++{ ++ skb_bc(skb)->ub = NULL; ++ skb_bc(skb)->charged = 0; ++ skb_bc(skb)->resource = 0; ++} ++ ++static inline void __uncharge_sockbuf(struct sock_beancounter *skbc, ++ struct user_beancounter *ub, int resource, unsigned long size) ++{ ++ if (ub != NULL) ++ __uncharge_beancounter_locked(ub, resource, size); ++ ++ if (skbc != NULL) { ++ if (skbc->ub_wcharged > size) ++ skbc->ub_wcharged -= size; ++ else ++ skbc->ub_wcharged = 0; ++ } ++} ++ ++static void ub_update_rmem_thres(struct sock_beancounter *skub) ++{ ++ struct user_beancounter *ub; ++ ++ if (skub && skub->ub) { ++ for (ub = skub->ub; ub->parent != NULL; ub = ub->parent); ++ ub->ub_rmem_thres = ub->ub_parms[UB_TCPRCVBUF].barrier / ++ (ub->ub_parms[UB_NUMTCPSOCK].held + 1); ++ } ++} ++inline int ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask) ++{ ++ memset(skb_bc(skb), 0, sizeof(struct skb_beancounter)); ++ return 0; ++} ++ ++inline void ub_skb_free_bc(struct sk_buff *skb) ++{ ++} ++ ++ ++/* ++ * Charge socket number ++ */ ++ ++static inline int sk_alloc_beancounter(struct sock *sk) ++{ ++ struct sock_beancounter *skbc; ++ ++ skbc = sock_bc(sk); ++ memset(skbc, 0, sizeof(struct sock_beancounter)); ++ return 0; ++} ++ ++static inline void sk_free_beancounter(struct sock *sk) ++{ ++} ++ ++static int __sock_charge(struct sock *sk, int res) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ ++ ub = get_exec_ub(); ++ if (ub == NULL) ++ return 0; ++ if (sk_alloc_beancounter(sk) < 0) ++ return -ENOMEM; ++ ++ skbc = sock_bc(sk); ++ INIT_LIST_HEAD(&skbc->ub_sock_list); ++ ++ if (charge_beancounter(ub, res, 1, UB_HARD) < 0) ++ goto out_limit; ++ ++ /* TCP listen sock or process keeps referrence to UB */ ++ skbc->ub = get_beancounter(ub); ++ return 0; ++ ++out_limit: ++ sk_free_beancounter(sk); ++ return -ENOMEM; ++} ++ ++int 
ub_tcp_sock_charge(struct sock *sk) ++{ ++ int ret; ++ ++ ret = __sock_charge(sk, UB_NUMTCPSOCK); ++ ub_update_rmem_thres(sock_bc(sk)); ++ ++ return ret; ++} ++ ++int ub_other_sock_charge(struct sock *sk) ++{ ++ return __sock_charge(sk, UB_NUMOTHERSOCK); ++} ++ ++EXPORT_SYMBOL(ub_other_sock_charge); ++ ++int ub_sock_charge(struct sock *sk, int family, int type) ++{ ++ return (IS_TCP_SOCK(family, type) ? ++ ub_tcp_sock_charge(sk) : ub_other_sock_charge(sk)); ++} ++EXPORT_SYMBOL(ub_sock_charge); ++ ++/* ++ * Uncharge socket number ++ */ ++ ++void ub_sock_uncharge(struct sock *sk) ++{ ++ int is_tcp_sock; ++ unsigned long flags; ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long reserv; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type); ++ skbc = sock_bc(sk); ++ ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk); ++ ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (!list_empty(&skbc->ub_sock_list)) { ++ ub_debug(UBD_NET_SOCKET, ++ "ub_sock_uncharge: removing from ub(%p) queue.\n", ++ skbc); ++ list_del_init(&skbc->ub_sock_list); ++ } ++ ++ reserv = skbc->poll_reserv; ++ __uncharge_beancounter_locked(ub, ++ (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF), ++ reserv); ++ __uncharge_beancounter_locked(ub, ++ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1); ++ ++ /* The check sk->sk_family != PF_NETLINK is made as the skb is ++ * queued to the kernel end of socket while changed to the user one. ++ * Den */ ++ if (skbc->ub_wcharged > reserv && ++ sk->sk_family != PF_NETLINK) { ++ skbc->ub_wcharged -= reserv; ++ printk(KERN_WARNING ++ "ub_sock_uncharge: wch=%lu for ub %p (%d).\n", ++ skbc->ub_wcharged, skbc->ub, skbc->ub->ub_uid); ++ } else ++ skbc->ub_wcharged = 0; ++ skbc->poll_reserv = 0; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ uncharge_beancounter_notop(skbc->ub, ++ (is_tcp_sock ? 
UB_TCPSNDBUF : UB_OTHERSOCKBUF), ++ reserv); ++ uncharge_beancounter_notop(skbc->ub, ++ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1); ++ ++ put_beancounter(skbc->ub); ++ sk_free_beancounter(sk); ++} ++ ++/* ++ * Send - receive buffers ++ */ ++ ++/* Special case for netlink_dump - (un)charges precalculated size */ ++int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk) ++{ ++ int ret; ++ unsigned long chargesize; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ chargesize = skb_charge_fullsize(skb); ++ ret = charge_beancounter(sock_bc(sk)->ub, ++ UB_DGRAMRCVBUF, chargesize, UB_HARD); ++ if (ret < 0) ++ return ret; ++ ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF); ++ return ret; ++} ++ ++/* ++ * Poll reserv accounting ++ */ ++static int ub_sock_makewreserv_locked(struct sock *sk, ++ int bufid, int sockid, unsigned long size) ++{ ++ unsigned long wcharge_added; ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ ++ if (!sock_has_ubc(sk)) ++ goto out; ++ ++ skbc = sock_bc(sk); ++ if (skbc->poll_reserv >= size) /* no work to be done */ ++ goto out; ++ ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ ub->ub_parms[bufid].held += size - skbc->poll_reserv; ++ ++ wcharge_added = 0; ++ /* ++ * Logic: ++ * 1) when used memory hits barrier, we set wmem_pressure; ++ * wmem_pressure is reset under barrier/2; ++ * between barrier/2 and barrier we limit per-socket buffer growth; ++ * 2) each socket is guaranteed to get (limit-barrier)/maxsockets ++ * calculated on the base of memory eaten after the barrier is hit ++ */ ++ skbc = sock_bc(sk); ++ if (!ub_hfbarrier_hit(ub, bufid)) { ++ if (ub->ub_wmem_pressure) ++ ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 " ++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n", ++ sk, size, skbc->poll_reserv, ++ ub->ub_parms[bufid].held, ++ skbc->ub_wcharged, sk->sk_sndbuf); ++ ub->ub_wmem_pressure = 0; ++ } ++ if (ub_barrier_hit(ub, bufid)) { ++ if (!ub->ub_wmem_pressure) ++ ub_debug(UBD_NET_SEND, 
"makewres: pressure -> 1 " ++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n", ++ sk, size, skbc->poll_reserv, ++ ub->ub_parms[bufid].held, ++ skbc->ub_wcharged, sk->sk_sndbuf); ++ ub->ub_wmem_pressure = 1; ++ wcharge_added = size - skbc->poll_reserv; ++ skbc->ub_wcharged += wcharge_added; ++ if (skbc->ub_wcharged * ub->ub_parms[sockid].limit + ++ ub->ub_parms[bufid].barrier > ++ ub->ub_parms[bufid].limit) ++ goto unroll; ++ } ++ if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit) ++ goto unroll; ++ ++ ub_adjust_maxheld(ub, bufid); ++ skbc->poll_reserv = size; ++out: ++ return 0; ++ ++unroll: ++ ub_debug(UBD_NET_SEND, ++ "makewres: deny " ++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n", ++ sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held, ++ skbc->ub_wcharged, sk->sk_sndbuf); ++ skbc->ub_wcharged -= wcharge_added; ++ ub->ub_parms[bufid].failcnt++; ++ ub->ub_parms[bufid].held -= size - skbc->poll_reserv; ++ return -ENOMEM; ++} ++ ++int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long flags; ++ unsigned long added_reserv; ++ int err; ++ ++ skbc = sock_bc(sk); ++ ++ /* ++ * This function provides that there is sufficient reserve upon return ++ * only if sk has only one user. We can check poll_reserv without ++ * serialization and avoid locking if the reserve already exists. 
++ */ ++ if (!sock_has_ubc(sk) || skbc->poll_reserv >= size) ++ return 0; ++ ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ added_reserv = -skbc->poll_reserv; ++ err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size); ++ added_reserv += skbc->poll_reserv; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ if (added_reserv) ++ charge_beancounter_notop(skbc->ub, bufid, added_reserv); ++ ++ return err; ++} ++ ++int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long flags; ++ unsigned long added_reserv; ++ int err; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ skbc = sock_bc(sk); ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ added_reserv = -skbc->poll_reserv; ++ err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size); ++ added_reserv += skbc->poll_reserv; ++ if (!err) ++ skbc->poll_reserv -= size; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ if (added_reserv) ++ charge_beancounter_notop(skbc->ub, bufid, added_reserv); ++ ++ return err; ++} ++ ++void ub_sock_ret_wreserv(struct sock *sk, int bufid, ++ unsigned long size, unsigned long ressize) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long extra; ++ unsigned long flags; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ extra = 0; ++ skbc = sock_bc(sk); ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ skbc->poll_reserv += size; ++ if (skbc->poll_reserv > ressize) { ++ extra = skbc->poll_reserv - ressize; ++ __uncharge_beancounter_locked(ub, bufid, extra); ++ ++ if (skbc->ub_wcharged > skbc->poll_reserv - ressize) ++ skbc->ub_wcharged -= skbc->poll_reserv - ressize; ++ else ++ skbc->ub_wcharged = 0; ++ skbc->poll_reserv = ressize; ++ } ++ ++ ub_tcp_snd_wakeup(ub); ++ 
spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ if (extra) ++ uncharge_beancounter_notop(skbc->ub, bufid, extra); ++} ++ ++long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size) ++{ ++ DECLARE_WAITQUEUE(wait, current); ++ ++ add_wait_queue(sk->sk_sleep, &wait); ++ for (;;) { ++ if (signal_pending(current)) ++ break; ++ set_current_state(TASK_INTERRUPTIBLE); ++ if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size)) ++ break; ++ ++ if (sk->sk_shutdown & SEND_SHUTDOWN) ++ break; ++ if (sk->sk_err) ++ break; ++ ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size); ++ timeo = schedule_timeout(timeo); ++ } ++ __set_current_state(TASK_RUNNING); ++ remove_wait_queue(sk->sk_sleep, &wait); ++ return timeo; ++} ++ ++int ub_sock_makewres_other(struct sock *sk, unsigned long size) ++{ ++ return ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size); ++} ++ ++int ub_sock_makewres_tcp(struct sock *sk, unsigned long size) ++{ ++ return ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size); ++} ++ ++int ub_sock_getwres_other(struct sock *sk, unsigned long size) ++{ ++ return ub_sock_get_wreserv(sk, UB_OTHERSOCKBUF, size); ++} ++ ++int ub_sock_getwres_tcp(struct sock *sk, unsigned long size) ++{ ++ return ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size); ++} ++ ++void ub_sock_retwres_other(struct sock *sk, unsigned long size, ++ unsigned long ressize) ++{ ++ ub_sock_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize); ++} ++ ++void ub_sock_retwres_tcp(struct sock *sk, unsigned long size, ++ unsigned long ressize) ++{ ++ ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize); ++} ++ ++void ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz) ++{ ++ ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz); ++} ++ ++void ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz) ++{ ++ ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz); ++} ++ ++void ub_sock_sndqueuedel(struct sock *sk) ++{ ++ struct sock_beancounter *skbc; ++ unsigned long flags; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ skbc = 
sock_bc(sk); ++ ++ /* race with write_space callback of other socket */ ++ spin_lock_irqsave(&skbc->ub->ub_lock, flags); ++ list_del_init(&skbc->ub_sock_list); ++ spin_unlock_irqrestore(&skbc->ub->ub_lock, flags); ++} ++ ++/* ++ * UB_DGRAMRCVBUF ++ */ ++ ++int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb) ++{ ++ unsigned long chargesize; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ chargesize = skb_charge_fullsize(skb); ++ if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF, ++ chargesize, UB_HARD)) ++ return -ENOMEM; ++ ++ ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF); ++ return 0; ++} ++ ++EXPORT_SYMBOL(ub_sockrcvbuf_charge); ++ ++static void ub_sockrcvbuf_uncharge(struct sk_buff *skb) ++{ ++ uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF, ++ skb_bc(skb)->charged); ++ ub_skb_set_uncharge(skb); ++} ++ ++/* ++ * UB_TCPRCVBUF ++ */ ++static int charge_tcprcvbuf(struct sock *sk, struct sk_buff *skb, ++ enum severity strict) ++{ ++ int retval; ++ unsigned long flags; ++ struct user_beancounter *ub; ++ unsigned long chargesize; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ /* ++ * Memory pressure reactions: ++ * 1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND) ++ * 2) set UB_RMEM_SHRINK and tcp_clamp_window() ++ * tcp_collapse_queues() if rmem_alloc > rcvbuf ++ * 3) drop OFO, tcp_purge_ofo() ++ * 4) drop all. ++ * Currently, we do #2 and #3 at once (which means that current ++ * collapsing of OFO queue in tcp_collapse_queues() is a waste of time, ++ * for example...) ++ * On memory pressure we jump from #0 to #3, and when the pressure ++ * subsides, to #1. 
++ */ ++ retval = 0; ++ chargesize = skb_charge_fullsize(skb); ++ ++ for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_parms[UB_TCPRCVBUF].held += chargesize; ++ if (ub->ub_parms[UB_TCPRCVBUF].held > ++ ub->ub_parms[UB_TCPRCVBUF].barrier && ++ strict != UB_FORCE) ++ goto excess; ++ ub_adjust_maxheld(ub, UB_TCPRCVBUF); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++out: ++ if (retval == 0) { ++ charge_beancounter_notop(sock_bc(sk)->ub, UB_TCPRCVBUF, ++ chargesize); ++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF); ++ } ++ return retval; ++ ++excess: ++ ub->ub_rmem_pressure = UB_RMEM_SHRINK; ++ if (strict == UB_HARD) ++ retval = -ENOMEM; ++ if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit) ++ retval = -ENOMEM; ++ /* ++ * We try to leave numsock*maxadvmss as a reserve for sockets not ++ * queueing any data yet (if the difference between the barrier and the ++ * limit is enough for this reserve). 
++ */ ++ if (ub->ub_parms[UB_TCPRCVBUF].held + ++ ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss ++ > ub->ub_parms[UB_TCPRCVBUF].limit && ++ atomic_read(&sk->sk_rmem_alloc)) ++ retval = -ENOMEM; ++ if (retval) { ++ ub->ub_parms[UB_TCPRCVBUF].held -= chargesize; ++ ub->ub_parms[UB_TCPRCVBUF].failcnt++; ++ } ++ ub_adjust_maxheld(ub, UB_TCPRCVBUF); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ goto out; ++} ++ ++int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb) ++{ ++ return charge_tcprcvbuf(sk, skb, UB_HARD); ++} ++ ++int ub_tcprcvbuf_charge_forced(struct sock *sk, struct sk_buff *skb) ++{ ++ return charge_tcprcvbuf(sk, skb, UB_FORCE); ++} ++EXPORT_SYMBOL(ub_tcprcvbuf_charge_forced); ++ ++static void ub_tcprcvbuf_uncharge(struct sk_buff *skb) ++{ ++ unsigned long flags; ++ unsigned long held, bar; ++ int prev_pres; ++ struct user_beancounter *ub; ++ ++ for (ub = skb_bc(skb)->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) { ++ printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n", ++ skb_bc(skb)->charged, ++ ub, ub->ub_parms[UB_TCPRCVBUF].held); ++ /* ass-saving bung */ ++ skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held; ++ } ++ ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged; ++ held = ub->ub_parms[UB_TCPRCVBUF].held; ++ bar = ub->ub_parms[UB_TCPRCVBUF].barrier; ++ prev_pres = ub->ub_rmem_pressure; ++ if (held <= bar - (bar >> 2)) ++ ub->ub_rmem_pressure = UB_RMEM_EXPAND; ++ else if (held <= bar) ++ ub->ub_rmem_pressure = UB_RMEM_KEEP; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ uncharge_beancounter_notop(skb_bc(skb)->ub, UB_TCPRCVBUF, ++ skb_bc(skb)->charged); ++ ub_skb_set_uncharge(skb); ++} ++ ++ ++/* ++ * UB_OTHERSOCKBUF ++ */ ++ ++static void ub_socksndbuf_uncharge(struct sk_buff *skb) ++{ ++ unsigned long flags; ++ struct user_beancounter *ub, *cub; ++ struct sock_beancounter *sk_bc; ++ ++ /* resource was 
set. no check for ub required */ ++ cub = skb_bc(skb)->ub; ++ for (ub = cub; ub->parent != NULL; ub = ub->parent); ++ skb_bc(skb)->ub = NULL; ++ if (skb->sk != NULL) ++ sk_bc = sock_bc(skb->sk); ++ else ++ sk_bc = NULL; ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_sockbuf(sk_bc, ub, UB_OTHERSOCKBUF, ++ skb_bc(skb)->charged); ++ ub_sock_snd_wakeup(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ uncharge_beancounter_notop(cub, UB_OTHERSOCKBUF, skb_bc(skb)->charged); ++ ub_skb_set_uncharge(skb); ++} ++ ++static void ub_tcpsndbuf_uncharge(struct sk_buff *skb) ++{ ++ unsigned long flags; ++ struct user_beancounter *ub, *cub; ++ ++ /* resource can be not set, called manually */ ++ cub = skb_bc(skb)->ub; ++ if (cub == NULL) ++ return; ++ for (ub = cub; ub->parent != NULL; ub = ub->parent); ++ skb_bc(skb)->ub = NULL; ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_sockbuf(sock_bc(skb->sk), ub, UB_TCPSNDBUF, ++ skb_bc(skb)->charged); ++ ub_tcp_snd_wakeup(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ uncharge_beancounter_notop(cub, UB_TCPSNDBUF, skb_bc(skb)->charged); ++ ub_skb_set_uncharge(skb); ++} ++ ++void ub_skb_uncharge(struct sk_buff *skb) ++{ ++ switch (skb_bc(skb)->resource) { ++ case UB_TCPSNDBUF: ++ ub_tcpsndbuf_uncharge(skb); ++ break; ++ case UB_TCPRCVBUF: ++ ub_tcprcvbuf_uncharge(skb); ++ break; ++ case UB_DGRAMRCVBUF: ++ ub_sockrcvbuf_uncharge(skb); ++ break; ++ case UB_OTHERSOCKBUF: ++ ub_socksndbuf_uncharge(skb); ++ break; ++ } ++} ++ ++EXPORT_SYMBOL(ub_skb_uncharge); /* due to skb_orphan()/conntracks */ ++ ++/* ++ * TCP send buffers accouting. 
Paged part ++ */ ++int ub_sock_tcp_chargepage(struct sock *sk) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long added; ++ unsigned long flags; ++ int err; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ skbc = sock_bc(sk); ++ ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ /* Try to charge full page */ ++ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK, ++ PAGE_SIZE); ++ if (err == 0) { ++ skbc->poll_reserv -= PAGE_SIZE; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, PAGE_SIZE); ++ return 0; ++ } ++ ++ /* Try to charge page enough to satisfy sys_select. The possible ++ overdraft for the rest of the page is generally better then ++ requesting full page in tcp_poll. This should not happen ++ frequently. Den */ ++ added = -skbc->poll_reserv; ++ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK, ++ SOCK_MIN_UBCSPACE); ++ if (err < 0) { ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return err; ++ } ++ __charge_beancounter_locked(ub, UB_TCPSNDBUF, ++ PAGE_SIZE - skbc->poll_reserv, ++ UB_FORCE); ++ added += PAGE_SIZE; ++ skbc->poll_reserv = 0; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added); ++ ++ return 0; ++ ++} ++ ++void ub_sock_tcp_detachpage(struct sock *sk) ++{ ++ struct sk_buff *skb; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ /* The page is just detached from socket. 
The last skb in queue ++ with paged part holds referrence to it */ ++ skb = skb_peek_tail(&sk->sk_write_queue); ++ if (skb == NULL) { ++ /* If the queue is empty - all data is sent and page is about ++ to be freed */ ++ uncharge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, PAGE_SIZE); ++ return; ++ } ++ /* Last skb is a good aproximation for a last skb with paged part */ ++ skb_bc(skb)->charged += PAGE_SIZE; ++} ++ ++static int charge_tcpsndbuf(struct sock *sk, struct sk_buff *skb, ++ enum severity strict) ++{ ++ int ret; ++ unsigned long chargesize; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ chargesize = skb_charge_fullsize(skb); ++ ret = charge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, chargesize, ++ strict); ++ if (ret < 0) ++ return ret; ++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF); ++ sock_bc(sk)->ub_wcharged += chargesize; ++ return ret; ++} ++ ++int ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb) ++{ ++ return charge_tcpsndbuf(sk, skb, UB_HARD); ++} ++ ++int ub_tcpsndbuf_charge_forced(struct sock *sk, struct sk_buff *skb) ++{ ++ return charge_tcpsndbuf(sk, skb, UB_FORCE); ++} ++EXPORT_SYMBOL(ub_tcpsndbuf_charge_forced); ++ ++/* ++ * Initialization staff ++ */ ++int __init skbc_cache_init(void) ++{ ++ return 0; ++} +diff -upr linux-2.6.16.orig/kernel/ub/ub_page_bc.c linux-2.6.16-026test015/kernel/ub/ub_page_bc.c +--- linux-2.6.16.orig/kernel/ub/ub_page_bc.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/ub_page_bc.c 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,439 @@ ++/* ++ * kernel/ub/ub_page_bc.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/spinlock.h> ++#include <linux/slab.h> ++#include <linux/mm.h> ++#include <linux/gfp.h> ++#include <linux/vmalloc.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_hash.h> ++#include <ub/ub_vmpages.h> ++#include <ub/ub_page.h> ++ ++static kmem_cache_t *pb_cachep; ++static spinlock_t pb_lock = SPIN_LOCK_UNLOCKED; ++static struct page_beancounter **pb_hash_table; ++static unsigned int pb_hash_mask; ++ ++/* ++ * Auxiliary staff ++ */ ++ ++static inline struct page_beancounter *next_page_pb(struct page_beancounter *p) ++{ ++ return list_entry(p->page_list.next, struct page_beancounter, ++ page_list); ++} ++ ++static inline struct page_beancounter *prev_page_pb(struct page_beancounter *p) ++{ ++ return list_entry(p->page_list.prev, struct page_beancounter, ++ page_list); ++} ++ ++/* ++ * Held pages manipulation ++ */ ++static inline void set_held_pages(struct user_beancounter *bc) ++{ ++ /* all three depend on ub_held_pages */ ++ __ub_update_physpages(bc); ++ __ub_update_oomguarpages(bc); ++ __ub_update_privvm(bc); ++} ++ ++static inline void do_dec_held_pages(struct user_beancounter *ub, int value) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_held_pages -= value; ++ set_held_pages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++static void dec_held_pages(struct user_beancounter *ub, int value) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_dec_held_pages(ub, value); ++} ++ ++static inline void do_inc_held_pages(struct user_beancounter *ub, int value) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_held_pages += value; ++ set_held_pages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++static void inc_held_pages(struct user_beancounter *ub, int value) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_inc_held_pages(ub, value); ++} ++ ++/* ++ * Alloc - free ++ */ ++ ++inline int pb_alloc(struct page_beancounter **pbc) ++{ ++ *pbc = 
kmem_cache_alloc(pb_cachep, GFP_KERNEL); ++ if (*pbc != NULL) { ++ (*pbc)->next_hash = NULL; ++ (*pbc)->pb_magic = PB_MAGIC; ++ } ++ return (*pbc == NULL); ++} ++ ++inline void pb_free(struct page_beancounter **pb) ++{ ++ if (*pb != NULL) { ++ kmem_cache_free(pb_cachep, *pb); ++ *pb = NULL; ++ } ++} ++ ++void pb_free_list(struct page_beancounter **p_pb) ++{ ++ struct page_beancounter *list, *pb; ++ ++ list = *p_pb; ++ if (list == PBC_COPY_SAME) ++ return; ++ ++ while (list) { ++ pb = list; ++ list = list->next_hash; ++ pb_free(&pb); ++ } ++ *p_pb = NULL; ++} ++ ++/* ++ * head -> <new objs> -> <old objs> -> ... ++ */ ++static int __alloc_list(struct page_beancounter **head, int num) ++{ ++ struct page_beancounter *pb; ++ ++ while (num > 0) { ++ if (pb_alloc(&pb)) ++ return -1; ++ pb->next_hash = *head; ++ *head = pb; ++ num--; ++ } ++ ++ return num; ++} ++ ++/* ++ * Ensure that the list contains at least num elements. ++ * p_pb points to an initialized list, may be of the zero length. ++ * ++ * mm->page_table_lock should be held ++ */ ++int pb_alloc_list(struct page_beancounter **p_pb, int num) ++{ ++ struct page_beancounter *list; ++ ++ for (list = *p_pb; list != NULL && num; list = list->next_hash, num--); ++ if (!num) ++ return 0; ++ ++ /* ++ * *p_pb(after) *p_pb (before) ++ * \ \ ++ * <new objs> -...-> <old objs> -> ... 
++ */ ++ if (__alloc_list(p_pb, num) < 0) ++ goto nomem; ++ return 0; ++ ++nomem: ++ pb_free_list(p_pb); ++ return -ENOMEM; ++} ++ ++/* ++ * Allocates a page_beancounter for each ++ * user_beancounter in a hash ++ */ ++int pb_alloc_all(struct page_beancounter **pbs) ++{ ++ int i, need_alloc; ++ unsigned long flags; ++ struct user_beancounter *ub; ++ ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ need_alloc = 0; ++ for_each_beancounter(i, ub) ++ need_alloc++; ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ++ if (!__alloc_list(pbs, need_alloc)) ++ return 0; ++ ++ pb_free_list(pbs); ++ return -ENOMEM; ++} ++ ++/* ++ * Hash routines ++ */ ++ ++static inline int pb_hash(struct user_beancounter *ub, struct page *page) ++{ ++ return (page_to_pfn(page) + (ub->ub_uid << 10)) & pb_hash_mask; ++} ++ ++/* pb_lock should be held */ ++static inline void insert_pb(struct page_beancounter *p, struct page *page, ++ struct user_beancounter *ub, int hash) ++{ ++ p->page = page; ++ p->ub = get_beancounter(ub); ++ p->next_hash = pb_hash_table[hash]; ++ pb_hash_table[hash] = p; ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ ub->ub_stat[smp_processor_id()].pbcs++; ++#endif ++} ++ ++/* ++ * Heart ++ */ ++ ++static int __pb_dup_ref(struct page *page, struct user_beancounter *bc, ++ int hash) ++{ ++ struct page_beancounter *p; ++ ++ for (p = pb_hash_table[hash]; ++ p != NULL && (p->page != page || p->ub != bc); ++ p = p->next_hash); ++ if (p == NULL) ++ return -1; ++ ++ PB_COUNT_INC(p->refcount); ++ return 0; ++} ++ ++static void __pb_add_ref(struct page *page, struct user_beancounter *bc, ++ struct page_beancounter **ppb, int hash) ++{ ++ struct page_beancounter *head, *p; ++ int shift; ++ ++ p = *ppb; ++ *ppb = p->next_hash; ++ ++ insert_pb(p, page, bc, hash); ++ head = page_pbc(page); ++ ++ if (head != NULL) { ++ /* ++ * Move the first element to the end of the list. ++ * List head (pb_head) is set to the next entry. 
++ * Note that this code works even if head is the only element ++ * on the list (because it's cyclic). ++ */ ++ BUG_ON(head->pb_magic != PB_MAGIC); ++ page_pbc(page) = next_page_pb(head); ++ PB_SHIFT_INC(head->refcount); ++ shift = PB_SHIFT_GET(head->refcount); ++ /* ++ * Update user beancounter, the share of head has been changed. ++ * Note that the shift counter is taken after increment. ++ */ ++ dec_held_pages(head->ub, UB_PAGE_WEIGHT >> shift); ++ /* add the new page beancounter to the end of the list */ ++ list_add_tail(&p->page_list, &page_pbc(page)->page_list); ++ } else { ++ page_pbc(page) = p; ++ shift = 0; ++ INIT_LIST_HEAD(&p->page_list); ++ } ++ ++ p->refcount = PB_REFCOUNT_MAKE(shift, 1); ++ /* update user beancounter for the new page beancounter */ ++ inc_held_pages(bc, UB_PAGE_WEIGHT >> shift); ++} ++ ++void pb_add_ref(struct page *page, struct mm_struct *mm, ++ struct page_beancounter **p_pb) ++{ ++ int hash; ++ struct user_beancounter *bc; ++ ++ bc = mm->mm_ub; ++ if (bc == NULL) ++ return; ++ ++ if (!PageAnon(page) && is_shmem_mapping(page->mapping)) ++ return; ++ ++ hash = pb_hash(bc, page); ++ ++ spin_lock(&pb_lock); ++ if (__pb_dup_ref(page, bc, hash)) ++ __pb_add_ref(page, bc, p_pb, hash); ++ spin_unlock(&pb_lock); ++} ++ ++void pb_dup_ref(struct page *page, struct mm_struct *mm, ++ struct page_beancounter **p_pb) ++{ ++ int hash; ++ struct user_beancounter *bc; ++ ++ bc = mm->mm_ub; ++ if (bc == NULL) ++ return; ++ ++ if (!PageAnon(page) && is_shmem_mapping(page->mapping)) ++ return; ++ ++ hash = pb_hash(bc, page); ++ ++ spin_lock(&pb_lock); ++ if (page_pbc(page) == NULL) ++ /* ++ * pages like ZERO_PAGE must not be accounted in pbc ++ * so on fork we just skip them ++ */ ++ goto out_unlock; ++ ++ if (unlikely(*p_pb != PBC_COPY_SAME)) ++ __pb_add_ref(page, bc, p_pb, hash); ++ else if (unlikely(__pb_dup_ref(page, bc, hash))) ++ WARN_ON(1); ++out_unlock: ++ spin_unlock(&pb_lock); ++} ++ ++void pb_remove_ref(struct page *page, struct mm_struct 
*mm) ++{ ++ int hash; ++ struct user_beancounter *bc; ++ struct page_beancounter *p, **q; ++ int shift, shiftt; ++ ++ bc = mm->mm_ub; ++ if (bc == NULL) ++ return; ++ ++ if (!PageAnon(page) && is_shmem_mapping(page->mapping)) ++ return; ++ ++ hash = pb_hash(bc, page); ++ ++ spin_lock(&pb_lock); ++ BUG_ON(page_pbc(page) != NULL && page_pbc(page)->pb_magic != PB_MAGIC); ++ for (q = pb_hash_table + hash, p = *q; ++ p != NULL && (p->page != page || p->ub != bc); ++ q = &p->next_hash, p = *q); ++ if (p == NULL) ++ goto out_unlock; ++ ++ PB_COUNT_DEC(p->refcount); ++ if (PB_COUNT_GET(p->refcount)) ++ /* ++ * More references from the same user beancounter exist. ++ * Nothing needs to be done. ++ */ ++ goto out_unlock; ++ ++ /* remove from the hash list */ ++ *q = p->next_hash; ++ ++ shift = PB_SHIFT_GET(p->refcount); ++ ++ dec_held_pages(p->ub, UB_PAGE_WEIGHT >> shift); ++ ++ if (page_pbc(page) == p) { ++ if (list_empty(&p->page_list)) ++ goto out_free; ++ page_pbc(page) = next_page_pb(p); ++ } ++ list_del(&p->page_list); ++ put_beancounter(p->ub); ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ p->ub->ub_stat[smp_processor_id()].pbcs--; ++#endif ++ pb_free(&p); ++ ++ /* Now balance the list. Move the tail and adjust its shift counter. 
*/ ++ p = prev_page_pb(page_pbc(page)); ++ shiftt = PB_SHIFT_GET(p->refcount); ++ page_pbc(page) = p; ++ PB_SHIFT_DEC(p->refcount); ++ ++ inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt); ++ ++ /* ++ * If the shift counter of the moved beancounter is different from the ++ * removed one's, repeat the procedure for one more tail beancounter ++ */ ++ if (shiftt > shift) { ++ p = prev_page_pb(page_pbc(page)); ++ page_pbc(page) = p; ++ PB_SHIFT_DEC(p->refcount); ++ inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt); ++ } ++ spin_unlock(&pb_lock); ++ return; ++ ++out_free: ++ page_pbc(page) = NULL; ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ p->ub->ub_stat[smp_processor_id()].pbcs--; ++#endif ++ put_beancounter(p->ub); ++ pb_free(&p); ++out_unlock: ++ spin_unlock(&pb_lock); ++ return; ++} ++ ++struct user_beancounter *pb_grab_page_ub(struct page *page) ++{ ++ struct page_beancounter *pb; ++ struct user_beancounter *ub; ++ ++ spin_lock(&pb_lock); ++ pb = page_pbc(page); ++ ub = (pb == NULL ? ERR_PTR(-EINVAL) : ++ get_beancounter(pb->ub)); ++ spin_unlock(&pb_lock); ++ return ub; ++} ++ ++void __init ub_init_pbc(void) ++{ ++ unsigned long hash_size; ++ ++ pb_cachep = kmem_cache_create("page_beancounter", ++ sizeof(struct page_beancounter), 0, ++ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL); ++ hash_size = num_physpages >> 2; ++ for (pb_hash_mask = 1; ++ (hash_size & pb_hash_mask) != hash_size; ++ pb_hash_mask = (pb_hash_mask << 1) + 1); ++ hash_size = pb_hash_mask + 1; ++ printk(KERN_INFO "Page beancounter hash is %lu entries.\n", hash_size); ++ pb_hash_table = vmalloc(hash_size * sizeof(struct page_beancounter *)); ++ memset(pb_hash_table, 0, hash_size * sizeof(struct page_beancounter *)); ++} +diff -upr linux-2.6.16.orig/kernel/ub/ub_pages.c linux-2.6.16-026test015/kernel/ub/ub_pages.c +--- linux-2.6.16.orig/kernel/ub/ub_pages.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/ub_pages.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,530 @@ ++/* ++ * 
kernel/ub/ub_pages.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/mm.h> ++#include <linux/highmem.h> ++#include <linux/virtinfo.h> ++#include <linux/module.h> ++#include <linux/shmem_fs.h> ++#include <linux/vmalloc.h> ++ ++#include <asm/pgtable.h> ++#include <asm/page.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_vmpages.h> ++ ++void warn_bad_rss(struct vm_area_struct *vma, unsigned long freed) ++{ ++ static struct ub_rate_info ri = { ++ .burst = 10, ++ .interval = 40 * HZ, ++ }; ++ struct user_beancounter *ub; ++ char ubuid[64] = "No UB"; ++ unsigned long vmrss; ++ ++ if (!ub_ratelimit(&ri)) ++ return; ++ ++ ub = vma->vm_mm->mm_ub; ++ if (ub) ++ print_ub_uid(ub, ubuid, sizeof(ubuid)); ++ ++ vmrss = get_vma_rss(vma) + freed; ++ printk(KERN_WARNING ++ "%s vm_rss: process pid %d comm %.20s flags %lx\n" ++ "vma %p/%p rss %lu/%lu freed %lu\n" ++ "flags %lx, ub %s\n", ++ vmrss > freed ? 
"Positive" : "Negative", ++ current->pid, current->comm, current->flags, ++ vma, vma->vm_mm, vmrss, vma_pages(vma), freed, ++ vma->vm_flags, ubuid); ++ dump_stack(); ++} ++ ++static inline unsigned long pages_in_pte_range(struct vm_area_struct *vma, ++ pmd_t *pmd, unsigned long addr, unsigned long end, ++ unsigned long *ret) ++{ ++ pte_t *pte; ++ spinlock_t *ptl; ++ ++ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); ++ do { ++ if (!pte_none(*pte) && pte_present(*pte)) ++ (*ret)++; ++ } while (pte++, addr += PAGE_SIZE, (addr != end)); ++ pte_unmap_unlock(pte - 1, ptl); ++ ++ return addr; ++} ++ ++static inline unsigned long pages_in_pmd_range(struct vm_area_struct *vma, ++ pud_t *pud, unsigned long addr, unsigned long end, ++ unsigned long *ret) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (pmd_none_or_clear_bad(pmd)) ++ continue; ++ next = pages_in_pte_range(vma, pmd, addr, next, ret); ++ } while (pmd++, addr = next, (addr != end)); ++ ++ return addr; ++} ++ ++static inline unsigned long pages_in_pud_range(struct vm_area_struct *vma, ++ pgd_t *pgd, unsigned long addr, unsigned long end, ++ unsigned long *ret) ++{ ++ pud_t *pud; ++ unsigned long next; ++ ++ pud = pud_offset(pgd, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ next = pages_in_pmd_range(vma, pud, addr, next, ret); ++ } while (pud++, addr = next, (addr != end)); ++ ++ return addr; ++} ++ ++unsigned long pages_in_vma_range(struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ unsigned long ret; ++ ++ ret = 0; ++ BUG_ON(addr >= end); ++ pgd = pgd_offset(vma->vm_mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ next = pages_in_pud_range(vma, pgd, addr, next, &ret); ++ } while (pgd++, addr = next, (addr != end)); ++ return ret; ++} ++ ++void fastcall 
__ub_update_physpages(struct user_beancounter *ub) ++{ ++ ub->ub_parms[UB_PHYSPAGES].held = ub->ub_tmpfs_respages ++ + (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT); ++ ub_adjust_maxheld(ub, UB_PHYSPAGES); ++} ++ ++void fastcall __ub_update_oomguarpages(struct user_beancounter *ub) ++{ ++ ub->ub_parms[UB_OOMGUARPAGES].held = ++ ub->ub_parms[UB_PHYSPAGES].held + ub->ub_swap_pages; ++ ub_adjust_maxheld(ub, UB_OOMGUARPAGES); ++} ++ ++void fastcall __ub_update_privvm(struct user_beancounter *ub) ++{ ++ ub->ub_parms[UB_PRIVVMPAGES].held = ++ (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT) ++ + ub->ub_unused_privvmpages ++ + ub->ub_parms[UB_SHMPAGES].held; ++ ub_adjust_maxheld(ub, UB_PRIVVMPAGES); ++} ++ ++static inline int __charge_privvm_locked(struct user_beancounter *ub, ++ unsigned long s, enum severity strict) ++{ ++ if (__charge_beancounter_locked(ub, UB_PRIVVMPAGES, s, strict) < 0) ++ return -ENOMEM; ++ ++ ub->ub_unused_privvmpages += s; ++ return 0; ++} ++ ++static void __unused_privvm_dec_locked(struct user_beancounter *ub, ++ long size) ++{ ++ /* catch possible overflow */ ++ if (ub->ub_unused_privvmpages < size) { ++ uncharge_warn(ub, UB_UNUSEDPRIVVM, ++ size, ub->ub_unused_privvmpages); ++ size = ub->ub_unused_privvmpages; ++ } ++ ub->ub_unused_privvmpages -= size; ++ __ub_update_privvm(ub); ++} ++ ++void __ub_unused_privvm_dec(struct mm_struct *mm, long size) ++{ ++ unsigned long flags; ++ struct user_beancounter *ub; ++ ++ ub = mm->mm_ub; ++ if (ub == NULL) ++ return; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __unused_privvm_dec_locked(ub, size); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++void ub_unused_privvm_sub(struct mm_struct *mm, ++ struct vm_area_struct *vma, unsigned long count) ++{ ++ if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file)) ++ __ub_unused_privvm_dec(mm, count); ++} ++ ++void ub_unused_privvm_add(struct mm_struct *mm, ++ struct vm_area_struct *vma, unsigned long size) ++{ ++ 
unsigned long flags; ++ struct user_beancounter *ub; ++ ++ ub = mm->mm_ub; ++ if (ub == NULL || !VM_UB_PRIVATE(vma->vm_flags, vma->vm_file)) ++ return; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_unused_privvmpages += size; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++int ub_protected_charge(struct mm_struct *mm, unsigned long size, ++ unsigned long newflags, struct vm_area_struct *vma) ++{ ++ unsigned long flags; ++ struct file *file; ++ struct user_beancounter *ub; ++ ++ ub = mm->mm_ub; ++ if (ub == NULL) ++ return PRIVVM_NO_CHARGE; ++ ++ flags = vma->vm_flags; ++ if (!((newflags ^ flags) & VM_WRITE)) ++ return PRIVVM_NO_CHARGE; ++ ++ file = vma->vm_file; ++ if (!VM_UB_PRIVATE(newflags | VM_WRITE, file)) ++ return PRIVVM_NO_CHARGE; ++ ++ if (flags & VM_WRITE) ++ return PRIVVM_TO_SHARED; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (__charge_privvm_locked(ub, size, UB_SOFT) < 0) ++ goto err; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return PRIVVM_TO_PRIVATE; ++ ++err: ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return PRIVVM_ERROR; ++} ++ ++int ub_memory_charge(struct mm_struct *mm, unsigned long size, ++ unsigned vm_flags, struct file *vm_file, int sv) ++{ ++ struct user_beancounter *ub, *ubl; ++ unsigned long flags; ++ ++ ub = mm->mm_ub; ++ if (ub == NULL) ++ return 0; ++ ++ size >>= PAGE_SHIFT; ++ if (size > UB_MAXVALUE) ++ return -EINVAL; ++ ++ BUG_ON(sv != UB_SOFT && sv != UB_HARD); ++ ++ if (vm_flags & VM_LOCKED) { ++ if (charge_beancounter(ub, UB_LOCKEDPAGES, size, sv)) ++ goto out_err; ++ } ++ if (VM_UB_PRIVATE(vm_flags, vm_file)) { ++ for (ubl = ub; ubl->parent != NULL; ubl = ubl->parent); ++ spin_lock_irqsave(&ubl->ub_lock, flags); ++ if (__charge_privvm_locked(ubl, size, sv)) ++ goto out_private; ++ spin_unlock_irqrestore(&ubl->ub_lock, flags); ++ } ++ return 0; ++ ++out_private: ++ 
spin_unlock_irqrestore(&ubl->ub_lock, flags); ++ if (vm_flags & VM_LOCKED) ++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size); ++out_err: ++ return -ENOMEM; ++} ++ ++void ub_memory_uncharge(struct mm_struct *mm, unsigned long size, ++ unsigned vm_flags, struct file *vm_file) ++{ ++ struct user_beancounter *ub; ++ unsigned long flags; ++ ++ ub = mm->mm_ub; ++ if (ub == NULL) ++ return; ++ ++ size >>= PAGE_SHIFT; ++ ++ if (vm_flags & VM_LOCKED) ++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size); ++ if (VM_UB_PRIVATE(vm_flags, vm_file)) { ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __unused_privvm_dec_locked(ub, size); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ } ++} ++ ++int ub_locked_charge(struct mm_struct *mm, unsigned long size) ++{ ++ struct user_beancounter *ub; ++ ++ ub = mm->mm_ub; ++ if (ub == NULL) ++ return 0; ++ ++ return charge_beancounter(ub, UB_LOCKEDPAGES, ++ size >> PAGE_SHIFT, UB_HARD); ++} ++ ++void ub_locked_uncharge(struct mm_struct *mm, unsigned long size) ++{ ++ struct user_beancounter *ub; ++ ++ ub = mm->mm_ub; ++ if (ub == NULL) ++ return; ++ ++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT); ++} ++ ++int ub_lockedshm_charge(struct shmem_inode_info *shi, unsigned long size) ++{ ++ struct user_beancounter *ub; ++ ++ ub = shi->shmi_ub; ++ if (ub == NULL) ++ return 0; ++ ++ return charge_beancounter(ub, UB_LOCKEDPAGES, ++ size >> PAGE_SHIFT, UB_HARD); ++} ++ ++void ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size) ++{ ++ struct user_beancounter *ub; ++ ++ ub = shi->shmi_ub; ++ if (ub == NULL) ++ return; ++ ++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT); ++} ++ ++ ++static inline void do_ub_tmpfs_respages_inc(struct user_beancounter *ub) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_tmpfs_respages++; ++ __ub_update_physpages(ub); ++ __ub_update_oomguarpages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, 
flags); ++} ++ ++void ub_tmpfs_respages_inc(struct shmem_inode_info *shi) ++{ ++ struct user_beancounter *ub; ++ ++ for (ub = shi->shmi_ub; ub != NULL; ub = ub->parent) ++ do_ub_tmpfs_respages_inc(ub); ++} ++ ++static inline void do_ub_tmpfs_respages_sub(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ /* catch possible overflow */ ++ if (ub->ub_tmpfs_respages < size) { ++ uncharge_warn(ub, UB_TMPFSPAGES, ++ size, ub->ub_tmpfs_respages); ++ size = ub->ub_tmpfs_respages; ++ } ++ ub->ub_tmpfs_respages -= size; ++ /* update values what is the most interesting */ ++ __ub_update_physpages(ub); ++ __ub_update_oomguarpages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++void ub_tmpfs_respages_sub(struct shmem_inode_info *shi, ++ unsigned long size) ++{ ++ struct user_beancounter *ub; ++ ++ for (ub = shi->shmi_ub; ub != NULL; ub = ub->parent) ++ do_ub_tmpfs_respages_sub(ub, size); ++} ++ ++int ub_shmpages_charge(struct shmem_inode_info *shi, unsigned long size) ++{ ++ int ret; ++ unsigned long flags; ++ struct user_beancounter *ub; ++ ++ ub = shi->shmi_ub; ++ if (ub == NULL) ++ return 0; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ret = __charge_beancounter_locked(ub, UB_SHMPAGES, size, UB_HARD); ++ if (ret == 0) ++ __ub_update_privvm(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return ret; ++} ++ ++void ub_shmpages_uncharge(struct shmem_inode_info *shi, unsigned long size) ++{ ++ unsigned long flags; ++ struct user_beancounter *ub; ++ ++ ub = shi->shmi_ub; ++ if (ub == NULL) ++ return; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_beancounter_locked(ub, UB_SHMPAGES, size); ++ __ub_update_privvm(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++#ifdef CONFIG_USER_SWAP_ACCOUNTING ++static inline void do_ub_swapentry_inc(struct user_beancounter 
*ub) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_swap_pages++; ++ __ub_update_oomguarpages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++void ub_swapentry_inc(struct swap_info_struct *si, pgoff_t num, ++ struct user_beancounter *ub) ++{ ++ si->swap_ubs[num] = get_beancounter(ub); ++ for (; ub != NULL; ub = ub->parent) ++ do_ub_swapentry_inc(ub); ++} ++EXPORT_SYMBOL(ub_swapentry_inc); ++ ++static inline void do_ub_swapentry_dec(struct user_beancounter *ub) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (ub->ub_swap_pages <= 0) ++ uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages); ++ else ++ ub->ub_swap_pages--; ++ __ub_update_oomguarpages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++void ub_swapentry_dec(struct swap_info_struct *si, pgoff_t num) ++{ ++ struct user_beancounter *ub, *ubp; ++ ++ ub = si->swap_ubs[num]; ++ si->swap_ubs[num] = NULL; ++ for (ubp = ub; ubp != NULL; ubp = ubp->parent) ++ do_ub_swapentry_dec(ubp); ++ put_beancounter(ub); ++} ++EXPORT_SYMBOL(ub_swapentry_dec); ++ ++int ub_swap_init(struct swap_info_struct *si, pgoff_t num) ++{ ++ struct user_beancounter **ubs; ++ ++ ubs = vmalloc(num * sizeof(struct user_beancounter *)); ++ if (ubs == NULL) ++ return -ENOMEM; ++ ++ memset(ubs, 0, num * sizeof(struct user_beancounter *)); ++ si->swap_ubs = ubs; ++ return 0; ++} ++ ++void ub_swap_fini(struct swap_info_struct *si) ++{ ++ if (si->swap_ubs) { ++ vfree(si->swap_ubs); ++ si->swap_ubs = NULL; ++ } ++} ++#endif ++ ++static int vmguar_enough_memory(struct vnotifier_block *self, ++ unsigned long event, void *arg, int old_ret) ++{ ++ struct user_beancounter *ub; ++ ++ if (event != VIRTINFO_ENOUGHMEM) ++ return old_ret; ++ ++ for (ub = current->mm->mm_ub; ub->parent != NULL; ub = ub->parent); ++ if (ub->ub_parms[UB_PRIVVMPAGES].held > ++ ub->ub_parms[UB_VMGUARPAGES].barrier) ++ return old_ret; ++ ++ return NOTIFY_OK; ++} ++ ++static struct 
vnotifier_block vmguar_notifier_block = { ++ .notifier_call = vmguar_enough_memory ++}; ++ ++static int __init init_vmguar_notifier(void) ++{ ++ virtinfo_notifier_register(VITYPE_GENERAL, &vmguar_notifier_block); ++ return 0; ++} ++ ++static void __exit fini_vmguar_notifier(void) ++{ ++ virtinfo_notifier_unregister(VITYPE_GENERAL, &vmguar_notifier_block); ++} ++ ++module_init(init_vmguar_notifier); ++module_exit(fini_vmguar_notifier); +diff -upr linux-2.6.16.orig/kernel/ub/ub_proc.c linux-2.6.16-026test015/kernel/ub/ub_proc.c +--- linux-2.6.16.orig/kernel/ub/ub_proc.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/ub_proc.c 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,389 @@ ++/* ++ * linux/fs/proc/proc_ub.c ++ * ++ * Copyright (C) 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg> ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * TODO: ++ * ++ * Changes: ++ */ ++ ++#include <linux/errno.h> ++#include <linux/sched.h> ++#include <linux/kernel.h> ++#include <linux/mm.h> ++#include <linux/proc_fs.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_hash.h> ++#include <ub/ub_debug.h> ++#include <ub/ub_page.h> ++ ++#include <asm/page.h> ++#include <asm/uaccess.h> ++ ++/* ++ * we have 8 format strings depending on: ++ * 1. BITS_PER_LONG ++ * 2. CONFIG_UBC_KEEP_UNUSED ++ * 3. 
resource number (see out_proc_beancounter) ++ */ ++ ++#ifdef CONFIG_UBC_KEEP_UNUSED ++#define REF_FORMAT "%5.5s %4i: %-12s " ++#define UID_HEAD_STR "uid ref" ++#else ++#define REF_FORMAT "%10.10s: %-12s " ++#define UID_HEAD_STR "uid" ++#endif ++#define REF2_FORMAT "%10s %-12s " ++ ++#if BITS_PER_LONG == 32 ++#define RES_FORMAT "%10lu %10lu %10lu %10lu %10lu" ++#define HEAD_FORMAT "%10s %10s %10s %10s %10s" ++#define UB_PROC_LINE_TEXT (10+2+12+1+10+1+10+1+10+1+10+1+10) ++#else ++#define RES_FORMAT "%20lu %20lu %20lu %20lu %20lu" ++#define HEAD_FORMAT "%20s %20s %20s %20s %20s" ++#define UB_PROC_LINE_TEXT (10+2+12+1+20+1+20+1+20+1+20+1+20) ++#endif ++ ++#define UB_PROC_LINE_LEN (UB_PROC_LINE_TEXT + 1) ++ ++static void out_proc_version(char *buf) ++{ ++ int len; ++ ++ len = sprintf(buf, "Version: 2.5"); ++ memset(buf + len, ' ', UB_PROC_LINE_TEXT - len); ++ buf[UB_PROC_LINE_TEXT] = '\n'; ++} ++ ++static void out_proc_head(char *buf) ++{ ++ sprintf(buf, REF2_FORMAT HEAD_FORMAT, ++ UID_HEAD_STR, "resource", "held", "maxheld", ++ "barrier", "limit", "failcnt"); ++ buf[UB_PROC_LINE_TEXT] = '\n'; ++} ++ ++static void out_proc_beancounter(char *buf, struct user_beancounter *ub, int r) ++{ ++ if (r == 0) { ++ char tmpbuf[64]; ++ print_ub_uid(ub, tmpbuf, sizeof(tmpbuf)); ++ sprintf(buf, REF_FORMAT RES_FORMAT, ++ tmpbuf, ++#ifdef CONFIG_UBC_KEEP_UNUSED ++ atomic_read(&ub->ub_refcount), ++#endif ++ ub_rnames[r], ub->ub_parms[r].held, ++ ub->ub_parms[r].maxheld, ub->ub_parms[r].barrier, ++ ub->ub_parms[r].limit, ub->ub_parms[r].failcnt); ++ } else ++ sprintf(buf, REF2_FORMAT RES_FORMAT, ++ "", ub_rnames[r], ++ ub->ub_parms[r].held, ub->ub_parms[r].maxheld, ++ ub->ub_parms[r].barrier, ub->ub_parms[r].limit, ++ ub->ub_parms[r].failcnt); ++ ++ buf[UB_PROC_LINE_TEXT] = '\n'; ++} ++ ++static int ub_accessible(struct user_beancounter *ub, ++ struct user_beancounter *exec_ub, ++ struct file *file) ++{ ++ struct user_beancounter *p, *q; ++ ++ for (p = exec_ub; p->parent != NULL; p = 
p->parent); ++ for (q = ub; q->parent != NULL; q = q->parent); ++ if (p != get_ub0() && q != p) ++ return 0; ++ if (ub->parent == NULL) ++ return 1; ++ return file->private_data == NULL ? 0 : 1; ++} ++ ++static ssize_t ub_proc_read(struct file *file, char *usrbuf, size_t len, ++ loff_t *poff) ++{ ++ ssize_t retval; ++ char *buf; ++ unsigned long flags; ++ int i, resource; ++ struct ub_hash_slot *slot; ++ struct user_beancounter *ub; ++ struct user_beancounter *exec_ub = get_exec_ub(); ++ loff_t n, off; ++ int rem, produced, job, tocopy; ++ const int is_capable = ++ (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)); ++ ++ retval = -ENOBUFS; ++ buf = (char *)__get_free_page(GFP_KERNEL); ++ if (buf == NULL) ++ goto out; ++ ++ retval = 0; ++ if (!is_capable) ++ goto out_free; ++ ++ off = *poff; ++ if (off < 0) /* can't happen, just in case */ ++ goto inval; ++ ++again: ++ i = 0; ++ slot = ub_hash; ++ n = off; /* The amount of data to skip */ ++ produced = 0; ++ if (n < (UB_PROC_LINE_LEN * 2)) { ++ if (n < UB_PROC_LINE_LEN) { ++ out_proc_version(buf); ++ produced += UB_PROC_LINE_LEN; ++ n += UB_PROC_LINE_LEN; ++ } ++ out_proc_head(buf + produced); ++ produced += UB_PROC_LINE_LEN; ++ n += UB_PROC_LINE_LEN; ++ } ++ n -= (2 * UB_PROC_LINE_LEN); ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ while (1) { ++ for (ub = slot->ubh_beans; ++ ub != NULL && n >= (UB_RESOURCES * UB_PROC_LINE_LEN); ++ ub = ub->ub_next) ++ if (is_capable && ub_accessible(ub, exec_ub, file)) ++ n -= (UB_RESOURCES * UB_PROC_LINE_LEN); ++ if (ub != NULL || ++i >= UB_HASH_SIZE) ++ break; ++ ++slot; ++ } ++ rem = n; /* the amount of the data in the buffer to skip */ ++ job = PAGE_SIZE - UB_PROC_LINE_LEN + 1; /* end of buffer data */ ++ if (len < job - rem) ++ job = rem + len; ++ while (ub != NULL && produced < job) { ++ if (is_capable && ub_accessible(ub, exec_ub, file)) ++ for (resource = 0; ++ produced < job && resource < UB_RESOURCES; ++ resource++, produced += UB_PROC_LINE_LEN) ++ { ++ 
out_proc_beancounter(buf + produced, ++ ub, resource); ++ } ++ if (produced >= job) ++ break; ++ /* Find the next beancounter to produce more data. */ ++ ub = ub->ub_next; ++ while (ub == NULL && ++i < UB_HASH_SIZE) { ++ ++slot; ++ ub = slot->ubh_beans; ++ } ++ } ++ ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ub_debug(UBD_ALLOC, KERN_DEBUG "UB_PROC: produced %d, job %d, rem %d\n", ++ produced, job, rem); ++ ++ /* ++ * Temporary buffer `buf' contains `produced' bytes. ++ * Extract no more than `len' bytes at offset `rem'. ++ */ ++ if (produced <= rem) ++ goto out_free; ++ tocopy = produced - rem; ++ if (len < tocopy) ++ tocopy = len; ++ if (!tocopy) ++ goto out_free; ++ if (copy_to_user(usrbuf, buf + rem, tocopy)) ++ goto fault; ++ off += tocopy; /* can't overflow */ ++ *poff = off; ++ len -= tocopy; ++ retval += tocopy; ++ if (!len) ++ goto out_free; ++ usrbuf += tocopy; ++ goto again; ++ ++fault: ++ retval = -EFAULT; ++out_free: ++ free_page((unsigned long)buf); ++out: ++ return retval; ++ ++inval: ++ retval = -EINVAL; ++ goto out_free; ++} ++ ++static int ub_proc_open(struct inode *inode, struct file *file) ++{ ++ file->private_data = strcmp(file->f_dentry->d_name.name, ++ "user_beancounters") ? 
++ (void *)-1 : NULL; ++ return 0; ++} ++ ++static struct file_operations ub_file_operations = { ++ .read = &ub_proc_read, ++ .open = &ub_proc_open ++}; ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++#include <linux/seq_file.h> ++#include <linux/kmem_cache.h> ++ ++static void *ubd_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ struct user_beancounter *ub; ++ long slot; ++ ++ spin_lock_irq(&ub_hash_lock); ++ for (slot = 0; slot < UB_HASH_SIZE; slot++) ++ for (ub = ub_hash[slot].ubh_beans; ub; ub = ub->ub_next) { ++ if (n == 0) { ++ m->private = (void *)slot; ++ return (void *)ub; ++ } ++ n--; ++ } ++ return NULL; ++} ++ ++static void *ubd_next(struct seq_file *m, void *p, loff_t *pos) ++{ ++ struct user_beancounter *ub; ++ long slot; ++ ++ ub = (struct user_beancounter *)p; ++ slot = (long)m->private; ++ ++ ++*pos; ++ ub = ub->ub_next; ++ while (1) { ++ for (; ub; ub = ub->ub_next) { ++ m->private = (void *)slot; ++ return (void *)ub; ++ } ++ slot++; ++ if (slot == UB_HASH_SIZE) ++ break; ++ ub = ub_hash[slot].ubh_beans; ++ } ++ return NULL; ++} ++ ++static void ubd_stop(struct seq_file *m, void *p) ++{ ++ spin_unlock_irq(&ub_hash_lock); ++} ++ ++#define PROC_LINE_FMT "\t%-17s\t%5lu\t%5lu\n" ++ ++static int ubd_show(struct seq_file *m, void *p) ++{ ++ struct user_beancounter *ub; ++ struct ub_cache_counter *cc; ++ long pages, vmpages, pbc, swap, unmap; ++ int i; ++ char id[64]; ++ ++ ub = (struct user_beancounter *)p; ++ print_ub_uid(ub, id, sizeof(id)); ++ seq_printf(m, "%s:%d\n", id, atomic_read(&ub->ub_refcount)); ++ ++ pages = vmpages = pbc = swap = unmap = 0; ++ for (i = 0; i < NR_CPUS; i++) { ++ pages += ub->ub_stat[i].pages_charged; ++ vmpages += ub->ub_stat[i].vmalloc_charged; ++ pbc += ub->ub_stat[i].pbcs; ++ swap += ub->ub_stat[i].swapin; ++ unmap += ub->ub_stat[i].unmap; ++ } ++ if (pages < 0) ++ pages = 0; ++ if (vmpages < 0) ++ vmpages = 0; ++ seq_printf(m, PROC_LINE_FMT, "pages", pages, PAGE_SIZE); ++ seq_printf(m, PROC_LINE_FMT, "vmalloced", 
vmpages, PAGE_SIZE); ++ ++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_UNUSEDPRIVVM], ++ ub->ub_unused_privvmpages, PAGE_SIZE); ++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_TMPFSPAGES], ++ ub->ub_tmpfs_respages, PAGE_SIZE); ++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_SWAPPAGES], ++ ub->ub_swap_pages, PAGE_SIZE); ++ seq_printf(m, PROC_LINE_FMT, "pbcs", pbc, ++ (unsigned long)sizeof(struct page_beancounter)); ++ ++ seq_printf(m, PROC_LINE_FMT, "swapin", swap, 0UL); ++ seq_printf(m, PROC_LINE_FMT, "unmap", unmap, 0UL); ++ /* interrupts are disabled by locking ub_hash_lock */ ++ spin_lock(&cc_lock); ++ list_for_each_entry (cc, &ub->ub_cclist, ulist) { ++ kmem_cache_t *cachep; ++ ++ cachep = cc->cachep; ++ seq_printf(m, PROC_LINE_FMT, ++ cachep->name, ++ cc->counter, ++ (unsigned long)cachep->objuse); ++ } ++ spin_unlock(&cc_lock); ++ return 0; ++} ++ ++static struct seq_operations kmemdebug_op = { ++ .start = ubd_start, ++ .next = ubd_next, ++ .stop = ubd_stop, ++ .show = ubd_show, ++}; ++ ++static int kmem_debug_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &kmemdebug_op); ++} ++ ++static struct file_operations kmem_debug_ops = { ++ .open = kmem_debug_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++#endif ++ ++void __init ub_init_proc(void) ++{ ++ struct proc_dir_entry *entry; ++ ++ entry = create_proc_entry("user_beancounters", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &ub_file_operations; ++ else ++ panic("Can't create /proc/user_beancounters entry!\n"); ++ ++ entry = create_proc_entry("user_beancounters_sub", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &ub_file_operations; ++ else ++ panic("Can't create /proc/user_beancounters2 entry!\n"); ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ entry = create_proc_entry("user_beancounters_debug", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &kmem_debug_ops; ++ else ++ panic("Can't create /proc/user_beancounters_debug entry!\n"); ++#endif ++} 
+diff -upr linux-2.6.16.orig/kernel/ub/ub_stat.c linux-2.6.16-026test015/kernel/ub/ub_stat.c +--- linux-2.6.16.orig/kernel/ub/ub_stat.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/ub_stat.c 2006-07-04 14:41:37.000000000 +0400 +@@ -0,0 +1,465 @@ ++/* ++ * kernel/ub/ub_stat.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/timer.h> ++#include <linux/sched.h> ++#include <linux/init.h> ++#include <linux/jiffies.h> ++#include <linux/list.h> ++#include <linux/errno.h> ++#include <linux/suspend.h> ++ ++#include <asm/uaccess.h> ++#include <asm/param.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_hash.h> ++#include <ub/ub_stat.h> ++ ++static spinlock_t ubs_notify_lock = SPIN_LOCK_UNLOCKED; ++static LIST_HEAD(ubs_notify_list); ++static long ubs_min_interval; ++static ubstattime_t ubs_start_time, ubs_end_time; ++static struct timer_list ubs_timer; ++ ++static int ubstat_get_list(void *buf, long size) ++{ ++ int retval; ++ unsigned long flags; ++ int slotnr; ++ struct ub_hash_slot *slot; ++ struct user_beancounter *ub, *last_ub; ++ long *page, *ptr, *end; ++ int len; ++ ++ page = (long *)__get_free_page(GFP_KERNEL); ++ if (page == NULL) ++ return -ENOMEM; ++ ++ retval = 0; ++ slotnr = 0; ++ slot = ub_hash; ++ last_ub = NULL; ++ while (1) { ++ ptr = page; ++ end = page + PAGE_SIZE / sizeof(*ptr); ++ ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ if (last_ub == NULL) ++ ub = slot->ubh_beans; ++ else ++ ub = last_ub->ub_next; ++ while (1) { ++ for (; ub != NULL; ub = ub->ub_next) { ++ if (ub->parent != NULL) ++ continue; ++ *ptr++ = ub->ub_uid; ++ if (ptr == end) ++ break; ++ } ++ if (ptr == end) ++ break; ++ ++slot; ++ if (++slotnr >= UB_HASH_SIZE) ++ break; ++ ub = slot->ubh_beans; ++ } ++ if (ptr == page) ++ goto out_unlock; ++ if (ub != NULL) ++ get_beancounter(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, 
flags); ++ ++ if (last_ub != NULL) ++ put_beancounter(last_ub); ++ last_ub = ub; /* last visited beancounter in the slot */ ++ ++ len = min_t(long, (ptr - page) * sizeof(*ptr), size); ++ if (copy_to_user(buf, page, len)) { ++ retval = -EFAULT; ++ break; ++ } ++ retval += len; ++ if (len < PAGE_SIZE) ++ break; ++ buf += len; ++ size -= len; ++ } ++out: ++ if (last_ub != NULL) ++ put_beancounter(last_ub); ++ free_page((unsigned long)page); ++ return retval; ++ ++out_unlock: ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ goto out; ++} ++ ++static int ubstat_gettime(void *buf, long size) ++{ ++ ubgettime_t data; ++ int retval; ++ ++ spin_lock(&ubs_notify_lock); ++ data.start_time = ubs_start_time; ++ data.end_time = ubs_end_time; ++ data.cur_time = ubs_start_time + (jiffies - ubs_start_time * HZ) / HZ; ++ spin_unlock(&ubs_notify_lock); ++ ++ retval = min_t(long, sizeof(data), size); ++ if (copy_to_user(buf, &data, retval)) ++ retval = -EFAULT; ++ return retval; ++} ++ ++static int ubstat_do_read_one(struct user_beancounter *ub, int res, void *kbuf) ++{ ++ struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstatparm_t param[1]; ++ } *data; ++ ++ data = kbuf; ++ data->start_time = ubs_start_time; ++ data->end_time = ubs_end_time; ++ ++ data->param[0].maxheld = ub->ub_store[res].maxheld; ++ data->param[0].failcnt = ub->ub_store[res].failcnt; ++ ++ return sizeof(*data); ++} ++ ++static int ubstat_do_read_all(struct user_beancounter *ub, void *kbuf, int size) ++{ ++ int wrote; ++ struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstatparm_t param[UB_RESOURCES]; ++ } *data; ++ int resource; ++ ++ data = kbuf; ++ data->start_time = ubs_start_time; ++ data->end_time = ubs_end_time; ++ wrote = sizeof(data->start_time) + sizeof(data->end_time); ++ ++ for (resource = 0; resource < UB_RESOURCES; resource++) { ++ if (size < wrote + sizeof(data->param[resource])) ++ break; ++ data->param[resource].maxheld = ub->ub_store[resource].maxheld; ++ 
data->param[resource].failcnt = ub->ub_store[resource].failcnt; ++ wrote += sizeof(data->param[resource]); ++ } ++ ++ return wrote; ++} ++ ++static int ubstat_do_read_full(struct user_beancounter *ub, void *kbuf, ++ int size) ++{ ++ int wrote; ++ struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstatparmf_t param[UB_RESOURCES]; ++ } *data; ++ int resource; ++ ++ data = kbuf; ++ data->start_time = ubs_start_time; ++ data->end_time = ubs_end_time; ++ wrote = sizeof(data->start_time) + sizeof(data->end_time); ++ ++ for (resource = 0; resource < UB_RESOURCES; resource++) { ++ if (size < wrote + sizeof(data->param[resource])) ++ break; ++ /* The beginning of ubstatparmf_t matches struct ubparm. */ ++ memcpy(&data->param[resource], &ub->ub_store[resource], ++ sizeof(ub->ub_store[resource])); ++ data->param[resource].__unused1 = 0; ++ data->param[resource].__unused2 = 0; ++ wrote += sizeof(data->param[resource]); ++ } ++ return wrote; ++} ++ ++static int ubstat_get_stat(struct user_beancounter *ub, long cmd, ++ void *buf, long size) ++{ ++ void *kbuf; ++ int retval; ++ ++ kbuf = (void *)__get_free_page(GFP_KERNEL); ++ if (kbuf == NULL) ++ return -ENOMEM; ++ ++ spin_lock(&ubs_notify_lock); ++ switch (UBSTAT_CMD(cmd)) { ++ case UBSTAT_READ_ONE: ++ retval = -EINVAL; ++ if (UBSTAT_PARMID(cmd) >= UB_RESOURCES) ++ break; ++ retval = ubstat_do_read_one(ub, ++ UBSTAT_PARMID(cmd), kbuf); ++ break; ++ case UBSTAT_READ_ALL: ++ retval = ubstat_do_read_all(ub, kbuf, PAGE_SIZE); ++ break; ++ case UBSTAT_READ_FULL: ++ retval = ubstat_do_read_full(ub, kbuf, PAGE_SIZE); ++ break; ++ default: ++ retval = -EINVAL; ++ } ++ spin_unlock(&ubs_notify_lock); ++ ++ if (retval > 0) { ++ retval = min_t(long, retval, size); ++ if (copy_to_user(buf, kbuf, retval)) ++ retval = -EFAULT; ++ } ++ ++ free_page((unsigned long)kbuf); ++ return retval; ++} ++ ++static int ubstat_handle_notifrq(ubnotifrq_t *req) ++{ ++ int retval; ++ struct ub_stat_notify *new_notify; ++ struct list_head 
*entry; ++ struct task_struct *tsk_to_free; ++ ++ new_notify = kmalloc(sizeof(new_notify), GFP_KERNEL); ++ if (new_notify == NULL) ++ return -ENOMEM; ++ ++ tsk_to_free = NULL; ++ INIT_LIST_HEAD(&new_notify->list); ++ ++ spin_lock(&ubs_notify_lock); ++ list_for_each(entry, &ubs_notify_list) { ++ struct ub_stat_notify *notify; ++ ++ notify = list_entry(entry, struct ub_stat_notify, list); ++ if (notify->task == current) { ++ kfree(new_notify); ++ new_notify = notify; ++ break; ++ } ++ } ++ ++ retval = -EINVAL; ++ if (req->maxinterval < 1) ++ goto out_unlock; ++ if (req->maxinterval > TIME_MAX_SEC) ++ req->maxinterval = TIME_MAX_SEC; ++ if (req->maxinterval < ubs_min_interval) { ++ unsigned long dif; ++ ++ ubs_min_interval = req->maxinterval; ++ dif = (ubs_timer.expires - jiffies + HZ - 1) / HZ; ++ if (dif > req->maxinterval) ++ mod_timer(&ubs_timer, ++ ubs_timer.expires - ++ (dif - req->maxinterval) * HZ); ++ } ++ ++ if (entry != &ubs_notify_list) { ++ list_del(&new_notify->list); ++ tsk_to_free = new_notify->task; ++ } ++ if (req->signum) { ++ new_notify->task = current; ++ get_task_struct(new_notify->task); ++ new_notify->signum = req->signum; ++ list_add(&new_notify->list, &ubs_notify_list); ++ } else ++ kfree(new_notify); ++ retval = 0; ++out_unlock: ++ spin_unlock(&ubs_notify_lock); ++ if (tsk_to_free != NULL) ++ put_task_struct(tsk_to_free); ++ return retval; ++} ++ ++/* ++ * former sys_ubstat ++ */ ++long do_ubstat(int func, unsigned long arg1, unsigned long arg2, void *buf, ++ long size) ++{ ++ int retval; ++ struct user_beancounter *ub; ++ ++ if (func == UBSTAT_UBPARMNUM) ++ return UB_RESOURCES; ++ if (func == UBSTAT_UBLIST) ++ return ubstat_get_list(buf, size); ++ if (!(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))) ++ return -EPERM; ++ ++ if (func == UBSTAT_GETTIME) { ++ retval = ubstat_gettime(buf, size); ++ goto notify; ++ } ++ ++ ub = get_exec_ub(); ++ if (ub != NULL && ub->ub_uid == arg1) ++ get_beancounter(ub); ++ else /* FIXME must be if 
(ve_is_super) */ ++ ub = get_beancounter_byuid(arg1, 0); ++ ++ if (ub == NULL) ++ return -ESRCH; ++ ++ retval = ubstat_get_stat(ub, func, buf, size); ++ put_beancounter(ub); ++notify: ++ /* Handle request for notification */ ++ if (retval >= 0) { ++ ubnotifrq_t notifrq; ++ int err; ++ ++ err = -EFAULT; ++ if (!copy_from_user(¬ifrq, (void *)arg2, sizeof(notifrq))) ++ err = ubstat_handle_notifrq(¬ifrq); ++ if (err) ++ retval = err; ++ } ++ ++ return retval; ++} ++ ++static void ubstat_save_onestat(struct user_beancounter *ub) ++{ ++ int resource; ++ ++ /* called with local irq disabled */ ++ spin_lock(&ub->ub_lock); ++ for (resource = 0; resource < UB_RESOURCES; resource++) { ++ memcpy(&ub->ub_store[resource], &ub->ub_parms[resource], ++ sizeof(struct ubparm)); ++ ub->ub_parms[resource].minheld = ++ ub->ub_parms[resource].maxheld = ++ ub->ub_parms[resource].held; ++ } ++ spin_unlock(&ub->ub_lock); ++} ++ ++static void ubstat_save_statistics(void) ++{ ++ unsigned long flags; ++ int i; ++ struct user_beancounter *ub; ++ ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ for_each_beancounter(i, ub) ++ ubstat_save_onestat(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++} ++ ++static void ubstatd_timeout(unsigned long __data) ++{ ++ struct task_struct *p; ++ ++ p = (struct task_struct *) __data; ++ wake_up_process(p); ++} ++ ++/* ++ * Safe wrapper for send_sig. It prevents a race with release_task ++ * for sighand. ++ * Should be called under tasklist_lock. 
++ */ ++static void task_send_sig(struct ub_stat_notify *notify) ++{ ++ if (likely(notify->task->sighand != NULL)) ++ send_sig(notify->signum, notify->task, 1); ++} ++ ++static inline void do_notifies(void) ++{ ++ LIST_HEAD(notif_free_list); ++ struct ub_stat_notify *notify; ++ struct ub_stat_notify *tmp; ++ ++ spin_lock(&ubs_notify_lock); ++ ubs_start_time = ubs_end_time; ++ /* ++ * the expression below relies on time being unsigned long and ++ * arithmetic promotion rules ++ */ ++ ubs_end_time += (ubs_timer.expires - ubs_start_time * HZ) / HZ; ++ mod_timer(&ubs_timer, ubs_timer.expires + ubs_min_interval * HZ); ++ ubs_min_interval = TIME_MAX_SEC; ++ /* save statistics accumulated for the interval */ ++ ubstat_save_statistics(); ++ /* send signals */ ++ read_lock(&tasklist_lock); ++ while (!list_empty(&ubs_notify_list)) { ++ notify = list_entry(ubs_notify_list.next, ++ struct ub_stat_notify, list); ++ task_send_sig(notify); ++ list_del(¬ify->list); ++ list_add(¬ify->list, ¬if_free_list); ++ } ++ read_unlock(&tasklist_lock); ++ spin_unlock(&ubs_notify_lock); ++ ++ list_for_each_entry_safe(notify, tmp, ¬if_free_list, list) { ++ put_task_struct(notify->task); ++ kfree(notify); ++ } ++} ++ ++/* ++ * Kernel thread ++ */ ++static int ubstatd(void *unused) ++{ ++ /* daemonize call will take care of signals */ ++ daemonize("ubstatd"); ++ ++ ubs_timer.data = (unsigned long)current; ++ ubs_timer.function = ubstatd_timeout; ++ add_timer(&ubs_timer); ++ ++ while (1) { ++ set_task_state(current, TASK_INTERRUPTIBLE); ++ if (time_after(ubs_timer.expires, jiffies)) { ++ schedule(); ++ try_to_freeze(); ++ continue; ++ } ++ ++ __set_task_state(current, TASK_RUNNING); ++ do_notifies(); ++ } ++ return 0; ++} ++ ++static int __init ubstatd_init(void) ++{ ++ init_timer(&ubs_timer); ++ ubs_timer.expires = TIME_MAX_JIF; ++ ubs_min_interval = TIME_MAX_SEC; ++ ubs_start_time = ubs_end_time = 0; ++ ++ kernel_thread(ubstatd, NULL, 0); ++ return 0; ++} ++ ++module_init(ubstatd_init); +diff 
-upr linux-2.6.16.orig/kernel/ub/ub_sys.c linux-2.6.16-026test015/kernel/ub/ub_sys.c +--- linux-2.6.16.orig/kernel/ub/ub_sys.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ub/ub_sys.c 2006-07-04 14:41:38.000000000 +0400 +@@ -0,0 +1,154 @@ ++/* ++ * kernel/ub/ub_sys.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <asm/uaccess.h> ++ ++#include <ub/beancounter.h> ++ ++#ifndef CONFIG_USER_RESOURCE ++asmlinkage long sys_getluid(void) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage long sys_setluid(uid_t uid) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage long sys_setublimit(uid_t uid, unsigned long resource, ++ unsigned long *limits) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2, ++ void *buf, long size) ++{ ++ return -ENOSYS; ++} ++#else /* CONFIG_USER_RESOURCE */ ++ ++/* ++ * The (rather boring) getluid syscall ++ */ ++asmlinkage long sys_getluid(void) ++{ ++ struct user_beancounter *ub; ++ ++ ub = get_exec_ub(); ++ if (ub == NULL) ++ return -EINVAL; ++ ++ return ub->ub_uid; ++} ++ ++/* ++ * The setluid syscall ++ */ ++asmlinkage long sys_setluid(uid_t uid) ++{ ++ struct user_beancounter *ub; ++ struct task_beancounter *task_bc; ++ int error; ++ ++ task_bc = ¤t->task_bc; ++ ++ /* You may not disown a setluid */ ++ error = -EINVAL; ++ if (uid == (uid_t)-1) ++ goto out; ++ ++ /* You may only set an ub as root */ ++ error = -EPERM; ++ if (!capable(CAP_SETUID)) ++ goto out; ++ ++ /* Ok - set up a beancounter entry for this user */ ++ error = -ENOBUFS; ++ ub = get_beancounter_byuid(uid, 1); ++ if (ub == NULL) ++ goto out; ++ ++ ub_debug(UBD_ALLOC | UBD_LIMIT, "setluid, bean %p (count %d) " ++ "for %.20s pid %d\n", ++ ub, atomic_read(&ub->ub_refcount), ++ current->comm, current->pid); ++ /* install bc */ ++ put_beancounter(task_bc->exec_ub); ++ task_bc->exec_ub = ub; 
++ put_beancounter(task_bc->fork_sub); ++ task_bc->fork_sub = get_beancounter(ub); ++ error = 0; ++out: ++ return error; ++} ++ ++/* ++ * The setbeanlimit syscall ++ */ ++asmlinkage long sys_setublimit(uid_t uid, unsigned long resource, ++ unsigned long *limits) ++{ ++ int error; ++ unsigned long flags; ++ struct user_beancounter *ub; ++ unsigned long new_limits[2]; ++ ++ error = -EPERM; ++ if(!capable(CAP_SYS_RESOURCE)) ++ goto out; ++ ++ if (!ve_is_super(get_exec_env())) ++ goto out; ++ ++ error = -EINVAL; ++ if (resource >= UB_RESOURCES) ++ goto out; ++ ++ error = -EFAULT; ++ if (copy_from_user(&new_limits, limits, sizeof(new_limits))) ++ goto out; ++ ++ error = -EINVAL; ++ if (new_limits[0] > UB_MAXVALUE || new_limits[1] > UB_MAXVALUE) ++ goto out; ++ ++ error = -ENOENT; ++ ub = get_beancounter_byuid(uid, 0); ++ if (ub == NULL) { ++ ub_debug(UBD_LIMIT, "No login bc for uid %d\n", uid); ++ goto out; ++ } ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_parms[resource].barrier = new_limits[0]; ++ ub->ub_parms[resource].limit = new_limits[1]; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ put_beancounter(ub); ++ ++ error = 0; ++out: ++ return error; ++} ++ ++extern long do_ubstat(int func, unsigned long arg1, unsigned long arg2, ++ void *buf, long size); ++asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2, ++ void *buf, long size) ++{ ++ if (!ve_is_super(get_exec_env())) ++ return -EPERM; ++ ++ return do_ubstat(func, arg1, arg2, buf, size); ++} ++#endif +diff -upr linux-2.6.16.orig/kernel/uid16.c linux-2.6.16-026test015/kernel/uid16.c +--- linux-2.6.16.orig/kernel/uid16.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/uid16.c 2006-07-04 14:41:36.000000000 +0400 +@@ -20,43 +20,67 @@ + + asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group) + { +- return sys_chown(filename, low2highuid(user), low2highgid(group)); ++ long ret = sys_chown(filename, low2highuid(user), 
low2highgid(group)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group) + { +- return sys_lchown(filename, low2highuid(user), low2highgid(group)); ++ long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) + { +- return sys_fchown(fd, low2highuid(user), low2highgid(group)); ++ long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) + { +- return sys_setregid(low2highgid(rgid), low2highgid(egid)); ++ long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_setgid16(old_gid_t gid) + { +- return sys_setgid(low2highgid(gid)); ++ long ret = sys_setgid(low2highgid(gid)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) + { +- return sys_setreuid(low2highuid(ruid), low2highuid(euid)); ++ long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_setuid16(old_uid_t uid) + { +- return sys_setuid(low2highuid(uid)); ++ long ret = sys_setuid(low2highuid(uid)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) + { +- return sys_setresuid(low2highuid(ruid), low2highuid(euid), +- low2highuid(suid)); ++ long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), 
++ low2highuid(suid)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) +@@ -72,8 +96,11 @@ asmlinkage long sys_getresuid16(old_uid_ + + asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) + { +- return sys_setresgid(low2highgid(rgid), low2highgid(egid), +- low2highgid(sgid)); ++ long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), ++ low2highgid(sgid)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) +@@ -89,12 +116,18 @@ asmlinkage long sys_getresgid16(old_gid_ + + asmlinkage long sys_setfsuid16(old_uid_t uid) + { +- return sys_setfsuid(low2highuid(uid)); ++ long ret = sys_setfsuid(low2highuid(uid)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + asmlinkage long sys_setfsgid16(old_gid_t gid) + { +- return sys_setfsgid(low2highgid(gid)); ++ long ret = sys_setfsgid(low2highgid(gid)); ++ /* avoid REGPARM breakage on x86: */ ++ prevent_tail_call(ret); ++ return ret; + } + + static int groups16_to_user(old_gid_t __user *grouplist, +diff -upr linux-2.6.16.orig/kernel/user.c linux-2.6.16-026test015/kernel/user.c +--- linux-2.6.16.orig/kernel/user.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/kernel/user.c 2006-07-04 14:41:39.000000000 +0400 +@@ -14,6 +14,7 @@ + #include <linux/bitops.h> + #include <linux/key.h> + #include <linux/interrupt.h> ++#include <linux/module.h> + + /* + * UID task count cache, to get fast user lookup in "alloc_uid" +@@ -24,7 +25,20 @@ + #define UIDHASH_SZ (1 << UIDHASH_BITS) + #define UIDHASH_MASK (UIDHASH_SZ - 1) + #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) +-#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) ++#define 
__uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) ++ ++#ifdef CONFIG_VE ++#define UIDHASH_MASK_VE (UIDHASH_SZ_VE - 1) ++#define __uidhashfn_ve(uid) (((uid >> UIDHASH_BITS_VE) ^ uid) & \ ++ UIDHASH_MASK_VE) ++#define __uidhashentry_ve(uid, envid) ((envid)->uidhash_table + \ ++ __uidhashfn_ve(uid)) ++#define uidhashentry_ve(uid) (ve_is_super(get_exec_env()) ? \ ++ __uidhashentry(uid) : \ ++ __uidhashentry_ve(uid, get_exec_env())) ++#else ++#define uidhashentry_ve(uid) __uidhashentry(uid) ++#endif + + static kmem_cache_t *uid_cachep; + static struct list_head uidhash_table[UIDHASH_SZ]; +@@ -96,7 +110,7 @@ struct user_struct *find_user(uid_t uid) + unsigned long flags; + + spin_lock_irqsave(&uidhash_lock, flags); +- ret = uid_hash_find(uid, uidhashentry(uid)); ++ ret = uid_hash_find(uid, uidhashentry_ve(uid)); + spin_unlock_irqrestore(&uidhash_lock, flags); + return ret; + } +@@ -115,10 +129,11 @@ void free_uid(struct user_struct *up) + } + local_irq_restore(flags); + } ++EXPORT_SYMBOL_GPL(free_uid); + + struct user_struct * alloc_uid(uid_t uid) + { +- struct list_head *hashent = uidhashentry(uid); ++ struct list_head *hashent = uidhashentry_ve(uid); + struct user_struct *up; + + spin_lock_irq(&uidhash_lock); +@@ -168,6 +183,7 @@ struct user_struct * alloc_uid(uid_t uid + } + return up; + } ++EXPORT_SYMBOL_GPL(alloc_uid); + + void switch_uid(struct user_struct *new_user) + { +@@ -186,21 +202,21 @@ void switch_uid(struct user_struct *new_ + free_uid(old_user); + suid_keys(current); + } +- ++EXPORT_SYMBOL_GPL(switch_uid); + + static int __init uid_cache_init(void) + { + int n; + + uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), +- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + + for(n = 0; n < UIDHASH_SZ; ++n) + INIT_LIST_HEAD(uidhash_table + n); + + /* Insert the root user immediately (init already runs as root) */ + spin_lock_irq(&uidhash_lock); +- uid_hash_insert(&root_user, 
uidhashentry(0)); ++ uid_hash_insert(&root_user, __uidhashentry(0)); + spin_unlock_irq(&uidhash_lock); + + return 0; +diff -upr linux-2.6.16.orig/kernel/ve.c linux-2.6.16-026test015/kernel/ve.c +--- linux-2.6.16.orig/kernel/ve.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/ve.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,205 @@ ++/* ++ * linux/kernel/ve.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++/* ++ * 've.c' helper file performing VE sub-system initialization ++ */ ++ ++#include <linux/sched.h> ++#include <linux/delay.h> ++#include <linux/capability.h> ++#include <linux/ve.h> ++#include <linux/smp_lock.h> ++#include <linux/init.h> ++ ++#include <linux/errno.h> ++#include <linux/unistd.h> ++#include <linux/slab.h> ++#include <linux/sys.h> ++#include <linux/kdev_t.h> ++#include <linux/termios.h> ++#include <linux/tty_driver.h> ++#include <linux/netdevice.h> ++#include <linux/utsname.h> ++#include <linux/proc_fs.h> ++#include <linux/kernel_stat.h> ++#include <linux/module.h> ++#include <linux/rcupdate.h> ++#include <linux/ve_proto.h> ++#include <linux/ve_owner.h> ++#include <linux/devpts_fs.h> ++ ++#include <linux/nfcalls.h> ++ ++unsigned long vz_rstamp = 0x37e0f59d; ++ ++#ifdef CONFIG_MODULES ++struct module no_module = { .state = MODULE_STATE_GOING }; ++EXPORT_SYMBOL(no_module); ++#endif ++ ++#ifdef CONFIG_VE ++ ++DCL_VE_OWNER(SKB, struct sk_buff, owner_env) ++DCL_VE_OWNER(SK, struct sock, sk_owner_env) ++DCL_VE_OWNER(TW, struct tcp_tw_bucket, tw_owner_env) ++DCL_VE_OWNER(FILP, struct file, owner_env) ++DCL_VE_OWNER(FSTYPE, struct file_system_type, owner_env) ++ ++INIT_KSYM_MODULE(x_tables); ++INIT_KSYM_MODULE(xt_tcpudp); ++INIT_KSYM_MODULE(ip_tables); ++INIT_KSYM_MODULE(ip6_tables); ++INIT_KSYM_MODULE(iptable_filter); ++INIT_KSYM_MODULE(ip6table_filter); ++INIT_KSYM_MODULE(iptable_mangle); ++INIT_KSYM_MODULE(ip6table_mangle); 
++INIT_KSYM_MODULE(xt_limit); ++INIT_KSYM_MODULE(ipt_multiport); ++INIT_KSYM_MODULE(ip6t_multiport); ++INIT_KSYM_MODULE(ipt_tos); ++INIT_KSYM_MODULE(ipt_TOS); ++INIT_KSYM_MODULE(ipt_REJECT); ++INIT_KSYM_MODULE(ip6t_REJECT); ++INIT_KSYM_MODULE(ipt_TCPMSS); ++INIT_KSYM_MODULE(xt_tcpmss); ++INIT_KSYM_MODULE(ipt_ttl); ++INIT_KSYM_MODULE(ipt_LOG); ++INIT_KSYM_MODULE(ip6t_LOG); ++INIT_KSYM_MODULE(xt_length); ++INIT_KSYM_MODULE(ip_conntrack); ++INIT_KSYM_MODULE(ip_conntrack_ftp); ++INIT_KSYM_MODULE(ip_conntrack_irc); ++INIT_KSYM_MODULE(xt_conntrack); ++INIT_KSYM_MODULE(xt_state); ++INIT_KSYM_MODULE(xt_helper); ++INIT_KSYM_MODULE(ip_nat); ++INIT_KSYM_MODULE(iptable_nat); ++INIT_KSYM_MODULE(ip_nat_ftp); ++INIT_KSYM_MODULE(ip_nat_irc); ++INIT_KSYM_MODULE(ipt_REDIRECT); ++ ++INIT_KSYM_CALL(int, init_netfilter, (void)); ++INIT_KSYM_CALL(int, init_xtables, (void)); ++INIT_KSYM_CALL(int, init_xt_tcpudp, (void)); ++INIT_KSYM_CALL(int, init_iptables, (void)); ++INIT_KSYM_CALL(int, init_ip6tables, (void)); ++INIT_KSYM_CALL(int, init_iptable_filter, (void)); ++INIT_KSYM_CALL(int, init_ip6table_filter, (void)); ++INIT_KSYM_CALL(int, init_iptable_mangle, (void)); ++INIT_KSYM_CALL(int, init_ip6table_mangle, (void)); ++INIT_KSYM_CALL(int, init_xt_limit, (void)); ++INIT_KSYM_CALL(int, init_iptable_multiport, (void)); ++INIT_KSYM_CALL(int, init_ip6table_multiport, (void)); ++INIT_KSYM_CALL(int, init_iptable_tos, (void)); ++INIT_KSYM_CALL(int, init_iptable_TOS, (void)); ++INIT_KSYM_CALL(int, init_iptable_REJECT, (void)); ++INIT_KSYM_CALL(int, init_ip6table_REJECT, (void)); ++INIT_KSYM_CALL(int, init_iptable_TCPMSS, (void)); ++INIT_KSYM_CALL(int, init_xt_tcpmss, (void)); ++INIT_KSYM_CALL(int, init_iptable_ttl, (void)); ++INIT_KSYM_CALL(int, init_iptable_LOG, (void)); ++INIT_KSYM_CALL(int, init_ip6table_LOG, (void)); ++INIT_KSYM_CALL(int, init_xt_length, (void)); ++INIT_KSYM_CALL(int, init_iptable_conntrack, (void)); ++INIT_KSYM_CALL(int, init_iptable_ftp, (void)); ++INIT_KSYM_CALL(int, 
init_iptable_irc, (void)); ++INIT_KSYM_CALL(int, init_xt_conntrack_match, (void)); ++INIT_KSYM_CALL(int, init_xt_state, (void)); ++INIT_KSYM_CALL(int, init_xt_helper, (void)); ++INIT_KSYM_CALL(int, ip_nat_init, (void)); ++INIT_KSYM_CALL(int, init_iptable_nat, (void)); ++INIT_KSYM_CALL(int, init_iptable_nat_ftp, (void)); ++INIT_KSYM_CALL(int, init_iptable_nat_irc, (void)); ++INIT_KSYM_CALL(int, init_iptable_REDIRECT, (void)); ++INIT_KSYM_CALL(void, fini_iptable_nat_irc, (void)); ++INIT_KSYM_CALL(void, fini_iptable_nat_ftp, (void)); ++INIT_KSYM_CALL(void, fini_iptable_nat, (void)); ++INIT_KSYM_CALL(void, ip_nat_cleanup, (void)); ++INIT_KSYM_CALL(void, fini_xt_helper, (void)); ++INIT_KSYM_CALL(void, fini_xt_state, (void)); ++INIT_KSYM_CALL(void, fini_xt_conntrack_match, (void)); ++INIT_KSYM_CALL(void, fini_iptable_irc, (void)); ++INIT_KSYM_CALL(void, fini_iptable_ftp, (void)); ++INIT_KSYM_CALL(void, fini_iptable_conntrack, (void)); ++INIT_KSYM_CALL(void, fini_xt_length, (void)); ++INIT_KSYM_CALL(void, fini_ip6table_LOG, (void)); ++INIT_KSYM_CALL(void, fini_iptable_LOG, (void)); ++INIT_KSYM_CALL(void, fini_iptable_ttl, (void)); ++INIT_KSYM_CALL(void, fini_xt_tcpmss, (void)); ++INIT_KSYM_CALL(void, fini_iptable_TCPMSS, (void)); ++INIT_KSYM_CALL(void, fini_ip6table_REJECT, (void)); ++INIT_KSYM_CALL(void, fini_iptable_REJECT, (void)); ++INIT_KSYM_CALL(void, fini_iptable_TOS, (void)); ++INIT_KSYM_CALL(void, fini_iptable_tos, (void)); ++INIT_KSYM_CALL(void, fini_ip6table_multiport, (void)); ++INIT_KSYM_CALL(void, fini_iptable_multiport, (void)); ++INIT_KSYM_CALL(void, fini_xt_limit, (void)); ++INIT_KSYM_CALL(void, fini_iptable_filter, (void)); ++INIT_KSYM_CALL(void, fini_ip6table_filter, (void)); ++INIT_KSYM_CALL(void, fini_iptable_mangle, (void)); ++INIT_KSYM_CALL(void, fini_ip6table_mangle, (void)); ++INIT_KSYM_CALL(void, fini_ip6tables, (void)); ++INIT_KSYM_CALL(void, fini_iptables, (void)); ++INIT_KSYM_CALL(void, fini_xt_tcpudp, (void)); ++INIT_KSYM_CALL(void, 
fini_xtables, (void)); ++INIT_KSYM_CALL(void, fini_netfilter, (void)); ++INIT_KSYM_CALL(void, fini_iptable_REDIRECT, (void)); ++ ++INIT_KSYM_CALL(void, ipt_flush_table, (struct xt_table *table)); ++INIT_KSYM_CALL(void, ip6t_flush_table, (struct xt_table *table)); ++ ++#if defined(CONFIG_VE_CALLS_MODULE) || defined(CONFIG_VE_CALLS) ++INIT_KSYM_MODULE(vzmon); ++INIT_KSYM_CALL(int, real_get_device_perms_ve, ++ (int dev_type, dev_t dev, int access_mode)); ++INIT_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env)); ++INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env)); ++INIT_KSYM_CALL(void, real_update_load_avg_ve, (void)); ++ ++int get_device_perms_ve(int dev_type, dev_t dev, int access_mode) ++{ ++ return KSYMSAFECALL(int, vzmon, real_get_device_perms_ve, ++ (dev_type, dev, access_mode)); ++} ++EXPORT_SYMBOL(get_device_perms_ve); ++ ++void do_env_cleanup(struct ve_struct *env) ++{ ++ KSYMSAFECALL_VOID(vzmon, real_do_env_cleanup, (env)); ++} ++ ++void do_env_free(struct ve_struct *env) ++{ ++ KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env)); ++} ++EXPORT_SYMBOL(do_env_free); ++ ++void do_update_load_avg_ve(void) ++{ ++ KSYMSAFECALL_VOID(vzmon, real_update_load_avg_ve, ()); ++} ++#endif ++ ++struct ve_struct ve0 = { ++ .utsname = &system_utsname, ++ .vetask_lh = LIST_HEAD_INIT(ve0.vetask_lh), ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ ._net_dev_tail = &ve0._net_dev_base, ++ .ifindex = -1, ++#endif ++#ifdef CONFIG_UNIX98_PTYS ++ .devpts_config = &devpts_config, ++#endif ++}; ++ ++EXPORT_SYMBOL(ve0); ++ ++#endif /* CONFIG_VE */ +diff -upr linux-2.6.16.orig/kernel/vecalls.c linux-2.6.16-026test015/kernel/vecalls.c +--- linux-2.6.16.orig/kernel/vecalls.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/vecalls.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,3547 @@ ++/* ++ * linux/kernel/vecalls.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. 
++ * ++ */ ++ ++/* ++ * 'vecalls.c' is file with basic VE support. It provides basic primities ++ * along with initialization script ++ */ ++ ++#include <linux/sched.h> ++#include <linux/delay.h> ++#include <linux/capability.h> ++#include <linux/ve.h> ++#include <linux/smp_lock.h> ++#include <linux/init.h> ++#include <linux/list.h> ++#include <linux/ve_owner.h> ++#include <linux/errno.h> ++#include <linux/unistd.h> ++#include <linux/slab.h> ++#include <linux/vmalloc.h> ++#include <linux/sys.h> ++#include <linux/fs.h> ++#include <linux/namespace.h> ++#include <linux/termios.h> ++#include <linux/tty_driver.h> ++#include <linux/netdevice.h> ++#include <linux/wait.h> ++#include <linux/inetdevice.h> ++#include <net/addrconf.h> ++#include <linux/utsname.h> ++#include <linux/sysctl.h> ++#include <linux/proc_fs.h> ++#include <linux/seq_file.h> ++#include <linux/kernel_stat.h> ++#include <linux/module.h> ++#include <linux/suspend.h> ++#include <linux/rcupdate.h> ++#include <linux/in.h> ++#include <linux/major.h> ++#include <linux/kdev_t.h> ++#include <linux/idr.h> ++#include <linux/inetdevice.h> ++#include <net/pkt_sched.h> ++#include <linux/divert.h> ++#include <ub/beancounter.h> ++ ++#include <net/route.h> ++#include <net/ip_fib.h> ++#include <net/ip6_route.h> ++#include <net/arp.h> ++#include <net/ipv6.h> ++ ++#include <linux/ve_proto.h> ++#include <linux/venet.h> ++#include <linux/vzctl.h> ++#include <linux/vzcalluser.h> ++#ifdef CONFIG_FAIRSCHED ++#include <linux/fairsched.h> ++#endif ++ ++#include <linux/nfcalls.h> ++#include <linux/virtinfo.h> ++ ++struct ve_struct *ve_list_head = NULL; ++int nr_ve = 1; /* One VE always exists. 
Compatibility with vestat */ ++rwlock_t ve_list_guard = RW_LOCK_UNLOCKED; ++static rwlock_t devperms_hash_guard = RW_LOCK_UNLOCKED; ++ ++extern int glob_virt_pids; ++ ++static int do_env_enter(struct ve_struct *ve, unsigned int flags); ++static void do_clean_devperms(envid_t veid); ++static int alloc_ve_tty_drivers(struct ve_struct* ve); ++static void free_ve_tty_drivers(struct ve_struct* ve); ++static int register_ve_tty_drivers(struct ve_struct* ve); ++static void unregister_ve_tty_drivers(struct ve_struct* ve); ++static int init_ve_tty_drivers(struct ve_struct *); ++static void fini_ve_tty_drivers(struct ve_struct *); ++static void clear_termios(struct tty_driver* driver ); ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static void ve_mapped_devs_cleanup(struct ve_struct *ve); ++#endif ++ ++static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf); ++ ++static void vecalls_exit(void); ++ ++struct ve_struct *__find_ve_by_id(envid_t veid) ++{ ++ struct ve_struct *ve; ++ for (ve = ve_list_head; ++ ve != NULL && ve->veid != veid; ++ ve = ve->next); ++ return ve; ++} ++ ++struct ve_struct *get_ve_by_id(envid_t veid) ++{ ++ struct ve_struct *ve; ++ read_lock(&ve_list_guard); ++ ve = __find_ve_by_id(veid); ++ get_ve(ve); ++ read_unlock(&ve_list_guard); ++ return ve; ++} ++ ++/* ++ * real_put_ve() MUST be used instead of put_ve() inside vecalls. 
++ */ ++void real_do_env_free(struct ve_struct *ve); ++static inline void real_put_ve(struct ve_struct *ve) ++{ ++ if (ve && atomic_dec_and_test(&ve->counter)) { ++ if (atomic_read(&ve->pcounter) > 0) ++ BUG(); ++ if (ve->is_running) ++ BUG(); ++ real_do_env_free(ve); ++ } ++} ++ ++extern struct file_system_type devpts_fs_type; ++extern struct file_system_type sysfs_fs_type; ++extern struct file_system_type tmpfs_fs_type; ++extern struct file_system_type proc_fs_type; ++ ++extern spinlock_t task_capability_lock; ++extern void ve_ipc_free(struct ve_struct * ve); ++extern void ip_fragment_cleanup(struct ve_struct *ve); ++ ++static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf) ++{ ++ struct ve_struct *ve; ++ struct vz_cpu_stat *vstat; ++ int retval; ++ int i, cpu; ++ unsigned long tmp; ++ ++ if (!ve_is_super(get_exec_env()) && (veid != get_exec_env()->veid)) ++ return -EPERM; ++ if (veid == 0) ++ return -ESRCH; ++ ++ vstat = kmalloc(sizeof(*vstat), GFP_KERNEL); ++ if (!vstat) ++ return -ENOMEM; ++ memset(vstat, 0, sizeof(*vstat)); ++ ++ retval = -ESRCH; ++ read_lock(&ve_list_guard); ++ ve = __find_ve_by_id(veid); ++ if (ve == NULL) ++ goto out_unlock; ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ struct ve_cpu_stats *st; ++ ++ st = VE_CPU_STATS(ve, cpu); ++ vstat->user_jif += st->user; ++ vstat->nice_jif += st->nice; ++ vstat->system_jif += st->system; ++ vstat->idle_clk += ve_sched_get_idle_time(ve, cpu); ++ } ++ vstat->uptime_clk = get_cycles() - ve->start_cycles; ++ vstat->uptime_jif = jiffies - ve->start_jiffies; ++ for (i = 0; i < 3; i++) { ++ tmp = ve->avenrun[i] + (FIXED_1/200); ++ vstat->avenrun[i].val_int = LOAD_INT(tmp); ++ vstat->avenrun[i].val_frac = LOAD_FRAC(tmp); ++ } ++ read_unlock(&ve_list_guard); ++ ++ retval = 0; ++ if (copy_to_user(buf, vstat, sizeof(*vstat))) ++ retval = -EFAULT; ++out_free: ++ kfree(vstat); ++ return retval; ++ ++out_unlock: ++ read_unlock(&ve_list_guard); ++ goto out_free; ++} ++ 
++/********************************************************************** ++ * Devices permissions routines, ++ * character and block devices separately ++ **********************************************************************/ ++ ++/* Rules applied in the following order: ++ MAJOR!=0, MINOR!=0 ++ MAJOR!=0, MINOR==0 ++ MAJOR==0, MINOR==0 ++*/ ++struct devperms_struct ++{ ++ dev_t dev; /* device id */ ++ unsigned char mask; ++ unsigned type; ++ envid_t veid; ++ ++ struct devperms_struct *devhash_next; ++ struct devperms_struct **devhash_pprev; ++}; ++ ++static struct devperms_struct original_perms[] = ++{{ ++ MKDEV(0,0), /*device*/ ++ S_IROTH | S_IWOTH, ++ S_IFCHR, /*type*/ ++ 0, /*veid*/ ++ NULL, NULL ++}, ++{ ++ MKDEV(0,0), /*device*/ ++ S_IXGRP | S_IROTH | S_IWOTH, ++ S_IFBLK, /*type*/ ++ 0, /*veid*/ ++ NULL, NULL ++}}; ++ ++static struct devperms_struct default_major_perms[] = { ++ {MKDEV(UNIX98_PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR}, ++ {MKDEV(UNIX98_PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR}, ++ {MKDEV(PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR}, ++ {MKDEV(PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR}, ++}; ++static struct devperms_struct default_minor_perms[] = { ++ {MKDEV(MEM_MAJOR, 3), S_IROTH | S_IWOTH, S_IFCHR}, /* null */ ++ {MKDEV(MEM_MAJOR, 5), S_IROTH | S_IWOTH, S_IFCHR}, /* zero */ ++ {MKDEV(MEM_MAJOR, 7), S_IROTH | S_IWOTH, S_IFCHR}, /* full */ ++ {MKDEV(TTYAUX_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},/* tty */ ++ {MKDEV(TTYAUX_MAJOR, 2), S_IROTH | S_IWOTH, S_IFCHR},/* ptmx */ ++ {MKDEV(MEM_MAJOR, 8), S_IROTH, S_IFCHR}, /* random */ ++ {MKDEV(MEM_MAJOR, 9), S_IROTH, S_IFCHR}, /* urandom */ ++}; ++ ++static struct devperms_struct default_deny_perms = { ++ MKDEV(0, 0), 0, S_IFCHR ++}; ++ ++static inline struct devperms_struct *find_default_devperms(int type, ++ dev_t dev) ++{ ++ int i; ++ ++ /* XXX all defaults perms are S_IFCHR */ ++ if (type != S_IFCHR) ++ return &default_deny_perms; ++ ++ for (i = 0; ++ i < 
sizeof(default_minor_perms)/sizeof(struct devperms_struct); ++ i++) ++ if (MAJOR(dev) == MAJOR(default_minor_perms[i].dev) && ++ MINOR(dev) == MINOR(default_minor_perms[i].dev)) ++ return &default_minor_perms[i]; ++ for (i = 0; ++ i < sizeof(default_major_perms)/sizeof(struct devperms_struct); ++ i++) ++ if (MAJOR(dev) == MAJOR(default_major_perms[i].dev)) ++ return &default_major_perms[i]; ++ ++ return &default_deny_perms; ++} ++ ++#define DEVPERMS_HASH_SZ 512 ++struct devperms_struct *devperms_hash[DEVPERMS_HASH_SZ]; ++ ++#define devperms_hashfn(id,dev) \ ++ ( (id << 5) ^ (id >> 5) ^ (MAJOR(dev)) ^ MINOR(dev) ) & \ ++ (DEVPERMS_HASH_SZ - 1) ++ ++static inline void hash_devperms(struct devperms_struct *p) ++{ ++ struct devperms_struct **htable = ++ &devperms_hash[devperms_hashfn(p->veid,p->dev)]; ++ ++ if ((p->devhash_next = *htable) != NULL) ++ (*htable)->devhash_pprev = &p->devhash_next; ++ *htable = p; ++ p->devhash_pprev = htable; ++} ++ ++static inline void unhash_devperms(struct devperms_struct *p) ++{ ++ if (p->devhash_next) ++ p->devhash_next->devhash_pprev = p->devhash_pprev; ++ *p->devhash_pprev = p->devhash_next; ++} ++ ++static int __init init_devperms_hash(void) ++{ ++ write_lock_irq(&devperms_hash_guard); ++ memset(devperms_hash, 0, sizeof(devperms_hash)); ++ hash_devperms(original_perms); ++ hash_devperms(original_perms+1); ++ write_unlock_irq(&devperms_hash_guard); ++ return 0; ++} ++ ++static inline void fini_devperms_hash(void) ++{ ++} ++ ++static inline struct devperms_struct *find_devperms(envid_t veid, ++ int type, ++ dev_t dev) ++{ ++ struct devperms_struct *p, **htable = ++ &devperms_hash[devperms_hashfn(veid,dev)]; ++ ++ for (p = *htable; p && !(p->type==type && ++ MAJOR(dev)==MAJOR(p->dev) && ++ MINOR(dev)==MINOR(p->dev) && ++ p->veid==veid); ++ p = p->devhash_next) ++ ; ++ return p; ++} ++ ++ ++static void do_clean_devperms(envid_t veid) ++{ ++ int i; ++ struct devperms_struct* ve; ++ ++ write_lock_irq(&devperms_hash_guard); ++ for (i = 
0; i < DEVPERMS_HASH_SZ; i++) ++ for (ve = devperms_hash[i]; ve;) { ++ struct devperms_struct *next = ve->devhash_next; ++ if (ve->veid == veid) { ++ unhash_devperms(ve); ++ kfree(ve); ++ } ++ ++ ve = next; ++ } ++ write_unlock_irq(&devperms_hash_guard); ++} ++ ++/* ++ * Mode is a mask of ++ * FMODE_READ for read access (configurable by S_IROTH) ++ * FMODE_WRITE for write access (configurable by S_IWOTH) ++ * FMODE_QUOTACTL for quotactl access (configurable by S_IXGRP) ++ */ ++int real_get_device_perms_ve(int dev_type, dev_t dev, int access_mode) ++{ ++ struct devperms_struct *perms; ++ struct ve_struct *ve; ++ envid_t veid; ++ ++ perms = NULL; ++ ve = get_exec_env(); ++ veid = ve->veid; ++ ++ read_lock(&devperms_hash_guard); ++ ++ perms = find_devperms(veid, dev_type|VE_USE_MINOR, dev); ++ if (perms) ++ goto end; ++ ++ perms = find_devperms(veid, dev_type|VE_USE_MAJOR, MKDEV(MAJOR(dev),0)); ++ if (perms) ++ goto end; ++ ++ perms = find_devperms(veid, dev_type, MKDEV(0,0)); ++ if (perms) ++ goto end; ++ ++ perms = find_default_devperms(dev_type, dev); ++ ++end: ++ read_unlock(&devperms_hash_guard); ++ ++ access_mode = "\000\004\002\006\010\014\012\016"[access_mode]; ++ return perms ? ++ (((perms->mask & access_mode) == access_mode) ? 
0 : -EACCES) : ++ -ENODEV; ++} ++EXPORT_SYMBOL(real_get_device_perms_ve); ++ ++int do_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask) ++{ ++ struct devperms_struct *perms; ++ ++ write_lock_irq(&devperms_hash_guard); ++ perms = find_devperms(veid, type, dev); ++ if (!perms) { ++ struct devperms_struct *perms_new; ++ write_unlock_irq(&devperms_hash_guard); ++ ++ perms_new = kmalloc(sizeof(struct devperms_struct), GFP_KERNEL); ++ if (!perms_new) ++ return -ENOMEM; ++ ++ write_lock_irq(&devperms_hash_guard); ++ perms = find_devperms(veid, type, dev); ++ if (perms) { ++ kfree(perms_new); ++ perms_new = perms; ++ } ++ ++ switch (type & VE_USE_MASK) { ++ case 0: ++ dev = 0; ++ break; ++ case VE_USE_MAJOR: ++ dev = MKDEV(MAJOR(dev),0); ++ break; ++ } ++ ++ perms_new->veid = veid; ++ perms_new->dev = dev; ++ perms_new->type = type; ++ perms_new->mask = mask & S_IALLUGO; ++ hash_devperms(perms_new); ++ } else ++ perms->mask = mask & S_IALLUGO; ++ write_unlock_irq(&devperms_hash_guard); ++ return 0; ++} ++EXPORT_SYMBOL(do_setdevperms); ++ ++int real_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask) ++{ ++ struct ve_struct *ve; ++ int err; ++ ++ if (!capable(CAP_SETVEID) || veid == 0) ++ return -EPERM; ++ ++ if ((ve = get_ve_by_id(veid)) == NULL) ++ return -ESRCH; ++ ++ down_read(&ve->op_sem); ++ err = -ESRCH; ++ if (ve->is_running) ++ err = do_setdevperms(veid, type, dev, mask); ++ up_read(&ve->op_sem); ++ real_put_ve(ve); ++ return err; ++} ++ ++void real_update_load_avg_ve(void) ++{ ++ struct ve_struct *ve; ++ unsigned long nr_active; ++ ++ read_lock(&ve_list_guard); ++ for (ve = ve_list_head; ve != NULL; ve = ve->next) { ++ nr_active = nr_running_ve(ve) + nr_uninterruptible_ve(ve); ++ nr_active *= FIXED_1; ++ CALC_LOAD(ve->avenrun[0], EXP_1, nr_active); ++ CALC_LOAD(ve->avenrun[1], EXP_5, nr_active); ++ CALC_LOAD(ve->avenrun[2], EXP_15, nr_active); ++ } ++ read_unlock(&ve_list_guard); ++} ++ ++ 
++/********************************************************************** ++ ********************************************************************** ++ * ++ * FS-related helpers to VE start/stop ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++/* ++ * DEVPTS needs a virtualization: each environment should see each own list of ++ * pseudo-terminals. ++ * To implement it we need to have separate devpts superblocks for each ++ * VE, and each VE should mount its own one. ++ * Thus, separate vfsmount structures are required. ++ * To minimize intrusion into vfsmount lookup code, separate file_system_type ++ * structures are created. ++ * ++ * In addition to this, patch fo character device itself is required, as file ++ * system itself is used only for MINOR/MAJOR lookup. ++ */ ++static int register_ve_fs_type(struct ve_struct *ve, ++ struct file_system_type *template, ++ struct file_system_type **p_fs_type, struct vfsmount **p_mnt) ++{ ++ struct vfsmount *mnt; ++ struct file_system_type *local_fs_type; ++ int ret; ++ ++ VZTRACE("register_ve_fs_type(\"%s\")\n", template->name); ++ ++ local_fs_type = kmalloc(sizeof(*local_fs_type) + sizeof(void *), ++ GFP_KERNEL); ++ if (local_fs_type == NULL) ++ return -ENOMEM; ++ ++ memset(local_fs_type, 0, sizeof(*local_fs_type)); ++ local_fs_type->name = template->name; ++ local_fs_type->fs_flags = template->fs_flags; ++ local_fs_type->get_sb = template->get_sb; ++ local_fs_type->kill_sb = template->kill_sb; ++ local_fs_type->owner = template->owner; ++ /* ++ * 1. we do not have refcounter on fstype ++ * 2. fstype holds reference to ve using get_ve()/put_ve(). 
++ * so we free fstype when freeing ve and we are sure it's ok to free it ++ */ ++ SET_VE_OWNER_FSTYPE(local_fs_type, ve); ++ get_filesystem(local_fs_type); /* get_ve() inside */ ++ ++ ret = register_filesystem(local_fs_type); /* does not get */ ++ if (ret) ++ goto reg_err; ++ ++ mnt = kern_mount(local_fs_type); ++ if (IS_ERR(mnt)) ++ goto mnt_err; ++ ++ /* Usage counters after succesful execution kern_mount: ++ * local_fs_type - +1 (get_fs_type,get_sb_single,put_filesystem) ++ * mnt - +1 == 1 (alloc_vfsmnt) ++ */ ++ ++ *p_fs_type = local_fs_type; ++ *p_mnt = mnt; ++ return 0; ++ ++mnt_err: ++ ret = PTR_ERR(mnt); ++ unregister_filesystem(local_fs_type); /* does not put */ ++ ++reg_err: ++ put_filesystem(local_fs_type); ++ kfree(local_fs_type); ++ printk(KERN_DEBUG ++ "register_ve_fs_type(\"%s\") err=%d\n", template->name, ret); ++ return ret; ++} ++ ++static void umount_ve_fs_type(struct file_system_type *local_fs_type) ++{ ++ struct vfsmount *mnt; ++ struct list_head *p, *q; ++ LIST_HEAD(kill); ++ LIST_HEAD(umount_list); ++ ++ down_write(&namespace_sem); ++ spin_lock(&vfsmount_lock); ++ list_for_each_safe(p, q, ¤t->namespace->list) { ++ mnt = list_entry(p, struct vfsmount, mnt_list); ++ if (mnt->mnt_sb->s_type != local_fs_type) ++ continue; ++ list_del(p); ++ list_add(p, &kill); ++ } ++ ++ while (!list_empty(&kill)) { ++ mnt = list_entry(kill.next, struct vfsmount, mnt_list); ++ umount_tree(mnt, 1, &umount_list); ++ } ++ spin_unlock(&vfsmount_lock); ++ up_write(&namespace_sem); ++ release_mounts(&umount_list); ++} ++ ++static void unregister_ve_fs_type(struct file_system_type *local_fs_type, ++ struct vfsmount *local_fs_mount) ++{ ++ if (local_fs_mount == NULL || ++ local_fs_type == NULL) { ++ if (local_fs_mount != NULL || ++ local_fs_type != NULL) ++ BUG(); ++ return; ++ } ++ ++ VZTRACE("unregister_ve_fs_type(\"%s\")\n", local_fs_type->name); ++ ++ unregister_filesystem(local_fs_type); ++ umount_ve_fs_type(local_fs_type); ++ kern_umount(local_fs_mount); /* alias 
to mntput, drop our ref */ ++ put_filesystem(local_fs_type); ++} ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * FS-related helpers to VE start/stop ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++#ifdef CONFIG_SYSCTL ++static ctl_table ve_sysctl_tables[] = { ++ /* kernel */ ++ { ++ .ctl_name = CTL_KERN, ++ .procname = "kernel", ++ .mode = 0555, ++ .child = &ve_sysctl_tables[2], ++ }, ++ { .ctl_name = 0 }, ++ /* kernel/[vars] */ ++ { ++ .ctl_name = KERN_NODENAME, ++ .procname = "hostname", ++ .maxlen = 64, ++ .mode = 0644, ++ .proc_handler = &proc_doutsstring, ++ .strategy = &sysctl_string, ++ }, ++ { ++ .ctl_name = KERN_DOMAINNAME, ++ .procname = "domainname", ++ .maxlen = 64, ++ .mode = 0644, ++ .proc_handler = &proc_doutsstring, ++ .strategy = &sysctl_string, ++ }, ++ { ++ .ctl_name = KERN_SHMMAX, ++ .procname = "shmmax", ++ .maxlen = sizeof(size_t), ++ .mode = 0644, ++ .proc_handler = &proc_doulongvec_minmax, ++ }, ++ { ++ .ctl_name = KERN_SHMALL, ++ .procname = "shmall", ++ .maxlen = sizeof(size_t), ++ .mode = 0644, ++ .proc_handler = &proc_doulongvec_minmax, ++ }, ++ { ++ .ctl_name = KERN_SHMMNI, ++ .procname = "shmmni", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_MSGMAX, ++ .procname = "msgmax", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_MSGMNI, ++ .procname = "msgmni", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_MSGMNB, ++ .procname = "msgmnb", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_SEM, ++ .procname = "sem", ++ .maxlen = 4 * sizeof(int), ++ .mode = 0644, ++ .proc_handler = 
&proc_dointvec ++ }, ++ { .ctl_name = 0, } ++}; ++ ++static int register_ve_sysctltables(struct ve_struct *ve) ++{ ++ struct ctl_table_header *header; ++ ctl_table *root, *table; ++ ++ VZTRACE("register_ve_sysctltables\n"); ++ ++ root = clone_sysctl_template(ve_sysctl_tables, ++ sizeof(ve_sysctl_tables) / sizeof(ctl_table)); ++ if (root == NULL) ++ goto out; ++ ++ table = root->child; ++ table[0].data = &ve->utsname->nodename; ++ table[1].data = &ve->utsname->domainname; ++ table[2].data = &ve->_shm_ctlmax; ++ table[3].data = &ve->_shm_ctlall; ++ table[4].data = &ve->_shm_ctlmni; ++ table[5].data = &ve->_msg_ctlmax; ++ table[6].data = &ve->_msg_ctlmni; ++ table[7].data = &ve->_msg_ctlmnb; ++ table[8].data = &ve->_sem_ctls[0]; ++ ++ /* insert at head to override kern entries */ ++ header = register_sysctl_table(root, 1); ++ if (header == NULL) ++ goto out_free; ++ ++ ve->kern_header = header; ++ ve->kern_table = root; ++ return 0; ++ ++out_free: ++ free_sysctl_clone(root); ++out: ++ return -ENOMEM; ++} ++ ++static inline void unregister_ve_sysctltables(struct ve_struct *ve) ++{ ++ unregister_sysctl_table(ve->kern_header); ++} ++ ++static inline void free_ve_sysctltables(struct ve_struct *ve) ++{ ++ free_sysctl_clone(ve->kern_table); ++} ++#endif ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * VE start: subsystems ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++extern struct new_utsname virt_utsname; ++ ++static int init_ve_utsname(struct ve_struct *ve) ++{ ++ ve->utsname = kmalloc(sizeof(*ve->utsname), GFP_KERNEL); ++ if (ve->utsname == NULL) ++ return -ENOMEM; ++ ++ down_read(&uts_sem); /* protect the source */ ++ memcpy(ve->utsname, &system_utsname, sizeof(*ve->utsname)); ++ memcpy(ve->utsname->release, virt_utsname.release, ++ 
sizeof(virt_utsname.release)); ++ up_read(&uts_sem); ++ ++ return 0; ++} ++ ++static void free_ve_utsname(struct ve_struct *ve) ++{ ++ kfree(ve->utsname); ++ ve->utsname = NULL; ++} ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#include <net/ip.h> ++#include <net/tcp.h> ++#include <net/udp.h> ++#include <net/icmp.h> ++ ++static int init_fini_ve_mibs(struct ve_struct *ve, int fini) ++{ ++ if (fini) ++ goto fini; ++ if (!(ve->_net_statistics[0] = alloc_percpu(struct linux_mib))) ++ goto out1; ++ if (!(ve->_net_statistics[1] = alloc_percpu(struct linux_mib))) ++ goto out2; ++ if (!(ve->_ip_statistics[0] = alloc_percpu(struct ipstats_mib))) ++ goto out3; ++ if (!(ve->_ip_statistics[1] = alloc_percpu(struct ipstats_mib))) ++ goto out4; ++ if (!(ve->_icmp_statistics[0] = alloc_percpu(struct icmp_mib))) ++ goto out5; ++ if (!(ve->_icmp_statistics[1] = alloc_percpu(struct icmp_mib))) ++ goto out6; ++ if (!(ve->_tcp_statistics[0] = alloc_percpu(struct tcp_mib))) ++ goto out7; ++ if (!(ve->_tcp_statistics[1] = alloc_percpu(struct tcp_mib))) ++ goto out8; ++ if (!(ve->_udp_statistics[0] = alloc_percpu(struct udp_mib))) ++ goto out9; ++ if (!(ve->_udp_statistics[1] = alloc_percpu(struct udp_mib))) ++ goto out10; ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ if (!(ve->_ipv6_statistics[0] = alloc_percpu(struct ipstats_mib))) ++ goto out11; ++ if (!(ve->_ipv6_statistics[1] = alloc_percpu(struct ipstats_mib))) ++ goto out12; ++ if (!(ve->_icmpv6_statistics[0] = alloc_percpu(struct icmpv6_mib))) ++ goto out13; ++ if (!(ve->_icmpv6_statistics[1] = alloc_percpu(struct icmpv6_mib))) ++ goto out14; ++ if (!(ve->_udp_stats_in6[0] = alloc_percpu(struct udp_mib))) ++ goto out15; ++ if (!(ve->_udp_stats_in6[1] = alloc_percpu(struct udp_mib))) ++ goto out16; ++#endif ++ return 0; ++fini: ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ free_percpu(ve->_udp_stats_in6[1]); ++out16: ++ free_percpu(ve->_udp_stats_in6[0]); ++out15: ++ 
free_percpu(ve->_icmpv6_statistics[1]); ++out14: ++ free_percpu(ve->_icmpv6_statistics[0]); ++out13: ++ free_percpu(ve->_ipv6_statistics[1]); ++out12: ++ free_percpu(ve->_ipv6_statistics[0]); ++out11: ++#endif ++ free_percpu(ve->_udp_statistics[1]); ++out10: ++ free_percpu(ve->_udp_statistics[0]); ++out9: ++ free_percpu(ve->_tcp_statistics[1]); ++out8: ++ free_percpu(ve->_tcp_statistics[0]); ++out7: ++ free_percpu(ve->_icmp_statistics[1]); ++out6: ++ free_percpu(ve->_icmp_statistics[0]); ++out5: ++ free_percpu(ve->_ip_statistics[1]); ++out4: ++ free_percpu(ve->_ip_statistics[0]); ++out3: ++ free_percpu(ve->_net_statistics[1]); ++out2: ++ free_percpu(ve->_net_statistics[0]); ++out1: ++ return -ENOMEM; ++} ++ ++static inline int init_ve_mibs(struct ve_struct *ve) ++{ ++ return init_fini_ve_mibs(ve, 0); ++} ++ ++static inline void fini_ve_mibs(struct ve_struct *ve) ++{ ++ (void)init_fini_ve_mibs(ve, 1); ++} ++ ++extern struct net_device templ_loopback_dev; ++static void veloop_setup(struct net_device *dev) ++{ ++ int padded; ++ padded = dev->padded; ++ memcpy(dev, &templ_loopback_dev, sizeof(struct net_device)); ++ dev->padded = padded; ++} ++ ++static int init_ve_netdev(void) ++{ ++ struct ve_struct *ve; ++ struct net_device_stats *stats; ++ int err; ++ ++ ve = get_exec_env(); ++ INIT_HLIST_HEAD(&ve->_net_dev_head); ++ ve->_net_dev_base = NULL; ++ ve->_net_dev_tail = &ve->_net_dev_base; ++ ++ ve->_loopback_dev = alloc_netdev(0, templ_loopback_dev.name, ++ veloop_setup); ++ if (ve->_loopback_dev == NULL) ++ return -ENOMEM; ++ if (loopback_dev.get_stats != NULL) { ++ stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); ++ if (stats != NULL) { ++ memset(stats, 0, sizeof(struct net_device_stats)); ++ ve->_loopback_dev->priv = stats; ++ ve->_loopback_dev->get_stats = loopback_dev.get_stats; ++ ve->_loopback_dev->destructor = loopback_dev.destructor; ++ } ++ } ++ err = register_netdev(ve->_loopback_dev); ++ if (err) { ++ if (ve->_loopback_dev->priv != NULL) ++ 
kfree(ve->_loopback_dev->priv); ++ free_netdev(ve->_loopback_dev); ++ } ++ return err; ++} ++ ++static void fini_ve_netdev(void) ++{ ++ struct ve_struct *ve; ++ struct net_device *dev; ++ ++ ve = get_exec_env(); ++ while (1) { ++ rtnl_lock(); ++ /* ++ * loopback is special, it can be referenced in fib's, ++ * so it must be freed the last. Doing so is ++ * sufficient to guarantee absence of such references. ++ */ ++ if (dev_base == ve->_loopback_dev) ++ dev = dev_base->next; ++ else ++ dev = dev_base; ++ if (dev == NULL) ++ break; ++ unregister_netdevice(dev); ++ rtnl_unlock(); ++ free_netdev(dev); ++ } ++ unregister_netdevice(ve->_loopback_dev); ++ rtnl_unlock(); ++ free_netdev(ve->_loopback_dev); ++ ve->_loopback_dev = NULL; ++} ++#else ++#define init_ve_mibs(ve) (0) ++#define fini_ve_mibs(ve) do { } while (0) ++#define init_ve_netdev() (0) ++#define fini_ve_netdev() do { } while (0) ++#endif ++ ++static int prepare_proc_root(struct ve_struct *ve) ++{ ++ struct proc_dir_entry *de; ++ ++ de = kmalloc(sizeof(struct proc_dir_entry) + 6, GFP_KERNEL); ++ if (de == NULL) ++ return -ENOMEM; ++ memset(de, 0, sizeof(struct proc_dir_entry)); ++ memcpy(de + 1, "/proc", 6); ++ de->name = (char *)(de + 1); ++ de->namelen = 5; ++ de->mode = S_IFDIR | S_IRUGO | S_IXUGO; ++ de->nlink = 2; ++ atomic_set(&de->count, 1); ++ ++ ve->proc_root = de; ++ return 0; ++} ++ ++#ifdef CONFIG_PROC_FS ++static int init_ve_proc(struct ve_struct *ve) ++{ ++ int err; ++ struct proc_dir_entry *de; ++ ++ err = prepare_proc_root(ve); ++ if (err) ++ goto out_root; ++ ++ err = register_ve_fs_type(ve, &proc_fs_type, ++ &ve->proc_fstype, &ve->proc_mnt); ++ if (err) ++ goto out_reg; ++ ++ /* create necessary /proc subdirs in VE local proc tree */ ++ err = -ENOMEM; ++ de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL); ++ if (!de) ++ goto out_vz; ++ ++#ifdef CONFIG_VE_IPTABLES ++ proc_net = proc_mkdir("net", NULL); ++ if (!proc_net) ++ goto out_net; ++#endif ++ ++#if defined(CONFIG_IPV6) || 
defined(CONFIG_IPV6_MODULE) ++ if (ve_snmp_proc_init()) ++ goto out_snmp; ++#endif ++ ++ return 0; ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++out_snmp: ++ remove_proc_entry("net", NULL); ++#endif ++#ifdef CONFIG_VE_IPTABLES ++out_net: ++ remove_proc_entry("vz", NULL); ++#endif ++out_vz: ++ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt); ++ ve->proc_mnt = NULL; ++out_reg: ++ /* proc_fstype and proc_root are freed in real_put_ve -> free_ve_proc */ ++ ; ++out_root: ++ return err; ++} ++ ++static void fini_ve_proc(struct ve_struct *ve) ++{ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ ve_snmp_proc_fini(); ++#endif ++#ifdef CONFIG_VE_IPTABLES ++ remove_proc_entry("net", NULL); ++ proc_net = NULL; ++#endif ++ remove_proc_entry("vz", NULL); ++ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt); ++ ve->proc_mnt = NULL; ++} ++ ++static void free_ve_proc(struct ve_struct *ve) ++{ ++ /* proc filesystem frees proc_dir_entries on remove_proc_entry() only, ++ so we check that everything was removed and not lost */ ++ if (ve->proc_root && ve->proc_root->subdir) { ++ struct proc_dir_entry *p = ve->proc_root; ++ printk(KERN_WARNING "VPS: %d: proc entry /proc", ve->veid); ++ while ((p = p->subdir) != NULL) ++ printk("/%s", p->name); ++ printk(" is not removed!\n"); ++ } ++ ++ kfree(ve->proc_root); ++ kfree(ve->proc_fstype); ++ ++ ve->proc_fstype = NULL; ++ ve->proc_root = NULL; ++} ++#else ++#define init_ve_proc(ve) (0) ++#define fini_ve_proc(ve) do { } while (0) ++#define free_ve_proc(ve) do { } while (0) ++#endif ++ ++#ifdef CONFIG_SYSCTL ++static int init_ve_sysctl(struct ve_struct *ve) ++{ ++ int err; ++ ++#ifdef CONFIG_PROC_FS ++ err = -ENOMEM; ++ ve->proc_sys_root = proc_mkdir("sys", 0); ++ if (ve->proc_sys_root == NULL) ++ goto out_proc; ++#endif ++ INIT_LIST_HEAD(&ve->sysctl_lh); ++ err = register_ve_sysctltables(ve); ++ if (err) ++ goto out_reg; ++ ++ err = devinet_sysctl_init(ve); ++ if (err) ++ goto out_dev; ++ ++#if 
defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ err = addrconf_sysctl_init(ve); ++ if (err) ++ goto out_dev6; ++#endif ++ ++ return 0; ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++out_dev6: ++ devinet_sysctl_fini(ve); ++#endif ++out_dev: ++ unregister_ve_sysctltables(ve); ++ free_ve_sysctltables(ve); ++out_reg: ++#ifdef CONFIG_PROC_FS ++ remove_proc_entry("sys", NULL); ++out_proc: ++#endif ++ return err; ++} ++ ++static void fini_ve_sysctl(struct ve_struct *ve) ++{ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ addrconf_sysctl_fini(ve); ++#endif ++ devinet_sysctl_fini(ve); ++ unregister_ve_sysctltables(ve); ++ remove_proc_entry("sys", NULL); ++} ++ ++static void free_ve_sysctl(struct ve_struct *ve) ++{ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ addrconf_sysctl_free(ve); ++#endif ++ devinet_sysctl_free(ve); ++ free_ve_sysctltables(ve); ++} ++#else ++#define init_ve_sysctl(ve) (0) ++#define fini_ve_sysctl(ve) do { } while (0) ++#define free_ve_sysctl(ve) do { } while (0) ++#endif ++ ++#ifdef CONFIG_UNIX98_PTYS ++#include <linux/devpts_fs.h> ++ ++static int init_ve_devpts(struct ve_struct *ve) ++{ ++ int err; ++ ++ err = -ENOMEM; ++ ve->devpts_config = kmalloc(sizeof(struct devpts_config), GFP_KERNEL); ++ if (ve->devpts_config == NULL) ++ goto out; ++ memset(ve->devpts_config, 0, sizeof(struct devpts_config)); ++ ve->devpts_config->mode = 0600; ++ err = register_ve_fs_type(ve, &devpts_fs_type, ++ &ve->devpts_fstype, &ve->devpts_mnt); ++ if (err) { ++ kfree(ve->devpts_config); ++ ve->devpts_config = NULL; ++ } ++out: ++ return err; ++} ++ ++static void fini_ve_devpts(struct ve_struct *ve) ++{ ++ unregister_ve_fs_type(ve->devpts_fstype, ve->devpts_mnt); ++ /* devpts_fstype is freed in real_put_ve -> free_ve_filesystems */ ++ ve->devpts_mnt = NULL; ++ kfree(ve->devpts_config); ++ ve->devpts_config = NULL; ++} ++#else ++#define init_ve_devpts(ve) (0) ++#define fini_ve_devpts(ve) do { } while (0) ++#endif ++ ++static 
int init_ve_shmem(struct ve_struct *ve) ++{ ++ return register_ve_fs_type(ve, ++ &tmpfs_fs_type, ++ &ve->shmem_fstype, ++ &ve->shmem_mnt); ++} ++ ++static void fini_ve_shmem(struct ve_struct *ve) ++{ ++ unregister_ve_fs_type(ve->shmem_fstype, ve->shmem_mnt); ++ /* shmem_fstype is freed in real_put_ve -> free_ve_filesystems */ ++ ve->shmem_mnt = NULL; ++} ++ ++static inline int init_ve_sysfs_root(struct ve_struct *ve) ++{ ++ struct sysfs_dirent *sysfs_root; ++ ++ sysfs_root = kmalloc(sizeof(struct sysfs_dirent), GFP_KERNEL); ++ if (sysfs_root == NULL) ++ return -ENOMEM; ++ ++ memset(sysfs_root, 0, sizeof(struct sysfs_dirent)); ++ INIT_LIST_HEAD(&sysfs_root->s_sibling); ++ INIT_LIST_HEAD(&sysfs_root->s_children); ++ sysfs_root->s_type = SYSFS_ROOT; ++ ve->sysfs_root = sysfs_root; ++ return 0; ++} ++ ++static int init_ve_sysfs(struct ve_struct *ve) ++{ ++ struct subsystem *subsys; ++ struct class *nc; ++ int err; ++ extern struct subsystem class_obj_subsys; ++ extern struct subsystem class_subsys; ++ extern struct class net_class; ++ ++#ifdef CONFIG_SYSFS ++ err = 0; ++ if (ve->features & VE_FEATURE_SYSFS) { ++ err = init_ve_sysfs_root(ve); ++ if (err != 0) ++ goto out; ++ err = register_ve_fs_type(ve, ++ &sysfs_fs_type, ++ &ve->sysfs_fstype, ++ &ve->sysfs_mnt); ++ } ++ if (err != 0) ++ goto out_fs_type; ++#endif ++ err = -ENOMEM; ++ subsys = kmalloc(sizeof(*subsys), GFP_KERNEL); ++ if (subsys == NULL) ++ goto out_class_obj; ++ /* ick, this is ugly, the things we go through to keep from showing up ++ * in sysfs... 
*/ ++ memset(subsys, 0, sizeof(*subsys)); ++ memcpy(&subsys->kset.kobj.name, &class_obj_subsys.kset.kobj.name, ++ sizeof(subsys->kset.kobj.name)); ++ subsys->kset.ktype = class_obj_subsys.kset.ktype; ++ subsys->kset.uevent_ops = class_obj_subsys.kset.uevent_ops; ++ subsystem_init(subsys); ++ if (!subsys->kset.subsys) ++ subsys->kset.subsys = subsys; ++ ve->class_obj_subsys = subsys; ++ ++ err = -ENOMEM; ++ subsys = kmalloc(sizeof(*subsys), GFP_KERNEL); ++ if (subsys == NULL) ++ goto out_class_subsys; ++ /* ick, this is ugly, the things we go through to keep from showing up ++ * in sysfs... */ ++ memset(subsys, 0, sizeof(*subsys)); ++ memcpy(&subsys->kset.kobj.name, &class_subsys.kset.kobj.name, ++ sizeof(subsys->kset.kobj.name)); ++ subsys->kset.ktype = class_subsys.kset.ktype; ++ subsys->kset.uevent_ops = class_subsys.kset.uevent_ops; ++ ve->class_subsys = subsys; ++ err = subsystem_register(subsys); ++ if (err != 0) ++ goto out_register; ++ ++ err = -ENOMEM; ++ nc = kmalloc(sizeof(*nc), GFP_KERNEL); ++ if (nc == NULL) ++ goto out_nc; ++ memset(nc, 0, sizeof(*nc)); ++ nc->name = net_class.name; ++ nc->release = net_class.release; ++ nc->uevent = net_class.uevent; ++ err = class_register(nc); ++ if (err != 0) ++ goto out_class_register; ++ ve->net_class = nc; ++ ++ return err; ++ ++out_class_register: ++ kfree(nc); ++out_nc: ++ subsystem_unregister(subsys); ++out_register: ++ kfree(ve->class_subsys); ++out_class_subsys: ++ kfree(ve->class_obj_subsys); ++out_class_obj: ++#ifdef CONFIG_SYSFS ++ unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt); ++ /* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */ ++out_fs_type: ++ kfree(ve->sysfs_root); ++ ve->sysfs_root = NULL; ++#endif ++ ve->class_subsys = NULL; ++ ve->class_obj_subsys = NULL; ++out: ++ return err; ++} ++ ++static void fini_ve_sysfs(struct ve_struct *ve) ++{ ++ class_unregister(ve->net_class); ++ subsystem_unregister(ve->class_subsys); ++ ++ kfree(ve->net_class); ++ kfree(ve->class_subsys); ++ 
kfree(ve->class_obj_subsys); ++ ++ ve->net_class = NULL; ++ ve->class_subsys = NULL; ++ ve->class_obj_subsys = NULL; ++#ifdef CONFIG_SYSFS ++ unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt); ++ ve->sysfs_mnt = NULL; ++ kfree(ve->sysfs_root); ++ ve->sysfs_root = NULL; ++ /* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */ ++#endif ++} ++ ++static void free_ve_filesystems(struct ve_struct *ve) ++{ ++#ifdef CONFIG_SYSFS ++ kfree(ve->sysfs_fstype); ++ ve->sysfs_fstype = NULL; ++#endif ++ kfree(ve->shmem_fstype); ++ ve->shmem_fstype = NULL; ++ ++ kfree(ve->devpts_fstype); ++ ve->devpts_fstype = NULL; ++ ++ free_ve_proc(ve); ++} ++ ++static int init_printk(struct ve_struct *ve) ++{ ++ struct ve_prep_printk { ++ wait_queue_head_t log_wait; ++ unsigned long log_start; ++ unsigned long log_end; ++ unsigned long logged_chars; ++ } *tmp; ++ ++ tmp = kmalloc(sizeof(struct ve_prep_printk), GFP_KERNEL); ++ if (!tmp) ++ return -ENOMEM; ++ memset(tmp, 0, sizeof(struct ve_prep_printk)); ++ init_waitqueue_head(&tmp->log_wait); ++ ve->_log_wait = &tmp->log_wait; ++ ve->_log_start = &tmp->log_start; ++ ve->_log_end = &tmp->log_end; ++ ve->_logged_chars = &tmp->logged_chars; ++ /* ve->log_buf will be initialized later by ve_log_init() */ ++ return 0; ++} ++ ++static void fini_printk(struct ve_struct *ve) ++{ ++ /* ++ * there is no spinlock protection here because nobody can use ++ * log_buf at the moments when this code is called. ++ */ ++ kfree(ve->log_buf); ++ kfree(ve->_log_wait); ++} ++ ++static void fini_venet(struct ve_struct *ve) ++{ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ tcp_v4_kill_ve_sockets(ve); ++#endif ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ ve_mapped_devs_cleanup(ve); ++#endif ++} ++ ++static int init_ve_sched(struct ve_struct *ve) ++{ ++#ifdef CONFIG_FAIRSCHED ++ int err; ++ ++ /* ++ * We refuse to switch to an already existing node since nodes ++ * keep a pointer to their ve_struct... 
++ */ ++ err = sys_fairsched_mknod(0, 1, ve->veid); ++ if (err < 0) { ++ printk(KERN_WARNING "Can't create fairsched node %d\n", ++ ve->veid); ++ return err; ++ } ++ err = sys_fairsched_mvpr(current->pid, ve->veid); ++ if (err) { ++ printk(KERN_WARNING "Can't switch to fairsched node %d\n", ++ ve->veid); ++ if (sys_fairsched_rmnod(ve->veid)) ++ printk(KERN_ERR "Can't clean fairsched node %d\n", ++ ve->veid); ++ return err; ++ } ++#endif ++ ve_sched_attach(ve); ++ return 0; ++} ++ ++static void fini_ve_sched(struct ve_struct *ve) ++{ ++#ifdef CONFIG_FAIRSCHED ++ if (task_vsched_id(current) == ve->veid) ++ if (sys_fairsched_mvpr(current->pid, fairsched_init_node.id)) ++ printk(KERN_WARNING "Can't leave fairsched node %d\n", ++ ve->veid); ++ if (sys_fairsched_rmnod(ve->veid)) ++ printk(KERN_ERR "Can't remove fairsched node %d\n", ++ ve->veid); ++#endif ++} ++ ++static int init_ve_struct(struct ve_struct *ve, envid_t veid, ++ u32 class_id, env_create_param_t *data, ++ struct task_struct *init_tsk) ++{ ++ int n; ++ ++ memset(ve, 0, sizeof(*ve)); ++ (void)get_ve(ve); ++ ve->veid = veid; ++ ve->class_id = class_id; ++ ve->init_entry = init_tsk; ++ ve->features = data->feature_mask; ++ INIT_LIST_HEAD(&ve->vetask_lh); ++ init_rwsem(&ve->op_sem); ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ ve->ifindex = -1; ++#endif ++ ++ for(n = 0; n < UIDHASH_SZ_VE; ++n) ++ INIT_LIST_HEAD(&ve->uidhash_table[n]); ++ ++ do_posix_clock_monotonic_gettime(&ve->start_timespec); ++ ve->start_jiffies = jiffies; ++ ve->start_cycles = get_cycles(); ++ ve->virt_pids = glob_virt_pids; ++ ++ return 0; ++} ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * /proc/meminfo virtualization ++ * ++ ********************************************************************** ++ **********************************************************************/ ++static int 
ve_set_meminfo(envid_t veid, unsigned long val) ++{ ++ struct ve_struct *ve; ++ ++ ve = get_ve_by_id(veid); ++ if (!ve) ++ return -EINVAL; ++ ++ ve->meminfo_val = val; ++ real_put_ve(ve); ++ return 0; ++} ++ ++static int init_ve_meminfo(struct ve_struct *ve) ++{ ++ ve->meminfo_val = 0; ++ return 0; ++} ++ ++static inline void fini_ve_meminfo(struct ve_struct *ve) ++{ ++} ++ ++static void set_ve_root(struct ve_struct *ve, struct task_struct *tsk) ++{ ++ read_lock(&tsk->fs->lock); ++ ve->fs_rootmnt = tsk->fs->rootmnt; ++ ve->fs_root = tsk->fs->root; ++ read_unlock(&tsk->fs->lock); ++ mark_tree_virtual(ve->fs_rootmnt, ve->fs_root); ++} ++ ++static void set_ve_caps(struct ve_struct *ve, struct task_struct *tsk) ++{ ++ /* required for real_setdevperms from register_ve_<fs> above */ ++ memcpy(&ve->cap_default, &tsk->cap_effective, sizeof(kernel_cap_t)); ++ cap_lower(ve->cap_default, CAP_SETVEID); ++} ++ ++static int ve_list_add(struct ve_struct *ve) ++{ ++ write_lock_irq(&ve_list_guard); ++ if (__find_ve_by_id(ve->veid) != NULL) ++ goto err_exists; ++ ++ ve->prev = NULL; ++ ve->next = ve_list_head; ++ if (ve_list_head) ++ ve_list_head->prev = ve; ++ ve_list_head = ve; ++ nr_ve++; ++ write_unlock_irq(&ve_list_guard); ++ return 0; ++ ++err_exists: ++ write_unlock_irq(&ve_list_guard); ++ return -EEXIST; ++} ++ ++static void ve_list_del(struct ve_struct *ve) ++{ ++ write_lock_irq(&ve_list_guard); ++ if (ve->prev) ++ ve->prev->next = ve->next; ++ else ++ ve_list_head = ve->next; ++ if (ve->next) ++ ve->next->prev = ve->prev; ++ nr_ve--; ++ write_unlock_irq(&ve_list_guard); ++} ++ ++static void set_task_ve_caps(struct task_struct *tsk, struct ve_struct *ve) ++{ ++ spin_lock(&task_capability_lock); ++ cap_mask(tsk->cap_effective, ve->cap_default); ++ cap_mask(tsk->cap_inheritable, ve->cap_default); ++ cap_mask(tsk->cap_permitted, ve->cap_default); ++ spin_unlock(&task_capability_lock); ++} ++ ++static void move_task(struct task_struct *tsk, struct ve_struct *new, ++ struct 
ve_struct *old) ++{ ++ /* this probihibts ptracing of task entered to VPS from host system */ ++ tsk->mm->vps_dumpable = 0; ++ /* setup capabilities before enter */ ++ set_task_ve_caps(tsk, new); ++ ++ write_lock_irq(&tasklist_lock); ++ VE_TASK_INFO(tsk)->owner_env = new; ++ VE_TASK_INFO(tsk)->exec_env = new; ++ REMOVE_VE_LINKS(tsk); ++ SET_VE_LINKS(tsk); ++ ++ atomic_dec(&old->pcounter); ++ atomic_inc(&new->pcounter); ++ real_put_ve(old); ++ get_ve(new); ++ write_unlock_irq(&tasklist_lock); ++} ++ ++#ifdef CONFIG_VE_IPTABLES ++extern int init_netfilter(void); ++extern void fini_netfilter(void); ++#define init_ve_netfilter() init_netfilter() ++#define fini_ve_netfilter() fini_netfilter() ++ ++#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args) \ ++({ \ ++ int ret = 0; \ ++ if (VE_IPT_CMP(mask, full_mask) && \ ++ VE_IPT_CMP((ve)->_iptables_modules, \ ++ full_mask & ~(full_mask##_MOD))) { \ ++ ret = KSYMERRCALL(1, mod, name, args); \ ++ if (ret == 0) \ ++ (ve)->_iptables_modules |= \ ++ full_mask##_MOD; \ ++ if (ret == 1) \ ++ ret = 0; \ ++ } \ ++ ret; \ ++}) ++ ++#define KSYMIPTFINI(mask, full_mask, mod, name, args) \ ++({ \ ++ if (VE_IPT_CMP(mask, full_mask##_MOD)) \ ++ KSYMSAFECALL_VOID(mod, name, args); \ ++}) ++ ++ ++static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask, ++ int init_or_cleanup) ++{ ++ int err; ++ ++ err = 0; ++ if (!init_or_cleanup) ++ goto cleanup; ++ ++ /* init part */ ++#if defined(CONFIG_NETFILTER_XTABLES) || \ ++ defined(CONFIG_NETFILTER_XTABLES_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES, ++ x_tables, init_xtables, ()); ++ if (err < 0) ++ goto err_xtables; ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES, ++ xt_tcpudp, init_xt_tcpudp, ()); ++ if (err < 0) ++ goto err_xt_tcpudp; ++#endif ++#if defined(CONFIG_IP_NF_IPTABLES) || \ ++ defined(CONFIG_IP_NF_IPTABLES_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES, ++ ip_tables, init_iptables, ()); ++ if (err < 0) ++ goto err_iptables; ++#endif ++#if 
defined(CONFIG_IP6_NF_IPTABLES) || \ ++ defined(CONFIG_IP6_NF_IPTABLES_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES, ++ ip6_tables, init_ip6tables, ()); ++ if (err < 0) ++ goto err_ip6tables; ++#endif ++#if defined(CONFIG_IP_NF_CONNTRACK) || \ ++ defined(CONFIG_IP_NF_CONNTRACK_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK, ++ ip_conntrack, init_iptable_conntrack, ()); ++ if (err < 0) ++ goto err_iptable_conntrack; ++#endif ++#if defined(CONFIG_IP_NF_FTP) || \ ++ defined(CONFIG_IP_NF_FTP_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_FTP, ++ ip_conntrack_ftp, init_iptable_ftp, ()); ++ if (err < 0) ++ goto err_iptable_ftp; ++#endif ++#if defined(CONFIG_IP_NF_IRC) || \ ++ defined(CONFIG_IP_NF_IRC_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_IRC, ++ ip_conntrack_irc, init_iptable_irc, ()); ++ if (err < 0) ++ goto err_iptable_irc; ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_CONNTRACK, ++ xt_conntrack, init_xt_conntrack_match, ()); ++ if (err < 0) ++ goto err_xt_conntrack_match; ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_STATE) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_STATE_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_STATE, ++ xt_state, init_xt_state, ()); ++ if (err < 0) ++ goto err_xt_state; ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_HELPER) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_HELPER_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_HELPER, ++ xt_helper, init_xt_helper, ()); ++ if (err < 0) ++ goto err_xt_helper; ++#endif ++#if defined(CONFIG_IP_NF_NAT) || \ ++ defined(CONFIG_IP_NF_NAT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT, ++ ip_nat, ip_nat_init, ()); ++ if (err < 0) ++ goto err_iptable_nat; ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT, ++ iptable_nat, init_iptable_nat, ()); ++ if (err < 0) ++ goto err_iptable_nat2; ++#endif ++#if 
defined(CONFIG_IP_NF_NAT_FTP) || \ ++ defined(CONFIG_IP_NF_NAT_FTP_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_FTP, ++ ip_nat_ftp, init_iptable_nat_ftp, ()); ++ if (err < 0) ++ goto err_iptable_nat_ftp; ++#endif ++#if defined(CONFIG_IP_NF_NAT_IRC) || \ ++ defined(CONFIG_IP_NF_NAT_IRC_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_IRC, ++ ip_nat_irc, init_iptable_nat_irc, ()); ++ if (err < 0) ++ goto err_iptable_nat_irc; ++#endif ++#if defined(CONFIG_IP_NF_FILTER) || \ ++ defined(CONFIG_IP_NF_FILTER_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER, ++ iptable_filter, init_iptable_filter, ()); ++ if (err < 0) ++ goto err_iptable_filter; ++#endif ++#if defined(CONFIG_IP6_NF_FILTER) || \ ++ defined(CONFIG_IP6_NF_FILTER_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER, ++ ip6table_filter, init_ip6table_filter, ()); ++ if (err < 0) ++ goto err_ip6table_filter; ++#endif ++#if defined(CONFIG_IP_NF_MANGLE) || \ ++ defined(CONFIG_IP_NF_MANGLE_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE, ++ iptable_mangle, init_iptable_mangle, ()); ++ if (err < 0) ++ goto err_iptable_mangle; ++#endif ++#if defined(CONFIG_IP6_NF_MANGLE) || \ ++ defined(CONFIG_IP6_NF_MANGLE_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE, ++ ip6table_mangle, init_ip6table_mangle, ()); ++ if (err < 0) ++ goto err_ip6table_mangle; ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_LIMIT) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_LIMIT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LIMIT, ++ xt_limit, init_xt_limit, ()); ++ if (err < 0) ++ goto err_xt_limit; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \ ++ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_MULTIPORT, ++ ipt_multiport, init_iptable_multiport, ()); ++ if (err < 0) ++ goto err_iptable_multiport; ++#endif ++#if defined(CONFIG_IP6_NF_MATCH_MULTIPORT) || \ ++ defined(CONFIG_IP6_NF_MATCH_MULTIPORT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, 
VE_IP_MATCH_MULTIPORT, ++ ip6t_multiport, init_ip6table_multiport, ()); ++ if (err < 0) ++ goto err_ip6table_multiport; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TOS) || \ ++ defined(CONFIG_IP_NF_MATCH_TOS_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TOS, ++ ipt_tos, init_iptable_tos, ()); ++ if (err < 0) ++ goto err_iptable_tos; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_TOS) || \ ++ defined(CONFIG_IP_NF_TARGET_TOS_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TOS, ++ ipt_TOS, init_iptable_TOS, ()); ++ if (err < 0) ++ goto err_iptable_TOS; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_REJECT) || \ ++ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REJECT, ++ ipt_REJECT, init_iptable_REJECT, ()); ++ if (err < 0) ++ goto err_iptable_REJECT; ++#endif ++#if defined(CONFIG_IP6_NF_TARGET_REJECT) || \ ++ defined(CONFIG_IP6_NF_TARGET_REJECT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REJECT, ++ ip6t_REJECT, init_ip6table_REJECT, ()); ++ if (err < 0) ++ goto err_ip6table_REJECT; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \ ++ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TCPMSS, ++ ipt_TCPMSS, init_iptable_TCPMSS, ()); ++ if (err < 0) ++ goto err_iptable_TCPMSS; ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TCPMSS, ++ xt_tcpmss, init_xt_tcpmss, ()); ++ if (err < 0) ++ goto err_xt_tcpmss; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TTL) || \ ++ defined(CONFIG_IP_NF_MATCH_TTL_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TTL, ++ ipt_ttl, init_iptable_ttl, ()); ++ if (err < 0) ++ goto err_iptable_ttl; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_LOG) || \ ++ defined(CONFIG_IP_NF_TARGET_LOG_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_LOG, ++ ipt_LOG, init_iptable_LOG, ()); ++ if (err < 0) ++ goto err_iptable_LOG; 
++#endif ++#if defined(CONFIG_IP6_NF_TARGET_LOG) || \ ++ defined(CONFIG_IP6_NF_TARGET_LOG_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_LOG, ++ ip6t_LOG, init_ip6table_LOG, ()); ++ if (err < 0) ++ goto err_ip6table_LOG; ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_LENGTH) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_LENGTH_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LENGTH, ++ xt_length, init_xt_length, ()); ++ if (err < 0) ++ goto err_xt_length; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \ ++ defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REDIRECT, ++ ipt_REDIRECT, init_iptable_REDIRECT, ()); ++ if (err < 0) ++ goto err_iptable_REDIRECT; ++#endif ++ return 0; ++ ++/* ------------------------------------------------------------------------- */ ++ ++cleanup: ++#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \ ++ defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REDIRECT, ++ ipt_REDIRECT, fini_iptable_REDIRECT, ()); ++err_iptable_REDIRECT: ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_LENGTH) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_LENGTH_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LENGTH, ++ xt_length, fini_xt_length, ()); ++err_xt_length: ++#endif ++#if defined(CONFIG_IP6_NF_TARGET_LOG) || \ ++ defined(CONFIG_IP6_NF_TARGET_LOG_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_LOG, ++ ip6t_LOG, fini_ip6table_LOG, ()); ++err_ip6table_LOG: ++#endif ++#if defined(CONFIG_IP_NF_TARGET_LOG) || \ ++ defined(CONFIG_IP_NF_TARGET_LOG_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_LOG, ++ ipt_LOG, fini_iptable_LOG, ()); ++err_iptable_LOG: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TTL) || \ ++ defined(CONFIG_IP_NF_MATCH_TTL_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TTL, ++ ipt_ttl, fini_iptable_ttl, ()); ++err_iptable_ttl: ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS) || \ ++ 
defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TCPMSS, ++ xt_tcpmss, fini_xt_tcpmss, ()); ++err_xt_tcpmss: ++#endif ++#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \ ++ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TCPMSS, ++ ipt_TCPMSS, fini_iptable_TCPMSS, ()); ++err_iptable_TCPMSS: ++#endif ++#if defined(CONFIG_IP6_NF_TARGET_REJECT) || \ ++ defined(CONFIG_IP6_NF_TARGET_REJECT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REJECT, ++ ip6t_REJECT, fini_ip6table_REJECT, ()); ++err_ip6table_REJECT: ++#endif ++#if defined(CONFIG_IP_NF_TARGET_REJECT) || \ ++ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REJECT, ++ ipt_REJECT, fini_iptable_REJECT, ()); ++err_iptable_REJECT: ++#endif ++#if defined(CONFIG_IP_NF_TARGET_TOS) || \ ++ defined(CONFIG_IP_NF_TARGET_TOS_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TOS, ++ ipt_TOS, fini_iptable_TOS, ()); ++err_iptable_TOS: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TOS) || \ ++ defined(CONFIG_IP_NF_MATCH_TOS_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TOS, ++ ipt_tos, fini_iptable_tos, ()); ++err_iptable_tos: ++#endif ++#if defined(CONFIG_IP6_NF_MATCH_MULTIPORT) || \ ++ defined(CONFIG_IP6_NF_MATCH_MULTIPORT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_MULTIPORT, ++ ip6t_multiport, fini_ip6table_multiport, ()); ++err_ip6table_multiport: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \ ++ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_MULTIPORT, ++ ipt_multiport, fini_iptable_multiport, ()); ++err_iptable_multiport: ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_LIMIT) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_LIMIT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LIMIT, ++ xt_limit, fini_xt_limit, ()); ++err_xt_limit: ++#endif ++#if defined(CONFIG_IP6_NF_MANGLE) || \ ++ 
defined(CONFIG_IP6_NF_MANGLE_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ++ ip6table_mangle, fini_ip6table_mangle, ()); ++err_ip6table_mangle: ++#endif ++#if defined(CONFIG_IP_NF_MANGLE) || \ ++ defined(CONFIG_IP_NF_MANGLE_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ++ iptable_mangle, fini_iptable_mangle, ()); ++err_iptable_mangle: ++#endif ++#if defined(CONFIG_IP6_NF_FILTER) || \ ++ defined(CONFIG_IP6_NF_FILTER_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ++ ip6table_filter, fini_ip6table_filter, ()); ++err_ip6table_filter: ++#endif ++#if defined(CONFIG_IP_NF_FILTER) || \ ++ defined(CONFIG_IP_NF_FILTER_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ++ iptable_filter, fini_iptable_filter, ()); ++err_iptable_filter: ++#endif ++#if defined(CONFIG_IP_NF_NAT_IRC) || \ ++ defined(CONFIG_IP_NF_NAT_IRC_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_IRC, ++ ip_nat_irc, fini_iptable_nat_irc, ()); ++err_iptable_nat_irc: ++#endif ++#if defined(CONFIG_IP_NF_NAT_FTP) || \ ++ defined(CONFIG_IP_NF_NAT_FTP_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_FTP, ++ ip_nat_ftp, fini_iptable_nat_ftp, ()); ++err_iptable_nat_ftp: ++#endif ++#if defined(CONFIG_IP_NF_NAT) || \ ++ defined(CONFIG_IP_NF_NAT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ++ iptable_nat, fini_iptable_nat, ()); ++err_iptable_nat2: ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ++ ip_nat, ip_nat_cleanup, ()); ++err_iptable_nat: ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_HELPER) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_HELPER_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_HELPER, ++ xt_helper, fini_xt_helper, ()); ++err_xt_helper: ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_STATE) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_STATE_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_STATE, ++ xt_state, fini_xt_state, ()); ++err_xt_state: ++#endif ++#if defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) || \ 
++ defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_CONNTRACK, ++ xt_conntrack, fini_xt_conntrack_match, ()); ++err_xt_conntrack_match: ++#endif ++#if defined(CONFIG_IP_NF_IRC) || \ ++ defined(CONFIG_IP_NF_IRC_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_IRC, ++ ip_conntrack_irc, fini_iptable_irc, ()); ++err_iptable_irc: ++#endif ++#if defined(CONFIG_IP_NF_FTP) || \ ++ defined(CONFIG_IP_NF_FTP_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_FTP, ++ ip_conntrack_ftp, fini_iptable_ftp, ()); ++err_iptable_ftp: ++#endif ++#if defined(CONFIG_IP_NF_CONNTRACK) || \ ++ defined(CONFIG_IP_NF_CONNTRACK_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK, ++ ip_conntrack, fini_iptable_conntrack, ()); ++err_iptable_conntrack: ++#endif ++#if defined(CONFIG_IP6_NF_IPTABLES) || \ ++ defined(CONFIG_IP6_NF_IPTABLES_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES, ++ ip6_tables, fini_ip6tables, ()); ++err_ip6tables: ++#endif ++#if defined(CONFIG_IP_NF_IPTABLES) || \ ++ defined(CONFIG_IP_NF_IPTABLES_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES, ++ ip_tables, fini_iptables, ()); ++err_iptables: ++#endif ++#if defined(CONFIG_NETFILTER_XTABLES) || \ ++ defined(CONFIG_NETFILTER_XTABLES_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES, ++ xt_tcpudp, fini_xt_tcpudp, ()); ++err_xt_tcpudp: ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES, ++ x_tables, fini_xtables, ()); ++err_xtables: ++#endif ++ ve->_iptables_modules = 0; ++ ++ return err; ++} ++ ++static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask) ++{ ++ return do_ve_iptables(ve, init_mask, 1); ++} ++ ++static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask) ++{ ++ (void)do_ve_iptables(ve, init_mask, 0); ++} ++ ++static void flush_ve_iptables(struct ve_struct *ve) ++{ ++ /* ++ * flush all rule tables first, ++ * this helps us to avoid refs to freed objs ++ */ ++ 
KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ip_tables, ++ ipt_flush_table, (ve->_ipt_mangle_table)); ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ip6_tables, ++ ip6t_flush_table, (ve->_ip6t_mangle_table)); ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ip_tables, ++ ipt_flush_table, (ve->_ve_ipt_filter_pf)); ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ip6_tables, ++ ip6t_flush_table, (ve->_ve_ip6t_filter_pf)); ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ip_tables, ++ ipt_flush_table, (ve->_ip_conntrack->_ip_nat_table)); ++} ++#else ++#define init_ve_iptables(x, y) (0) ++#define fini_ve_iptables(x, y) do { } while (0) ++#define flush_ve_iptables(x) do { } while (0) ++#define init_ve_netfilter() (0) ++#define fini_ve_netfilter() do { } while (0) ++#endif ++ ++static struct list_head ve_hooks[VE_MAX_HOOKS]; ++static DECLARE_RWSEM(ve_hook_sem); ++ ++int ve_hook_register(struct ve_hook *vh) ++{ ++ struct list_head *lh; ++ struct ve_hook *tmp; ++ ++ down_write(&ve_hook_sem); ++ list_for_each(lh, &ve_hooks[vh->hooknum]) { ++ tmp = list_entry(lh, struct ve_hook, list); ++ if (vh->priority < tmp->priority) ++ break; ++ } ++ list_add_tail(&vh->list, lh); ++ up_write(&ve_hook_sem); ++ return 0; ++} ++EXPORT_SYMBOL(ve_hook_register); ++ ++void ve_hook_unregister(struct ve_hook *vh) ++{ ++ down_write(&ve_hook_sem); ++ list_del(&vh->list); ++ up_write(&ve_hook_sem); ++} ++EXPORT_SYMBOL(ve_hook_unregister); ++ ++static int ve_hook_iterate(unsigned int hooknum, void *data) ++{ ++ struct ve_hook *vh; ++ int err; ++ ++ err = 0; ++ down_read(&ve_hook_sem); ++ list_for_each_entry(vh, &ve_hooks[hooknum], list) { ++ if (!try_module_get(vh->owner)) ++ continue; ++ err = vh->hook(hooknum, data); ++ module_put(vh->owner); ++ if (err) ++ break; ++ } ++ ++ if (err) { ++ list_for_each_entry_continue_reverse(vh, ++ &ve_hooks[hooknum], list) { ++ if (!try_module_get(vh->owner)) ++ continue; ++ if (vh->undo) ++ vh->undo(hooknum, data); ++ module_put(vh->owner); ++ } 
++ } ++ up_read(&ve_hook_sem); ++ return err; ++} ++ ++static void ve_hook_iterate_cleanup(unsigned int hooknum, void *data) ++{ ++ struct ve_hook *vh; ++ ++ down_read(&ve_hook_sem); ++ list_for_each_entry_reverse(vh, &ve_hooks[hooknum], list) { ++ if (!try_module_get(vh->owner)) ++ continue; ++ (void)vh->hook(hooknum, data); ++ module_put(vh->owner); ++ } ++ up_read(&ve_hook_sem); ++} ++ ++static int do_env_create(envid_t veid, unsigned int flags, u32 class_id, ++ env_create_param_t *data, int datalen) ++{ ++ struct task_struct *tsk; ++ struct ve_struct *old; ++ struct ve_struct *old_exec; ++ struct ve_struct *ve; ++ __u64 init_mask; ++ int err; ++ ++ tsk = current; ++ old = VE_TASK_INFO(tsk)->owner_env; ++ ++ if (!thread_group_leader(tsk)) ++ return -EINVAL; ++ ++ if (tsk->signal->tty) { ++ printk("ERR: VE init has controlling terminal\n"); ++ return -EINVAL; ++ } ++ if (tsk->signal->pgrp != tsk->pid || tsk->signal->session != tsk->pid) { ++ int may_setsid; ++ read_lock(&tasklist_lock); ++ may_setsid = (find_pid(PIDTYPE_PGID, tsk->pid) == NULL); ++ read_unlock(&tasklist_lock); ++ if (!may_setsid) { ++ printk("ERR: VE init is process group leader\n"); ++ return -EINVAL; ++ } ++ } ++ ++ ++ VZTRACE("%s: veid=%d classid=%d pid=%d\n", ++ __FUNCTION__, veid, class_id, current->pid); ++ ++ err = -ENOMEM; ++ ve = kmalloc(sizeof(struct ve_struct), GFP_KERNEL); ++ if (ve == NULL) ++ goto err_struct; ++ ++ init_ve_struct(ve, veid, class_id, data, tsk); ++ __module_get(THIS_MODULE); ++ down_write(&ve->op_sem); ++ if (flags & VE_LOCK) ++ ve->is_locked = 1; ++ if ((err = ve_list_add(ve)) < 0) ++ goto err_exist; ++ ++ /* this should be done before context switching */ ++ if ((err = init_printk(ve)) < 0) ++ goto err_log_wait; ++ ++ old_exec = set_exec_env(ve); ++ ++ if ((err = init_ve_sched(ve)) < 0) ++ goto err_sched; ++ ++ /* move user to VE */ ++ if ((err = set_user(0, 0)) < 0) ++ goto err_set_user; ++ ++ set_ve_root(ve, tsk); ++ ++ if ((err = init_ve_utsname(ve))) ++ goto 
err_utsname; ++ ++ if ((err = init_ve_mibs(ve))) ++ goto err_mibs; ++ ++ if ((err = init_ve_proc(ve))) ++ goto err_proc; ++ ++ if ((err = init_ve_sysctl(ve))) ++ goto err_sysctl; ++ ++ if ((err = init_ve_sysfs(ve))) ++ goto err_sysfs; ++ ++ if ((err = ve_arp_init(ve)) < 0) ++ goto err_route; ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ if ((err = ve_ndisc_init(ve)) < 0) ++ goto err_route; ++#endif ++ ++ if ((err = init_ve_route(ve)) < 0) ++ goto err_route; ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ if ((err = init_ve_route6(ve)) < 0) ++ goto err_route; ++#endif ++ ++ if ((err = init_ve_netdev())) ++ goto err_dev; ++ ++ if ((err = init_ve_tty_drivers(ve)) < 0) ++ goto err_tty; ++ ++ if ((err = init_ve_shmem(ve))) ++ goto err_shmem; ++ ++ if ((err = init_ve_devpts(ve))) ++ goto err_devpts; ++ ++ if((err = init_ve_meminfo(ve))) ++ goto err_meminf; ++ ++ /* init SYSV IPC variables */ ++ if ((err = init_ve_ipc(ve)) < 0) ++ goto err_ipc; ++ ++ set_ve_caps(ve, tsk); ++ ++ /* It is safe to initialize netfilter here as routing initialization and ++ interface setup will be done below. This means that NO skb can be ++ passed inside. Den */ ++ /* iptables ve initialization for non ve0; ++ ve0 init is in module_init */ ++ if ((err = init_ve_netfilter()) < 0) ++ goto err_netfilter; ++ ++ init_mask = data ? 
data->iptables_mask : VE_IP_DEFAULT; ++ if ((err = init_ve_iptables(ve, init_mask)) < 0) ++ goto err_iptables; ++ ++ if ((err = alloc_vpid(tsk->pid, 1)) < 0) ++ goto err_vpid; ++ ++ if ((err = ve_hook_iterate(VE_HOOK_INIT, (void *)ve)) < 0) ++ goto err_ve_hook; ++ ++ /* finally: set vpids and move inside */ ++ move_task(tsk, ve, old); ++ ++ set_virt_pid(tsk, 1); ++ set_virt_tgid(tsk, 1); ++ ++ set_special_pids(tsk->pid, tsk->pid); ++ current->signal->tty_old_pgrp = 0; ++ set_virt_pgid(tsk, 1); ++ set_virt_sid(tsk, 1); ++ ++ ve->is_running = 1; ++ up_write(&ve->op_sem); ++ ++ printk(KERN_INFO "VPS: %d: started\n", veid); ++ return veid; ++ ++err_ve_hook: ++ free_vpid(1, ve); ++err_vpid: ++ fini_venet(ve); ++ fini_ve_iptables(ve, init_mask); ++err_iptables: ++ fini_ve_netfilter(); ++err_netfilter: ++ fini_ve_ipc(ve); ++err_ipc: ++ fini_ve_meminfo(ve); ++err_meminf: ++ fini_ve_devpts(ve); ++err_devpts: ++ fini_ve_shmem(ve); ++err_shmem: ++ fini_ve_tty_drivers(ve); ++err_tty: ++ fini_ve_netdev(); ++err_dev: ++ fini_ve_route(ve); ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ fini_ve_route6(ve); ++#endif ++err_route: ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ ve_ndisc_fini(ve); ++#endif ++ ve_arp_fini(ve); ++ fini_ve_sysfs(ve); ++err_sysfs: ++ fini_ve_sysctl(ve); ++err_sysctl: ++ fini_ve_proc(ve); ++err_proc: ++ do_clean_devperms(ve->veid); /* register procfs adds devperms */ ++ fini_ve_mibs(ve); ++err_mibs: ++ /* free_ve_utsname() is called inside real_put_ve() */ ; ++err_utsname: ++ /* It is safe to restore current->envid here because ++ * ve_fairsched_detach does not use current->envid. */ ++ /* Really fairsched code uses current->envid in sys_fairsched_mknod ++ * only. It is correct if sys_fairsched_mknod is called from ++ * userspace. If sys_fairsched_mknod is called from ++ * ve_fairsched_attach, then node->envid and node->parent_node->envid ++ * are explicitly set to valid value after the call. 
*/ ++ /* FIXME */ ++ VE_TASK_INFO(tsk)->owner_env = old; ++ VE_TASK_INFO(tsk)->exec_env = old_exec; ++ /* move user back */ ++ if (set_user(0, 0) < 0) ++ printk(KERN_WARNING"Can't restore UID\n"); ++ ++err_set_user: ++ fini_ve_sched(ve); ++err_sched: ++ (void)set_exec_env(old_exec); ++ ++ /* we can jump here having incorrect envid */ ++ VE_TASK_INFO(tsk)->owner_env = old; ++ fini_printk(ve); ++err_log_wait: ++ ve_list_del(ve); ++ up_write(&ve->op_sem); ++ ++ real_put_ve(ve); ++err_struct: ++ printk(KERN_INFO "VPS: %d: failed to start with err=%d\n", veid, err); ++ return err; ++ ++err_exist: ++ kfree(ve); ++ goto err_struct; ++} ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * VE start/stop callbacks ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++int real_env_create(envid_t veid, unsigned flags, u32 class_id, ++ env_create_param_t *data, int datalen) ++{ ++ int status; ++ struct ve_struct *ve; ++ ++ if (!flags) { ++ status = get_exec_env()->veid; ++ goto out; ++ } ++ ++ status = -EPERM; ++ if (!capable(CAP_SETVEID)) ++ goto out; ++ ++ status = -EINVAL; ++ if ((flags & VE_TEST) && (flags & (VE_ENTER|VE_CREATE))) ++ goto out; ++ ++ status = -EINVAL; ++ ve = get_ve_by_id(veid); ++ if (ve) { ++ if (flags & VE_TEST) { ++ status = 0; ++ goto out_put; ++ } ++ if (flags & VE_EXCLUSIVE) { ++ status = -EACCES; ++ goto out_put; ++ } ++ if (flags & VE_CREATE) { ++ flags &= ~VE_CREATE; ++ flags |= VE_ENTER; ++ } ++ } else { ++ if (flags & (VE_TEST|VE_ENTER)) { ++ status = -ESRCH; ++ goto out; ++ } ++ } ++ ++ if (flags & VE_CREATE) { ++ status = do_env_create(veid, flags, class_id, data, datalen); ++ goto out; ++ } else if (flags & VE_ENTER) ++ status = do_env_enter(ve, flags); ++ ++ /* else: returning EINVAL */ ++ ++out_put: ++ real_put_ve(ve); ++out: ++ 
return status; ++} ++ ++static int do_env_enter(struct ve_struct *ve, unsigned int flags) ++{ ++ struct task_struct *tsk = current; ++ int err; ++ ++ VZTRACE("%s: veid=%d\n", __FUNCTION__, ve->veid); ++ ++ err = -EBUSY; ++ down_read(&ve->op_sem); ++ if (!ve->is_running) ++ goto out_up; ++ if (ve->is_locked && !(flags & VE_SKIPLOCK)) ++ goto out_up; ++ ++#ifdef CONFIG_FAIRSCHED ++ err = sys_fairsched_mvpr(current->pid, ve->veid); ++ if (err) ++ goto out_up; ++#endif ++ ++ ve_sched_attach(ve); ++ move_task(current, ve, VE_TASK_INFO(tsk)->owner_env); ++ err = VE_TASK_INFO(tsk)->owner_env->veid; ++ ++out_up: ++ up_read(&ve->op_sem); ++ return err; ++} ++ ++static void env_cleanup(struct ve_struct *ve) ++{ ++ struct ve_struct *old_ve; ++ ++ VZTRACE("real_do_env_cleanup\n"); ++ ++ down_read(&ve->op_sem); ++ old_ve = set_exec_env(ve); ++ ++ ve_hook_iterate_cleanup(VE_HOOK_FINI, (void *)ve); ++ ++ fini_venet(ve); ++ ++ /* no new packets in flight beyond this point */ ++ synchronize_net(); ++ /* skb hold dst_entry, and in turn lies in the ip fragment queue */ ++ ip_fragment_cleanup(ve); ++ ++ fini_ve_netdev(); ++ fini_ve_route(ve); ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ fini_ve_route6(ve); ++#endif ++ ve_arp_fini(ve); ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ ve_ndisc_fini(ve); ++#endif ++ ++ /* kill iptables */ ++ /* No skb belonging to VE can exist at this point as unregister_netdev ++ is an operation awaiting until ALL skb's gone */ ++ flush_ve_iptables(ve); ++ fini_ve_iptables(ve, ve->_iptables_modules); ++ fini_ve_netfilter(); ++ ++ ve_ipc_cleanup(); ++ ++ fini_ve_sched(ve); ++ do_clean_devperms(ve->veid); ++ ++ fini_ve_devpts(ve); ++ fini_ve_shmem(ve); ++ fini_ve_sysfs(ve); ++ unregister_ve_tty_drivers(ve); ++ fini_ve_sysctl(ve); ++ fini_ve_proc(ve); ++ fini_ve_meminfo(ve); ++ ++ fini_ve_mibs(ve); ++ ++ (void)set_exec_env(old_ve); ++ fini_printk(ve); /* no printk can happen in ve context anymore */ ++ ++ ve_list_del(ve); ++ 
up_read(&ve->op_sem); ++ ++ real_put_ve(ve); ++} ++ ++static struct list_head ve_cleanup_list; ++static spinlock_t ve_cleanup_lock; ++ ++static DECLARE_COMPLETION(vzmond_complete); ++static struct task_struct *vzmond_thread; ++static volatile int stop_vzmond; ++ ++void real_do_env_cleanup(struct ve_struct *ve) ++{ ++ spin_lock(&ve_cleanup_lock); ++ list_add_tail(&ve->cleanup_list, &ve_cleanup_list); ++ spin_unlock(&ve_cleanup_lock); ++ wake_up_process(vzmond_thread); ++} ++ ++static void do_pending_env_cleanups(void) ++{ ++ struct ve_struct *ve; ++ ++ spin_lock(&ve_cleanup_lock); ++ while (1) { ++ if (list_empty(&ve_cleanup_list) || need_resched()) ++ break; ++ ve = list_entry(ve_cleanup_list.next, struct ve_struct, ++ cleanup_list); ++ list_del(&ve->cleanup_list); ++ spin_unlock(&ve_cleanup_lock); ++ env_cleanup(ve); ++ spin_lock(&ve_cleanup_lock); ++ } ++ spin_unlock(&ve_cleanup_lock); ++} ++ ++static int have_pending_cleanups(void) ++{ ++ return !list_empty(&ve_cleanup_list); ++} ++ ++static int vzmond(void *arg) ++{ ++ daemonize("vzmond"); ++ vzmond_thread = current; ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ while (!stop_vzmond) { ++ schedule(); ++ try_to_freeze(); ++ if (signal_pending(current)) ++ flush_signals(current); ++ ++ do_pending_env_cleanups(); ++ set_current_state(TASK_INTERRUPTIBLE); ++ if (have_pending_cleanups()) ++ __set_current_state(TASK_RUNNING); ++ } ++ ++ __set_task_state(current, TASK_RUNNING); ++ complete_and_exit(&vzmond_complete, 0); ++} ++ ++static int __init init_vzmond(void) ++{ ++ INIT_LIST_HEAD(&ve_cleanup_list); ++ spin_lock_init(&ve_cleanup_lock); ++ stop_vzmond = 0; ++ return kernel_thread(vzmond, NULL, 0); ++} ++ ++static void fini_vzmond(void) ++{ ++ stop_vzmond = 1; ++ wake_up_process(vzmond_thread); ++ wait_for_completion(&vzmond_complete); ++ WARN_ON(!list_empty(&ve_cleanup_list)); ++} ++ ++void real_do_env_free(struct ve_struct *ve) ++{ ++ VZTRACE("real_do_env_free\n"); ++ ++ ve_ipc_free(ve); /* free SYSV IPC 
resources */ ++ free_ve_tty_drivers(ve); ++ free_ve_utsname(ve); ++ free_ve_sysctl(ve); /* free per ve sysctl data */ ++ free_ve_filesystems(ve); ++ printk(KERN_INFO "VPS: %d: stopped\n", VEID(ve)); ++ kfree(ve); ++ ++ module_put(THIS_MODULE); ++} ++EXPORT_SYMBOL(real_do_env_free); ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * VE TTY handling ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++DCL_VE_OWNER(TTYDRV, struct tty_driver, owner_env) ++ ++static struct tty_driver *alloc_ve_tty_driver(struct tty_driver *base, ++ struct ve_struct *ve) ++{ ++ size_t size; ++ struct tty_driver *driver; ++ ++ driver = kmalloc(sizeof(struct tty_driver), GFP_KERNEL); ++ if (!driver) ++ goto out; ++ ++ memcpy(driver, base, sizeof(struct tty_driver)); ++ ++ driver->driver_state = NULL; ++ ++ size = base->num * 3 * sizeof(void *); ++ if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) { ++ void **p; ++ p = kmalloc(size, GFP_KERNEL); ++ if (!p) ++ goto out_free; ++ memset(p, 0, size); ++ driver->ttys = (struct tty_struct **)p; ++ driver->termios = (struct termios **)(p + driver->num); ++ driver->termios_locked = (struct termios **)(p + driver->num * 2); ++ } else { ++ driver->ttys = NULL; ++ driver->termios = NULL; ++ driver->termios_locked = NULL; ++ } ++ ++ SET_VE_OWNER_TTYDRV(driver, ve); ++ driver->flags |= TTY_DRIVER_INSTALLED; ++ ++ return driver; ++ ++out_free: ++ kfree(driver); ++out: ++ return NULL; ++} ++ ++static void free_ve_tty_driver(struct tty_driver *driver) ++{ ++ if (!driver) ++ return; ++ ++ clear_termios(driver); ++ kfree(driver->ttys); ++ kfree(driver); ++} ++ ++static int alloc_ve_tty_drivers(struct ve_struct* ve) ++{ ++#ifdef CONFIG_LEGACY_PTYS ++ /* Traditional BSD devices */ ++ ve->pty_driver = alloc_ve_tty_driver(pty_driver, ve); ++ if 
(!ve->pty_driver) ++ goto out_mem; ++ ++ ve->pty_slave_driver = alloc_ve_tty_driver(pty_slave_driver, ve); ++ if (!ve->pty_slave_driver) ++ goto out_mem; ++ ++ ve->pty_driver->other = ve->pty_slave_driver; ++ ve->pty_slave_driver->other = ve->pty_driver; ++#endif ++ ++#ifdef CONFIG_UNIX98_PTYS ++ ve->ptm_driver = alloc_ve_tty_driver(ptm_driver, ve); ++ if (!ve->ptm_driver) ++ goto out_mem; ++ ++ ve->pts_driver = alloc_ve_tty_driver(pts_driver, ve); ++ if (!ve->pts_driver) ++ goto out_mem; ++ ++ ve->ptm_driver->other = ve->pts_driver; ++ ve->pts_driver->other = ve->ptm_driver; ++ ++ ve->allocated_ptys = kmalloc(sizeof(*ve->allocated_ptys), GFP_KERNEL); ++ if (!ve->allocated_ptys) ++ goto out_mem; ++ idr_init(ve->allocated_ptys); ++#endif ++ return 0; ++ ++out_mem: ++ free_ve_tty_drivers(ve); ++ return -ENOMEM; ++} ++ ++static void free_ve_tty_drivers(struct ve_struct* ve) ++{ ++#ifdef CONFIG_LEGACY_PTYS ++ free_ve_tty_driver(ve->pty_driver); ++ free_ve_tty_driver(ve->pty_slave_driver); ++ ve->pty_driver = ve->pty_slave_driver = NULL; ++#endif ++#ifdef CONFIG_UNIX98_PTYS ++ free_ve_tty_driver(ve->ptm_driver); ++ free_ve_tty_driver(ve->pts_driver); ++ kfree(ve->allocated_ptys); ++ ve->ptm_driver = ve->pts_driver = NULL; ++ ve->allocated_ptys = NULL; ++#endif ++} ++ ++static inline void __register_tty_driver(struct tty_driver *driver) ++{ ++ list_add(&driver->tty_drivers, &tty_drivers); ++} ++ ++static inline void __unregister_tty_driver(struct tty_driver *driver) ++{ ++ if (!driver) ++ return; ++ list_del(&driver->tty_drivers); ++} ++ ++static int register_ve_tty_drivers(struct ve_struct* ve) ++{ ++ write_lock_irq(&tty_driver_guard); ++#ifdef CONFIG_UNIX98_PTYS ++ __register_tty_driver(ve->ptm_driver); ++ __register_tty_driver(ve->pts_driver); ++#endif ++#ifdef CONFIG_LEGACY_PTYS ++ __register_tty_driver(ve->pty_driver); ++ __register_tty_driver(ve->pty_slave_driver); ++#endif ++ write_unlock_irq(&tty_driver_guard); ++ ++ return 0; ++} ++ ++static void 
unregister_ve_tty_drivers(struct ve_struct* ve) ++{ ++ VZTRACE("unregister_ve_tty_drivers\n"); ++ ++ write_lock_irq(&tty_driver_guard); ++ __unregister_tty_driver(ve->pty_driver); ++ __unregister_tty_driver(ve->pty_slave_driver); ++#ifdef CONFIG_UNIX98_PTYS ++ __unregister_tty_driver(ve->ptm_driver); ++ __unregister_tty_driver(ve->pts_driver); ++#endif ++ write_unlock_irq(&tty_driver_guard); ++} ++ ++static int init_ve_tty_drivers(struct ve_struct *ve) ++{ ++ int err; ++ ++ if ((err = alloc_ve_tty_drivers(ve))) ++ goto err_ttyalloc; ++ if ((err = register_ve_tty_drivers(ve))) ++ goto err_ttyreg; ++ return 0; ++ ++err_ttyreg: ++ free_ve_tty_drivers(ve); ++err_ttyalloc: ++ return err; ++} ++ ++static void fini_ve_tty_drivers(struct ve_struct *ve) ++{ ++ unregister_ve_tty_drivers(ve); ++ free_ve_tty_drivers(ve); ++} ++ ++/* ++ * Free the termios and termios_locked structures because ++ * we don't want to get memory leaks when modular tty ++ * drivers are removed from the kernel. ++ */ ++static void clear_termios(struct tty_driver *driver) ++{ ++ int i; ++ struct termios *tp; ++ ++ if (driver->termios == NULL) ++ return; ++ for (i = 0; i < driver->num; i++) { ++ tp = driver->termios[i]; ++ if (tp) { ++ driver->termios[i] = NULL; ++ kfree(tp); ++ } ++ tp = driver->termios_locked[i]; ++ if (tp) { ++ driver->termios_locked[i] = NULL; ++ kfree(tp); ++ } ++ } ++} ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * Pieces of VE network ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#include <asm/uaccess.h> ++#include <net/sock.h> ++#include <linux/netlink.h> ++#include <linux/rtnetlink.h> ++#include <net/route.h> ++#include <net/ip_fib.h> ++#endif ++ ++#if defined(CONFIG_VE_NETDEV) || 
defined(CONFIG_VE_NETDEV_MODULE) ++static void ve_del_ip_addrs(struct net_device *dev) ++{ ++ struct in_device *in_dev; ++ ++ in_dev = in_dev_get(dev); ++ if (in_dev == NULL) ++ return; ++ ++ while (in_dev->ifa_list != NULL) { ++ inet_del_ifa(in_dev, &in_dev->ifa_list, 1); ++ } ++ in_dev_put(in_dev); ++} ++ ++static void ve_del_ipv6_addrs(struct net_device *dev) ++{ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ addrconf_ifdown(dev, 2); ++#endif ++} ++ ++static int ve_netdev_cleanup(struct net_device *dev, int to_ve) ++{ ++ int err; ++ ++ err = 0; ++ ve_del_ip_addrs(dev); ++ ve_del_ipv6_addrs(dev); ++ if ((dev->flags & IFF_UP) != 0) ++ err = dev_close(dev); ++ synchronize_net(); ++ dev_shutdown(dev); ++ dev_mc_discard(dev); ++ free_divert_blk(dev); ++ synchronize_net(); ++ ++ if (to_ve) ++ dev->orig_mtu = dev->mtu; ++ else { ++ int rc = dev_set_mtu(dev, dev->orig_mtu); ++ if (err == 0) ++ err = rc; ++ } ++ ++ return err; ++} ++ ++static void __ve_dev_move(struct net_device *dev, struct ve_struct *ve_src, ++ struct ve_struct *ve_dst, struct user_beancounter *exec_ub) ++{ ++ struct net_device **dp, *d; ++ struct user_beancounter *ub; ++ ++ for (d = ve_src->_net_dev_base, dp = NULL; d != NULL; ++ dp = &d->next, d = d->next) { ++ if (d == dev) { ++ hlist_del(&dev->name_hlist); ++ hlist_del(&dev->index_hlist); ++ if (ve_src->_net_dev_tail == &dev->next) ++ ve_src->_net_dev_tail = dp; ++ if (dp) ++ *dp = dev->next; ++ dev->next = NULL; ++ break; ++ } ++ } ++ *ve_dst->_net_dev_tail = dev; ++ ve_dst->_net_dev_tail = &dev->next; ++ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ve_dst)); ++ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ve_dst)); ++ dev->owner_env = ve_dst; ++ ++ ub = netdev_bc(dev)->exec_ub; ++ netdev_bc(dev)->exec_ub = get_beancounter(exec_ub); ++ put_beancounter(ub); ++} ++ ++static int ve_dev_add(envid_t veid, char *dev_name) ++{ ++ int err; ++ struct net_device *dev; ++ struct ve_struct *ve; ++ struct hlist_node 
*p; ++ ++ dev = NULL; ++ err = -ESRCH; ++ ++ ve = get_ve_by_id(veid); ++ if (ve == NULL) ++ goto out; ++ ++ rtnl_lock(); ++ ++ read_lock(&dev_base_lock); ++ hlist_for_each(p, dev_name_hash(dev_name, get_ve0())) { ++ struct net_device *d = hlist_entry(p, struct net_device, ++ name_hlist); ++ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) { ++ dev = d; ++ break; ++ } ++ } ++ read_unlock(&dev_base_lock); ++ if (dev == NULL) ++ goto out_unlock; ++ ++ err = -EPERM; ++ if (!ve_is_dev_movable(dev)) ++ goto out_unlock; ++ ++ err = -EINVAL; ++ if (dev->flags & (IFF_SLAVE|IFF_MASTER)) ++ goto out_unlock; ++ ++ ve_netdev_cleanup(dev, 1); ++ ++ write_lock_bh(&dev_base_lock); ++ __ve_dev_move(dev, get_ve0(), ve, get_exec_ub()); ++ write_unlock_bh(&dev_base_lock); ++ ++ err = 0; ++ ++out_unlock: ++ rtnl_unlock(); ++ real_put_ve(ve); ++ ++ if (dev == NULL) ++ printk(KERN_WARNING "Device %s not found\n", dev_name); ++ ++out: ++ return err; ++} ++ ++static int ve_dev_del(envid_t veid, char *dev_name) ++{ ++ int err; ++ struct net_device *dev; ++ struct ve_struct *ve, *old_exec; ++ struct hlist_node *p; ++ ++ dev = NULL; ++ err = -ESRCH; ++ ++ ve = get_ve_by_id(veid); ++ if (ve == NULL) ++ goto out; ++ ++ rtnl_lock(); ++ ++ read_lock(&dev_base_lock); ++ hlist_for_each(p, dev_name_hash(dev_name, ve)) { ++ struct net_device *d = hlist_entry(p, struct net_device, ++ name_hlist); ++ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) { ++ dev = d; ++ break; ++ } ++ } ++ read_unlock(&dev_base_lock); ++ if (dev == NULL) ++ goto out_unlock; ++ ++ err = -EPERM; ++ if (!ve_is_dev_movable(dev)) ++ goto out_unlock; ++ ++ old_exec = set_exec_env(ve); ++ ve_netdev_cleanup(dev, 0); ++ (void)set_exec_env(old_exec); ++ ++ write_lock_bh(&dev_base_lock); ++ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub); ++ write_unlock_bh(&dev_base_lock); ++ ++ err = 0; ++ ++out_unlock: ++ rtnl_unlock(); ++ real_put_ve(ve); ++ ++ if (dev == NULL) ++ printk(KERN_WARNING "Device %s not found\n", dev_name); ++ 
++out: ++ return err; ++} ++ ++int real_ve_dev_map(envid_t veid, int op, char *dev_name) ++{ ++ int err; ++ err = -EPERM; ++ if (!capable(CAP_SETVEID)) ++ goto out; ++ switch (op) ++ { ++ case VE_NETDEV_ADD: ++ err = ve_dev_add(veid, dev_name); ++ break; ++ case VE_NETDEV_DEL: ++ err = ve_dev_del(veid, dev_name); ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } ++out: ++ return err; ++} ++ ++static void ve_mapped_devs_cleanup(struct ve_struct *ve) ++{ ++ struct net_device *dev; ++ ++ rtnl_lock(); ++ write_lock_bh(&dev_base_lock); ++restart: ++ for (dev = ve->_net_dev_base; dev != NULL; dev = dev->next) ++ { ++ if ((dev->features & NETIF_F_VENET) || ++ (dev == ve->_loopback_dev)) /* Skip loopback dev */ ++ continue; ++ write_unlock_bh(&dev_base_lock); ++ ve_netdev_cleanup(dev, 0); ++ write_lock_bh(&dev_base_lock); ++ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub); ++ goto restart; ++ } ++ write_unlock_bh(&dev_base_lock); ++ rtnl_unlock(); ++} ++#endif ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * VE information via /proc ++ * ++ ********************************************************************** ++ **********************************************************************/ ++#ifdef CONFIG_PROC_FS ++static int devperms_seq_show(struct seq_file *m, void *v) ++{ ++ struct devperms_struct *dp; ++ char dev_s[32], type_c; ++ unsigned use, type; ++ dev_t dev; ++ ++ dp = (struct devperms_struct *)v; ++ if (dp == (struct devperms_struct *)1L) { ++ seq_printf(m, "Version: 2.7\n"); ++ return 0; ++ } ++ ++ use = dp->type & VE_USE_MASK; ++ type = dp->type & S_IFMT; ++ dev = dp->dev; ++ ++ if ((use | VE_USE_MINOR) == use) ++ snprintf(dev_s, sizeof(dev_s), "%d:%d", MAJOR(dev), MINOR(dev)); ++ else if ((use | VE_USE_MAJOR) == use) ++ snprintf(dev_s, sizeof(dev_s), "%d:*", MAJOR(dp->dev)); ++ else ++ snprintf(dev_s, sizeof(dev_s), "*:*"); ++ ++ if 
(type == S_IFCHR) ++ type_c = 'c'; ++ else if (type == S_IFBLK) ++ type_c = 'b'; ++ else ++ type_c = '?'; ++ ++ seq_printf(m, "%10u %c %03o %s\n", dp->veid, type_c, dp->mask, dev_s); ++ return 0; ++} ++ ++static void *devperms_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t cpos; ++ long slot; ++ struct devperms_struct *dp; ++ ++ cpos = *pos; ++ read_lock(&devperms_hash_guard); ++ if (cpos-- == 0) ++ return (void *)1L; ++ ++ for (slot = 0; slot < DEVPERMS_HASH_SZ; slot++) ++ for (dp = devperms_hash[slot]; dp; dp = dp->devhash_next) ++ if (cpos-- == 0) { ++ m->private = (void *)slot; ++ return dp; ++ } ++ return NULL; ++} ++ ++static void *devperms_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ long slot; ++ struct devperms_struct *dp; ++ ++ dp = (struct devperms_struct *)v; ++ ++ if (dp == (struct devperms_struct *)1L) ++ slot = 0; ++ else if (dp->devhash_next == NULL) ++ slot = (long)m->private + 1; ++ else { ++ (*pos)++; ++ return dp->devhash_next; ++ } ++ ++ for (; slot < DEVPERMS_HASH_SZ; slot++) ++ if (devperms_hash[slot]) { ++ (*pos)++; ++ m->private = (void *)slot; ++ return devperms_hash[slot]; ++ } ++ return NULL; ++} ++ ++static void devperms_seq_stop(struct seq_file *m, void *v) ++{ ++ read_unlock(&devperms_hash_guard); ++} ++ ++static struct seq_operations devperms_seq_op = { ++ .start = devperms_seq_start, ++ .next = devperms_seq_next, ++ .stop = devperms_seq_stop, ++ .show = devperms_seq_show, ++}; ++ ++static int devperms_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &devperms_seq_op); ++} ++ ++static struct file_operations proc_devperms_ops = { ++ .open = devperms_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++#if BITS_PER_LONG == 32 ++#define VESTAT_LINE_WIDTH (6 * 11 + 6 * 21) ++#define VESTAT_LINE_FMT "%10u %10lu %10lu %10lu %10lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %10lu\n" ++#define VESTAT_HEAD_FMT "%10s %10s %10s %10s %10s %20s %20s %20s %20s %20s %20s 
%10s\n" ++#else ++#define VESTAT_LINE_WIDTH (12 * 21) ++#define VESTAT_LINE_FMT "%20u %20lu %20lu %20lu %20lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20lu\n" ++#define VESTAT_HEAD_FMT "%20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s\n" ++#endif ++ ++static int vestat_seq_show(struct seq_file *m, void *v) ++{ ++ struct ve_struct *ve = (struct ve_struct *)v; ++ struct ve_struct *curve; ++ int cpu; ++ unsigned long user_ve, nice_ve, system_ve, uptime; ++ cycles_t uptime_cycles, idle_time, strv_time, used; ++ ++ curve = get_exec_env(); ++ if (ve == ve_list_head || ++ (!ve_is_super(curve) && ve == curve)) { ++ /* print header */ ++ seq_printf(m, "%-*s\n", ++ VESTAT_LINE_WIDTH - 1, ++ "Version: 2.2"); ++ seq_printf(m, VESTAT_HEAD_FMT, "VEID", ++ "user", "nice", "system", ++ "uptime", "idle", ++ "strv", "uptime", "used", ++ "maxlat", "totlat", "numsched"); ++ } ++ ++ if (ve == get_ve0()) ++ return 0; ++ ++ user_ve = nice_ve = system_ve = 0; ++ idle_time = strv_time = used = 0; ++ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ struct ve_cpu_stats *st; ++ ++ st = VE_CPU_STATS(ve, cpu); ++ user_ve += st->user; ++ nice_ve += st->nice; ++ system_ve += st->system; ++ used += VE_CPU_STATS(ve, cpu)->used_time; ++ idle_time += ve_sched_get_idle_time(ve, cpu); ++ } ++ uptime_cycles = get_cycles() - ve->start_cycles; ++ uptime = jiffies - ve->start_jiffies; ++ ++ seq_printf(m, VESTAT_LINE_FMT, ve->veid, ++ user_ve, nice_ve, system_ve, ++ uptime, idle_time, ++ strv_time, uptime_cycles, used, ++ ve->sched_lat_ve.last.maxlat, ++ ve->sched_lat_ve.last.totlat, ++ ve->sched_lat_ve.last.count); ++ return 0; ++} ++ ++static void *ve_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ve_struct *ve, *curve; ++ loff_t l; ++ ++ curve = get_exec_env(); ++ read_lock(&ve_list_guard); ++ if (!ve_is_super(curve)) { ++ if (*pos != 0) ++ return NULL; ++ return curve; ++ } ++ for (ve = ve_list_head, l = *pos; ++ ve != NULL && l > 0; ++ ve = ve->next, l--); ++ return ve; ++} ++ ++static void 
*ve_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ve_struct *ve = (struct ve_struct *)v; ++ ++ if (!ve_is_super(get_exec_env())) ++ return NULL; ++ (*pos)++; ++ return ve->next; ++} ++ ++static void ve_seq_stop(struct seq_file *m, void *v) ++{ ++ read_unlock(&ve_list_guard); ++} ++ ++static struct seq_operations vestat_seq_op = { ++ start: ve_seq_start, ++ next: ve_seq_next, ++ stop: ve_seq_stop, ++ show: vestat_seq_show ++}; ++ ++static int vestat_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &vestat_seq_op); ++} ++ ++static struct file_operations proc_vestat_operations = { ++ open: vestat_open, ++ read: seq_read, ++ llseek: seq_lseek, ++ release: seq_release ++}; ++ ++static inline unsigned long ve_used_mem(struct user_beancounter *ub) ++{ ++ return ub->ub_parms[UB_OOMGUARPAGES].held; ++} ++ ++static inline void ve_mi_replace(struct meminfo *mi) ++{ ++ struct user_beancounter *ub; ++ unsigned long meminfo_val; ++ unsigned long nodettram; ++ unsigned long usedmem; ++ ++ meminfo_val = get_exec_env()->meminfo_val; ++ ++ if(!meminfo_val) ++ return; /* No virtualization */ ++ ++ nodettram = mi->si.totalram; ++ ub = current->mm->mm_ub; ++ usedmem = ve_used_mem(ub); ++ ++ memset(mi, 0, sizeof(*mi)); ++ ++ mi->si.totalram = (meminfo_val > nodettram) ? ++ nodettram : meminfo_val; ++ mi->si.freeram = (mi->si.totalram > usedmem) ? 
++ (mi->si.totalram - usedmem) : 0; ++} ++ ++static int meminfo_call(struct vnotifier_block *self, ++ unsigned long event, void *arg, int old_ret) ++{ ++ if (event != VIRTINFO_MEMINFO) ++ return old_ret; ++ ++ ve_mi_replace((struct meminfo *)arg); ++ ++ return NOTIFY_OK; ++} ++ ++ ++static struct vnotifier_block meminfo_notifier_block = { ++ .notifier_call = meminfo_call ++}; ++ ++static int __init init_vecalls_proc(void) ++{ ++ struct proc_dir_entry *de; ++ ++ de = create_proc_glob_entry("vz/vestat", ++ S_IFREG|S_IRUSR, NULL); ++ if (de == NULL) { ++ /* create "vz" subdirectory, if not exist */ ++ (void) create_proc_glob_entry("vz", ++ S_IFDIR|S_IRUGO|S_IXUGO, NULL); ++ de = create_proc_glob_entry("vz/vestat", ++ S_IFREG|S_IRUSR, NULL); ++ } ++ if (de) ++ de->proc_fops = &proc_vestat_operations; ++ else ++ printk(KERN_WARNING ++ "VZMON: can't make vestat proc entry\n"); ++ ++ de = create_proc_entry("vz/devperms", S_IFREG | S_IRUSR, NULL); ++ if (de) ++ de->proc_fops = &proc_devperms_ops; ++ else ++ printk(KERN_WARNING ++ "VZMON: can't make devperms proc entry\n"); ++ ++ virtinfo_notifier_register(VITYPE_GENERAL, &meminfo_notifier_block); ++ ++ return 0; ++} ++ ++static void fini_vecalls_proc(void) ++{ ++ remove_proc_entry("vz/devperms", NULL); ++ remove_proc_entry("vz/vestat", NULL); ++ virtinfo_notifier_unregister(VITYPE_GENERAL, &meminfo_notifier_block); ++} ++#else ++#define init_vecalls_proc() (0) ++#define fini_vecalls_proc() do { } while (0) ++#endif /* CONFIG_PROC_FS */ ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * User ctl ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++int vzcalls_ioctl(struct inode *, struct file *, unsigned int, unsigned long); ++static struct vzioctlinfo vzcalls = { ++ type: VZCTLTYPE, ++ func: vzcalls_ioctl, 
++ owner: THIS_MODULE, ++}; ++ ++int vzcalls_ioctl(struct inode *ino, struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err; ++ ++ err = -ENOTTY; ++ switch(cmd) { ++ case VZCTL_MARK_ENV_TO_DOWN: { ++ /* Compatibility issue */ ++ err = 0; ++ } ++ break; ++ case VZCTL_SETDEVPERMS: { ++ /* Device type was mistakenly declared as dev_t ++ * in the old user-kernel interface. ++ * That's wrong, dev_t is a kernel internal type. ++ * I use `unsigned' not having anything better in mind. ++ * 2001/08/11 SAW */ ++ struct vzctl_setdevperms s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = real_setdevperms(s.veid, s.type, ++ new_decode_dev(s.dev), s.mask); ++ } ++ break; ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ case VZCTL_VE_NETDEV: { ++ struct vzctl_ve_netdev d; ++ char *s; ++ err = -EFAULT; ++ if (copy_from_user(&d, (void *)arg, sizeof(d))) ++ break; ++ err = -ENOMEM; ++ s = kmalloc(IFNAMSIZ+1, GFP_KERNEL); ++ if (s == NULL) ++ break; ++ err = -EFAULT; ++ if (strncpy_from_user(s, d.dev_name, IFNAMSIZ) > 0) { ++ s[IFNAMSIZ] = 0; ++ err = real_ve_dev_map(d.veid, d.op, s); ++ } ++ kfree(s); ++ } ++ break; ++#endif ++ case VZCTL_ENV_CREATE: { ++ struct vzctl_env_create s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = real_env_create(s.veid, s.flags, s.class_id, ++ NULL, 0); ++ } ++ break; ++ case VZCTL_ENV_CREATE_DATA: { ++ struct vzctl_env_create_data s; ++ env_create_param_t *data; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err=-EINVAL; ++ if (s.datalen < VZCTL_ENV_CREATE_DATA_MINLEN || ++ s.datalen > VZCTL_ENV_CREATE_DATA_MAXLEN || ++ s.data == 0) ++ break; ++ err = -ENOMEM; ++ data = kmalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ break; ++ memset(data, 0, sizeof(*data)); ++ err = -EFAULT; ++ if (copy_from_user(data, (void *)s.data, s.datalen)) ++ goto free_data; ++ err = real_env_create(s.veid, 
s.flags, s.class_id, ++ data, s.datalen); ++free_data: ++ kfree(data); ++ } ++ break; ++ case VZCTL_GET_CPU_STAT: { ++ struct vzctl_cpustatctl s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = ve_get_cpu_stat(s.veid, s.cpustat); ++ } ++ break; ++ case VZCTL_VE_MEMINFO: { ++ struct vzctl_ve_meminfo s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = ve_set_meminfo(s.veid, s.val); ++ } ++ break; ++ } ++ return err; ++} ++EXPORT_SYMBOL(real_env_create); ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * Init/exit stuff ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++#ifdef CONFIG_VE_CALLS_MODULE ++static int __init init_vecalls_symbols(void) ++{ ++ KSYMRESOLVE(real_get_device_perms_ve); ++ KSYMRESOLVE(real_do_env_cleanup); ++ KSYMRESOLVE(real_do_env_free); ++ KSYMRESOLVE(real_update_load_avg_ve); ++ KSYMMODRESOLVE(vzmon); ++ return 0; ++} ++ ++static void fini_vecalls_symbols(void) ++{ ++ KSYMMODUNRESOLVE(vzmon); ++ KSYMUNRESOLVE(real_get_device_perms_ve); ++ KSYMUNRESOLVE(real_do_env_cleanup); ++ KSYMUNRESOLVE(real_do_env_free); ++ KSYMUNRESOLVE(real_update_load_avg_ve); ++} ++#else ++#define init_vecalls_symbols() (0) ++#define fini_vecalls_symbols() do { } while (0) ++#endif ++ ++static inline __init int init_vecalls_ioctls(void) ++{ ++ vzioctl_register(&vzcalls); ++ return 0; ++} ++ ++static inline void fini_vecalls_ioctls(void) ++{ ++ vzioctl_unregister(&vzcalls); ++} ++ ++static int __init vecalls_init(void) ++{ ++ int err; ++ int i; ++ ++ ve_list_head = get_ve0(); ++ ++ err = init_vzmond(); ++ if (err < 0) ++ goto out_vzmond; ++ ++ err = init_devperms_hash(); ++ if (err < 0) ++ goto out_perms; ++ ++ err = init_vecalls_symbols(); ++ if (err < 0) ++ goto out_sym; 
++ ++ err = init_vecalls_proc(); ++ if (err < 0) ++ goto out_proc; ++ ++ err = init_vecalls_ioctls(); ++ if (err < 0) ++ goto out_ioctls; ++ ++ for (i = 0; i < VE_MAX_HOOKS; i++) ++ INIT_LIST_HEAD(&ve_hooks[i]); ++ ++ return 0; ++ ++out_ioctls: ++ fini_vecalls_proc(); ++out_proc: ++ fini_vecalls_symbols(); ++out_sym: ++ fini_devperms_hash(); ++out_perms: ++ fini_vzmond(); ++out_vzmond: ++ return err; ++} ++ ++static void vecalls_exit(void) ++{ ++ fini_vecalls_ioctls(); ++ fini_vecalls_proc(); ++ fini_vecalls_symbols(); ++ fini_devperms_hash(); ++ fini_vzmond(); ++} ++ ++EXPORT_SYMBOL(get_ve_by_id); ++EXPORT_SYMBOL(__find_ve_by_id); ++EXPORT_SYMBOL(ve_list_guard); ++EXPORT_SYMBOL(ve_list_head); ++EXPORT_SYMBOL(nr_ve); ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo Control"); ++MODULE_LICENSE("GPL v2"); ++ ++module_init(vecalls_init) ++module_exit(vecalls_exit) +diff -upr linux-2.6.16.orig/kernel/veowner.c linux-2.6.16-026test015/kernel/veowner.c +--- linux-2.6.16.orig/kernel/veowner.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/veowner.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,308 @@ ++/* ++ * kernel/veowner.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/sched.h> ++#include <linux/ve.h> ++#include <linux/ve_owner.h> ++#include <linux/ve_proto.h> ++#include <linux/ipc.h> ++#include <linux/fs.h> ++#include <linux/proc_fs.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/delay.h> ++#include <linux/vmalloc.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/list.h> ++#include <linux/inetdevice.h> ++#include <asm/system.h> ++#include <asm/io.h> ++ ++#include <net/tcp.h> ++ ++void prepare_ve0_process(struct task_struct *tsk) ++{ ++ set_virt_pid(tsk, tsk->pid); ++ set_virt_tgid(tsk, tsk->tgid); ++ if (tsk->signal) { ++ set_virt_pgid(tsk, tsk->signal->pgrp); ++ set_virt_sid(tsk, tsk->signal->session); ++ } ++ VE_TASK_INFO(tsk)->exec_env = get_ve0(); ++ VE_TASK_INFO(tsk)->owner_env = get_ve0(); ++ VE_TASK_INFO(tsk)->sleep_time = 0; ++ VE_TASK_INFO(tsk)->wakeup_stamp = 0; ++ VE_TASK_INFO(tsk)->sched_time = 0; ++ seqcount_init(&VE_TASK_INFO(tsk)->wakeup_lock); ++ ++ if (tsk->pid) { ++ SET_VE_LINKS(tsk); ++ atomic_inc(&get_ve0()->pcounter); ++ } ++} ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++void prepare_ve0_loopback(void) ++{ ++ get_ve0()->_loopback_dev = &loopback_dev; ++} ++#endif ++ ++/* ++ * ------------------------------------------------------------------------ ++ * proc entries ++ * ------------------------------------------------------------------------ ++ */ ++ ++#ifdef CONFIG_PROC_FS ++static void proc_move(struct proc_dir_entry *ddir, ++ struct proc_dir_entry *sdir, ++ const char *name) ++{ ++ struct proc_dir_entry **p, *q; ++ int len; ++ ++ len = strlen(name); ++ for (p = &sdir->subdir, q = *p; q != NULL; p = &q->next, q = *p) ++ if (proc_match(len, name, q)) ++ break; ++ if (q == NULL) ++ return; ++ *p = q->next; ++ q->parent = ddir; ++ q->next = ddir->subdir; ++ ddir->subdir = q; ++} ++static void prepare_proc_misc(void) ++{ ++ static char *table[] = { ++ "loadavg", ++ "uptime", ++ "meminfo", ++ "version", 
++ "stat", ++ "filesystems", ++ "locks", ++ "swaps", ++ "mounts", ++ "net", ++ "cpuinfo", ++ "sysvipc", ++ "sys", ++ "fs", ++ "vz", ++ "user_beancounters", ++ "cmdline", ++ "vmstat", ++ "modules", ++ "kmsg", ++ NULL, ++ }; ++ char **p; ++ ++ for (p = table; *p != NULL; p++) ++ proc_move(&proc_root, ve0.proc_root, *p); ++} ++int prepare_proc(void) ++{ ++ struct ve_struct *envid; ++ struct proc_dir_entry *de; ++ struct proc_dir_entry *ve_root; ++ ++ envid = set_exec_env(&ve0); ++ ve_root = ve0.proc_root->subdir; ++ /* move the whole tree to be visible in VE0 only */ ++ ve0.proc_root->subdir = proc_root.subdir; ++ for (de = ve0.proc_root->subdir; de->next != NULL; de = de->next) ++ de->parent = ve0.proc_root; ++ de->parent = ve0.proc_root; ++ de->next = ve_root; ++ ++ /* move back into the global scope some specific entries */ ++ proc_root.subdir = NULL; ++ prepare_proc_misc(); ++ proc_net = proc_mkdir("net", ve0.proc_root); ++ proc_net_stat = proc_mkdir("stat", proc_net); ++ proc_mkdir("vz", 0); ++#ifdef CONFIG_SYSVIPC ++ proc_mkdir("sysvipc", 0); ++#endif ++ proc_root_fs = proc_mkdir("fs", 0); ++ /* XXX proc_tty_init(); */ ++ ++ /* XXX process inodes */ ++ ++ (void)set_exec_env(envid); ++ ++ (void)create_proc_glob_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL); ++ return 0; ++} ++ ++static struct proc_dir_entry ve0_proc_root = { ++ .name = "/proc", ++ .namelen = 5, ++ .mode = S_IFDIR | S_IRUGO | S_IXUGO, ++ .nlink = 2 ++}; ++ ++void prepare_ve0_proc_root(void) ++{ ++ ve0.proc_root = &ve0_proc_root; ++} ++#endif ++ ++/* ++ * ------------------------------------------------------------------------ ++ * Virtualized sysctl ++ * ------------------------------------------------------------------------ ++ */ ++ ++static int semmin[4] = { 1, 1, 1, 1 }; ++static int semmax[4] = { 8000, INT_MAX, 1000, IPCMNI }; ++static ctl_table kern_table[] = { ++ {KERN_NODENAME, "hostname", system_utsname.nodename, 64, ++ 0644, NULL, &proc_doutsstring, &sysctl_string}, ++ {KERN_DOMAINNAME, 
"domainname", system_utsname.domainname, 64, ++ 0644, NULL, &proc_doutsstring, &sysctl_string}, ++#ifdef CONFIG_SYSVIPC ++#define get_ve0_field(fname) &ve0._##fname ++ {KERN_SHMMAX, "shmmax", get_ve0_field(shm_ctlmax), sizeof (size_t), ++ 0644, NULL, &proc_doulongvec_minmax }, ++ {KERN_SHMALL, "shmall", get_ve0_field(shm_ctlall), sizeof (size_t), ++ 0644, NULL, &proc_doulongvec_minmax }, ++ {KERN_SHMMNI, "shmmni", get_ve0_field(shm_ctlmni), sizeof (int), ++ 0644, NULL, &proc_dointvec_minmax, NULL, ++ NULL, &semmin[0], &semmax[3] }, ++ {KERN_MSGMAX, "msgmax", get_ve0_field(msg_ctlmax), sizeof (int), ++ 0644, NULL, &proc_dointvec }, ++ {KERN_MSGMNI, "msgmni", get_ve0_field(msg_ctlmni), sizeof (int), ++ 0644, NULL, &proc_dointvec_minmax, NULL, ++ NULL, &semmin[0], &semmax[3] }, ++ {KERN_MSGMNB, "msgmnb", get_ve0_field(msg_ctlmnb), sizeof (int), ++ 0644, NULL, &proc_dointvec }, ++ {KERN_SEM, "sem", get_ve0_field(sem_ctls), 4*sizeof (int), ++ 0644, NULL, &proc_dointvec }, ++#endif ++ {0} ++}; ++static ctl_table root_table[] = { ++ {CTL_KERN, "kernel", NULL, 0, 0555, kern_table}, ++ {0} ++}; ++extern int ip_rt_src_check; ++extern int ve_area_access_check; ++static ctl_table vz_ipv4_route_table[] = { ++ { ++ ctl_name: NET_IPV4_ROUTE_SRC_CHECK, ++ procname: "src_check", ++ data: &ip_rt_src_check, ++ maxlen: sizeof(int), ++ mode: 0644, ++ proc_handler: &proc_dointvec, ++ }, ++ { 0 } ++}; ++static ctl_table vz_ipv4_table[] = { ++ {NET_IPV4_ROUTE, "route", NULL, 0, 0555, vz_ipv4_route_table}, ++ { 0 } ++}; ++static ctl_table vz_net_table[] = { ++ {NET_IPV4, "ipv4", NULL, 0, 0555, vz_ipv4_table}, ++ { 0 } ++}; ++static ctl_table vz_fs_table[] = { ++ { ++ ctl_name: 226, ++ procname: "ve-area-access-check", ++ data: &ve_area_access_check, ++ maxlen: sizeof(int), ++ mode: 0644, ++ proc_handler: &proc_dointvec, ++ }, ++ { 0 } ++}; ++static ctl_table root_table2[] = { ++ {CTL_NET, "net", NULL, 0, 0555, vz_net_table}, ++ {CTL_FS, "fs", NULL, 0, 0555, vz_fs_table}, ++ { 0 } ++}; 
++int prepare_sysctl(void) ++{ ++ struct ve_struct *envid; ++ ++ envid = set_exec_env(&ve0); ++ ve0.kern_header = register_sysctl_table(root_table, 1); ++ register_sysctl_table(root_table2, 0); ++ (void)set_exec_env(envid); ++ return 0; ++} ++ ++void prepare_ve0_sysctl(void) ++{ ++ INIT_LIST_HEAD(&ve0.sysctl_lh); ++#ifdef CONFIG_SYSCTL ++ ve0.proc_sys_root = proc_mkdir("sys", 0); ++#endif ++} ++ ++/* ++ * ------------------------------------------------------------------------ ++ * XXX init_ve_system ++ * ------------------------------------------------------------------------ ++ */ ++ ++void init_ve_system(void) ++{ ++ struct task_struct *init_entry, *p, *tsk; ++ struct ve_struct *ptr; ++ unsigned long flags; ++ int i; ++ ++ ptr = get_ve0(); ++ (void)get_ve(ptr); ++ atomic_set(&ptr->pcounter, 1); ++ ++ /* Don't forget about idle tasks */ ++ write_lock_irqsave(&tasklist_lock, flags); ++ for (i = 0; i < NR_CPUS; i++) { ++ tsk = idle_task(i); ++ if (tsk == NULL) ++ continue; ++ ++ prepare_ve0_process(tsk); ++ } ++ do_each_thread_all(p, tsk) { ++ prepare_ve0_process(tsk); ++ } while_each_thread_all(p, tsk); ++ write_unlock_irqrestore(&tasklist_lock, flags); ++ ++ init_entry = child_reaper; ++ ptr->init_entry = init_entry; ++ /* XXX: why? 
*/ ++ cap_set_full(ptr->cap_default); ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ ptr->_ipv4_devconf = &ipv4_devconf; ++ ptr->_ipv4_devconf_dflt = &ipv4_devconf_dflt; ++#endif ++ ++ read_lock(&init_entry->fs->lock); ++ ptr->fs_rootmnt = init_entry->fs->rootmnt; ++ ptr->fs_root = init_entry->fs->root; ++ read_unlock(&init_entry->fs->lock); ++ ++ /* common prepares */ ++#ifdef CONFIG_PROC_FS ++ prepare_proc(); ++#endif ++ prepare_sysctl(); ++ prepare_ipc(); ++} +diff -upr linux-2.6.16.orig/kernel/vzdev.c linux-2.6.16-026test015/kernel/vzdev.c +--- linux-2.6.16.orig/kernel/vzdev.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/vzdev.c 2006-07-04 14:41:39.000000000 +0400 +@@ -0,0 +1,129 @@ ++/* ++ * kernel/vzdev.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/fs.h> ++#include <linux/list.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/vzctl.h> ++#include <linux/slab.h> ++#include <linux/vmalloc.h> ++#include <linux/vzcalluser.h> ++#include <asm/uaccess.h> ++#include <asm/pgalloc.h> ++#include <linux/device.h> ++#include <linux/smp_lock.h> ++ ++#define VZCTL_MAJOR 126 ++#define VZCTL_NAME "vzctl" ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo Interface"); ++MODULE_LICENSE("GPL v2"); ++ ++static LIST_HEAD(ioctls); ++static spinlock_t ioctl_lock = SPIN_LOCK_UNLOCKED; ++ ++int vzctl_ioctl(struct inode *ino, struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err; ++ struct list_head *p; ++ struct vzioctlinfo *inf; ++ ++ err = -ENOTTY; ++ spin_lock(&ioctl_lock); ++ list_for_each(p, &ioctls) { ++ inf = list_entry(p, struct vzioctlinfo, list); ++ if (inf->type != _IOC_TYPE(cmd)) ++ continue; ++ ++ err = try_module_get(inf->owner) ? 
0 : -EBUSY; ++ spin_unlock(&ioctl_lock); ++ if (!err) { ++ unlock_kernel(); ++ err = (*inf->func)(ino, file, cmd, arg); ++ lock_kernel(); ++ module_put(inf->owner); ++ } ++ return err; ++ } ++ spin_unlock(&ioctl_lock); ++ return err; ++} ++ ++void vzioctl_register(struct vzioctlinfo *inf) ++{ ++ spin_lock(&ioctl_lock); ++ list_add(&inf->list, &ioctls); ++ spin_unlock(&ioctl_lock); ++} ++ ++void vzioctl_unregister(struct vzioctlinfo *inf) ++{ ++ spin_lock(&ioctl_lock); ++ list_del_init(&inf->list); ++ spin_unlock(&ioctl_lock); ++} ++ ++EXPORT_SYMBOL(vzioctl_register); ++EXPORT_SYMBOL(vzioctl_unregister); ++ ++/* ++ * Init/exit stuff. ++ */ ++static struct file_operations vzctl_fops = { ++ .owner = THIS_MODULE, ++ .ioctl = vzctl_ioctl, ++}; ++ ++static struct class *vzctl_class; ++ ++static void __exit vzctl_exit(void) ++{ ++ class_device_destroy(vzctl_class, MKDEV(VZCTL_MAJOR, 0)); ++ class_destroy(vzctl_class); ++ unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME); ++} ++ ++static int __init vzctl_init(void) ++{ ++ int ret; ++ struct class_device *class_err; ++ ++ ret = register_chrdev(VZCTL_MAJOR, VZCTL_NAME, &vzctl_fops); ++ if (ret < 0) ++ goto out; ++ ++ vzctl_class = class_create(THIS_MODULE, "vzctl"); ++ if (IS_ERR(vzctl_class)) { ++ ret = PTR_ERR(vzctl_class); ++ goto out_cleandev; ++ } ++ ++ class_err = class_device_create(vzctl_class, NULL, MKDEV(VZCTL_MAJOR, 0), ++ NULL, VZCTL_NAME); ++ if (IS_ERR(class_err)) { ++ ret = PTR_ERR(class_err); ++ goto out_rmclass; ++ } ++ ++ goto out; ++ ++out_rmclass: ++ class_destroy(vzctl_class); ++out_cleandev: ++ unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME); ++out: ++ return ret; ++} ++ ++module_init(vzctl_init) ++module_exit(vzctl_exit); +diff -upr linux-2.6.16.orig/kernel/vzwdog.c linux-2.6.16-026test015/kernel/vzwdog.c +--- linux-2.6.16.orig/kernel/vzwdog.c 2006-07-04 14:41:41.000000000 +0400 ++++ linux-2.6.16-026test015/kernel/vzwdog.c 2006-07-04 14:41:38.000000000 +0400 +@@ -0,0 +1,278 @@ ++/* ++ * kernel/vzwdog.c ++ * ++ * 
Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/list.h> ++#include <linux/ctype.h> ++#include <linux/kobject.h> ++#include <linux/genhd.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/kernel_stat.h> ++#include <linux/smp_lock.h> ++#include <linux/errno.h> ++#include <linux/suspend.h> ++#include <linux/ve.h> ++#include <linux/vzstat.h> ++ ++/* Staff regading kernel thread polling VE validity */ ++static int sleep_timeout = 60; ++static pid_t wdog_thread_pid; ++static int wdog_thread_continue = 1; ++static DECLARE_COMPLETION(license_thread_exited); ++ ++extern void show_mem(void); ++extern struct ve_struct *ve_list_head; ++ ++#if 0 ++static char page[PAGE_SIZE]; ++ ++static void parse_irq_list(int len) ++{ ++ int i, k, skip; ++ for (i = 0; i < len; ) { ++ k = i; ++ while (i < len && page[i] != '\n' && page[i] != ':') ++ i++; ++ skip = 0; ++ if (i < len && page[i] != '\n') { ++ i++; /* skip ':' */ ++ while (i < len && (page[i] == ' ' || page[i] == '0')) ++ i++; ++ skip = (i < len && (page[i] < '0' || page[i] > '9')); ++ while (i < len && page[i] != '\n') ++ i++; ++ } ++ if (!skip) ++ printk("\n%.*s", i - k, page + k); ++ if (i < len) ++ i++; /* skip '\n' */ ++ } ++} ++#endif ++ ++static void show_irq_list(void) ++{ ++#if 0 ++ i = KSYMSAFECALL(int, get_irq_list, (page)); ++ parse_irq_list(i); /* Safe, zero was returned if unassigned */ ++#endif ++} ++ ++static void show_alloc_latency(void) ++{ ++ static const char *alloc_descr[KSTAT_ALLOCSTAT_NR] = { ++ "A0", ++ "L0", ++ "H0", ++ "L1", ++ "H1" ++ }; ++ int i; ++ ++ printk("lat: "); ++ for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) { ++ struct kstat_lat_struct *p; ++ cycles_t maxlat, avg0, avg1, avg2; ++ ++ p = &kstat_glob.alloc_lat[i]; ++ spin_lock_irq(&kstat_glb_lock); ++ maxlat = p->last.maxlat; ++ avg0 = p->avg[0]; ++ 
avg1 = p->avg[1]; ++ avg2 = p->avg[2]; ++ spin_unlock_irq(&kstat_glb_lock); ++ ++ printk("%s %Lu (%Lu %Lu %Lu)", ++ alloc_descr[i], ++ maxlat, ++ avg0, ++ avg1, ++ avg2); ++ } ++ printk("\n"); ++} ++ ++static void show_schedule_latency(void) ++{ ++ struct kstat_lat_pcpu_struct *p; ++ cycles_t maxlat, totlat, avg0, avg1, avg2; ++ unsigned long count; ++ ++ p = &kstat_glob.sched_lat; ++ spin_lock_irq(&kstat_glb_lock); ++ maxlat = p->last.maxlat; ++ totlat = p->last.totlat; ++ count = p->last.count; ++ avg0 = p->avg[0]; ++ avg1 = p->avg[1]; ++ avg2 = p->avg[2]; ++ spin_unlock_irq(&kstat_glb_lock); ++ ++ printk("sched lat: %Lu/%Lu/%lu (%Lu %Lu %Lu)\n", ++ maxlat, ++ totlat, ++ count, ++ avg0, ++ avg1, ++ avg2); ++} ++ ++static void show_header(void) ++{ ++ struct timeval tv; ++ ++ do_gettimeofday(&tv); ++ printk("*** VZWDOG 1.14: time %lu.%06lu uptime %Lu CPU %d ***\n", ++ tv.tv_sec, tv.tv_usec, ++ get_jiffies_64(), smp_processor_id()); ++#ifdef CONFIG_FAIRSCHED ++ printk("*** cycles_per_jiffy %lu jiffies_per_second %u ***\n", ++ cycles_per_jiffy, HZ); ++#else ++ printk("*** jiffies_per_second %u ***\n", HZ); ++#endif ++} ++ ++static void show_pgdatinfo(void) ++{ ++ pg_data_t *pgdat; ++ ++ printk("pgdat:"); ++ for_each_pgdat(pgdat) { ++ printk(" %d: %lu,%lu,%lu,%p", ++ pgdat->node_id, ++ pgdat->node_start_pfn, ++ pgdat->node_present_pages, ++ pgdat->node_spanned_pages, ++ pgdat->node_mem_map); ++ } ++ printk("\n"); ++} ++ ++static void show_diskio(void) ++{ ++ struct gendisk *gd; ++ char buf[BDEVNAME_SIZE]; ++ ++ printk("disk_io: "); ++ ++ down_read(&block_subsys.rwsem); ++ list_for_each_entry(gd, &block_subsys.kset.list, kobj.entry) { ++ char *name; ++ name = disk_name(gd, 0, buf); ++ if ((strlen(name) > 4) && (strncmp(name, "loop", 4) == 0) && ++ isdigit(name[4])) ++ continue; ++ if ((strlen(name) > 3) && (strncmp(name, "ram", 3) == 0) && ++ isdigit(name[3])) ++ continue; ++ printk("(%u,%u) %s r(%u %u %u) w(%u %u %u)\n", ++ gd->major, gd->first_minor, ++ name, ++ 
disk_stat_read(gd, ios[READ]), ++ disk_stat_read(gd, sectors[READ]), ++ disk_stat_read(gd, merges[READ]), ++ disk_stat_read(gd, ios[WRITE]), ++ disk_stat_read(gd, sectors[WRITE]), ++ disk_stat_read(gd, merges[WRITE])); ++ } ++ up_read(&block_subsys.rwsem); ++ ++ printk("\n"); ++} ++ ++static void show_nrprocs(void) ++{ ++ unsigned long _nr_running, _nr_sleeping, ++ _nr_unint, _nr_zombie, _nr_dead, _nr_stopped; ++ ++ _nr_running = nr_running(); ++ _nr_unint = nr_uninterruptible(); ++ _nr_sleeping = nr_sleeping(); ++ _nr_zombie = nr_zombie; ++ _nr_dead = atomic_read(&nr_dead); ++ _nr_stopped = nr_stopped(); ++ ++ printk("VEnum: %d, proc R %lu, S %lu, D %lu, " ++ "Z %lu, X %lu, T %lu (tot %d)\n", ++ nr_ve, _nr_running, _nr_sleeping, _nr_unint, ++ _nr_zombie, _nr_dead, _nr_stopped, nr_threads); ++} ++ ++static void wdog_print(void) ++{ ++ show_header(); ++ show_irq_list(); ++ show_pgdatinfo(); ++ show_mem(); ++ show_diskio(); ++ show_schedule_latency(); ++ show_alloc_latency(); ++ show_nrprocs(); ++} ++ ++static int wdog_loop(void* data) ++{ ++ struct task_struct *tsk = current; ++ DECLARE_WAIT_QUEUE_HEAD(thread_wait_queue); ++ ++ /* ++ * This thread doesn't need any user-level access, ++ * so get rid of all our resources ++ */ ++ daemonize("wdogd"); ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ sigfillset(&tsk->blocked); ++ sigdelset(&tsk->blocked, SIGHUP); ++ recalc_sigpending(); ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ while (wdog_thread_continue) { ++ wdog_print(); ++ interruptible_sleep_on_timeout(&thread_wait_queue, ++ sleep_timeout*HZ); ++ try_to_freeze(); ++ /* clear all signals */ ++ if (signal_pending(tsk)) ++ flush_signals(tsk); ++ } ++ ++ complete_and_exit(&license_thread_exited, 0); ++} ++ ++static int __init wdog_init(void) ++{ ++ wdog_thread_pid = kernel_thread(wdog_loop, NULL, 0); ++ if (wdog_thread_pid < 0) ++ return wdog_thread_pid; ++ ++ return 0; ++} ++ ++static void __exit wdog_exit(void) ++{ ++ wdog_thread_continue = 0; ++ if 
(wdog_thread_pid > 0) { ++ kill_proc(wdog_thread_pid, SIGHUP, 1); ++ wait_for_completion(&license_thread_exited); ++ } ++} ++ ++module_param(sleep_timeout, int, 0); ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo WDOG"); ++MODULE_LICENSE("GPL v2"); ++ ++module_init(wdog_init) ++module_exit(wdog_exit) +diff -upr linux-2.6.16.orig/lib/Kconfig.debug linux-2.6.16-026test015/lib/Kconfig.debug +--- linux-2.6.16.orig/lib/Kconfig.debug 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/lib/Kconfig.debug 2006-07-04 14:41:39.000000000 +0400 +@@ -48,7 +48,7 @@ config LOG_BUF_SHIFT + + config DETECT_SOFTLOCKUP + bool "Detect Soft Lockups" +- depends on DEBUG_KERNEL ++ depends on DEBUG_KERNEL && !SCHED_VCPU + default y + help + Say Y here to enable the kernel to detect "soft lockups", +diff -upr linux-2.6.16.orig/lib/bust_spinlocks.c linux-2.6.16-026test015/lib/bust_spinlocks.c +--- linux-2.6.16.orig/lib/bust_spinlocks.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/lib/bust_spinlocks.c 2006-07-04 14:41:37.000000000 +0400 +@@ -20,19 +20,11 @@ void bust_spinlocks(int yes) + if (yes) { + oops_in_progress = 1; + } else { +- int loglevel_save = console_loglevel; + #ifdef CONFIG_VT + unblank_screen(); + #endif + oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk() will give klogd +- * and the blanked console a poke. Hold onto your hats... 
+- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; ++ wake_up_klogd(); + } + } + +diff -upr linux-2.6.16.orig/mm/filemap_xip.c linux-2.6.16-026test015/mm/filemap_xip.c +--- linux-2.6.16.orig/mm/filemap_xip.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/filemap_xip.c 2006-07-04 14:41:37.000000000 +0400 +@@ -15,6 +15,7 @@ + #include <linux/rmap.h> + #include <asm/tlbflush.h> + #include "filemap.h" ++#include <ub/ub_vmpages.h> + + /* + * This is a file read routine for execute in place files, and uses +@@ -190,7 +191,10 @@ __xip_unmap (struct address_space * mapp + flush_cache_page(vma, address, pte_pfn(*pte)); + pteval = ptep_clear_flush(vma, address, pte); + page_remove_rmap(page); ++ pb_remove_ref(page, mm); ++ ub_unused_privvm_inc(mm, vma); + dec_mm_counter(mm, file_rss); ++ dec_vma_rss(vma); + BUG_ON(pte_dirty(pteval)); + pte_unmap_unlock(pte, ptl); + page_cache_release(page); +diff -upr linux-2.6.16.orig/mm/fremap.c linux-2.6.16-026test015/mm/fremap.c +--- linux-2.6.16.orig/mm/fremap.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/fremap.c 2006-07-04 14:41:39.000000000 +0400 +@@ -20,6 +20,8 @@ + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++ + static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) + { +@@ -34,6 +36,7 @@ static int zap_pte(struct mm_struct *mm, + if (pte_dirty(pte)) + set_page_dirty(page); + page_remove_rmap(page); ++ pb_remove_ref(page, mm); + page_cache_release(page); + } + } else { +@@ -57,6 +60,10 @@ int install_page(struct mm_struct *mm, s + pte_t *pte; + pte_t pte_val; + spinlock_t *ptl; ++ struct page_beancounter *pbc; ++ ++ if (unlikely(pb_alloc(&pbc))) ++ goto out_nopb; + + pte = get_locked_pte(mm, addr, &ptl); + if (!pte) +@@ -75,11 +82,15 @@ int install_page(struct mm_struct *mm, s + if (page_mapcount(page) > INT_MAX/2) + 
goto unlock; + +- if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) ++ if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) { ++ ub_unused_privvm_dec(mm, vma); + inc_mm_counter(mm, file_rss); ++ inc_vma_rss(vma); ++ } + + flush_icache_page(vma, page); + set_pte_at(mm, addr, pte, mk_pte(page, prot)); ++ pb_add_ref(page, mm, &pbc); + page_add_file_rmap(page); + pte_val = *pte; + update_mmu_cache(vma, addr, pte_val); +@@ -87,6 +98,8 @@ int install_page(struct mm_struct *mm, s + unlock: + pte_unmap_unlock(pte, ptl); + out: ++ pb_free(&pbc); ++out_nopb: + return err; + } + EXPORT_SYMBOL(install_page); +@@ -109,7 +122,9 @@ int install_file_pte(struct mm_struct *m + + if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) { + update_hiwater_rss(mm); ++ ub_unused_privvm_inc(mm, vma); + dec_mm_counter(mm, file_rss); ++ dec_vma_rss(vma); + } + + set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff)); +@@ -220,4 +235,5 @@ asmlinkage long sys_remap_file_pages(uns + + return err; + } ++EXPORT_SYMBOL_GPL(sys_remap_file_pages); + +diff -upr linux-2.6.16.orig/mm/madvise.c linux-2.6.16-026test015/mm/madvise.c +--- linux-2.6.16.orig/mm/madvise.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/madvise.c 2006-07-04 14:41:36.000000000 +0400 +@@ -168,6 +168,9 @@ static long madvise_remove(struct vm_are + return -EINVAL; + } + ++ if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)) ++ return -EACCES; ++ + mapping = vma->vm_file->f_mapping; + + offset = (loff_t)(start - vma->vm_start) +diff -upr linux-2.6.16.orig/mm/memory.c linux-2.6.16-026test015/mm/memory.c +--- linux-2.6.16.orig/mm/memory.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/memory.c 2006-07-04 14:41:39.000000000 +0400 +@@ -58,6 +58,8 @@ + #include <linux/swapops.h> + #include <linux/elf.h> + ++#include <ub/ub_vmpages.h> ++ + #ifndef CONFIG_NEED_MULTIPLE_NODES + /* use the per-pgdat data instead for discontigmem - mbligh */ + unsigned long max_mapnr; +@@ -81,6 +83,7 @@ unsigned 
long vmalloc_earlyreserve; + EXPORT_SYMBOL(num_physpages); + EXPORT_SYMBOL(high_memory); + EXPORT_SYMBOL(vmalloc_earlyreserve); ++EXPORT_SYMBOL_GPL(empty_zero_page); + + int randomize_va_space __read_mostly = 1; + +@@ -103,18 +106,21 @@ void pgd_clear_bad(pgd_t *pgd) + pgd_ERROR(*pgd); + pgd_clear(pgd); + } ++EXPORT_SYMBOL_GPL(pgd_clear_bad); + + void pud_clear_bad(pud_t *pud) + { + pud_ERROR(*pud); + pud_clear(pud); + } ++EXPORT_SYMBOL_GPL(pud_clear_bad); + + void pmd_clear_bad(pmd_t *pmd) + { + pmd_ERROR(*pmd); + pmd_clear(pmd); + } ++EXPORT_SYMBOL_GPL(pmd_clear_bad); + + /* + * Note: this doesn't free the actual pages themselves. That +@@ -318,6 +324,7 @@ int __pte_alloc(struct mm_struct *mm, pm + spin_unlock(&mm->page_table_lock); + return 0; + } ++EXPORT_SYMBOL_GPL(__pte_alloc); + + int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) + { +@@ -418,6 +425,7 @@ struct page *vm_normal_page(struct vm_ar + */ + return pfn_to_page(pfn); + } ++EXPORT_SYMBOL_GPL(vm_normal_page); + + /* + * copy one vm_area from one task to the other. 
Assumes the page tables +@@ -428,7 +436,7 @@ struct page *vm_normal_page(struct vm_ar + static inline void + copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, +- unsigned long addr, int *rss) ++ unsigned long addr, int *rss, struct page_beancounter **pbc) + { + unsigned long vm_flags = vma->vm_flags; + pte_t pte = *src_pte; +@@ -471,6 +479,7 @@ copy_one_pte(struct mm_struct *dst_mm, s + if (page) { + get_page(page); + page_dup_rmap(page); ++ pb_dup_ref(page, dst_mm, pbc); + rss[!!PageAnon(page)]++; + } + +@@ -478,20 +487,36 @@ out_set_pte: + set_pte_at(dst_mm, addr, dst_pte, pte); + } + ++#define pte_ptrs(a) (PTRS_PER_PTE - ((a >> PAGE_SHIFT)&(PTRS_PER_PTE - 1))) ++#ifdef CONFIG_USER_RESOURCE ++#define same_ub(mm1, mm2) ((mm1)->mm_ub == (mm2)->mm_ub) ++#else ++#define same_ub(mm1, mm2) (1) ++#endif ++ + static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +- pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, ++ pmd_t *dst_pmd, pmd_t *src_pmd, ++ struct vm_area_struct *dst_vma, ++ struct vm_area_struct *vma, + unsigned long addr, unsigned long end) + { + pte_t *src_pte, *dst_pte; + spinlock_t *src_ptl, *dst_ptl; + int progress = 0; +- int rss[2]; ++ int rss[2], rss_tot; ++ struct page_beancounter *pbc; ++ int err; + ++ err = -ENOMEM; ++ pbc = same_ub(src_mm, dst_mm) ? 
PBC_COPY_SAME : NULL; + again: ++ if (pbc != PBC_COPY_SAME && pb_alloc_list(&pbc, pte_ptrs(addr))) ++ goto out; + rss[1] = rss[0] = 0; + dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); + if (!dst_pte) +- return -ENOMEM; ++ goto out; ++ + src_pte = pte_offset_map_nested(src_pmd, addr); + src_ptl = pte_lockptr(src_mm, src_pmd); + spin_lock(src_ptl); +@@ -512,22 +537,32 @@ again: + progress++; + continue; + } +- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss); ++ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, ++ vma, addr, rss, &pbc); + progress += 8; + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + + spin_unlock(src_ptl); + pte_unmap_nested(src_pte - 1); ++ rss_tot = rss[0] + rss[1]; ++ add_vma_rss(dst_vma, rss_tot); ++ ub_unused_privvm_sub(dst_mm, dst_vma, rss_tot); + add_mm_rss(dst_mm, rss[0], rss[1]); + pte_unmap_unlock(dst_pte - 1, dst_ptl); + cond_resched(); + if (addr != end) + goto again; +- return 0; ++ ++ err = 0; ++out: ++ pb_free_list(&pbc); ++ return err; + } + + static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +- pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma, ++ pud_t *dst_pud, pud_t *src_pud, ++ struct vm_area_struct *dst_vma, ++ struct vm_area_struct *vma, + unsigned long addr, unsigned long end) + { + pmd_t *src_pmd, *dst_pmd; +@@ -542,14 +577,16 @@ static inline int copy_pmd_range(struct + if (pmd_none_or_clear_bad(src_pmd)) + continue; + if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd, +- vma, addr, next)) ++ dst_vma, vma, addr, next)) + return -ENOMEM; + } while (dst_pmd++, src_pmd++, addr = next, addr != end); + return 0; + } + + static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +- pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, ++ pgd_t *dst_pgd, pgd_t *src_pgd, ++ struct vm_area_struct *dst_vma, ++ struct vm_area_struct *vma, + unsigned long addr, unsigned long end) + { + pud_t *src_pud, *dst_pud; 
+@@ -564,19 +601,20 @@ static inline int copy_pud_range(struct + if (pud_none_or_clear_bad(src_pud)) + continue; + if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud, +- vma, addr, next)) ++ dst_vma, vma, addr, next)) + return -ENOMEM; + } while (dst_pud++, src_pud++, addr = next, addr != end); + return 0; + } + +-int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +- struct vm_area_struct *vma) ++int __copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *vma, ++ unsigned long addr, size_t size) + { ++ struct mm_struct *dst_mm = dst_vma->vm_mm; ++ struct mm_struct *src_mm = vma->vm_mm; + pgd_t *src_pgd, *dst_pgd; + unsigned long next; +- unsigned long addr = vma->vm_start; +- unsigned long end = vma->vm_end; ++ unsigned long end = addr + size; + + /* + * Don't copy ptes where a page fault will fill them correctly. +@@ -599,11 +637,22 @@ int copy_page_range(struct mm_struct *ds + if (pgd_none_or_clear_bad(src_pgd)) + continue; + if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, +- vma, addr, next)) ++ dst_vma, vma, addr, next)) + return -ENOMEM; + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + return 0; + } ++EXPORT_SYMBOL_GPL(__copy_page_range); ++ ++int copy_page_range(struct mm_struct *dst, struct mm_struct *src, ++ struct vm_area_struct *dst_vma, struct vm_area_struct *vma) ++{ ++ if (dst_vma->vm_mm != dst) ++ BUG(); ++ if (vma->vm_mm != src) ++ BUG(); ++ return __copy_page_range(dst_vma, vma, vma->vm_start, vma->vm_end-vma->vm_start); ++} + + static unsigned long zap_pte_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pmd_t *pmd, +@@ -615,6 +664,7 @@ static unsigned long zap_pte_range(struc + spinlock_t *ptl; + int file_rss = 0; + int anon_rss = 0; ++ int rss; + + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + do { +@@ -668,6 +718,7 @@ static unsigned long zap_pte_range(struc + file_rss--; + } + page_remove_rmap(page); ++ pb_remove_ref(page, mm); + tlb_remove_page(tlb, page); + continue; + } 
+@@ -682,6 +733,9 @@ static unsigned long zap_pte_range(struc + pte_clear_full(mm, addr, pte, tlb->fullmm); + } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); + ++ rss = -(file_rss + anon_rss); ++ ub_unused_privvm_add(mm, vma, rss); ++ sub_vma_rss(vma, rss); + add_mm_rss(mm, file_rss, anon_rss); + pte_unmap_unlock(pte - 1, ptl); + +@@ -1087,12 +1141,14 @@ int get_user_pages(struct task_struct *t + } + EXPORT_SYMBOL(get_user_pages); + +-static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd, ++static int zeromap_pte_range(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, unsigned long end, pgprot_t prot) + { + pte_t *pte; + spinlock_t *ptl; ++ struct mm_struct *mm; + ++ mm = vma->vm_mm; + pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return -ENOMEM; +@@ -1102,6 +1158,8 @@ static int zeromap_pte_range(struct mm_s + page_cache_get(page); + page_add_file_rmap(page); + inc_mm_counter(mm, file_rss); ++ inc_vma_rss(vma); ++ ub_unused_privvm_dec(mm, vma); + BUG_ON(!pte_none(*pte)); + set_pte_at(mm, addr, pte, zero_pte); + } while (pte++, addr += PAGE_SIZE, addr != end); +@@ -1109,35 +1167,35 @@ static int zeromap_pte_range(struct mm_s + return 0; + } + +-static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, ++static inline int zeromap_pmd_range(struct vm_area_struct *vma, pud_t *pud, + unsigned long addr, unsigned long end, pgprot_t prot) + { + pmd_t *pmd; + unsigned long next; + +- pmd = pmd_alloc(mm, pud, addr); ++ pmd = pmd_alloc(vma->vm_mm, pud, addr); + if (!pmd) + return -ENOMEM; + do { + next = pmd_addr_end(addr, end); +- if (zeromap_pte_range(mm, pmd, addr, next, prot)) ++ if (zeromap_pte_range(vma, pmd, addr, next, prot)) + return -ENOMEM; + } while (pmd++, addr = next, addr != end); + return 0; + } + +-static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, ++static inline int zeromap_pud_range(struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, 
pgprot_t prot) + { + pud_t *pud; + unsigned long next; + +- pud = pud_alloc(mm, pgd, addr); ++ pud = pud_alloc(vma->vm_mm, pgd, addr); + if (!pud) + return -ENOMEM; + do { + next = pud_addr_end(addr, end); +- if (zeromap_pmd_range(mm, pud, addr, next, prot)) ++ if (zeromap_pmd_range(vma, pud, addr, next, prot)) + return -ENOMEM; + } while (pud++, addr = next, addr != end); + return 0; +@@ -1149,15 +1207,14 @@ int zeromap_page_range(struct vm_area_st + pgd_t *pgd; + unsigned long next; + unsigned long end = addr + size; +- struct mm_struct *mm = vma->vm_mm; + int err; + + BUG_ON(addr >= end); +- pgd = pgd_offset(mm, addr); ++ pgd = pgd_offset(vma->vm_mm, addr); + flush_cache_range(vma, addr, end); + do { + next = pgd_addr_end(addr, end); +- err = zeromap_pud_range(mm, pgd, addr, next, prot); ++ err = zeromap_pud_range(vma, pgd, addr, next, prot); + if (err) + break; + } while (pgd++, addr = next, addr != end); +@@ -1183,11 +1240,14 @@ pte_t * fastcall get_locked_pte(struct m + * old drivers should use this, and they needed to mark their + * pages reserved for the old functions anyway. + */ +-static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot) ++static int insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot) + { + int retval; + pte_t *pte; +- spinlock_t *ptl; ++ spinlock_t *ptl; ++ struct mm_struct *mm; ++ ++ mm = vma->vm_mm; + + retval = -EINVAL; + if (PageAnon(page)) +@@ -1204,6 +1264,7 @@ static int insert_page(struct mm_struct + /* Ok, finally just insert the thing.. 
*/ + get_page(page); + inc_mm_counter(mm, file_rss); ++ inc_vma_rss(vma); + page_add_file_rmap(page); + set_pte_at(mm, addr, pte, mk_pte(page, prot)); + +@@ -1240,7 +1301,7 @@ int vm_insert_page(struct vm_area_struct + if (!page_count(page)) + return -EINVAL; + vma->vm_flags |= VM_INSERTPAGE; +- return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot); ++ return insert_page(vma, addr, page, vma->vm_page_prot); + } + EXPORT_SYMBOL(vm_insert_page); + +@@ -1449,6 +1510,7 @@ static int do_wp_page(struct mm_struct * + struct page *old_page, *new_page; + pte_t entry; + int ret = VM_FAULT_MINOR; ++ struct page_beancounter *pbc; + + old_page = vm_normal_page(vma, address, orig_pte); + if (!old_page) +@@ -1476,6 +1538,9 @@ static int do_wp_page(struct mm_struct * + gotten: + pte_unmap_unlock(page_table, ptl); + ++ if (unlikely(pb_alloc(&pbc))) ++ goto oom_nopb; ++ + if (unlikely(anon_vma_prepare(vma))) + goto oom; + if (old_page == ZERO_PAGE(address)) { +@@ -1496,12 +1561,16 @@ gotten: + if (likely(pte_same(*page_table, orig_pte))) { + if (old_page) { + page_remove_rmap(old_page); ++ pb_remove_ref(old_page, mm); + if (!PageAnon(old_page)) { + dec_mm_counter(mm, file_rss); + inc_mm_counter(mm, anon_rss); + } +- } else ++ } else { ++ ub_unused_privvm_dec(mm, vma); + inc_mm_counter(mm, anon_rss); ++ inc_vma_rss(vma); ++ } + flush_cache_page(vma, address, pte_pfn(orig_pte)); + entry = mk_pte(new_page, vma->vm_page_prot); + entry = maybe_mkwrite(pte_mkdirty(entry), vma); +@@ -1510,6 +1579,7 @@ gotten: + lazy_mmu_prot_update(entry); + lru_cache_add_active(new_page); + page_add_new_anon_rmap(new_page, vma, address); ++ pb_add_ref(new_page, mm, &pbc); + + /* Free the old page.. 
*/ + new_page = old_page; +@@ -1519,10 +1589,13 @@ gotten: + page_cache_release(new_page); + if (old_page) + page_cache_release(old_page); ++ pb_free(&pbc); + unlock: + pte_unmap_unlock(page_table, ptl); + return ret; + oom: ++ pb_free(&pbc); ++oom_nopb: + if (old_page) + page_cache_release(old_page); + return VM_FAULT_OOM; +@@ -1877,10 +1950,16 @@ static int do_swap_page(struct mm_struct + swp_entry_t entry; + pte_t pte; + int ret = VM_FAULT_MINOR; ++ struct page_beancounter *pbc; ++ cycles_t start; + + if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) +- goto out; ++ goto out_nostat; ++ ++ if (unlikely(pb_alloc(&pbc))) ++ return VM_FAULT_OOM; + ++ start = get_cycles(); + entry = pte_to_swp_entry(orig_pte); + again: + page = lookup_swap_cache(entry); +@@ -1928,6 +2007,8 @@ again: + /* The page isn't present yet, go ahead with the fault. */ + + inc_mm_counter(mm, anon_rss); ++ inc_vma_rss(vma); ++ ub_swapin_inc(mm); + pte = mk_pte(page, vma->vm_page_prot); + if (write_access && can_share_swap_page(page)) { + pte = maybe_mkwrite(pte_mkdirty(pte), vma); +@@ -1937,6 +2018,8 @@ again: + flush_icache_page(vma, page); + set_pte_at(mm, address, page_table, pte); + page_add_anon_rmap(page, vma, address); ++ pb_add_ref(page, mm, &pbc); ++ ub_unused_privvm_dec(mm, vma); + + swap_free(entry); + if (vm_swap_full()) +@@ -1947,7 +2030,7 @@ again: + if (do_wp_page(mm, vma, address, + page_table, pmd, ptl, pte) == VM_FAULT_OOM) + ret = VM_FAULT_OOM; +- goto out; ++ goto out_wp; + } + + /* No need to invalidate - it was non-present before */ +@@ -1955,10 +2038,16 @@ again: + lazy_mmu_prot_update(pte); + unlock: + pte_unmap_unlock(page_table, ptl); +-out: ++out_wp: ++ pb_free(&pbc); ++ spin_lock_irq(&kstat_glb_lock); ++ KSTAT_LAT_ADD(&kstat_glob.swap_in, get_cycles() - start); ++ spin_unlock_irq(&kstat_glb_lock); ++out_nostat: + return ret; + out_nomap: + pte_unmap_unlock(page_table, ptl); ++ pb_free(&pbc); + unlock_page(page); + page_cache_release(page); + return ret; +@@ 
-1976,11 +2065,15 @@ static int do_anonymous_page(struct mm_s + struct page *page; + spinlock_t *ptl; + pte_t entry; ++ struct page_beancounter *pbc; + + if (write_access) { + /* Allocate our own private page. */ + pte_unmap(page_table); + ++ if (unlikely(pb_alloc(&pbc))) ++ goto oom_nopb; ++ + if (unlikely(anon_vma_prepare(vma))) + goto oom; + page = alloc_zeroed_user_highpage(vma, address); +@@ -1996,7 +2089,10 @@ static int do_anonymous_page(struct mm_s + inc_mm_counter(mm, anon_rss); + lru_cache_add_active(page); + page_add_new_anon_rmap(page, vma, address); ++ pb_add_ref(page, mm, &pbc); + } else { ++ pbc = NULL; ++ + /* Map the ZERO_PAGE - vm_page_prot is readonly */ + page = ZERO_PAGE(address); + page_cache_get(page); +@@ -2010,18 +2106,23 @@ static int do_anonymous_page(struct mm_s + page_add_file_rmap(page); + } + ++ inc_vma_rss(vma); ++ ub_unused_privvm_dec(mm, vma); + set_pte_at(mm, address, page_table, entry); + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, address, entry); + lazy_mmu_prot_update(entry); + unlock: ++ pb_free(&pbc); + pte_unmap_unlock(page_table, ptl); + return VM_FAULT_MINOR; + release: + page_cache_release(page); + goto unlock; + oom: ++ pb_free(&pbc); ++oom_nopb: + return VM_FAULT_OOM; + } + +@@ -2049,6 +2150,7 @@ static int do_no_page(struct mm_struct * + unsigned int sequence = 0; + int ret = VM_FAULT_MINOR; + int anon = 0; ++ struct page_beancounter *pbc; + + pte_unmap(page_table); + BUG_ON(vma->vm_flags & VM_PFNMAP); +@@ -2058,6 +2160,9 @@ static int do_no_page(struct mm_struct * + sequence = mapping->truncate_count; + smp_rmb(); /* serializes i_size against truncate_count */ + } ++ ++ if (unlikely(pb_alloc(&pbc))) ++ goto oom_nopb; + retry: + new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); + /* +@@ -2070,9 +2175,9 @@ retry: + + /* no page was available -- either SIGBUS or OOM */ + if (new_page == NOPAGE_SIGBUS) +- return VM_FAULT_SIGBUS; ++ goto bus_nopg; + if (new_page == 
NOPAGE_OOM) +- return VM_FAULT_OOM; ++ goto oom_nopg; + + /* + * Should we do an early C-O-W break? +@@ -2131,6 +2236,9 @@ retry: + inc_mm_counter(mm, file_rss); + page_add_file_rmap(new_page); + } ++ inc_vma_rss(vma); ++ pb_add_ref(new_page, mm, &pbc); ++ ub_unused_privvm_dec(mm, vma); + } else { + /* One of our sibling threads was faster, back out. */ + page_cache_release(new_page); +@@ -2142,10 +2250,18 @@ retry: + lazy_mmu_prot_update(entry); + unlock: + pte_unmap_unlock(page_table, ptl); ++ pb_free(&pbc); + return ret; + oom: + page_cache_release(new_page); ++oom_nopg: ++ pb_free(&pbc); ++oom_nopb: + return VM_FAULT_OOM; ++ ++bus_nopg: ++ pb_free(&pbc); ++ return VM_FAULT_SIGBUS; + } + + /* +@@ -2314,6 +2430,8 @@ int __pud_alloc(struct mm_struct *mm, pg + } + #endif /* __PAGETABLE_PUD_FOLDED */ + ++EXPORT_SYMBOL_GPL(__pud_alloc); ++ + #ifndef __PAGETABLE_PMD_FOLDED + /* + * Allocate page middle directory. +@@ -2348,6 +2466,8 @@ int __pmd_alloc(struct mm_struct *mm, pu + } + #endif /* __PAGETABLE_PMD_FOLDED */ + ++EXPORT_SYMBOL_GPL(__pmd_alloc); ++ + int make_pages_present(unsigned long addr, unsigned long end) + { + int ret, len, write; +diff -upr linux-2.6.16.orig/mm/mempolicy.c linux-2.6.16-026test015/mm/mempolicy.c +--- linux-2.6.16.orig/mm/mempolicy.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/mempolicy.c 2006-07-04 14:41:38.000000000 +0400 +@@ -933,7 +933,7 @@ asmlinkage long sys_migrate_pages(pid_t + + /* Find the mm_struct */ + read_lock(&tasklist_lock); +- task = pid ? find_task_by_pid(pid) : current; ++ task = pid ? 
find_task_by_pid_ve(pid) : current; + if (!task) { + read_unlock(&tasklist_lock); + return -ESRCH; +@@ -1796,7 +1796,6 @@ static void gather_stats(struct page *pa + md->mapcount_max = count; + + md->node[page_to_nid(page)]++; +- cond_resched(); + } + + #ifdef CONFIG_HUGETLB_PAGE +diff -upr linux-2.6.16.orig/mm/mempool.c linux-2.6.16-026test015/mm/mempool.c +--- linux-2.6.16.orig/mm/mempool.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/mempool.c 2006-07-04 14:41:37.000000000 +0400 +@@ -14,6 +14,7 @@ + #include <linux/mempool.h> + #include <linux/blkdev.h> + #include <linux/writeback.h> ++#include <linux/kmem_cache.h> + + static void add_element(mempool_t *pool, void *element) + { +@@ -78,6 +79,8 @@ mempool_t *mempool_create_node(int min_n + init_waitqueue_head(&pool->wait); + pool->alloc = alloc_fn; + pool->free = free_fn; ++ if (alloc_fn == mempool_alloc_slab) ++ kmem_mark_nocharge((kmem_cache_t *)pool_data); + + /* + * First pre-allocate the guaranteed number of buffers. 
+@@ -119,6 +122,7 @@ int mempool_resize(mempool_t *pool, int + unsigned long flags; + + BUG_ON(new_min_nr <= 0); ++ gfp_mask &= ~__GFP_UBC; + + spin_lock_irqsave(&pool->lock, flags); + if (new_min_nr <= pool->min_nr) { +@@ -212,6 +216,7 @@ void * mempool_alloc(mempool_t *pool, gf + gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ + gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ + gfp_mask |= __GFP_NOWARN; /* failures are OK */ ++ gfp_mask &= ~__GFP_UBC; + + gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO); + +diff -upr linux-2.6.16.orig/mm/mlock.c linux-2.6.16-026test015/mm/mlock.c +--- linux-2.6.16.orig/mm/mlock.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/mlock.c 2006-07-04 14:41:39.000000000 +0400 +@@ -8,9 +8,11 @@ + #include <linux/capability.h> + #include <linux/mman.h> + #include <linux/mm.h> ++#include <linux/module.h> + #include <linux/mempolicy.h> + #include <linux/syscalls.h> + ++#include <ub/ub_vmpages.h> + + static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, + unsigned long start, unsigned long end, unsigned int newflags) +@@ -25,6 +27,14 @@ static int mlock_fixup(struct vm_area_st + goto out; + } + ++ if (newflags & VM_LOCKED) { ++ ret = ub_locked_charge(mm, end - start); ++ if (ret < 0) { ++ *prev = vma; ++ goto out; ++ } ++ } ++ + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); + *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, + vma->vm_file, pgoff, vma_policy(vma)); +@@ -38,13 +48,13 @@ static int mlock_fixup(struct vm_area_st + if (start != vma->vm_start) { + ret = split_vma(mm, vma, start, 1); + if (ret) +- goto out; ++ goto out_uncharge; + } + + if (end != vma->vm_end) { + ret = split_vma(mm, vma, end, 0); + if (ret) +- goto out; ++ goto out_uncharge; + } + + success: +@@ -63,13 +73,19 @@ success: + pages = -pages; + if (!(newflags & VM_IO)) + ret = make_pages_present(start, end); +- } ++ } else ++ ub_locked_uncharge(mm, end - 
start); + + vma->vm_mm->locked_vm -= pages; + out: + if (ret == -ENOMEM) + ret = -EAGAIN; + return ret; ++ ++out_uncharge: ++ if (newflags & VM_LOCKED) ++ ub_locked_uncharge(mm, end - start); ++ goto out; + } + + static int do_mlock(unsigned long start, size_t len, int on) +@@ -146,6 +162,7 @@ asmlinkage long sys_mlock(unsigned long + up_write(¤t->mm->mmap_sem); + return error; + } ++EXPORT_SYMBOL_GPL(sys_mlock); + + asmlinkage long sys_munlock(unsigned long start, size_t len) + { +@@ -158,6 +175,7 @@ asmlinkage long sys_munlock(unsigned lon + up_write(¤t->mm->mmap_sem); + return ret; + } ++EXPORT_SYMBOL_GPL(sys_munlock); + + static int do_mlockall(int flags) + { +diff -upr linux-2.6.16.orig/mm/mmap.c linux-2.6.16-026test015/mm/mmap.c +--- linux-2.6.16.orig/mm/mmap.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/mmap.c 2006-07-04 14:41:39.000000000 +0400 +@@ -25,14 +25,18 @@ + #include <linux/mount.h> + #include <linux/mempolicy.h> + #include <linux/rmap.h> ++#include <linux/virtinfo.h> + + #include <asm/uaccess.h> + #include <asm/cacheflush.h> + #include <asm/tlb.h> + ++#include <ub/ub_vmpages.h> ++ + static void unmap_region(struct mm_struct *mm, + struct vm_area_struct *vma, struct vm_area_struct *prev, + unsigned long start, unsigned long end); ++static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft); + + /* + * WARNING: the debugging will use recursive algorithms so never enable this +@@ -87,6 +91,16 @@ int __vm_enough_memory(long pages, int c + + vm_acct_memory(pages); + ++ switch (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_ENOUGHMEM, ++ (void *)pages) ++ & (NOTIFY_OK | NOTIFY_FAIL)) { ++ case NOTIFY_OK: ++ return 0; ++ case NOTIFY_FAIL: ++ vm_unacct_memory(pages); ++ return -ENOMEM; ++ } ++ + /* + * Sometimes we want to use more memory than we have + */ +@@ -201,11 +215,16 @@ static struct vm_area_struct *remove_vma + struct vm_area_struct *next = vma->vm_next; + + might_sleep(); ++ ++ 
ub_memory_uncharge(vma->vm_mm, vma->vm_end - vma->vm_start, ++ vma->vm_flags, vma->vm_file); + if (vma->vm_ops && vma->vm_ops->close) + vma->vm_ops->close(vma); + if (vma->vm_file) + fput(vma->vm_file); + mpol_free(vma_policy(vma)); ++ if (get_vma_rss(vma)) ++ warn_bad_rss(vma, 0); + kmem_cache_free(vm_area_cachep, vma); + return next; + } +@@ -242,7 +261,7 @@ asmlinkage unsigned long sys_brk(unsigne + goto out; + + /* Ok, looks good - let it rip. */ +- if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk) ++ if (__do_brk(oldbrk, newbrk-oldbrk, UB_HARD) != oldbrk) + goto out; + set_brk: + mm->brk = brk; +@@ -726,7 +745,7 @@ struct vm_area_struct *vma_merge(struct + else + next = mm->mmap; + area = next; +- if (next && next->vm_end == end) /* cases 6, 7, 8 */ ++ if (next && next->vm_end == end) /* cases 6, 7, 8 */ + next = next->vm_next; + + /* +@@ -746,11 +765,22 @@ struct vm_area_struct *vma_merge(struct + is_mergeable_anon_vma(prev->anon_vma, + next->anon_vma)) { + /* cases 1, 6 */ ++ add_vma_rss(prev, get_vma_rss(next)); ++ if (area != next) /* case 6 */ ++ add_vma_rss(prev, get_vma_rss(area)); + vma_adjust(prev, prev->vm_start, + next->vm_end, prev->vm_pgoff, NULL); +- } else /* cases 2, 5, 7 */ ++ } else { /* cases 2, 5, 7 */ ++ if (next && addr == next->vm_start) { /* case 5 */ ++ unsigned long rss; ++ rss = pages_in_vma_range(next, addr, end); ++ sub_vma_rss(next, rss); ++ add_vma_rss(prev, rss); ++ } else if (area != next) /* case 7 */ ++ add_vma_rss(prev, get_vma_rss(area)); + vma_adjust(prev, prev->vm_start, + end, prev->vm_pgoff, NULL); ++ } + return prev; + } + +@@ -761,12 +791,19 @@ struct vm_area_struct *vma_merge(struct + mpol_equal(policy, vma_policy(next)) && + can_vma_merge_before(next, vm_flags, + anon_vma, file, pgoff+pglen)) { +- if (prev && addr < prev->vm_end) /* case 4 */ ++ if (prev && addr < prev->vm_end) { /* case 4 */ ++ unsigned long rss; ++ rss = pages_in_vma_range(prev, addr, end); ++ sub_vma_rss(prev, rss); ++ add_vma_rss(next, rss); + 
vma_adjust(prev, prev->vm_start, + addr, prev->vm_pgoff, NULL); +- else /* cases 3, 8 */ ++ } else { /* cases 3, 8 */ ++ if (area != next) /* case 8 */ ++ add_vma_rss(area, get_vma_rss(next)); + vma_adjust(area, addr, next->vm_end, + next->vm_pgoff - pglen, NULL); ++ } + return area; + } + +@@ -1033,6 +1070,10 @@ munmap_back: + } + } + ++ if (ub_memory_charge(mm, len, vm_flags, file, ++ (flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD))) ++ goto charge_error; ++ + /* + * Can we just expand an old private anonymous mapping? + * The VM_SHARED test is necessary because shmem_zero_setup +@@ -1048,7 +1089,8 @@ munmap_back: + * specific mapper. the address has already been validated, but + * not unmapped, but the maps are removed from the list. + */ +- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); ++ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | ++ (flags & MAP_EXECPRIO ? __GFP_SOFT_UBC : 0)); + if (!vma) { + error = -ENOMEM; + goto unacct_error; +@@ -1107,6 +1149,19 @@ munmap_back: + if (correct_wcount) + atomic_inc(&inode->i_writecount); + } else { ++ unsigned long rss; ++ ++ rss = get_vma_rss(vma); ++ if (rss > 0) { ++ if (prev->vm_next && prev->vm_next->vm_start == addr) ++ /* vma_merge expanded next vm_area */ ++ add_vma_rss(prev->vm_next, rss); ++ else ++ /* vma_merge expanded prev vm_area ++ * and probably splitted it with next ++ */ ++ add_vma_rss(prev, rss); ++ } + if (file) { + if (correct_wcount) + atomic_inc(&inode->i_writecount); +@@ -1142,6 +1197,8 @@ unmap_and_free_vma: + free_vma: + kmem_cache_free(vm_area_cachep, vma); + unacct_error: ++ ub_memory_uncharge(mm, len, vm_flags, file); ++charge_error: + if (charged) + vm_unacct_memory(charged); + return error; +@@ -1471,12 +1528,16 @@ static int acct_stack_growth(struct vm_a + return -ENOMEM; + } + ++ if (ub_memory_charge(mm, grow << PAGE_SHIFT, vma->vm_flags, ++ vma->vm_file, UB_SOFT)) ++ goto fail_charge; ++ + /* + * Overcommit.. 
This must be the final test, as it will + * update security statistics. + */ + if (security_vm_enough_memory(grow)) +- return -ENOMEM; ++ goto fail_sec; + + /* Ok, everything looks good - let it rip */ + mm->total_vm += grow; +@@ -1484,6 +1545,11 @@ static int acct_stack_growth(struct vm_a + mm->locked_vm += grow; + vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow); + return 0; ++ ++fail_sec: ++ ub_memory_uncharge(mm, grow << PAGE_SHIFT, vma->vm_flags, vma->vm_file); ++fail_charge: ++ return -ENOMEM; + } + + #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) +@@ -1744,8 +1810,13 @@ int split_vma(struct mm_struct * mm, str + else + vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); + ++ /* protected with mmap sem */ ++ set_vma_rss(vma, pages_in_vma(vma)); ++ set_vma_rss(new, pages_in_vma(new)); ++ + return 0; + } ++EXPORT_SYMBOL_GPL(split_vma); + + /* Munmap is split into 2 main parts -- this part which finds + * what needs doing, and the areas themselves, which do the +@@ -1839,7 +1910,7 @@ static inline void verify_mm_writelocked + * anonymous maps. eventually we may be able to do some + * brk-specific accounting here. + */ +-unsigned long do_brk(unsigned long addr, unsigned long len) ++static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft) + { + struct mm_struct * mm = current->mm; + struct vm_area_struct * vma, * prev; +@@ -1891,11 +1962,14 @@ unsigned long do_brk(unsigned long addr, + if (mm->map_count > sysctl_max_map_count) + return -ENOMEM; + +- if (security_vm_enough_memory(len >> PAGE_SHIFT)) +- return -ENOMEM; +- + flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; + ++ if (ub_memory_charge(mm, len, flags, NULL, soft)) ++ goto fail_charge; ++ ++ if (security_vm_enough_memory(len >> PAGE_SHIFT)) ++ goto fail_sec; ++ + /* Can we just expand an old private anonymous mapping? 
*/ + if (vma_merge(mm, prev, addr, addr + len, flags, + NULL, NULL, pgoff, NULL)) +@@ -1904,11 +1978,11 @@ unsigned long do_brk(unsigned long addr, + /* + * create a vma struct for an anonymous mapping + */ +- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); +- if (!vma) { +- vm_unacct_memory(len >> PAGE_SHIFT); +- return -ENOMEM; +- } ++ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | ++ (soft == UB_SOFT ? __GFP_SOFT_UBC : 0)); ++ if (!vma) ++ goto fail_alloc; ++ + memset(vma, 0, sizeof(*vma)); + + vma->vm_mm = mm; +@@ -1925,8 +1999,19 @@ out: + make_pages_present(addr, addr + len); + } + return addr; ++ ++fail_alloc: ++ vm_unacct_memory(len >> PAGE_SHIFT); ++fail_sec: ++ ub_memory_uncharge(mm, len, flags, NULL); ++fail_charge: ++ return -ENOMEM; + } + ++unsigned long do_brk(unsigned long addr, unsigned long len) ++{ ++ return __do_brk(addr, len, UB_SOFT); ++} + EXPORT_SYMBOL(do_brk); + + /* Release all mmaps. */ +@@ -2036,6 +2121,7 @@ struct vm_area_struct *copy_vma(struct v + new_vma->vm_start = addr; + new_vma->vm_end = addr + len; + new_vma->vm_pgoff = pgoff; ++ set_vma_rss(new_vma, 0); + if (new_vma->vm_file) + get_file(new_vma->vm_file); + if (new_vma->vm_ops && new_vma->vm_ops->open) +diff -upr linux-2.6.16.orig/mm/mprotect.c linux-2.6.16-026test015/mm/mprotect.c +--- linux-2.6.16.orig/mm/mprotect.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/mprotect.c 2006-07-04 14:41:39.000000000 +0400 +@@ -9,6 +9,7 @@ + */ + + #include <linux/mm.h> ++#include <linux/module.h> + #include <linux/hugetlb.h> + #include <linux/slab.h> + #include <linux/shm.h> +@@ -25,6 +26,8 @@ + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++ + static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, pgprot_t newprot) + { +@@ -109,12 +112,20 @@ mprotect_fixup(struct vm_area_struct *vm + pgprot_t newprot; + pgoff_t pgoff; + int error; ++ unsigned long ch_size; ++ int 
ch_dir; + + if (newflags == oldflags) { + *pprev = vma; + return 0; + } + ++ error = -ENOMEM; ++ ch_size = nrpages - pages_in_vma_range(vma, start, end); ++ ch_dir = ub_protected_charge(mm, ch_size, newflags, vma); ++ if (ch_dir == PRIVVM_ERROR) ++ goto fail_ch; ++ + /* + * If we make a private mapping writable we increase our commit; + * but (without finer accounting) cannot reduce our commit if we +@@ -127,7 +138,7 @@ mprotect_fixup(struct vm_area_struct *vm + if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) { + charged = nrpages; + if (security_vm_enough_memory(charged)) +- return -ENOMEM; ++ goto fail_sec; + newflags |= VM_ACCOUNT; + } + } +@@ -169,10 +180,16 @@ success: + change_protection(vma, start, end, newprot); + vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); + vm_stat_account(mm, newflags, vma->vm_file, nrpages); ++ if (ch_dir == PRIVVM_TO_SHARED) ++ __ub_unused_privvm_dec(mm, ch_size); + return 0; + + fail: + vm_unacct_memory(charged); ++fail_sec: ++ if (ch_dir == PRIVVM_TO_PRIVATE) ++ __ub_unused_privvm_dec(mm, ch_size); ++fail_ch: + return error; + } + +@@ -280,3 +297,4 @@ out: + up_write(¤t->mm->mmap_sem); + return error; + } ++EXPORT_SYMBOL_GPL(sys_mprotect); +diff -upr linux-2.6.16.orig/mm/mremap.c linux-2.6.16-026test015/mm/mremap.c +--- linux-2.6.16.orig/mm/mremap.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/mremap.c 2006-07-04 14:41:37.000000000 +0400 +@@ -23,6 +23,8 @@ + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++ + static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) + { + pgd_t *pgd; +@@ -106,6 +108,8 @@ static void move_ptes(struct vm_area_str + pte = ptep_clear_flush(vma, old_addr, old_pte); + /* ZERO_PAGE can be dependant on virtual addr */ + pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); ++ dec_vma_rss(vma); ++ inc_vma_rss(new_vma); + set_pte_at(mm, new_addr, new_pte, pte); + } + +@@ -166,17 +170,21 @@ static 
unsigned long move_vma(struct vm_ + unsigned long hiwater_vm; + int split = 0; + ++ if (ub_memory_charge(mm, new_len, vm_flags, ++ vma->vm_file, UB_HARD)) ++ goto err; ++ + /* + * We'd prefer to avoid failure later on in do_munmap: + * which may split one vma into three before unmapping. + */ + if (mm->map_count >= sysctl_max_map_count - 3) +- return -ENOMEM; ++ goto err_nomem; + + new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); + new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff); + if (!new_vma) +- return -ENOMEM; ++ goto err_nomem; + + moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len); + if (moved_len < old_len) { +@@ -235,7 +243,13 @@ static unsigned long move_vma(struct vm_ + new_addr + new_len); + } + +- return new_addr; ++ if (new_addr != -ENOMEM) ++ return new_addr; ++ ++err_nomem: ++ ub_memory_uncharge(mm, new_len, vm_flags, vma->vm_file); ++err: ++ return -ENOMEM; + } + + /* +@@ -359,7 +373,15 @@ unsigned long do_mremap(unsigned long ad + max_addr = vma->vm_next->vm_start; + /* can we just expand the current mapping? 
*/ + if (max_addr - addr >= new_len) { +- int pages = (new_len - old_len) >> PAGE_SHIFT; ++ int len; ++ int pages; ++ ++ len = new_len - old_len; ++ pages = len >> PAGE_SHIFT; ++ ret = -ENOMEM; ++ if (ub_memory_charge(mm, len, vma->vm_flags, ++ vma->vm_file, UB_HARD)) ++ goto out; + + vma_adjust(vma, vma->vm_start, + addr + new_len, vma->vm_pgoff, NULL); +diff -upr linux-2.6.16.orig/mm/oom_kill.c linux-2.6.16-026test015/mm/oom_kill.c +--- linux-2.6.16.orig/mm/oom_kill.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/oom_kill.c 2006-07-04 14:41:38.000000000 +0400 +@@ -176,7 +176,7 @@ static struct task_struct *select_bad_pr + *ppoints = 0; + + do_posix_clock_monotonic_gettime(&uptime); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + unsigned long points; + int releasing; + +@@ -205,7 +205,7 @@ static struct task_struct *select_bad_pr + chosen = p; + *ppoints = points; + } +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + return chosen; + } + +@@ -261,10 +261,10 @@ static struct mm_struct *oom_kill_task(t + * kill all processes that share the ->mm (i.e. 
all threads), + * but are in a different thread group + */ +- do_each_thread(g, q) ++ do_each_thread_all(g, q) { + if (q->mm == mm && q->tgid != p->tgid) + __oom_kill_task(q, message); +- while_each_thread(g, q); ++ } while_each_thread_all(g, q); + + return mm; + } +diff -upr linux-2.6.16.orig/mm/page_alloc.c linux-2.6.16-026test015/mm/page_alloc.c +--- linux-2.6.16.orig/mm/page_alloc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/page_alloc.c 2006-07-04 14:41:38.000000000 +0400 +@@ -41,6 +41,8 @@ + #include <asm/tlbflush.h> + #include "internal.h" + ++#include <ub/ub_mem.h> ++ + /* + * MCD - HACK: Find somewhere to initialize this EARLY, or make this + * initializer cleaner +@@ -50,6 +52,7 @@ EXPORT_SYMBOL(node_online_map); + nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; + EXPORT_SYMBOL(node_possible_map); + struct pglist_data *pgdat_list __read_mostly; ++EXPORT_SYMBOL(pgdat_list); + unsigned long totalram_pages __read_mostly; + unsigned long totalhigh_pages __read_mostly; + long nr_swap_pages; +@@ -153,7 +156,8 @@ static void bad_page(struct page *page) + 1 << PG_reclaim | + 1 << PG_slab | + 1 << PG_swapcache | +- 1 << PG_writeback ); ++ 1 << PG_writeback | ++ 1 << PG_buddy ); + set_page_count(page, 0); + reset_page_mapcount(page); + page->mapping = NULL; +@@ -224,12 +228,12 @@ static inline unsigned long page_order(s + + static inline void set_page_order(struct page *page, int order) { + set_page_private(page, order); +- __SetPagePrivate(page); ++ __SetPageBuddy(page); + } + + static inline void rmv_page_order(struct page *page) + { +- __ClearPagePrivate(page); ++ __ClearPageBuddy(page); + set_page_private(page, 0); + } + +@@ -268,11 +272,13 @@ __find_combined_index(unsigned long page + * This function checks whether a page is free && is the buddy + * we can do coalesce a page and its buddy if + * (a) the buddy is not in a hole && +- * (b) the buddy is free && +- * (c) the buddy is on the buddy system && +- * (d) a page and 
its buddy have the same order. +- * for recording page's order, we use page_private(page) and PG_private. ++ * (b) the buddy is in the buddy system && ++ * (c) a page and its buddy have the same order. + * ++ * For recording whether a page is in the buddy system, we use PG_buddy. ++ * Setting, clearing, and testing PG_buddy is serialized by zone->lock. ++ * ++ * For recording page's order, we use page_private(page). + */ + static inline int page_is_buddy(struct page *page, int order) + { +@@ -281,10 +287,10 @@ static inline int page_is_buddy(struct p + return 0; + #endif + +- if (PagePrivate(page) && +- (page_order(page) == order) && +- page_count(page) == 0) ++ if (PageBuddy(page) && page_order(page) == order) { ++ BUG_ON(page_count(page) != 0); + return 1; ++ } + return 0; + } + +@@ -301,7 +307,7 @@ static inline int page_is_buddy(struct p + * as necessary, plus some accounting needed to play nicely with other + * parts of the VM system. + * At each level, we keep a list of pages, which are heads of continuous +- * free pages of length of (1 << order) and marked with PG_Private.Page's ++ * free pages of length of (1 << order) and marked with PG_buddy. Page's + * order is recorded in page_private(page) field. + * So when we are allocating or freeing one, we can derive the state of the + * other. 
That is, if we allocate a small block, and both were +@@ -364,7 +370,8 @@ static inline int free_pages_check(struc + 1 << PG_slab | + 1 << PG_swapcache | + 1 << PG_writeback | +- 1 << PG_reserved )))) ++ 1 << PG_reserved | ++ 1 << PG_buddy )))) + bad_page(page); + if (PageDirty(page)) + __ClearPageDirty(page); +@@ -434,6 +441,7 @@ static void __free_pages_ok(struct page + return; + + kernel_map_pages(page, 1 << order, 0); ++ ub_page_uncharge(page, order); + local_irq_save(flags); + __mod_page_state(pgfree, 1 << order); + free_one_page(page_zone(page), page, order); +@@ -522,7 +530,8 @@ static int prep_new_page(struct page *pa + 1 << PG_slab | + 1 << PG_swapcache | + 1 << PG_writeback | +- 1 << PG_reserved )))) ++ 1 << PG_reserved | ++ 1 << PG_buddy )))) + bad_page(page); + + /* +@@ -721,6 +730,7 @@ static void fastcall free_hot_cold_page( + kernel_map_pages(page, 1, 0); + + pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; ++ ub_page_uncharge(page, 0); + local_irq_save(flags); + __inc_page_state(pgfree); + list_add(&page->lru, &pcp->list); +@@ -894,6 +904,28 @@ get_page_from_freelist(gfp_t gfp_mask, u + return page; + } + ++static void __alloc_collect_stats(unsigned int gfp_mask, ++ unsigned int order, struct page *page, cycles_t time) ++{ ++ int ind; ++ unsigned long flags; ++ ++ time = get_cycles() - time; ++ if (!(gfp_mask & __GFP_WAIT)) ++ ind = 0; ++ else if (!(gfp_mask & __GFP_HIGHMEM)) ++ ind = (order > 0 ? 2 : 1); ++ else ++ ind = (order > 0 ? 4 : 3); ++ spin_lock_irqsave(&kstat_glb_lock, flags); ++ KSTAT_LAT_ADD(&kstat_glob.alloc_lat[ind], time); ++ if (!page) ++ kstat_glob.alloc_fails[ind]++; ++ spin_unlock_irqrestore(&kstat_glb_lock, flags); ++} ++ ++int alloc_fail_warn; ++ + /* + * This is the 'heart' of the zoned buddy allocator. 
+ */ +@@ -909,6 +941,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned i + int do_retry; + int alloc_flags; + int did_some_progress; ++ cycles_t start; + + might_sleep_if(wait); + +@@ -920,6 +953,7 @@ restart: + return NULL; + } + ++ start = get_cycles(); + page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, + zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET); + if (page) +@@ -944,7 +978,8 @@ restart: + alloc_flags |= ALLOC_HARDER; + if (gfp_mask & __GFP_HIGH) + alloc_flags |= ALLOC_HIGH; +- alloc_flags |= ALLOC_CPUSET; ++ if (wait) ++ alloc_flags |= ALLOC_CPUSET; + + /* + * Go through the zonelist again. Let __GFP_HIGH and allocations +@@ -1038,14 +1073,22 @@ rebalance: + } + + nopage: +- if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) { ++ __alloc_collect_stats(gfp_mask, order, page, start); ++ if (alloc_fail_warn && !(gfp_mask & __GFP_NOWARN) && ++ printk_ratelimit()) { + printk(KERN_WARNING "%s: page allocation failure." + " order:%d, mode:0x%x\n", + p->comm, order, gfp_mask); + dump_stack(); + show_mem(); + } ++ return NULL; ++ + got_pg: ++ if (ub_page_charge(page, order, gfp_mask)) { ++ __free_pages(page, order); ++ page = NULL; ++ } + return page; + } + +@@ -2378,7 +2421,10 @@ static void *vmstat_start(struct seq_fil + m->private = ps; + if (!ps) + return ERR_PTR(-ENOMEM); +- get_full_page_state(ps); ++ if (ve_is_super(get_exec_env())) ++ get_full_page_state(ps); ++ else ++ memset(ps, 0, sizeof(*ps)); + ps->pgpgin /= 2; /* sectors -> kbytes */ + ps->pgpgout /= 2; + return (unsigned long *)ps + *pos; +diff -upr linux-2.6.16.orig/mm/rmap.c linux-2.6.16-026test015/mm/rmap.c +--- linux-2.6.16.orig/mm/rmap.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/rmap.c 2006-07-04 14:41:39.000000000 +0400 +@@ -56,6 +56,8 @@ + + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++ + //#define RMAP_DEBUG /* can be enabled only for debugging */ + + kmem_cache_t *anon_vma_cachep; +@@ -117,6 +119,7 @@ int anon_vma_prepare(struct vm_area_stru + } + 
return 0; + } ++EXPORT_SYMBOL_GPL(anon_vma_prepare); + + void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next) + { +@@ -145,6 +148,7 @@ void anon_vma_link(struct vm_area_struct + spin_unlock(&anon_vma->lock); + } + } ++EXPORT_SYMBOL_GPL(anon_vma_link); + + void anon_vma_unlink(struct vm_area_struct *vma) + { +@@ -180,14 +184,15 @@ static void anon_vma_ctor(void *data, km + void __init anon_vma_init(void) + { + anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), +- 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL); ++ 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_UBC, ++ anon_vma_ctor, NULL); + } + + /* + * Getting a lock on a stable anon_vma from a page off the LRU is + * tricky: page_lock_anon_vma rely on RCU to guard against the races. + */ +-static struct anon_vma *page_lock_anon_vma(struct page *page) ++struct anon_vma *page_lock_anon_vma(struct page *page) + { + struct anon_vma *anon_vma = NULL; + unsigned long anon_mapping; +@@ -205,6 +210,7 @@ out: + rcu_read_unlock(); + return anon_vma; + } ++EXPORT_SYMBOL_GPL(page_lock_anon_vma); + + #ifdef CONFIG_MIGRATION + /* +@@ -220,6 +226,7 @@ void remove_from_swap(struct page *page) + struct anon_vma *anon_vma; + struct vm_area_struct *vma; + unsigned long mapping; ++ struct page_beancounter *pb; + + if (!PageSwapCache(page)) + return; +@@ -229,6 +236,10 @@ void remove_from_swap(struct page *page) + if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0) + return; + ++ pb = NULL; ++ if (pb_alloc_all(&pb)) ++ return; ++ + /* + * We hold the mmap_sem lock. So no need to call page_lock_anon_vma. 
+ */ +@@ -236,10 +247,12 @@ void remove_from_swap(struct page *page) + spin_lock(&anon_vma->lock); + + list_for_each_entry(vma, &anon_vma->head, anon_vma_node) +- remove_vma_swap(vma, page); ++ remove_vma_swap(vma, page, &pb); + + spin_unlock(&anon_vma->lock); + delete_from_swap_cache(page); ++ ++ pb_free_list(&pb); + } + EXPORT_SYMBOL(remove_from_swap); + #endif +@@ -638,7 +651,11 @@ static int try_to_unmap_one(struct page + } else + dec_mm_counter(mm, file_rss); + ++ dec_vma_rss(vma); + page_remove_rmap(page); ++ ub_unused_privvm_inc(mm, vma); ++ ub_unmap_inc(mm); ++ pb_remove_ref(page, mm); + page_cache_release(page); + + out_unmap: +@@ -729,8 +746,12 @@ static void try_to_unmap_cluster(unsigne + set_page_dirty(page); + + page_remove_rmap(page); ++ ub_unmap_inc(mm); ++ pb_remove_ref(page, mm); ++ ub_unused_privvm_inc(mm, vma); + page_cache_release(page); + dec_mm_counter(mm, file_rss); ++ dec_vma_rss(vma); + (*mapcount)--; + } + pte_unmap_unlock(pte - 1, ptl); +diff -upr linux-2.6.16.orig/mm/shmem.c linux-2.6.16-026test015/mm/shmem.c +--- linux-2.6.16.orig/mm/shmem.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/shmem.c 2006-07-04 14:41:39.000000000 +0400 +@@ -50,6 +50,8 @@ + #include <asm/div64.h> + #include <asm/pgtable.h> + ++#include <ub/ub_vmpages.h> ++ + /* This magic number is used in glibc for posix shared memory */ + #define TMPFS_MAGIC 0x01021994 + +@@ -211,7 +213,7 @@ static void shmem_free_blocks(struct ino + * + * It has to be called with the spinlock held. 
+ */ +-static void shmem_recalc_inode(struct inode *inode) ++static void shmem_recalc_inode(struct inode *inode, long swp_freed) + { + struct shmem_inode_info *info = SHMEM_I(inode); + long freed; +@@ -221,6 +223,8 @@ static void shmem_recalc_inode(struct in + info->alloced -= freed; + shmem_unacct_blocks(info->flags, freed); + shmem_free_blocks(inode, freed); ++ if (freed > swp_freed) ++ ub_tmpfs_respages_sub(info, freed - swp_freed); + } + } + +@@ -326,6 +330,11 @@ static void shmem_swp_set(struct shmem_i + struct page *page = kmap_atomic_to_page(entry); + set_page_private(page, page_private(page) + incdec); + } ++ ++ if (incdec == 1) ++ ub_tmpfs_respages_dec(info); ++ else ++ ub_tmpfs_respages_inc(info); + } + + /* +@@ -342,14 +351,24 @@ static swp_entry_t *shmem_swp_alloc(stru + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct page *page = NULL; + swp_entry_t *entry; ++ unsigned long ub_val; + + if (sgp != SGP_WRITE && + ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) + return ERR_PTR(-EINVAL); + ++ ub_val = 0; ++ if (info->next_index <= index) { ++ ub_val = index + 1 - info->next_index; ++ if (ub_shmpages_charge(info, ub_val)) ++ return ERR_PTR(-ENOSPC); ++ } ++ + while (!(entry = shmem_swp_entry(info, index, &page))) { +- if (sgp == SGP_READ) +- return shmem_swp_map(ZERO_PAGE(0)); ++ if (sgp == SGP_READ) { ++ entry = shmem_swp_map(ZERO_PAGE(0)); ++ goto out; ++ } + /* + * Test free_blocks against 1 not 0, since we have 1 data + * page (and perhaps indirect index pages) yet to allocate: +@@ -359,7 +378,8 @@ static swp_entry_t *shmem_swp_alloc(stru + spin_lock(&sbinfo->stat_lock); + if (sbinfo->free_blocks <= 1) { + spin_unlock(&sbinfo->stat_lock); +- return ERR_PTR(-ENOSPC); ++ entry = ERR_PTR(-ENOSPC); ++ goto out; + } + sbinfo->free_blocks--; + inode->i_blocks += BLOCKS_PER_PAGE; +@@ -367,31 +387,43 @@ static swp_entry_t *shmem_swp_alloc(stru + } + + spin_unlock(&info->lock); +- page = 
shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO); ++ page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | ++ __GFP_ZERO | __GFP_UBC); + if (page) + set_page_private(page, 0); + spin_lock(&info->lock); + + if (!page) { +- shmem_free_blocks(inode, 1); +- return ERR_PTR(-ENOMEM); ++ entry = ERR_PTR(-ENOMEM); ++ goto out_block; + } + if (sgp != SGP_WRITE && + ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + entry = ERR_PTR(-EINVAL); +- break; ++ goto out_dir; + } +- if (info->next_index <= index) ++ if (info->next_index <= index) { ++ ub_val = 0; + info->next_index = index + 1; ++ } + } + if (page) { + /* another task gave its page, or truncated the file */ + shmem_free_blocks(inode, 1); + shmem_dir_free(page); + } +- if (info->next_index <= index && !IS_ERR(entry)) ++ if (info->next_index <= index) + info->next_index = index + 1; + return entry; ++ ++out_dir: ++ shmem_dir_free(page); ++out_block: ++ shmem_free_blocks(inode, 1); ++out: ++ if (ub_val) ++ ub_shmpages_uncharge(info, ub_val); ++ return entry; + } + + /* +@@ -484,6 +516,7 @@ static void shmem_truncate_range(struct + return; + + spin_lock(&info->lock); ++ ub_shmpages_uncharge(info, info->next_index - idx); + info->flags |= SHMEM_TRUNCATE; + if (likely(end == (loff_t) -1)) { + limit = info->next_index; +@@ -613,7 +646,7 @@ done2: + info->swapped -= nr_swaps_freed; + if (nr_pages_to_free) + shmem_free_blocks(inode, nr_pages_to_free); +- shmem_recalc_inode(inode); ++ shmem_recalc_inode(inode, nr_swaps_freed); + spin_unlock(&info->lock); + + /* +@@ -696,6 +729,7 @@ static void shmem_delete_inode(struct in + sbinfo->free_inodes++; + spin_unlock(&sbinfo->stat_lock); + } ++ shmi_ub_put(info); + clear_inode(inode); + } + +@@ -817,6 +851,12 @@ int shmem_unuse(swp_entry_t entry, struc + return found; + } + ++#ifdef CONFIG_USER_RESOURCE ++#define shm_get_swap_page(info) (get_swap_page((info)->shmi_ub)) ++#else ++#define shm_get_swap_page(info) (get_swap_page(NULL)) ++#endif ++ + /* 
+ * Move the page from the page cache to the swap cache. + */ +@@ -837,12 +877,12 @@ static int shmem_writepage(struct page * + info = SHMEM_I(inode); + if (info->flags & VM_LOCKED) + goto redirty; +- swap = get_swap_page(); ++ swap = shm_get_swap_page(info); + if (!swap.val) + goto redirty; + + spin_lock(&info->lock); +- shmem_recalc_inode(inode); ++ shmem_recalc_inode(inode, 0); + if (index >= info->next_index) { + BUG_ON(!(info->flags & SHMEM_TRUNCATE)); + goto unlock; +@@ -1030,7 +1070,7 @@ repeat: + goto failed; + + spin_lock(&info->lock); +- shmem_recalc_inode(inode); ++ shmem_recalc_inode(inode, 0); + entry = shmem_swp_alloc(info, idx, sgp); + if (IS_ERR(entry)) { + spin_unlock(&info->lock); +@@ -1206,6 +1246,7 @@ repeat: + spin_unlock(&info->lock); + flush_dcache_page(filepage); + SetPageUptodate(filepage); ++ ub_tmpfs_respages_inc(info); + } + done: + if (*pagep != filepage) { +@@ -1307,28 +1348,6 @@ shmem_get_policy(struct vm_area_struct * + } + #endif + +-int shmem_lock(struct file *file, int lock, struct user_struct *user) +-{ +- struct inode *inode = file->f_dentry->d_inode; +- struct shmem_inode_info *info = SHMEM_I(inode); +- int retval = -ENOMEM; +- +- spin_lock(&info->lock); +- if (lock && !(info->flags & VM_LOCKED)) { +- if (!user_shm_lock(inode->i_size, user)) +- goto out_nomem; +- info->flags |= VM_LOCKED; +- } +- if (!lock && (info->flags & VM_LOCKED) && user) { +- user_shm_unlock(inode->i_size, user); +- info->flags &= ~VM_LOCKED; +- } +- retval = 0; +-out_nomem: +- spin_unlock(&info->lock); +- return retval; +-} +- + int shmem_mmap(struct file *file, struct vm_area_struct *vma) + { + file_accessed(file); +@@ -1365,6 +1384,7 @@ shmem_get_inode(struct super_block *sb, + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + info = SHMEM_I(inode); + memset(info, 0, (char *)inode - (char *)info); ++ shmi_ub_set(info, get_exec_ub()); + spin_lock_init(&info->lock); + INIT_LIST_HEAD(&info->swaplist); + +@@ -2100,6 +2120,7 @@ static int 
shmem_fill_super(struct super + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = TMPFS_MAGIC; + sb->s_op = &shmem_ops; ++ sb->s_time_gran = 1; + + inode = shmem_get_inode(sb, S_IFDIR | mode, 0); + if (!inode) +@@ -2172,6 +2193,7 @@ static struct address_space_operations s + .prepare_write = shmem_prepare_write, + .commit_write = simple_commit_write, + #endif ++ .migratepage = migrate_page, + }; + + static struct file_operations shmem_file_operations = { +@@ -2226,6 +2248,10 @@ static struct vm_operations_struct shmem + #endif + }; + ++int is_shmem_mapping(struct address_space *map) ++{ ++ return (map != NULL && map->a_ops == &shmem_aops); ++} + + static struct super_block *shmem_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +@@ -2233,13 +2259,19 @@ static struct super_block *shmem_get_sb( + return get_sb_nodev(fs_type, flags, data, shmem_fill_super); + } + +-static struct file_system_type tmpfs_fs_type = { ++struct file_system_type tmpfs_fs_type = { + .owner = THIS_MODULE, + .name = "tmpfs", + .get_sb = shmem_get_sb, + .kill_sb = kill_litter_super, + }; ++EXPORT_SYMBOL(tmpfs_fs_type); ++ ++#ifdef CONFIG_VE ++#define shm_mnt (get_exec_env()->shmem_mnt) ++#else + static struct vfsmount *shm_mnt; ++#endif + + static int __init init_tmpfs(void) + { +@@ -2276,6 +2308,36 @@ out3: + } + module_init(init_tmpfs) + ++static inline int shm_charge_ahead(struct inode *inode) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ struct shmem_inode_info *info = SHMEM_I(inode); ++ unsigned long idx; ++ swp_entry_t *entry; ++ ++ if (!inode->i_size) ++ return 0; ++ idx = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; ++ /* ++ * Just touch info to allocate space for entry and ++ * make all UBC checks ++ */ ++ spin_lock(&info->lock); ++ entry = shmem_swp_alloc(info, idx, SGP_CACHE); ++ if (IS_ERR(entry)) ++ goto err; ++ shmem_swp_unmap(entry); ++ spin_unlock(&info->lock); ++ return 0; ++ ++err: ++ spin_unlock(&info->lock); ++ return PTR_ERR(entry); ++#else 
++ return 0; ++#endif ++} ++ + /* + * shmem_file_setup - get an unlinked file living in tmpfs + * +@@ -2323,6 +2385,10 @@ struct file *shmem_file_setup(char *name + d_instantiate(dentry, inode); + inode->i_size = size; + inode->i_nlink = 0; /* It is unlinked */ ++ error = shm_charge_ahead(inode); ++ if (error) ++ goto close_file; ++ + file->f_vfsmnt = mntget(shm_mnt); + file->f_dentry = dentry; + file->f_mapping = inode->i_mapping; +@@ -2338,6 +2404,7 @@ put_memory: + shmem_unacct_size(flags, size); + return ERR_PTR(error); + } ++EXPORT_SYMBOL_GPL(shmem_file_setup); + + /* + * shmem_zero_setup - setup a shared anonymous mapping +@@ -2355,6 +2422,8 @@ int shmem_zero_setup(struct vm_area_stru + + if (vma->vm_file) + fput(vma->vm_file); ++ else if (vma->vm_flags & VM_WRITE) ++ __ub_unused_privvm_dec(vma->vm_mm, size >> PAGE_SHIFT); + vma->vm_file = file; + vma->vm_ops = &shmem_vm_ops; + return 0; +diff -upr linux-2.6.16.orig/mm/slab.c linux-2.6.16-026test015/mm/slab.c +--- linux-2.6.16.orig/mm/slab.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/slab.c 2006-07-04 14:41:38.000000000 +0400 +@@ -105,32 +105,19 @@ + #include <linux/nodemask.h> + #include <linux/mempolicy.h> + #include <linux/mutex.h> ++#include <linux/kmem_slab.h> ++#include <linux/kmem_cache.h> + + #include <asm/uaccess.h> + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + #include <asm/page.h> + +-/* +- * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, +- * SLAB_RED_ZONE & SLAB_POISON. +- * 0 for faster, smaller code (especially in the critical paths). +- * +- * STATS - 1 to collect stats for /proc/slabinfo. +- * 0 for faster, smaller code (especially in the critical paths). 
+- * +- * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible) +- */ ++#include <ub/ub_mem.h> + +-#ifdef CONFIG_DEBUG_SLAB +-#define DEBUG 1 +-#define STATS 1 +-#define FORCED_DEBUG 1 +-#else +-#define DEBUG 0 +-#define STATS 0 +-#define FORCED_DEBUG 0 +-#endif ++#define DEBUG SLAB_DEBUG ++#define STATS SLAB_STATS ++#define FORCED_DEBUG SLAB_FORCED_DEBUG + + /* Shouldn't this be in a header file somewhere? */ + #define BYTES_PER_WORD sizeof(void *) +@@ -173,134 +160,20 @@ + SLAB_NO_REAP | SLAB_CACHE_DMA | \ + SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ + SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ +- SLAB_DESTROY_BY_RCU) ++ SLAB_DESTROY_BY_RCU | SLAB_UBC | SLAB_NO_CHARGE) + #else + # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ + SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ + SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ +- SLAB_DESTROY_BY_RCU) ++ SLAB_DESTROY_BY_RCU | SLAB_UBC | SLAB_NO_CHARGE) + #endif + +-/* +- * kmem_bufctl_t: +- * +- * Bufctl's are used for linking objs within a slab +- * linked offsets. +- * +- * This implementation relies on "struct page" for locating the cache & +- * slab an object belongs to. +- * This allows the bufctl structure to be small (one int), but limits +- * the number of objects a slab (not a cache) can contain when off-slab +- * bufctls are used. The limit is the size of the largest general cache +- * that does not use off-slab slabs. +- * For 32bit archs with 4 kB pages, is this 56. +- * This is not serious, as it is only for large objects, when it is unwise +- * to have too many per slab. +- * Note: This limit can be raised by introducing a general cache whose size +- * is less than 512 (PAGE_SIZE<<3), but greater than 256. +- */ +- +-typedef unsigned int kmem_bufctl_t; +-#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) +-#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) +-#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2) +- + /* Max number of objs-per-slab for caches which use off-slab slabs. 
+ * Needed to avoid a possible looping condition in cache_grow(). + */ + static unsigned long offslab_limit; + + /* +- * struct slab +- * +- * Manages the objs in a slab. Placed either at the beginning of mem allocated +- * for a slab, or allocated from an general cache. +- * Slabs are chained into three list: fully used, partial, fully free slabs. +- */ +-struct slab { +- struct list_head list; +- unsigned long colouroff; +- void *s_mem; /* including colour offset */ +- unsigned int inuse; /* num of objs active in slab */ +- kmem_bufctl_t free; +- unsigned short nodeid; +-}; +- +-/* +- * struct slab_rcu +- * +- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to +- * arrange for kmem_freepages to be called via RCU. This is useful if +- * we need to approach a kernel structure obliquely, from its address +- * obtained without the usual locking. We can lock the structure to +- * stabilize it and check it's still at the given address, only if we +- * can be sure that the memory has not been meanwhile reused for some +- * other kind of object (which our subsystem's lock might corrupt). +- * +- * rcu_read_lock before reading the address, then rcu_read_unlock after +- * taking the spinlock within the structure expected at that address. +- * +- * We assume struct slab_rcu can overlay struct slab when destroying. +- */ +-struct slab_rcu { +- struct rcu_head head; +- struct kmem_cache *cachep; +- void *addr; +-}; +- +-/* +- * struct array_cache +- * +- * Purpose: +- * - LIFO ordering, to hand out cache-warm objects from _alloc +- * - reduce the number of linked list operations +- * - reduce spinlock operations +- * +- * The limit is stored in the per-cpu structure to reduce the data cache +- * footprint. 
+- * +- */ +-struct array_cache { +- unsigned int avail; +- unsigned int limit; +- unsigned int batchcount; +- unsigned int touched; +- spinlock_t lock; +- void *entry[0]; /* +- * Must have this definition in here for the proper +- * alignment of array_cache. Also simplifies accessing +- * the entries. +- * [0] is for gcc 2.95. It should really be []. +- */ +-}; +- +-/* bootstrap: The caches do not work without cpuarrays anymore, +- * but the cpuarrays are allocated from the generic caches... +- */ +-#define BOOT_CPUCACHE_ENTRIES 1 +-struct arraycache_init { +- struct array_cache cache; +- void *entries[BOOT_CPUCACHE_ENTRIES]; +-}; +- +-/* +- * The slab lists for all objects. +- */ +-struct kmem_list3 { +- struct list_head slabs_partial; /* partial list first, better asm code */ +- struct list_head slabs_full; +- struct list_head slabs_free; +- unsigned long free_objects; +- unsigned long next_reap; +- int free_touched; +- unsigned int free_limit; +- unsigned int colour_next; /* Per-node cache coloring */ +- spinlock_t list_lock; +- struct array_cache *shared; /* shared per node */ +- struct array_cache **alien; /* on other nodes */ +-}; +- +-/* + * Need this for bootstrapping a per node allocator. + */ + #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1) +@@ -364,79 +237,6 @@ static void kmem_list3_init(struct kmem_ + MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ + } while (0) + +-/* +- * struct kmem_cache +- * +- * manages a cache. 
+- */ +- +-struct kmem_cache { +-/* 1) per-cpu data, touched during every alloc/free */ +- struct array_cache *array[NR_CPUS]; +- unsigned int batchcount; +- unsigned int limit; +- unsigned int shared; +- unsigned int buffer_size; +-/* 2) touched by every alloc & free from the backend */ +- struct kmem_list3 *nodelists[MAX_NUMNODES]; +- unsigned int flags; /* constant flags */ +- unsigned int num; /* # of objs per slab */ +- spinlock_t spinlock; +- +-/* 3) cache_grow/shrink */ +- /* order of pgs per slab (2^n) */ +- unsigned int gfporder; +- +- /* force GFP flags, e.g. GFP_DMA */ +- gfp_t gfpflags; +- +- size_t colour; /* cache colouring range */ +- unsigned int colour_off; /* colour offset */ +- struct kmem_cache *slabp_cache; +- unsigned int slab_size; +- unsigned int dflags; /* dynamic flags */ +- +- /* constructor func */ +- void (*ctor) (void *, struct kmem_cache *, unsigned long); +- +- /* de-constructor func */ +- void (*dtor) (void *, struct kmem_cache *, unsigned long); +- +-/* 4) cache creation/removal */ +- const char *name; +- struct list_head next; +- +-/* 5) statistics */ +-#if STATS +- unsigned long num_active; +- unsigned long num_allocations; +- unsigned long high_mark; +- unsigned long grown; +- unsigned long reaped; +- unsigned long errors; +- unsigned long max_freeable; +- unsigned long node_allocs; +- unsigned long node_frees; +- atomic_t allochit; +- atomic_t allocmiss; +- atomic_t freehit; +- atomic_t freemiss; +-#endif +-#if DEBUG +- /* +- * If debugging is enabled, then the allocator can add additional +- * fields and/or padding to every object. buffer_size contains the total +- * object size including these internal fields, the following two +- * variables contain the offset to the user object and its size. 
+- */ +- int obj_offset; +- int obj_size; +-#endif +-}; +- +-#define CFLGS_OFF_SLAB (0x80000000UL) +-#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) +- + #define BATCHREFILL_LIMIT 16 + /* Optimization question: fewer reaps means less + * probability for unnessary cpucache drain/refill cycles. +@@ -573,42 +373,6 @@ static void **dbg_userword(struct kmem_c + #define BREAK_GFP_ORDER_LO 0 + static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; + +-/* Functions for storing/retrieving the cachep and or slab from the +- * global 'mem_map'. These are used to find the slab an obj belongs to. +- * With kfree(), these are used to find the cache which an obj belongs to. +- */ +-static inline void page_set_cache(struct page *page, struct kmem_cache *cache) +-{ +- page->lru.next = (struct list_head *)cache; +-} +- +-static inline struct kmem_cache *page_get_cache(struct page *page) +-{ +- return (struct kmem_cache *)page->lru.next; +-} +- +-static inline void page_set_slab(struct page *page, struct slab *slab) +-{ +- page->lru.prev = (struct list_head *)slab; +-} +- +-static inline struct slab *page_get_slab(struct page *page) +-{ +- return (struct slab *)page->lru.prev; +-} +- +-static inline struct kmem_cache *virt_to_cache(const void *obj) +-{ +- struct page *page = virt_to_page(obj); +- return page_get_cache(page); +-} +- +-static inline struct slab *virt_to_slab(const void *obj) +-{ +- struct page *page = virt_to_page(obj); +- return page_get_slab(page); +-} +- + /* These are the default caches for kmalloc. Custom caches can have other sizes. 
*/ + struct cache_sizes malloc_sizes[] = { + #define CACHE(x) { .cs_size = (x) }, +@@ -715,9 +479,17 @@ struct kmem_cache *kmem_find_general_cac + } + EXPORT_SYMBOL(kmem_find_general_cachep); + +-static size_t slab_mgmt_size(size_t nr_objs, size_t align) ++static size_t slab_mgmt_size_noalign(size_t nr_objs, int flags) + { +- return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); ++ size_t size_noub; ++ ++ size_noub = sizeof(struct slab) + nr_objs * sizeof(kmem_bufctl_t); ++ return ALIGN(size_noub, UB_ALIGN(flags)) + nr_objs * UB_EXTRA(flags); ++} ++ ++static size_t slab_mgmt_size(size_t nr_objs, size_t align, int flags) ++{ ++ return ALIGN(slab_mgmt_size_noalign(nr_objs, flags), align); + } + + /* Calculate the number of objects and left-over bytes for a given +@@ -761,20 +533,23 @@ static void cache_estimate(unsigned long + * into account. + */ + nr_objs = (slab_size - sizeof(struct slab)) / +- (buffer_size + sizeof(kmem_bufctl_t)); ++ (buffer_size + sizeof(kmem_bufctl_t) + ++ UB_EXTRA(flags)); + + /* + * This calculated number will be either the right + * amount, or one greater than what we want. 
+ */ +- if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size +- > slab_size) ++ if (slab_mgmt_size(nr_objs, align, flags) + ++ nr_objs * buffer_size > slab_size) + nr_objs--; ++ BUG_ON(slab_mgmt_size(nr_objs, align, flags) + ++ nr_objs * buffer_size > slab_size); + + if (nr_objs > SLAB_LIMIT) + nr_objs = SLAB_LIMIT; + +- mgmt_size = slab_mgmt_size(nr_objs, align); ++ mgmt_size = slab_mgmt_size(nr_objs, align, flags); + } + *num = nr_objs; + *left_over = slab_size - nr_objs*buffer_size - mgmt_size; +@@ -1254,6 +1029,7 @@ void __init kmem_cache_init(void) + sizes[INDEX_AC].cs_size, + ARCH_KMALLOC_MINALIGN, + (ARCH_KMALLOC_FLAGS | ++ SLAB_UBC|SLAB_NO_CHARGE | + SLAB_PANIC), NULL, NULL); + + if (INDEX_AC != INDEX_L3) +@@ -1261,8 +1037,9 @@ void __init kmem_cache_init(void) + kmem_cache_create(names[INDEX_L3].name, + sizes[INDEX_L3].cs_size, + ARCH_KMALLOC_MINALIGN, +- (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, +- NULL); ++ (ARCH_KMALLOC_FLAGS | ++ SLAB_UBC | SLAB_NO_CHARGE | ++ SLAB_PANIC), NULL, NULL); + + while (sizes->cs_size != ULONG_MAX) { + /* +@@ -1277,14 +1054,14 @@ void __init kmem_cache_init(void) + sizes->cs_size, + ARCH_KMALLOC_MINALIGN, + (ARCH_KMALLOC_FLAGS ++ | SLAB_UBC ++ | SLAB_NO_CHARGE + | SLAB_PANIC), + NULL, NULL); + + /* Inc off-slab bufctl limit until the ceiling is hit. 
*/ +- if (!(OFF_SLAB(sizes->cs_cachep))) { +- offslab_limit = sizes->cs_size - sizeof(struct slab); +- offslab_limit /= sizeof(kmem_bufctl_t); +- } ++ if (!(OFF_SLAB(sizes->cs_cachep))) ++ offslab_limit = sizes->cs_size; + + sizes->cs_dmacachep = kmem_cache_create(names->name_dma, + sizes->cs_size, +@@ -1704,8 +1481,13 @@ static inline size_t calculate_slab_orde + continue; + + /* More than offslab_limit objects will cause problems */ +- if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit) +- break; ++ if (flags & CFLGS_OFF_SLAB) { ++ unsigned long slab_size; ++ ++ slab_size = slab_mgmt_size_noalign(num, flags); ++ if (slab_size > offslab_limit) ++ break; ++ } + + /* Found something acceptable - save it away */ + cachep->num = num; +@@ -1950,8 +1732,7 @@ kmem_cache_create (const char *name, siz + cachep = NULL; + goto oops; + } +- slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) +- + sizeof(struct slab), align); ++ slab_size = slab_mgmt_size(cachep->num, align, flags); + + /* + * If the slab has been placed off-slab, and we have enough space then +@@ -1964,8 +1745,7 @@ kmem_cache_create (const char *name, siz + + if (flags & CFLGS_OFF_SLAB) { + /* really off slab. 
No need for manual alignment */ +- slab_size = +- cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); ++ slab_size = slab_mgmt_size_noalign(cachep->num, flags); + } + + cachep->colour_off = cache_line_size(); +@@ -2045,6 +1825,7 @@ kmem_cache_create (const char *name, siz + + /* cache setup completed, link it into the list */ + list_add(&cachep->next, &cache_chain); ++ set_cache_objuse(cachep); + oops: + if (!cachep && (flags & SLAB_PANIC)) + panic("kmem_cache_create(): failed to create slab `%s'\n", +@@ -2266,6 +2047,8 @@ int kmem_cache_destroy(struct kmem_cache + kfree(l3); + } + } ++ ++ ub_kmemcache_free(cachep); + kmem_cache_free(&cache_cache, cachep); + + unlock_cpu_hotplug(); +@@ -2282,7 +2065,8 @@ static struct slab *alloc_slabmgmt(struc + + if (OFF_SLAB(cachep)) { + /* Slab management obj is off-slab. */ +- slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags); ++ slabp = kmem_cache_alloc(cachep->slabp_cache, ++ local_flags & (~__GFP_UBC)); + if (!slabp) + return NULL; + } else { +@@ -2292,15 +2076,11 @@ static struct slab *alloc_slabmgmt(struc + slabp->inuse = 0; + slabp->colouroff = colour_off; + slabp->s_mem = objp + colour_off; ++ init_slab_ubps(cachep, slabp); + + return slabp; + } + +-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) +-{ +- return (kmem_bufctl_t *) (slabp + 1); +-} +- + static void cache_init_objs(struct kmem_cache *cachep, + struct slab *slabp, unsigned long ctor_flags) + { +@@ -2470,7 +2250,7 @@ static int cache_grow(struct kmem_cache + /* Get mem for the objs. + * Attempt to allocate a physical page from 'nodeid', + */ +- if (!(objp = kmem_getpages(cachep, flags, nodeid))) ++ if (!(objp = kmem_getpages(cachep, flags & (~__GFP_UBC), nodeid))) + goto failed; + + /* Get slab management. 
*/ +@@ -2823,6 +2603,11 @@ __cache_alloc(struct kmem_cache *cachep, + objp = cache_alloc_debugcheck_after(cachep, flags, objp, + caller); + prefetchw(objp); ++ ++ if (objp && ub_slab_charge(objp, flags)) { ++ kmem_cache_free(cachep, objp); ++ objp = NULL; ++ } + return objp; + } + +@@ -2997,6 +2782,8 @@ static inline void __cache_free(struct k + check_irq_off(); + objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); + ++ ub_slab_uncharge(objp); ++ + /* Make sure we are not freeing a object from another + * node to the array cache on this cpu. + */ +@@ -3128,6 +2915,10 @@ void *kmem_cache_alloc_node(struct kmem_ + ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, + __builtin_return_address(0)); + ++ if (ptr && ub_slab_charge(ptr, flags)) { ++ kmem_cache_free(cachep, ptr); ++ ptr = NULL; ++ } + return ptr; + } + EXPORT_SYMBOL(kmem_cache_alloc_node); +@@ -3543,6 +3334,7 @@ static void cache_reap(void *unused) + return; + } + ++ {KSTAT_PERF_ENTER(cache_reap) + list_for_each(walk, &cache_chain) { + struct kmem_cache *searchp; + struct list_head *p; +@@ -3608,6 +3400,7 @@ static void cache_reap(void *unused) + check_irq_on(); + mutex_unlock(&cache_chain_mutex); + next_reap_node(); ++ KSTAT_PERF_LEAVE(cache_reap)} + /* Setup the next iteration */ + schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); + } +diff -upr linux-2.6.16.orig/mm/swap_state.c linux-2.6.16-026test015/mm/swap_state.c +--- linux-2.6.16.orig/mm/swap_state.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/swap_state.c 2006-07-04 14:41:38.000000000 +0400 +@@ -18,6 +18,8 @@ + + #include <asm/pgtable.h> + ++#include <ub/ub_vmpages.h> ++ + /* + * swapper_space is a fiction, retained to simplify the path through + * vmscan's shrink_list, to make sync_page look nicer, and to allow +@@ -52,14 +54,18 @@ static struct { + unsigned long find_total; + unsigned long noent_race; + unsigned long exist_race; ++ unsigned long remove_race; + } swap_cache_info; 
++EXPORT_SYMBOL(swap_cache_info); + + void show_swap_cache_info(void) + { +- printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n", ++ printk("Swap cache: add %lu, delete %lu, find %lu/%lu, " ++ "race %lu+%lu+%lu\n", + swap_cache_info.add_total, swap_cache_info.del_total, + swap_cache_info.find_success, swap_cache_info.find_total, +- swap_cache_info.noent_race, swap_cache_info.exist_race); ++ swap_cache_info.noent_race, swap_cache_info.exist_race, ++ swap_cache_info.remove_race); + printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10)); + printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); + } +@@ -151,7 +157,14 @@ int add_to_swap(struct page * page, gfp_ + BUG(); + + for (;;) { +- entry = get_swap_page(); ++ struct user_beancounter *ub; ++ ++ ub = pb_grab_page_ub(page); ++ if (IS_ERR(ub)) ++ return 0; ++ ++ entry = get_swap_page(ub); ++ put_beancounter(ub); + if (!entry.val) + return 0; + +@@ -252,10 +265,13 @@ int move_from_swap_cache(struct page *pa + */ + static inline void free_swap_cache(struct page *page) + { +- if (PageSwapCache(page) && !TestSetPageLocked(page)) { ++ if (!PageSwapCache(page)) ++ return; ++ if (!TestSetPageLocked(page)) { + remove_exclusive_swap_page(page); + unlock_page(page); +- } ++ } else ++ INC_CACHE_INFO(remove_race); + } + + /* +diff -upr linux-2.6.16.orig/mm/swapfile.c linux-2.6.16-026test015/mm/swapfile.c +--- linux-2.6.16.orig/mm/swapfile.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/swapfile.c 2006-07-04 14:41:39.000000000 +0400 +@@ -33,6 +33,8 @@ + #include <asm/tlbflush.h> + #include <linux/swapops.h> + ++#include <ub/ub_vmpages.h> ++ + DEFINE_SPINLOCK(swap_lock); + unsigned int nr_swapfiles; + long total_swap_pages; +@@ -172,7 +174,7 @@ no_page: + return 0; + } + +-swp_entry_t get_swap_page(void) ++swp_entry_t get_swap_page(struct user_beancounter *ub) + { + struct swap_info_struct *si; + pgoff_t offset; +@@ -202,6 +204,7 @@ swp_entry_t get_swap_page(void) 
+ offset = scan_swap_map(si); + if (offset) { + spin_unlock(&swap_lock); ++ ub_swapentry_inc(si, offset, ub); + return swp_entry(type, offset); + } + next = swap_list.next; +@@ -277,6 +280,7 @@ static int swap_entry_free(struct swap_i + count--; + p->swap_map[offset] = count; + if (!count) { ++ ub_swapentry_dec(p, offset); + if (offset < p->lowest_bit) + p->lowest_bit = offset; + if (offset > p->highest_bit) +@@ -423,11 +427,18 @@ void free_swap_and_cache(swp_entry_t ent + * force COW, vm_page_prot omits write permission from any private vma. + */ + static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, +- unsigned long addr, swp_entry_t entry, struct page *page) ++ unsigned long addr, swp_entry_t entry, struct page *page, ++ struct page_beancounter **pb) + { +- inc_mm_counter(vma->vm_mm, anon_rss); ++ struct mm_struct *mm; ++ ++ mm = vma->vm_mm; ++ inc_mm_counter(mm, anon_rss); ++ inc_vma_rss(vma); ++ ub_unused_privvm_dec(mm, vma); ++ pb_add_ref(page, mm, pb); + get_page(page); +- set_pte_at(vma->vm_mm, addr, pte, ++ set_pte_at(mm, addr, pte, + pte_mkold(mk_pte(page, vma->vm_page_prot))); + page_add_anon_rmap(page, vma, addr); + swap_free(entry); +@@ -440,7 +451,8 @@ static void unuse_pte(struct vm_area_str + + static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, unsigned long end, +- swp_entry_t entry, struct page *page) ++ swp_entry_t entry, struct page *page, ++ struct page_beancounter **pb) + { + pte_t swp_pte = swp_entry_to_pte(entry); + pte_t *pte; +@@ -454,7 +466,7 @@ static int unuse_pte_range(struct vm_are + * Test inline before going to call unuse_pte. 
+ */ + if (unlikely(pte_same(*pte, swp_pte))) { +- unuse_pte(vma, pte++, addr, entry, page); ++ unuse_pte(vma, pte++, addr, entry, page, pb); + found = 1; + break; + } +@@ -465,7 +477,8 @@ static int unuse_pte_range(struct vm_are + + static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, + unsigned long addr, unsigned long end, +- swp_entry_t entry, struct page *page) ++ swp_entry_t entry, struct page *page, ++ struct page_beancounter **pb) + { + pmd_t *pmd; + unsigned long next; +@@ -475,7 +488,7 @@ static inline int unuse_pmd_range(struct + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; +- if (unuse_pte_range(vma, pmd, addr, next, entry, page)) ++ if (unuse_pte_range(vma, pmd, addr, next, entry, page, pb)) + return 1; + } while (pmd++, addr = next, addr != end); + return 0; +@@ -483,7 +496,8 @@ static inline int unuse_pmd_range(struct + + static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, +- swp_entry_t entry, struct page *page) ++ swp_entry_t entry, struct page *page, ++ struct page_beancounter **pb) + { + pud_t *pud; + unsigned long next; +@@ -493,14 +507,15 @@ static inline int unuse_pud_range(struct + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; +- if (unuse_pmd_range(vma, pud, addr, next, entry, page)) ++ if (unuse_pmd_range(vma, pud, addr, next, entry, page, pb)) + return 1; + } while (pud++, addr = next, addr != end); + return 0; + } + + static int unuse_vma(struct vm_area_struct *vma, +- swp_entry_t entry, struct page *page) ++ swp_entry_t entry, struct page *page, ++ struct page_beancounter **pb) + { + pgd_t *pgd; + unsigned long addr, end, next; +@@ -521,14 +536,15 @@ static int unuse_vma(struct vm_area_stru + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; +- if (unuse_pud_range(vma, pgd, addr, next, entry, page)) ++ if (unuse_pud_range(vma, pgd, addr, next, entry, page, pb)) + 
return 1; + } while (pgd++, addr = next, addr != end); + return 0; + } + + static int unuse_mm(struct mm_struct *mm, +- swp_entry_t entry, struct page *page) ++ swp_entry_t entry, struct page *page, ++ struct page_beancounter **pb) + { + struct vm_area_struct *vma; + +@@ -543,7 +559,7 @@ static int unuse_mm(struct mm_struct *mm + lock_page(page); + } + for (vma = mm->mmap; vma; vma = vma->vm_next) { +- if (vma->anon_vma && unuse_vma(vma, entry, page)) ++ if (vma->anon_vma && unuse_vma(vma, entry, page, pb)) + break; + } + up_read(&mm->mmap_sem); +@@ -555,11 +571,12 @@ static int unuse_mm(struct mm_struct *mm + } + + #ifdef CONFIG_MIGRATION +-int remove_vma_swap(struct vm_area_struct *vma, struct page *page) ++int remove_vma_swap(struct vm_area_struct *vma, struct page *page, ++ struct page_beancounter **pb) + { + swp_entry_t entry = { .val = page_private(page) }; + +- return unuse_vma(vma, entry, page); ++ return unuse_vma(vma, entry, page, pb); + } + #endif + +@@ -618,6 +635,7 @@ static int try_to_unuse(unsigned int typ + int retval = 0; + int reset_overflow = 0; + int shmem; ++ struct page_beancounter *pb; + + /* + * When searching mms for an entry, a good strategy is to +@@ -670,6 +688,13 @@ again: + break; + } + ++ pb = NULL; ++ if (pb_alloc_all(&pb)) { ++ page_cache_release(page); ++ retval = -ENOMEM; ++ break; ++ } ++ + /* + * Don't hold on to start_mm if it looks like exiting. + */ +@@ -698,6 +723,20 @@ again: + } + wait_on_page_writeback(page); + ++ /* If read failed we cannot map not-uptodate page to ++ * user space. Actually, we are in serious troubles, ++ * we do not even know what process to kill. So, the only ++ * variant remains: to stop swapoff() and allow someone ++ * to kill processes to zap invalid pages. ++ */ ++ if (unlikely(!PageUptodate(page))) { ++ pb_free_list(&pb); ++ unlock_page(page); ++ page_cache_release(page); ++ retval = -EIO; ++ break; ++ } ++ + /* + * Remove all references to entry. 
+ * Whenever we reach init_mm, there's no address space +@@ -709,7 +748,7 @@ again: + if (start_mm == &init_mm) + shmem = shmem_unuse(entry, page); + else +- retval = unuse_mm(start_mm, entry, page); ++ retval = unuse_mm(start_mm, entry, page, &pb); + } + if (*swap_map > 1) { + int set_start_mm = (*swap_map >= swcount); +@@ -741,7 +780,7 @@ again: + set_start_mm = 1; + shmem = shmem_unuse(entry, page); + } else +- retval = unuse_mm(mm, entry, page); ++ retval = unuse_mm(mm, entry, page, &pb); + if (set_start_mm && *swap_map < swcount) { + mmput(new_start_mm); + atomic_inc(&mm->mm_users); +@@ -755,6 +794,8 @@ again: + mmput(start_mm); + start_mm = new_start_mm; + } ++ ++ pb_free_list(&pb); + if (retval) { + unlock_page(page); + page_cache_release(page); +@@ -1100,6 +1141,10 @@ asmlinkage long sys_swapoff(const char _ + int i, type, prev; + int err; + ++ /* VE admin check is just to be on the safe side, the admin may affect ++ * swaps only if he has access to special, i.e. if he has been granted ++ * access to the block device or if the swap file is in the area ++ * visible to him. 
*/ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + +@@ -1199,6 +1244,7 @@ asmlinkage long sys_swapoff(const char _ + spin_unlock(&swap_lock); + mutex_unlock(&swapon_mutex); + vfree(swap_map); ++ ub_swap_fini(p); + inode = mapping->host; + if (S_ISBLK(inode->i_mode)) { + struct block_device *bdev = I_BDEV(inode); +@@ -1557,6 +1603,11 @@ asmlinkage long sys_swapon(const char __ + goto bad_swap; + } + ++ if (ub_swap_init(p, maxpages)) { ++ error = -ENOMEM; ++ goto bad_swap; ++ } ++ + mutex_lock(&swapon_mutex); + spin_lock(&swap_lock); + p->flags = SWP_ACTIVE; +diff -upr linux-2.6.16.orig/mm/vmalloc.c linux-2.6.16-026test015/mm/vmalloc.c +--- linux-2.6.16.orig/mm/vmalloc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/vmalloc.c 2006-07-04 14:41:37.000000000 +0400 +@@ -20,6 +20,8 @@ + #include <asm/uaccess.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_debug.h> ++ + + DEFINE_RWLOCK(vmlist_lock); + struct vm_struct *vmlist; +@@ -256,6 +258,68 @@ struct vm_struct *get_vm_area_node(unsig + return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node); + } + ++struct vm_struct * get_vm_area_best(unsigned long size, unsigned long flags) ++{ ++ unsigned long addr, best_addr, delta, best_delta; ++ struct vm_struct **p, **best_p, *tmp, *area; ++ ++ area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL); ++ if (!area) ++ return NULL; ++ ++ size += PAGE_SIZE; /* one-page gap at the end */ ++ addr = VMALLOC_START; ++ best_addr = 0UL; ++ best_p = NULL; ++ best_delta = PAGE_ALIGN(VMALLOC_END) - VMALLOC_START; ++ ++ write_lock(&vmlist_lock); ++ for (p = &vmlist; (tmp = *p) && ++ (tmp->addr <= (void *)PAGE_ALIGN(VMALLOC_END)); ++ p = &tmp->next) { ++ if ((size + addr) < addr) ++ break; ++ delta = (unsigned long) tmp->addr - (size + addr); ++ if (delta < best_delta) { ++ best_delta = delta; ++ best_addr = addr; ++ best_p = p; ++ } ++ addr = tmp->size + (unsigned long) tmp->addr; ++ if (addr > VMALLOC_END-size) ++ break; ++ } ++ ++ if (!tmp 
|| (tmp->addr > (void *)PAGE_ALIGN(VMALLOC_END))) { ++ /* check free area after list end */ ++ delta = (unsigned long) PAGE_ALIGN(VMALLOC_END) - (size + addr); ++ if (delta < best_delta) { ++ best_delta = delta; ++ best_addr = addr; ++ best_p = p; ++ } ++ } ++ if (best_addr) { ++ area->flags = flags; ++ /* allocate at the end of this area */ ++ area->addr = (void *)(best_addr + best_delta); ++ area->size = size; ++ area->next = *best_p; ++ area->pages = NULL; ++ area->nr_pages = 0; ++ area->phys_addr = 0; ++ *best_p = area; ++ /* check like in __vunmap */ ++ WARN_ON((PAGE_SIZE - 1) & (unsigned long)area->addr); ++ } else { ++ kfree(area); ++ area = NULL; ++ } ++ write_unlock(&vmlist_lock); ++ ++ return area; ++} ++ + /* Caller must hold vmlist_lock */ + struct vm_struct *__remove_vm_area(void *addr) + { +@@ -296,7 +360,7 @@ struct vm_struct *remove_vm_area(void *a + return v; + } + +-void __vunmap(void *addr, int deallocate_pages) ++void __vunmap(void *addr, int deallocate_pages, int uncharge) + { + struct vm_struct *area; + +@@ -320,6 +384,8 @@ void __vunmap(void *addr, int deallocate + if (deallocate_pages) { + int i; + ++ if (uncharge) ++ dec_vmalloc_charged(area); + for (i = 0; i < area->nr_pages; i++) { + if (unlikely(!area->pages[i])) + BUG(); +@@ -350,7 +416,7 @@ void __vunmap(void *addr, int deallocate + void vfree(void *addr) + { + BUG_ON(in_interrupt()); +- __vunmap(addr, 1); ++ __vunmap(addr, 1, 1); + } + EXPORT_SYMBOL(vfree); + +@@ -367,7 +433,7 @@ EXPORT_SYMBOL(vfree); + void vunmap(void *addr) + { + BUG_ON(in_interrupt()); +- __vunmap(addr, 0); ++ __vunmap(addr, 0, 0); + } + EXPORT_SYMBOL(vunmap); + +@@ -439,10 +505,12 @@ void *__vmalloc_area_node(struct vm_stru + + if (map_vm_area(area, prot, &pages)) + goto fail; ++ ++ inc_vmalloc_charged(area, gfp_mask); + return area->addr; + + fail: +- vfree(area->addr); ++ __vunmap(area->addr, 1, 0); + return NULL; + } + +@@ -486,6 +554,21 @@ void *__vmalloc(unsigned long size, gfp_ + } + 
EXPORT_SYMBOL(__vmalloc); + ++static void *____vmalloc(unsigned long size, gfp_t mask, pgprot_t prot) ++{ ++ struct vm_struct *area; ++ ++ size = PAGE_ALIGN(size); ++ if (!size || (size >> PAGE_SHIFT) > num_physpages) ++ return NULL; ++ ++ area = get_vm_area_best(size, VM_ALLOC); ++ if (!area) ++ return NULL; ++ ++ return __vmalloc_area_node(area, mask, prot, -1); ++} ++ + /** + * vmalloc - allocate virtually contiguous memory + * +@@ -503,6 +586,26 @@ void *vmalloc(unsigned long size) + } + EXPORT_SYMBOL(vmalloc); + ++void *ub_vmalloc(unsigned long size) ++{ ++ return __vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL); ++} ++EXPORT_SYMBOL(ub_vmalloc); ++ ++void *vmalloc_best(unsigned long size) ++{ ++ return ____vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); ++} ++ ++EXPORT_SYMBOL(vmalloc_best); ++ ++void *ub_vmalloc_best(unsigned long size) ++{ ++ return ____vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL); ++} ++ ++EXPORT_SYMBOL(ub_vmalloc_best); ++ + /** + * vmalloc_node - allocate memory on a specific node + * +@@ -521,6 +624,12 @@ void *vmalloc_node(unsigned long size, i + } + EXPORT_SYMBOL(vmalloc_node); + ++void *ub_vmalloc_node(unsigned long size, int node) ++{ ++ return __vmalloc_node(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL, node); ++} ++EXPORT_SYMBOL(ub_vmalloc_node); ++ + #ifndef PAGE_KERNEL_EXEC + # define PAGE_KERNEL_EXEC PAGE_KERNEL + #endif +@@ -631,3 +740,37 @@ finished: + read_unlock(&vmlist_lock); + return buf - buf_start; + } ++ ++void vprintstat(void) ++{ ++ struct vm_struct *p, *last_p = NULL; ++ unsigned long addr, size, free_size, max_free_size; ++ int num; ++ ++ addr = VMALLOC_START; ++ size = max_free_size = 0; ++ num = 0; ++ ++ read_lock(&vmlist_lock); ++ for (p = vmlist; p; p = p->next) { ++ free_size = (unsigned long)p->addr - addr; ++ if (free_size > max_free_size) ++ max_free_size = free_size; ++ addr = (unsigned long)p->addr + p->size; ++ size += p->size; ++ ++num; ++ last_p = p; ++ } ++ if 
(last_p) { ++ free_size = VMALLOC_END - ++ ((unsigned long)last_p->addr + last_p->size); ++ if (free_size > max_free_size) ++ max_free_size = free_size; ++ } ++ read_unlock(&vmlist_lock); ++ ++ printk("VMALLOC Used: %luKB Total: %luKB Entries: %d\n" ++ " Max_Free: %luKB Start: %lx End: %lx\n", ++ size/1024, (VMALLOC_END - VMALLOC_START)/1024, num, ++ max_free_size/1024, VMALLOC_START, VMALLOC_END); ++} +diff -upr linux-2.6.16.orig/mm/vmscan.c linux-2.6.16-026test015/mm/vmscan.c +--- linux-2.6.16.orig/mm/vmscan.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/mm/vmscan.c 2006-07-04 14:41:38.000000000 +0400 +@@ -949,6 +949,17 @@ redo: + goto unlock_both; + } + ++ /* Make sure the dirty bit is up to date */ ++ if (try_to_unmap(page, 1) == SWAP_FAIL) { ++ rc = -EPERM; ++ goto unlock_both; ++ } ++ ++ if (page_mapcount(page)) { ++ rc = -EAGAIN; ++ goto unlock_both; ++ } ++ + /* + * Default handling if a filesystem does not provide + * a migration function. We can only migrate clean +@@ -1243,6 +1254,7 @@ refill_inactive_zone(struct zone *zone, + reclaim_mapped = 1; + } + ++ {KSTAT_PERF_ENTER(refill_inact) + lru_add_drain(); + spin_lock_irq(&zone->lru_lock); + pgmoved = isolate_lru_pages(nr_pages, &zone->active_list, +@@ -1322,6 +1334,7 @@ refill_inactive_zone(struct zone *zone, + local_irq_enable(); + + pagevec_release(&pvec); ++ KSTAT_PERF_LEAVE(refill_inact)} + } + + /* +@@ -1438,6 +1451,7 @@ int try_to_free_pages(struct zone **zone + unsigned long lru_pages = 0; + int i; + ++ KSTAT_PERF_ENTER(ttfp); + sc.gfp_mask = gfp_mask; + sc.may_writepage = !laptop_mode; + sc.may_swap = 1; +@@ -1500,6 +1514,7 @@ out: + + zone->prev_priority = zone->temp_priority; + } ++ KSTAT_PERF_LEAVE(ttfp); + return ret; + } + +@@ -1832,7 +1847,8 @@ static int __init kswapd_init(void) + swap_setup(); + for_each_pgdat(pgdat) + pgdat->kswapd +- = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); ++ = find_task_by_pid_all(kernel_thread(kswapd, ++ pgdat, 
CLONE_KERNEL)); + total_memory = nr_free_pagecache_pages(); + hotcpu_notifier(cpu_callback, 0); + return 0; +diff -upr linux-2.6.16.orig/net/atm/clip.c linux-2.6.16-026test015/net/atm/clip.c +--- linux-2.6.16.orig/net/atm/clip.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/atm/clip.c 2006-07-04 14:41:36.000000000 +0400 +@@ -613,12 +613,19 @@ static int clip_create(int number) + + + static int clip_device_event(struct notifier_block *this,unsigned long event, +- void *dev) ++ void *arg) + { ++ struct net_device *dev = arg; ++ ++ if (event == NETDEV_UNREGISTER) { ++ neigh_ifdown(&clip_tbl, dev); ++ return NOTIFY_DONE; ++ } ++ + /* ignore non-CLIP devices */ +- if (((struct net_device *) dev)->type != ARPHRD_ATM || +- ((struct net_device *) dev)->hard_start_xmit != clip_start_xmit) ++ if (dev->type != ARPHRD_ATM || dev->hard_start_xmit != clip_start_xmit) + return NOTIFY_DONE; ++ + switch (event) { + case NETDEV_UP: + DPRINTK("clip_device_event NETDEV_UP\n"); +@@ -686,14 +693,12 @@ static struct notifier_block clip_inet_n + static void atmarpd_close(struct atm_vcc *vcc) + { + DPRINTK("atmarpd_close\n"); +- atmarpd = NULL; /* assumed to be atomic */ +- barrier(); +- unregister_inetaddr_notifier(&clip_inet_notifier); +- unregister_netdevice_notifier(&clip_dev_notifier); +- if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) +- printk(KERN_ERR "atmarpd_close: closing with requests " +- "pending\n"); ++ ++ rtnl_lock(); ++ atmarpd = NULL; + skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); ++ rtnl_unlock(); ++ + DPRINTK("(done)\n"); + module_put(THIS_MODULE); + } +@@ -714,7 +719,12 @@ static struct atm_dev atmarpd_dev = { + + static int atm_init_atmarp(struct atm_vcc *vcc) + { +- if (atmarpd) return -EADDRINUSE; ++ rtnl_lock(); ++ if (atmarpd) { ++ rtnl_unlock(); ++ return -EADDRINUSE; ++ } ++ + if (start_timer) { + start_timer = 0; + init_timer(&idle_timer); +@@ -731,10 +741,7 @@ static int atm_init_atmarp(struct atm_vc + vcc->push = NULL; + vcc->pop = 
NULL; /* crash */ + vcc->push_oam = NULL; /* crash */ +- if (register_netdevice_notifier(&clip_dev_notifier)) +- printk(KERN_ERR "register_netdevice_notifier failed\n"); +- if (register_inetaddr_notifier(&clip_inet_notifier)) +- printk(KERN_ERR "register_inetaddr_notifier failed\n"); ++ rtnl_unlock(); + return 0; + } + +@@ -992,6 +999,8 @@ static int __init atm_clip_init(void) + + clip_tbl_hook = &clip_tbl; + register_atm_ioctl(&clip_ioctl_ops); ++ register_netdevice_notifier(&clip_dev_notifier); ++ register_inetaddr_notifier(&clip_inet_notifier); + + #ifdef CONFIG_PROC_FS + { +@@ -1012,6 +1021,9 @@ static void __exit atm_clip_exit(void) + + remove_proc_entry("arp", atm_proc_root); + ++ unregister_inetaddr_notifier(&clip_inet_notifier); ++ unregister_netdevice_notifier(&clip_dev_notifier); ++ + deregister_atm_ioctl(&clip_ioctl_ops); + + /* First, stop the idle timer, so it stops banging +diff -upr linux-2.6.16.orig/net/bridge/br_netfilter.c linux-2.6.16-026test015/net/bridge/br_netfilter.c +--- linux-2.6.16.orig/net/bridge/br_netfilter.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/bridge/br_netfilter.c 2006-07-04 14:41:36.000000000 +0400 +@@ -739,6 +739,15 @@ out: + return NF_STOLEN; + } + ++static int br_nf_dev_queue_xmit(struct sk_buff *skb) ++{ ++ if (skb->protocol == htons(ETH_P_IP) && ++ skb->len > skb->dev->mtu && ++ !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) ++ return ip_fragment(skb, br_dev_queue_push_xmit); ++ else ++ return br_dev_queue_push_xmit(skb); ++} + + /* PF_BRIDGE/POST_ROUTING ********************************************/ + static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, +@@ -798,7 +807,7 @@ static unsigned int br_nf_post_routing(u + realoutdev = nf_bridge->netoutdev; + #endif + NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev, +- br_dev_queue_push_xmit); ++ br_nf_dev_queue_xmit); + + return NF_STOLEN; + +@@ -843,7 +852,7 @@ static unsigned int ip_sabotage_out(unsi + 
if ((out->hard_start_xmit == br_dev_xmit && + okfn != br_nf_forward_finish && + okfn != br_nf_local_out_finish && +- okfn != br_dev_queue_push_xmit) ++ okfn != br_nf_dev_queue_xmit) + #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + || ((out->priv_flags & IFF_802_1Q_VLAN) && + VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit) +diff -upr linux-2.6.16.orig/net/compat.c linux-2.6.16-026test015/net/compat.c +--- linux-2.6.16.orig/net/compat.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/compat.c 2006-07-04 14:41:36.000000000 +0400 +@@ -308,107 +308,6 @@ void scm_detach_fds_compat(struct msghdr + } + + /* +- * For now, we assume that the compatibility and native version +- * of struct ipt_entry are the same - sfr. FIXME +- */ +-struct compat_ipt_replace { +- char name[IPT_TABLE_MAXNAMELEN]; +- u32 valid_hooks; +- u32 num_entries; +- u32 size; +- u32 hook_entry[NF_IP_NUMHOOKS]; +- u32 underflow[NF_IP_NUMHOOKS]; +- u32 num_counters; +- compat_uptr_t counters; /* struct ipt_counters * */ +- struct ipt_entry entries[0]; +-}; +- +-static int do_netfilter_replace(int fd, int level, int optname, +- char __user *optval, int optlen) +-{ +- struct compat_ipt_replace __user *urepl; +- struct ipt_replace __user *repl_nat; +- char name[IPT_TABLE_MAXNAMELEN]; +- u32 origsize, tmp32, num_counters; +- unsigned int repl_nat_size; +- int ret; +- int i; +- compat_uptr_t ucntrs; +- +- urepl = (struct compat_ipt_replace __user *)optval; +- if (get_user(origsize, &urepl->size)) +- return -EFAULT; +- +- /* Hack: Causes ipchains to give correct error msg --RR */ +- if (optlen != sizeof(*urepl) + origsize) +- return -ENOPROTOOPT; +- +- /* XXX Assumes that size of ipt_entry is the same both in +- * native and compat environments. 
+- */ +- repl_nat_size = sizeof(*repl_nat) + origsize; +- repl_nat = compat_alloc_user_space(repl_nat_size); +- +- ret = -EFAULT; +- if (put_user(origsize, &repl_nat->size)) +- goto out; +- +- if (!access_ok(VERIFY_READ, urepl, optlen) || +- !access_ok(VERIFY_WRITE, repl_nat, optlen)) +- goto out; +- +- if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) || +- __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name))) +- goto out; +- +- if (__get_user(tmp32, &urepl->valid_hooks) || +- __put_user(tmp32, &repl_nat->valid_hooks)) +- goto out; +- +- if (__get_user(tmp32, &urepl->num_entries) || +- __put_user(tmp32, &repl_nat->num_entries)) +- goto out; +- +- if (__get_user(num_counters, &urepl->num_counters) || +- __put_user(num_counters, &repl_nat->num_counters)) +- goto out; +- +- if (__get_user(ucntrs, &urepl->counters) || +- __put_user(compat_ptr(ucntrs), &repl_nat->counters)) +- goto out; +- +- if (__copy_in_user(&repl_nat->entries[0], +- &urepl->entries[0], +- origsize)) +- goto out; +- +- for (i = 0; i < NF_IP_NUMHOOKS; i++) { +- if (__get_user(tmp32, &urepl->hook_entry[i]) || +- __put_user(tmp32, &repl_nat->hook_entry[i]) || +- __get_user(tmp32, &urepl->underflow[i]) || +- __put_user(tmp32, &repl_nat->underflow[i])) +- goto out; +- } +- +- /* +- * Since struct ipt_counters just contains two u_int64_t members +- * we can just do the access_ok check here and pass the (converted) +- * pointer into the standard syscall. We hope that the pointer is +- * not misaligned ... +- */ +- if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs), +- num_counters * sizeof(struct ipt_counters))) +- goto out; +- +- +- ret = sys_setsockopt(fd, level, optname, +- (char __user *)repl_nat, repl_nat_size); +- +-out: +- return ret; +-} +- +-/* + * A struct sock_filter is architecture independent. 
+ */ + struct compat_sock_fprog { +@@ -460,10 +359,6 @@ static int do_set_sock_timeout(int fd, i + asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, + char __user *optval, int optlen) + { +- /* SO_SET_REPLACE seems to be the same in all levels */ +- if (optname == IPT_SO_SET_REPLACE) +- return do_netfilter_replace(fd, level, optname, +- optval, optlen); + if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER) + return do_set_attach_filter(fd, level, optname, + optval, optlen); +diff -upr linux-2.6.16.orig/net/core/datagram.c linux-2.6.16-026test015/net/core/datagram.c +--- linux-2.6.16.orig/net/core/datagram.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/datagram.c 2006-07-04 14:41:37.000000000 +0400 +@@ -56,6 +56,8 @@ + #include <net/sock.h> + #include <net/tcp_states.h> + ++#include <ub/ub_net.h> ++ + /* + * Is a socket 'connection oriented' ? + */ +@@ -493,6 +495,7 @@ unsigned int datagram_poll(struct file * + { + struct sock *sk = sock->sk; + unsigned int mask; ++ int no_ubc_space; + + poll_wait(file, sk->sk_sleep, wait); + mask = 0; +@@ -500,8 +503,14 @@ unsigned int datagram_poll(struct file * + /* exceptional events? */ + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; +- if (sk->sk_shutdown == SHUTDOWN_MASK) ++ if (sk->sk_shutdown == SHUTDOWN_MASK) { ++ no_ubc_space = 0; + mask |= POLLHUP; ++ } else { ++ no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH); ++ if (no_ubc_space) ++ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH); ++ } + + /* readable? */ + if (!skb_queue_empty(&sk->sk_receive_queue) || +@@ -518,7 +527,7 @@ unsigned int datagram_poll(struct file * + } + + /* writable? 
*/ +- if (sock_writeable(sk)) ++ if (!no_ubc_space && sock_writeable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + else + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); +diff -upr linux-2.6.16.orig/net/core/dev.c linux-2.6.16-026test015/net/core/dev.c +--- linux-2.6.16.orig/net/core/dev.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/dev.c 2006-07-04 14:41:39.000000000 +0400 +@@ -115,6 +115,10 @@ + #include <net/iw_handler.h> + #endif /* CONFIG_NET_RADIO */ + #include <asm/current.h> ++#include <ub/beancounter.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> + + /* + * The list of packet types we will receive (as opposed to discard) +@@ -167,25 +171,40 @@ static struct list_head ptype_all; /* T + * unregister_netdevice(), which must be called with the rtnl + * semaphore held. + */ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define dev_tail (get_exec_env()->_net_dev_tail) ++#else + struct net_device *dev_base; + static struct net_device **dev_tail = &dev_base; ++EXPORT_SYMBOL(dev_base); ++#endif + DEFINE_RWLOCK(dev_base_lock); + +-EXPORT_SYMBOL(dev_base); + EXPORT_SYMBOL(dev_base_lock); + ++#ifdef CONFIG_VE ++#define MAX_UNMOVABLE_NETDEVICES (8*4096) ++static uint8_t unmovable_ifindex_list[MAX_UNMOVABLE_NETDEVICES/8]; ++static LIST_HEAD(dev_global_list); ++#endif ++ + #define NETDEV_HASHBITS 8 + static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; + static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; + +-static inline struct hlist_head *dev_name_hash(const char *name) ++struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env) + { +- unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); ++ unsigned hash; ++ if (!ve_is_super(env)) ++ return visible_dev_head(env); ++ hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); + return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; + } + +-static inline struct hlist_head *dev_index_hash(int ifindex) 
++struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env) + { ++ if (!ve_is_super(env)) ++ return visible_dev_index_head(env); + return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; + } + +@@ -469,7 +488,7 @@ struct net_device *__dev_get_by_name(con + { + struct hlist_node *p; + +- hlist_for_each(p, dev_name_hash(name)) { ++ hlist_for_each(p, dev_name_hash(name, get_exec_env())) { + struct net_device *dev + = hlist_entry(p, struct net_device, name_hlist); + if (!strncmp(dev->name, name, IFNAMSIZ)) +@@ -502,6 +521,32 @@ struct net_device *dev_get_by_name(const + } + + /** ++ * __dev_global_get_by_name - find a device by its name in dev_global_list ++ * @name: name to find ++ * ++ * Find an interface by name. Must be called under RTNL semaphore ++ * If the name is found a pointer to the device ++ * is returned. If the name is not found then %NULL is returned. The ++ * reference counters are not incremented so the caller must be ++ * careful with locks. ++ */ ++ ++#ifdef CONFIG_VE ++struct net_device *__dev_global_get_by_name(const char *name) ++{ ++ struct net_device *dev; ++ /* It's called relatively rarely */ ++ list_for_each_entry(dev, &dev_global_list, dev_global_list_entry) { ++ if (strncmp(dev->name, name, IFNAMSIZ) == 0) ++ return dev; ++ } ++ return NULL; ++} ++#else /* CONFIG_VE */ ++#define __dev_global_get_by_name(name) __dev_get_by_name(name) ++#endif /* CONFIG_VE */ ++ ++/** + * __dev_get_by_index - find a device by its ifindex + * @ifindex: index of device + * +@@ -516,7 +561,7 @@ struct net_device *__dev_get_by_index(in + { + struct hlist_node *p; + +- hlist_for_each(p, dev_index_hash(ifindex)) { ++ hlist_for_each(p, dev_index_hash(ifindex, get_exec_env())) { + struct net_device *dev + = hlist_entry(p, struct net_device, index_hlist); + if (dev->ifindex == ifindex) +@@ -635,6 +680,23 @@ int dev_valid_name(const char *name) + || strchr(name, '/')); + } + ++static inline void __dev_check_name(const char *dev_name, const char *name, 
++ long *inuse, const int max_netdevices) ++{ ++ int i = 0; ++ char buf[IFNAMSIZ]; ++ ++ if (!sscanf(dev_name, name, &i)) ++ return; ++ if (i < 0 || i >= max_netdevices) ++ return; ++ ++ /* avoid cases where sscanf is not exact inverse of printf */ ++ snprintf(buf, sizeof(buf), name, i); ++ if (!strncmp(buf, dev_name, IFNAMSIZ)) ++ set_bit(i, inuse); ++} ++ + /** + * dev_alloc_name - allocate a name for a device + * @dev: device +@@ -671,16 +733,20 @@ int dev_alloc_name(struct net_device *de + if (!inuse) + return -ENOMEM; + +- for (d = dev_base; d; d = d->next) { +- if (!sscanf(d->name, name, &i)) +- continue; +- if (i < 0 || i >= max_netdevices) +- continue; +- +- /* avoid cases where sscanf is not exact inverse of printf */ +- snprintf(buf, sizeof(buf), name, i); +- if (!strncmp(buf, d->name, IFNAMSIZ)) +- set_bit(i, inuse); ++#ifdef CONFIG_VE ++ if (ve_is_super(get_exec_env())) { ++ list_for_each_entry(d, &dev_global_list, ++ dev_global_list_entry) { ++ __dev_check_name(d->name, name, inuse, ++ max_netdevices); ++ } ++ } else ++#endif ++ { ++ for (d = dev_base; d; d = d->next) { ++ __dev_check_name(d->name, name, inuse, ++ max_netdevices); ++ } + } + + i = find_first_zero_bit(inuse, max_netdevices); +@@ -688,7 +754,11 @@ int dev_alloc_name(struct net_device *de + } + + snprintf(buf, sizeof(buf), name, i); +- if (!__dev_get_by_name(buf)) { ++ if (ve_is_super(get_exec_env())) ++ d = __dev_global_get_by_name(buf); ++ else ++ d = __dev_get_by_name(buf); ++ if (d == NULL) { + strlcpy(dev->name, buf, IFNAMSIZ); + return i; + } +@@ -721,13 +791,14 @@ int dev_change_name(struct net_device *d + if (!dev_valid_name(newname)) + return -EINVAL; + ++ /* Rename of devices in VE is prohibited by CAP_NET_ADMIN */ + if (strchr(newname, '%')) { + err = dev_alloc_name(dev, newname); + if (err < 0) + return err; + strcpy(newname, dev->name); + } +- else if (__dev_get_by_name(newname)) ++ else if (__dev_global_get_by_name(newname)) + return -EEXIST; + else + strlcpy(dev->name, 
newname, IFNAMSIZ); +@@ -735,7 +806,8 @@ int dev_change_name(struct net_device *d + err = class_device_rename(&dev->class_dev, dev->name); + if (!err) { + hlist_del(&dev->name_hlist); +- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); ++ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ++ get_exec_env())); + notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + } + +@@ -1294,6 +1366,25 @@ int dev_queue_xmit(struct sk_buff *skb) + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); + #endif + if (q->enqueue) { ++ struct user_beancounter *ub; ++ ++ ub = netdev_bc(dev)->exec_ub; ++ /* the skb CAN be already charged if it transmitted via ++ * something like bonding device */ ++ if (ub && (skb_bc(skb)->resource == 0)) { ++ unsigned long chargesize; ++ chargesize = skb_charge_fullsize(skb); ++ if (charge_beancounter(ub, UB_OTHERSOCKBUF, ++ chargesize, UB_SOFT)) { ++ rcu_read_unlock(); ++ rc = -ENOMEM; ++ goto out_kfree_skb; ++ } ++ skb_bc(skb)->ub = ub; ++ skb_bc(skb)->charged = chargesize; ++ skb_bc(skb)->resource = UB_OTHERSOCKBUF; ++ } ++ + /* Grab device queue */ + spin_lock(&dev->queue_lock); + +@@ -1580,6 +1671,7 @@ int netif_receive_skb(struct sk_buff *sk + struct net_device *orig_dev; + int ret = NET_RX_DROP; + unsigned short type; ++ struct ve_struct *old_env; + + /* if we've gotten here through NAPI, check netpoll */ + if (skb->dev->poll && netpoll_rx(skb)) +@@ -1598,6 +1690,17 @@ int netif_receive_skb(struct sk_buff *sk + skb->h.raw = skb->nh.raw = skb->data; + skb->mac_len = skb->nh.raw - skb->mac.raw; + ++#ifdef CONFIG_VE ++ /* ++ * Skb might be alloced in another VE context, than its device works. ++ * So, set the correct owner_env. 
++ */ ++ skb->owner_env = skb->dev->owner_env; ++ BUG_ON(skb->owner_env == NULL); ++#endif ++ ++ old_env = set_exec_env(VE_OWNER_SKB(skb)); ++ + pt_prev = NULL; + + rcu_read_lock(); +@@ -1663,6 +1766,7 @@ ncls: + + out: + rcu_read_unlock(); ++ (void)set_exec_env(old_env); + return ret; + } + +@@ -2038,7 +2142,7 @@ static int __init dev_proc_init(void) + { + int rc = -ENOMEM; + +- if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) ++ if (!proc_glob_fops_create("net/dev", S_IRUGO, &dev_seq_fops)) + goto out; + if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) + goto out_dev; +@@ -2050,7 +2154,7 @@ out: + out_softnet: + proc_net_remove("softnet_stat"); + out_dev: +- proc_net_remove("dev"); ++ remove_proc_glob_entry("net/dev", NULL); + goto out; + } + #else +@@ -2115,6 +2219,9 @@ void dev_set_promiscuity(struct net_devi + dev->flags &= ~IFF_PROMISC; + else + dev->flags |= IFF_PROMISC; ++ /* Promiscous mode on these devices does not mean anything */ ++ if (dev->flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) ++ return; + if (dev->flags != old_flags) { + dev_mc_upload(dev); + printk(KERN_INFO "device %s %s promiscuous mode\n", +@@ -2529,9 +2636,28 @@ int dev_ioctl(unsigned int cmd, void __u + * - require strict serialization. 
+ * - do not return a value + */ ++ case SIOCSIFMTU: ++ if (!capable(CAP_NET_ADMIN) && ++ !capable(CAP_VE_NET_ADMIN)) ++ return -EPERM; ++ dev_load(ifr.ifr_name); ++ rtnl_lock(); ++ if (!ve_is_super(get_exec_env())) { ++ struct net_device *dev; ++ ret = -ENODEV; ++ if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) ++ goto out_set_mtu_unlock; ++ ret = -EPERM; ++ if (ifr.ifr_mtu > dev->orig_mtu) ++ goto out_set_mtu_unlock; ++ } ++ ret = dev_ifsioc(&ifr, cmd); ++out_set_mtu_unlock: ++ rtnl_unlock(); ++ return ret; ++ + case SIOCSIFFLAGS: + case SIOCSIFMETRIC: +- case SIOCSIFMTU: + case SIOCSIFMAP: + case SIOCSIFHWADDR: + case SIOCSIFSLAVE: +@@ -2613,20 +2739,73 @@ int dev_ioctl(unsigned int cmd, void __u + * dev_new_index - allocate an ifindex + * + * Returns a suitable unique value for a new device interface +- * number. The caller must hold the rtnl semaphore or the ++ * number. The caller must hold the rtnl semaphore or the + * dev_base_lock to be sure it remains unique. ++ * ++ * Note: dev->name must be valid on entrance + */ +-static int dev_new_index(void) ++static int dev_ve_new_index(void) + { +- static int ifindex; ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ int *ifindex = &get_exec_env()->ifindex; ++ int delta = 2; ++#else ++ static int s_ifindex; ++ int *ifindex = &s_ifindex; ++ int delta = 1; ++#endif + for (;;) { +- if (++ifindex <= 0) +- ifindex = 1; +- if (!__dev_get_by_index(ifindex)) +- return ifindex; ++ *ifindex += delta; ++ if (*ifindex <= 0) ++ *ifindex = 1; ++ if (!__dev_get_by_index(*ifindex)) ++ return *ifindex; + } + } + ++#ifdef CONFIG_VE ++static int dev_glb_new_index(void) ++{ ++ int i; ++ ++ i = find_first_zero_bit((long*)unmovable_ifindex_list, ++ MAX_UNMOVABLE_NETDEVICES); ++ ++ if (i == MAX_UNMOVABLE_NETDEVICES) ++ return -EMFILE; ++ ++ __set_bit(i, (long*)unmovable_ifindex_list); ++ return (i + 1) * 2; ++} ++#endif ++ ++static void dev_glb_free_index(struct net_device *dev) ++{ ++#ifdef CONFIG_VE ++ int 
bit; ++ ++ bit = dev->ifindex / 2 - 1; ++ BUG_ON(bit >= MAX_UNMOVABLE_NETDEVICES); ++ __clear_bit(bit, (long*)unmovable_ifindex_list); ++#endif ++} ++ ++static int dev_new_index(struct net_device *dev) ++{ ++#ifdef CONFIG_VE ++ if (ve_is_super(get_exec_env()) && ve_is_dev_movable(dev)) ++ return dev_glb_new_index(); ++#endif ++ ++ return dev_ve_new_index(); ++} ++ ++static void dev_free_index(struct net_device *dev) ++{ ++ if ((dev->ifindex % 2) == 0) ++ dev_glb_free_index(dev); ++} ++ + static int dev_boot_phase = 1; + + /* Delayed registration/unregisteration */ +@@ -2669,6 +2848,10 @@ int register_netdevice(struct net_device + /* When net_device's are persistent, this will be fatal. */ + BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + ++ ret = -EPERM; ++ if (!ve_is_super(get_exec_env()) && ve_is_dev_movable(dev)) ++ goto out; ++ + spin_lock_init(&dev->queue_lock); + spin_lock_init(&dev->xmit_lock); + dev->xmit_lock_owner = -1; +@@ -2688,27 +2871,32 @@ int register_netdevice(struct net_device + if (ret) { + if (ret > 0) + ret = -EIO; +- goto out_err; ++ goto out_free_div; + } + } + + if (!dev_valid_name(dev->name)) { + ret = -EINVAL; +- goto out_err; ++ goto out_free_div; ++ } ++ ++ dev->ifindex = dev_new_index(dev); ++ if (dev->ifindex < 0) { ++ ret = dev->ifindex; ++ goto out_free_div; + } + +- dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + + /* Check for existence of name */ +- head = dev_name_hash(dev->name); ++ head = dev_name_hash(dev->name, get_exec_env()); + hlist_for_each(p, head) { + struct net_device *d + = hlist_entry(p, struct net_device, name_hlist); + if (!strncmp(d->name, dev->name, IFNAMSIZ)) { + ret = -EEXIST; +- goto out_err; ++ goto out_free_ind; + } + } + +@@ -2760,12 +2948,21 @@ int register_netdevice(struct net_device + set_bit(__LINK_STATE_PRESENT, &dev->state); + + dev->next = NULL; ++ dev->owner_env = get_exec_env(); ++ dev->orig_mtu = dev->mtu; ++ netdev_bc(dev)->owner_ub = 
get_beancounter(get_exec_ub()); ++ netdev_bc(dev)->exec_ub = get_beancounter(get_exec_ub()); + dev_init_scheduler(dev); ++#ifdef CONFIG_VE ++ if (ve_is_super(get_exec_env())) ++ list_add_tail(&dev->dev_global_list_entry, &dev_global_list); ++#endif + write_lock_bh(&dev_base_lock); + *dev_tail = dev; + dev_tail = &dev->next; + hlist_add_head(&dev->name_hlist, head); +- hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); ++ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ++ get_exec_env())); + dev_hold(dev); + dev->reg_state = NETREG_REGISTERING; + write_unlock_bh(&dev_base_lock); +@@ -2779,7 +2976,9 @@ int register_netdevice(struct net_device + + out: + return ret; +-out_err: ++out_free_ind: ++ dev_free_index(dev); ++out_free_div: + free_divert_blk(dev); + goto out; + } +@@ -2825,6 +3024,10 @@ int register_netdev(struct net_device *d + err = register_netdevice(dev); + out: + rtnl_unlock(); ++ if (err == 0 && dev->reg_state != NETREG_REGISTERED) { ++ unregister_netdev(dev); ++ err = -ENOMEM; ++ } + return err; + } + EXPORT_SYMBOL(register_netdev); +@@ -2907,6 +3110,7 @@ void netdev_run_todo(void) + { + struct list_head list = LIST_HEAD_INIT(list); + int err; ++ struct ve_struct *current_env; + + + /* Need to guard against multiple cpu's getting out of order. 
*/ +@@ -2925,22 +3129,30 @@ void netdev_run_todo(void) + list_splice_init(&net_todo_list, &list); + spin_unlock(&net_todo_list_lock); + ++ current_env = get_exec_env(); + while (!list_empty(&list)) { + struct net_device *dev + = list_entry(list.next, struct net_device, todo_list); + list_del(&dev->todo_list); + ++ (void)set_exec_env(dev->owner_env); + switch(dev->reg_state) { + case NETREG_REGISTERING: ++ dev->reg_state = NETREG_REGISTERED; + err = netdev_register_sysfs(dev); +- if (err) ++ if (err) { + printk(KERN_ERR "%s: failed sysfs registration (%d)\n", + dev->name, err); +- dev->reg_state = NETREG_REGISTERED; ++ dev->reg_state = NETREG_REGISTER_ERR; ++ break; ++ } + break; + + case NETREG_UNREGISTERING: + netdev_unregister_sysfs(dev); ++ /* fall through */ ++ ++ case NETREG_REGISTER_ERR: + dev->reg_state = NETREG_UNREGISTERED; + + netdev_wait_allrefs(dev); +@@ -2951,6 +3163,10 @@ void netdev_run_todo(void) + BUG_TRAP(!dev->ip6_ptr); + BUG_TRAP(!dev->dn_ptr); + ++ put_beancounter(netdev_bc(dev)->exec_ub); ++ put_beancounter(netdev_bc(dev)->owner_ub); ++ netdev_bc(dev)->exec_ub = NULL; ++ netdev_bc(dev)->owner_ub = NULL; + + /* It must be the very last action, + * after this 'dev' may point to freed up memory. 
+@@ -2965,6 +3181,7 @@ void netdev_run_todo(void) + break; + } + } ++ (void)set_exec_env(current_env); + + out: + up(&net_todo_run_mutex); +@@ -2990,7 +3207,7 @@ struct net_device *alloc_netdev(int size + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; + +- p = kmalloc(alloc_size, GFP_KERNEL); ++ p = ub_kmalloc(alloc_size, GFP_KERNEL); + if (!p) { + printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); + return NULL; +@@ -3070,7 +3287,8 @@ int unregister_netdevice(struct net_devi + return -ENODEV; + } + +- BUG_ON(dev->reg_state != NETREG_REGISTERED); ++ BUG_ON(dev->reg_state != NETREG_REGISTERED && ++ dev->reg_state != NETREG_REGISTER_ERR); + + /* If device is running, close it first. */ + if (dev->flags & IFF_UP) +@@ -3086,6 +3304,10 @@ int unregister_netdevice(struct net_devi + dev_tail = dp; + *dp = d->next; + write_unlock_bh(&dev_base_lock); ++#ifdef CONFIG_VE ++ if (ve_is_super(get_exec_env())) ++ list_del(&dev->dev_global_list_entry); ++#endif + break; + } + } +@@ -3095,7 +3317,8 @@ int unregister_netdevice(struct net_devi + return -ENODEV; + } + +- dev->reg_state = NETREG_UNREGISTERING; ++ if (dev->reg_state != NETREG_REGISTER_ERR) ++ dev->reg_state = NETREG_UNREGISTERING; + + synchronize_net(); + +@@ -3119,6 +3342,8 @@ int unregister_netdevice(struct net_devi + /* Notifier chain MUST detach us from master device. 
*/ + BUG_TRAP(!dev->master); + ++ dev_free_index(dev); ++ + free_divert_blk(dev); + + /* Finish processing unregister after unlock */ +@@ -3276,6 +3501,8 @@ EXPORT_SYMBOL(dev_close); + EXPORT_SYMBOL(dev_get_by_flags); + EXPORT_SYMBOL(dev_get_by_index); + EXPORT_SYMBOL(dev_get_by_name); ++EXPORT_SYMBOL(dev_name_hash); ++EXPORT_SYMBOL(dev_index_hash); + EXPORT_SYMBOL(dev_open); + EXPORT_SYMBOL(dev_queue_xmit); + EXPORT_SYMBOL(dev_remove_pack); +diff -upr linux-2.6.16.orig/net/core/dev_mcast.c linux-2.6.16-026test015/net/core/dev_mcast.c +--- linux-2.6.16.orig/net/core/dev_mcast.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/dev_mcast.c 2006-07-04 14:41:38.000000000 +0400 +@@ -290,9 +290,10 @@ static struct file_operations dev_mc_seq + + void __init dev_mcast_init(void) + { +- proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops); ++ proc_glob_fops_create("net/dev_mcast", 0, &dev_mc_seq_fops); + } + + EXPORT_SYMBOL(dev_mc_add); + EXPORT_SYMBOL(dev_mc_delete); + EXPORT_SYMBOL(dev_mc_upload); ++EXPORT_SYMBOL(dev_mc_discard); +diff -upr linux-2.6.16.orig/net/core/dst.c linux-2.6.16-026test015/net/core/dst.c +--- linux-2.6.16.orig/net/core/dst.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/dst.c 2006-07-04 14:41:39.000000000 +0400 +@@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dum + dst_gc_timer_inc = DST_GC_INC; + dst_gc_timer_expires = DST_GC_MIN; + } +- dst_gc_timer.expires = jiffies + dst_gc_timer_expires; + #if RT_CACHE_DEBUG >= 2 + printk("dst_total: %d/%d %ld\n", + atomic_read(&dst_total), delayed, dst_gc_timer_expires); + #endif +- add_timer(&dst_gc_timer); ++ mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); + + out: + spin_unlock(&dst_lock); +@@ -260,11 +259,14 @@ static int dst_dev_event(struct notifier + switch (event) { + case NETDEV_UNREGISTER: + case NETDEV_DOWN: +- spin_lock_bh(&dst_lock); ++ local_bh_disable(); ++ dst_run_gc(0); ++ spin_lock(&dst_lock); + for (dst = 
dst_garbage_list; dst; dst = dst->next) { + dst_ifdown(dst, dev, event != NETDEV_DOWN); + } +- spin_unlock_bh(&dst_lock); ++ spin_unlock(&dst_lock); ++ local_bh_enable(); + break; + } + return NOTIFY_DONE; +diff -upr linux-2.6.16.orig/net/core/dv.c linux-2.6.16-026test015/net/core/dv.c +--- linux-2.6.16.orig/net/core/dv.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/dv.c 2006-07-04 14:41:37.000000000 +0400 +@@ -547,3 +547,5 @@ void divert_frame(struct sk_buff *skb) + break; + } + } ++ ++EXPORT_SYMBOL(free_divert_blk); +diff -upr linux-2.6.16.orig/net/core/filter.c linux-2.6.16-026test015/net/core/filter.c +--- linux-2.6.16.orig/net/core/filter.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/filter.c 2006-07-04 14:41:37.000000000 +0400 +@@ -34,6 +34,7 @@ + #include <linux/timer.h> + #include <asm/system.h> + #include <asm/uaccess.h> ++#include <asm/unaligned.h> + #include <linux/filter.h> + + /* No hurry in this branch */ +@@ -177,7 +178,7 @@ unsigned int sk_run_filter(struct sk_buf + load_w: + ptr = load_pointer(skb, k, 4, &tmp); + if (ptr != NULL) { +- A = ntohl(*(u32 *)ptr); ++ A = ntohl(get_unaligned((u32 *)ptr)); + continue; + } + break; +@@ -186,7 +187,7 @@ load_w: + load_h: + ptr = load_pointer(skb, k, 2, &tmp); + if (ptr != NULL) { +- A = ntohs(*(u16 *)ptr); ++ A = ntohs(get_unaligned((u16 *)ptr)); + continue; + } + break; +@@ -406,7 +407,7 @@ int sk_attach_filter(struct sock_fprog * + if (fprog->filter == NULL) + return -EINVAL; + +- fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); ++ fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL_UBC); + if (!fp) + return -ENOMEM; + if (copy_from_user(fp->insns, fprog->filter, fsize)) { +diff -upr linux-2.6.16.orig/net/core/neighbour.c linux-2.6.16-026test015/net/core/neighbour.c +--- linux-2.6.16.orig/net/core/neighbour.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/neighbour.c 2006-07-04 14:41:39.000000000 +0400 +@@ -33,6 
+33,7 @@ + #include <linux/rtnetlink.h> + #include <linux/random.h> + #include <linux/string.h> ++#include <ub/beancounter.h> + + #define NEIGH_DEBUG 1 + +@@ -639,6 +640,8 @@ static void neigh_periodic_timer(unsigne + struct neigh_table *tbl = (struct neigh_table *)arg; + struct neighbour *n, **np; + unsigned long expire, now = jiffies; ++ struct ve_struct *env = set_exec_env(tbl->owner_env); ++ struct user_beancounter *ub = set_exec_ub(tbl->owner_ub); + + NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); + +@@ -700,6 +703,8 @@ next_elt: + mod_timer(&tbl->gc_timer, now + expire); + + write_unlock(&tbl->lock); ++ set_exec_ub(ub); ++ set_exec_env(env); + } + + static __inline__ int neigh_max_probes(struct neighbour *n) +@@ -727,6 +732,11 @@ static void neigh_timer_handler(unsigned + struct neighbour *neigh = (struct neighbour *)arg; + unsigned state; + int notify = 0; ++ struct ve_struct *env; ++ struct user_beancounter *ub; ++ ++ env = set_exec_env(neigh->dev->owner_env); ++ ub = set_exec_ub(netdev_bc(neigh->dev)->exec_ub); + + write_lock(&neigh->lock); + +@@ -824,6 +834,8 @@ out: + neigh_app_notify(neigh); + #endif + neigh_release(neigh); ++ (void)set_exec_ub(ub); ++ (void)set_exec_env(env); + } + + int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +@@ -1202,6 +1214,9 @@ static void neigh_proxy_process(unsigned + unsigned long now = jiffies; + struct sk_buff *skb; + ++ struct ve_struct *env = set_exec_env(tbl->owner_env); ++ struct user_beancounter *ub = set_exec_ub(tbl->owner_ub); ++ + spin_lock(&tbl->proxy_queue.lock); + + skb = tbl->proxy_queue.next; +@@ -1213,6 +1228,7 @@ static void neigh_proxy_process(unsigned + skb = skb->next; + if (tdif <= 0) { + struct net_device *dev = back->dev; ++ + __skb_unlink(back, &tbl->proxy_queue); + if (tbl->proxy_redo && netif_running(dev)) + tbl->proxy_redo(back); +@@ -1220,6 +1236,7 @@ static void neigh_proxy_process(unsigned + kfree_skb(back); + + dev_put(dev); ++ + } else if (!sched_next || tdif < sched_next) 
+ sched_next = tdif; + } +@@ -1227,6 +1244,8 @@ static void neigh_proxy_process(unsigned + if (sched_next) + mod_timer(&tbl->proxy_timer, jiffies + sched_next); + spin_unlock(&tbl->proxy_queue.lock); ++ (void)set_exec_ub(ub); ++ (void)set_exec_env(env); + } + + void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, +@@ -1323,12 +1342,15 @@ void neigh_parms_destroy(struct neigh_pa + } + + +-void neigh_table_init(struct neigh_table *tbl) ++int neigh_table_init(struct neigh_table *tbl) + { + unsigned long now = jiffies; + unsigned long phsize; + + atomic_set(&tbl->parms.refcnt, 1); ++ atomic_set(&tbl->entries, 0); ++ tbl->hash_chain_gc = 0; ++ tbl->parms.next = NULL; + INIT_RCU_HEAD(&tbl->parms.rcu_head); + tbl->parms.reachable_time = + neigh_rand_reach_time(tbl->parms.base_reachable_time); +@@ -1336,22 +1358,30 @@ void neigh_table_init(struct neigh_table + if (!tbl->kmem_cachep) + tbl->kmem_cachep = kmem_cache_create(tbl->id, + tbl->entry_size, +- 0, SLAB_HWCACHE_ALIGN, ++ 0, SLAB_HWCACHE_ALIGN | SLAB_UBC, + NULL, NULL); + + if (!tbl->kmem_cachep) +- panic("cannot create neighbour cache"); ++ return -ENOMEM; ++ ++ tbl->owner_env = get_ve(get_exec_env()); ++ tbl->owner_ub = get_beancounter(get_exec_ub()); + + tbl->stats = alloc_percpu(struct neigh_statistics); + if (!tbl->stats) +- panic("cannot create neighbour cache statistics"); ++ return -ENOMEM; + + #ifdef CONFIG_PROC_FS +- tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); +- if (!tbl->pde) +- panic("cannot create neighbour proc dir entry"); +- tbl->pde->proc_fops = &neigh_stat_seq_fops; +- tbl->pde->data = tbl; ++ if (ve_is_super(get_exec_env())) { ++ char name[strlen(tbl->id) + sizeof("net/stat/")]; ++ strcpy(name, "net/stat/"); ++ strcat(name, tbl->id); ++ tbl->pde = create_proc_glob_entry(name, S_IRUGO, NULL); ++ if (tbl->pde) { ++ tbl->pde->proc_fops = &neigh_stat_seq_fops; ++ tbl->pde->data = tbl; ++ } ++ } + #endif + + tbl->hash_mask = 1; +@@ -1361,7 +1391,7 @@ void 
neigh_table_init(struct neigh_table + tbl->phash_buckets = kmalloc(phsize, GFP_KERNEL); + + if (!tbl->hash_buckets || !tbl->phash_buckets) +- panic("cannot allocate neighbour cache hashes"); ++ goto nomem; + + memset(tbl->phash_buckets, 0, phsize); + +@@ -1385,6 +1415,24 @@ void neigh_table_init(struct neigh_table + tbl->next = neigh_tables; + neigh_tables = tbl; + write_unlock(&neigh_tbl_lock); ++ return 0; ++ ++nomem: ++ if (tbl->hash_buckets) { ++ neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); ++ tbl->hash_buckets = NULL; ++ } ++ if (tbl->phash_buckets) { ++ kfree(tbl->phash_buckets); ++ tbl->phash_buckets = NULL; ++ } ++ if (tbl->stats) { ++ free_percpu(tbl->stats); ++ tbl->stats = NULL; ++ } ++ put_beancounter(tbl->owner_ub); ++ put_ve(tbl->owner_env); ++ return -ENOMEM; + } + + int neigh_table_clear(struct neigh_table *tbl) +@@ -1398,6 +1446,15 @@ int neigh_table_clear(struct neigh_table + neigh_ifdown(tbl, NULL); + if (atomic_read(&tbl->entries)) + printk(KERN_CRIT "neighbour leakage\n"); ++#ifdef CONFIG_PROC_FS ++ if (ve_is_super(get_exec_env())) { ++ char name[strlen(tbl->id) + sizeof("net/stat/")]; ++ strcpy(name, "net/stat/"); ++ strcat(name, tbl->id); ++ remove_proc_glob_entry(name, NULL); ++ } ++#endif ++ + write_lock(&neigh_tbl_lock); + for (tp = &neigh_tables; *tp; tp = &(*tp)->next) { + if (*tp == tbl) { +@@ -1413,6 +1470,9 @@ int neigh_table_clear(struct neigh_table + kfree(tbl->phash_buckets); + tbl->phash_buckets = NULL; + ++ put_beancounter(tbl->owner_ub); ++ put_ve(tbl->owner_env); ++ + return 0; + } + +@@ -1435,6 +1495,8 @@ int neigh_delete(struct sk_buff *skb, st + + if (tbl->family != ndm->ndm_family) + continue; ++ if (!ve_accessible_strict(tbl->owner_env, get_exec_env())) ++ continue; + read_unlock(&neigh_tbl_lock); + + err = -EINVAL; +@@ -1488,6 +1550,8 @@ int neigh_add(struct sk_buff *skb, struc + + if (tbl->family != ndm->ndm_family) + continue; ++ if (!ve_accessible_strict(tbl->owner_env, get_exec_env())) ++ continue; + 
read_unlock(&neigh_tbl_lock); + + err = -EINVAL; +@@ -1720,6 +1784,9 @@ int neightbl_set(struct sk_buff *skb, st + if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) + continue; + ++ if (!ve_accessible_strict(tbl->owner_env, get_exec_env())) ++ continue; ++ + if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id)) + break; + } +@@ -1941,6 +2008,8 @@ int neigh_dump_info(struct sk_buff *skb, + s_t = cb->args[0]; + + for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { ++ if (!ve_accessible_strict(tbl->owner_env, get_exec_env())) ++ continue; + if (t < s_t || (family && tbl->family != family)) + continue; + if (t > s_t) +@@ -2530,11 +2599,12 @@ int neigh_sysctl_register(struct net_dev + int p_id, int pdev_id, char *p_name, + proc_handler *handler, ctl_handler *strategy) + { +- struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); ++ struct neigh_sysctl_table *t; + const char *dev_name_source = NULL; + char *dev_name = NULL; + int err = 0; + ++ t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return -ENOBUFS; + memcpy(t, &neigh_sysctl_template, sizeof(*t)); +diff -upr linux-2.6.16.orig/net/core/net-sysfs.c linux-2.6.16-026test015/net/core/net-sysfs.c +--- linux-2.6.16.orig/net/core/net-sysfs.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/net-sysfs.c 2006-07-04 14:41:38.000000000 +0400 +@@ -388,12 +388,13 @@ static void netdev_release(struct class_ + struct net_device *dev + = container_of(cd, struct net_device, class_dev); + +- BUG_ON(dev->reg_state != NETREG_RELEASED); ++ BUG_ON(dev->reg_state != NETREG_RELEASED && ++ dev->reg_state != NETREG_REGISTERING); + + kfree((char *)dev - dev->padded); + } + +-static struct class net_class = { ++struct class net_class = { + .name = "net", + .release = netdev_release, + .class_dev_attrs = net_class_attributes, +@@ -401,6 +402,13 @@ static struct class net_class = { + .uevent = netdev_uevent, + #endif + }; ++EXPORT_SYMBOL(net_class); ++ ++#ifndef CONFIG_VE ++#define 
visible_net_class net_class ++#else ++#define visible_net_class (*get_exec_env()->net_class) ++#endif + + void netdev_unregister_sysfs(struct net_device * net) + { +@@ -424,7 +432,7 @@ int netdev_register_sysfs(struct net_dev + struct class_device *class_dev = &(net->class_dev); + int ret; + +- class_dev->class = &net_class; ++ class_dev->class = &visible_net_class; + class_dev->class_data = net; + + strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE); +@@ -453,12 +461,21 @@ out_cleanup: + out_unreg: + printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n", + net->name, ret); +- class_device_unregister(class_dev); ++ /* put is called in free_netdev() */ ++ class_device_del(class_dev); + out: + return ret; + } + ++void prepare_sysfs_netdev(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->net_class = &net_class; ++#endif ++} ++ + int netdev_sysfs_init(void) + { ++ prepare_sysfs_netdev(); + return class_register(&net_class); + } +diff -upr linux-2.6.16.orig/net/core/rtnetlink.c linux-2.6.16-026test015/net/core/rtnetlink.c +--- linux-2.6.16.orig/net/core/rtnetlink.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/rtnetlink.c 2006-07-04 14:41:38.000000000 +0400 +@@ -434,6 +434,8 @@ static int rtnetlink_dump_all(struct sk_ + if (rtnetlink_links[idx] == NULL || + rtnetlink_links[idx][type].dumpit == NULL) + continue; ++ if (vz_security_proto_check(idx, 0, 0)) ++ continue; + if (idx > s_idx) + memset(&cb->args[0], 0, sizeof(cb->args)); + if (rtnetlink_links[idx][type].dumpit(skb, cb)) +@@ -501,7 +503,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s + return 0; + + family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; +- if (family >= NPROTO) { ++ if (family >= NPROTO || vz_security_proto_check(family, 0, 0)) { + *errp = -EAFNOSUPPORT; + return -1; + } +diff -upr linux-2.6.16.orig/net/core/scm.c linux-2.6.16-026test015/net/core/scm.c +--- linux-2.6.16.orig/net/core/scm.c 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16-026test015/net/core/scm.c 2006-07-04 14:41:38.000000000 +0400 +@@ -34,6 +34,7 @@ + #include <net/compat.h> + #include <net/scm.h> + ++#include <ub/ub_mem.h> + + /* + * Only allow a user to send credentials, that they could set with +@@ -42,7 +43,9 @@ + + static __inline__ int scm_check_creds(struct ucred *creds) + { +- if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) && ++ if ((creds->pid == virt_tgid(current) || ++ creds->pid == current->tgid || ++ capable(CAP_VE_SYS_ADMIN)) && + ((creds->uid == current->uid || creds->uid == current->euid || + creds->uid == current->suid) || capable(CAP_SETUID)) && + ((creds->gid == current->gid || creds->gid == current->egid || +@@ -69,7 +72,7 @@ static int scm_fp_copy(struct cmsghdr *c + + if (!fpl) + { +- fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); ++ fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); + if (!fpl) + return -ENOMEM; + *fplp = fpl; +@@ -275,7 +278,7 @@ struct scm_fp_list *scm_fp_dup(struct sc + if (!fpl) + return NULL; + +- new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); ++ new_fpl = ub_kmalloc(sizeof(*fpl), GFP_KERNEL); + if (new_fpl) { + for (i=fpl->count-1; i>=0; i--) + get_file(fpl->fp[i]); +diff -upr linux-2.6.16.orig/net/core/skbuff.c linux-2.6.16-026test015/net/core/skbuff.c +--- linux-2.6.16.orig/net/core/skbuff.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/skbuff.c 2006-07-04 14:41:38.000000000 +0400 +@@ -48,6 +48,7 @@ + #include <linux/in.h> + #include <linux/inet.h> + #include <linux/slab.h> ++#include <linux/kmem_cache.h> + #include <linux/netdevice.h> + #ifdef CONFIG_NET_CLS_ACT + #include <net/pkt_sched.h> +@@ -68,6 +69,8 @@ + #include <asm/uaccess.h> + #include <asm/system.h> + ++#include <ub/ub_net.h> ++ + static kmem_cache_t *skbuff_head_cache __read_mostly; + static kmem_cache_t *skbuff_fclone_cache __read_mostly; + +@@ -147,6 +150,9 @@ struct sk_buff *__alloc_skb(unsigned int + if (!skb) + goto out; + ++ if 
(ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA)) ++ goto nobc; ++ + /* Get the DATA. Size must match skb_add_mtu(). */ + size = SKB_DATA_ALIGN(size); + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); +@@ -160,6 +166,7 @@ struct sk_buff *__alloc_skb(unsigned int + skb->data = data; + skb->tail = data; + skb->end = data + size; ++ SET_VE_OWNER_SKB(skb, get_exec_env()); + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); +@@ -182,6 +189,8 @@ struct sk_buff *__alloc_skb(unsigned int + out: + return skb; + nodata: ++ ub_skb_free_bc(skb); ++nobc: + kmem_cache_free(cache, skb); + skb = NULL; + goto out; +@@ -214,6 +223,9 @@ struct sk_buff *alloc_skb_from_cache(kme + if (!skb) + goto out; + ++ if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA)) ++ goto nobc; ++ + /* Get the DATA. */ + size = SKB_DATA_ALIGN(size); + data = kmem_cache_alloc(cp, gfp_mask); +@@ -227,6 +239,7 @@ struct sk_buff *alloc_skb_from_cache(kme + skb->data = data; + skb->tail = data; + skb->end = data + size; ++ SET_VE_OWNER_SKB(skb, get_exec_env()); + + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; +@@ -236,6 +249,8 @@ struct sk_buff *alloc_skb_from_cache(kme + out: + return skb; + nodata: ++ ub_skb_free_bc(skb); ++nobc: + kmem_cache_free(skbuff_head_cache, skb); + skb = NULL; + goto out; +@@ -290,6 +305,7 @@ void kfree_skbmem(struct sk_buff *skb) + atomic_t *fclone_ref; + + skb_release_data(skb); ++ ub_skb_free_bc(skb); + switch (skb->fclone) { + case SKB_FCLONE_UNAVAILABLE: + kmem_cache_free(skbuff_head_cache, skb); +@@ -331,6 +347,7 @@ void __kfree_skb(struct sk_buff *skb) + #ifdef CONFIG_XFRM + secpath_put(skb->sp); + #endif ++ ub_skb_uncharge(skb); + if (skb->destructor) { + WARN_ON(in_irq()); + skb->destructor(skb); +@@ -386,6 +403,11 @@ struct sk_buff *skb_clone(struct sk_buff + n->fclone = SKB_FCLONE_UNAVAILABLE; + } + ++ if (ub_skb_alloc_bc(n, gfp_mask)) { ++ 
kmem_cache_free(skbuff_head_cache, n); ++ return NULL; ++ } ++ + #define C(x) n->x = skb->x + + n->next = n->prev = NULL; +@@ -415,6 +437,7 @@ struct sk_buff *skb_clone(struct sk_buff + C(ipvs_property); + #endif + C(protocol); ++ SET_VE_OWNER_SKB(n, VE_OWNER_SKB(skb)); + n->destructor = NULL; + #ifdef CONFIG_NETFILTER + C(nfmark); +diff -upr linux-2.6.16.orig/net/core/sock.c linux-2.6.16-026test015/net/core/sock.c +--- linux-2.6.16.orig/net/core/sock.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/sock.c 2006-07-04 14:41:38.000000000 +0400 +@@ -108,6 +108,7 @@ + #include <linux/net.h> + #include <linux/mm.h> + #include <linux/slab.h> ++#include <linux/kmem_cache.h> + #include <linux/interrupt.h> + #include <linux/poll.h> + #include <linux/tcp.h> +@@ -124,6 +125,9 @@ + #include <net/xfrm.h> + #include <linux/ipsec.h> + ++#include <ub/ub_net.h> ++#include <ub/beancounter.h> ++ + #include <linux/filter.h> + + #ifdef CONFIG_INET +@@ -172,7 +176,7 @@ static void sock_warn_obsolete_bsdism(co + static char warncomm[TASK_COMM_LEN]; + if (strcmp(warncomm, current->comm) && warned < 5) { + strcpy(warncomm, current->comm); +- printk(KERN_WARNING "process `%s' is using obsolete " ++ ve_printk(VE_LOG, KERN_WARNING "process `%s' is using obsolete " + "%s SO_BSDCOMPAT\n", warncomm, name); + warned++; + } +@@ -404,8 +408,9 @@ set_rcvbuf: + if (!valbool) { + sk->sk_bound_dev_if = 0; + } else { +- if (optlen > IFNAMSIZ) +- optlen = IFNAMSIZ; ++ if (optlen > IFNAMSIZ - 1) ++ optlen = IFNAMSIZ - 1; ++ memset(devname, 0, sizeof(devname)); + if (copy_from_user(devname, optval, optlen)) { + ret = -EFAULT; + break; +@@ -659,6 +664,7 @@ struct sock *sk_alloc(int family, gfp_t + */ + sk->sk_prot = sk->sk_prot_creator = prot; + sock_lock_init(sk); ++ SET_VE_OWNER_SK(sk, get_exec_env()); + } + + if (security_sk_alloc(sk, family, priority)) +@@ -698,6 +704,7 @@ void sk_free(struct sock *sk) + __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); + + security_sk_free(sk); 
++ ub_sock_uncharge(sk); + if (sk->sk_prot_creator->slab != NULL) + kmem_cache_free(sk->sk_prot_creator->slab, sk); + else +@@ -742,14 +749,11 @@ struct sock *sk_clone(const struct sock + if (filter != NULL) + sk_filter_charge(newsk, filter); + +- if (unlikely(xfrm_sk_clone_policy(newsk))) { +- /* It is still raw copy of parent, so invalidate +- * destructor and make plain sk_free() */ +- newsk->sk_destruct = NULL; +- sk_free(newsk); +- newsk = NULL; +- goto out; +- } ++ if (ub_sock_charge(newsk, newsk->sk_family, newsk->sk_type) < 0) ++ goto out_err; ++ ++ if (unlikely(xfrm_sk_clone_policy(newsk))) ++ goto out_err; + + newsk->sk_err = 0; + newsk->sk_priority = 0; +@@ -773,8 +777,15 @@ struct sock *sk_clone(const struct sock + if (newsk->sk_prot->sockets_allocated) + atomic_inc(newsk->sk_prot->sockets_allocated); + } +-out: + return newsk; ++ ++out_err: ++ /* It is still raw copy of parent, so invalidate ++ * destructor and make plain sk_free() */ ++ sock_reset_flag(newsk, SOCK_TIMESTAMP); ++ newsk->sk_destruct = NULL; ++ sk_free(newsk); ++ return NULL; + } + + EXPORT_SYMBOL_GPL(sk_clone); +@@ -934,14 +945,12 @@ static long sock_wait_for_wmem(struct so + /* + * Generic send/receive buffer handlers + */ +- +-static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, +- unsigned long header_len, +- unsigned long data_len, +- int noblock, int *errcode) ++struct sk_buff *sock_alloc_send_skb2(struct sock *sk, unsigned long size, ++ unsigned long size2, int noblock, ++ int *errcode) + { + struct sk_buff *skb; +- gfp_t gfp_mask; ++ unsigned int gfp_mask; + long timeo; + int err; + +@@ -959,46 +968,35 @@ static struct sk_buff *sock_alloc_send_p + if (sk->sk_shutdown & SEND_SHUTDOWN) + goto failure; + +- if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { +- skb = alloc_skb(header_len, sk->sk_allocation); +- if (skb) { +- int npages; +- int i; +- +- /* No pages, we're done... 
*/ +- if (!data_len) +- break; +- +- npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; +- skb->truesize += data_len; +- skb_shinfo(skb)->nr_frags = npages; +- for (i = 0; i < npages; i++) { +- struct page *page; +- skb_frag_t *frag; +- +- page = alloc_pages(sk->sk_allocation, 0); +- if (!page) { +- err = -ENOBUFS; +- skb_shinfo(skb)->nr_frags = i; +- kfree_skb(skb); +- goto failure; +- } +- +- frag = &skb_shinfo(skb)->frags[i]; +- frag->page = page; +- frag->page_offset = 0; +- frag->size = (data_len >= PAGE_SIZE ? +- PAGE_SIZE : +- data_len); +- data_len -= PAGE_SIZE; +- } ++ if (ub_sock_getwres_other(sk, skb_charge_size(size))) { ++ if (size2 < size) { ++ size = size2; ++ continue; ++ } ++ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); ++ err = -EAGAIN; ++ if (!timeo) ++ goto failure; ++ if (signal_pending(current)) ++ goto interrupted; ++ timeo = ub_sock_wait_for_space(sk, timeo, ++ skb_charge_size(size)); ++ continue; ++ } + ++ if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { ++ skb = alloc_skb(size, sk->sk_allocation); ++ if (skb) + /* Full success... 
*/ + break; +- } ++ ub_sock_retwres_other(sk, skb_charge_size(size), ++ SOCK_MIN_UBCSPACE_CH); + err = -ENOBUFS; + goto failure; + } ++ ub_sock_retwres_other(sk, ++ skb_charge_size(size), ++ SOCK_MIN_UBCSPACE_CH); + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + err = -EAGAIN; +@@ -1009,6 +1007,7 @@ static struct sk_buff *sock_alloc_send_p + timeo = sock_wait_for_wmem(sk, timeo); + } + ++ ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF); + skb_set_owner_w(skb, sk); + return skb; + +@@ -1022,7 +1021,7 @@ failure: + struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, + int noblock, int *errcode) + { +- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); ++ return sock_alloc_send_skb2(sk, size, size, noblock, errcode); + } + + static void __lock_sock(struct sock *sk) +@@ -1462,7 +1461,8 @@ int proto_register(struct proto *prot, i + + if (alloc_slab) { + prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, +- SLAB_HWCACHE_ALIGN, NULL, NULL); ++ SLAB_HWCACHE_ALIGN | SLAB_UBC, ++ NULL, NULL); + + if (prot->slab == NULL) { + printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", +@@ -1478,9 +1478,11 @@ int proto_register(struct proto *prot, i + goto out_free_sock_slab; + + sprintf(request_sock_slab_name, mask, prot->name); +- prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, +- prot->rsk_prot->obj_size, 0, +- SLAB_HWCACHE_ALIGN, NULL, NULL); ++ prot->rsk_prot->slab = ++ kmem_cache_create(request_sock_slab_name, ++ prot->rsk_prot->obj_size, 0, ++ SLAB_HWCACHE_ALIGN | SLAB_UBC, ++ NULL, NULL); + + if (prot->rsk_prot->slab == NULL) { + printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", +@@ -1501,7 +1503,7 @@ int proto_register(struct proto *prot, i + prot->twsk_prot->twsk_slab = + kmem_cache_create(timewait_sock_slab_name, + prot->twsk_prot->twsk_obj_size, +- 0, SLAB_HWCACHE_ALIGN, ++ 0, SLAB_HWCACHE_ALIGN | SLAB_UBC, + NULL, NULL); + if 
(prot->twsk_prot->twsk_slab == NULL) + goto out_free_timewait_sock_slab_name; +diff -upr linux-2.6.16.orig/net/core/stream.c linux-2.6.16-026test015/net/core/stream.c +--- linux-2.6.16.orig/net/core/stream.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/core/stream.c 2006-07-04 14:41:37.000000000 +0400 +@@ -111,8 +111,9 @@ EXPORT_SYMBOL(sk_stream_wait_close); + * sk_stream_wait_memory - Wait for more memory for a socket + * @sk: socket to wait for memory + * @timeo_p: for how long ++ * @amount - amount of memory to wait for (in UB space!) + */ +-int sk_stream_wait_memory(struct sock *sk, long *timeo_p) ++int sk_stream_wait_memory(struct sock *sk, long *timeo_p, unsigned long amount) + { + int err = 0; + long vm_wait = 0; +@@ -134,8 +135,11 @@ int sk_stream_wait_memory(struct sock *s + if (signal_pending(current)) + goto do_interrupted; + clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); +- if (sk_stream_memory_free(sk) && !vm_wait) +- break; ++ if (amount == 0) { ++ if (sk_stream_memory_free(sk) && !vm_wait) ++ break; ++ } else ++ ub_sock_sndqueueadd_tcp(sk, amount); + + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk->sk_write_pending++; +@@ -144,6 +148,8 @@ int sk_stream_wait_memory(struct sock *s + sk_stream_memory_free(sk) && + vm_wait); + sk->sk_write_pending--; ++ if (amount > 0) ++ ub_sock_sndqueuedel(sk); + + if (vm_wait) { + vm_wait -= current_timeo; +diff -upr linux-2.6.16.orig/net/dccp/ipv6.c linux-2.6.16-026test015/net/dccp/ipv6.c +--- linux-2.6.16.orig/net/dccp/ipv6.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/dccp/ipv6.c 2006-07-04 14:41:37.000000000 +0400 +@@ -872,6 +872,8 @@ static struct sock *dccp_v6_request_recv + ip6_dst_store(newsk, dst, NULL); + newsk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); ++ if (!sysctl_tcp_use_sg) ++ newsk->sk_route_caps &= ~NETIF_F_SG; + + newdp6 = (struct dccp6_sock *)newsk; + newinet = inet_sk(newsk); +diff -upr 
linux-2.6.16.orig/net/ipv4/af_inet.c linux-2.6.16-026test015/net/ipv4/af_inet.c +--- linux-2.6.16.orig/net/ipv4/af_inet.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/af_inet.c 2006-07-04 14:41:38.000000000 +0400 +@@ -114,6 +114,7 @@ + #ifdef CONFIG_IP_MROUTE + #include <linux/mroute.h> + #endif ++#include <ub/ub_net.h> + + DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; + +@@ -298,6 +299,13 @@ lookup_protocol: + if (sk == NULL) + goto out; + ++ err = -ENOBUFS; ++ if (ub_sock_charge(sk, PF_INET, sock->type)) ++ goto out_sk_free; ++ /* if charge was successful, sock_init_data() MUST be called to ++ * set sk->sk_type. otherwise sk will be uncharged to wrong resource ++ */ ++ + err = 0; + sk->sk_no_check = answer_no_check; + if (INET_PROTOSW_REUSE & answer_flags) +@@ -355,6 +363,9 @@ out: + out_rcu_unlock: + rcu_read_unlock(); + goto out; ++out_sk_free: ++ sk_free(sk); ++ return err; + } + + +@@ -369,6 +380,9 @@ int inet_release(struct socket *sock) + + if (sk) { + long timeout; ++ struct ve_struct *saved_env; ++ ++ saved_env = set_exec_env(VE_OWNER_SK(sk)); + + /* Applications forget to leave groups before exiting */ + ip_mc_drop_socket(sk); +@@ -386,6 +400,8 @@ int inet_release(struct socket *sock) + timeout = sk->sk_lingertime; + sock->sk = NULL; + sk->sk_prot->close(sk, timeout); ++ ++ (void)set_exec_env(saved_env); + } + return 0; + } +@@ -1108,20 +1124,20 @@ static struct net_protocol icmp_protocol + + static int __init init_ipv4_mibs(void) + { +- net_statistics[0] = alloc_percpu(struct linux_mib); +- net_statistics[1] = alloc_percpu(struct linux_mib); +- ip_statistics[0] = alloc_percpu(struct ipstats_mib); +- ip_statistics[1] = alloc_percpu(struct ipstats_mib); +- icmp_statistics[0] = alloc_percpu(struct icmp_mib); +- icmp_statistics[1] = alloc_percpu(struct icmp_mib); +- tcp_statistics[0] = alloc_percpu(struct tcp_mib); +- tcp_statistics[1] = alloc_percpu(struct tcp_mib); +- udp_statistics[0] = alloc_percpu(struct 
udp_mib); +- udp_statistics[1] = alloc_percpu(struct udp_mib); ++ ve_net_statistics[0] = alloc_percpu(struct linux_mib); ++ ve_net_statistics[1] = alloc_percpu(struct linux_mib); ++ ve_ip_statistics[0] = alloc_percpu(struct ipstats_mib); ++ ve_ip_statistics[1] = alloc_percpu(struct ipstats_mib); ++ ve_icmp_statistics[0] = alloc_percpu(struct icmp_mib); ++ ve_icmp_statistics[1] = alloc_percpu(struct icmp_mib); ++ ve_tcp_statistics[0] = alloc_percpu(struct tcp_mib); ++ ve_tcp_statistics[1] = alloc_percpu(struct tcp_mib); ++ ve_udp_statistics[0] = alloc_percpu(struct udp_mib); ++ ve_udp_statistics[1] = alloc_percpu(struct udp_mib); + if (! +- (net_statistics[0] && net_statistics[1] && ip_statistics[0] +- && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1] +- && udp_statistics[0] && udp_statistics[1])) ++ (ve_net_statistics[0] && ve_net_statistics[1] && ve_ip_statistics[0] ++ && ve_ip_statistics[1] && ve_tcp_statistics[0] && ve_tcp_statistics[1] ++ && ve_udp_statistics[0] && ve_udp_statistics[1])) + return -ENOMEM; + + (void) tcp_mib_init(); +diff -upr linux-2.6.16.orig/net/ipv4/arp.c linux-2.6.16-026test015/net/ipv4/arp.c +--- linux-2.6.16.orig/net/ipv4/arp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/arp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -175,7 +175,7 @@ struct neigh_ops arp_broken_ops = { + .queue_xmit = dev_queue_xmit, + }; + +-struct neigh_table arp_tbl = { ++struct neigh_table global_arp_tbl = { + .family = AF_INET, + .entry_size = sizeof(struct neighbour) + 4, + .key_len = 4, +@@ -184,7 +184,7 @@ struct neigh_table arp_tbl = { + .proxy_redo = parp_redo, + .id = "arp_cache", + .parms = { +- .tbl = &arp_tbl, ++ .tbl = &global_arp_tbl, + .base_reachable_time = 30 * HZ, + .retrans_time = 1 * HZ, + .gc_staletime = 60 * HZ, +@@ -920,6 +920,9 @@ out: + + static void parp_redo(struct sk_buff *skb) + { ++#if defined(CONFIG_NETFILTER) && defined(CONFIG_NETFILTER_DEBUG) ++ skb->nf_debug = 0; ++#endif + arp_process(skb); + } 
+ +@@ -988,7 +991,7 @@ static int arp_req_set(struct arpreq *r, + return 0; + } + if (dev == NULL) { +- ipv4_devconf.proxy_arp = 1; ++ ve_ipv4_devconf.proxy_arp = 1; + return 0; + } + if (__in_dev_get_rtnl(dev)) { +@@ -1094,7 +1097,7 @@ static int arp_req_delete(struct arpreq + return pneigh_delete(&arp_tbl, &ip, dev); + if (mask == 0) { + if (dev == NULL) { +- ipv4_devconf.proxy_arp = 0; ++ ve_ipv4_devconf.proxy_arp = 0; + return 0; + } + if (__in_dev_get_rtnl(dev)) { +@@ -1240,7 +1243,9 @@ static int arp_proc_init(void); + + void __init arp_init(void) + { +- neigh_table_init(&arp_tbl); ++ get_ve0()->ve_arp_tbl = &global_arp_tbl; ++ if (neigh_table_init(&arp_tbl)) ++ panic("cannot initialize ARP tables\n"); + + dev_add_pack(&arp_packet_type); + arp_proc_init(); +@@ -1372,8 +1377,9 @@ static int arp_seq_open(struct inode *in + { + struct seq_file *seq; + int rc = -ENOMEM; +- struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL); +- ++ struct neigh_seq_state *s; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + goto out; + +@@ -1401,7 +1407,7 @@ static struct file_operations arp_seq_fo + + static int __init arp_proc_init(void) + { +- if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops)) ++ if (!proc_glob_fops_create("net/arp", S_IRUGO, &arp_seq_fops)) + return -ENOMEM; + return 0; + } +@@ -1421,8 +1427,55 @@ EXPORT_SYMBOL(arp_rcv); + EXPORT_SYMBOL(arp_create); + EXPORT_SYMBOL(arp_xmit); + EXPORT_SYMBOL(arp_send); +-EXPORT_SYMBOL(arp_tbl); ++EXPORT_SYMBOL(global_arp_tbl); + + #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) + EXPORT_SYMBOL(clip_tbl_hook); + #endif ++ ++int ve_arp_init(struct ve_struct *ve) ++{ ++ struct ve_struct *old_env; ++ int err; ++ ++ ve->ve_arp_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL); ++ if (ve->ve_arp_tbl == NULL) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ *(ve->ve_arp_tbl) = global_arp_tbl; ++ ve->ve_arp_tbl->parms.tbl = ve->ve_arp_tbl; ++ old_env = set_exec_env(ve); ++ err = 
neigh_table_init(ve->ve_arp_tbl); ++ if (err) ++ goto out_free; ++#ifdef CONFIG_SYSCTL ++ neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, ++ NET_IPV4_NEIGH, "ipv4", NULL, NULL); ++#endif ++ set_exec_env(old_env); ++ err = 0; ++ ++out: ++ return err; ++ ++out_free: ++ kfree(ve->ve_arp_tbl); ++ ve->ve_arp_tbl = NULL; ++ goto out; ++} ++EXPORT_SYMBOL(ve_arp_init); ++ ++void ve_arp_fini(struct ve_struct *ve) ++{ ++ if (ve->ve_arp_tbl) { ++#ifdef CONFIG_SYSCTL ++ neigh_sysctl_unregister(&ve->ve_arp_tbl->parms); ++#endif ++ neigh_table_clear(ve->ve_arp_tbl); ++ kfree(ve->ve_arp_tbl); ++ ve->ve_arp_tbl = NULL; ++ } ++} ++EXPORT_SYMBOL(ve_arp_fini); +diff -upr linux-2.6.16.orig/net/ipv4/devinet.c linux-2.6.16-026test015/net/ipv4/devinet.c +--- linux-2.6.16.orig/net/ipv4/devinet.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/devinet.c 2006-07-04 14:41:39.000000000 +0400 +@@ -71,7 +71,7 @@ struct ipv4_devconf ipv4_devconf = { + .shared_media = 1, + }; + +-static struct ipv4_devconf ipv4_devconf_dflt = { ++struct ipv4_devconf ipv4_devconf_dflt = { + .accept_redirects = 1, + .send_redirects = 1, + .secure_redirects = 1, +@@ -79,10 +79,16 @@ static struct ipv4_devconf ipv4_devconf_ + .accept_source_route = 1, + }; + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_ipv4_devconf_dflt (*(get_exec_env()->_ipv4_devconf_dflt)) ++#else ++#define ve_ipv4_devconf_dflt ipv4_devconf_dflt ++#endif ++ + static void rtmsg_ifa(int event, struct in_ifaddr *); + + static struct notifier_block *inetaddr_chain; +-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, ++void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy); + #ifdef CONFIG_SYSCTL + static void devinet_sysctl_register(struct in_device *in_dev, +@@ -92,7 +98,7 @@ static void devinet_sysctl_unregister(st + + /* Locks all the inet devices. 
*/ + +-static struct in_ifaddr *inet_alloc_ifa(void) ++struct in_ifaddr *inet_alloc_ifa(void) + { + struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); + +@@ -103,6 +109,7 @@ static struct in_ifaddr *inet_alloc_ifa( + + return ifa; + } ++EXPORT_SYMBOL_GPL(inet_alloc_ifa); + + static void inet_rcu_free_ifa(struct rcu_head *head) + { +@@ -175,6 +182,7 @@ out_kfree: + in_dev = NULL; + goto out; + } ++EXPORT_SYMBOL_GPL(inetdev_init); + + static void in_dev_rcu_put(struct rcu_head *head) + { +@@ -190,7 +198,7 @@ static void inetdev_destroy(struct in_de + ASSERT_RTNL(); + + dev = in_dev->dev; +- if (dev == &loopback_dev) ++ if (dev == &ve0_loopback) + return; + + in_dev->dead = 1; +@@ -232,7 +240,7 @@ int inet_addr_onlink(struct in_device *i + return 0; + } + +-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, ++void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy) + { + struct in_ifaddr *promote = NULL; +@@ -320,7 +328,7 @@ static void inet_del_ifa(struct in_devic + } + } + +-static int inet_insert_ifa(struct in_ifaddr *ifa) ++int inet_insert_ifa(struct in_ifaddr *ifa) + { + struct in_device *in_dev = ifa->ifa_dev; + struct in_ifaddr *ifa1, **ifap, **last_primary; +@@ -370,6 +378,7 @@ static int inet_insert_ifa(struct in_ifa + + return 0; + } ++EXPORT_SYMBOL_GPL(inet_insert_ifa); + + static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) + { +@@ -578,7 +587,7 @@ int devinet_ioctl(unsigned int cmd, void + + case SIOCSIFFLAGS: + ret = -EACCES; +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + goto out; + break; + case SIOCSIFADDR: /* Set interface address (and family) */ +@@ -586,7 +595,7 @@ int devinet_ioctl(unsigned int cmd, void + case SIOCSIFDSTADDR: /* Set the destination address */ + case SIOCSIFNETMASK: /* Set the netmask for the interface */ + ret = -EACCES; +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + goto out; + ret = -EINVAL; + if 
(sin->sin_family != AF_INET) +@@ -1163,10 +1172,10 @@ static struct rtnetlink_link inet_rtnetl + void inet_forward_change(void) + { + struct net_device *dev; +- int on = ipv4_devconf.forwarding; ++ int on = ve_ipv4_devconf.forwarding; + +- ipv4_devconf.accept_redirects = !on; +- ipv4_devconf_dflt.forwarding = on; ++ ve_ipv4_devconf.accept_redirects = !on; ++ ve_ipv4_devconf_dflt.forwarding = on; + + read_lock(&dev_base_lock); + for (dev = dev_base; dev; dev = dev->next) { +@@ -1191,9 +1200,9 @@ static int devinet_sysctl_forward(ctl_ta + int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + + if (write && *valp != val) { +- if (valp == &ipv4_devconf.forwarding) ++ if (valp == &ve_ipv4_devconf.forwarding) + inet_forward_change(); +- else if (valp != &ipv4_devconf_dflt.forwarding) ++ else if (valp != &ve_ipv4_devconf_dflt.forwarding) + rt_cache_flush(0); + } + +@@ -1464,30 +1473,22 @@ static struct devinet_sysctl_table { + }, + }; + +-static void devinet_sysctl_register(struct in_device *in_dev, +- struct ipv4_devconf *p) ++static struct devinet_sysctl_table *__devinet_sysctl_register(char *dev_name, ++ int ifindex, struct ipv4_devconf *p) + { + int i; +- struct net_device *dev = in_dev ? 
in_dev->dev : NULL; +- struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); +- char *dev_name = NULL; ++ struct devinet_sysctl_table *t; + ++ t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) +- return; ++ goto out; ++ + memcpy(t, &devinet_sysctl, sizeof(*t)); + for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { + t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; + t->devinet_vars[i].de = NULL; + } + +- if (dev) { +- dev_name = dev->name; +- t->devinet_dev[0].ctl_name = dev->ifindex; +- } else { +- dev_name = "default"; +- t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; +- } +- + /* + * Make a copy of dev_name, because '.procname' is regarded as const + * by sysctl and we wouldn't want anyone to change it under our feet +@@ -1495,8 +1496,9 @@ static void devinet_sysctl_register(stru + */ + dev_name = kstrdup(dev_name, GFP_KERNEL); + if (!dev_name) +- goto free; ++ goto out_free_table; + ++ t->devinet_dev[0].ctl_name = ifindex; + t->devinet_dev[0].procname = dev_name; + t->devinet_dev[0].child = t->devinet_vars; + t->devinet_dev[0].de = NULL; +@@ -1509,17 +1511,38 @@ static void devinet_sysctl_register(stru + + t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0); + if (!t->sysctl_header) +- goto free_procname; ++ goto out_free_procname; + +- p->sysctl = t; +- return; ++ return t; + + /* error path */ +- free_procname: ++out_free_procname: + kfree(dev_name); +- free: ++out_free_table: + kfree(t); +- return; ++out: ++ printk(KERN_DEBUG "Can't register net/ipv4/conf sysctls.\n"); ++ return NULL; ++} ++ ++static void devinet_sysctl_register(struct in_device *in_dev, ++ struct ipv4_devconf *p) ++{ ++ struct net_device *dev; ++ char *dev_name; ++ int ifindex; ++ ++ dev = in_dev ? 
in_dev->dev : NULL; ++ ++ if (dev) { ++ dev_name = dev->name; ++ ifindex = dev->ifindex; ++ } else { ++ dev_name = "default"; ++ ifindex = NET_PROTO_CONF_DEFAULT; ++ } ++ ++ p->sysctl = __devinet_sysctl_register(dev_name, ifindex, p); + } + + static void devinet_sysctl_unregister(struct ipv4_devconf *p) +@@ -1532,7 +1555,170 @@ static void devinet_sysctl_unregister(st + kfree(t); + } + } ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static ctl_table net_sysctl_tables[] = { ++ /* 0: net */ ++ { ++ .ctl_name = CTL_NET, ++ .procname = "net", ++ .mode = 0555, ++ .child = &net_sysctl_tables[2], ++ }, ++ { .ctl_name = 0, }, ++ /* 2: net/ipv4 */ ++ { ++ .ctl_name = NET_IPV4, ++ .procname = "ipv4", ++ .mode = 0555, ++ .child = &net_sysctl_tables[4], ++ }, ++ { .ctl_name = 0, }, ++ /* 4, 5: net/ipv4/[vars] */ ++ { ++ .ctl_name = NET_IPV4_FORWARD, ++ .procname = "ip_forward", ++ .data = &ipv4_devconf.forwarding, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &ipv4_sysctl_forward, ++ .strategy = &ipv4_sysctl_forward_strategy, ++ }, ++ { ++ .ctl_name = NET_IPV4_ROUTE, ++ .procname = "route", ++ .maxlen = 0, ++ .mode = 0555, ++ .child = &net_sysctl_tables[7], ++ }, ++ { .ctl_name = 0 }, ++ /* 7: net/ipv4/route/flush */ ++ { ++ .ctl_name = NET_IPV4_ROUTE_FLUSH, ++ .procname = "flush", ++ .data = NULL, /* setuped below */ ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &ipv4_sysctl_rtcache_flush, ++ .strategy = &ipv4_sysctl_rtcache_flush_strategy, ++ }, ++ { .ctl_name = 0 }, ++}; ++ ++static int ip_forward_sysctl_register(struct ve_struct *ve, ++ struct ipv4_devconf *p) ++{ ++ struct ctl_table_header *hdr; ++ ctl_table *root; ++ ++ root = clone_sysctl_template(net_sysctl_tables, ++ sizeof(net_sysctl_tables) / sizeof(ctl_table)); ++ if (root == NULL) ++ goto out; ++ ++ root[4].data = &p->forwarding; ++ root[7].data = &ipv4_flush_delay; ++ ++ hdr = register_sysctl_table(root, 1); ++ if (hdr == NULL) ++ goto out_free; ++ ++ 
ve->forward_header = hdr; ++ ve->forward_table = root; ++ return 0; ++ ++out_free: ++ free_sysctl_clone(root); ++out: ++ return -ENOMEM; ++} ++ ++static inline void ip_forward_sysctl_unregister(struct ve_struct *ve) ++{ ++ unregister_sysctl_table(ve->forward_header); ++ ve->forward_header = NULL; ++} ++ ++static inline void ip_forward_sysctl_free(struct ve_struct *ve) ++{ ++ free_sysctl_clone(ve->forward_table); ++ ve->forward_table = NULL; ++} ++#endif ++#endif ++ ++int devinet_sysctl_init(struct ve_struct *ve) ++{ ++ int err = 0; ++#ifdef CONFIG_SYSCTL ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ struct ipv4_devconf *conf, *conf_def; ++ ++ err = -ENOMEM; ++ ++ conf = kmalloc(sizeof(*conf), GFP_KERNEL); ++ if (!conf) ++ goto err1; ++ ++ memcpy(conf, &ipv4_devconf, sizeof(*conf)); ++ conf->sysctl = __devinet_sysctl_register("all", ++ NET_PROTO_CONF_ALL, conf); ++ if (!conf->sysctl) ++ goto err2; ++ ++ conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL); ++ if (!conf_def) ++ goto err3; ++ ++ memcpy(conf_def, &ipv4_devconf_dflt, sizeof(*conf_def)); ++ conf_def->sysctl = __devinet_sysctl_register("default", ++ NET_PROTO_CONF_DEFAULT, conf_def); ++ if (!conf_def->sysctl) ++ goto err4; ++ ++ err = ip_forward_sysctl_register(ve, conf); ++ if (err) ++ goto err5; ++ ++ ve->_ipv4_devconf = conf; ++ ve->_ipv4_devconf_dflt = conf_def; ++ return 0; ++ ++err5: ++ devinet_sysctl_unregister(conf_def); ++err4: ++ kfree(conf_def); ++err3: ++ devinet_sysctl_unregister(conf); ++err2: ++ kfree(conf); ++err1: + #endif ++#endif ++ return err; ++} ++ ++void devinet_sysctl_fini(struct ve_struct *ve) ++{ ++#ifdef CONFIG_SYSCTL ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ ip_forward_sysctl_unregister(ve); ++ devinet_sysctl_unregister(ve->_ipv4_devconf); ++ devinet_sysctl_unregister(ve->_ipv4_devconf_dflt); ++#endif ++#endif ++} ++ ++void devinet_sysctl_free(struct ve_struct *ve) ++{ ++#ifdef CONFIG_SYSCTL ++#if defined(CONFIG_VE_NETDEV) || 
defined(CONFIG_VE_NETDEV_MODULE) ++ ip_forward_sysctl_free(ve); ++ kfree(ve->_ipv4_devconf); ++ kfree(ve->_ipv4_devconf_dflt); ++#endif ++#endif ++} + + void __init devinet_init(void) + { +@@ -1542,13 +1728,18 @@ void __init devinet_init(void) + #ifdef CONFIG_SYSCTL + devinet_sysctl.sysctl_header = + register_sysctl_table(devinet_sysctl.devinet_root_dir, 0); +- devinet_sysctl_register(NULL, &ipv4_devconf_dflt); ++ __devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT, ++ &ipv4_devconf_dflt); + #endif + } + + EXPORT_SYMBOL(devinet_ioctl); + EXPORT_SYMBOL(in_dev_finish_destroy); + EXPORT_SYMBOL(inet_select_addr); ++EXPORT_SYMBOL(inet_del_ifa); + EXPORT_SYMBOL(inetdev_by_index); ++EXPORT_SYMBOL(devinet_sysctl_init); ++EXPORT_SYMBOL(devinet_sysctl_fini); ++EXPORT_SYMBOL(devinet_sysctl_free); + EXPORT_SYMBOL(register_inetaddr_notifier); + EXPORT_SYMBOL(unregister_inetaddr_notifier); +diff -upr linux-2.6.16.orig/net/ipv4/fib_frontend.c linux-2.6.16-026test015/net/ipv4/fib_frontend.c +--- linux-2.6.16.orig/net/ipv4/fib_frontend.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/fib_frontend.c 2006-07-04 14:41:39.000000000 +0400 +@@ -53,14 +53,46 @@ + + #define RT_TABLE_MIN RT_TABLE_MAIN + ++#undef ip_fib_local_table ++#undef ip_fib_main_table + struct fib_table *ip_fib_local_table; + struct fib_table *ip_fib_main_table; ++void prepare_fib_tables(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->_local_table = ip_fib_local_table; ++ ip_fib_local_table = (struct fib_table *)0x12345678; ++ get_ve0()->_main_table = ip_fib_main_table; ++ ip_fib_main_table = (struct fib_table *)0x12345678; ++#endif ++} ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ip_fib_local_table get_exec_env()->_local_table ++#define ip_fib_main_table get_exec_env()->_main_table ++#endif + + #else + + #define RT_TABLE_MIN 1 + ++#undef fib_tables + struct fib_table *fib_tables[RT_TABLE_MAX+1]; ++void prepare_fib_tables(void) ++{ ++#if 
defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ int i; ++ ++ BUG_ON(sizeof(fib_tables) != ++ sizeof(((struct ve_struct *)0)->_fib_tables)); ++ memcpy(get_ve0()->_fib_tables, fib_tables, sizeof(fib_tables)); ++ for (i = 0; i <= RT_TABLE_MAX; i++) ++ fib_tables[i] = (void *)0x12366678; ++#endif ++} ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define fib_tables get_exec_env()->_fib_tables ++#endif + + struct fib_table *__fib_new_table(int id) + { +@@ -250,7 +282,7 @@ int ip_rt_ioctl(unsigned int cmd, void _ + switch (cmd) { + case SIOCADDRT: /* Add a route */ + case SIOCDELRT: /* Delete a route */ +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&r, arg, sizeof(struct rtentry))) + return -EFAULT; +@@ -653,6 +685,7 @@ static struct notifier_block fib_netdev_ + + void __init ip_fib_init(void) + { ++ prepare_fib_tables(); + #ifndef CONFIG_IP_MULTIPLE_TABLES + ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); +diff -upr linux-2.6.16.orig/net/ipv4/fib_hash.c linux-2.6.16-026test015/net/ipv4/fib_hash.c +--- linux-2.6.16.orig/net/ipv4/fib_hash.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/fib_hash.c 2006-07-04 14:41:38.000000000 +0400 +@@ -36,6 +36,7 @@ + #include <linux/skbuff.h> + #include <linux/netlink.h> + #include <linux/init.h> ++#include <linux/ve.h> + + #include <net/ip.h> + #include <net/protocol.h> +@@ -73,11 +74,6 @@ struct fn_zone { + * can be cheaper than memory lookup, so that FZ_* macros are used. 
+ */ + +-struct fn_hash { +- struct fn_zone *fn_zones[33]; +- struct fn_zone *fn_zone_list; +-}; +- + static inline u32 fn_hash(u32 key, struct fn_zone *fz) + { + u32 h = ntohl(key)>>(32 - fz->fz_order); +@@ -623,7 +619,7 @@ fn_hash_delete(struct fib_table *tb, str + return -ESRCH; + } + +-static int fn_flush_list(struct fn_zone *fz, int idx) ++static int fn_flush_list(struct fn_zone *fz, int idx, int destroy) + { + struct hlist_head *head = &fz->fz_hash[idx]; + struct hlist_node *node, *n; +@@ -638,7 +634,9 @@ static int fn_flush_list(struct fn_zone + list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) { + struct fib_info *fi = fa->fa_info; + +- if (fi && (fi->fib_flags&RTNH_F_DEAD)) { ++ if (fi == NULL) ++ continue; ++ if (destroy || (fi->fib_flags&RTNH_F_DEAD)) { + write_lock_bh(&fib_hash_lock); + list_del(&fa->fa_list); + if (list_empty(&f->fn_alias)) { +@@ -660,7 +658,7 @@ static int fn_flush_list(struct fn_zone + return found; + } + +-static int fn_hash_flush(struct fib_table *tb) ++static int __fn_hash_flush(struct fib_table *tb, int destroy) + { + struct fn_hash *table = (struct fn_hash *) tb->tb_data; + struct fn_zone *fz; +@@ -670,11 +668,84 @@ static int fn_hash_flush(struct fib_tabl + int i; + + for (i = fz->fz_divisor - 1; i >= 0; i--) +- found += fn_flush_list(fz, i); ++ found += fn_flush_list(fz, i, destroy); + } + return found; + } + ++static int fn_hash_flush(struct fib_table *tb) ++{ ++ return __fn_hash_flush(tb, 0); ++} ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++void fib_hash_destroy(struct fib_table *tb) ++{ ++ __fn_hash_flush(tb, 1); ++ kfree(tb); ++} ++ ++/* ++ * Initialization of virtualized networking subsystem. 
++ */ ++int init_ve_route(struct ve_struct *ve) ++{ ++#ifdef CONFIG_IP_MULTIPLE_TABLES ++ if (fib_rules_create()) ++ return -ENOMEM; ++ ve->_fib_tables[RT_TABLE_LOCAL] = fib_hash_init(RT_TABLE_LOCAL); ++ if (!ve->_fib_tables[RT_TABLE_LOCAL]) ++ goto out_destroy; ++ ve->_fib_tables[RT_TABLE_MAIN] = fib_hash_init(RT_TABLE_MAIN); ++ if (!ve->_fib_tables[RT_TABLE_MAIN]) ++ goto out_destroy_local; ++ ++ return 0; ++ ++out_destroy_local: ++ fib_hash_destroy(ve->_fib_tables[RT_TABLE_LOCAL]); ++out_destroy: ++ fib_rules_destroy(); ++ ve->_local_rule = NULL; ++ return -ENOMEM; ++#else ++ ve->_local_table = fib_hash_init(RT_TABLE_LOCAL); ++ if (!ve->_local_table) ++ return -ENOMEM; ++ ve->_main_table = fib_hash_init(RT_TABLE_MAIN); ++ if (!ve->_main_table) { ++ fib_hash_destroy(ve->_local_table); ++ return -ENOMEM; ++ } ++ return 0; ++#endif ++} ++ ++void fini_ve_route(struct ve_struct *ve) ++{ ++#ifdef CONFIG_IP_MULTIPLE_TABLES ++ int i; ++ for (i=0; i<RT_TABLE_MAX+1; i++) ++ { ++ if (!ve->_fib_tables[i]) ++ continue; ++ fib_hash_destroy(ve->_fib_tables[i]); ++ } ++ fib_rules_destroy(); ++ ve->_local_rule = NULL; ++#else ++ fib_hash_destroy(ve->_local_table); ++ fib_hash_destroy(ve->_main_table); ++#endif ++ fib_hash_free(ve->_fib_info_hash, ve->_fib_hash_size); ++ fib_hash_free(ve->_fib_info_laddrhash, ve->_fib_hash_size); ++ ve->_fib_info_hash = ve->_fib_info_laddrhash = NULL; ++} ++ ++EXPORT_SYMBOL(init_ve_route); ++EXPORT_SYMBOL(fini_ve_route); ++#endif ++ + + static inline int + fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, +@@ -766,7 +837,7 @@ static int fn_hash_dump(struct fib_table + return skb->len; + } + +-#ifdef CONFIG_IP_MULTIPLE_TABLES ++#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) + struct fib_table * fib_hash_init(int id) + #else + struct fib_table * __init fib_hash_init(int id) +@@ -1076,13 +1147,13 @@ static struct file_operations fib_seq_fo + + int __init fib_proc_init(void) 
+ { +- if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops)) ++ if (!proc_glob_fops_create("net/route", S_IRUGO, &fib_seq_fops)) + return -ENOMEM; + return 0; + } + + void __init fib_proc_exit(void) + { +- proc_net_remove("route"); ++ remove_proc_glob_entry("net/route", NULL); + } + #endif /* CONFIG_PROC_FS */ +diff -upr linux-2.6.16.orig/net/ipv4/fib_lookup.h linux-2.6.16-026test015/net/ipv4/fib_lookup.h +--- linux-2.6.16.orig/net/ipv4/fib_lookup.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/fib_lookup.h 2006-07-04 14:41:38.000000000 +0400 +@@ -41,5 +41,6 @@ extern struct fib_alias *fib_find_alias( + extern int fib_detect_death(struct fib_info *fi, int order, + struct fib_info **last_resort, + int *last_idx, int *dflt); ++void fib_hash_free(struct hlist_head *hash, int bytes); + + #endif /* _FIB_LOOKUP_H */ +diff -upr linux-2.6.16.orig/net/ipv4/fib_rules.c linux-2.6.16-026test015/net/ipv4/fib_rules.c +--- linux-2.6.16.orig/net/ipv4/fib_rules.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/fib_rules.c 2006-07-04 14:41:39.000000000 +0400 +@@ -39,6 +39,7 @@ + #include <linux/proc_fs.h> + #include <linux/skbuff.h> + #include <linux/netlink.h> ++#include <linux/rtnetlink.h> + #include <linux/init.h> + + #include <net/ip.h> +@@ -99,9 +100,89 @@ static struct fib_rule local_rule = { + .r_action = RTN_UNICAST, + }; + +-static struct fib_rule *fib_rules = &local_rule; + static DEFINE_RWLOCK(fib_rules_lock); + ++void __init prepare_fib_rules(void) ++{ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ get_ve0()->_local_rule = &local_rule; ++ get_ve0()->_fib_rules = &local_rule; ++#endif ++} ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define local_rule (*(get_exec_env()->_local_rule)) ++#define fib_rules (get_exec_env()->_fib_rules) ++#else ++static struct fib_rule *fib_rules = &local_rule; ++#endif ++ ++#if defined(CONFIG_VE_CALLS) || 
defined(CONFIG_VE_CALLS_MODULE) ++int fib_rules_create() ++{ ++ struct fib_rule *default_rule, *main_rule, *loc_rule; ++ ++ default_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL); ++ if (default_rule == NULL) ++ goto out_def; ++ memset(default_rule, 0, sizeof(struct fib_rule)); ++ atomic_set(&default_rule->r_clntref, 1); ++ default_rule->r_preference = 0x7FFF; ++ default_rule->r_table = RT_TABLE_DEFAULT; ++ default_rule->r_action = RTN_UNICAST; ++ ++ main_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL); ++ if (main_rule == NULL) ++ goto out_main; ++ memset(main_rule, 0, sizeof(struct fib_rule)); ++ atomic_set(&main_rule->r_clntref, 1); ++ main_rule->r_preference = 0x7FFE; ++ main_rule->r_table = RT_TABLE_MAIN; ++ main_rule->r_action = RTN_UNICAST; ++ main_rule->r_next = default_rule; ++ ++ loc_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL); ++ if (loc_rule == NULL) ++ goto out_loc; ++ memset(loc_rule, 0, sizeof(struct fib_rule)); ++ atomic_set(&loc_rule->r_clntref, 1); ++ loc_rule->r_preference = 0; ++ loc_rule->r_table = RT_TABLE_LOCAL; ++ loc_rule->r_action = RTN_UNICAST; ++ loc_rule->r_next = main_rule; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ get_exec_env()->_local_rule = loc_rule; ++ get_exec_env()->_fib_rules = loc_rule; ++#endif ++ ++ return 0; ++ ++out_loc: ++ kfree(main_rule); ++out_main: ++ kfree(default_rule); ++out_def: ++ return -1; ++} ++ ++void fib_rules_destroy() ++{ ++ struct fib_rule *r; ++ ++ rtnl_lock(); ++ write_lock_bh(&fib_rules_lock); ++ while(fib_rules != NULL) { ++ r = fib_rules; ++ fib_rules = fib_rules->r_next; ++ r->r_dead = 1; ++ fib_rule_put(r); ++ } ++ write_unlock_bh(&fib_rules_lock); ++ rtnl_unlock(); ++} ++#endif ++ + int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + { + struct rtattr **rta = arg; +@@ -435,5 +516,6 @@ int inet_dump_rules(struct sk_buff *skb, + + void __init fib_rules_init(void) + { ++ prepare_fib_rules(); + 
register_netdevice_notifier(&fib_rules_notifier); + } +diff -upr linux-2.6.16.orig/net/ipv4/fib_semantics.c linux-2.6.16-026test015/net/ipv4/fib_semantics.c +--- linux-2.6.16.orig/net/ipv4/fib_semantics.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/fib_semantics.c 2006-07-04 14:41:39.000000000 +0400 +@@ -33,6 +33,7 @@ + #include <linux/netdevice.h> + #include <linux/if_arp.h> + #include <linux/proc_fs.h> ++#include <linux/ve.h> + #include <linux/skbuff.h> + #include <linux/netlink.h> + #include <linux/init.h> +@@ -56,6 +57,24 @@ static struct hlist_head *fib_info_laddr + static unsigned int fib_hash_size; + static unsigned int fib_info_cnt; + ++void prepare_fib_info(void) ++{ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ get_ve0()->_fib_info_hash = fib_info_hash; ++ get_ve0()->_fib_info_laddrhash = fib_info_laddrhash; ++ get_ve0()->_fib_hash_size = fib_hash_size; ++ get_ve0()->_fib_info_cnt = fib_info_cnt; ++#endif ++} ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define fib_info_hash (get_exec_env()->_fib_info_hash) ++#define fib_info_laddrhash (get_exec_env()->_fib_info_laddrhash) ++#define fib_hash_size (get_exec_env()->_fib_hash_size) ++#define fib_info_cnt (get_exec_env()->_fib_info_cnt) ++#endif ++ ++ + #define DEVINDEX_HASHBITS 8 + #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) + static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; +@@ -235,13 +254,15 @@ static struct fib_info *fib_find_info(co + return NULL; + } + +-static inline unsigned int fib_devindex_hashfn(unsigned int val) ++static inline unsigned int fib_devindex_hashfn(unsigned int val, ++ envid_t veid) + { + unsigned int mask = DEVINDEX_HASHSIZE - 1; + + return (val ^ + (val >> DEVINDEX_HASHBITS) ^ +- (val >> (DEVINDEX_HASHBITS * 2))) & mask; ++ (val >> (DEVINDEX_HASHBITS * 2)) ^ ++ (veid ^ (veid >> 16))) & mask; + } + + /* Check, that the gateway is already configured. 
+@@ -257,7 +278,7 @@ int ip_fib_check_default(u32 gw, struct + + read_lock(&fib_info_lock); + +- hash = fib_devindex_hashfn(dev->ifindex); ++ hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env)); + head = &fib_info_devhash[hash]; + hlist_for_each_entry(nh, node, head, nh_hash) { + if (nh->nh_dev == dev && +@@ -580,7 +601,7 @@ static struct hlist_head *fib_hash_alloc + __get_free_pages(GFP_KERNEL, get_order(bytes)); + } + +-static void fib_hash_free(struct hlist_head *hash, int bytes) ++void fib_hash_free(struct hlist_head *hash, int bytes) + { + if (!hash) + return; +@@ -837,7 +858,8 @@ link_it: + + if (!nh->nh_dev) + continue; +- hash = fib_devindex_hashfn(nh->nh_dev->ifindex); ++ hash = fib_devindex_hashfn(nh->nh_dev->ifindex, ++ VEID(nh->nh_dev->owner_env)); + head = &fib_info_devhash[hash]; + hlist_add_head(&nh->nh_hash, head); + } endfor_nexthops(fi) +@@ -1184,7 +1206,8 @@ int fib_sync_down(u32 local, struct net_ + + if (dev) { + struct fib_info *prev_fi = NULL; +- unsigned int hash = fib_devindex_hashfn(dev->ifindex); ++ unsigned int hash = fib_devindex_hashfn(dev->ifindex, ++ VEID(dev->owner_env)); + struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_node *node; + struct fib_nh *nh; +@@ -1249,7 +1272,7 @@ int fib_sync_up(struct net_device *dev) + return 0; + + prev_fi = NULL; +- hash = fib_devindex_hashfn(dev->ifindex); ++ hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env)); + head = &fib_info_devhash[hash]; + ret = 0; + +diff -upr linux-2.6.16.orig/net/ipv4/fib_trie.c linux-2.6.16-026test015/net/ipv4/fib_trie.c +--- linux-2.6.16.orig/net/ipv4/fib_trie.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/fib_trie.c 2006-07-04 14:41:36.000000000 +0400 +@@ -314,11 +314,6 @@ static void __leaf_free_rcu(struct rcu_h + kfree(container_of(head, struct leaf, rcu)); + } + +-static inline void free_leaf(struct leaf *leaf) +-{ +- call_rcu(&leaf->rcu, __leaf_free_rcu); +-} +- + static void 
__leaf_info_free_rcu(struct rcu_head *head) + { + kfree(container_of(head, struct leaf_info, rcu)); +@@ -357,7 +352,12 @@ static void __tnode_free_rcu(struct rcu_ + + static inline void tnode_free(struct tnode *tn) + { +- call_rcu(&tn->rcu, __tnode_free_rcu); ++ if(IS_LEAF(tn)) { ++ struct leaf *l = (struct leaf *) tn; ++ call_rcu_bh(&l->rcu, __leaf_free_rcu); ++ } ++ else ++ call_rcu(&tn->rcu, __tnode_free_rcu); + } + + static struct leaf *leaf_new(void) +diff -upr linux-2.6.16.orig/net/ipv4/igmp.c linux-2.6.16-026test015/net/ipv4/igmp.c +--- linux-2.6.16.orig/net/ipv4/igmp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/igmp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -2262,6 +2262,8 @@ static inline struct ip_mc_list *igmp_mc + state->dev; + state->dev = state->dev->next) { + struct in_device *in_dev; ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + in_dev = in_dev_get(state->dev); + if (!in_dev) + continue; +@@ -2291,6 +2293,8 @@ static struct ip_mc_list *igmp_mc_get_ne + state->in_dev = NULL; + break; + } ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + state->in_dev = in_dev_get(state->dev); + if (!state->in_dev) + continue; +@@ -2425,6 +2429,8 @@ static inline struct ip_sf_list *igmp_mc + state->dev; + state->dev = state->dev->next) { + struct in_device *idev; ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + idev = in_dev_get(state->dev); + if (unlikely(idev == NULL)) + continue; +@@ -2464,6 +2470,8 @@ static struct ip_sf_list *igmp_mcf_get_n + state->idev = NULL; + goto out; + } ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + state->idev = in_dev_get(state->dev); + if (!state->idev) + continue; +@@ -2584,8 +2592,8 @@ static struct file_operations igmp_mcf_s + + int __init igmp_mc_proc_init(void) + { +- proc_net_fops_create("igmp", S_IRUGO, 
&igmp_mc_seq_fops); +- proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops); ++ proc_glob_fops_create("net/igmp", S_IRUGO, &igmp_mc_seq_fops); ++ proc_glob_fops_create("net/mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + return 0; + } + #endif +diff -upr linux-2.6.16.orig/net/ipv4/inet_connection_sock.c linux-2.6.16-026test015/net/ipv4/inet_connection_sock.c +--- linux-2.6.16.orig/net/ipv4/inet_connection_sock.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/inet_connection_sock.c 2006-07-04 14:41:38.000000000 +0400 +@@ -25,6 +25,9 @@ + #include <net/tcp_states.h> + #include <net/xfrm.h> + ++#include <ub/ub_net.h> ++#include <ub/ub_orphan.h> ++ + #ifdef INET_CSK_DEBUG + const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; + EXPORT_SYMBOL(inet_csk_timer_bug_msg); +@@ -48,6 +51,7 @@ int inet_csk_bind_conflict(const struct + sk_for_each_bound(sk2, node, &tb->owners) { + if (sk != sk2 && + !inet_v6_ipv6only(sk2) && ++ !ve_accessible_strict(VE_OWNER_SK(sk), VE_OWNER_SK(sk2)) && + (!sk->sk_bound_dev_if || + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { +@@ -77,7 +81,9 @@ int inet_csk_get_port(struct inet_hashin + struct hlist_node *node; + struct inet_bind_bucket *tb; + int ret; ++ struct ve_struct *env; + ++ env = VE_OWNER_SK(sk); + local_bh_disable(); + if (!snum) { + int low = sysctl_local_port_range[0]; +@@ -86,11 +92,15 @@ int inet_csk_get_port(struct inet_hashin + int rover = net_random() % (high - low) + low; + + do { +- head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; ++ head = &hashinfo->bhash[inet_bhashfn(rover, ++ hashinfo->bhash_size, VEID(env))]; + spin_lock(&head->lock); +- inet_bind_bucket_for_each(tb, node, &head->chain) ++ inet_bind_bucket_for_each(tb, node, &head->chain) { ++ if (!ve_accessible_strict(VE_OWNER_TB(tb),env)) ++ continue; + if (tb->port == rover) + goto next; ++ } + break; + next: + spin_unlock(&head->lock); +@@ -113,11 +123,15 @@ int 
inet_csk_get_port(struct inet_hashin + */ + snum = rover; + } else { +- head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; ++ head = &hashinfo->bhash[inet_bhashfn(snum, ++ hashinfo->bhash_size, VEID(env))]; + spin_lock(&head->lock); +- inet_bind_bucket_for_each(tb, node, &head->chain) ++ inet_bind_bucket_for_each(tb, node, &head->chain) { ++ if (!ve_accessible_strict(VE_OWNER_TB(tb), env)) ++ continue; + if (tb->port == snum) + goto tb_found; ++ } + } + tb = NULL; + goto tb_not_found; +@@ -136,7 +150,7 @@ tb_found: + } + tb_not_found: + ret = 1; +- if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) ++ if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum, env)) == NULL) + goto fail_unlock; + if (hlist_empty(&tb->owners)) { + if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) +@@ -541,7 +555,7 @@ void inet_csk_destroy_sock(struct sock * + + sk_refcnt_debug_release(sk); + +- atomic_dec(sk->sk_prot->orphan_count); ++ ub_dec_orphan_count(sk); + sock_put(sk); + } + +@@ -621,7 +635,7 @@ void inet_csk_listen_stop(struct sock *s + + sock_orphan(child); + +- atomic_inc(sk->sk_prot->orphan_count); ++ ub_inc_orphan_count(sk); + + inet_csk_destroy_sock(child); + +diff -upr linux-2.6.16.orig/net/ipv4/inet_diag.c linux-2.6.16-026test015/net/ipv4/inet_diag.c +--- linux-2.6.16.orig/net/ipv4/inet_diag.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/inet_diag.c 2006-07-04 14:41:38.000000000 +0400 +@@ -673,7 +673,9 @@ static int inet_diag_dump(struct sk_buff + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); + const struct inet_diag_handler *handler; + struct inet_hashinfo *hashinfo; ++ struct ve_struct *ve; + ++ ve = get_exec_env(); + handler = inet_diag_table[cb->nlh->nlmsg_type]; + BUG_ON(handler == NULL); + hashinfo = handler->idiag_hashinfo; +@@ -694,6 +696,8 @@ static int inet_diag_dump(struct sk_buff + sk_for_each(sk, node, &hashinfo->listening_hash[i]) { + struct 
inet_sock *inet = inet_sk(sk); + ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (num < s_num) { + num++; + continue; +@@ -754,6 +758,8 @@ skip_listen_ht: + sk_for_each(sk, node, &head->chain) { + struct inet_sock *inet = inet_sk(sk); + ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (num < s_num) + goto next_normal; + if (!(r->idiag_states & (1 << sk->sk_state))) +@@ -778,6 +784,8 @@ next_normal: + inet_twsk_for_each(tw, node, + &hashinfo->ehash[i + hashinfo->ehash_size].chain) { + ++ if (!ve_accessible_veid(inet_twsk(sk)->tw_owner_env, VEID(ve))) ++ continue; + if (num < s_num) + goto next_dying; + if (r->id.idiag_sport != tw->tw_sport && +diff -upr linux-2.6.16.orig/net/ipv4/inet_hashtables.c linux-2.6.16-026test015/net/ipv4/inet_hashtables.c +--- linux-2.6.16.orig/net/ipv4/inet_hashtables.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/inet_hashtables.c 2006-07-04 14:41:38.000000000 +0400 +@@ -30,7 +30,8 @@ + */ + struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, +- const unsigned short snum) ++ const unsigned short snum, ++ struct ve_struct *ve) + { + struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); + +@@ -38,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucke + tb->port = snum; + tb->fastreuse = 0; + INIT_HLIST_HEAD(&tb->owners); ++ SET_VE_OWNER_TB(tb, ve); + hlist_add_head(&tb->node, &head->chain); + } + return tb; +@@ -71,10 +73,13 @@ EXPORT_SYMBOL(inet_bind_hash); + */ + static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) + { +- const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); +- struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; ++ int bhash; ++ struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + ++ bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size, ++ VEID(VE_OWNER_SK(sk))); ++ head = &hashinfo->bhash[bhash]; + spin_lock(&head->lock); + tb 
= inet_csk(sk)->icsk_bind_hash; + __sk_del_bind_node(sk); +@@ -130,7 +135,8 @@ EXPORT_SYMBOL(inet_listen_wlock); + * wildcarded during the search since they can never be otherwise. + */ + struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, +- const unsigned short hnum, const int dif) ++ const unsigned short hnum, const int dif, ++ struct ve_struct *env) + { + struct sock *result = NULL, *sk; + const struct hlist_node *node; +@@ -139,6 +145,8 @@ struct sock *__inet_lookup_listener(cons + sk_for_each(sk, node, head) { + const struct inet_sock *inet = inet_sk(sk); + ++ if (!ve_accessible_strict(VE_OWNER_SK(sk), env)) ++ continue; + if (inet->num == hnum && !ipv6_only_sock(sk)) { + const __u32 rcv_saddr = inet->rcv_saddr; + int score = sk->sk_family == PF_INET ? 1 : 0; +@@ -169,7 +177,8 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener + /* called with local bh disabled */ + static int __inet_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, __u16 lport, +- struct inet_timewait_sock **twp) ++ struct inet_timewait_sock **twp, ++ struct ve_struct *ve) + { + struct inet_hashinfo *hinfo = death_row->hashinfo; + struct inet_sock *inet = inet_sk(sk); +@@ -178,12 +187,15 @@ static int __inet_check_established(stru + int dif = sk->sk_bound_dev_if; + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); +- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); +- struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); ++ unsigned int hash; ++ struct inet_ehash_bucket *head; + struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + ++ hash = inet_ehashfn(daddr, lport, saddr, inet->dport, VEID(ve)); ++ head = inet_ehash_bucket(hinfo, hash); ++ + prefetch(head->chain.first); + write_lock(&head->lock); + +@@ -191,7 +203,8 @@ static int __inet_check_established(stru + sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { 
+ tw = inet_twsk(sk2); + +- if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { ++ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ++ ports, dif, ve)) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else +@@ -202,7 +215,8 @@ static int __inet_check_established(stru + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { +- if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) ++ if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ++ ports, dif, ve)) + goto not_unique; + } + +@@ -253,7 +267,9 @@ int inet_hash_connect(struct inet_timewa + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; ++ struct ve_struct *ve; + ++ ve = VE_OWNER_SK(sk); + if (!snum) { + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; +@@ -268,7 +284,8 @@ int inet_hash_connect(struct inet_timewa + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; +- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; ++ head = &hinfo->bhash[inet_bhashfn(port, ++ hinfo->bhash_size, VEID(ve))]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, +@@ -282,13 +299,14 @@ int inet_hash_connect(struct inet_timewa + goto next_port; + if (!__inet_check_established(death_row, + sk, port, +- &tw)) ++ &tw, ve)) + goto ok; + goto next_port; + } + } + +- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); ++ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, ++ head, port, ve); + if (!tb) { + spin_unlock(&head->lock); + break; +@@ -323,7 +341,7 @@ ok: + goto out; + } + +- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; ++ head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { +@@ -333,7 +351,7 @@ ok: + } else { + spin_unlock(&head->lock); + /* No definite answer... 
Walk to established hash table */ +- ret = __inet_check_established(death_row, sk, snum, NULL); ++ ret = __inet_check_established(death_row, sk, snum, NULL, ve); + out: + local_bh_enable(); + return ret; +diff -upr linux-2.6.16.orig/net/ipv4/inet_timewait_sock.c linux-2.6.16-026test015/net/ipv4/inet_timewait_sock.c +--- linux-2.6.16.orig/net/ipv4/inet_timewait_sock.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/inet_timewait_sock.c 2006-07-04 14:41:38.000000000 +0400 +@@ -32,7 +32,8 @@ void __inet_twsk_kill(struct inet_timewa + write_unlock(&ehead->lock); + + /* Disassociate with bind bucket. */ +- bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; ++ bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, ++ hashinfo->bhash_size, tw->tw_owner_env)]; + spin_lock(&bhead->lock); + tb = tw->tw_tb; + __hlist_del(&tw->tw_bind_node); +@@ -66,7 +67,8 @@ void __inet_twsk_hashdance(struct inet_t + Note, that any socket with inet->num != 0 MUST be bound in + binding cache, even if it is closed. 
+ */ +- bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; ++ bhead = &hashinfo->bhash[inet_bhashfn(inet->num, ++ hashinfo->bhash_size, tw->tw_owner_env)]; + spin_lock(&bhead->lock); + tw->tw_tb = icsk->icsk_bind_hash; + BUG_TRAP(icsk->icsk_bind_hash); +@@ -90,9 +92,14 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance) + + struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) + { +- struct inet_timewait_sock *tw = +- kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, +- SLAB_ATOMIC); ++ struct user_beancounter *ub; ++ struct inet_timewait_sock *tw; ++ ++ ub = set_exec_ub(sock_bc(sk)->ub); ++ tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, ++ SLAB_ATOMIC); ++ (void)set_exec_ub(ub); ++ + if (tw != NULL) { + const struct inet_sock *inet = inet_sk(sk); + +diff -upr linux-2.6.16.orig/net/ipv4/ip_forward.c linux-2.6.16-026test015/net/ipv4/ip_forward.c +--- linux-2.6.16.orig/net/ipv4/ip_forward.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/ip_forward.c 2006-07-04 14:41:38.000000000 +0400 +@@ -87,6 +87,24 @@ int ip_forward(struct sk_buff *skb) + if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + goto sr_failed; + ++ /* ++ * We try to optimize forwarding of VE packets: ++ * do not decrement TTL (and so save skb_cow) ++ * during forwarding of outgoing pkts from VE. ++ * For incoming pkts we still do ttl decr, ++ * since such skb is not cloned and does not require ++ * actual cow. So, there is at least one place ++ * in pkts path with mandatory ttl decr, that is ++ * sufficient to prevent routing loops. ++ */ ++ iph = skb->nh.iph; ++ if ( ++#ifdef CONFIG_IP_ROUTE_NAT ++ (rt->rt_flags & RTCF_NAT) == 0 && /* no NAT mangling expected */ ++#endif /* and */ ++ (skb->dev->features & NETIF_F_VENET)) /* src is VENET device */ ++ goto no_ttl_decr; ++ + /* We are about to mangle packet. Copy it! 
*/ + if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) + goto drop; +@@ -95,6 +113,8 @@ int ip_forward(struct sk_buff *skb) + /* Decrease ttl after skb cow done */ + ip_decrease_ttl(iph); + ++no_ttl_decr: ++ + /* + * We now generate an ICMP HOST REDIRECT giving the route + * we calculated. +diff -upr linux-2.6.16.orig/net/ipv4/ip_fragment.c linux-2.6.16-026test015/net/ipv4/ip_fragment.c +--- linux-2.6.16.orig/net/ipv4/ip_fragment.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/ip_fragment.c 2006-07-04 14:41:38.000000000 +0400 +@@ -44,6 +44,7 @@ + #include <linux/udp.h> + #include <linux/inet.h> + #include <linux/netfilter_ipv4.h> ++#include <linux/ve_owner.h> + + /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 + * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c +@@ -97,8 +98,12 @@ struct ipq { + int iif; + unsigned int rid; + struct inet_peer *peer; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(IPQ, struct ipq, owner_env) ++DCL_VE_OWNER(IPQ, struct ipq, owner_env) ++ + /* Hash table. */ + + #define IPQ_HASHSZ 64 +@@ -182,7 +187,8 @@ static __inline__ void frag_free_queue(s + + static __inline__ struct ipq *frag_alloc_queue(void) + { +- struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); ++ struct ipq *qp = kmalloc(sizeof(struct ipq) + sizeof(void *), ++ GFP_ATOMIC); + + if(!qp) + return NULL; +@@ -278,6 +284,9 @@ static void ip_evictor(void) + static void ip_expire(unsigned long arg) + { + struct ipq *qp = (struct ipq *) arg; ++ struct ve_struct *envid; ++ ++ envid = set_exec_env(VE_OWNER_IPQ(qp)); + + spin_lock(&qp->lock); + +@@ -300,6 +309,8 @@ static void ip_expire(unsigned long arg) + out: + spin_unlock(&qp->lock); + ipq_put(qp, NULL); ++ ++ (void)set_exec_env(envid); + } + + /* Creation primitives. 
*/ +@@ -321,7 +332,8 @@ static struct ipq *ip_frag_intern(unsign + qp->saddr == qp_in->saddr && + qp->daddr == qp_in->daddr && + qp->protocol == qp_in->protocol && +- qp->user == qp_in->user) { ++ qp->user == qp_in->user && ++ qp->owner_env == get_exec_env()) { + atomic_inc(&qp->refcnt); + write_unlock(&ipfrag_lock); + qp_in->last_in |= COMPLETE; +@@ -371,6 +383,8 @@ static struct ipq *ip_frag_create(unsign + spin_lock_init(&qp->lock); + atomic_set(&qp->refcnt, 1); + ++ SET_VE_OWNER_IPQ(qp, get_exec_env()); ++ + return ip_frag_intern(hash, qp); + + out_nomem: +@@ -397,7 +411,8 @@ static inline struct ipq *ip_find(struct + qp->saddr == saddr && + qp->daddr == daddr && + qp->protocol == protocol && +- qp->user == user) { ++ qp->user == user && ++ qp->owner_env == get_exec_env()) { + atomic_inc(&qp->refcnt); + read_unlock(&ipfrag_lock); + return qp; +@@ -719,6 +734,9 @@ struct sk_buff *ip_defrag(struct sk_buff + qp->meat == qp->len) + ret = ip_frag_reasm(qp, dev); + ++ if (ret) ++ SET_VE_OWNER_SKB(ret, VE_OWNER_SKB(skb)); ++ + spin_unlock(&qp->lock); + ipq_put(qp, NULL); + return ret; +@@ -729,6 +747,51 @@ struct sk_buff *ip_defrag(struct sk_buff + return NULL; + } + ++#ifdef CONFIG_VE ++/* XXX */ ++void ip_fragment_cleanup(struct ve_struct *envid) ++{ ++ int i, progress; ++ ++ /* All operations with fragment queues are performed from NET_RX/TX ++ * soft interrupts or from timer context. 
--Den */ ++ local_bh_disable(); ++ do { ++ progress = 0; ++ for (i = 0; i < IPQ_HASHSZ; i++) { ++ struct ipq *qp; ++ struct hlist_node *p, *n; ++ ++ if (hlist_empty(&ipq_hash[i])) ++ continue; ++inner_restart: ++ read_lock(&ipfrag_lock); ++ hlist_for_each_entry_safe(qp, p, n, ++ &ipq_hash[i], list) { ++ if (!ve_accessible_strict( ++ VE_OWNER_IPQ(qp), ++ envid)) ++ continue; ++ atomic_inc(&qp->refcnt); ++ read_unlock(&ipfrag_lock); ++ ++ spin_lock(&qp->lock); ++ if (!(qp->last_in&COMPLETE)) ++ ipq_kill(qp); ++ spin_unlock(&qp->lock); ++ ++ ipq_put(qp, NULL); ++ progress = 1; ++ goto inner_restart; ++ } ++ read_unlock(&ipfrag_lock); ++ } ++ } while(progress); ++ local_bh_enable(); ++} ++EXPORT_SYMBOL(ip_fragment_cleanup); ++#endif ++ + void ipfrag_init(void) + { + ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ +diff -upr linux-2.6.16.orig/net/ipv4/ip_output.c linux-2.6.16-026test015/net/ipv4/ip_output.c +--- linux-2.6.16.orig/net/ipv4/ip_output.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/ip_output.c 2006-07-04 14:41:37.000000000 +0400 +@@ -86,8 +86,6 @@ + + int sysctl_ip_default_ttl = IPDEFTTL; + +-static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)); +- + /* Generate a checksum for an outgoing IP datagram. */ + __inline__ void ip_send_check(struct iphdr *iph) + { +@@ -421,7 +419,7 @@ static void ip_copy_metadata(struct sk_b + * single device frame, and queue such a frame for sending. 
+ */ + +-static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) ++int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) + { + struct iphdr *iph; + int raw = 0; +@@ -673,6 +671,8 @@ fail: + return err; + } + ++EXPORT_SYMBOL(ip_fragment); ++ + int + ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) + { +@@ -1249,11 +1249,7 @@ int ip_push_pending_frames(struct sock * + iph->tos = inet->tos; + iph->tot_len = htons(skb->len); + iph->frag_off = df; +- if (!df) { +- __ip_select_ident(iph, &rt->u.dst, 0); +- } else { +- iph->id = htons(inet->id++); +- } ++ ip_select_ident(iph, &rt->u.dst, sk); + iph->ttl = ttl; + iph->protocol = sk->sk_protocol; + iph->saddr = rt->rt_src; +@@ -1340,12 +1336,13 @@ void ip_send_reply(struct sock *sk, stru + char data[40]; + } replyopts; + struct ipcm_cookie ipc; +- u32 daddr; ++ u32 saddr, daddr; + struct rtable *rt = (struct rtable*)skb->dst; + + if (ip_options_echo(&replyopts.opt, skb)) + return; + ++ saddr = skb->nh.iph->daddr; + daddr = ipc.addr = rt->rt_src; + ipc.opt = NULL; + +@@ -1359,7 +1356,7 @@ void ip_send_reply(struct sock *sk, stru + { + struct flowi fl = { .nl_u = { .ip4_u = + { .daddr = daddr, +- .saddr = rt->rt_spec_dst, ++ .saddr = saddr, + .tos = RT_TOS(skb->nh.iph->tos) } }, + /* Not quite clean, but right. 
*/ + .uli_u = { .ports = +diff -upr linux-2.6.16.orig/net/ipv4/ipmr.c linux-2.6.16-026test015/net/ipv4/ipmr.c +--- linux-2.6.16.orig/net/ipv4/ipmr.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/ipmr.c 2006-07-04 14:41:38.000000000 +0400 +@@ -837,7 +837,7 @@ static void mrtsock_destruct(struct sock + { + rtnl_lock(); + if (sk == mroute_socket) { +- ipv4_devconf.mc_forwarding--; ++ ve_ipv4_devconf.mc_forwarding--; + + write_lock_bh(&mrt_lock); + mroute_socket=NULL; +@@ -888,7 +888,7 @@ int ip_mroute_setsockopt(struct sock *sk + mroute_socket=sk; + write_unlock_bh(&mrt_lock); + +- ipv4_devconf.mc_forwarding++; ++ ve_ipv4_devconf.mc_forwarding++; + } + rtnl_unlock(); + return ret; +diff -upr linux-2.6.16.orig/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.16-026test015/net/ipv4/ipvs/ip_vs_conn.c +--- linux-2.6.16.orig/net/ipv4/ipvs/ip_vs_conn.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/ipvs/ip_vs_conn.c 2006-07-04 14:41:37.000000000 +0400 +@@ -902,7 +902,8 @@ int ip_vs_conn_init(void) + /* Allocate ip_vs_conn slab cache */ + ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", + sizeof(struct ip_vs_conn), 0, +- SLAB_HWCACHE_ALIGN, NULL, NULL); ++ SLAB_HWCACHE_ALIGN | SLAB_UBC, ++ NULL, NULL); + if (!ip_vs_conn_cachep) { + vfree(ip_vs_conn_tab); + return -ENOMEM; +diff -upr linux-2.6.16.orig/net/ipv4/ipvs/ip_vs_core.c linux-2.6.16-026test015/net/ipv4/ipvs/ip_vs_core.c +--- linux-2.6.16.orig/net/ipv4/ipvs/ip_vs_core.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/ipvs/ip_vs_core.c 2006-07-04 14:41:38.000000000 +0400 +@@ -952,6 +952,10 @@ ip_vs_in(unsigned int hooknum, struct sk + * Big tappo: only PACKET_HOST (neither loopback nor mcasts) + * ... don't know why 1st test DOES NOT include 2nd (?) + */ ++ /* ++ * VZ: the question above is right. ++ * The second test is superfluous. 
++ */ + if (unlikely(skb->pkt_type != PACKET_HOST + || skb->dev == &loopback_dev || skb->sk)) { + IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/arp_tables.c linux-2.6.16-026test015/net/ipv4/netfilter/arp_tables.c +--- linux-2.6.16.orig/net/ipv4/netfilter/arp_tables.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/arp_tables.c 2006-07-04 14:41:36.000000000 +0400 +@@ -941,7 +941,7 @@ static int do_add_counters(void __user * + + write_lock_bh(&t->lock); + private = t->private; +- if (private->number != paddc->num_counters) { ++ if (private->number != tmp.num_counters) { + ret = -EINVAL; + goto unlock_up_free; + } +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_core.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_core.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_core.c 2006-07-04 14:41:39.000000000 +0400 +@@ -49,6 +49,7 @@ + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> + #include <linux/netfilter_ipv4/ip_conntrack_core.h> + #include <linux/netfilter_ipv4/listhelp.h> ++#include <ub/ub_mem.h> + + #define IP_CONNTRACK_VERSION "2.4" + +@@ -60,22 +61,41 @@ + + DEFINE_RWLOCK(ip_conntrack_lock); + +-/* ip_conntrack_standalone needs this */ +-atomic_t ip_conntrack_count = ATOMIC_INIT(0); ++#ifdef CONFIG_VE_IPTABLES ++#define ve_ip_conntrack_helpers \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_helpers) ++#define ve_ip_conntrack_max \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_max) ++#define ve_ip_conntrack_count \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_count) ++#define ve_ip_conntrack_unconfirmed \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_unconfirmed) ++#else + + void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; + LIST_HEAD(ip_conntrack_expect_list); + struct 
ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; + static LIST_HEAD(helpers); ++struct list_head *ip_conntrack_hash; ++static LIST_HEAD(unconfirmed); ++#define ve_ip_conntrack_count ip_conntrack_count ++#define ve_ip_conntrack_helpers helpers ++#define ve_ip_conntrack_max ip_conntrack_max ++#define ve_ip_conntrack_unconfirmed unconfirmed ++#endif ++ ++/* ip_conntrack_standalone needs this */ ++atomic_t ip_conntrack_count = ATOMIC_INIT(0); ++ + unsigned int ip_conntrack_htable_size = 0; + int ip_conntrack_max; +-struct list_head *ip_conntrack_hash; + static kmem_cache_t *ip_conntrack_cachep __read_mostly; + static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; + struct ip_conntrack ip_conntrack_untracked; + unsigned int ip_ct_log_invalid; +-static LIST_HEAD(unconfirmed); ++#ifndef CONFIG_VE_IPTABLES + static int ip_conntrack_vmalloc; ++#endif + + static unsigned int ip_conntrack_next_id = 1; + static unsigned int ip_conntrack_expect_next_id = 1; +@@ -105,6 +125,9 @@ void ip_ct_deliver_cached_events(const s + { + struct ip_conntrack_ecache *ecache; + ++ if (!ve_is_super(get_exec_env())) ++ return; ++ + local_bh_disable(); + ecache = &__get_cpu_var(ip_conntrack_ecache); + if (ecache->ct == ct) +@@ -133,6 +156,9 @@ static void ip_ct_event_cache_flush(void + struct ip_conntrack_ecache *ecache; + int cpu; + ++ if (!ve_is_super(get_exec_env())) ++ return; ++ + for_each_cpu(cpu) { + ecache = &per_cpu(ip_conntrack_ecache, cpu); + if (ecache->ct) +@@ -226,7 +252,7 @@ __ip_conntrack_expect_find(const struct + { + struct ip_conntrack_expect *i; + +- list_for_each_entry(i, &ip_conntrack_expect_list, list) { ++ list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) { + if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + atomic_inc(&i->use); + return i; +@@ -255,7 +281,7 @@ find_expectation(const struct ip_conntra + { + struct ip_conntrack_expect *i; + +- list_for_each_entry(i, &ip_conntrack_expect_list, list) { ++ list_for_each_entry(i, 
&ve_ip_conntrack_expect_list, list) { + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if +@@ -284,7 +310,7 @@ void ip_ct_remove_expectations(struct ip + if (ct->expecting == 0) + return; + +- list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) { ++ list_for_each_entry_safe(i, tmp, &ve_ip_conntrack_expect_list, list) { + if (i->master == ct && del_timer(&i->timeout)) { + ip_ct_unlink_expect(i); + ip_conntrack_expect_put(i); +@@ -302,8 +328,10 @@ clean_from_lists(struct ip_conntrack *ct + + ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); +- LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); +- LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); ++ LIST_DELETE(&ve_ip_conntrack_hash[ho], ++ &ct->tuplehash[IP_CT_DIR_ORIGINAL]); ++ LIST_DELETE(&ve_ip_conntrack_hash[hr], ++ &ct->tuplehash[IP_CT_DIR_REPLY]); + + /* Destroy all pending expectations */ + ip_ct_remove_expectations(ct); +@@ -329,8 +357,8 @@ destroy_conntrack(struct nf_conntrack *n + if (proto && proto->destroy) + proto->destroy(ct); + +- if (ip_conntrack_destroyed) +- ip_conntrack_destroyed(ct); ++ if (ve_ip_conntrack_destroyed) ++ ve_ip_conntrack_destroyed(ct); + + write_lock_bh(&ip_conntrack_lock); + /* Expectations will have been removed in clean_from_lists, +@@ -358,7 +386,11 @@ destroy_conntrack(struct nf_conntrack *n + static void death_by_timeout(unsigned long ul_conntrack) + { + struct ip_conntrack *ct = (void *)ul_conntrack; ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_struct *old; + ++ old = set_exec_env(VE_OWNER_CT(ct)); ++#endif + write_lock_bh(&ip_conntrack_lock); + /* Inside lock so preempt is disabled on module removal path. + * Otherwise we can get spurious warnings. 
*/ +@@ -366,6 +398,9 @@ static void death_by_timeout(unsigned lo + clean_from_lists(ct); + write_unlock_bh(&ip_conntrack_lock); + ip_conntrack_put(ct); ++#ifdef CONFIG_VE_IPTABLES ++ (void)set_exec_env(old); ++#endif + } + + static inline int +@@ -386,7 +421,7 @@ __ip_conntrack_find(const struct ip_conn + unsigned int hash = hash_conntrack(tuple); + + ASSERT_READ_LOCK(&ip_conntrack_lock); +- list_for_each_entry(h, &ip_conntrack_hash[hash], list) { ++ list_for_each_entry(h, &ve_ip_conntrack_hash[hash], list) { + if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + CONNTRACK_STAT_INC(found); + return h; +@@ -418,9 +453,9 @@ static void __ip_conntrack_hash_insert(s + unsigned int repl_hash) + { + ct->id = ++ip_conntrack_next_id; +- list_prepend(&ip_conntrack_hash[hash], ++ list_prepend(&ve_ip_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); +- list_prepend(&ip_conntrack_hash[repl_hash], ++ list_prepend(&ve_ip_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY].list); + } + +@@ -471,11 +506,11 @@ __ip_conntrack_confirm(struct sk_buff ** + /* See if there's one in the list already, including reverse: + NAT could have grabbed it without realizing, since we're + not in the hash. If there is, we lost race. 
*/ +- if (!LIST_FIND(&ip_conntrack_hash[hash], ++ if (!LIST_FIND(&ve_ip_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) +- && !LIST_FIND(&ip_conntrack_hash[repl_hash], ++ && !LIST_FIND(&ve_ip_conntrack_hash[repl_hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { +@@ -569,7 +604,7 @@ static inline int helper_cmp(const struc + static struct ip_conntrack_helper * + __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) + { +- return LIST_FIND(&helpers, helper_cmp, ++ return LIST_FIND(&ve_ip_conntrack_helpers, helper_cmp, + struct ip_conntrack_helper *, + tuple); + } +@@ -605,7 +640,7 @@ void ip_conntrack_helper_put(struct ip_c + struct ip_conntrack_protocol * + __ip_conntrack_proto_find(u_int8_t protocol) + { +- return ip_ct_protos[protocol]; ++ return ve_ip_ct_protos[protocol]; + } + + /* this is guaranteed to always return a valid protocol helper, since +@@ -632,29 +667,32 @@ void ip_conntrack_proto_put(struct ip_co + } + + struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, +- struct ip_conntrack_tuple *repl) ++ struct ip_conntrack_tuple *repl, struct user_beancounter *ub) + { + struct ip_conntrack *conntrack; ++ struct user_beancounter *old_ub; + + if (!ip_conntrack_hash_rnd_initted) { + get_random_bytes(&ip_conntrack_hash_rnd, 4); + ip_conntrack_hash_rnd_initted = 1; + } + +- if (ip_conntrack_max +- && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { ++ if (ve_ip_conntrack_max ++ && atomic_read(&ve_ip_conntrack_count) >= ve_ip_conntrack_max) { + unsigned int hash = hash_conntrack(orig); + /* Try dropping from this hash chain. 
*/ +- if (!early_drop(&ip_conntrack_hash[hash])) { ++ if (!early_drop(&ve_ip_conntrack_hash[hash])) { + if (net_ratelimit()) +- printk(KERN_WARNING +- "ip_conntrack: table full, dropping" +- " packet.\n"); ++ ve_printk(VE_LOG_BOTH, KERN_WARNING ++ "ip_conntrack: VPS %d: table full, dropping" ++ " packet.\n", VEID(get_exec_env())); + return ERR_PTR(-ENOMEM); + } + } + ++ old_ub = set_exec_ub(ub); + conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); ++ (void)set_exec_ub(old_ub); + if (!conntrack) { + DEBUGP("Can't allocate conntrack.\n"); + return ERR_PTR(-ENOMEM); +@@ -669,8 +707,11 @@ struct ip_conntrack *ip_conntrack_alloc( + init_timer(&conntrack->timeout); + conntrack->timeout.data = (unsigned long)conntrack; + conntrack->timeout.function = death_by_timeout; ++#ifdef CONFIG_VE_IPTABLES ++ SET_VE_OWNER_CT(conntrack, get_exec_env()); ++#endif + +- atomic_inc(&ip_conntrack_count); ++ atomic_inc(&ve_ip_conntrack_count); + + return conntrack; + } +@@ -678,7 +719,7 @@ struct ip_conntrack *ip_conntrack_alloc( + void + ip_conntrack_free(struct ip_conntrack *conntrack) + { +- atomic_dec(&ip_conntrack_count); ++ atomic_dec(&ve_ip_conntrack_count); + kmem_cache_free(ip_conntrack_cachep, conntrack); + } + +@@ -692,13 +733,22 @@ init_conntrack(struct ip_conntrack_tuple + struct ip_conntrack *conntrack; + struct ip_conntrack_tuple repl_tuple; + struct ip_conntrack_expect *exp; ++ struct user_beancounter *ub; + + if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return NULL; + } + +- conntrack = ip_conntrack_alloc(tuple, &repl_tuple); ++#ifdef CONFIG_USER_RESOURCE ++ if (skb->dev != NULL) /* received skb */ ++ ub = netdev_bc(skb->dev)->exec_ub; ++ else if (skb->sk != NULL) /* sent skb */ ++ ub = sock_bc(skb->sk)->ub; ++ else ++#endif ++ ub = NULL; ++ conntrack = ip_conntrack_alloc(tuple, &repl_tuple, ub); + if (conntrack == NULL || IS_ERR(conntrack)) + return (struct ip_conntrack_tuple_hash *)conntrack; + +@@ -733,7 
+783,8 @@ init_conntrack(struct ip_conntrack_tuple + } + + /* Overload tuple linked list to put us in unconfirmed list. */ +- list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); ++ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, ++ &ve_ip_conntrack_unconfirmed); + + write_unlock_bh(&ip_conntrack_lock); + +@@ -925,7 +976,7 @@ void ip_conntrack_unexpect_related(struc + + write_lock_bh(&ip_conntrack_lock); + /* choose the the oldest expectation to evict */ +- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { ++ list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) { + if (expect_matches(i, exp) && del_timer(&i->timeout)) { + ip_ct_unlink_expect(i); + write_unlock_bh(&ip_conntrack_lock); +@@ -959,11 +1010,11 @@ void ip_conntrack_expect_put(struct ip_c + kmem_cache_free(ip_conntrack_expect_cachep, exp); + } + +-static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp) ++void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp) + { + atomic_inc(&exp->use); + exp->master->expecting++; +- list_add(&exp->list, &ip_conntrack_expect_list); ++ list_add(&exp->list, &ve_ip_conntrack_expect_list); + + init_timer(&exp->timeout); + exp->timeout.data = (unsigned long)exp; +@@ -975,13 +1026,14 @@ static void ip_conntrack_expect_insert(s + atomic_inc(&exp->use); + CONNTRACK_STAT_INC(expect_create); + } ++EXPORT_SYMBOL_GPL(ip_conntrack_expect_insert); + + /* Race with expectations being used means we could have none to find; OK. 
*/ + static void evict_oldest_expect(struct ip_conntrack *master) + { + struct ip_conntrack_expect *i; + +- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { ++ list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) { + if (i->master == master) { + if (del_timer(&i->timeout)) { + ip_ct_unlink_expect(i); +@@ -1012,7 +1064,7 @@ int ip_conntrack_expect_related(struct i + DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); + + write_lock_bh(&ip_conntrack_lock); +- list_for_each_entry(i, &ip_conntrack_expect_list, list) { ++ list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) { + if (expect_matches(i, expect)) { + /* Refresh timer: if it's dying, ignore.. */ + if (refresh_timer(i)) { +@@ -1060,18 +1112,48 @@ int ip_conntrack_helper_register(struct + { + BUG_ON(me->timeout == 0); + write_lock_bh(&ip_conntrack_lock); +- list_prepend(&helpers, me); ++ list_prepend(&ve_ip_conntrack_helpers, me); + write_unlock_bh(&ip_conntrack_lock); + + return 0; + } + ++int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *me) ++{ ++ int ret; ++ struct module *mod = me->me; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct ip_conntrack_helper *tmp; ++ __module_get(mod); ++ ret = -ENOMEM; ++ tmp = kmalloc(sizeof(struct ip_conntrack_helper), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, me, sizeof(struct ip_conntrack_helper)); ++ me = tmp; ++ } ++ ++ ret = ip_conntrack_helper_register(me); ++ if (ret) ++ goto out; ++ ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())){ ++ kfree(me); ++nomem: ++ module_put(mod); ++ } ++ return ret; ++} ++ + struct ip_conntrack_helper * + __ip_conntrack_helper_find_byname(const char *name) + { + struct ip_conntrack_helper *h; + +- list_for_each_entry(h, &helpers, list) { ++ list_for_each_entry(h, &ve_ip_conntrack_helpers, list) { + if (!strcmp(h->name, name)) + return h; + } +@@ -1096,19 +1178,20 @@ void ip_conntrack_helper_unregister(stru + + /* Need write lock here, to delete helper. 
*/ + write_lock_bh(&ip_conntrack_lock); +- LIST_DELETE(&helpers, me); ++ LIST_DELETE(&ve_ip_conntrack_helpers, me); + + /* Get rid of expectations */ +- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { ++ list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list, list) { + if (exp->master->helper == me && del_timer(&exp->timeout)) { + ip_ct_unlink_expect(exp); + ip_conntrack_expect_put(exp); + } + } + /* Get rid of expecteds, set helpers to NULL. */ +- LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); ++ LIST_FIND_W(&ve_ip_conntrack_unconfirmed, unhelp, ++ struct ip_conntrack_tuple_hash*, me); + for (i = 0; i < ip_conntrack_htable_size; i++) +- LIST_FIND_W(&ip_conntrack_hash[i], unhelp, ++ LIST_FIND_W(&ve_ip_conntrack_hash[i], unhelp, + struct ip_conntrack_tuple_hash *, me); + write_unlock_bh(&ip_conntrack_lock); + +@@ -1116,6 +1199,25 @@ void ip_conntrack_helper_unregister(stru + synchronize_net(); + } + ++void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) ++{ ++ ++ if (!ve_is_super(get_exec_env())) { ++ read_lock_bh(&ip_conntrack_lock); ++ me = list_named_find(&ve_ip_conntrack_helpers, me->name); ++ read_unlock_bh(&ip_conntrack_lock); ++ if (!me) ++ return; ++ } ++ ++ ip_conntrack_helper_unregister(me); ++ ++ if (!ve_is_super(get_exec_env())) { ++ module_put(me->me); ++ kfree(me); ++ } ++} ++ + /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ + void __ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, +@@ -1246,13 +1348,13 @@ get_next_corpse(int (*iter)(struct ip_co + + write_lock_bh(&ip_conntrack_lock); + for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { +- h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, ++ h = LIST_FIND_W(&ve_ip_conntrack_hash[*bucket], do_iter, + struct ip_conntrack_tuple_hash *, iter, data); + if (h) + break; + } + if (!h) +- h = LIST_FIND_W(&unconfirmed, do_iter, ++ h = 
LIST_FIND_W(&ve_ip_conntrack_unconfirmed, do_iter, + struct ip_conntrack_tuple_hash *, iter, data); + if (h) + atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); +@@ -1289,6 +1391,11 @@ getorigdst(struct sock *sk, int optval, + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + ++#ifdef CONFIG_VE_IPTABLES ++ if (!get_exec_env()->_ip_conntrack) ++ return -ENOPROTOOPT; ++#endif ++ + IP_CT_TUPLE_U_BLANK(&tuple); + tuple.src.ip = inet->rcv_saddr; + tuple.src.u.tcp.port = inet->sport; +@@ -1318,6 +1425,7 @@ getorigdst(struct sock *sk, int optval, + .tuple.dst.u.tcp.port; + sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.ip; ++ memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + + DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", + NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); +@@ -1359,12 +1467,17 @@ static void free_conntrack_hash(struct l + get_order(sizeof(struct list_head) * size)); + } + ++static void ip_conntrack_cache_free(void) ++{ ++ kmem_cache_destroy(ip_conntrack_expect_cachep); ++ kmem_cache_destroy(ip_conntrack_cachep); ++ nf_unregister_sockopt(&so_getorigdst); ++} ++ + /* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. */ + void ip_conntrack_cleanup(void) + { +- ip_ct_attach = NULL; +- + /* This makes sure all current packets have passed through + netfilter framework. Roll on, two-stage module + delete... 
*/ +@@ -1373,19 +1486,32 @@ void ip_conntrack_cleanup(void) + ip_ct_event_cache_flush(); + i_see_dead_people: + ip_conntrack_flush(); +- if (atomic_read(&ip_conntrack_count) != 0) { ++ if (atomic_read(&ve_ip_conntrack_count) != 0) { + schedule(); + goto i_see_dead_people; + } +- /* wait until all references to ip_conntrack_untracked are dropped */ +- while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) +- schedule(); +- +- kmem_cache_destroy(ip_conntrack_cachep); +- kmem_cache_destroy(ip_conntrack_expect_cachep); +- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, ++ if (ve_is_super(get_exec_env())) { ++ /* wait until all references to ip_conntrack_untracked are ++ * dropped */ ++ while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) ++ schedule(); ++ ip_ct_attach = NULL; ++ ip_conntrack_cache_free(); ++ } ++ free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc, + ip_conntrack_htable_size); +- nf_unregister_sockopt(&so_getorigdst); ++ ve_ip_conntrack_hash = NULL; ++ INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed); ++ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list); ++ INIT_LIST_HEAD(&ve_ip_conntrack_helpers); ++ atomic_set(&ve_ip_conntrack_count, 0); ++ ve_ip_conntrack_max = 0; ++#ifdef CONFIG_VE_IPTABLES ++ kfree(ve_ip_ct_protos); ++ ve_ip_ct_protos = NULL; ++ kfree(get_exec_env()->_ip_conntrack); ++ get_exec_env()->_ip_conntrack = NULL; ++#endif + } + + static struct list_head *alloc_hashtable(int size, int *vmalloced) +@@ -1394,13 +1520,13 @@ static struct list_head *alloc_hashtable + unsigned int i; + + *vmalloced = 0; +- hash = (void*)__get_free_pages(GFP_KERNEL, ++ hash = (void*)__get_free_pages(GFP_KERNEL_UBC, + get_order(sizeof(struct list_head) + * size)); + if (!hash) { + *vmalloced = 1; + printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n"); +- hash = vmalloc(sizeof(struct list_head) * size); ++ hash = ub_vmalloc(sizeof(struct list_head) * size); + } + + if (hash) +@@ -1436,8 +1562,8 @@ static int 
set_hashsize(const char *val, + + write_lock_bh(&ip_conntrack_lock); + for (i = 0; i < ip_conntrack_htable_size; i++) { +- while (!list_empty(&ip_conntrack_hash[i])) { +- h = list_entry(ip_conntrack_hash[i].next, ++ while (!list_empty(&ve_ip_conntrack_hash[i])) { ++ h = list_entry(ve_ip_conntrack_hash[i].next, + struct ip_conntrack_tuple_hash, list); + list_del(&h->list); + bucket = __hash_conntrack(&h->tuple, hashsize, rnd); +@@ -1445,12 +1571,12 @@ static int set_hashsize(const char *val, + } + } + old_size = ip_conntrack_htable_size; +- old_vmalloced = ip_conntrack_vmalloc; +- old_hash = ip_conntrack_hash; ++ old_vmalloced = ve_ip_conntrack_vmalloc; ++ old_hash = ve_ip_conntrack_hash; + + ip_conntrack_htable_size = hashsize; +- ip_conntrack_vmalloc = vmalloced; +- ip_conntrack_hash = hash; ++ ve_ip_conntrack_vmalloc = vmalloced; ++ ve_ip_conntrack_hash = hash; + ip_conntrack_hash_rnd = rnd; + write_unlock_bh(&ip_conntrack_lock); + +@@ -1461,9 +1587,8 @@ static int set_hashsize(const char *val, + module_param_call(hashsize, set_hashsize, param_get_uint, + &ip_conntrack_htable_size, 0600); + +-int __init ip_conntrack_init(void) ++static int ip_conntrack_cache_create(void) + { +- unsigned int i; + int ret; + + /* Idea from tcp.c: use 1/16384 of memory. 
On i386: 32MB +@@ -1477,70 +1602,127 @@ int __init ip_conntrack_init(void) + if (ip_conntrack_htable_size < 16) + ip_conntrack_htable_size = 16; + } +- ip_conntrack_max = 8 * ip_conntrack_htable_size; ++ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size; + + printk("ip_conntrack version %s (%u buckets, %d max)" + " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION, +- ip_conntrack_htable_size, ip_conntrack_max, ++ ip_conntrack_htable_size, ve_ip_conntrack_max, + sizeof(struct ip_conntrack)); + + ret = nf_register_sockopt(&so_getorigdst); + if (ret != 0) { + printk(KERN_ERR "Unable to register netfilter socket option\n"); +- return ret; +- } +- +- ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size, +- &ip_conntrack_vmalloc); +- if (!ip_conntrack_hash) { +- printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); +- goto err_unreg_sockopt; ++ goto out_sockopt; + } + ++ ret = -ENOMEM; + ip_conntrack_cachep = kmem_cache_create("ip_conntrack", + sizeof(struct ip_conntrack), 0, +- 0, NULL, NULL); ++ SLAB_UBC, NULL, NULL); + if (!ip_conntrack_cachep) { + printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); +- goto err_free_hash; ++ goto err_unreg_sockopt; + } + + ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect", + sizeof(struct ip_conntrack_expect), +- 0, 0, NULL, NULL); ++ 0, SLAB_UBC, NULL, NULL); + if (!ip_conntrack_expect_cachep) { + printk(KERN_ERR "Unable to create ip_expect slab cache\n"); + goto err_free_conntrack_slab; + } + ++ return 0; ++ ++err_free_conntrack_slab: ++ kmem_cache_destroy(ip_conntrack_cachep); ++err_unreg_sockopt: ++ nf_unregister_sockopt(&so_getorigdst); ++out_sockopt: ++ return ret; ++} ++ ++int ip_conntrack_init(void) ++{ ++ struct ve_struct *env; ++ unsigned int i; ++ int ret; ++ ++ env = get_exec_env(); ++#ifdef CONFIG_VE_IPTABLES ++ ret = -ENOMEM; ++ env->_ip_conntrack = ++ kmalloc(sizeof(struct ve_ip_conntrack), GFP_KERNEL); ++ if (!env->_ip_conntrack) ++ goto out; ++ memset(env->_ip_conntrack, 0, 
sizeof(struct ve_ip_conntrack)); ++ if (ve_is_super(env)) { ++ ret = ip_conntrack_cache_create(); ++ if (ret) ++ goto cache_fail; ++ } else ++ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size; ++#else /* CONFIG_VE_IPTABLES */ ++ ret = ip_conntrack_cache_create(); ++ if (ret) ++ goto out; ++#endif ++ ++ ret = -ENOMEM; ++ ve_ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size, ++ &ve_ip_conntrack_vmalloc); ++ if (!ve_ip_conntrack_hash) { ++ printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); ++ goto err_free_cache; ++ } ++ ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip_ct_protos = (struct ip_conntrack_protocol **) ++ ub_kmalloc(sizeof(void *)*MAX_IP_CT_PROTO, GFP_KERNEL); ++ if (!ve_ip_ct_protos) ++ goto err_free_hash; ++#endif + /* Don't NEED lock here, but good form anyway. */ + write_lock_bh(&ip_conntrack_lock); + for (i = 0; i < MAX_IP_CT_PROTO; i++) +- ip_ct_protos[i] = &ip_conntrack_generic_protocol; ++ ve_ip_ct_protos[i] = &ip_conntrack_generic_protocol; + /* Sew in builtin protocols. 
*/ +- ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp; +- ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp; +- ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; ++ ve_ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp; ++ ve_ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp; ++ ve_ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; + write_unlock_bh(&ip_conntrack_lock); + +- /* For use by ipt_REJECT */ +- ip_ct_attach = ip_conntrack_attach; +- +- /* Set up fake conntrack: +- - to never be deleted, not in any hashes */ +- atomic_set(&ip_conntrack_untracked.ct_general.use, 1); +- /* - and look it like as a confirmed connection */ +- set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status); ++ INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed); ++ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list); ++ INIT_LIST_HEAD(&ve_ip_conntrack_helpers); ++ ++ if (ve_is_super(env)) { ++ /* For use by ipt_REJECT */ ++ ip_ct_attach = ip_conntrack_attach; ++ ++ /* Set up fake conntrack: ++ - to never be deleted, not in any hashes */ ++ atomic_set(&ip_conntrack_untracked.ct_general.use, 1); ++ /* - and look it like as a confirmed connection */ ++ set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status); ++ } + +- return ret; ++ return 0; + +-err_free_conntrack_slab: +- kmem_cache_destroy(ip_conntrack_cachep); ++#ifdef CONFIG_VE_IPTABLES + err_free_hash: +- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, ++#endif ++ free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc, + ip_conntrack_htable_size); +-err_unreg_sockopt: +- nf_unregister_sockopt(&so_getorigdst); +- +- return -ENOMEM; ++ ve_ip_conntrack_hash = NULL; ++err_free_cache: ++ if (ve_is_super(env)) ++ ip_conntrack_cache_free(); ++#ifdef CONFIG_VE_IPTABLES ++cache_fail: ++ kfree(env->_ip_conntrack); ++ env->_ip_conntrack = NULL; ++#endif ++out: ++ return ret; + } +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_ftp.c 
linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_ftp.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_ftp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_ftp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -15,6 +15,7 @@ + #include <linux/ctype.h> + #include <net/checksum.h> + #include <net/tcp.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> + #include <linux/netfilter_ipv4/ip_conntrack_ftp.h> +@@ -425,8 +426,8 @@ static int help(struct sk_buff **pskb, + + /* Now, NAT might want to mangle the packet, and register the + * (possibly changed) expectation itself. */ +- if (ip_nat_ftp_hook) +- ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, ++ if (ve_ip_nat_ftp_hook) ++ ret = ve_ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, + matchoff, matchlen, exp, &seq); + else { + /* Can't expect this? Best to drop packet now. */ +@@ -452,16 +453,39 @@ out_update_nl: + static struct ip_conntrack_helper ftp[MAX_PORTS]; + static char ftp_names[MAX_PORTS][sizeof("ftp-65535")]; + +-/* Not __exit: called from init() */ +-static void fini(void) ++void fini_iptable_ftp(void) + { + int i; + for (i = 0; i < ports_c; i++) { + DEBUGP("ip_ct_ftp: unregistering helper for port %d\n", + ports[i]); +- ip_conntrack_helper_unregister(&ftp[i]); ++ virt_ip_conntrack_helper_unregister(&ftp[i]); + } ++} ++ ++int init_iptable_ftp(void) ++{ ++ int i, ret; + ++ for (i = 0; i < ports_c; i++) { ++ DEBUGP("ip_ct_ftp: registering helper for port %d\n", ++ ports[i]); ++ ret = virt_ip_conntrack_helper_register(&ftp[i]); ++ if (ret) { ++ fini_iptable_ftp(); ++ return ret; ++ } ++ } ++ return 0; ++} ++ ++/* Not __exit: called from init() */ ++static void fini(void) ++{ ++ KSYMMODUNRESOLVE(ip_conntrack_ftp); ++ KSYMUNRESOLVE(init_iptable_ftp); ++ KSYMUNRESOLVE(fini_iptable_ftp); ++ fini_iptable_ftp(); + kfree(ftp_buffer); + } + +@@ -496,13 +520,17 @@ static int __init init(void) + + DEBUGP("ip_ct_ftp: 
registering helper for port %d\n", + ports[i]); +- ret = ip_conntrack_helper_register(&ftp[i]); ++ ret = virt_ip_conntrack_helper_register(&ftp[i]); + + if (ret) { + fini(); + return ret; + } + } ++ ++ KSYMRESOLVE(init_iptable_ftp); ++ KSYMRESOLVE(fini_iptable_ftp); ++ KSYMMODRESOLVE(ip_conntrack_ftp); + return 0; + } + +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_irc.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_irc.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_irc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_irc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -28,6 +28,7 @@ + #include <linux/ip.h> + #include <net/checksum.h> + #include <net/tcp.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> + #include <linux/netfilter_ipv4/ip_conntrack_irc.h> +@@ -244,6 +245,33 @@ static char irc_names[MAX_PORTS][sizeof( + + static void fini(void); + ++void fini_iptable_irc(void) ++{ ++ int i; ++ for (i = 0; i < ports_c; i++) { ++ DEBUGP("unregistering port %d\n", ++ ports[i]); ++ virt_ip_conntrack_helper_unregister(&irc_helpers[i]); ++ } ++} ++ ++int init_iptable_irc(void) ++{ ++ int i, ret; ++ ++ for (i = 0; i < ports_c; i++) { ++ DEBUGP("port #%d: %d\n", i, ports[i]); ++ ret = virt_ip_conntrack_helper_register(&irc_helpers[i]); ++ if (ret) { ++ printk("ip_conntrack_irc: ERROR registering port %d\n", ++ ports[i]); ++ fini_iptable_irc(); ++ return -EBUSY; ++ } ++ } ++ return 0; ++} ++ + static int __init init(void) + { + int i, ret; +@@ -283,7 +311,7 @@ static int __init init(void) + + DEBUGP("port #%d: %d\n", i, ports[i]); + +- ret = ip_conntrack_helper_register(hlpr); ++ ret = virt_ip_conntrack_helper_register(hlpr); + + if (ret) { + printk("ip_conntrack_irc: ERROR registering port %d\n", +@@ -292,6 +320,10 @@ static int __init init(void) + return -EBUSY; + } + } ++ ++ KSYMRESOLVE(init_iptable_irc); ++ KSYMRESOLVE(fini_iptable_irc); ++ 
KSYMMODRESOLVE(ip_conntrack_irc); + return 0; + } + +@@ -299,12 +331,10 @@ static int __init init(void) + * it is needed by the init function */ + static void fini(void) + { +- int i; +- for (i = 0; i < ports_c; i++) { +- DEBUGP("unregistering port %d\n", +- ports[i]); +- ip_conntrack_helper_unregister(&irc_helpers[i]); +- } ++ KSYMMODUNRESOLVE(ip_conntrack_irc); ++ KSYMUNRESOLVE(init_iptable_irc); ++ KSYMUNRESOLVE(fini_iptable_irc); ++ fini_iptable_irc(); + kfree(irc_buffer); + } + +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_netlink.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_netlink.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_netlink.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_netlink.c 2006-07-04 14:41:39.000000000 +0400 +@@ -29,6 +29,7 @@ + #include <linux/spinlock.h> + #include <linux/interrupt.h> + #include <linux/notifier.h> ++#include <net/sock.h> + + #include <linux/netfilter.h> + #include <linux/netfilter_ipv4/ip_conntrack.h> +@@ -39,6 +40,8 @@ + + #include <linux/netfilter/nfnetlink.h> + #include <linux/netfilter/nfnetlink_conntrack.h> ++#include <ub/beancounter.h> ++#include <ub/ub_sk.h> + + MODULE_LICENSE("GPL"); + +@@ -403,7 +406,7 @@ ctnetlink_dump_table(struct sk_buff *skb + + read_lock_bh(&ip_conntrack_lock); + for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { +- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { ++ list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) { + h = (struct ip_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; +@@ -440,7 +443,7 @@ ctnetlink_dump_table_w(struct sk_buff *s + + write_lock_bh(&ip_conntrack_lock); + for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { +- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { ++ list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) { + h = (struct ip_conntrack_tuple_hash *) i; + if 
(DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; +@@ -1003,14 +1006,15 @@ ctnetlink_change_conntrack(struct ip_con + static int + ctnetlink_create_conntrack(struct nfattr *cda[], + struct ip_conntrack_tuple *otuple, +- struct ip_conntrack_tuple *rtuple) ++ struct ip_conntrack_tuple *rtuple, ++ struct user_beancounter *ub) + { + struct ip_conntrack *ct; + int err = -EINVAL; + + DEBUGP("entered %s\n", __FUNCTION__); + +- ct = ip_conntrack_alloc(otuple, rtuple); ++ ct = ip_conntrack_alloc(otuple, rtuple, ub); + if (ct == NULL || IS_ERR(ct)) + return -ENOMEM; + +@@ -1087,8 +1091,16 @@ ctnetlink_new_conntrack(struct sock *ctn + write_unlock_bh(&ip_conntrack_lock); + DEBUGP("no such conntrack, create new\n"); + err = -ENOENT; +- if (nlh->nlmsg_flags & NLM_F_CREATE) +- err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); ++ if (nlh->nlmsg_flags & NLM_F_CREATE) { ++#ifdef CONFIG_USER_RESOURCE ++ if (skb->sk) ++ err = ctnetlink_create_conntrack(cda, &otuple, ++ &rtuple, sock_bc(skb->sk)->ub); ++ else ++#endif ++ err = ctnetlink_create_conntrack(cda, ++ &otuple, &rtuple, NULL); ++ } + return err; + } + /* implicit 'else' */ +@@ -1249,7 +1261,7 @@ ctnetlink_exp_dump_table(struct sk_buff + DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id); + + read_lock_bh(&ip_conntrack_lock); +- list_for_each_prev(i, &ip_conntrack_expect_list) { ++ list_for_each_prev(i, &ve_ip_conntrack_expect_list) { + exp = (struct ip_conntrack_expect *) i; + if (exp->id <= *id) + continue; +@@ -1395,7 +1407,7 @@ ctnetlink_del_expect(struct sock *ctnl, + write_unlock_bh(&ip_conntrack_lock); + return -EINVAL; + } +- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, ++ list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list, + list) { + if (exp->master->helper == h + && del_timer(&exp->timeout)) { +@@ -1407,7 +1419,7 @@ ctnetlink_del_expect(struct sock *ctnl, + } else { + /* This basically means we have to flush everything*/ + write_lock_bh(&ip_conntrack_lock); +- 
list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, ++ list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list, + list) { + if (del_timer(&exp->timeout)) { + ip_ct_unlink_expect(exp); +@@ -1619,7 +1631,7 @@ static void __exit ctnetlink_exit(void) + printk("ctnetlink: unregistering from nfnetlink.\n"); + + #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +- ip_conntrack_unregister_notifier(&ctnl_notifier_exp); ++ ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp); + ip_conntrack_unregister_notifier(&ctnl_notifier); + #endif + +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_generic.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_generic.c 2006-07-04 14:41:39.000000000 +0400 +@@ -52,7 +52,7 @@ static int packet(struct ip_conntrack *c + const struct sk_buff *skb, + enum ip_conntrack_info ctinfo) + { +- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout); ++ ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_generic_timeout); + return NF_ACCEPT; + } + +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_icmp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -104,7 +104,7 @@ static int icmp_packet(struct ip_conntra + } else { + atomic_inc(&ct->proto.icmp.count); + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); +- ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout); ++ ip_ct_refresh_acct(ct, ctinfo, skb, ve_ip_ct_icmp_timeout); + } + + return NF_ACCEPT; +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_sctp.c 
linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_sctp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_sctp.c 2006-07-04 14:41:36.000000000 +0400 +@@ -235,12 +235,15 @@ static int do_basic_checks(struct ip_con + flag = 1; + } + +- /* Cookie Ack/Echo chunks not the first OR +- Init / Init Ack / Shutdown compl chunks not the only chunks */ +- if ((sch->type == SCTP_CID_COOKIE_ACK ++ /* ++ * Cookie Ack/Echo chunks not the first OR ++ * Init / Init Ack / Shutdown compl chunks not the only chunks ++ * OR zero-length. ++ */ ++ if (((sch->type == SCTP_CID_COOKIE_ACK + || sch->type == SCTP_CID_COOKIE_ECHO + || flag) +- && count !=0 ) { ++ && count !=0) || !sch->length) { + DEBUGP("Basic checks failed\n"); + return 1; + } +@@ -251,7 +254,7 @@ static int do_basic_checks(struct ip_con + } + + DEBUGP("Basic checks passed\n"); +- return 0; ++ return count == 0; + } + + static int new_state(enum ip_conntrack_dir dir, +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -98,7 +98,7 @@ unsigned int ip_ct_tcp_timeout_close = + to ~13-30min depending on RTO. 
*/ + unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; + +-static const unsigned int * tcp_timeouts[] ++const unsigned int * tcp_timeouts[] + = { NULL, /* TCP_CONNTRACK_NONE */ + &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ + &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ +@@ -762,7 +762,7 @@ static int tcp_in_window(struct ip_ct_tc + : "SEQ is under the lower bound (already ACKed data retransmitted)" + : "SEQ is over the upper bound (over the window of the receiver)"); + +- res = ip_ct_tcp_be_liberal; ++ res = ve_ip_ct_tcp_be_liberal; + } + + DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " +@@ -1033,9 +1033,11 @@ static int tcp_packet(struct ip_conntrac + && (new_state == TCP_CONNTRACK_FIN_WAIT + || new_state == TCP_CONNTRACK_CLOSE)) + conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; +- timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans +- && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans +- ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; ++ timeout = conntrack->proto.tcp.retrans >= ve_ip_ct_tcp_max_retrans && ++ ve_ip_ct_tcp_timeouts[new_state] > ++ ve_ip_ct_tcp_timeout_max_retrans ++ ? ve_ip_ct_tcp_timeout_max_retrans : ++ ve_ip_ct_tcp_timeouts[new_state]; + write_unlock_bh(&tcp_lock); + + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); +@@ -1110,7 +1112,7 @@ static int tcp_new(struct ip_conntrack * + conntrack->proto.tcp.seen[1].flags = 0; + conntrack->proto.tcp.seen[0].loose = + conntrack->proto.tcp.seen[1].loose = 0; +- } else if (ip_ct_tcp_loose == 0) { ++ } else if (ve_ip_ct_tcp_loose == 0) { + /* Don't try to pick up connections. 
*/ + return 0; + } else { +@@ -1134,7 +1136,7 @@ static int tcp_new(struct ip_conntrack * + conntrack->proto.tcp.seen[0].flags = + conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; + conntrack->proto.tcp.seen[0].loose = +- conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose; ++ conntrack->proto.tcp.seen[1].loose = ve_ip_ct_tcp_loose; + } + + conntrack->proto.tcp.seen[1].td_end = 0; +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_udp.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -71,12 +71,12 @@ static int udp_packet(struct ip_conntrac + stream. Extend timeout. */ + if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { + ip_ct_refresh_acct(conntrack, ctinfo, skb, +- ip_ct_udp_timeout_stream); ++ ve_ip_ct_udp_timeout_stream); + /* Also, more likely to be important, and not a probe */ + if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) + ip_conntrack_event_cache(IPCT_STATUS, skb); + } else +- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout); ++ ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_udp_timeout); + + return NF_ACCEPT; + } +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_standalone.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_standalone.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_conntrack_standalone.c 2006-07-04 14:41:39.000000000 +0400 +@@ -28,6 +28,7 @@ + #include <net/checksum.h> + #include <net/ip.h> + #include <net/route.h> ++#include <linux/nfcalls.h> + + #define ASSERT_READ_LOCK(x) + #define ASSERT_WRITE_LOCK(x) +@@ -46,9 +47,31 @@ + + MODULE_LICENSE("GPL"); + ++int ip_conntrack_disable_ve0 = 0; 
++module_param(ip_conntrack_disable_ve0, int, 0440); ++ + extern atomic_t ip_conntrack_count; ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ip_conntrack_count \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_count) ++#else ++#define ve_ip_conntrack_count ip_conntrack_count ++#endif + DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); + ++/* Prior to 2.6.15, we had a ip_conntrack_enable_ve0 param. */ ++static int warn_set(const char *val, struct kernel_param *kp) ++{ ++ printk(KERN_INFO KBUILD_MODNAME ++ ": parameter ip_conntrack_enable_ve0 is obsoleted. In ovzkernel" ++ " >= 2.6.15 connection tracking on hardware node is enabled by " ++ "default, use ip_conntrack_disable_ve0=1 parameter to " ++ "disable.\n"); ++ return 0; ++} ++module_param_call(ip_conntrack_enable_ve0, warn_set, NULL, NULL, 0); ++ + static int kill_proto(struct ip_conntrack *i, void *data) + { + return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == +@@ -89,8 +112,8 @@ static struct list_head *ct_get_first(st + for (st->bucket = 0; + st->bucket < ip_conntrack_htable_size; + st->bucket++) { +- if (!list_empty(&ip_conntrack_hash[st->bucket])) +- return ip_conntrack_hash[st->bucket].next; ++ if (!list_empty(&ve_ip_conntrack_hash[st->bucket])) ++ return ve_ip_conntrack_hash[st->bucket].next; + } + return NULL; + } +@@ -100,10 +123,10 @@ static struct list_head *ct_get_next(str + struct ct_iter_state *st = seq->private; + + head = head->next; +- while (head == &ip_conntrack_hash[st->bucket]) { ++ while (head == &ve_ip_conntrack_hash[st->bucket]) { + if (++st->bucket >= ip_conntrack_htable_size) + return NULL; +- head = ip_conntrack_hash[st->bucket].next; ++ head = ve_ip_conntrack_hash[st->bucket].next; + } + return head; + } +@@ -234,7 +257,7 @@ static struct file_operations ct_file_op + /* expects */ + static void *exp_seq_start(struct seq_file *s, loff_t *pos) + { +- struct list_head *e = &ip_conntrack_expect_list; ++ struct list_head *e = 
&ve_ip_conntrack_expect_list; + loff_t i; + + /* strange seq_file api calls stop even if we fail, +@@ -246,7 +269,7 @@ static void *exp_seq_start(struct seq_fi + + for (i = 0; i <= *pos; i++) { + e = e->next; +- if (e == &ip_conntrack_expect_list) ++ if (e == &ve_ip_conntrack_expect_list) + return NULL; + } + return e; +@@ -259,7 +282,7 @@ static void *exp_seq_next(struct seq_fil + ++*pos; + e = e->next; + +- if (e == &ip_conntrack_expect_list) ++ if (e == &ve_ip_conntrack_expect_list) + return NULL; + + return e; +@@ -344,7 +367,7 @@ static void ct_cpu_seq_stop(struct seq_f + + static int ct_cpu_seq_show(struct seq_file *seq, void *v) + { +- unsigned int nr_conntracks = atomic_read(&ip_conntrack_count); ++ unsigned int nr_conntracks = atomic_read(&ve_ip_conntrack_count); + struct ip_conntrack_stat *st = v; + + if (v == SEQ_START_TOKEN) { +@@ -541,6 +564,28 @@ static struct nf_hook_ops ip_conntrack_l + + /* From ip_conntrack_core.c */ + extern int ip_conntrack_max; ++#ifdef CONFIG_VE_IPTABLES ++#define ve_ip_conntrack_max \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_max) ++#define ve_ip_ct_sysctl_header \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_header) ++#define ve_ip_ct_net_table \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_net_table) ++#define ve_ip_ct_ipv4_table \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_ipv4_table) ++#define ve_ip_ct_netfilter_table \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_netfilter_table) ++#define ve_ip_ct_sysctl_table \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_table) ++#else ++#define ve_ip_conntrack_max ip_conntrack_max ++static struct ctl_table_header *ip_ct_sysctl_header; ++#define ve_ip_ct_sysctl_header ip_ct_sysctl_header ++#define ve_ip_ct_net_table ip_ct_net_table ++#define ve_ip_ct_ipv4_table ip_ct_ipv4_table ++#define ve_ip_ct_netfilter_table ip_ct_netfilter_table ++#define ve_ip_ct_sysctl_table ip_ct_sysctl_table ++#endif + extern unsigned int ip_conntrack_htable_size; + + /* From ip_conntrack_proto_tcp.c 
*/ +@@ -571,8 +616,6 @@ extern unsigned int ip_ct_generic_timeou + static int log_invalid_proto_min = 0; + static int log_invalid_proto_max = 255; + +-static struct ctl_table_header *ip_ct_sysctl_header; +- + static ctl_table ip_ct_sysctl_table[] = { + { + .ctl_name = NET_IPV4_NF_CONNTRACK_MAX, +@@ -781,6 +824,112 @@ static ctl_table ip_ct_net_table[] = { + }; + + EXPORT_SYMBOL(ip_ct_log_invalid); ++ ++#ifdef CONFIG_VE_IPTABLES ++static void ip_conntrack_sysctl_cleanup(void) ++{ ++ if (!ve_is_super(get_exec_env())) { ++ kfree(ve_ip_ct_net_table); ++ kfree(ve_ip_ct_ipv4_table); ++ kfree(ve_ip_ct_netfilter_table); ++ kfree(ve_ip_ct_sysctl_table); ++ } ++ ve_ip_ct_net_table = NULL; ++ ve_ip_ct_ipv4_table = NULL; ++ ve_ip_ct_netfilter_table = NULL; ++ ve_ip_ct_sysctl_table = NULL; ++} ++ ++#define ALLOC_ENVCTL(field,k,label) \ ++ if ( !(field = kmalloc(k*sizeof(ctl_table), GFP_KERNEL)) ) \ ++ goto label; ++static int ip_conntrack_sysctl_init(void) ++{ ++ int i, ret = 0; ++ ++ ret = -ENOMEM; ++ if (ve_is_super(get_exec_env())) { ++ ve_ip_ct_net_table = ip_ct_net_table; ++ ve_ip_ct_ipv4_table = ip_ct_ipv4_table; ++ ve_ip_ct_netfilter_table = ip_ct_netfilter_table; ++ ve_ip_ct_sysctl_table = ip_ct_sysctl_table; ++ } else { ++ /* allocate structures in ve_struct */ ++ ALLOC_ENVCTL(ve_ip_ct_net_table, 2, out); ++ ALLOC_ENVCTL(ve_ip_ct_ipv4_table, 2, nomem_1); ++ ALLOC_ENVCTL(ve_ip_ct_netfilter_table, 3, nomem_2); ++ ALLOC_ENVCTL(ve_ip_ct_sysctl_table, 21, nomem_3); ++ ++ memcpy(ve_ip_ct_net_table, ip_ct_net_table, ++ 2*sizeof(ctl_table)); ++ memcpy(ve_ip_ct_ipv4_table, ip_ct_ipv4_table, ++ 2*sizeof(ctl_table)); ++ memcpy(ve_ip_ct_netfilter_table, ip_ct_netfilter_table, ++ 3*sizeof(ctl_table)); ++ memcpy(ve_ip_ct_sysctl_table, ip_ct_sysctl_table, ++ 21*sizeof(ctl_table)); ++ ++ ve_ip_ct_net_table[0].child = ve_ip_ct_ipv4_table; ++ ve_ip_ct_ipv4_table[0].child = ve_ip_ct_netfilter_table; ++ ve_ip_ct_netfilter_table[0].child = ve_ip_ct_sysctl_table; ++ } ++ 
ve_ip_ct_sysctl_table[0].data = &ve_ip_conntrack_max; ++ ve_ip_ct_netfilter_table[1].data = &ve_ip_conntrack_max; ++ ve_ip_ct_sysctl_table[1].data = &ve_ip_conntrack_count; ++ /* skip ve_ip_ct_sysctl_table[2].data as it is read-only and common ++ * for all environments */ ++ ve_ip_ct_tcp_timeouts[1] = ip_ct_tcp_timeout_syn_sent; ++ ve_ip_ct_sysctl_table[3].data = &ve_ip_ct_tcp_timeouts[1]; ++ ve_ip_ct_tcp_timeouts[2] = ip_ct_tcp_timeout_syn_recv; ++ ve_ip_ct_sysctl_table[4].data = &ve_ip_ct_tcp_timeouts[2]; ++ ve_ip_ct_tcp_timeouts[3] = ip_ct_tcp_timeout_established; ++ ve_ip_ct_sysctl_table[5].data = &ve_ip_ct_tcp_timeouts[3]; ++ ve_ip_ct_tcp_timeouts[4] = ip_ct_tcp_timeout_fin_wait; ++ ve_ip_ct_sysctl_table[6].data = &ve_ip_ct_tcp_timeouts[4]; ++ ve_ip_ct_tcp_timeouts[5] = ip_ct_tcp_timeout_close_wait; ++ ve_ip_ct_sysctl_table[7].data = &ve_ip_ct_tcp_timeouts[5]; ++ ve_ip_ct_tcp_timeouts[6] = ip_ct_tcp_timeout_last_ack; ++ ve_ip_ct_sysctl_table[8].data = &ve_ip_ct_tcp_timeouts[6]; ++ ve_ip_ct_tcp_timeouts[7] = ip_ct_tcp_timeout_time_wait; ++ ve_ip_ct_sysctl_table[9].data = &ve_ip_ct_tcp_timeouts[7]; ++ ve_ip_ct_tcp_timeouts[8] = ip_ct_tcp_timeout_close; ++ ve_ip_ct_sysctl_table[10].data = &ve_ip_ct_tcp_timeouts[8]; ++ ve_ip_ct_udp_timeout = ip_ct_udp_timeout; ++ ve_ip_ct_sysctl_table[11].data = &ve_ip_ct_udp_timeout; ++ ve_ip_ct_udp_timeout_stream = ip_ct_udp_timeout_stream; ++ ve_ip_ct_sysctl_table[12].data = &ve_ip_ct_udp_timeout_stream; ++ ve_ip_ct_icmp_timeout = ip_ct_icmp_timeout; ++ ve_ip_ct_sysctl_table[13].data = &ve_ip_ct_icmp_timeout; ++ ve_ip_ct_generic_timeout = ip_ct_generic_timeout; ++ ve_ip_ct_sysctl_table[14].data = &ve_ip_ct_generic_timeout; ++ ve_ip_ct_log_invalid = ip_ct_log_invalid; ++ ve_ip_ct_sysctl_table[15].data = &ve_ip_ct_log_invalid; ++ ve_ip_ct_tcp_timeout_max_retrans = ip_ct_tcp_timeout_max_retrans; ++ ve_ip_ct_sysctl_table[16].data = &ve_ip_ct_tcp_timeout_max_retrans; ++ ve_ip_ct_tcp_loose = ip_ct_tcp_loose; ++ 
ve_ip_ct_sysctl_table[17].data = &ve_ip_ct_tcp_loose; ++ ve_ip_ct_tcp_be_liberal = ip_ct_tcp_be_liberal; ++ ve_ip_ct_sysctl_table[18].data = &ve_ip_ct_tcp_be_liberal; ++ ve_ip_ct_tcp_max_retrans = ip_ct_tcp_max_retrans; ++ ve_ip_ct_sysctl_table[19].data = &ve_ip_ct_tcp_max_retrans; ++ for (i = 0; i < 20; i++) ++ ve_ip_ct_sysctl_table[i].owner_env = get_exec_env(); ++ ve_ip_ct_netfilter_table[1].owner_env = get_exec_env(); ++ return 0; ++ ++nomem_3: ++ kfree(ve_ip_ct_netfilter_table); ++ ve_ip_ct_netfilter_table = NULL; ++nomem_2: ++ kfree(ve_ip_ct_ipv4_table); ++ ve_ip_ct_ipv4_table = NULL; ++nomem_1: ++ kfree(ve_ip_ct_net_table); ++ ve_ip_ct_net_table = NULL; ++out: ++ return ret; ++} ++#endif /*CONFIG_VE*/ + #endif /* CONFIG_SYSCTL */ + + static int init_or_cleanup(int init) +@@ -792,9 +941,16 @@ static int init_or_cleanup(int init) + + if (!init) goto cleanup; + ++ ret = -ENOENT; ++ if (!ve_is_super(get_exec_env())) ++ __module_get(THIS_MODULE); ++ + ret = ip_conntrack_init(); + if (ret < 0) +- goto cleanup_nothing; ++ goto cleanup_unget; ++ ++ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0) ++ return 0; + + #ifdef CONFIG_PROC_FS + ret = -ENOMEM; +@@ -804,98 +960,115 @@ static int init_or_cleanup(int init) + proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + &exp_file_ops); + if (!proc_exp) goto cleanup_proc; ++ proc_exp->proc_fops = &exp_file_ops; + +- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); +- if (!proc_stat) +- goto cleanup_proc_exp; ++ if (ve_is_super(get_exec_env())) { ++ proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); ++ if (!proc_stat) ++ goto cleanup_proc_exp; + +- proc_stat->proc_fops = &ct_cpu_seq_fops; +- proc_stat->owner = THIS_MODULE; ++ proc_stat->proc_fops = &ct_cpu_seq_fops; ++ proc_stat->owner = THIS_MODULE; ++ } + #endif + +- ret = nf_register_hook(&ip_conntrack_defrag_ops); ++ ret = virt_nf_register_hook(&ip_conntrack_defrag_ops); + if (ret < 0) { + 
printk("ip_conntrack: can't register pre-routing defrag hook.\n"); + goto cleanup_proc_stat; + } +- ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops); ++ ret = virt_nf_register_hook(&ip_conntrack_defrag_local_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local_out defrag hook.\n"); + goto cleanup_defragops; + } +- ret = nf_register_hook(&ip_conntrack_in_ops); ++ ret = virt_nf_register_hook(&ip_conntrack_in_ops); + if (ret < 0) { + printk("ip_conntrack: can't register pre-routing hook.\n"); + goto cleanup_defraglocalops; + } +- ret = nf_register_hook(&ip_conntrack_local_out_ops); ++ ret = virt_nf_register_hook(&ip_conntrack_local_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local out hook.\n"); + goto cleanup_inops; + } +- ret = nf_register_hook(&ip_conntrack_helper_in_ops); ++ ret = virt_nf_register_hook(&ip_conntrack_helper_in_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local in helper hook.\n"); + goto cleanup_inandlocalops; + } +- ret = nf_register_hook(&ip_conntrack_helper_out_ops); ++ ret = virt_nf_register_hook(&ip_conntrack_helper_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register postrouting helper hook.\n"); + goto cleanup_helperinops; + } +- ret = nf_register_hook(&ip_conntrack_out_ops); ++ ret = virt_nf_register_hook(&ip_conntrack_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register post-routing hook.\n"); + goto cleanup_helperoutops; + } +- ret = nf_register_hook(&ip_conntrack_local_in_ops); ++ ret = virt_nf_register_hook(&ip_conntrack_local_in_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local in hook.\n"); + goto cleanup_inoutandlocalops; + } + #ifdef CONFIG_SYSCTL +- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0); +- if (ip_ct_sysctl_header == NULL) { ++#ifdef CONFIG_VE_IPTABLES ++ ret = ip_conntrack_sysctl_init(); ++ if (ret < 0) ++ goto cleanup_sysctl; ++#endif ++ ret = -ENOMEM; ++ ve_ip_ct_sysctl_header = 
register_sysctl_table(ve_ip_ct_net_table, 0); ++ if (ve_ip_ct_sysctl_header == NULL) { + printk("ip_conntrack: can't register to sysctl.\n"); +- ret = -ENOMEM; +- goto cleanup_localinops; ++ goto cleanup_sysctl2; + } + #endif + +- return ret; ++ return 0; + + cleanup: ++ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0) ++ goto cleanup_init; + synchronize_net(); + #ifdef CONFIG_SYSCTL +- unregister_sysctl_table(ip_ct_sysctl_header); +- cleanup_localinops: ++ unregister_sysctl_table(ve_ip_ct_sysctl_header); ++ cleanup_sysctl2: ++#ifdef CONFIG_VE_IPTABLES ++ ip_conntrack_sysctl_cleanup(); ++ cleanup_sysctl: ++#endif + #endif +- nf_unregister_hook(&ip_conntrack_local_in_ops); ++ virt_nf_unregister_hook(&ip_conntrack_local_in_ops); + cleanup_inoutandlocalops: +- nf_unregister_hook(&ip_conntrack_out_ops); ++ virt_nf_unregister_hook(&ip_conntrack_out_ops); + cleanup_helperoutops: +- nf_unregister_hook(&ip_conntrack_helper_out_ops); ++ virt_nf_unregister_hook(&ip_conntrack_helper_out_ops); + cleanup_helperinops: +- nf_unregister_hook(&ip_conntrack_helper_in_ops); ++ virt_nf_unregister_hook(&ip_conntrack_helper_in_ops); + cleanup_inandlocalops: +- nf_unregister_hook(&ip_conntrack_local_out_ops); ++ virt_nf_unregister_hook(&ip_conntrack_local_out_ops); + cleanup_inops: +- nf_unregister_hook(&ip_conntrack_in_ops); ++ virt_nf_unregister_hook(&ip_conntrack_in_ops); + cleanup_defraglocalops: +- nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); ++ virt_nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); + cleanup_defragops: +- nf_unregister_hook(&ip_conntrack_defrag_ops); ++ virt_nf_unregister_hook(&ip_conntrack_defrag_ops); + cleanup_proc_stat: + #ifdef CONFIG_PROC_FS +- remove_proc_entry("ip_conntrack", proc_net_stat); ++ if (ve_is_super(get_exec_env())) ++ remove_proc_entry("ip_conntrack", proc_net_stat); + cleanup_proc_exp: + proc_net_remove("ip_conntrack_expect"); + cleanup_proc: + proc_net_remove("ip_conntrack"); +- cleanup_init: + #endif /* 
CONFIG_PROC_FS */ ++ cleanup_init: + ip_conntrack_cleanup(); +- cleanup_nothing: ++ cleanup_unget: ++ if (!ve_is_super(get_exec_env())) ++ module_put(THIS_MODULE); + return ret; + } + +@@ -906,11 +1079,11 @@ int ip_conntrack_protocol_register(struc + int ret = 0; + + write_lock_bh(&ip_conntrack_lock); +- if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) { ++ if (ve_ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) { + ret = -EBUSY; + goto out; + } +- ip_ct_protos[proto->proto] = proto; ++ ve_ip_ct_protos[proto->proto] = proto; + out: + write_unlock_bh(&ip_conntrack_lock); + return ret; +@@ -919,7 +1092,7 @@ int ip_conntrack_protocol_register(struc + void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) + { + write_lock_bh(&ip_conntrack_lock); +- ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol; ++ ve_ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol; + write_unlock_bh(&ip_conntrack_lock); + + /* Somebody could be still looking at the proto in bh. */ +@@ -929,17 +1102,39 @@ void ip_conntrack_protocol_unregister(st + ip_ct_iterate_cleanup(kill_proto, &proto->proto); + } + +-static int __init init(void) ++int init_iptable_conntrack(void) + { + return init_or_cleanup(1); + } + +-static void __exit fini(void) ++void fini_iptable_conntrack(void) + { + init_or_cleanup(0); + } + +-module_init(init); ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_conntrack(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_conntrack); ++ KSYMRESOLVE(fini_iptable_conntrack); ++ KSYMMODRESOLVE(ip_conntrack); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ip_conntrack); ++ KSYMUNRESOLVE(init_iptable_conntrack); ++ KSYMUNRESOLVE(fini_iptable_conntrack); ++ fini_iptable_conntrack(); ++} ++ ++subsys_initcall(init); + module_exit(fini); + + /* Some modules need us, but don't depend directly on any symbol. 
+@@ -956,15 +1151,20 @@ EXPORT_SYMBOL_GPL(ip_conntrack_unregiste + EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init); + EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); + #endif ++EXPORT_SYMBOL(ip_conntrack_disable_ve0); + EXPORT_SYMBOL(ip_conntrack_protocol_register); + EXPORT_SYMBOL(ip_conntrack_protocol_unregister); + EXPORT_SYMBOL(ip_ct_get_tuple); + EXPORT_SYMBOL(invert_tuplepr); + EXPORT_SYMBOL(ip_conntrack_alter_reply); ++#ifndef CONFIG_VE_IPTABLES + EXPORT_SYMBOL(ip_conntrack_destroyed); ++#endif + EXPORT_SYMBOL(need_conntrack); + EXPORT_SYMBOL(ip_conntrack_helper_register); + EXPORT_SYMBOL(ip_conntrack_helper_unregister); ++EXPORT_SYMBOL(virt_ip_conntrack_helper_register); ++EXPORT_SYMBOL(virt_ip_conntrack_helper_unregister); + EXPORT_SYMBOL(ip_ct_iterate_cleanup); + EXPORT_SYMBOL(__ip_ct_refresh_acct); + +@@ -974,14 +1174,18 @@ EXPORT_SYMBOL_GPL(__ip_conntrack_expect_ + EXPORT_SYMBOL_GPL(ip_conntrack_expect_find); + EXPORT_SYMBOL(ip_conntrack_expect_related); + EXPORT_SYMBOL(ip_conntrack_unexpect_related); ++#ifndef CONFIG_VE_IPTABLES + EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); ++#endif + EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); + + EXPORT_SYMBOL(ip_conntrack_tuple_taken); + EXPORT_SYMBOL(ip_ct_gather_frags); + EXPORT_SYMBOL(ip_conntrack_htable_size); + EXPORT_SYMBOL(ip_conntrack_lock); ++#ifndef CONFIG_VE_IPTABLES + EXPORT_SYMBOL(ip_conntrack_hash); ++#endif + EXPORT_SYMBOL(ip_conntrack_untracked); + EXPORT_SYMBOL_GPL(ip_conntrack_find_get); + #ifdef CONFIG_IP_NF_NAT_NEEDED +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_core.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_core.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_core.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_core.c 2006-07-04 14:41:39.000000000 +0400 +@@ -21,6 +21,8 @@ + #include <linux/icmp.h> + #include <linux/udp.h> + #include <linux/jhash.h> ++#include <linux/nfcalls.h> ++#include <ub/ub_mem.h> + + #define ASSERT_READ_LOCK(x) + 
#define ASSERT_WRITE_LOCK(x) +@@ -46,15 +48,24 @@ DEFINE_RWLOCK(ip_nat_lock); + /* Calculated at init based on memory size */ + static unsigned int ip_nat_htable_size; + +-static struct list_head *bysource; +- + #define MAX_IP_NAT_PROTO 256 ++ ++#ifdef CONFIG_VE_IPTABLES ++#define ve_ip_nat_bysource \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_bysource) ++#define ve_ip_nat_protos \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_protos) ++#else ++static struct list_head *bysource; ++#define ve_ip_nat_bysource bysource + static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; ++#define ve_ip_nat_protos ip_nat_protos ++#endif + + static inline struct ip_nat_protocol * + __ip_nat_proto_find(u_int8_t protonum) + { +- return ip_nat_protos[protonum]; ++ return ve_ip_nat_protos[protonum]; + } + + struct ip_nat_protocol * +@@ -177,7 +188,7 @@ find_appropriate_src(const struct ip_con + struct ip_conntrack *ct; + + read_lock_bh(&ip_nat_lock); +- list_for_each_entry(ct, &bysource[h], nat.info.bysource) { ++ list_for_each_entry(ct, &ve_ip_nat_bysource[h], nat.info.bysource) { + if (same_src(ct, tuple)) { + /* Copy source part from reply tuple. 
*/ + invert_tuplepr(result, +@@ -291,13 +302,22 @@ get_unique_tuple(struct ip_conntrack_tup + ip_nat_proto_put(proto); + } + ++void ip_nat_hash_conntrack(struct ip_conntrack *conntrack) ++{ ++ unsigned int srchash ++ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple); ++ write_lock_bh(&ip_nat_lock); ++ list_add(&conntrack->nat.info.bysource, &ve_ip_nat_bysource[srchash]); ++ write_unlock_bh(&ip_nat_lock); ++} ++EXPORT_SYMBOL_GPL(ip_nat_hash_conntrack); ++ + unsigned int + ip_nat_setup_info(struct ip_conntrack *conntrack, + const struct ip_nat_range *range, + unsigned int hooknum) + { + struct ip_conntrack_tuple curr_tuple, new_tuple; +- struct ip_nat_info *info = &conntrack->nat.info; + int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK); + enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum); + +@@ -332,14 +352,8 @@ ip_nat_setup_info(struct ip_conntrack *c + } + + /* Place in source hash if this is the first time. */ +- if (have_to_hash) { +- unsigned int srchash +- = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] +- .tuple); +- write_lock_bh(&ip_nat_lock); +- list_add(&info->bysource, &bysource[srchash]); +- write_unlock_bh(&ip_nat_lock); +- } ++ if (have_to_hash) ++ ip_nat_hash_conntrack(conntrack); + + /* It's done. 
*/ + if (maniptype == IP_NAT_MANIP_DST) +@@ -521,11 +535,11 @@ int ip_nat_protocol_register(struct ip_n + int ret = 0; + + write_lock_bh(&ip_nat_lock); +- if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) { ++ if (ve_ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) { + ret = -EBUSY; + goto out; + } +- ip_nat_protos[proto->protonum] = proto; ++ ve_ip_nat_protos[proto->protonum] = proto; + out: + write_unlock_bh(&ip_nat_lock); + return ret; +@@ -536,7 +550,7 @@ EXPORT_SYMBOL(ip_nat_protocol_register); + void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) + { + write_lock_bh(&ip_nat_lock); +- ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol; ++ ve_ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol; + write_unlock_bh(&ip_nat_lock); + + /* Someone could be still looking at the proto in a bh. */ +@@ -589,38 +603,55 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_ + EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); + #endif + +-static int __init ip_nat_init(void) ++static int ip_nat_init(void) + { + size_t i; ++ int ret; + +- /* Leave them the same for the moment. */ +- ip_nat_htable_size = ip_conntrack_htable_size; ++ if (ve_is_super(get_exec_env())) ++ ip_nat_htable_size = ip_conntrack_htable_size; + + /* One vmalloc for both hash tables */ +- bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size); +- if (!bysource) +- return -ENOMEM; ++ ret = -ENOMEM; ++ ve_ip_nat_bysource = ++ ub_vmalloc(sizeof(struct list_head)*ip_nat_htable_size*2); ++ if (!ve_ip_nat_bysource) ++ goto nomem; ++ ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip_nat_protos = ++ ub_kmalloc(sizeof(void *)*MAX_IP_NAT_PROTO, GFP_KERNEL); ++ if (!ve_ip_nat_protos) ++ goto nomem2; ++#endif + + /* Sew in builtin protocols. 
*/ + write_lock_bh(&ip_nat_lock); + for (i = 0; i < MAX_IP_NAT_PROTO; i++) +- ip_nat_protos[i] = &ip_nat_unknown_protocol; +- ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp; +- ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp; +- ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp; ++ ve_ip_nat_protos[i] = &ip_nat_unknown_protocol; ++ ve_ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp; ++ ve_ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp; ++ ve_ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp; + write_unlock_bh(&ip_nat_lock); + + for (i = 0; i < ip_nat_htable_size; i++) { +- INIT_LIST_HEAD(&bysource[i]); ++ INIT_LIST_HEAD(&ve_ip_nat_bysource[i]); + } + + /* FIXME: Man, this is a hack. <SIGH> */ + IP_NF_ASSERT(ip_conntrack_destroyed == NULL); +- ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; ++ ve_ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; + +- /* Initialize fake conntrack so that NAT will skip it */ +- ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK; ++ if (ve_is_super(get_exec_env())) ++ /* Initialize fake conntrack so that NAT will skip it */ ++ ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK; + return 0; ++#ifdef CONFIG_VE_IPTABLES ++nomem2: ++#endif ++ vfree(ve_ip_nat_bysource); ++nomem: ++ return ret; + } + + /* Clear NAT section of all conntracks, in case we're loaded again. 
*/ +@@ -631,14 +662,41 @@ static int clean_nat(struct ip_conntrack + return 0; + } + +-static void __exit ip_nat_cleanup(void) ++static void ip_nat_cleanup(void) + { + ip_ct_iterate_cleanup(&clean_nat, NULL); +- ip_conntrack_destroyed = NULL; +- vfree(bysource); ++ ve_ip_conntrack_destroyed = NULL; ++ vfree(ve_ip_nat_bysource); ++ ve_ip_nat_bysource = NULL; ++#ifdef CONFIG_VE_IPTABLES ++ kfree(ve_ip_nat_protos); ++ ve_ip_nat_protos = NULL; ++#endif ++} ++ ++static int __init init(void) ++{ ++ int err; ++ ++ err = ip_nat_init(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(ip_nat_init); ++ KSYMRESOLVE(ip_nat_cleanup); ++ KSYMMODRESOLVE(ip_nat); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ip_nat); ++ KSYMUNRESOLVE(ip_nat_cleanup); ++ KSYMUNRESOLVE(ip_nat_init); ++ ip_nat_cleanup(); + } + + MODULE_LICENSE("GPL"); + +-module_init(ip_nat_init); +-module_exit(ip_nat_cleanup); ++fs_initcall(init); ++module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_ftp.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_ftp.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_ftp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_ftp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -19,6 +19,7 @@ + #include <linux/netfilter_ipv4/ip_nat_rule.h> + #include <linux/netfilter_ipv4/ip_conntrack_ftp.h> + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); +@@ -154,18 +155,43 @@ static unsigned int ip_nat_ftp(struct sk + return NF_ACCEPT; + } + +-static void __exit fini(void) ++#ifdef CONFIG_VE_IPTABLES ++#undef ve_ip_nat_ftp_hook ++#define ve_ip_nat_ftp_hook \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook) ++#endif ++int init_iptable_nat_ftp(void) + { +- ip_nat_ftp_hook = NULL; ++ BUG_ON(ve_ip_nat_ftp_hook); ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip_nat_ftp_hook = 
(ip_nat_helper_func)ip_nat_ftp; ++#else ++ ve_ip_nat_ftp_hook = ip_nat_ftp; ++#endif ++ return 0; ++} ++ ++void fini_iptable_nat_ftp(void) ++{ ++ ve_ip_nat_ftp_hook = NULL; + /* Make sure noone calls it, meanwhile. */ + synchronize_net(); + } + ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ip_nat_ftp); ++ KSYMUNRESOLVE(init_iptable_nat_ftp); ++ KSYMUNRESOLVE(fini_iptable_nat_ftp); ++ fini_iptable_nat_ftp(); ++} ++ + static int __init init(void) + { +- BUG_ON(ip_nat_ftp_hook); +- ip_nat_ftp_hook = ip_nat_ftp; +- return 0; ++ KSYMRESOLVE(init_iptable_nat_ftp); ++ KSYMRESOLVE(fini_iptable_nat_ftp); ++ KSYMMODRESOLVE(ip_nat_ftp); ++ return init_iptable_nat_ftp(); + } + + /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_irc.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_irc.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_irc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_irc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -23,6 +23,7 @@ + #include <linux/netfilter_ipv4/ip_conntrack_irc.h> + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> + #include <linux/moduleparam.h> ++#include <linux/nfcalls.h> + + #if 0 + #define DEBUGP printk +@@ -96,18 +97,44 @@ static unsigned int help(struct sk_buff + return ret; + } + +-static void __exit fini(void) ++#ifdef CONFIG_VE_IPTABLES ++#undef ve_ip_nat_irc_hook ++#define ve_ip_nat_irc_hook \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_irc_hook) ++#endif ++ ++int init_iptable_nat_irc(void) ++{ ++ BUG_ON(ve_ip_nat_irc_hook); ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip_nat_irc_hook = (ip_nat_helper_func)help; ++#else ++ ve_ip_nat_irc_hook = help; ++#endif ++ return 0; ++} ++ ++void fini_iptable_nat_irc(void) + { +- ip_nat_irc_hook = NULL; ++ ve_ip_nat_irc_hook = NULL; + /* Make sure noone calls it, meanwhile. 
*/ + synchronize_net(); + } + ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ip_nat_irc); ++ KSYMUNRESOLVE(init_iptable_nat_irc); ++ KSYMUNRESOLVE(fini_iptable_nat_irc); ++ fini_iptable_nat_irc(); ++} ++ + static int __init init(void) + { +- BUG_ON(ip_nat_irc_hook); +- ip_nat_irc_hook = help; +- return 0; ++ KSYMRESOLVE(init_iptable_nat_irc); ++ KSYMRESOLVE(fini_iptable_nat_irc); ++ KSYMMODRESOLVE(ip_nat_irc); ++ return init_iptable_nat_irc(); + } + + /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_rule.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_rule.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_rule.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_rule.c 2006-07-04 14:41:39.000000000 +0400 +@@ -34,6 +34,13 @@ + #define DEBUGP(format, args...) + #endif + ++#ifdef CONFIG_VE_IPTABLES ++#define ve_ip_nat_table \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_table) ++#else ++#define ve_ip_nat_table &nat_table ++#endif ++ + #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT)) + + static struct +@@ -41,7 +48,7 @@ static struct + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +-} nat_initial_table __initdata ++} nat_initial_table + = { { "nat", NAT_VALID_HOOKS, 4, + sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + { [NF_IP_PRE_ROUTING] = 0, +@@ -235,6 +242,93 @@ static int ipt_dnat_checkentry(const cha + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat_to_user(void *target, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_target *pt; ++ struct ip_nat_multi_range_compat *pinfo; ++ struct compat_ip_nat_multi_range info; ++ u_int16_t tsize; ++ ++ pt = (struct ipt_entry_target *)target; ++ tsize = pt->u.user.target_size; ++ if (__copy_to_user(*dstptr, pt, sizeof(struct ipt_entry_target))) ++ return -EFAULT; ++ 
pinfo = (struct ip_nat_multi_range_compat *)pt->data; ++ memset(&info, 0, sizeof(struct compat_ip_nat_multi_range)); ++ info.rangesize = pinfo->rangesize; ++ info.range[0].flags = pinfo->range[0].flags; ++ info.range[0].min_ip = pinfo->range[0].min_ip; ++ info.range[0].max_ip = pinfo->range[0].max_ip; ++ info.range[0].min = pinfo->range[0].min; ++ info.range[0].max = pinfo->range[0].max; ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_target), ++ &info, sizeof(struct compat_ip_nat_multi_range))) ++ return -EFAULT; ++ tsize -= off; ++ if (put_user(tsize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += tsize; ++ return 0; ++} ++ ++static int compat_from_user(void *target, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_target *pt; ++ struct ipt_entry_target *dstpt; ++ struct compat_ip_nat_multi_range *pinfo; ++ struct ip_nat_multi_range_compat info; ++ u_int16_t tsize; ++ ++ pt = (struct compat_ipt_entry_target *)target; ++ dstpt = (struct ipt_entry_target *)*dstptr; ++ tsize = pt->u.user.target_size; ++ memcpy(*dstptr, pt, sizeof(struct compat_ipt_entry_target)); ++ pinfo = (struct compat_ip_nat_multi_range *)pt->data; ++ memset(&info, 0, sizeof(struct ip_nat_multi_range_compat)); ++ info.rangesize = pinfo->rangesize; ++ info.range[0].flags = pinfo->range[0].flags; ++ info.range[0].min_ip = pinfo->range[0].min_ip; ++ info.range[0].max_ip = pinfo->range[0].max_ip; ++ info.range[0].min = pinfo->range[0].min; ++ info.range[0].max = pinfo->range[0].max; ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_target), ++ &info, sizeof(struct ip_nat_multi_range_compat)); ++ tsize += off; ++ dstpt->u.user.target_size = tsize; ++ *size += off; ++ *dstptr += tsize; ++ return 0; ++} ++ ++static int compat(void *target, void **dstptr, int *size, int convert) ++{ ++ int ret, off; ++ ++ off = IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat)) - ++ COMPAT_IPT_ALIGN(sizeof(struct compat_ip_nat_multi_range)); ++ switch (convert) { 
++ case COMPAT_TO_USER: ++ ret = compat_to_user(target, dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = compat_from_user(target, dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + inline unsigned int + alloc_null_binding(struct ip_conntrack *conntrack, + struct ip_nat_info *info, +@@ -286,7 +380,7 @@ int ip_nat_rule_find(struct sk_buff **ps + { + int ret; + +- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); ++ ret = ipt_do_table(pskb, hooknum, in, out, ve_ip_nat_table, NULL); + + if (ret == NF_ACCEPT) { + if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) +@@ -300,21 +394,33 @@ static struct ipt_target ipt_snat_reg = + .name = "SNAT", + .target = ipt_snat_target, + .checkentry = ipt_snat_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + }; + + static struct ipt_target ipt_dnat_reg = { + .name = "DNAT", + .target = ipt_dnat_target, + .checkentry = ipt_dnat_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + }; + +-int __init ip_nat_rule_init(void) ++int ip_nat_rule_init(void) + { + int ret; ++ struct ipt_table *tmp_table; ++ ++ tmp_table = ipt_register_table(&nat_table, ++ &nat_initial_table.repl); ++ if (IS_ERR(tmp_table)) ++ return PTR_ERR(tmp_table); ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip_nat_table = tmp_table; ++#endif + +- ret = ipt_register_table(&nat_table, &nat_initial_table.repl); +- if (ret != 0) +- return ret; + ret = ipt_register_target(&ipt_snat_reg); + if (ret != 0) + goto unregister_table; +@@ -328,7 +434,10 @@ int __init ip_nat_rule_init(void) + unregister_snat: + ipt_unregister_target(&ipt_snat_reg); + unregister_table: +- ipt_unregister_table(&nat_table); ++ ipt_unregister_table(ve_ip_nat_table); ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip_nat_table = NULL; ++#endif + + return ret; + } +@@ -337,5 +446,8 @@ void ip_nat_rule_cleanup(void) + { + 
ipt_unregister_target(&ipt_dnat_reg); + ipt_unregister_target(&ipt_snat_reg); +- ipt_unregister_table(&nat_table); ++ ipt_unregister_table(ve_ip_nat_table); ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip_nat_table = NULL; ++#endif + } +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_snmp_basic.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_snmp_basic.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_snmp_basic.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_snmp_basic.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1000,12 +1000,12 @@ static unsigned char snmp_trap_decode(st + + return 1; + ++err_addr_free: ++ kfree((unsigned long *)trap->ip_address); ++ + err_id_free: + kfree(trap->id); + +-err_addr_free: +- kfree((unsigned long *)trap->ip_address); +- + return 0; + } + +@@ -1123,11 +1123,10 @@ static int snmp_parse_mangle(unsigned ch + struct snmp_v1_trap trap; + unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check); + +- /* Discard trap allocations regardless */ +- kfree(trap.id); +- kfree((unsigned long *)trap.ip_address); +- +- if (!ret) ++ if (ret) { ++ kfree(trap.id); ++ kfree((unsigned long *)trap.ip_address); ++ } else + return ret; + + } else { +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_standalone.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_standalone.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_standalone.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_nat_standalone.c 2006-07-04 14:41:39.000000000 +0400 +@@ -30,6 +30,7 @@ + #include <net/ip.h> + #include <net/checksum.h> + #include <linux/spinlock.h> ++#include <linux/nfcalls.h> + + #define ASSERT_READ_LOCK(x) + #define ASSERT_WRITE_LOCK(x) +@@ -358,45 +359,45 @@ static int init_or_cleanup(int init) + { + int ret = 0; + +- need_conntrack(); +- + if (!init) goto cleanup; + +-#ifdef CONFIG_XFRM +- BUG_ON(ip_nat_decode_session != NULL); +- ip_nat_decode_session = nat_decode_session; +-#endif 
++ if (!ve_is_super(get_exec_env())) ++ __module_get(THIS_MODULE); ++ + ret = ip_nat_rule_init(); + if (ret < 0) { + printk("ip_nat_init: can't setup rules.\n"); +- goto cleanup_decode_session; ++ goto cleanup_modput; + } +- ret = nf_register_hook(&ip_nat_in_ops); ++ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0) ++ return 0; ++ ++ ret = virt_nf_register_hook(&ip_nat_in_ops); + if (ret < 0) { + printk("ip_nat_init: can't register in hook.\n"); + goto cleanup_rule_init; + } +- ret = nf_register_hook(&ip_nat_out_ops); ++ ret = virt_nf_register_hook(&ip_nat_out_ops); + if (ret < 0) { + printk("ip_nat_init: can't register out hook.\n"); + goto cleanup_inops; + } +- ret = nf_register_hook(&ip_nat_adjust_in_ops); ++ ret = virt_nf_register_hook(&ip_nat_adjust_in_ops); + if (ret < 0) { + printk("ip_nat_init: can't register adjust in hook.\n"); + goto cleanup_outops; + } +- ret = nf_register_hook(&ip_nat_adjust_out_ops); ++ ret = virt_nf_register_hook(&ip_nat_adjust_out_ops); + if (ret < 0) { + printk("ip_nat_init: can't register adjust out hook.\n"); + goto cleanup_adjustin_ops; + } +- ret = nf_register_hook(&ip_nat_local_out_ops); ++ ret = virt_nf_register_hook(&ip_nat_local_out_ops); + if (ret < 0) { + printk("ip_nat_init: can't register local out hook.\n"); + goto cleanup_adjustout_ops;; + } +- ret = nf_register_hook(&ip_nat_local_in_ops); ++ ret = virt_nf_register_hook(&ip_nat_local_in_ops); + if (ret < 0) { + printk("ip_nat_init: can't register local in hook.\n"); + goto cleanup_localoutops; +@@ -404,38 +405,76 @@ static int init_or_cleanup(int init) + return ret; + + cleanup: +- nf_unregister_hook(&ip_nat_local_in_ops); ++ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0) ++ goto cleanup_rule_init; ++ virt_nf_unregister_hook(&ip_nat_local_in_ops); + cleanup_localoutops: +- nf_unregister_hook(&ip_nat_local_out_ops); ++ virt_nf_unregister_hook(&ip_nat_local_out_ops); + cleanup_adjustout_ops: +- nf_unregister_hook(&ip_nat_adjust_out_ops); ++ 
virt_nf_unregister_hook(&ip_nat_adjust_out_ops); + cleanup_adjustin_ops: +- nf_unregister_hook(&ip_nat_adjust_in_ops); ++ virt_nf_unregister_hook(&ip_nat_adjust_in_ops); + cleanup_outops: +- nf_unregister_hook(&ip_nat_out_ops); ++ virt_nf_unregister_hook(&ip_nat_out_ops); + cleanup_inops: +- nf_unregister_hook(&ip_nat_in_ops); ++ virt_nf_unregister_hook(&ip_nat_in_ops); + cleanup_rule_init: + ip_nat_rule_cleanup(); +- cleanup_decode_session: +-#ifdef CONFIG_XFRM +- ip_nat_decode_session = NULL; +- synchronize_net(); +-#endif ++ cleanup_modput: ++ if (!ve_is_super(get_exec_env())) ++ module_put(THIS_MODULE); + return ret; + } + +-static int __init init(void) ++int init_iptable_nat(void) + { + return init_or_cleanup(1); + } + +-static void __exit fini(void) ++void fini_iptable_nat(void) + { + init_or_cleanup(0); + } + +-module_init(init); ++static int __init init(void) ++{ ++ int err; ++ ++ need_conntrack(); ++ ++#ifdef CONFIG_XFRM ++ BUG_ON(ip_nat_decode_session != NULL); ++ ip_nat_decode_session = nat_decode_session; ++#endif ++ ++ err = init_iptable_nat(); ++ if (err < 0) { ++#ifdef CONFIG_XFRM ++ ip_nat_decode_session = NULL; ++ synchronize_net(); ++#endif ++ return err; ++ } ++ ++ KSYMRESOLVE(init_iptable_nat); ++ KSYMRESOLVE(fini_iptable_nat); ++ KSYMMODRESOLVE(iptable_nat); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(iptable_nat); ++ KSYMUNRESOLVE(init_iptable_nat); ++ KSYMUNRESOLVE(fini_iptable_nat); ++ fini_iptable_nat(); ++#ifdef CONFIG_XFRM ++ ip_nat_decode_session = NULL; ++ synchronize_net(); ++#endif ++} ++ ++fs_initcall(init); + module_exit(fini); + + MODULE_LICENSE("GPL"); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_queue.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_queue.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_queue.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_queue.c 2006-07-04 14:41:39.000000000 +0400 +@@ -542,8 +542,17 @@ ipq_rcv_sk(struct sock *sk, int 
len) + down(&ipqnl_sem); + + for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { ++#ifdef CONFIG_VE ++ struct ve_struct *env; ++#endif + skb = skb_dequeue(&sk->sk_receive_queue); ++#ifdef CONFIG_VE ++ env = set_exec_env(VE_OWNER_SKB(skb)); + ipq_rcv_skb(skb); ++ (void)set_exec_env(env); ++#else ++ ipq_rcv_skb(skb); ++#endif + kfree_skb(skb); + } + +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_tables.c linux-2.6.16-026test015/net/ipv4/netfilter/ip_tables.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ip_tables.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ip_tables.c 2006-07-04 14:41:39.000000000 +0400 +@@ -24,14 +24,17 @@ + #include <linux/module.h> + #include <linux/icmp.h> + #include <net/ip.h> ++#include <net/compat.h> + #include <asm/uaccess.h> + #include <asm/semaphore.h> + #include <linux/proc_fs.h> + #include <linux/err.h> + #include <linux/cpumask.h> ++#include <ub/ub_mem.h> + + #include <linux/netfilter/x_tables.h> + #include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +@@ -70,6 +73,14 @@ do { \ + #define inline + #endif + ++#ifdef CONFIG_VE_IPTABLES ++/* include ve.h and define get_exec_env */ ++#include <linux/sched.h> ++#define ve_ipt_standard_target (get_exec_env()->_ipt_standard_target) ++#else ++#define ve_ipt_standard_target &ipt_standard_target ++#endif ++ + /* + We keep a set of rules for each CPU, so we can avoid write-locking + them in the softirq when updating the counters and therefore +@@ -480,7 +491,7 @@ standard_check(const struct ipt_entry_ta + if (t->u.target_size + != IPT_ALIGN(sizeof(struct ipt_standard_target))) { + duprintf("standard_check: target size %u != %u\n", +- t->u.target_size, ++ t->u.target_size, (unsigned int) + IPT_ALIGN(sizeof(struct ipt_standard_target))); + return 0; + } +@@ -565,7 +576,7 @@ check_entry(struct ipt_entry *e, const c + } + 
t->u.kernel.target = target; + +- if (t->u.kernel.target == &ipt_standard_target) { ++ if (t->u.kernel.target == ve_ipt_standard_target) { + if (!standard_check(t, size)) { + ret = -EINVAL; + goto cleanup_matches; +@@ -790,32 +801,45 @@ get_counters(const struct xt_table_info + } + } + +-static int +-copy_entries_to_user(unsigned int total_size, +- struct ipt_table *table, +- void __user *userptr) ++static inline struct xt_counters * alloc_counters(struct ipt_table *table) + { +- unsigned int off, num, countersize; +- struct ipt_entry *e; ++ unsigned int countersize; + struct xt_counters *counters; + struct xt_table_info *private = table->private; +- int ret = 0; +- void *loc_cpu_entry; + + /* We need atomic snapshot of counters: rest doesn't change + (other than comefrom, which userspace doesn't care + about). */ + countersize = sizeof(struct xt_counters) * private->number; +- counters = vmalloc_node(countersize, numa_node_id()); ++ counters = ub_vmalloc_node(countersize, numa_node_id()); + + if (counters == NULL) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + /* First, sum counters... */ + write_lock_bh(&table->lock); + get_counters(private, counters); + write_unlock_bh(&table->lock); + ++ return counters; ++} ++ ++static int ++copy_entries_to_user(unsigned int total_size, ++ struct ipt_table *table, ++ void __user *userptr) ++{ ++ unsigned int off, num; ++ struct ipt_entry *e; ++ struct xt_counters *counters; ++ struct xt_table_info *private = table->private; ++ int ret = 0; ++ void *loc_cpu_entry; ++ ++ counters = alloc_counters(table); ++ if (IS_ERR(counters)) ++ return PTR_ERR(counters); ++ + /* choose the copy that is on our node/cpu, ... 
+ * This choice is lazy (because current thread is + * allowed to migrate to another cpu) +@@ -875,25 +899,391 @@ copy_entries_to_user(unsigned int total_ + return ret; + } + ++#ifdef CONFIG_COMPAT ++static DECLARE_MUTEX(compat_ipt_mutex); ++ ++struct compat_delta { ++ struct compat_delta *next; ++ u_int16_t offset; ++ short delta; ++}; ++ ++static struct compat_delta *compat_offsets = NULL; ++ ++static int compat_add_offset(u_int16_t offset, short delta) ++{ ++ struct compat_delta *tmp; ++ ++ tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); ++ if (!tmp) ++ return -ENOMEM; ++ tmp->offset = offset; ++ tmp->delta = delta; ++ if (compat_offsets) { ++ tmp->next = compat_offsets->next; ++ compat_offsets->next = tmp; ++ } else { ++ compat_offsets = tmp; ++ tmp->next = NULL; ++ } ++ return 0; ++} ++ ++static void compat_flush_offsets(void) ++{ ++ struct compat_delta *tmp, *next; ++ ++ if (compat_offsets) { ++ for(tmp = compat_offsets; tmp; tmp = next) { ++ next = tmp->next; ++ kfree(tmp); ++ } ++ compat_offsets = NULL; ++ } ++} ++ ++static short compat_calc_jump(u_int16_t offset) ++{ ++ struct compat_delta *tmp; ++ short delta; ++ ++ for(tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next) ++ if (tmp->offset < offset) ++ delta += tmp->delta; ++ return delta; ++} ++ ++struct compat_ipt_standard_target ++{ ++ struct compat_ipt_entry_target target; ++ compat_int_t verdict; ++}; ++ ++#define IPT_ST_OFFSET (sizeof(struct ipt_standard_target) - \ ++ sizeof(struct compat_ipt_standard_target)) ++ ++struct compat_ipt_standard ++{ ++ struct compat_ipt_entry entry; ++ struct compat_ipt_standard_target target; ++}; ++ ++static int compat_ipt_standard_fn(void *target, ++ void **dstptr, int *size, int convert) ++{ ++ struct compat_ipt_standard_target compat_st, *pcompat_st; ++ struct ipt_standard_target st, *pst; ++ int ret; ++ ++ ret = 0; ++ switch (convert) { ++ case COMPAT_TO_USER: ++ pst = (struct ipt_standard_target *)target; ++ memcpy(&compat_st.target, &pst->target, ++ 
sizeof(struct ipt_entry_target)); ++ compat_st.verdict = pst->verdict; ++ if (compat_st.verdict > 0) ++ compat_st.verdict -= ++ compat_calc_jump(compat_st.verdict); ++ compat_st.target.u.user.target_size = ++ sizeof(struct compat_ipt_standard_target); ++ if (__copy_to_user(*dstptr, &compat_st, ++ sizeof(struct compat_ipt_standard_target))) ++ ret = -EFAULT; ++ *size -= IPT_ST_OFFSET; ++ *dstptr += sizeof(struct compat_ipt_standard_target); ++ break; ++ case COMPAT_FROM_USER: ++ pcompat_st = ++ (struct compat_ipt_standard_target *)target; ++ memcpy(&st.target, &pcompat_st->target, ++ sizeof(struct ipt_entry_target)); ++ st.verdict = pcompat_st->verdict; ++ if (st.verdict > 0) ++ st.verdict += compat_calc_jump(st.verdict); ++ st.target.u.user.target_size = ++ sizeof(struct ipt_standard_target); ++ memcpy(*dstptr, &st, ++ sizeof(struct ipt_standard_target)); ++ *size += IPT_ST_OFFSET; ++ *dstptr += sizeof(struct ipt_standard_target); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += IPT_ST_OFFSET; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++ ++int ipt_target_align_compat(void *target, void **dstptr, ++ int *size, int off, int convert) ++{ ++ struct compat_ipt_entry_target *pcompat; ++ struct ipt_entry_target *pt; ++ u_int16_t tsize; ++ int ret; ++ ++ ret = 0; ++ switch (convert) { ++ case COMPAT_TO_USER: ++ pt = (struct ipt_entry_target *)target; ++ tsize = pt->u.user.target_size; ++ if (__copy_to_user(*dstptr, pt, tsize)) { ++ ret = -EFAULT; ++ break; ++ } ++ tsize -= off; ++ if (put_user(tsize, (u_int16_t *)*dstptr)) ++ ret = -EFAULT; ++ *size -= off; ++ *dstptr += tsize; ++ break; ++ case COMPAT_FROM_USER: ++ pcompat = (struct compat_ipt_entry_target *)target; ++ pt = (struct ipt_entry_target *)*dstptr; ++ tsize = pcompat->u.user.target_size; ++ memcpy(pt, pcompat, tsize); ++ tsize += off; ++ pt->u.user.target_size = tsize; ++ *size += off; ++ *dstptr += tsize; ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ break; ++ 
default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++ ++int ipt_match_align_compat(void *match, void **dstptr, ++ int *size, int off, int convert) ++{ ++ struct compat_ipt_entry_match *pcompat_m; ++ struct ipt_entry_match *pm; ++ u_int16_t msize; ++ int ret; ++ ++ ret = 0; ++ switch (convert) { ++ case COMPAT_TO_USER: ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, msize)) { ++ ret = -EFAULT; ++ break; ++ } ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ ret = -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ break; ++ case COMPAT_FROM_USER: ++ pcompat_m = (struct compat_ipt_entry_match *)match; ++ pm = (struct ipt_entry_match *)*dstptr; ++ msize = pcompat_m->u.user.match_size; ++ memcpy(pm, pcompat_m, msize); ++ msize += off; ++ pm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++ ++static int icmp_compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_icmp)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_icmp)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++ ++static inline int ++compat_calc_match(struct ipt_entry_match *m, int * size) ++{ ++ if (m->u.kernel.match->compat) ++ m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); ++ return 0; ++} ++ ++static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, ++ void *base, struct xt_table_info *newinfo) ++{ ++ struct ipt_entry_target *t; ++ u_int16_t entry_offset; ++ int off, i, ret; ++ ++ off = 0; ++ entry_offset = (void *)e - base; ++ IPT_MATCH_ITERATE(e, compat_calc_match, &off); ++ t = ipt_get_target(e); ++ if (t->u.kernel.target->compat) ++ t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); ++ newinfo->size -= off; ++ ret = 
compat_add_offset(entry_offset, off); ++ if (ret) ++ return ret; ++ ++ for (i = 0; i< NF_IP_NUMHOOKS; i++) { ++ if (info->hook_entry[i] && (e < (struct ipt_entry *) ++ (base + info->hook_entry[i]))) ++ newinfo->hook_entry[i] -= off; ++ if (info->underflow[i] && (e < (struct ipt_entry *) ++ (base + info->underflow[i]))) ++ newinfo->underflow[i] -= off; ++ } ++ return 0; ++} ++ ++static int compat_table_info(struct xt_table_info *info, ++ struct xt_table_info *newinfo) ++{ ++ void *loc_cpu_entry; ++ int i; ++ ++ if (!newinfo || !info) ++ return -EINVAL; ++ ++ memset(newinfo, 0, sizeof(struct xt_table_info)); ++ newinfo->size = info->size; ++ for (i = 0; i < NF_IP_NUMHOOKS; i++) { ++ newinfo->hook_entry[i] = info->hook_entry[i]; ++ newinfo->underflow[i] = info->underflow[i]; ++ } ++ loc_cpu_entry = info->entries[raw_smp_processor_id()]; ++ return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size, ++ compat_calc_entry, info, loc_cpu_entry, newinfo); ++} ++#endif ++ ++static int get_info(void __user *user, int *len) ++{ ++ char name[IPT_TABLE_MAXNAMELEN]; ++ struct ipt_table *t; ++ int ret, size; ++ ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits()) ++ size = sizeof(struct compat_ipt_getinfo); ++ else ++#endif ++ size = sizeof(struct ipt_getinfo); ++ ++ if (*len != size) { ++ duprintf("length %u != %u\n", *len, ++ (unsigned int)sizeof(struct ipt_getinfo)); ++ return -EINVAL; ++ } ++ ++ if (copy_from_user(name, user, sizeof(name)) != 0) ++ return -EFAULT; ++ ++ name[IPT_TABLE_MAXNAMELEN-1] = '\0'; ++#ifdef CONFIG_COMPAT ++ down(&compat_ipt_mutex); ++#endif ++ t = try_then_request_module(xt_find_table_lock(AF_INET, name), ++ "iptable_%s", name); ++ if (t && !IS_ERR(t)) { ++ struct ipt_getinfo info; ++ struct xt_table_info *private = t->private; ++#ifdef CONFIG_COMPAT ++ struct compat_ipt_getinfo compat_info; ++#endif ++ void *pinfo; ++ ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits()) { ++ struct xt_table_info tmp; ++ ret = compat_table_info(private, &tmp); ++ 
compat_flush_offsets(); ++ memcpy(compat_info.hook_entry, tmp.hook_entry, ++ sizeof(compat_info.hook_entry)); ++ memcpy(compat_info.underflow, tmp.underflow, ++ sizeof(compat_info.underflow)); ++ compat_info.valid_hooks = t->valid_hooks; ++ compat_info.num_entries = private->number; ++ compat_info.size = tmp.size; ++ strcpy(compat_info.name, name); ++ pinfo = (void *)&compat_info; ++ } else ++#endif ++ { ++ info.valid_hooks = t->valid_hooks; ++ memcpy(info.hook_entry, private->hook_entry, ++ sizeof(info.hook_entry)); ++ memcpy(info.underflow, private->underflow, ++ sizeof(info.underflow)); ++ info.num_entries = private->number; ++ info.size = private->size; ++ strcpy(info.name, name); ++ pinfo = (void *)&info; ++ } ++ ++ if (copy_to_user(user, pinfo, *len) != 0) ++ ret = -EFAULT; ++ else ++ ret = 0; ++ ++ xt_table_unlock(t); ++ module_put(t->me); ++ } else ++ ret = t ? PTR_ERR(t) : -ENOENT; ++#ifdef CONFIG_COMPAT ++ up(&compat_ipt_mutex); ++#endif ++ return ret; ++} ++ + static int +-get_entries(const struct ipt_get_entries *entries, +- struct ipt_get_entries __user *uptr) ++get_entries(struct ipt_get_entries __user *uptr, int *len) + { + int ret; ++ struct ipt_get_entries get; + struct ipt_table *t; + +- t = xt_find_table_lock(AF_INET, entries->name); ++ if (*len < sizeof(get)) { ++ duprintf("get_entries: %u < %d\n", *len, ++ (unsigned int)sizeof(get)); ++ return -EINVAL; ++ } ++ if (copy_from_user(&get, uptr, sizeof(get)) != 0) ++ return -EFAULT; ++ if (*len != sizeof(struct ipt_get_entries) + get.size) { ++ duprintf("get_entries: %u != %u\n", *len, ++ (unsigned int)(sizeof(struct ipt_get_entries) + ++ get.size)); ++ return -EINVAL; ++ } ++ ++ t = xt_find_table_lock(AF_INET, get.name); + if (t && !IS_ERR(t)) { + struct xt_table_info *private = t->private; + duprintf("t->private->number = %u\n", + private->number); +- if (entries->size == private->size) ++ if (get.size == private->size) + ret = copy_entries_to_user(private->size, + t, uptr->entrytable); + else { + 
duprintf("get_entries: I've got %u not %u!\n", + private->size, +- entries->size); ++ get.size); + ret = -EINVAL; + } + module_put(t->me); +@@ -905,71 +1295,39 @@ get_entries(const struct ipt_get_entries + } + + static int +-do_replace(void __user *user, unsigned int len) ++__do_replace(const char *name, unsigned int valid_hooks, ++ struct xt_table_info *newinfo, unsigned int num_counters, ++ void __user *counters_ptr) + { + int ret; +- struct ipt_replace tmp; + struct ipt_table *t; +- struct xt_table_info *newinfo, *oldinfo; ++ struct xt_table_info *oldinfo; + struct xt_counters *counters; +- void *loc_cpu_entry, *loc_cpu_old_entry; +- +- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) +- return -EFAULT; +- +- /* Hack: Causes ipchains to give correct error msg --RR */ +- if (len != sizeof(tmp) + tmp.size) +- return -ENOPROTOOPT; +- +- /* overflow check */ +- if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS - +- SMP_CACHE_BYTES) +- return -ENOMEM; +- if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) +- return -ENOMEM; +- +- newinfo = xt_alloc_table_info(tmp.size); +- if (!newinfo) +- return -ENOMEM; +- +- /* choose the copy that is our node/cpu */ +- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; +- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), +- tmp.size) != 0) { +- ret = -EFAULT; +- goto free_newinfo; +- } ++ void *loc_cpu_old_entry; + +- counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters)); ++ ret = 0; ++ counters = ub_vmalloc_best(num_counters * sizeof(struct xt_counters)); + if (!counters) { + ret = -ENOMEM; +- goto free_newinfo; ++ goto out; + } + +- ret = translate_table(tmp.name, tmp.valid_hooks, +- newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, +- tmp.hook_entry, tmp.underflow); +- if (ret != 0) +- goto free_newinfo_counters; +- +- duprintf("ip_tables: Translated table\n"); +- +- t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name), +- "iptable_%s", tmp.name); ++ t = 
try_then_request_module(xt_find_table_lock(AF_INET, name), ++ "iptable_%s", name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; + goto free_newinfo_counters_untrans; + } + + /* You lied! */ +- if (tmp.valid_hooks != t->valid_hooks) { ++ if (valid_hooks != t->valid_hooks) { + duprintf("Valid hook crap: %08X vs %08X\n", +- tmp.valid_hooks, t->valid_hooks); ++ valid_hooks, t->valid_hooks); + ret = -EINVAL; + goto put_module; + } + +- oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret); ++ oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); + if (!oldinfo) + goto put_module; + +@@ -989,8 +1347,8 @@ do_replace(void __user *user, unsigned i + loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + xt_free_table_info(oldinfo); +- if (copy_to_user(tmp.counters, counters, +- sizeof(struct xt_counters) * tmp.num_counters) != 0) ++ if (copy_to_user(counters_ptr, counters, ++ sizeof(struct xt_counters) * num_counters) != 0) + ret = -EFAULT; + vfree(counters); + xt_table_unlock(t); +@@ -1000,9 +1358,62 @@ do_replace(void __user *user, unsigned i + module_put(t->me); + xt_table_unlock(t); + free_newinfo_counters_untrans: +- IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); +- free_newinfo_counters: + vfree(counters); ++ out: ++ return ret; ++} ++ ++static int ++do_replace(void __user *user, unsigned int len) ++{ ++ int ret; ++ struct ipt_replace tmp; ++ struct xt_table_info *newinfo; ++ void *loc_cpu_entry; ++ ++ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) ++ return -EFAULT; ++ ++ /* Hack: Causes ipchains to give correct error msg --RR */ ++ if (len != sizeof(tmp) + tmp.size) ++ return -ENOPROTOOPT; ++ ++ /* overflow check */ ++ if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS - ++ SMP_CACHE_BYTES) ++ return -ENOMEM; ++ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) ++ return -ENOMEM; ++ ++ newinfo = 
xt_alloc_table_info(tmp.size); ++ if (!newinfo) ++ return -ENOMEM; ++ ++ /* choose the copy that is our node/cpu */ ++ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; ++ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), ++ tmp.size) != 0) { ++ ret = -EFAULT; ++ goto free_newinfo; ++ } ++ ++ ret = translate_table(tmp.name, tmp.valid_hooks, ++ newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, ++ tmp.hook_entry, tmp.underflow); ++ if (ret != 0) ++ goto free_newinfo; ++ ++ duprintf("ip_tables: Translated table\n"); ++ ++ ret = __do_replace(tmp.name, tmp.valid_hooks, ++ newinfo, tmp.num_counters, ++ tmp.counters); ++ if (ret) ++ goto free_newinfo_untrans; ++ return 0; ++ ++ free_newinfo_untrans: ++ IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +@@ -1034,28 +1445,56 @@ static int + do_add_counters(void __user *user, unsigned int len) + { + unsigned int i; +- struct xt_counters_info tmp, *paddc; ++ struct xt_counters_info tmp; ++ struct xt_counters *paddc; ++ unsigned int num_counters; ++ char *name; ++ int size; ++ void *ptmp; + struct ipt_table *t; + struct xt_table_info *private; + int ret = 0; + void *loc_cpu_entry; ++#ifdef CONFIG_COMPAT ++ struct compat_xt_counters_info compat_tmp; + +- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) ++ if (is_current_32bits()) { ++ ptmp = &compat_tmp; ++ size = sizeof(struct compat_xt_counters_info); ++ } else ++#endif ++ { ++ ptmp = &tmp; ++ size = sizeof(struct xt_counters_info); ++ } ++ ++ if (copy_from_user(ptmp, user, size) != 0) + return -EFAULT; + +- if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits()) { ++ num_counters = compat_tmp.num_counters; ++ name = compat_tmp.name; ++ } else ++#endif ++ { ++ num_counters = tmp.num_counters; ++ name = tmp.name; ++ } ++ ++ if (len != size + num_counters * sizeof(struct xt_counters)) + return -EINVAL; + +- paddc = 
vmalloc_node(len, numa_node_id()); ++ paddc = ub_vmalloc_node(len - size, numa_node_id()); + if (!paddc) + return -ENOMEM; + +- if (copy_from_user(paddc, user, len) != 0) { ++ if (copy_from_user(paddc, user + size, len - size) != 0) { + ret = -EFAULT; + goto free; + } + +- t = xt_find_table_lock(AF_INET, tmp.name); ++ t = xt_find_table_lock(AF_INET, name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; + goto free; +@@ -1063,7 +1502,7 @@ do_add_counters(void __user *user, unsig + + write_lock_bh(&t->lock); + private = t->private; +- if (private->number != paddc->num_counters) { ++ if (private->number != num_counters) { + ret = -EINVAL; + goto unlock_up_free; + } +@@ -1074,7 +1513,7 @@ do_add_counters(void __user *user, unsig + IPT_ENTRY_ITERATE(loc_cpu_entry, + private->size, + add_counter_to_entry, +- paddc->counters, ++ paddc, + &i); + unlock_up_free: + write_unlock_bh(&t->lock); +@@ -1086,14 +1525,590 @@ do_add_counters(void __user *user, unsig + return ret; + } + ++#ifdef CONFIG_COMPAT ++struct compat_ipt_replace { ++ char name[IPT_TABLE_MAXNAMELEN]; ++ u32 valid_hooks; ++ u32 num_entries; ++ u32 size; ++ u32 hook_entry[NF_IP_NUMHOOKS]; ++ u32 underflow[NF_IP_NUMHOOKS]; ++ u32 num_counters; ++ compat_uptr_t counters; /* struct ipt_counters * */ ++ struct compat_ipt_entry entries[0]; ++}; ++ ++static inline int compat_copy_match_to_user(struct ipt_entry_match *m, ++ void __user **dstptr, compat_uint_t *size) ++{ ++ if (m->u.kernel.match->compat) ++ m->u.kernel.match->compat(m, dstptr, size, COMPAT_TO_USER); ++ else { ++ if (__copy_to_user(*dstptr, m, m->u.match_size)) ++ return -EFAULT; ++ *dstptr += m->u.match_size; ++ } ++ return 0; ++} ++ ++static int compat_copy_entry_to_user(struct ipt_entry *e, ++ void __user **dstptr, compat_uint_t *size) ++{ ++ struct ipt_entry_target __user *t; ++ struct compat_ipt_entry __user *ce; ++ u_int16_t target_offset, next_offset; ++ compat_uint_t origsize; ++ int ret; ++ ++ ret = -EFAULT; ++ origsize = *size; ++ ce 
= (struct compat_ipt_entry __user *)*dstptr; ++ if (__copy_to_user(ce, e, sizeof(struct ipt_entry))) ++ goto out; ++ ++ *dstptr += sizeof(struct compat_ipt_entry); ++ ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size); ++ target_offset = e->target_offset - (origsize - *size); ++ if (ret) ++ goto out; ++ t = ipt_get_target(e); ++ if (t->u.kernel.target->compat) { ++ ret = t->u.kernel.target->compat(t, ++ dstptr, size, COMPAT_TO_USER); ++ if (ret) ++ goto out; ++ } else { ++ ret = -EFAULT; ++ if (__copy_to_user(*dstptr, t, t->u.target_size)) ++ goto out; ++ *dstptr += t->u.target_size; ++ } ++ ret = -EFAULT; ++ next_offset = e->next_offset - (origsize - *size); ++ if (__put_user(target_offset, &ce->target_offset)) ++ goto out; ++ if (__put_user(next_offset, &ce->next_offset)) ++ goto out; ++ return 0; ++out: ++ return ret; ++} ++ ++static inline int ++compat_check_calc_match(struct ipt_entry_match *m, ++ const char *name, ++ const struct ipt_ip *ip, ++ unsigned int hookmask, ++ int *size, int *i) ++{ ++ struct ipt_match *match; ++ ++ match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, ++ m->u.user.revision), ++ "ipt_%s", m->u.user.name); ++ if (IS_ERR(match) || !match) { ++ duprintf("compat_check_calc_match: `%s' not found\n", ++ m->u.user.name); ++ return match ? 
PTR_ERR(match) : -ENOENT; ++ } ++ m->u.kernel.match = match; ++ ++ if (m->u.kernel.match->compat) ++ m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); ++ ++ (*i)++; ++ return 0; ++} ++ ++static inline int ++check_compat_entry_size_and_hooks(struct ipt_entry *e, ++ struct xt_table_info *newinfo, ++ unsigned int *size, ++ unsigned char *base, ++ unsigned char *limit, ++ unsigned int *hook_entries, ++ unsigned int *underflows, ++ unsigned int *i, ++ const char *name) ++{ ++ struct ipt_entry_target *t; ++ struct ipt_target *target; ++ u_int16_t entry_offset; ++ int ret, off, h, j; ++ ++ duprintf("check_compat_entry_size_and_hooks %p\n", e); ++ if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ++ || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { ++ duprintf("Bad offset %p, limit = %p\n", e, limit); ++ return -EINVAL; ++ } ++ ++ if (e->next_offset < sizeof(struct compat_ipt_entry) + ++ sizeof(struct compat_ipt_entry_target)) { ++ duprintf("checking: element %p size %u\n", ++ e, e->next_offset); ++ return -EINVAL; ++ } ++ ++ if (!ip_checkentry(&e->ip)) { ++ duprintf("ip_tables: ip check failed %p %s.\n", e, name); ++ return -EINVAL; ++ } ++ ++ off = 0; ++ entry_offset = (void *)e - (void *)base; ++ j = 0; ++ ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, ++ e->comefrom, &off, &j); ++ if (ret != 0) ++ goto out; ++ ++ t = ipt_get_target(e); ++ target = try_then_request_module(xt_find_target(AF_INET, ++ t->u.user.name, ++ t->u.user.revision), ++ "ipt_%s", t->u.user.name); ++ if (IS_ERR(target) || !target) { ++ duprintf("check_entry: `%s' not found\n", t->u.user.name); ++ ret = target ? 
PTR_ERR(target) : -ENOENT; ++ goto out; ++ } ++ t->u.kernel.target = target; ++ ++ if (t->u.kernel.target->compat) ++ t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); ++ *size += off; ++ ret = compat_add_offset(entry_offset, off); ++ if (ret) ++ goto out; ++ ++ /* Check hooks & underflows */ ++ for (h = 0; h < NF_IP_NUMHOOKS; h++) { ++ if ((unsigned char *)e - base == hook_entries[h]) ++ newinfo->hook_entry[h] = hook_entries[h]; ++ if ((unsigned char *)e - base == underflows[h]) ++ newinfo->underflow[h] = underflows[h]; ++ } ++ ++ /* Clear counters and comefrom */ ++ e->counters = ((struct ipt_counters) { 0, 0 }); ++ e->comefrom = 0; ++ ++ (*i)++; ++ return 0; ++out: ++ IPT_MATCH_ITERATE(e, cleanup_match, &j); ++ return ret; ++} ++ ++static inline int compat_copy_match_from_user(struct ipt_entry_match *m, ++ void **dstptr, compat_uint_t *size, const char *name, ++ const struct ipt_ip *ip, unsigned int hookmask) ++{ ++ struct ipt_entry_match *dm; ++ ++ dm = (struct ipt_entry_match *)*dstptr; ++ if (m->u.kernel.match->compat) ++ m->u.kernel.match->compat(m, dstptr, size, COMPAT_FROM_USER); ++ else { ++ memcpy(*dstptr, m, m->u.match_size); ++ *dstptr += m->u.match_size; ++ } ++ ++ if (dm->u.kernel.match->checkentry ++ && !dm->u.kernel.match->checkentry(name, ip, dm->data, ++ dm->u.match_size - sizeof(*dm), ++ hookmask)) { ++ module_put(dm->u.kernel.match->me); ++ duprintf("ip_tables: check failed for `%s'.\n", ++ dm->u.kernel.match->name); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, ++ unsigned int *size, const char *name, ++ struct xt_table_info *newinfo, unsigned char *base) ++{ ++ struct ipt_entry_target *t; ++ struct ipt_entry *de; ++ unsigned int origsize; ++ int ret, h; ++ ++ ret = 0; ++ origsize = *size; ++ de = (struct ipt_entry *)*dstptr; ++ memcpy(de, e, sizeof(struct ipt_entry)); ++ ++ *dstptr += sizeof(struct compat_ipt_entry); ++ ret = IPT_MATCH_ITERATE(e, 
compat_copy_match_from_user, dstptr, size, ++ name, &de->ip, de->comefrom); ++ if (ret) ++ goto out; ++ de->target_offset = e->target_offset - (origsize - *size); ++ t = ipt_get_target(e); ++ if (t->u.kernel.target->compat) ++ t->u.kernel.target->compat(t, ++ dstptr, size, COMPAT_FROM_USER); ++ else { ++ memcpy(*dstptr, t, t->u.target_size); ++ *dstptr += t->u.target_size; ++ } ++ ++ de->next_offset = e->next_offset - (origsize - *size); ++ for (h = 0; h < NF_IP_NUMHOOKS; h++) { ++ if ((unsigned char *)de - base < newinfo->hook_entry[h]) ++ newinfo->hook_entry[h] -= origsize - *size; ++ if ((unsigned char *)de - base < newinfo->underflow[h]) ++ newinfo->underflow[h] -= origsize - *size; ++ } ++ ++ ret = -EINVAL; ++ t = ipt_get_target(de); ++ if (t->u.kernel.target == &ipt_standard_target) { ++ if (!standard_check(t, *size)) ++ goto out; ++ } else if (t->u.kernel.target->checkentry ++ && !t->u.kernel.target->checkentry(name, de, t->data, ++ t->u.target_size ++ - sizeof(*t), ++ de->comefrom)) { ++ module_put(t->u.kernel.target->me); ++ duprintf("ip_tables: compat: check failed for `%s'.\n", ++ t->u.kernel.target->name); ++ goto out; ++ } ++ ret = 0; ++out: ++ return ret; ++} ++ ++static int ++translate_compat_table(const char *name, ++ unsigned int valid_hooks, ++ struct xt_table_info **pinfo, ++ void **pentry0, ++ unsigned int total_size, ++ unsigned int number, ++ unsigned int *hook_entries, ++ unsigned int *underflows) ++{ ++ unsigned int i; ++ struct xt_table_info *newinfo, *info; ++ void *pos, *entry0, *entry1; ++ unsigned int size; ++ int ret; ++ ++ info = *pinfo; ++ entry0 = *pentry0; ++ size = total_size; ++ info->number = number; ++ ++ /* Init all hooks to impossible value. */ ++ for (i = 0; i < NF_IP_NUMHOOKS; i++) { ++ info->hook_entry[i] = 0xFFFFFFFF; ++ info->underflow[i] = 0xFFFFFFFF; ++ } ++ ++ duprintf("translate_compat_table: size %u\n", info->size); ++ i = 0; ++ down(&compat_ipt_mutex); ++ /* Walk through entries, checking offsets. 
*/ ++ ret = IPT_ENTRY_ITERATE(entry0, total_size, ++ check_compat_entry_size_and_hooks, ++ info, &size, entry0, ++ entry0 + total_size, ++ hook_entries, underflows, &i, name); ++ if (ret != 0) ++ goto out_unlock; ++ ++ ret = -EINVAL; ++ if (i != number) { ++ duprintf("translate_compat_table: %u not %u entries\n", ++ i, number); ++ goto out_unlock; ++ } ++ ++ /* Check hooks all assigned */ ++ for (i = 0; i < NF_IP_NUMHOOKS; i++) { ++ /* Only hooks which are valid */ ++ if (!(valid_hooks & (1 << i))) ++ continue; ++ if (info->hook_entry[i] == 0xFFFFFFFF) { ++ duprintf("Invalid hook entry %u %u\n", ++ i, hook_entries[i]); ++ goto out_unlock; ++ } ++ if (info->underflow[i] == 0xFFFFFFFF) { ++ duprintf("Invalid underflow %u %u\n", ++ i, underflows[i]); ++ goto out_unlock; ++ } ++ } ++ ++ ret = -ENOMEM; ++ newinfo = xt_alloc_table_info(size); ++ if (!newinfo) ++ goto out_unlock; ++ ++ newinfo->number = number; ++ for (i = 0; i < NF_IP_NUMHOOKS; i++) { ++ newinfo->hook_entry[i] = info->hook_entry[i]; ++ newinfo->underflow[i] = info->underflow[i]; ++ } ++ entry1 = newinfo->entries[raw_smp_processor_id()]; ++ pos = entry1; ++ size = total_size; ++ ret = IPT_ENTRY_ITERATE(entry0, total_size, ++ compat_copy_entry_from_user, &pos, &size, ++ name, newinfo, entry1); ++ compat_flush_offsets(); ++ up(&compat_ipt_mutex); ++ if (ret) ++ goto free_newinfo; ++ ++ ret = -ELOOP; ++ if (!mark_source_chains(newinfo, valid_hooks, entry1)) ++ goto free_newinfo; ++ ++ /* And one copy for every other CPU */ ++ for_each_cpu(i) ++ if (newinfo->entries[i] && newinfo->entries[i] != entry1) ++ memcpy(newinfo->entries[i], entry1, newinfo->size); ++ ++ *pinfo = newinfo; ++ *pentry0 = entry1; ++ xt_free_table_info(info); ++ return 0; ++ ++free_newinfo: ++ xt_free_table_info(newinfo); ++out: ++ return ret; ++out_unlock: ++ up(&compat_ipt_mutex); ++ goto out; ++} ++ ++static int ++compat_do_replace(void __user *user, unsigned int len) ++{ ++ int ret; ++ struct compat_ipt_replace tmp; ++ struct 
xt_table_info *newinfo; ++ void *loc_cpu_entry; ++ ++ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) ++ return -EFAULT; ++ ++ /* Hack: Causes ipchains to give correct error msg --RR */ ++ if (len != sizeof(tmp) + tmp.size) ++ return -ENOPROTOOPT; ++ ++ /* overflow check */ ++ if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS - ++ SMP_CACHE_BYTES) ++ return -ENOMEM; ++ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) ++ return -ENOMEM; ++ ++ newinfo = xt_alloc_table_info(tmp.size); ++ if (!newinfo) ++ return -ENOMEM; ++ ++ /* choose the copy that is our node/cpu */ ++ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; ++ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), ++ tmp.size) != 0) { ++ ret = -EFAULT; ++ goto free_newinfo; ++ } ++ ++ ret = translate_compat_table(tmp.name, tmp.valid_hooks, ++ &newinfo, &loc_cpu_entry, tmp.size, ++ tmp.num_entries, tmp.hook_entry, tmp.underflow); ++ if (ret != 0) ++ goto free_newinfo; ++ ++ duprintf("compat_do_replace: Translated table\n"); ++ ++ ret = __do_replace(tmp.name, tmp.valid_hooks, ++ newinfo, tmp.num_counters, ++ compat_ptr(tmp.counters)); ++ if (ret) ++ goto free_newinfo_untrans; ++ return 0; ++ ++ free_newinfo_untrans: ++ IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); ++ free_newinfo: ++ xt_free_table_info(newinfo); ++ return ret; ++} ++ ++struct compat_ipt_get_entries ++{ ++ char name[IPT_TABLE_MAXNAMELEN]; ++ compat_uint_t size; ++ struct compat_ipt_entry entrytable[0]; ++}; ++ ++static int compat_copy_entries_to_user(unsigned int total_size, ++ struct ipt_table *table, void __user *userptr) ++{ ++ unsigned int off, num; ++ struct compat_ipt_entry e; ++ struct xt_counters *counters; ++ struct xt_table_info *private = table->private; ++ void __user *pos; ++ unsigned int size; ++ int ret = 0; ++ void *loc_cpu_entry; ++ ++ counters = alloc_counters(table); ++ if (IS_ERR(counters)) ++ return PTR_ERR(counters); ++ ++ /* choose the copy that is on our 
node/cpu, ... ++ * This choice is lazy (because current thread is ++ * allowed to migrate to another cpu) ++ */ ++ loc_cpu_entry = private->entries[raw_smp_processor_id()]; ++ pos = userptr; ++ size = total_size; ++ ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size, ++ compat_copy_entry_to_user, &pos, &size); ++ if (ret) ++ goto free_counters; ++ ++ /* ... then go back and fix counters and names */ ++ for (off = 0, num = 0; off < size; off += e.next_offset, num++) { ++ unsigned int i; ++ struct ipt_entry_match m; ++ struct ipt_entry_target t; ++ ++ ret = -EFAULT; ++ if (copy_from_user(&e, userptr + off, ++ sizeof(struct compat_ipt_entry))) ++ goto free_counters; ++ if (copy_to_user(userptr + off + ++ offsetof(struct compat_ipt_entry, counters), ++ &counters[num], sizeof(counters[num]))) ++ goto free_counters; ++ ++ for (i = sizeof(struct compat_ipt_entry); ++ i < e.target_offset; i += m.u.match_size) { ++ if (copy_from_user(&m, userptr + off + i, ++ sizeof(struct ipt_entry_match))) ++ goto free_counters; ++ if (copy_to_user(userptr + off + i + ++ offsetof(struct ipt_entry_match, u.user.name), ++ m.u.kernel.match->name, ++ strlen(m.u.kernel.match->name) + 1)) ++ goto free_counters; ++ } ++ ++ if (copy_from_user(&t, userptr + off + e.target_offset, ++ sizeof(struct ipt_entry_target))) ++ goto free_counters; ++ if (copy_to_user(userptr + off + e.target_offset + ++ offsetof(struct ipt_entry_target, u.user.name), ++ t.u.kernel.target->name, ++ strlen(t.u.kernel.target->name) + 1)) ++ goto free_counters; ++ } ++ ret = 0; ++free_counters: ++ vfree(counters); ++ return ret; ++} ++ ++static int ++compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) ++{ ++ int ret; ++ struct compat_ipt_get_entries get; ++ struct ipt_table *t; ++ ++ ++ if (*len < sizeof(get)) { ++ duprintf("compat_get_entries: %u < %u\n", ++ *len, (unsigned int)sizeof(get)); ++ return -EINVAL; ++ } ++ ++ if (copy_from_user(&get, uptr, sizeof(get)) != 0) ++ return -EFAULT; ++ ++ if (*len != 
sizeof(struct compat_ipt_get_entries) + get.size) { ++ duprintf("compat_get_entries: %u != %u\n", *len, ++ (unsigned int)(sizeof(struct compat_ipt_get_entries) + ++ get.size)); ++ return -EINVAL; ++ } ++ ++ down(&compat_ipt_mutex); ++ t = xt_find_table_lock(AF_INET, get.name); ++ if (t && !IS_ERR(t)) { ++ struct xt_table_info *private = t->private; ++ struct xt_table_info info; ++ duprintf("t->private->number = %u\n", ++ private->number); ++ ret = compat_table_info(private, &info); ++ if (!ret && get.size == info.size) { ++ ret = compat_copy_entries_to_user(private->size, ++ t, uptr->entrytable); ++ } else if (!ret) { ++ duprintf("compat_get_entries: I've got %u not %u!\n", ++ private->size, ++ get.size); ++ ret = -EINVAL; ++ } ++ compat_flush_offsets(); ++ module_put(t->me); ++ xt_table_unlock(t); ++ } else ++ ret = t ? PTR_ERR(t) : -ENOENT; ++ ++ up(&compat_ipt_mutex); ++ return ret; ++} ++ ++static int ++compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ++{ ++ int ret; ++ ++ switch (cmd) { ++ case IPT_SO_GET_INFO: ++ ret = get_info(user, len); ++ break; ++ case IPT_SO_GET_ENTRIES: ++ ret = compat_get_entries(user, len); ++ break; ++ default: ++ duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd); ++ ret = -EINVAL; ++ } ++ return ret; ++} ++#endif ++ + static int + do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) + { + int ret; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits() && (cmd == IPT_SO_SET_REPLACE)) ++ return compat_do_replace(user, len); ++#endif ++ + switch (cmd) { + case IPT_SO_SET_REPLACE: + ret = do_replace(user, len); +@@ -1116,69 +2131,22 @@ do_ipt_get_ctl(struct sock *sk, int cmd, + { + int ret; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + +- switch (cmd) { +- case IPT_SO_GET_INFO: { +- char name[IPT_TABLE_MAXNAMELEN]; +- struct ipt_table *t; +- +- if 
(*len != sizeof(struct ipt_getinfo)) { +- duprintf("length %u != %u\n", *len, +- sizeof(struct ipt_getinfo)); +- ret = -EINVAL; +- break; +- } +- +- if (copy_from_user(name, user, sizeof(name)) != 0) { +- ret = -EFAULT; +- break; +- } +- name[IPT_TABLE_MAXNAMELEN-1] = '\0'; +- +- t = try_then_request_module(xt_find_table_lock(AF_INET, name), +- "iptable_%s", name); +- if (t && !IS_ERR(t)) { +- struct ipt_getinfo info; +- struct xt_table_info *private = t->private; +- +- info.valid_hooks = t->valid_hooks; +- memcpy(info.hook_entry, private->hook_entry, +- sizeof(info.hook_entry)); +- memcpy(info.underflow, private->underflow, +- sizeof(info.underflow)); +- info.num_entries = private->number; +- info.size = private->size; +- memcpy(info.name, name, sizeof(info.name)); +- +- if (copy_to_user(user, &info, *len) != 0) +- ret = -EFAULT; +- else +- ret = 0; +- xt_table_unlock(t); +- module_put(t->me); +- } else +- ret = t ? PTR_ERR(t) : -ENOENT; +- } +- break; ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits()) ++ return compat_do_ipt_get_ctl(sk, cmd, user, len); ++#endif + +- case IPT_SO_GET_ENTRIES: { +- struct ipt_get_entries get; ++ switch (cmd) { ++ case IPT_SO_GET_INFO: ++ ret = get_info(user, len); ++ break; + +- if (*len < sizeof(get)) { +- duprintf("get_entries: %u < %u\n", *len, sizeof(get)); +- ret = -EINVAL; +- } else if (copy_from_user(&get, user, sizeof(get)) != 0) { +- ret = -EFAULT; +- } else if (*len != sizeof(struct ipt_get_entries) + get.size) { +- duprintf("get_entries: %u != %u\n", *len, +- sizeof(struct ipt_get_entries) + get.size); +- ret = -EINVAL; +- } else +- ret = get_entries(&get, user); ++ case IPT_SO_GET_ENTRIES: ++ ret = get_entries(user, len); + break; +- } + + case IPT_SO_GET_REVISION_MATCH: + case IPT_SO_GET_REVISION_TARGET: { +@@ -1214,7 +2182,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd, + return ret; + } + +-int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) ++struct xt_table *ipt_register_table(struct 
xt_table *table, ++ const struct ipt_replace *repl) + { + int ret; + struct xt_table_info *newinfo; +@@ -1224,7 +2193,7 @@ int ipt_register_table(struct xt_table * + + newinfo = xt_alloc_table_info(repl->size); + if (!newinfo) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + /* choose the copy on our node/cpu + * but dont care of preemption +@@ -1239,15 +2208,14 @@ int ipt_register_table(struct xt_table * + repl->underflow); + if (ret != 0) { + xt_free_table_info(newinfo); +- return ret; ++ return ERR_PTR(ret); + } + +- if (xt_register_table(table, &bootstrap, newinfo) != 0) { ++ table = virt_xt_register_table(table, &bootstrap, newinfo); ++ if (IS_ERR(table)) + xt_free_table_info(newinfo); +- return ret; +- } + +- return 0; ++ return table; + } + + void ipt_unregister_table(struct ipt_table *table) +@@ -1255,7 +2223,7 @@ void ipt_unregister_table(struct ipt_tab + struct xt_table_info *private; + void *loc_cpu_entry; + +- private = xt_unregister_table(table); ++ private = virt_xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; +@@ -1263,6 +2231,29 @@ void ipt_unregister_table(struct ipt_tab + xt_free_table_info(private); + } + ++void ipt_flush_table(struct xt_table *table) ++{ ++ struct xt_table *t; ++ void *loc_cpu_entry; ++ ++ if (table == NULL) ++ return; ++ ++ t = xt_find_table_lock(AF_INET, table->name); ++ if (t && !IS_ERR(t)) { ++ struct xt_table_info *private; ++ private = t->private; ++ loc_cpu_entry = private->entries[raw_smp_processor_id()]; ++ IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, ++ cleanup_entry, NULL); ++ if (private->number > private->initial_entries) ++ module_put(t->me); ++ private->size = 0; ++ xt_table_unlock(t); ++ module_put(t->me); ++ } ++} ++ + /* Returns 1 if the type and code is matched by the range, 0 otherwise */ + static inline int + icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, +@@ -1327,6 +2318,9 
@@ icmp_checkentry(const char *tablename, + /* The built-in targets: standard (NULL) and error. */ + static struct ipt_target ipt_standard_target = { + .name = IPT_STANDARD_TARGET, ++#ifdef CONFIG_COMPAT ++ .compat = &compat_ipt_standard_fn, ++#endif + }; + + static struct ipt_target ipt_error_target = { +@@ -1348,43 +2342,107 @@ static struct ipt_match icmp_matchstruct + .name = "icmp", + .match = &icmp_match, + .checkentry = &icmp_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &icmp_compat, ++#endif + }; + +-static int __init init(void) ++static int init_iptables(void) + { + int ret; + +- xt_proto_init(AF_INET); ++ if (ve_ipt_standard_target != NULL) ++ return -EEXIST; ++ ++ ret = xt_register_target(AF_INET, &ipt_standard_target); ++ if (ret) ++ goto out; ++#ifdef CONFIG_VE_IPTABLES ++ ve_ipt_standard_target = xt_find_target(AF_INET, IPT_STANDARD_TARGET, 0); ++ if (IS_ERR(ve_ipt_standard_target)) ++ goto out_standard; ++#endif ++ ret = xt_register_target(AF_INET, &ipt_error_target); ++ if (ret) ++ goto out_error; ++ ret = xt_register_match(AF_INET, &icmp_matchstruct); ++ if (ret) ++ goto out_icmp; ++ ret = xt_proto_init(AF_INET); ++ if (ret) ++ goto out_proc; ++ return 0; ++ ++out_proc: ++ xt_unregister_match(AF_INET, &icmp_matchstruct); ++out_icmp: ++ xt_unregister_target(AF_INET, &ipt_error_target); ++out_error: ++#ifdef CONFIG_VE_IPTABLES ++ ve_ipt_standard_target = NULL; ++out_standard: ++#endif ++ xt_unregister_target(AF_INET, &ipt_standard_target); ++out: ++ return ret; ++} ++ ++static void fini_iptables(void) ++{ ++ xt_proto_fini(AF_INET); ++ xt_unregister_match(AF_INET, &icmp_matchstruct); ++ xt_unregister_target(AF_INET, &ipt_error_target); ++#ifdef CONFIG_VE_IPTABLES ++ ve_ipt_standard_target = NULL; ++#endif ++ xt_unregister_target(AF_INET, &ipt_standard_target); ++} + +- /* Noone else will be downing sem now, so we won't sleep */ +- xt_register_target(AF_INET, &ipt_standard_target); +- xt_register_target(AF_INET, &ipt_error_target); +- 
xt_register_match(AF_INET, &icmp_matchstruct); ++static int __init init(void) ++{ ++ int ret; ++ ++ ret = init_iptables(); ++ if (ret) ++ goto out; + + /* Register setsockopt */ + ret = nf_register_sockopt(&ipt_sockopts); + if (ret < 0) { + duprintf("Unable to register sockopts.\n"); +- return ret; ++ goto out_sockopts; + } + ++ KSYMRESOLVE(init_iptables); ++ KSYMRESOLVE(fini_iptables); ++ KSYMRESOLVE(ipt_flush_table); ++ KSYMMODRESOLVE(ip_tables); + printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n"); + return 0; ++ ++out_sockopts: ++ fini_iptables(); ++out: ++ return ret; + } + + static void __exit fini(void) + { ++ KSYMMODUNRESOLVE(ip_tables); ++ KSYMUNRESOLVE(init_iptables); ++ KSYMUNRESOLVE(fini_iptables); ++ KSYMUNRESOLVE(ipt_flush_table); + nf_unregister_sockopt(&ipt_sockopts); +- +- xt_unregister_match(AF_INET, &icmp_matchstruct); +- xt_unregister_target(AF_INET, &ipt_error_target); +- xt_unregister_target(AF_INET, &ipt_standard_target); +- +- xt_proto_fini(AF_INET); ++ fini_iptables(); + } + + EXPORT_SYMBOL(ipt_register_table); + EXPORT_SYMBOL(ipt_unregister_table); + EXPORT_SYMBOL(ipt_do_table); +-module_init(init); ++#ifdef CONFIG_COMPAT ++EXPORT_SYMBOL(ipt_match_align_compat); ++EXPORT_SYMBOL(ipt_target_align_compat); ++#endif ++EXPORT_SYMBOL(ipt_flush_table); ++subsys_initcall(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_LOG.c linux-2.6.16-026test015/net/ipv4/netfilter/ipt_LOG.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ipt_LOG.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ipt_LOG.c 2006-07-04 14:41:39.000000000 +0400 +@@ -18,6 +18,7 @@ + #include <net/udp.h> + #include <net/tcp.h> + #include <net/route.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -463,10 +464,25 @@ static int ipt_log_checkentry(const char + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int ipt_log_compat(void *target, ++ void **dstptr, 
int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_log_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_log_info)); ++ return ipt_target_align_compat(target, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_target ipt_log_reg = { + .name = "LOG", + .target = ipt_log_target, + .checkentry = ipt_log_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = ipt_log_compat, ++#endif + .me = THIS_MODULE, + }; + +@@ -476,24 +492,44 @@ static struct nf_logger ipt_log_logger = + .me = THIS_MODULE, + }; + ++int init_iptable_LOG(void) ++{ ++ return ipt_register_target(&ipt_log_reg); ++} ++ ++void fini_iptable_LOG(void) ++{ ++ ipt_unregister_target(&ipt_log_reg); ++} ++ + static int __init init(void) + { +- if (ipt_register_target(&ipt_log_reg)) +- return -EINVAL; ++ int err; ++ ++ err = init_iptable_LOG(); ++ if (err < 0) ++ return err; + if (nf_log_register(PF_INET, &ipt_log_logger) < 0) { +- printk(KERN_WARNING "ipt_LOG: not logging via system console " ++ ve_printk(VE_LOG, KERN_WARNING "ipt_LOG: not logging via system console " + "since somebody else already registered for PF_INET\n"); + /* we cannot make module load fail here, since otherwise + * iptables userspace would abort */ + } + ++ ++ KSYMRESOLVE(init_iptable_LOG); ++ KSYMRESOLVE(fini_iptable_LOG); ++ KSYMMODRESOLVE(ipt_LOG); + return 0; + } + + static void __exit fini(void) + { ++ KSYMMODUNRESOLVE(ipt_LOG); ++ KSYMUNRESOLVE(init_iptable_LOG); ++ KSYMUNRESOLVE(fini_iptable_LOG); + nf_log_unregister_logger(&ipt_log_logger); +- ipt_unregister_target(&ipt_log_reg); ++ fini_iptable_LOG(); + } + + module_init(init); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.16-026test015/net/ipv4/netfilter/ipt_MASQUERADE.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ipt_MASQUERADE.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ipt_MASQUERADE.c 2006-07-04 14:41:39.000000000 +0400 +@@ -120,6 +120,7 @@ masquerade_target(struct sk_buff 
**pskb, + return ip_nat_setup_info(ct, &newrange, hooknum); + } + ++#if 0 + static inline int + device_cmp(struct ip_conntrack *i, void *ifindex) + { +@@ -175,6 +176,7 @@ static struct notifier_block masq_dev_no + static struct notifier_block masq_inet_notifier = { + .notifier_call = masq_inet_event, + }; ++#endif + + static struct ipt_target masquerade = { + .name = "MASQUERADE", +@@ -189,12 +191,16 @@ static int __init init(void) + + ret = ipt_register_target(&masquerade); + ++#if 0 ++/* These notifiers are unnecessary and may ++ lead to oops in virtual environments */ + if (ret == 0) { + /* Register for device down reports */ + register_netdevice_notifier(&masq_dev_notifier); + /* Register IP address change reports */ + register_inetaddr_notifier(&masq_inet_notifier); + } ++#endif + + return ret; + } +@@ -202,8 +208,8 @@ static int __init init(void) + static void __exit fini(void) + { + ipt_unregister_target(&masquerade); +- unregister_netdevice_notifier(&masq_dev_notifier); +- unregister_inetaddr_notifier(&masq_inet_notifier); ++/* unregister_netdevice_notifier(&masq_dev_notifier); ++ unregister_inetaddr_notifier(&masq_inet_notifier); */ + } + + module_init(init); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_REDIRECT.c linux-2.6.16-026test015/net/ipv4/netfilter/ipt_REDIRECT.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ipt_REDIRECT.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ipt_REDIRECT.c 2006-07-04 14:41:39.000000000 +0400 +@@ -17,6 +17,7 @@ + #include <linux/inetdevice.h> + #include <net/protocol.h> + #include <net/checksum.h> ++#include <linux/nfcalls.h> + #include <linux/netfilter_ipv4.h> + #include <linux/netfilter_ipv4/ip_nat_rule.h> + +@@ -25,7 +26,7 @@ MODULE_AUTHOR("Netfilter Core Team <core + MODULE_DESCRIPTION("iptables REDIRECT target module"); + + #if 0 +-#define DEBUGP printk ++#define DEBUGP ve_printk + #else + #define DEBUGP(format, args...) 
+ #endif +@@ -94,8 +95,14 @@ redirect_target(struct sk_buff **pskb, + + rcu_read_lock(); + indev = __in_dev_get_rcu((*pskb)->dev); +- if (indev && (ifa = indev->ifa_list)) ++ if (indev && (ifa = indev->ifa_list)) { ++ /* because of venet device specific, we should use ++ * second ifa in the list */ ++ if (IN_LOOPBACK(ntohl(ifa->ifa_local)) && ++ ifa->ifa_next) ++ ifa = ifa->ifa_next; + newdst = ifa->ifa_local; ++ } + rcu_read_unlock(); + + if (!newdst) +@@ -119,15 +126,37 @@ static struct ipt_target redirect_reg = + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_iptable_REDIRECT(void) + { + return ipt_register_target(&redirect_reg); + } + +-static void __exit fini(void) ++void fini_iptable_REDIRECT(void) + { + ipt_unregister_target(&redirect_reg); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_REDIRECT(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_REDIRECT); ++ KSYMRESOLVE(fini_iptable_REDIRECT); ++ KSYMMODRESOLVE(ipt_REDIRECT); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ipt_REDIRECT); ++ KSYMUNRESOLVE(init_iptable_REDIRECT); ++ KSYMUNRESOLVE(fini_iptable_REDIRECT); ++ fini_iptable_REDIRECT(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.16-026test015/net/ipv4/netfilter/ipt_REJECT.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ipt_REJECT.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ipt_REJECT.c 2006-07-04 14:41:39.000000000 +0400 +@@ -22,6 +22,7 @@ + #include <net/ip.h> + #include <net/tcp.h> + #include <net/route.h> ++#include <linux/nfcalls.h> + #include <net/dst.h> + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_REJECT.h> +@@ -322,22 +323,59 @@ static int check(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *target, ++ void **dstptr, int *size, int convert) ++{ ++ int off; 
++ ++ off = IPT_ALIGN(sizeof(struct ipt_reject_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_reject_info)); ++ return ipt_target_align_compat(target, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_target ipt_reject_reg = { + .name = "REJECT", + .target = reject, + .checkentry = check, ++#ifdef CONFIG_COMPAT ++ .compat = compat, ++#endif + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_iptable_REJECT(void) + { + return ipt_register_target(&ipt_reject_reg); + } + +-static void __exit fini(void) ++void fini_iptable_REJECT(void) + { + ipt_unregister_target(&ipt_reject_reg); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_REJECT(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_REJECT); ++ KSYMRESOLVE(fini_iptable_REJECT); ++ KSYMMODRESOLVE(ipt_REJECT); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ipt_REJECT); ++ KSYMUNRESOLVE(init_iptable_REJECT); ++ KSYMUNRESOLVE(fini_iptable_REJECT); ++ fini_iptable_REJECT(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_TCPMSS.c linux-2.6.16-026test015/net/ipv4/netfilter/ipt_TCPMSS.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ipt_TCPMSS.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ipt_TCPMSS.c 2006-07-04 14:41:39.000000000 +0400 +@@ -13,6 +13,7 @@ + + #include <linux/ip.h> + #include <net/tcp.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_TCPMSS.h> +@@ -242,22 +243,59 @@ ipt_tcpmss_checkentry(const char *tablen + return 0; + } + ++#ifdef CONFIG_COMPAT ++static int ipt_tcpmss_compat(void *target, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_tcpmss_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_info)); ++ return ipt_target_align_compat(target, dstptr, size, off, convert); ++} ++#endif ++ + static 
struct ipt_target ipt_tcpmss_reg = { + .name = "TCPMSS", + .target = ipt_tcpmss_target, + .checkentry = ipt_tcpmss_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = ipt_tcpmss_compat, ++#endif + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_iptable_TCPMSS(void) + { + return ipt_register_target(&ipt_tcpmss_reg); + } + +-static void __exit fini(void) ++void fini_iptable_TCPMSS(void) + { + ipt_unregister_target(&ipt_tcpmss_reg); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_TCPMSS(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_TCPMSS); ++ KSYMRESOLVE(fini_iptable_TCPMSS); ++ KSYMMODRESOLVE(ipt_TCPMSS); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ipt_TCPMSS); ++ KSYMUNRESOLVE(init_iptable_TCPMSS); ++ KSYMUNRESOLVE(fini_iptable_TCPMSS); ++ fini_iptable_TCPMSS(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_TOS.c linux-2.6.16-026test015/net/ipv4/netfilter/ipt_TOS.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ipt_TOS.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ipt_TOS.c 2006-07-04 14:41:39.000000000 +0400 +@@ -15,6 +15,7 @@ + + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_TOS.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +@@ -83,22 +84,59 @@ checkentry(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *target, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_tos_target_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_target_info)); ++ return ipt_target_align_compat(target, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_target ipt_tos_reg = { + .name = "TOS", + .target = target, + .checkentry = checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = compat, ++#endif + 
.me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_iptable_TOS(void) + { + return ipt_register_target(&ipt_tos_reg); + } + +-static void __exit fini(void) ++void fini_iptable_TOS(void) + { + ipt_unregister_target(&ipt_tos_reg); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_TOS(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_TOS); ++ KSYMRESOLVE(fini_iptable_TOS); ++ KSYMMODRESOLVE(ipt_TOS); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ipt_TOS); ++ KSYMUNRESOLVE(init_iptable_TOS); ++ KSYMUNRESOLVE(fini_iptable_TOS); ++ fini_iptable_TOS(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_multiport.c linux-2.6.16-026test015/net/ipv4/netfilter/ipt_multiport.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ipt_multiport.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ipt_multiport.c 2006-07-04 14:41:39.000000000 +0400 +@@ -13,6 +13,7 @@ + #include <linux/types.h> + #include <linux/udp.h> + #include <linux/skbuff.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ipt_multiport.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -21,6 +22,13 @@ MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); + MODULE_DESCRIPTION("iptables multiple port match module"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_multiport_match (*(get_exec_env()->_multiport_match)) ++#else ++#define ve_multiport_match multiport_match ++#endif ++ + #if 0 + #define duprintf(format, args...) 
printk(format , ## args) + #else +@@ -174,11 +182,36 @@ checkentry_v1(const char *tablename, + return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport_v1))); + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_multiport)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++ ++static int compat_v1(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_multiport_v1)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport_v1)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_match multiport_match = { + .name = "multiport", + .revision = 0, + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + +@@ -187,10 +220,13 @@ static struct ipt_match multiport_match_ + .revision = 1, + .match = &match_v1, + .checkentry = &checkentry_v1, ++#ifdef CONFIG_COMPAT ++ .compat = &compat_v1, ++#endif + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_iptable_multiport(void) + { + int err; + +@@ -204,11 +240,33 @@ static int __init init(void) + return err; + } + +-static void __exit fini(void) ++void fini_iptable_multiport(void) + { + ipt_unregister_match(&multiport_match); + ipt_unregister_match(&multiport_match_v1); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_multiport(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_multiport); ++ KSYMRESOLVE(fini_iptable_multiport); ++ KSYMMODRESOLVE(ipt_multiport); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ipt_multiport); ++ KSYMUNRESOLVE(init_iptable_multiport); ++ KSYMUNRESOLVE(fini_iptable_multiport); ++ fini_iptable_multiport(); ++} ++ + module_init(init); + module_exit(fini); 
+diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_tos.c linux-2.6.16-026test015/net/ipv4/netfilter/ipt_tos.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ipt_tos.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ipt_tos.c 2006-07-04 14:41:39.000000000 +0400 +@@ -10,6 +10,7 @@ + + #include <linux/module.h> + #include <linux/skbuff.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ipt_tos.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -17,6 +18,13 @@ + MODULE_LICENSE("GPL"); + MODULE_DESCRIPTION("iptables TOS match module"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_tos_match (*(get_exec_env()->_tos_match)) ++#else ++#define ve_tos_match tos_match ++#endif ++ + static int + match(const struct sk_buff *skb, + const struct net_device *in, +@@ -44,22 +52,59 @@ checkentry(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_tos_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_info)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_match tos_match = { + .name = "tos", + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_iptable_tos(void) + { + return ipt_register_match(&tos_match); + } + +-static void __exit fini(void) ++void fini_iptable_tos(void) + { + ipt_unregister_match(&tos_match); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_tos(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_tos); ++ KSYMRESOLVE(fini_iptable_tos); ++ KSYMMODRESOLVE(ipt_tos); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ipt_tos); ++ KSYMUNRESOLVE(init_iptable_tos); ++ KSYMUNRESOLVE(fini_iptable_tos); ++ 
fini_iptable_tos(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_ttl.c linux-2.6.16-026test015/net/ipv4/netfilter/ipt_ttl.c +--- linux-2.6.16.orig/net/ipv4/netfilter/ipt_ttl.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/ipt_ttl.c 2006-07-04 14:41:39.000000000 +0400 +@@ -11,6 +11,7 @@ + + #include <linux/module.h> + #include <linux/skbuff.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ipt_ttl.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -57,22 +58,58 @@ static int checkentry(const char *tablen + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_ttl_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_ttl_info)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_match ttl_match = { + .name = "ttl", + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_iptable_ttl(void) + { + return ipt_register_match(&ttl_match); + } + +-static void __exit fini(void) ++void fini_iptable_ttl(void) + { + ipt_unregister_match(&ttl_match); ++} + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_ttl(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_ttl); ++ KSYMRESOLVE(fini_iptable_ttl); ++ KSYMMODRESOLVE(ipt_ttl); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ipt_ttl); ++ KSYMUNRESOLVE(init_iptable_ttl); ++ KSYMUNRESOLVE(fini_iptable_ttl); ++ fini_iptable_ttl(); + } + + module_init(init); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/iptable_filter.c linux-2.6.16-026test015/net/ipv4/netfilter/iptable_filter.c +--- linux-2.6.16.orig/net/ipv4/netfilter/iptable_filter.c 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16-026test015/net/ipv4/netfilter/iptable_filter.c 2006-07-04 14:41:39.000000000 +0400 +@@ -12,12 +12,20 @@ + + #include <linux/module.h> + #include <linux/moduleparam.h> ++#include <linux/nfcalls.h> + #include <linux/netfilter_ipv4/ip_tables.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); + MODULE_DESCRIPTION("iptables filter table"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_packet_filter (get_exec_env()->_ve_ipt_filter_pf) ++#else ++#define ve_packet_filter &packet_filter ++#endif ++ + #define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT)) + + static struct +@@ -25,7 +33,7 @@ static struct + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +-} initial_table __initdata ++} initial_table + = { { "filter", FILTER_VALID_HOOKS, 4, + sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + { [NF_IP_LOCAL_IN] = 0, +@@ -90,7 +98,7 @@ ipt_hook(unsigned int hook, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { +- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); ++ return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL); + } + + static unsigned int +@@ -108,7 +116,7 @@ ipt_local_out_hook(unsigned int hook, + return NF_ACCEPT; + } + +- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); ++ return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL); + } + + static struct nf_hook_ops ipt_ops[] = { +@@ -139,56 +147,89 @@ static struct nf_hook_ops ipt_ops[] = { + static int forward = NF_ACCEPT; + module_param(forward, bool, 0000); + +-static int __init init(void) ++int init_iptable_filter(void) + { + int ret; +- +- if (forward < 0 || forward > NF_MAX_VERDICT) { +- printk("iptables forward must be 0 or 1\n"); +- return -EINVAL; +- } +- +- /* Entry 1 is the FORWARD hook */ +- initial_table.entries[1].target.verdict = -forward - 1; ++ struct ipt_table 
*tmp_filter; + + /* Register table */ +- ret = ipt_register_table(&packet_filter, &initial_table.repl); +- if (ret < 0) +- return ret; ++ tmp_filter = ipt_register_table(&packet_filter, ++ &initial_table.repl); ++ if (IS_ERR(tmp_filter)) ++ return PTR_ERR(tmp_filter); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_filter = tmp_filter; ++#endif + + /* Register hooks */ +- ret = nf_register_hook(&ipt_ops[0]); ++ ret = virt_nf_register_hook(&ipt_ops[0]); + if (ret < 0) + goto cleanup_table; + +- ret = nf_register_hook(&ipt_ops[1]); ++ ret = virt_nf_register_hook(&ipt_ops[1]); + if (ret < 0) + goto cleanup_hook0; + +- ret = nf_register_hook(&ipt_ops[2]); ++ ret = virt_nf_register_hook(&ipt_ops[2]); + if (ret < 0) + goto cleanup_hook1; + + return ret; + + cleanup_hook1: +- nf_unregister_hook(&ipt_ops[1]); ++ virt_nf_unregister_hook(&ipt_ops[1]); + cleanup_hook0: +- nf_unregister_hook(&ipt_ops[0]); ++ virt_nf_unregister_hook(&ipt_ops[0]); + cleanup_table: +- ipt_unregister_table(&packet_filter); ++ ipt_unregister_table(ve_packet_filter); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_filter = NULL; ++#endif + + return ret; + } + +-static void __exit fini(void) ++void fini_iptable_filter(void) + { + unsigned int i; + + for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) +- nf_unregister_hook(&ipt_ops[i]); ++ virt_nf_unregister_hook(&ipt_ops[i]); + +- ipt_unregister_table(&packet_filter); ++ ipt_unregister_table(ve_packet_filter); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_filter = NULL; ++#endif ++} ++ ++static int __init init(void) ++{ ++ int err; ++ ++ if (forward < 0 || forward > NF_MAX_VERDICT) { ++ printk("iptables forward must be 0 or 1\n"); ++ return -EINVAL; ++ } ++ ++ /* Entry 1 is the FORWARD hook */ ++ initial_table.entries[1].target.verdict = -forward - 1; ++ ++ err = init_iptable_filter(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_filter); ++ KSYMRESOLVE(fini_iptable_filter); ++ KSYMMODRESOLVE(iptable_filter); ++ return 0; ++} ++ ++static 
void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(iptable_filter); ++ KSYMUNRESOLVE(init_iptable_filter); ++ KSYMUNRESOLVE(fini_iptable_filter); ++ fini_iptable_filter(); + } + + module_init(init); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/iptable_mangle.c linux-2.6.16-026test015/net/ipv4/netfilter/iptable_mangle.c +--- linux-2.6.16.orig/net/ipv4/netfilter/iptable_mangle.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/iptable_mangle.c 2006-07-04 14:41:39.000000000 +0400 +@@ -17,6 +17,7 @@ + #include <linux/skbuff.h> + #include <net/sock.h> + #include <net/route.h> ++#include <linux/nfcalls.h> + #include <linux/ip.h> + + MODULE_LICENSE("GPL"); +@@ -35,7 +36,7 @@ static struct + struct ipt_replace repl; + struct ipt_standard entries[5]; + struct ipt_error term; +-} initial_table __initdata ++} initial_table + = { { "mangle", MANGLE_VALID_HOOKS, 6, + sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), + { [NF_IP_PRE_ROUTING] = 0, +@@ -112,6 +113,13 @@ static struct ipt_table packet_mangler = + .af = AF_INET, + }; + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_packet_mangler (get_exec_env()->_ipt_mangle_table) ++#else ++#define ve_packet_mangler &packet_mangler ++#endif ++ + /* The work comes in here from netfilter.c. */ + static unsigned int + ipt_route_hook(unsigned int hook, +@@ -120,7 +128,7 @@ ipt_route_hook(unsigned int hook, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { +- return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); ++ return ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL); + } + + static unsigned int +@@ -149,7 +157,8 @@ ipt_local_hook(unsigned int hook, + daddr = (*pskb)->nh.iph->daddr; + tos = (*pskb)->nh.iph->tos; + +- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); ++ ret = ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL); ++ + /* Reroute for ANY change. 
*/ + if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE + && ((*pskb)->nh.iph->saddr != saddr +@@ -201,60 +210,103 @@ static struct nf_hook_ops ipt_ops[] = { + }, + }; + +-static int __init init(void) ++static int mangle_init(struct nf_hook_ops ipt_ops[]) + { + int ret; ++ struct ipt_table *tmp_mangler; + + /* Register table */ +- ret = ipt_register_table(&packet_mangler, &initial_table.repl); +- if (ret < 0) +- return ret; ++ tmp_mangler = ipt_register_table(&packet_mangler, ++ &initial_table.repl); ++ if (IS_ERR(tmp_mangler)) ++ return PTR_ERR(tmp_mangler); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_mangler = tmp_mangler; ++#endif + + /* Register hooks */ +- ret = nf_register_hook(&ipt_ops[0]); ++ ret = virt_nf_register_hook(&ipt_ops[0]); + if (ret < 0) + goto cleanup_table; + +- ret = nf_register_hook(&ipt_ops[1]); ++ ret = virt_nf_register_hook(&ipt_ops[1]); + if (ret < 0) + goto cleanup_hook0; + +- ret = nf_register_hook(&ipt_ops[2]); ++ ret = virt_nf_register_hook(&ipt_ops[2]); + if (ret < 0) + goto cleanup_hook1; + +- ret = nf_register_hook(&ipt_ops[3]); ++ ret = virt_nf_register_hook(&ipt_ops[3]); + if (ret < 0) + goto cleanup_hook2; + +- ret = nf_register_hook(&ipt_ops[4]); ++ ret = virt_nf_register_hook(&ipt_ops[4]); + if (ret < 0) + goto cleanup_hook3; + + return ret; + + cleanup_hook3: +- nf_unregister_hook(&ipt_ops[3]); ++ virt_nf_unregister_hook(&ipt_ops[3]); + cleanup_hook2: +- nf_unregister_hook(&ipt_ops[2]); ++ virt_nf_unregister_hook(&ipt_ops[2]); + cleanup_hook1: +- nf_unregister_hook(&ipt_ops[1]); ++ virt_nf_unregister_hook(&ipt_ops[1]); + cleanup_hook0: +- nf_unregister_hook(&ipt_ops[0]); ++ virt_nf_unregister_hook(&ipt_ops[0]); + cleanup_table: +- ipt_unregister_table(&packet_mangler); ++ ipt_unregister_table(ve_packet_mangler); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_mangler = NULL; ++#endif + + return ret; + } + +-static void __exit fini(void) ++static void mangle_fini(struct nf_hook_ops ipt_ops[]) + { + unsigned int i; + +- for (i = 
0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) +- nf_unregister_hook(&ipt_ops[i]); ++ for (i = 0; i < 5; i++) ++ virt_nf_unregister_hook(&ipt_ops[i]); ++ ++ ipt_unregister_table(ve_packet_mangler); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_mangler = NULL; ++#endif ++} ++ ++int init_iptable_mangle(void) ++{ ++ return mangle_init(ipt_ops); ++} ++ ++void fini_iptable_mangle(void) ++{ ++ mangle_fini(ipt_ops); ++} ++ ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_mangle(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_mangle); ++ KSYMRESOLVE(fini_iptable_mangle); ++ KSYMMODRESOLVE(iptable_mangle); ++ return 0; ++} + +- ipt_unregister_table(&packet_mangler); ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(iptable_mangle); ++ KSYMUNRESOLVE(init_iptable_mangle); ++ KSYMUNRESOLVE(fini_iptable_mangle); ++ fini_iptable_mangle(); + } + + module_init(init); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/iptable_raw.c linux-2.6.16-026test015/net/ipv4/netfilter/iptable_raw.c +--- linux-2.6.16.orig/net/ipv4/netfilter/iptable_raw.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/iptable_raw.c 2006-07-04 14:41:39.000000000 +0400 +@@ -118,12 +118,13 @@ static struct nf_hook_ops ipt_ops[] = { + + static int __init init(void) + { ++ struct ipt_table *tmp; + int ret; + + /* Register table */ +- ret = ipt_register_table(&packet_raw, &initial_table.repl); +- if (ret < 0) +- return ret; ++ tmp = ipt_register_table(&packet_raw, &initial_table.repl); ++ if (IS_ERR(tmp)) ++ return PTR_ERR(tmp); + + /* Register hooks */ + ret = nf_register_hook(&ipt_ops[0]); +diff -upr linux-2.6.16.orig/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c linux-2.6.16-026test015/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +--- linux-2.6.16.orig/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2006-07-04 
14:41:36.000000000 +0400 +@@ -354,6 +354,7 @@ getorigdst(struct sock *sk, int optval, + .tuple.dst.u.tcp.port; + sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.u3.ip; ++ memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + + DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", + NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); +diff -upr linux-2.6.16.orig/net/ipv4/proc.c linux-2.6.16-026test015/net/ipv4/proc.c +--- linux-2.6.16.orig/net/ipv4/proc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/proc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -258,11 +258,12 @@ static int snmp_seq_show(struct seq_file + seq_printf(seq, " %s", snmp4_ipstats_list[i].name); + + seq_printf(seq, "\nIp: %d %d", +- ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl); ++ ve_ipv4_devconf.forwarding ? 1 : 2, ++ sysctl_ip_default_ttl); + + for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + seq_printf(seq, " %lu", +- fold_field((void **) ip_statistics, ++ fold_field((void **) ve_ip_statistics, + snmp4_ipstats_list[i].entry)); + + seq_puts(seq, "\nIcmp:"); +@@ -272,7 +273,7 @@ static int snmp_seq_show(struct seq_file + seq_puts(seq, "\nIcmp:"); + for (i = 0; snmp4_icmp_list[i].name != NULL; i++) + seq_printf(seq, " %lu", +- fold_field((void **) icmp_statistics, ++ fold_field((void **) ve_icmp_statistics, + snmp4_icmp_list[i].entry)); + + seq_puts(seq, "\nTcp:"); +@@ -284,11 +285,11 @@ static int snmp_seq_show(struct seq_file + /* MaxConn field is signed, RFC 2012 */ + if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) + seq_printf(seq, " %ld", +- fold_field((void **) tcp_statistics, ++ fold_field((void **) ve_tcp_statistics, + snmp4_tcp_list[i].entry)); + else + seq_printf(seq, " %lu", +- fold_field((void **) tcp_statistics, ++ fold_field((void **) ve_tcp_statistics, + snmp4_tcp_list[i].entry)); + } + +@@ -299,7 +300,7 @@ static int snmp_seq_show(struct seq_file + seq_puts(seq, "\nUdp:"); + for (i = 0; snmp4_udp_list[i].name != NULL; i++) + 
seq_printf(seq, " %lu", +- fold_field((void **) udp_statistics, ++ fold_field((void **) ve_udp_statistics, + snmp4_udp_list[i].entry)); + + seq_putc(seq, '\n'); +@@ -333,7 +334,7 @@ static int netstat_seq_show(struct seq_f + seq_puts(seq, "\nTcpExt:"); + for (i = 0; snmp4_net_list[i].name != NULL; i++) + seq_printf(seq, " %lu", +- fold_field((void **) net_statistics, ++ fold_field((void **) ve_net_statistics, + snmp4_net_list[i].entry)); + + seq_putc(seq, '\n'); +@@ -357,10 +358,10 @@ int __init ip_misc_proc_init(void) + { + int rc = 0; + +- if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops)) ++ if (!proc_glob_fops_create("net/netstat", S_IRUGO, &netstat_seq_fops)) + goto out_netstat; + +- if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops)) ++ if (!proc_glob_fops_create("net/snmp", S_IRUGO, &snmp_seq_fops)) + goto out_snmp; + + if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops)) +@@ -368,9 +369,9 @@ int __init ip_misc_proc_init(void) + out: + return rc; + out_sockstat: +- proc_net_remove("snmp"); ++ remove_proc_glob_entry("net/snmp", NULL); + out_snmp: +- proc_net_remove("netstat"); ++ remove_proc_glob_entry("net/netstat", NULL); + out_netstat: + rc = -ENOMEM; + goto out; +diff -upr linux-2.6.16.orig/net/ipv4/raw.c linux-2.6.16-026test015/net/ipv4/raw.c +--- linux-2.6.16.orig/net/ipv4/raw.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/raw.c 2006-07-04 14:41:38.000000000 +0400 +@@ -114,7 +114,8 @@ struct sock *__raw_v4_lookup(struct sock + if (inet->num == num && + !(inet->daddr && inet->daddr != raddr) && + !(inet->rcv_saddr && inet->rcv_saddr != laddr) && +- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) ++ !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) && ++ ve_accessible_strict(VE_OWNER_SK(sk), get_exec_env())) + goto found; /* gotcha */ + } + sk = NULL; +@@ -753,8 +754,12 @@ static struct sock *raw_get_first(struct + struct hlist_node *node; + + sk_for_each(sk, node, 
&raw_v4_htable[state->bucket]) +- if (sk->sk_family == PF_INET) ++ if (sk->sk_family == PF_INET) { ++ if (!ve_accessible(VE_OWNER_SK(sk), ++ get_exec_env())) ++ continue; + goto found; ++ } + } + sk = NULL; + found: +@@ -768,8 +773,14 @@ static struct sock *raw_get_next(struct + do { + sk = sk_next(sk); + try_again: +- ; +- } while (sk && sk->sk_family != PF_INET); ++ if (!sk) ++ break; ++ if (sk->sk_family != PF_INET) ++ continue; ++ if (ve_accessible(VE_OWNER_SK(sk), ++ get_exec_env())) ++ break; ++ } while (1); + + if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { + sk = sk_head(&raw_v4_htable[state->bucket]); +@@ -886,13 +897,13 @@ static struct file_operations raw_seq_fo + + int __init raw_proc_init(void) + { +- if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops)) ++ if (!proc_glob_fops_create("net/raw", S_IRUGO, &raw_seq_fops)) + return -ENOMEM; + return 0; + } + + void __init raw_proc_exit(void) + { +- proc_net_remove("raw"); ++ remove_proc_glob_entry("net/raw", NULL); + } + #endif /* CONFIG_PROC_FS */ +diff -upr linux-2.6.16.orig/net/ipv4/route.c linux-2.6.16-026test015/net/ipv4/route.c +--- linux-2.6.16.orig/net/ipv4/route.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/route.c 2006-07-04 14:41:39.000000000 +0400 +@@ -114,6 +114,8 @@ + + #define RT_GC_TIMEOUT (300*HZ) + ++int ip_rt_src_check = 1; ++ + static int ip_rt_min_delay = 2 * HZ; + static int ip_rt_max_delay = 10 * HZ; + static int ip_rt_max_size; +@@ -253,11 +255,28 @@ static unsigned int rt_hash_code(u32 dad + & rt_hash_mask); + } + ++void prepare_rt_cache(void) ++{ ++#ifdef CONFIG_VE ++ struct rtable *r; ++ int i; ++ ++ for (i = rt_hash_mask; i >= 0; i--) { ++ spin_lock_bh(rt_hash_lock_addr(i)); ++ for (r = rt_hash_table[i].chain; r; r = r->u.rt_next) { ++ r->fl.owner_env = get_ve0(); ++ } ++ spin_unlock_bh(rt_hash_lock_addr(i)); ++ } ++#endif ++} ++ + #ifdef CONFIG_PROC_FS + struct rt_cache_iter_state { + int bucket; + }; + ++static struct rtable 
*rt_cache_get_next(struct seq_file *seq, struct rtable *r); + static struct rtable *rt_cache_get_first(struct seq_file *seq) + { + struct rtable *r = NULL; +@@ -270,6 +289,8 @@ static struct rtable *rt_cache_get_first + break; + rcu_read_unlock_bh(); + } ++ if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env())) ++ r = rt_cache_get_next(seq, r); + return r; + } + +@@ -277,14 +298,19 @@ static struct rtable *rt_cache_get_next( + { + struct rt_cache_iter_state *st = rcu_dereference(seq->private); + +- r = r->u.rt_next; ++start: ++ do { ++ r = r->u.rt_next; ++ } while (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env())); + while (!r) { + rcu_read_unlock_bh(); + if (--st->bucket < 0) +- break; ++ goto out; + rcu_read_lock_bh(); + r = rt_hash_table[st->bucket].chain; + } ++ goto start; ++out: + return r; + } + +@@ -556,7 +582,8 @@ static inline int compare_keys(struct fl + { + return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 && + fl1->oif == fl2->oif && +- fl1->iif == fl2->iif; ++ fl1->iif == fl2->iif && ++ ve_accessible_strict(fl1->owner_env, fl2->owner_env); + } + + #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +@@ -670,26 +697,105 @@ static void rt_check_expire(unsigned lon + mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval); + } + ++typedef unsigned long rt_flush_gen_t; ++ ++#ifdef CONFIG_VE ++ ++static rt_flush_gen_t rt_flush_gen; ++ ++/* called under rt_flush_lock */ ++static void set_rt_flush_required(struct ve_struct *env) ++{ ++ /* ++ * If the global generation rt_flush_gen is equal to G, then ++ * the pass considering entries labelled by G is yet to come. 
++ */ ++ env->rt_flush_required = rt_flush_gen; ++} ++ ++static spinlock_t rt_flush_lock; ++static rt_flush_gen_t reset_rt_flush_required(void) ++{ ++ rt_flush_gen_t g; ++ ++ spin_lock_bh(&rt_flush_lock); ++ g = rt_flush_gen++; ++ spin_unlock_bh(&rt_flush_lock); ++ return g; ++} ++ ++static int check_rt_flush_required(struct ve_struct *env, rt_flush_gen_t gen) ++{ ++ /* can be checked without the lock */ ++ return env->rt_flush_required >= gen; ++} ++ ++#else ++ ++static void set_rt_flush_required(struct ve_struct *env) ++{ ++} ++ ++static rt_flush_gen_t reset_rt_flush_required(void) ++{ ++ return 0; ++} ++ ++#endif ++ + /* This can run from both BH and non-BH contexts, the latter + * in the case of a forced flush event. + */ + static void rt_run_flush(unsigned long dummy) + { + int i; +- struct rtable *rth, *next; ++ struct rtable * rth, * next; ++ struct rtable * tail; ++ rt_flush_gen_t gen; + + rt_deadline = 0; + + get_random_bytes(&rt_hash_rnd, 4); + ++ gen = reset_rt_flush_required(); ++ + for (i = rt_hash_mask; i >= 0; i--) { ++#ifdef CONFIG_VE ++ struct rtable ** prev, * p; ++ ++ spin_lock_bh(rt_hash_lock_addr(i)); ++ rth = rt_hash_table[i].chain; ++ ++ /* defer releasing the head of the list after spin_unlock */ ++ for (tail = rth; tail; tail = tail->u.rt_next) ++ if (!check_rt_flush_required(tail->fl.owner_env, gen)) ++ break; ++ if (rth != tail) ++ rt_hash_table[i].chain = tail; ++ ++ /* call rt_free on entries after the tail requiring flush */ ++ prev = &rt_hash_table[i].chain; ++ for (p = *prev; p; p = next) { ++ next = p->u.rt_next; ++ if (!check_rt_flush_required(p->fl.owner_env, gen)) { ++ prev = &p->u.rt_next; ++ } else { ++ *prev = next; ++ rt_free(p); ++ } ++ } ++ ++#else + spin_lock_bh(rt_hash_lock_addr(i)); + rth = rt_hash_table[i].chain; + if (rth) + rt_hash_table[i].chain = NULL; ++ tail = NULL; ++ ++#endif + spin_unlock_bh(rt_hash_lock_addr(i)); + +- for (; rth; rth = next) { ++ for (; rth != tail; rth = next) { + next = rth->u.rt_next; + 
rt_free(rth); + } +@@ -728,6 +834,8 @@ void rt_cache_flush(int delay) + delay = tmo; + } + ++ set_rt_flush_required(get_exec_env()); ++ + if (delay <= 0) { + spin_unlock_bh(&rt_flush_lock); + rt_run_flush(0); +@@ -743,9 +851,30 @@ void rt_cache_flush(int delay) + + static void rt_secret_rebuild(unsigned long dummy) + { ++ int i; ++ struct rtable *rth, *next; + unsigned long now = jiffies; + +- rt_cache_flush(0); ++ spin_lock_bh(&rt_flush_lock); ++ del_timer(&rt_flush_timer); ++ spin_unlock_bh(&rt_flush_lock); ++ ++ rt_deadline = 0; ++ get_random_bytes(&rt_hash_rnd, 4); ++ ++ for (i = rt_hash_mask; i >= 0; i--) { ++ spin_lock_bh(rt_hash_lock_addr(i)); ++ rth = rt_hash_table[i].chain; ++ if (rth) ++ rt_hash_table[i].chain = NULL; ++ spin_unlock_bh(rt_hash_lock_addr(i)); ++ ++ for (; rth; rth = next) { ++ next = rth->u.rt_next; ++ rt_free(rth); ++ } ++ } ++ + mod_timer(&rt_secret_timer, now + ip_rt_secret_interval); + } + +@@ -1118,7 +1247,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd + struct rtable *rth, **rthp; + u32 skeys[2] = { saddr, 0 }; + int ikeys[2] = { dev->ifindex, 0 }; ++ struct ve_struct *ve; + ++ ve = get_exec_env(); + tos &= IPTOS_RT_MASK; + + if (!in_dev) +@@ -1154,6 +1285,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd + rth->fl.fl4_src != skeys[i] || + rth->fl.fl4_tos != tos || + rth->fl.oif != ikeys[k] || ++#ifdef CONFIG_VE ++ !ve_accessible_strict(rth->fl.owner_env, ++ ve) || ++#endif + rth->fl.iif != 0) { + rthp = &rth->u.rt_next; + continue; +@@ -1192,6 +1327,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd + rt->u.dst.neighbour = NULL; + rt->u.dst.hh = NULL; + rt->u.dst.xfrm = NULL; ++#ifdef CONFIG_VE ++ rt->fl.owner_env = ve; ++#endif + + rt->rt_flags |= RTCF_REDIRECTED; + +@@ -1631,6 +1769,9 @@ static int ip_route_input_mc(struct sk_b + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark= skb->nfmark; + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env = get_exec_env(); ++#endif + rth->fl.fl4_src = saddr; + rth->rt_src = saddr; + #ifdef 
CONFIG_NET_CLS_ROUTE +@@ -1776,6 +1917,9 @@ static inline int __mkroute_input(struct + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark= skb->nfmark; + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env = get_exec_env(); ++#endif + rth->fl.fl4_src = saddr; + rth->rt_src = saddr; + rth->rt_gateway = daddr; +@@ -2021,6 +2165,9 @@ local_input: + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark= skb->nfmark; + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env = get_exec_env(); ++#endif + rth->fl.fl4_src = saddr; + rth->rt_src = saddr; + #ifdef CONFIG_NET_CLS_ROUTE +@@ -2100,6 +2247,9 @@ int ip_route_input(struct sk_buff *skb, + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark == skb->nfmark && + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env == get_exec_env() && ++#endif + rth->fl.fl4_tos == tos) { + rth->u.dst.lastuse = jiffies; + dst_hold(&rth->u.dst); +@@ -2226,6 +2376,9 @@ static inline int __mkroute_output(struc + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark= oldflp->fl4_fwmark; + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env = get_exec_env(); ++#endif + rth->rt_dst = fl->fl4_dst; + rth->rt_src = fl->fl4_src; + rth->rt_iif = oldflp->oif ? : dev_out->ifindex; +@@ -2399,10 +2552,13 @@ static int ip_route_output_slow(struct r + ZERONET(oldflp->fl4_src)) + goto out; + +- /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ +- dev_out = ip_dev_find(oldflp->fl4_src); +- if (dev_out == NULL) +- goto out; ++ if (ip_rt_src_check) { ++ /* It is equivalent to ++ inet_addr_type(saddr) == RTN_LOCAL */ ++ dev_out = ip_dev_find(oldflp->fl4_src); ++ if (dev_out == NULL) ++ goto out; ++ } + + /* I removed check for oif == dev_out->oif here. + It was wrong for two reasons: +@@ -2429,6 +2585,12 @@ static int ip_route_output_slow(struct r + Luckily, this hack is good workaround. 
+ */ + ++ if (dev_out == NULL) { ++ dev_out = ip_dev_find(oldflp->fl4_src); ++ if (dev_out == NULL) ++ goto out; ++ } ++ + fl.oif = dev_out->ifindex; + goto make_route; + } +@@ -2575,6 +2737,7 @@ int __ip_route_output_key(struct rtable + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark == flp->fl4_fwmark && + #endif ++ ve_accessible_strict(rth->fl.owner_env, get_exec_env()) && + !((rth->fl.fl4_tos ^ flp->fl4_tos) & + (IPTOS_RT_MASK | RTO_ONLINK))) { + +@@ -2705,7 +2868,7 @@ static int rt_fill_info(struct sk_buff * + u32 dst = rt->rt_dst; + + if (MULTICAST(dst) && !LOCAL_MCAST(dst) && +- ipv4_devconf.mc_forwarding) { ++ ve_ipv4_devconf.mc_forwarding) { + int err = ipmr_get_route(skb, r, nowait); + if (err <= 0) { + if (!nowait) { +@@ -2750,7 +2913,10 @@ int inet_rtm_getroute(struct sk_buff *in + /* Reserve room for dummy headers, this skb can pass + through good chunk of routing engine. + */ +- skb->mac.raw = skb->data; ++ skb->mac.raw = skb->nh.raw = skb->data; ++ ++ /* Bugfix: need to give ip_route_input enough of an IP header to not gag. 
*/ ++ skb->nh.iph->protocol = IPPROTO_ICMP; + skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + + if (rta[RTA_SRC - 1]) +@@ -2853,22 +3019,22 @@ void ip_rt_multicast_event(struct in_dev + } + + #ifdef CONFIG_SYSCTL +-static int flush_delay; ++int ipv4_flush_delay; + +-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, ++int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos) + { + if (write) { + proc_dointvec(ctl, write, filp, buffer, lenp, ppos); +- rt_cache_flush(flush_delay); ++ rt_cache_flush(ipv4_flush_delay); + return 0; + } + + return -EINVAL; + } + +-static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, ++int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, + int __user *name, + int nlen, + void __user *oldval, +@@ -2890,7 +3056,7 @@ ctl_table ipv4_route_table[] = { + { + .ctl_name = NET_IPV4_ROUTE_FLUSH, + .procname = "flush", +- .data = &flush_delay, ++ .data = &ipv4_flush_delay, + .maxlen = sizeof(int), + .mode = 0200, + .proc_handler = &ipv4_sysctl_rtcache_flush, +@@ -3184,15 +3350,18 @@ int __init ip_rt_init(void) + #ifdef CONFIG_PROC_FS + { + struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */ +- if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || +- !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, +- proc_net_stat))) { ++ ++ if (!proc_glob_fops_create("net/rt_cache", ++ S_IRUGO, &rt_cache_seq_fops)) ++ return -ENOMEM; ++ ++ if (!(rtstat_pde = create_proc_glob_entry("net/stat/rt_cache", ++ S_IRUGO, NULL))) + return -ENOMEM; +- } + rtstat_pde->proc_fops = &rt_cpu_seq_fops; + } + #ifdef CONFIG_NET_CLS_ROUTE +- create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL); ++ create_proc_read_entry("net/rt_acct", 0, NULL, ip_rt_acct_read, NULL); + #endif + #endif + #ifdef CONFIG_XFRM +diff -upr linux-2.6.16.orig/net/ipv4/sysctl_net_ipv4.c linux-2.6.16-026test015/net/ipv4/sysctl_net_ipv4.c +--- 
linux-2.6.16.orig/net/ipv4/sysctl_net_ipv4.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/sysctl_net_ipv4.c 2006-07-04 14:41:39.000000000 +0400 +@@ -22,6 +22,9 @@ + /* From af_inet.c */ + extern int sysctl_ip_nonlocal_bind; + ++int sysctl_tcp_use_sg = 1; ++EXPORT_SYMBOL(sysctl_tcp_use_sg); ++ + #ifdef CONFIG_SYSCTL + static int zero; + static int tcp_retr1_max = 255; +@@ -33,22 +36,21 @@ struct ipv4_config ipv4_config; + + #ifdef CONFIG_SYSCTL + +-static + int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { +- int val = ipv4_devconf.forwarding; ++ int val = ve_ipv4_devconf.forwarding; + int ret; + + ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + +- if (write && ipv4_devconf.forwarding != val) ++ if (write && ve_ipv4_devconf.forwarding != val) + inet_forward_change(); + + return ret; + } + +-static int ipv4_sysctl_forward_strategy(ctl_table *table, ++int ipv4_sysctl_forward_strategy(ctl_table *table, + int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, +@@ -664,6 +666,14 @@ ctl_table ipv4_table[] = { + .mode = 0644, + .proc_handler = &proc_dointvec, + }, ++ { ++ .ctl_name = NET_TCP_USE_SG, ++ .procname = "tcp_use_sg", ++ .data = &sysctl_tcp_use_sg, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, + + { .ctl_name = 0 } + }; +diff -upr linux-2.6.16.orig/net/ipv4/tcp.c linux-2.6.16-026test015/net/ipv4/tcp.c +--- linux-2.6.16.orig/net/ipv4/tcp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/tcp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -248,6 +248,7 @@ + */ + + #include <linux/config.h> ++#include <linux/kmem_cache.h> + #include <linux/module.h> + #include <linux/types.h> + #include <linux/fcntl.h> +@@ -263,6 +264,9 @@ + #include <net/xfrm.h> + #include <net/ip.h> + ++#include <ub/ub_orphan.h> ++#include <ub/ub_net.h> ++#include 
<ub/ub_tcp.h> + + #include <asm/uaccess.h> + #include <asm/ioctls.h> +@@ -321,6 +325,7 @@ unsigned int tcp_poll(struct file *file, + unsigned int mask; + struct sock *sk = sock->sk; + struct tcp_sock *tp = tcp_sk(sk); ++ int check_send_space; + + poll_wait(file, sk->sk_sleep, wait); + if (sk->sk_state == TCP_LISTEN) +@@ -335,6 +340,21 @@ unsigned int tcp_poll(struct file *file, + if (sk->sk_err) + mask = POLLERR; + ++ check_send_space = 1; ++#ifdef CONFIG_USER_RESOURCE ++ if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) { ++ unsigned long size; ++ size = MAX_TCP_HEADER + tp->mss_cache; ++ if (size > SOCK_MIN_UBCSPACE) ++ size = SOCK_MIN_UBCSPACE; ++ size = skb_charge_size(size); ++ if (ub_sock_makewres_tcp(sk, size)) { ++ check_send_space = 0; ++ ub_sock_sndqueueadd_tcp(sk, size); ++ } ++ } ++#endif ++ + /* + * POLLHUP is certainly not done right. But poll() doesn't + * have a notion of HUP in just one direction, and for a +@@ -378,7 +398,7 @@ unsigned int tcp_poll(struct file *file, + sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data)) + mask |= POLLIN | POLLRDNORM; + +- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { ++ if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) { + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + mask |= POLLOUT | POLLWRNORM; + } else { /* send SIGIO later */ +@@ -528,16 +548,23 @@ static ssize_t do_tcp_sendpages(struct s + int copy, i, can_coalesce; + int offset = poffset % PAGE_SIZE; + int size = min_t(size_t, psize, PAGE_SIZE - offset); ++ unsigned long chargesize = 0; + + if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) { + new_segment: ++ chargesize = 0; + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; + ++ chargesize = skb_charge_size(MAX_TCP_HEADER + ++ tp->mss_cache); ++ if (ub_sock_getwres_tcp(sk, chargesize) < 0) ++ goto wait_for_ubspace; + skb = sk_stream_alloc_pskb(sk, 0, 0, + sk->sk_allocation); + if (!skb) + goto wait_for_memory; ++ ub_skb_set_charge(skb, sk, chargesize, 
UB_TCPSNDBUF); + + skb_entail(sk, tp, skb); + copy = size_goal; +@@ -593,10 +620,14 @@ new_segment: + wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + wait_for_memory: ++ ub_sock_retwres_tcp(sk, chargesize, ++ skb_charge_size(MAX_TCP_HEADER + tp->mss_cache)); ++ chargesize = 0; ++wait_for_ubspace: + if (copied) + tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + +- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) ++ if ((err = sk_stream_wait_memory(sk, &timeo, chargesize)) != 0) + goto do_error; + + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); +@@ -699,6 +730,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru + while (--iovlen >= 0) { + int seglen = iov->iov_len; + unsigned char __user *from = iov->iov_base; ++ unsigned long chargesize = 0; + + iov++; + +@@ -709,18 +741,26 @@ int tcp_sendmsg(struct kiocb *iocb, stru + + if (!sk->sk_send_head || + (copy = size_goal - skb->len) <= 0) { ++ unsigned long size; + + new_segment: + /* Allocate new segment. If the interface is SG, + * allocate skb fitting to single page. + */ ++ chargesize = 0; + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; +- +- skb = sk_stream_alloc_pskb(sk, select_size(sk, tp), +- 0, sk->sk_allocation); ++ size = select_size(sk, tp); ++ chargesize = skb_charge_size(MAX_TCP_HEADER + ++ size); ++ if (ub_sock_getwres_tcp(sk, chargesize) < 0) ++ goto wait_for_ubspace; ++ skb = sk_stream_alloc_pskb(sk, size, 0, ++ sk->sk_allocation); + if (!skb) + goto wait_for_memory; ++ ub_skb_set_charge(skb, sk, chargesize, ++ UB_TCPSNDBUF); + + /* + * Check whether we can use HW checksum. +@@ -768,6 +808,7 @@ new_segment: + } else if (page) { + if (off == PAGE_SIZE) { + put_page(page); ++ ub_sock_tcp_detachpage(sk); + TCP_PAGE(sk) = page = NULL; + off = 0; + } +@@ -781,6 +822,9 @@ new_segment: + goto wait_for_memory; + + if (!page) { ++ chargesize = PAGE_SIZE; ++ if (ub_sock_tcp_chargepage(sk) < 0) ++ goto wait_for_ubspace; + /* Allocate new cache page. 
*/ + if (!(page = sk_stream_alloc_page(sk))) + goto wait_for_memory; +@@ -812,7 +856,8 @@ new_segment: + } else if (off + copy < PAGE_SIZE) { + get_page(page); + TCP_PAGE(sk) = page; +- } ++ } else ++ ub_sock_tcp_detachpage(sk); + } + + TCP_OFF(sk) = off + copy; +@@ -843,10 +888,15 @@ new_segment: + wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + wait_for_memory: ++ ub_sock_retwres_tcp(sk, chargesize, ++ skb_charge_size(MAX_TCP_HEADER+tp->mss_cache)); ++ chargesize = 0; ++wait_for_ubspace: + if (copied) + tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + +- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) ++ if ((err = sk_stream_wait_memory(sk, &timeo, ++ chargesize)) != 0) + goto do_error; + + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); +@@ -944,7 +994,18 @@ static void cleanup_rbuf(struct sock *sk + #if TCP_DEBUG + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + +- BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); ++ if (!(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq))) { ++ printk("KERNEL: assertion: skb==NULL || " ++ "before(tp->copied_seq, skb->end_seq)\n"); ++ printk("VE%u pid %d comm %.16s\n", ++ (get_exec_env() ? VEID(get_exec_env()) : 0), ++ current->pid, current->comm); ++ printk("copied=%d, copied_seq=%d, rcv_nxt=%d\n", copied, ++ tp->copied_seq, tp->rcv_nxt); ++ printk("skb->len=%d, skb->seq=%d, skb->end_seq=%d\n", ++ skb->len, TCP_SKB_CB(skb)->seq, ++ TCP_SKB_CB(skb)->end_seq); ++ } + #endif + + if (inet_csk_ack_scheduled(sk)) { +@@ -1168,7 +1229,22 @@ int tcp_recvmsg(struct kiocb *iocb, stru + goto found_ok_skb; + if (skb->h.th->fin) + goto found_fin_ok; +- BUG_TRAP(flags & MSG_PEEK); ++ if (!(flags & MSG_PEEK)) { ++ printk("KERNEL: assertion: flags&MSG_PEEK\n"); ++ printk("VE%u pid %d comm %.16s\n", ++ (get_exec_env() ? 
++ VEID(get_exec_env()) : 0), ++ current->pid, current->comm); ++ printk("flags=0x%x, len=%d, copied_seq=%d, " ++ "rcv_nxt=%d\n", flags, len, ++ tp->copied_seq, tp->rcv_nxt); ++ printk("skb->len=%d, *seq=%d, skb->seq=%d, " ++ "skb->end_seq=%d, offset=%d\n", ++ skb->len, *seq, ++ TCP_SKB_CB(skb)->seq, ++ TCP_SKB_CB(skb)->end_seq, ++ offset); ++ } + skb = skb->next; + } while (skb != (struct sk_buff *)&sk->sk_receive_queue); + +@@ -1231,8 +1307,18 @@ int tcp_recvmsg(struct kiocb *iocb, stru + + tp->ucopy.len = len; + +- BUG_TRAP(tp->copied_seq == tp->rcv_nxt || +- (flags & (MSG_PEEK | MSG_TRUNC))); ++ if (!(tp->copied_seq == tp->rcv_nxt || ++ (flags&(MSG_PEEK|MSG_TRUNC)))) { ++ printk("KERNEL: assertion: tp->copied_seq == " ++ "tp->rcv_nxt || ...\n"); ++ printk("VE%u pid %d comm %.16s\n", ++ (get_exec_env() ? ++ VEID(get_exec_env()) : 0), ++ current->pid, current->comm); ++ printk("flags=0x%x, len=%d, copied_seq=%d, " ++ "rcv_nxt=%d\n", flags, len, ++ tp->copied_seq, tp->rcv_nxt); ++ } + + /* Ugly... 
If prequeue is not empty, we have to + * process it before releasing socket, otherwise +@@ -1583,7 +1669,7 @@ adjudge_to_death: + if (tmo > TCP_TIMEWAIT_LEN) { + inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); + } else { +- atomic_inc(sk->sk_prot->orphan_count); ++ ub_inc_orphan_count(sk); + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto out; + } +@@ -1591,9 +1677,7 @@ adjudge_to_death: + } + if (sk->sk_state != TCP_CLOSE) { + sk_stream_mem_reclaim(sk); +- if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans || +- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && +- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { ++ if (ub_too_many_orphans(sk, ub_get_orphan_count(sk))) { + if (net_ratelimit()) + printk(KERN_INFO "TCP: too many of orphaned " + "sockets\n"); +@@ -1602,7 +1686,7 @@ adjudge_to_death: + NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); + } + } +- atomic_inc(sk->sk_prot->orphan_count); ++ ub_inc_orphan_count(sk); + + if (sk->sk_state == TCP_CLOSE) + inet_csk_destroy_sock(sk); +@@ -2051,7 +2135,7 @@ void __init tcp_init(void) + tcp_hashinfo.bind_bucket_cachep = + kmem_cache_create("tcp_bind_bucket", + sizeof(struct inet_bind_bucket), 0, +- SLAB_HWCACHE_ALIGN, NULL, NULL); ++ SLAB_HWCACHE_ALIGN | SLAB_UBC, NULL, NULL); + if (!tcp_hashinfo.bind_bucket_cachep) + panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); + +diff -upr linux-2.6.16.orig/net/ipv4/tcp_input.c linux-2.6.16-026test015/net/ipv4/tcp_input.c +--- linux-2.6.16.orig/net/ipv4/tcp_input.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/tcp_input.c 2006-07-04 14:41:37.000000000 +0400 +@@ -72,6 +72,8 @@ + #include <linux/ipsec.h> + #include <asm/unaligned.h> + ++#include <ub/ub_tcp.h> ++ + int sysctl_tcp_timestamps = 1; + int sysctl_tcp_window_scaling = 1; + int sysctl_tcp_sack = 1; +@@ -252,7 +254,7 @@ static void tcp_grow_window(struct sock + /* Check #1 */ + if (tp->rcv_ssthresh < tp->window_clamp && + (int)tp->rcv_ssthresh < tcp_space(sk) && +- 
!tcp_memory_pressure) { ++ ub_tcp_rmem_allows_expand(sk)) { + int incr; + + /* Check #2. Increase window, if skb with such overhead +@@ -321,6 +323,8 @@ static void tcp_init_buffer_space(struct + + tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); + tp->snd_cwnd_stamp = tcp_time_stamp; ++ ++ ub_tcp_update_maxadvmss(sk); + } + + /* 5. Recalculate window clamp after socket hit its memory bounds. */ +@@ -332,7 +336,7 @@ static void tcp_clamp_window(struct sock + + if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && +- !tcp_memory_pressure && ++ !ub_tcp_memory_pressure(sk) && + atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { + sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), + sysctl_tcp_rmem[2]); +@@ -3118,7 +3122,7 @@ queue_and_out: + !sk_stream_rmem_schedule(sk, skb))) { + if (tcp_prune_queue(sk) < 0 || + !sk_stream_rmem_schedule(sk, skb)) +- goto drop; ++ goto drop_part; + } + sk_stream_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); +@@ -3162,6 +3166,12 @@ out_of_window: + drop: + __kfree_skb(skb); + return; ++ ++drop_part: ++ if (after(tp->copied_seq, tp->rcv_nxt)) ++ tp->rcv_nxt = tp->copied_seq; ++ __kfree_skb(skb); ++ return; + } + + /* Out of window. F.e. zero window probe. 
*/ +@@ -3333,6 +3343,10 @@ tcp_collapse(struct sock *sk, struct sk_ + nskb = alloc_skb(copy+header, GFP_ATOMIC); + if (!nskb) + return; ++ if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) { ++ kfree_skb(nskb); ++ return; ++ } + skb_reserve(nskb, header); + memcpy(nskb->head, skb->head, header); + nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head); +@@ -3429,7 +3443,7 @@ static int tcp_prune_queue(struct sock * + + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + tcp_clamp_window(sk, tp); +- else if (tcp_memory_pressure) ++ else if (ub_tcp_memory_pressure(sk)) + tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + + tcp_collapse_ofo_queue(sk); +@@ -3505,7 +3519,7 @@ static int tcp_should_expand_sndbuf(stru + return 0; + + /* If we are under global TCP memory pressure, do not expand. */ +- if (tcp_memory_pressure) ++ if (ub_tcp_memory_pressure(sk)) + return 0; + + /* If we are under soft global TCP memory pressure, do not expand. */ +@@ -3898,6 +3912,10 @@ int tcp_rcv_established(struct sock *sk, + + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; ++ /* This is OK not to try to free memory here. ++ * Do this below on slow path. 
Den */ ++ if (ub_tcprcvbuf_charge(sk, skb) < 0) ++ goto step5; + + NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS); + +diff -upr linux-2.6.16.orig/net/ipv4/tcp_ipv4.c linux-2.6.16-026test015/net/ipv4/tcp_ipv4.c +--- linux-2.6.16.orig/net/ipv4/tcp_ipv4.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/tcp_ipv4.c 2006-07-04 14:41:39.000000000 +0400 +@@ -72,6 +72,8 @@ + #include <net/timewait_sock.h> + #include <net/xfrm.h> + ++#include <ub/ub_tcp.h> ++ + #include <linux/inet.h> + #include <linux/ipv6.h> + #include <linux/stddef.h> +@@ -705,6 +707,7 @@ struct request_sock_ops tcp_request_sock + .destructor = tcp_v4_reqsk_destructor, + .send_reset = tcp_v4_send_reset, + }; ++EXPORT_SYMBOL_GPL(tcp_request_sock_ops); + + static struct timewait_sock_ops tcp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp_timewait_sock), +@@ -979,12 +982,15 @@ static int tcp_v4_checksum_init(struct s + */ + int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) + { ++ struct user_beancounter *ub; ++ ++ ub = set_exec_ub(sock_bc(sk)->ub); + if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + TCP_CHECK_TIMER(sk); + if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) + goto reset; + TCP_CHECK_TIMER(sk); +- return 0; ++ goto restore_context; + } + + if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb)) +@@ -998,7 +1004,7 @@ int tcp_v4_do_rcv(struct sock *sk, struc + if (nsk != sk) { + if (tcp_child_process(sk, nsk, skb)) + goto reset; +- return 0; ++ goto restore_context; + } + } + +@@ -1006,6 +1012,9 @@ int tcp_v4_do_rcv(struct sock *sk, struc + if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) + goto reset; + TCP_CHECK_TIMER(sk); ++ ++restore_context: ++ (void)set_exec_ub(ub); + return 0; + + reset: +@@ -1017,7 +1026,7 @@ discard: + * might be destroyed here. This current version compiles correctly, + * but you have been warned. 
+ */ +- return 0; ++ goto restore_context; + + csum_err: + TCP_INC_STATS_BH(TCP_MIB_INERRS); +@@ -1302,6 +1311,8 @@ int tcp_v4_destroy_sock(struct sock *sk) + * If sendmsg cached page exists, toss it. + */ + if (sk->sk_sndmsg_page) { ++ /* queue is empty, uncharge */ ++ ub_sock_tcp_detachpage(sk); + __free_page(sk->sk_sndmsg_page); + sk->sk_sndmsg_page = NULL; + } +@@ -1316,16 +1327,34 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); + #ifdef CONFIG_PROC_FS + /* Proc filesystem TCP sock list dumping. */ + +-static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) ++static inline struct inet_timewait_sock *tw_head(struct hlist_head *head, ++ envid_t veid) + { +- return hlist_empty(head) ? NULL : +- list_entry(head->first, struct inet_timewait_sock, tw_node); ++ struct inet_timewait_sock *tw; ++ struct hlist_node *pos; ++ ++ if (hlist_empty(head)) ++ return NULL; ++ hlist_for_each_entry(tw, pos, head, tw_node) { ++ if (!ve_accessible_veid(tw->tw_owner_env, veid)) ++ continue; ++ return tw; ++ } ++ return NULL; + } + +-static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) ++static inline struct inet_timewait_sock * ++ tw_next(struct inet_timewait_sock *tw, envid_t veid) + { +- return tw->tw_node.next ? 
+- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; ++ while (1) { ++ if (tw->tw_node.next == NULL) ++ return NULL; ++ tw = hlist_entry(tw->tw_node.next, typeof(*tw), tw_node); ++ if (!ve_accessible_veid(tw->tw_owner_env, veid)) ++ continue; ++ return tw; ++ } ++ return NULL; /* make compiler happy */ + } + + static void *listening_get_next(struct seq_file *seq, void *cur) +@@ -1334,7 +1363,9 @@ static void *listening_get_next(struct s + struct hlist_node *node; + struct sock *sk = cur; + struct tcp_iter_state* st = seq->private; ++ struct ve_struct *ve; + ++ ve = get_exec_env(); + if (!sk) { + st->bucket = 0; + sk = sk_head(&tcp_hashinfo.listening_hash[0]); +@@ -1374,6 +1405,8 @@ get_req: + } + get_sk: + sk_for_each_from(sk, node) { ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (sk->sk_family == st->family) { + cur = sk; + goto out; +@@ -1414,7 +1447,9 @@ static void *established_get_first(struc + { + struct tcp_iter_state* st = seq->private; + void *rc = NULL; ++ struct ve_struct *ve; + ++ ve = get_exec_env(); + for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { + struct sock *sk; + struct hlist_node *node; +@@ -1425,6 +1460,8 @@ static void *established_get_first(struc + + read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (sk->sk_family != st->family) { + continue; + } +@@ -1434,6 +1471,8 @@ static void *established_get_first(struc + st->state = TCP_SEQ_STATE_TIME_WAIT; + inet_twsk_for_each(tw, node, + &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { ++ if (!ve_accessible_veid(tw->tw_owner_env, VEID(ve))) ++ continue; + if (tw->tw_family != st->family) { + continue; + } +@@ -1453,16 +1492,17 @@ static void *established_get_next(struct + struct inet_timewait_sock *tw; + struct hlist_node *node; + struct tcp_iter_state* st = seq->private; ++ struct ve_struct *ve; + ++ ve = 
get_exec_env(); + ++st->num; + + if (st->state == TCP_SEQ_STATE_TIME_WAIT) { + tw = cur; +- tw = tw_next(tw); ++ tw = tw_next(tw, VEID(ve)); + get_tw: +- while (tw && tw->tw_family != st->family) { +- tw = tw_next(tw); +- } ++ while (tw && tw->tw_family != st->family) ++ tw = tw_next(tw, VEID(ve)); + if (tw) { + cur = tw; + goto out; +@@ -1484,12 +1524,15 @@ get_tw: + sk = sk_next(sk); + + sk_for_each_from(sk, node) { ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (sk->sk_family == st->family) + goto found; + } + + st->state = TCP_SEQ_STATE_TIME_WAIT; +- tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain); ++ tw = tw_head(&tcp_hashinfo.ehash[st->bucket + ++ tcp_hashinfo.ehash_size].chain, VEID(ve)); + goto get_tw; + found: + cur = sk; +@@ -1635,7 +1678,7 @@ int tcp_proc_register(struct tcp_seq_afi + afinfo->seq_fops->llseek = seq_lseek; + afinfo->seq_fops->release = seq_release_private; + +- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); ++ p = proc_glob_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + if (p) + p->data = afinfo; + else +@@ -1647,7 +1690,8 @@ void tcp_proc_unregister(struct tcp_seq_ + { + if (!afinfo) + return; +- proc_net_remove(afinfo->name); ++ ++ remove_proc_glob_entry(afinfo->name, NULL); + memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); + } + +@@ -1777,7 +1821,7 @@ out: + static struct file_operations tcp4_seq_fops; + static struct tcp_seq_afinfo tcp4_seq_afinfo = { + .owner = THIS_MODULE, +- .name = "tcp", ++ .name = "net/tcp", + .family = AF_INET, + .seq_show = tcp4_seq_show, + .seq_fops = &tcp4_seq_fops, +@@ -1844,6 +1888,86 @@ void __init tcp_v4_init(struct net_proto + tcp_socket->sk->sk_prot->unhash(tcp_socket->sk); + } + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static void tcp_kill_ve_onesk(struct sock *sk) ++{ ++ struct tcp_sock *tp = tcp_sk(sk); ++ ++ /* Check the assumed state of the socket. 
*/ ++ if (!sock_flag(sk, SOCK_DEAD)) { ++ static int printed; ++invalid: ++ if (!printed) ++ printk(KERN_DEBUG "Killing sk: dead %d, state %d, " ++ "wrseq %u unseq %u, wrqu %d.\n", ++ sock_flag(sk, SOCK_DEAD), sk->sk_state, ++ tp->write_seq, tp->snd_una, ++ !skb_queue_empty(&sk->sk_write_queue)); ++ printed = 1; ++ return; ++ } ++ ++ tcp_send_active_reset(sk, GFP_ATOMIC); ++ switch (sk->sk_state) { ++ case TCP_FIN_WAIT1: ++ case TCP_CLOSING: ++ /* In these 2 states the peer may want us to retransmit ++ * some data and/or FIN. Entering "resetting mode" ++ * instead. ++ */ ++ tcp_time_wait(sk, TCP_CLOSE, 0); ++ break; ++ case TCP_FIN_WAIT2: ++ /* By some reason the socket may stay in this state ++ * without turning into a TW bucket. Fix it. ++ */ ++ tcp_time_wait(sk, TCP_FIN_WAIT2, 0); ++ break; ++ case TCP_LAST_ACK: ++ /* Just jump into CLOSED state. */ ++ tcp_done(sk); ++ break; ++ default: ++ /* The socket must be already close()d. */ ++ goto invalid; ++ } ++} ++ ++void tcp_v4_kill_ve_sockets(struct ve_struct *envid) ++{ ++ struct inet_ehash_bucket *head; ++ int i; ++ ++ /* alive */ ++ local_bh_disable(); ++ head = tcp_hashinfo.ehash; ++ for (i = 0; i < tcp_hashinfo.ehash_size; i++) { ++ struct sock *sk; ++ struct hlist_node *node; ++more_work: ++ write_lock(&head[i].lock); ++ sk_for_each(sk, node, &head[i].chain) { ++ if (ve_accessible_strict(VE_OWNER_SK(sk), envid)) { ++ sock_hold(sk); ++ write_unlock(&head[i].lock); ++ ++ bh_lock_sock(sk); ++ /* sk might have disappeared from the hash before ++ * we got the lock */ ++ if (sk->sk_state != TCP_CLOSE) ++ tcp_kill_ve_onesk(sk); ++ bh_unlock_sock(sk); ++ sock_put(sk); ++ goto more_work; ++ } ++ } ++ write_unlock(&head[i].lock); ++ } ++ local_bh_enable(); ++} ++EXPORT_SYMBOL(tcp_v4_kill_ve_sockets); ++#endif ++ + EXPORT_SYMBOL(ipv4_specific); + EXPORT_SYMBOL(tcp_hashinfo); + EXPORT_SYMBOL(tcp_prot); +diff -upr linux-2.6.16.orig/net/ipv4/tcp_minisocks.c linux-2.6.16-026test015/net/ipv4/tcp_minisocks.c +--- 
linux-2.6.16.orig/net/ipv4/tcp_minisocks.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/tcp_minisocks.c 2006-07-04 14:41:39.000000000 +0400 +@@ -29,6 +29,8 @@ + #include <net/inet_common.h> + #include <net/xfrm.h> + ++#include <ub/ub_net.h> ++ + #ifdef CONFIG_SYSCTL + #define SYNC_INIT 0 /* let the user enable it */ + #else +@@ -307,6 +309,8 @@ void tcp_time_wait(struct sock *sk, int + tw->tw_ipv6only = np->ipv6only; + } + #endif ++ tw->tw_owner_env = VEID(VE_OWNER_SK(sk)); ++ + /* Linkage updates. */ + __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); + +@@ -355,6 +359,8 @@ struct sock *tcp_create_openreq_child(st + struct tcp_sock *newtp; + + /* Now setup tcp_sock */ ++ SET_VE_OWNER_SK(newsk, VE_OWNER_SK(sk)); ++ + newtp = tcp_sk(newsk); + newtp->pred_flags = 0; + newtp->rcv_nxt = treq->rcv_isn + 1; +diff -upr linux-2.6.16.orig/net/ipv4/tcp_output.c linux-2.6.16-026test015/net/ipv4/tcp_output.c +--- linux-2.6.16.orig/net/ipv4/tcp_output.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/tcp_output.c 2006-07-04 14:41:37.000000000 +0400 +@@ -42,6 +42,9 @@ + #include <linux/module.h> + #include <linux/smp_lock.h> + ++#include <ub/ub_net.h> ++#include <ub/ub_tcp.h> ++ + /* People can turn this off for buggy TCP's found in printers etc. */ + int sysctl_tcp_retrans_collapse = 1; + +@@ -528,16 +531,26 @@ int tcp_fragment(struct sock *sk, struct + if (nsize < 0) + nsize = 0; + +- if (skb_cloned(skb) && +- skb_is_nonlinear(skb) && +- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) +- return -ENOMEM; ++ if (skb_cloned(skb) && skb_is_nonlinear(skb)) { ++ unsigned long chargesize; ++ chargesize = skb_bc(skb)->charged; ++ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) ++ return -ENOMEM; ++ ub_sock_retwres_tcp(sk, chargesize, chargesize); ++ ub_tcpsndbuf_charge_forced(sk, skb); ++ } + + /* Get a new skb... force flag on. 
*/ + buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC); + if (buff == NULL) + return -ENOMEM; /* We'll just try again later. */ +- sk_charge_skb(sk, buff); ++ if (ub_tcpsndbuf_charge(sk, buff) < 0) { ++ kfree_skb(buff); ++ return -ENOMEM; ++ } ++ ++ buff->truesize = skb->len - len; ++ skb->truesize -= buff->truesize; + + /* Correct the sequence numbers. */ + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; +@@ -978,6 +991,11 @@ static int tso_fragment(struct sock *sk, + if (unlikely(buff == NULL)) + return -ENOMEM; + ++ if (ub_tcpsndbuf_charge(sk, buff) < 0) { ++ kfree_skb(buff); ++ return -ENOMEM; ++ } ++ + buff->truesize = nlen; + skb->truesize -= nlen; + +@@ -1281,7 +1299,7 @@ u32 __tcp_select_window(struct sock *sk) + if (free_space < full_space/2) { + icsk->icsk_ack.quick = 0; + +- if (tcp_memory_pressure) ++ if (ub_tcp_shrink_rcvbuf(sk)) + tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); + + if (free_space < mss) +@@ -1708,6 +1726,7 @@ void tcp_send_fin(struct sock *sk) + break; + yield(); + } ++ ub_tcpsndbuf_charge_forced(sk, skb); + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_TCP_HEADER); +@@ -1777,6 +1796,10 @@ int tcp_send_synack(struct sock *sk) + struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); + if (nskb == NULL) + return -ENOMEM; ++ if (ub_tcpsndbuf_charge(sk, skb) < 0) { ++ kfree_skb(nskb); ++ return -ENOMEM; ++ } + __skb_unlink(skb, &sk->sk_write_queue); + skb_header_release(nskb); + __skb_queue_head(&sk->sk_write_queue, nskb); +@@ -1928,6 +1951,10 @@ int tcp_connect(struct sock *sk) + buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); + if (unlikely(buff == NULL)) + return -ENOBUFS; ++ if (ub_tcpsndbuf_charge(sk, buff) < 0) { ++ kfree_skb(buff); ++ return -ENOBUFS; ++ } + + /* Reserve space for headers. 
*/ + skb_reserve(buff, MAX_TCP_HEADER); +diff -upr linux-2.6.16.orig/net/ipv4/tcp_timer.c linux-2.6.16-026test015/net/ipv4/tcp_timer.c +--- linux-2.6.16.orig/net/ipv4/tcp_timer.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/tcp_timer.c 2006-07-04 14:41:39.000000000 +0400 +@@ -22,6 +22,8 @@ + + #include <linux/module.h> + #include <net/tcp.h> ++#include <ub/ub_orphan.h> ++#include <ub/ub_tcp.h> + + int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; + int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; +@@ -67,7 +69,7 @@ static void tcp_write_err(struct sock *s + static int tcp_out_of_resources(struct sock *sk, int do_reset) + { + struct tcp_sock *tp = tcp_sk(sk); +- int orphans = atomic_read(&tcp_orphan_count); ++ int orphans = ub_get_orphan_count(sk); + + /* If peer does not open window for long time, or did not transmit + * anything for long time, penalize it. */ +@@ -78,9 +80,7 @@ static int tcp_out_of_resources(struct s + if (sk->sk_err_soft) + orphans <<= 1; + +- if (orphans >= sysctl_tcp_max_orphans || +- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && +- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { ++ if (ub_too_many_orphans(sk, orphans)) { + if (net_ratelimit()) + printk(KERN_INFO "Out of socket memory\n"); + +@@ -173,9 +173,12 @@ static int tcp_write_timeout(struct sock + static void tcp_delack_timer(unsigned long data) + { + struct sock *sk = (struct sock*)data; ++ struct ve_struct *env; + struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + ++ env = set_exec_env(VE_OWNER_SK(sk)); ++ + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. 
*/ +@@ -224,11 +227,12 @@ static void tcp_delack_timer(unsigned lo + TCP_CHECK_TIMER(sk); + + out: +- if (tcp_memory_pressure) ++ if (ub_tcp_memory_pressure(sk)) + sk_stream_mem_reclaim(sk); + out_unlock: + bh_unlock_sock(sk); + sock_put(sk); ++ (void)set_exec_env(env); + } + + static void tcp_probe_timer(struct sock *sk) +@@ -283,8 +287,11 @@ static void tcp_probe_timer(struct sock + static void tcp_retransmit_timer(struct sock *sk) + { + struct tcp_sock *tp = tcp_sk(sk); ++ struct ve_struct *env; + struct inet_connection_sock *icsk = inet_csk(sk); + ++ env = set_exec_env(VE_OWNER_SK(sk)); ++ + if (!tp->packets_out) + goto out; + +@@ -381,15 +388,19 @@ out_reset_timer: + if (icsk->icsk_retransmits > sysctl_tcp_retries1) + __sk_dst_reset(sk); + +-out:; ++out: ++ (void)set_exec_env(env); + } + + static void tcp_write_timer(unsigned long data) + { + struct sock *sk = (struct sock*)data; ++ struct ve_struct *env; + struct inet_connection_sock *icsk = inet_csk(sk); + int event; + ++ env = set_exec_env(VE_OWNER_SK(sk)); ++ + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later */ +@@ -423,6 +434,7 @@ out: + out_unlock: + bh_unlock_sock(sk); + sock_put(sk); ++ (void)set_exec_env(env); + } + + /* +@@ -450,10 +462,13 @@ void tcp_set_keepalive(struct sock *sk, + static void tcp_keepalive_timer (unsigned long data) + { + struct sock *sk = (struct sock *) data; ++ struct ve_struct *env; + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + __u32 elapsed; + ++ env = set_exec_env(VE_OWNER_SK(sk)); ++ + /* Only process if socket is not in use. 
*/ + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { +@@ -525,4 +540,5 @@ death: + out: + bh_unlock_sock(sk); + sock_put(sk); ++ (void)set_exec_env(env); + } +diff -upr linux-2.6.16.orig/net/ipv4/udp.c linux-2.6.16-026test015/net/ipv4/udp.c +--- linux-2.6.16.orig/net/ipv4/udp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv4/udp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -127,7 +127,9 @@ static int udp_v4_get_port(struct sock * + struct hlist_node *node; + struct sock *sk2; + struct inet_sock *inet = inet_sk(sk); ++ struct ve_struct *env; + ++ env = VE_OWNER_SK(sk); + write_lock_bh(&udp_hash_lock); + if (snum == 0) { + int best_size_so_far, best, result, i; +@@ -141,7 +143,7 @@ static int udp_v4_get_port(struct sock * + struct hlist_head *list; + int size; + +- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; ++ list = &udp_hash[udp_hashfn(result, VEID(env))]; + if (hlist_empty(list)) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + +@@ -163,7 +165,7 @@ static int udp_v4_get_port(struct sock * + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); +- if (!udp_lport_inuse(result)) ++ if (!udp_lport_inuse(result, env)) + break; + } + if (i >= (1 << 16) / UDP_HTABLE_SIZE) +@@ -172,11 +174,12 @@ gotit: + udp_port_rover = snum = result; + } else { + sk_for_each(sk2, node, +- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { ++ &udp_hash[udp_hashfn(snum, VEID(env))]) { + struct inet_sock *inet2 = inet_sk(sk2); + + if (inet2->num == snum && + sk2 != sk && ++ ve_accessible_strict(VE_OWNER_SK(sk2), env) && + !ipv6_only_sock(sk2) && + (!sk2->sk_bound_dev_if || + !sk->sk_bound_dev_if || +@@ -190,7 +193,7 @@ gotit: + } + inet->num = snum; + if (sk_unhashed(sk)) { +- struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; ++ struct hlist_head *h = &udp_hash[udp_hashfn(snum, VEID(env))]; + + sk_add_node(sk, h); + sock_prot_inc_use(sk->sk_prot); +@@ -228,11 
+231,15 @@ static struct sock *udp_v4_lookup_longwa + struct hlist_node *node; + unsigned short hnum = ntohs(dport); + int badness = -1; ++ struct ve_struct *env; + +- sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { ++ env = get_exec_env(); ++ sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) { + struct inet_sock *inet = inet_sk(sk); + +- if (inet->num == hnum && !ipv6_only_sock(sk)) { ++ if (inet->num == hnum && ++ ve_accessible_strict(VE_OWNER_SK(sk), env) && ++ !ipv6_only_sock(sk)) { + int score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) +@@ -1049,7 +1056,8 @@ static int udp_v4_mcast_deliver(struct s + int dif; + + read_lock(&udp_hash_lock); +- sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); ++ sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest), ++ VEID(VE_OWNER_SKB(skb)))]); + dif = skb->dev->ifindex; + sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + if (sk) { +@@ -1367,10 +1375,14 @@ static struct sock *udp_get_first(struct + { + struct sock *sk; + struct udp_iter_state *state = seq->private; ++ struct ve_struct *env; + ++ env = get_exec_env(); + for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { + struct hlist_node *node; + sk_for_each(sk, node, &udp_hash[state->bucket]) { ++ if (!ve_accessible(VE_OWNER_SK(sk), env)) ++ continue; + if (sk->sk_family == state->family) + goto found; + } +@@ -1387,8 +1399,13 @@ static struct sock *udp_get_next(struct + do { + sk = sk_next(sk); + try_again: +- ; +- } while (sk && sk->sk_family != state->family); ++ if (!sk) ++ break; ++ if (sk->sk_family != state->family) ++ continue; ++ if (ve_accessible(VE_OWNER_SK(sk), get_exec_env())) ++ break; ++ } while (1); + + if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { + sk = sk_head(&udp_hash[state->bucket]); +@@ -1474,7 +1491,7 @@ int udp_proc_register(struct udp_seq_afi + afinfo->seq_fops->llseek = seq_lseek; + afinfo->seq_fops->release 
= seq_release_private; + +- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); ++ p = proc_glob_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + if (p) + p->data = afinfo; + else +@@ -1486,7 +1503,8 @@ void udp_proc_unregister(struct udp_seq_ + { + if (!afinfo) + return; +- proc_net_remove(afinfo->name); ++ ++ remove_proc_glob_entry(afinfo->name, NULL); + memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); + } + +@@ -1529,7 +1547,7 @@ static int udp4_seq_show(struct seq_file + static struct file_operations udp4_seq_fops; + static struct udp_seq_afinfo udp4_seq_afinfo = { + .owner = THIS_MODULE, +- .name = "udp", ++ .name = "net/udp", + .family = AF_INET, + .seq_show = udp4_seq_show, + .seq_fops = &udp4_seq_fops, +diff -upr linux-2.6.16.orig/net/ipv6/addrconf.c linux-2.6.16-026test015/net/ipv6/addrconf.c +--- linux-2.6.16.orig/net/ipv6/addrconf.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/addrconf.c 2006-07-04 14:41:39.000000000 +0400 +@@ -100,6 +100,7 @@ + #define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) + + #ifdef CONFIG_SYSCTL ++static struct addrconf_sysctl_table * __addrconf_sysctl_register(struct inet6_dev *idev, char *devname, int ifindex, struct ipv6_devconf *p); + static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p); + static void addrconf_sysctl_unregister(struct ipv6_devconf *p); + #endif +@@ -133,8 +134,6 @@ static DEFINE_SPINLOCK(addrconf_verify_l + static void addrconf_join_anycast(struct inet6_ifaddr *ifp); + static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); + +-static int addrconf_ifdown(struct net_device *dev, int how); +- + static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); + static void addrconf_dad_timer(unsigned long data); + static void addrconf_dad_completed(struct inet6_ifaddr *ifp); +@@ -149,7 +148,7 @@ static int ipv6_chk_same_addr(const stru + + static struct notifier_block *inet6addr_chain; + +-struct 
ipv6_devconf ipv6_devconf = { ++struct ipv6_devconf global_ipv6_devconf = { + .forwarding = 0, + .hop_limit = IPV6_DEFAULT_HOPLIMIT, + .mtu6 = IPV6_MIN_MTU, +@@ -171,7 +170,7 @@ struct ipv6_devconf ipv6_devconf = { + .max_addresses = IPV6_MAX_ADDRESSES, + }; + +-static struct ipv6_devconf ipv6_devconf_dflt = { ++struct ipv6_devconf global_ipv6_devconf_dflt = { + .forwarding = 0, + .hop_limit = IPV6_DEFAULT_HOPLIMIT, + .mtu6 = IPV6_MIN_MTU, +@@ -192,6 +191,12 @@ static struct ipv6_devconf ipv6_devconf_ + .max_addresses = IPV6_MAX_ADDRESSES, + }; + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ipv6_devconf_dflt (*(get_exec_env()->_ipv6_devconf_dflt)) ++#else ++#define ipv6_devconf_dflt global_ipv6_devconf_dflt ++#endif ++ + /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ + #if 0 + const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +@@ -463,8 +468,8 @@ static void addrconf_forward_change(void + read_lock(&addrconf_lock); + idev = __in6_dev_get(dev); + if (idev) { +- int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); +- idev->cnf.forwarding = ipv6_devconf.forwarding; ++ int changed = (!idev->cnf.forwarding) ^ (!ve_ipv6_devconf.forwarding); ++ idev->cnf.forwarding = ve_ipv6_devconf.forwarding; + if (changed) + dev_forward_change(idev); + } +@@ -1148,9 +1153,10 @@ int ipv6_chk_addr(struct in6_addr *addr, + read_lock_bh(&addrconf_hash_lock); + for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ipv6_addr_equal(&ifp->addr, addr) && +- !(ifp->flags&IFA_F_TENTATIVE)) { ++ !(ifp->flags&IFA_F_TENTATIVE) && ++ ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env())) { + if (dev == NULL || ifp->idev->dev == dev || +- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) ++ !((ifp->scope&(IFA_LINK|IFA_HOST)) || strict)) + break; + } + } +@@ -1166,7 +1172,9 @@ int ipv6_chk_same_addr(const struct in6_ + + for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ipv6_addr_equal(&ifp->addr, 
addr)) { +- if (dev == NULL || ifp->idev->dev == dev) ++ if ((dev == NULL && ++ ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env())) ++ || ifp->idev->dev == dev) + break; + } + } +@@ -1180,9 +1188,10 @@ struct inet6_ifaddr * ipv6_get_ifaddr(st + + read_lock_bh(&addrconf_hash_lock); + for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { +- if (ipv6_addr_equal(&ifp->addr, addr)) { ++ if (ipv6_addr_equal(&ifp->addr, addr) && ++ ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env())) { + if (dev == NULL || ifp->idev->dev == dev || +- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { ++ !((ifp->scope&(IFA_LINK|IFA_HOST)) || strict)) { + in6_ifa_hold(ifp); + break; + } +@@ -1842,7 +1851,7 @@ err_exit: + /* + * Manual configuration of address on an interface + */ +-static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) ++int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) + { + struct inet6_ifaddr *ifp; + struct inet6_dev *idev; +@@ -1871,6 +1880,7 @@ static int inet6_addr_add(int ifindex, s + + return PTR_ERR(ifp); + } ++EXPORT_SYMBOL_GPL(inet6_addr_add); + + static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) + { +@@ -1911,7 +1921,7 @@ int addrconf_add_ifaddr(void __user *arg + struct in6_ifreq ireq; + int err; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) +@@ -1928,7 +1938,7 @@ int addrconf_del_ifaddr(void __user *arg + struct in6_ifreq ireq; + int err; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) +@@ -2270,7 +2280,7 @@ static struct notifier_block ipv6_dev_no + .priority = 0 + }; + +-static int addrconf_ifdown(struct net_device *dev, int how) ++int addrconf_ifdown(struct net_device *dev, int how) + { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa, **bifa; +@@ -2278,7 +2288,7 @@ static int 
addrconf_ifdown(struct net_de + + ASSERT_RTNL(); + +- if (dev == &loopback_dev && how == 1) ++ if (dev == get_ve0()->_loopback_dev && how == 1) + how = 0; + + rt6_ifdown(dev); +@@ -2386,10 +2396,12 @@ static int addrconf_ifdown(struct net_de + } + return 0; + } ++EXPORT_SYMBOL_GPL(addrconf_ifdown); + + static void addrconf_rs_timer(unsigned long data) + { + struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; ++ struct ve_struct *old_env = set_exec_env(ifp->idev->dev->owner_env); + + if (ifp->idev->cnf.forwarding) + goto out; +@@ -2428,6 +2440,7 @@ static void addrconf_rs_timer(unsigned l + + out: + in6_ifa_put(ifp); ++ set_exec_env(old_env); + } + + /* +@@ -2495,6 +2508,7 @@ static void addrconf_dad_timer(unsigned + struct inet6_dev *idev = ifp->idev; + struct in6_addr unspec; + struct in6_addr mcaddr; ++ struct ve_struct *old_env = set_exec_env(ifp->idev->dev->owner_env); + + read_lock_bh(&idev->lock); + if (idev->dead) { +@@ -2527,6 +2541,7 @@ static void addrconf_dad_timer(unsigned + ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec); + out: + in6_ifa_put(ifp); ++ set_exec_env(old_env); + } + + static void addrconf_dad_completed(struct inet6_ifaddr *ifp) +@@ -2594,8 +2609,11 @@ static struct inet6_ifaddr *if6_get_firs + + for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { + ifa = inet6_addr_lst[state->bucket]; +- if (ifa) +- break; ++ while (ifa) { ++ if (ve_accessible_strict(ifa->idev->dev->owner_env, get_exec_env())) ++ return ifa; ++ ifa = ifa->lst_next; ++ } + } + return ifa; + } +@@ -2606,6 +2624,11 @@ static struct inet6_ifaddr *if6_get_next + + ifa = ifa->lst_next; + try_again: ++ while (ifa) { ++ if (ve_accessible_strict(ifa->idev->dev->owner_env, get_exec_env())) ++ break; ++ ifa = ifa->lst_next; ++ } + if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { + ifa = inet6_addr_lst[state->bucket]; + goto try_again; +@@ -2697,14 +2720,14 @@ static struct file_operations if6_fops = + + int __init if6_proc_init(void) + { 
+- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) ++ if (!proc_glob_fops_create("net/if_inet6", S_IRUGO, &if6_fops)) + return -ENOMEM; + return 0; + } + + void if6_proc_exit(void) + { +- proc_net_remove("if_inet6"); ++ remove_proc_glob_entry("net/if_inet6", NULL); + } + #endif /* CONFIG_PROC_FS */ + +@@ -2717,6 +2740,7 @@ static void addrconf_verify(unsigned lon + struct inet6_ifaddr *ifp; + unsigned long now, next; + int i; ++ struct ve_struct *old_env; + + spin_lock_bh(&addrconf_verify_lock); + now = jiffies; +@@ -2737,6 +2761,8 @@ restart: + if (ifp->flags & IFA_F_PERMANENT) + continue; + ++ old_env = set_exec_env(ifp->idev->dev->owner_env); ++ + spin_lock(&ifp->lock); + age = (now - ifp->tstamp) / HZ; + +@@ -2751,6 +2777,7 @@ restart: + in6_ifa_hold(ifp); + read_unlock(&addrconf_hash_lock); + ipv6_del_addr(ifp); ++ set_exec_env(old_env); + goto restart; + } else if (age >= ifp->prefered_lft) { + /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ +@@ -2772,6 +2799,7 @@ restart: + + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); ++ set_exec_env(old_env); + goto restart; + } + #ifdef CONFIG_IPV6_PRIVACY +@@ -2793,6 +2821,7 @@ restart: + ipv6_create_tempaddr(ifpub, ifp); + in6_ifa_put(ifpub); + in6_ifa_put(ifp); ++ set_exec_env(old_env); + goto restart; + } + } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) +@@ -2805,6 +2834,7 @@ restart: + next = ifp->tstamp + ifp->prefered_lft * HZ; + spin_unlock(&ifp->lock); + } ++ set_exec_env(old_env); + } + read_unlock(&addrconf_hash_lock); + } +@@ -3360,7 +3390,7 @@ int addrconf_sysctl_forward(ctl_table *c + ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + + if (write && valp != &ipv6_devconf_dflt.forwarding) { +- if (valp != &ipv6_devconf.forwarding) { ++ if (valp != &ve_ipv6_devconf.forwarding) { + if ((!*valp) ^ (!val)) { + struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; + if (idev == NULL) +@@ -3368,7 +3398,7 @@ int 
addrconf_sysctl_forward(ctl_table *c + dev_forward_change(idev); + } + } else { +- ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; ++ ipv6_devconf_dflt.forwarding = ve_ipv6_devconf.forwarding; + addrconf_forward_change(); + } + if (*valp) +@@ -3411,7 +3441,7 @@ static int addrconf_sysctl_forward_strat + } + + if (valp != &ipv6_devconf_dflt.forwarding) { +- if (valp != &ipv6_devconf.forwarding) { ++ if (valp != &ve_ipv6_devconf.forwarding) { + struct inet6_dev *idev = (struct inet6_dev *)table->extra1; + int changed; + if (unlikely(idev == NULL)) +@@ -3447,7 +3477,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_FORWARDING, + .procname = "forwarding", +- .data = &ipv6_devconf.forwarding, ++ .data = &global_ipv6_devconf.forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &addrconf_sysctl_forward, +@@ -3456,7 +3486,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_HOP_LIMIT, + .procname = "hop_limit", +- .data = &ipv6_devconf.hop_limit, ++ .data = &global_ipv6_devconf.hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, +@@ -3464,7 +3494,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_MTU, + .procname = "mtu", +- .data = &ipv6_devconf.mtu6, ++ .data = &global_ipv6_devconf.mtu6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3472,7 +3502,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_ACCEPT_RA, + .procname = "accept_ra", +- .data = &ipv6_devconf.accept_ra, ++ .data = &global_ipv6_devconf.accept_ra, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3480,7 +3510,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, + .procname = "accept_redirects", +- .data = &ipv6_devconf.accept_redirects, ++ .data = &global_ipv6_devconf.accept_redirects, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3488,7 +3518,7 @@ static 
struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_AUTOCONF, + .procname = "autoconf", +- .data = &ipv6_devconf.autoconf, ++ .data = &global_ipv6_devconf.autoconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3496,7 +3526,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_DAD_TRANSMITS, + .procname = "dad_transmits", +- .data = &ipv6_devconf.dad_transmits, ++ .data = &global_ipv6_devconf.dad_transmits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3504,7 +3534,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_RTR_SOLICITS, + .procname = "router_solicitations", +- .data = &ipv6_devconf.rtr_solicits, ++ .data = &global_ipv6_devconf.rtr_solicits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3512,7 +3542,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, + .procname = "router_solicitation_interval", +- .data = &ipv6_devconf.rtr_solicit_interval, ++ .data = &global_ipv6_devconf.rtr_solicit_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, +@@ -3521,7 +3551,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, + .procname = "router_solicitation_delay", +- .data = &ipv6_devconf.rtr_solicit_delay, ++ .data = &global_ipv6_devconf.rtr_solicit_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, +@@ -3530,7 +3560,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_FORCE_MLD_VERSION, + .procname = "force_mld_version", +- .data = &ipv6_devconf.force_mld_version, ++ .data = &global_ipv6_devconf.force_mld_version, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3539,7 +3569,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_USE_TEMPADDR, + .procname = "use_tempaddr", +- .data = &ipv6_devconf.use_tempaddr, ++ .data = 
&global_ipv6_devconf.use_tempaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3547,7 +3577,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_TEMP_VALID_LFT, + .procname = "temp_valid_lft", +- .data = &ipv6_devconf.temp_valid_lft, ++ .data = &global_ipv6_devconf.temp_valid_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3555,7 +3585,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, + .procname = "temp_prefered_lft", +- .data = &ipv6_devconf.temp_prefered_lft, ++ .data = &global_ipv6_devconf.temp_prefered_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3563,7 +3593,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_REGEN_MAX_RETRY, + .procname = "regen_max_retry", +- .data = &ipv6_devconf.regen_max_retry, ++ .data = &global_ipv6_devconf.regen_max_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3571,7 +3601,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, + .procname = "max_desync_factor", +- .data = &ipv6_devconf.max_desync_factor, ++ .data = &global_ipv6_devconf.max_desync_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3580,7 +3610,7 @@ static struct addrconf_sysctl_table + { + .ctl_name = NET_IPV6_MAX_ADDRESSES, + .procname = "max_addresses", +- .data = &ipv6_devconf.max_addresses, ++ .data = &global_ipv6_devconf.max_addresses, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, +@@ -3635,29 +3665,22 @@ static struct addrconf_sysctl_table + }, + }; + +-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) ++static struct addrconf_sysctl_table * ++__addrconf_sysctl_register(struct inet6_dev *idev, char *dev_name, int ifindex, struct ipv6_devconf *p) + { + int i; +- struct net_device *dev = idev ? 
idev->dev : NULL; + struct addrconf_sysctl_table *t; +- char *dev_name = NULL; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (t == NULL) +- return; ++ return NULL; ++ + memcpy(t, &addrconf_sysctl, sizeof(*t)); + for (i=0; t->addrconf_vars[i].data; i++) { +- t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; ++ t->addrconf_vars[i].data += (char*)p - (char*)&global_ipv6_devconf; + t->addrconf_vars[i].de = NULL; + t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ + } +- if (dev) { +- dev_name = dev->name; +- t->addrconf_dev[0].ctl_name = dev->ifindex; +- } else { +- dev_name = "default"; +- t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; +- } + + /* + * Make a copy of dev_name, because '.procname' is regarded as const +@@ -3668,6 +3691,7 @@ static void addrconf_sysctl_register(str + if (!dev_name) + goto free; + ++ t->addrconf_dev[0].ctl_name = ifindex; + t->addrconf_dev[0].procname = dev_name; + + t->addrconf_dev[0].child = t->addrconf_vars; +@@ -3682,9 +3706,7 @@ static void addrconf_sysctl_register(str + t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0); + if (t->sysctl_header == NULL) + goto free_procname; +- else +- p->sysctl = t; +- return; ++ return t; + + /* error path */ + free_procname: +@@ -3692,7 +3714,26 @@ static void addrconf_sysctl_register(str + free: + kfree(t); + +- return; ++ return NULL; ++} ++ ++static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) ++{ ++ struct net_device *dev; ++ char *dev_name; ++ int ifindex; ++ ++ dev = idev ? 
idev->dev : NULL; ++ ++ if (dev) { ++ dev_name = dev->name; ++ ifindex = dev->ifindex; ++ } else { ++ dev_name = "default"; ++ ifindex = NET_PROTO_CONF_DEFAULT; ++ } ++ ++ p->sysctl = __addrconf_sysctl_register(idev, dev_name, ifindex, p); + } + + static void addrconf_sysctl_unregister(struct ipv6_devconf *p) +@@ -3706,6 +3747,73 @@ static void addrconf_sysctl_unregister(s + } + } + ++int addrconf_sysctl_init(struct ve_struct *ve) ++{ ++ int err = 0; ++#ifdef CONFIG_SYSCTL ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ struct ipv6_devconf *conf, *conf_def; ++ ++ err = -ENOMEM; ++ ++ conf = kmalloc(sizeof(*conf), GFP_KERNEL); ++ if (!conf) ++ goto err1; ++ ++ memcpy(conf, &global_ipv6_devconf, sizeof(*conf)); ++ conf->sysctl = __addrconf_sysctl_register(NULL, "all", ++ NET_PROTO_CONF_ALL, conf); ++ if (!conf->sysctl) ++ goto err2; ++ ++ conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL); ++ if (!conf_def) ++ goto err3; ++ ++ memcpy(conf_def, &global_ipv6_devconf_dflt, sizeof(*conf_def)); ++ conf_def->sysctl = __addrconf_sysctl_register(NULL, "default", ++ NET_PROTO_CONF_DEFAULT, conf_def); ++ if (!conf_def->sysctl) ++ goto err4; ++ ++ ve->_ipv6_devconf = conf; ++ ve->_ipv6_devconf_dflt = conf_def; ++ return 0; ++ ++err4: ++ kfree(conf_def); ++err3: ++ addrconf_sysctl_unregister(conf); ++err2: ++ kfree(conf); ++err1: ++#endif ++#endif ++ return err; ++} ++EXPORT_SYMBOL(addrconf_sysctl_init); ++ ++void addrconf_sysctl_fini(struct ve_struct *ve) ++{ ++#ifdef CONFIG_SYSCTL ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ addrconf_sysctl_unregister(ve->_ipv6_devconf); ++ addrconf_sysctl_unregister(ve->_ipv6_devconf_dflt); ++#endif ++#endif ++} ++EXPORT_SYMBOL(addrconf_sysctl_fini); ++ ++void addrconf_sysctl_free(struct ve_struct *ve) ++{ ++#ifdef CONFIG_SYSCTL ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ kfree(ve->_ipv6_devconf); ++ kfree(ve->_ipv6_devconf_dflt); ++#endif ++#endif ++} 
++EXPORT_SYMBOL(addrconf_sysctl_free); + + #endif + +@@ -3731,6 +3839,11 @@ int __init addrconf_init(void) + { + int err = 0; + ++#ifdef CONFIG_VE ++ get_ve0()->_ipv6_devconf = &global_ipv6_devconf; ++ get_ve0()->_ipv6_devconf_dflt = &global_ipv6_devconf_dflt; ++#endif ++ + /* The addrconf netdev notifier requires that loopback_dev + * has it's ipv6 private information allocated and setup + * before it can bring up and give link-local addresses +@@ -3772,7 +3885,7 @@ int __init addrconf_init(void) + #ifdef CONFIG_SYSCTL + addrconf_sysctl.sysctl_header = + register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0); +- addrconf_sysctl_register(NULL, &ipv6_devconf_dflt); ++ __addrconf_sysctl_register(NULL, "default", NET_PROTO_CONF_DEFAULT, &global_ipv6_devconf_dflt); + #endif + + return 0; +@@ -3789,8 +3902,8 @@ void __exit addrconf_cleanup(void) + + rtnetlink_links[PF_INET6] = NULL; + #ifdef CONFIG_SYSCTL +- addrconf_sysctl_unregister(&ipv6_devconf_dflt); +- addrconf_sysctl_unregister(&ipv6_devconf); ++ addrconf_sysctl_unregister(&global_ipv6_devconf_dflt); ++ addrconf_sysctl_unregister(&global_ipv6_devconf); + #endif + + rtnl_lock(); +@@ -3835,6 +3948,6 @@ void __exit addrconf_cleanup(void) + #endif + + #ifdef CONFIG_PROC_FS +- proc_net_remove("if_inet6"); ++ remove_proc_glob_entry("net/if_inet6", NULL); + #endif + } +diff -upr linux-2.6.16.orig/net/ipv6/af_inet6.c linux-2.6.16-026test015/net/ipv6/af_inet6.c +--- linux-2.6.16.orig/net/ipv6/af_inet6.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/af_inet6.c 2006-07-04 14:41:39.000000000 +0400 +@@ -60,6 +60,7 @@ + #ifdef CONFIG_IPV6_TUNNEL + #include <net/ip6_tunnel.h> + #endif ++#include <ub/ub_net.h> + + #include <asm/uaccess.h> + #include <asm/system.h> +@@ -160,6 +161,13 @@ lookup_protocol: + if (sk == NULL) + goto out; + ++ err = -ENOBUFS; ++ if (ub_sock_charge(sk, PF_INET6, sock->type)) ++ goto out_sk_free; ++ /* if charge was successful, sock_init_data() MUST be called to ++ * set 
sk->sk_type. otherwise sk will be uncharged to wrong resource ++ */ ++ + sock_init_data(sock, sk); + + err = 0; +@@ -234,6 +242,9 @@ out: + out_rcu_unlock: + rcu_read_unlock(); + goto out; ++out_sk_free: ++ sk_free(sk); ++ return err; + } + + +@@ -650,6 +661,8 @@ int inet6_sk_rebuild_header(struct sock + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); ++ if (!sysctl_tcp_use_sg) ++ sk->sk_route_caps &= ~NETIF_F_SG; + } + + return 0; +@@ -715,21 +728,21 @@ snmp6_mib_free(void *ptr[2]) + + static int __init init_ipv6_mibs(void) + { +- if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib), ++ if (snmp6_mib_init((void **)ve_ipv6_statistics, sizeof (struct ipstats_mib), + __alignof__(struct ipstats_mib)) < 0) + goto err_ip_mib; +- if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib), ++ if (snmp6_mib_init((void **)ve_icmpv6_statistics, sizeof (struct icmpv6_mib), + __alignof__(struct icmpv6_mib)) < 0) + goto err_icmp_mib; +- if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib), ++ if (snmp6_mib_init((void **)ve_udp_stats_in6, sizeof (struct udp_mib), + __alignof__(struct udp_mib)) < 0) + goto err_udp_mib; + return 0; + + err_udp_mib: +- snmp6_mib_free((void **)icmpv6_statistics); ++ snmp6_mib_free((void **)ve_icmpv6_statistics); + err_icmp_mib: +- snmp6_mib_free((void **)ipv6_statistics); ++ snmp6_mib_free((void **)ve_ipv6_statistics); + err_ip_mib: + return -ENOMEM; + +@@ -737,9 +750,9 @@ err_ip_mib: + + static void cleanup_ipv6_mibs(void) + { +- snmp6_mib_free((void **)ipv6_statistics); +- snmp6_mib_free((void **)icmpv6_statistics); +- snmp6_mib_free((void **)udp_stats_in6); ++ snmp6_mib_free((void **)ve_ipv6_statistics); ++ snmp6_mib_free((void **)ve_icmpv6_statistics); ++ snmp6_mib_free((void **)ve_udp_stats_in6); + } + + static int __init inet6_init(void) +diff -upr linux-2.6.16.orig/net/ipv6/anycast.c linux-2.6.16-026test015/net/ipv6/anycast.c +--- 
linux-2.6.16.orig/net/ipv6/anycast.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/anycast.c 2006-07-04 14:41:39.000000000 +0400 +@@ -83,7 +83,7 @@ int ipv6_sock_ac_join(struct sock *sk, i + struct net_device *dev = NULL; + struct inet6_dev *idev; + struct ipv6_ac_socklist *pac; +- int ishost = !ipv6_devconf.forwarding; ++ int ishost = !ve_ipv6_devconf.forwarding; + int err = 0; + + if (!capable(CAP_NET_ADMIN)) +@@ -455,6 +455,8 @@ static inline struct ifacaddr6 *ac6_get_ + state->dev; + state->dev = state->dev->next) { + struct inet6_dev *idev; ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + idev = in6_dev_get(state->dev); + if (!idev) + continue; +@@ -484,6 +486,8 @@ static struct ifacaddr6 *ac6_get_next(st + state->idev = NULL; + break; + } ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + state->idev = in6_dev_get(state->dev); + if (!state->idev) + continue; +@@ -579,7 +583,7 @@ static struct file_operations ac6_seq_fo + + int __init ac6_proc_init(void) + { +- if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops)) ++ if (!proc_glob_fops_create("net/anycast6", S_IRUGO, &ac6_seq_fops)) + return -ENOMEM; + + return 0; +@@ -587,7 +591,7 @@ int __init ac6_proc_init(void) + + void ac6_proc_exit(void) + { +- proc_net_remove("anycast6"); ++ remove_proc_glob_entry("net/anycast6", NULL); + } + #endif + +diff -upr linux-2.6.16.orig/net/ipv6/exthdrs.c linux-2.6.16-026test015/net/ipv6/exthdrs.c +--- linux-2.6.16.orig/net/ipv6/exthdrs.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/exthdrs.c 2006-07-04 14:41:36.000000000 +0400 +@@ -489,6 +489,18 @@ int ipv6_parse_hopopts(struct sk_buff *s + { + struct inet6_skb_parm *opt = IP6CB(skb); + ++ /* ++ * skb->nh.raw is equal to skb->data, and ++ * skb->h.raw - skb->nh.raw is always equal to ++ * sizeof(struct ipv6hdr) by definition of ++ * hop-by-hop options. 
++ */ ++ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || ++ !pskb_may_pull(skb, sizeof(struct ipv6hdr) + ((skb->h.raw[1] + 1) << 3))) { ++ kfree_skb(skb); ++ return -1; ++ } ++ + opt->hop = sizeof(struct ipv6hdr); + if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { + skb->h.raw += (skb->h.raw[1]+1)<<3; +diff -upr linux-2.6.16.orig/net/ipv6/inet6_connection_sock.c linux-2.6.16-026test015/net/ipv6/inet6_connection_sock.c +--- linux-2.6.16.orig/net/ipv6/inet6_connection_sock.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/inet6_connection_sock.c 2006-07-04 14:41:39.000000000 +0400 +@@ -26,6 +26,8 @@ + #include <net/ip6_route.h> + #include <net/sock.h> + #include <net/inet6_connection_sock.h> ++#include <ub/ub_net.h> ++#include <ub/ub_orphan.h> + + int inet6_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) +@@ -36,6 +38,7 @@ int inet6_csk_bind_conflict(const struct + /* We must walk the whole port owner list in this case. -DaveM */ + sk_for_each_bound(sk2, node, &tb->owners) { + if (sk != sk2 && ++ !ve_accessible_strict(VE_OWNER_SK(sk), VE_OWNER_SK(sk2)) && + (!sk->sk_bound_dev_if || + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && +@@ -173,6 +176,7 @@ int inet6_csk_xmit(struct sk_buff *skb, + + if (err) { + sk->sk_err_soft = -err; ++ kfree_skb(skb); + return err; + } + +@@ -181,12 +185,15 @@ int inet6_csk_xmit(struct sk_buff *skb, + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_route_caps = 0; ++ kfree_skb(skb); + return err; + } + + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); ++ if (!sysctl_tcp_use_sg) ++ sk->sk_route_caps &= ~NETIF_F_SG; + } + + skb->dst = dst_clone(dst); +diff -upr linux-2.6.16.orig/net/ipv6/inet6_hashtables.c linux-2.6.16-026test015/net/ipv6/inet6_hashtables.c +--- linux-2.6.16.orig/net/ipv6/inet6_hashtables.c 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16-026test015/net/ipv6/inet6_hashtables.c 2006-07-04 14:41:39.000000000 +0400 +@@ -31,9 +31,14 @@ struct sock *inet6_lookup_listener(struc + const struct hlist_node *node; + struct sock *result = NULL; + int score, hiscore = 0; ++ struct ve_struct *env; ++ ++ env = get_exec_env(); + + read_lock(&hashinfo->lhash_lock); +- sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { ++ sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum, VEID(env))]) { ++ if (!ve_accessible_strict(VE_OWNER_SK(sk), env)) ++ continue; + if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + +@@ -84,7 +89,8 @@ EXPORT_SYMBOL_GPL(inet6_lookup); + + static int __inet6_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, const __u16 lport, +- struct inet_timewait_sock **twp) ++ struct inet_timewait_sock **twp, ++ struct ve_struct *ve) + { + struct inet_hashinfo *hinfo = death_row->hashinfo; + struct inet_sock *inet = inet_sk(sk); +@@ -94,7 +100,7 @@ static int __inet6_check_established(str + const int dif = sk->sk_bound_dev_if; + const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, +- inet->dport); ++ inet->dport, VEID(ve)); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + struct sock *sk2; + const struct hlist_node *node; +@@ -113,7 +119,8 @@ static int __inet6_check_established(str + sk2->sk_family == PF_INET6 && + ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && +- sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { ++ sk2->sk_bound_dev_if == sk->sk_bound_dev_if && ++ ve_accessible_strict(tw->tw_owner_env, VEID(ve))) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else +@@ -124,7 +131,7 @@ static int __inet6_check_established(str + + /* And established part... 
*/ + sk_for_each(sk2, node, &head->chain) { +- if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) ++ if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif, ve)) + goto not_unique; + } + +@@ -173,7 +180,9 @@ int inet6_hash_connect(struct inet_timew + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; ++ struct ve_struct *ve; + ++ ve = VE_OWNER_SK(sk); + if (snum == 0) { + const int low = sysctl_local_port_range[0]; + const int high = sysctl_local_port_range[1]; +@@ -187,7 +196,8 @@ int inet6_hash_connect(struct inet_timew + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; +- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; ++ head = &hinfo->bhash[inet_bhashfn(port, ++ hinfo->bhash_size, VEID(ve))]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, +@@ -201,14 +211,14 @@ int inet6_hash_connect(struct inet_timew + goto next_port; + if (!__inet6_check_established(death_row, + sk, port, +- &tw)) ++ &tw, ve)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, +- head, port); ++ head, port, ve); + if (!tb) { + spin_unlock(&head->lock); + break; +@@ -243,7 +253,7 @@ ok: + goto out; + } + +- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; ++ head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + +@@ -254,7 +264,7 @@ ok: + } else { + spin_unlock(&head->lock); + /* No definite answer... 
Walk to established hash table */ +- ret = __inet6_check_established(death_row, sk, snum, NULL); ++ ret = __inet6_check_established(death_row, sk, snum, NULL, ve); + out: + local_bh_enable(); + return ret; +diff -upr linux-2.6.16.orig/net/ipv6/ip6_fib.c linux-2.6.16-026test015/net/ipv6/ip6_fib.c +--- linux-2.6.16.orig/net/ipv6/ip6_fib.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/ip6_fib.c 2006-07-04 14:41:39.000000000 +0400 +@@ -1128,8 +1128,12 @@ static int fib6_age(struct rt6_info *rt, + + static DEFINE_SPINLOCK(fib6_gc_lock); + ++LIST_HEAD(fib6_table_list); ++ + void fib6_run_gc(unsigned long dummy) + { ++ struct fib6_table *tbl; ++ + if (dummy != ~0UL) { + spin_lock_bh(&fib6_gc_lock); + gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval; +@@ -1147,7 +1151,11 @@ void fib6_run_gc(unsigned long dummy) + + write_lock_bh(&rt6_lock); + ndisc_dst_gc(&gc_args.more); +- fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); ++ list_for_each_entry(tbl, &fib6_table_list, list) { ++ struct ve_struct *old_env = set_exec_env(tbl->owner_env); ++ fib6_clean_tree(&tbl->root, fib6_age, 0, NULL); ++ set_exec_env(old_env); ++ } + write_unlock_bh(&rt6_lock); + + if (gc_args.more) +@@ -1163,7 +1171,7 @@ void __init fib6_init(void) + { + fib6_node_kmem = kmem_cache_create("fib6_nodes", + sizeof(struct fib6_node), +- 0, SLAB_HWCACHE_ALIGN, ++ 0, SLAB_HWCACHE_ALIGN | SLAB_UBC, + NULL, NULL); + if (!fib6_node_kmem) + panic("cannot create fib6_nodes cache"); +diff -upr linux-2.6.16.orig/net/ipv6/ip6_flowlabel.c linux-2.6.16-026test015/net/ipv6/ip6_flowlabel.c +--- linux-2.6.16.orig/net/ipv6/ip6_flowlabel.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/ip6_flowlabel.c 2006-07-04 14:41:39.000000000 +0400 +@@ -417,6 +417,9 @@ int ipv6_flowlabel_opt(struct sock *sk, + struct ipv6_fl_socklist *sfl, **sflp; + struct ip6_flowlabel *fl; + ++ if (!ve_is_super(get_exec_env())) ++ return -EPERM; ++ + if (optlen < sizeof(freq)) + 
return -EINVAL; + +diff -upr linux-2.6.16.orig/net/ipv6/ip6_output.c linux-2.6.16-026test015/net/ipv6/ip6_output.c +--- linux-2.6.16.orig/net/ipv6/ip6_output.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/ip6_output.c 2006-07-04 14:41:39.000000000 +0400 +@@ -319,7 +319,7 @@ int ip6_forward(struct sk_buff *skb) + struct ipv6hdr *hdr = skb->nh.ipv6h; + struct inet6_skb_parm *opt = IP6CB(skb); + +- if (ipv6_devconf.forwarding == 0) ++ if (ve_ipv6_devconf.forwarding == 0) + goto error; + + if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { +@@ -407,6 +407,20 @@ int ip6_forward(struct sk_buff *skb) + return -EMSGSIZE; + } + ++ /* ++ * We try to optimize forwarding of VE packets: ++ * do not decrement TTL (and so save skb_cow) ++ * during forwarding of outgoing pkts from VE. ++ * For incoming pkts we still do ttl decr, ++ * since such skb is not cloned and does not require ++ * actual cow. So, there is at least one place ++ * in pkts path with mandatory ttl decr, that is ++ * sufficient to prevent routing loops. 
++ */ ++ hdr = skb->nh.ipv6h; ++ if (skb->dev->features & NETIF_F_VENET) /* src is VENET device */ ++ goto no_ttl_decr; ++ + if (skb_cow(skb, dst->dev->hard_header_len)) { + IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + goto drop; +@@ -418,6 +432,7 @@ int ip6_forward(struct sk_buff *skb) + + hdr->hop_limit--; + ++no_ttl_decr: + IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); + return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); + +diff -upr linux-2.6.16.orig/net/ipv6/mcast.c linux-2.6.16-026test015/net/ipv6/mcast.c +--- linux-2.6.16.orig/net/ipv6/mcast.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/mcast.c 2006-07-04 14:41:39.000000000 +0400 +@@ -156,7 +156,7 @@ static int ip6_mc_leave_src(struct sock + #define IGMP6_UNSOLICITED_IVAL (10*HZ) + #define MLD_QRV_DEFAULT 2 + +-#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \ ++#define MLD_V1_SEEN(idev) (ve_ipv6_devconf.force_mld_version == 1 || \ + (idev)->cnf.force_mld_version == 1 || \ + ((idev)->mc_v1_seen && \ + time_before(jiffies, (idev)->mc_v1_seen))) +@@ -248,6 +248,7 @@ int ipv6_sock_mc_join(struct sock *sk, i + + return 0; + } ++EXPORT_SYMBOL_GPL(ipv6_sock_mc_join); + + /* + * socket leave on multicast group +@@ -2166,15 +2167,18 @@ static void igmp6_leave_group(struct ifm + static void mld_gq_timer_expire(unsigned long data) + { + struct inet6_dev *idev = (struct inet6_dev *)data; ++ struct ve_struct *old_env = set_exec_env(idev->dev->owner_env); + + idev->mc_gq_running = 0; + mld_send_report(idev, NULL); + __in6_dev_put(idev); ++ set_exec_env(old_env); + } + + static void mld_ifc_timer_expire(unsigned long data) + { + struct inet6_dev *idev = (struct inet6_dev *)data; ++ struct ve_struct *old_env = set_exec_env(idev->dev->owner_env); + + mld_send_cr(idev); + if (idev->mc_ifc_count) { +@@ -2183,6 +2187,7 @@ static void mld_ifc_timer_expire(unsigne + mld_ifc_start_timer(idev, idev->mc_maxdelay); + } + __in6_dev_put(idev); ++ 
set_exec_env(old_env); + } + + static void mld_ifc_event(struct inet6_dev *idev) +@@ -2197,6 +2202,7 @@ static void mld_ifc_event(struct inet6_d + static void igmp6_timer_handler(unsigned long data) + { + struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; ++ struct ve_struct *old_env = set_exec_env(ma->idev->dev->owner_env); + + if (MLD_V1_SEEN(ma->idev)) + igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); +@@ -2208,6 +2214,7 @@ static void igmp6_timer_handler(unsigned + ma->mca_flags &= ~MAF_TIMER_RUNNING; + spin_unlock(&ma->mca_lock); + ma_put(ma); ++ set_exec_env(old_env); + } + + /* Device going down */ +@@ -2331,6 +2338,8 @@ static inline struct ifmcaddr6 *igmp6_mc + state->dev; + state->dev = state->dev->next) { + struct inet6_dev *idev; ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + idev = in6_dev_get(state->dev); + if (!idev) + continue; +@@ -2361,6 +2370,8 @@ static struct ifmcaddr6 *igmp6_mc_get_ne + state->idev = NULL; + break; + } ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + state->idev = in6_dev_get(state->dev); + if (!state->idev) + continue; +@@ -2476,6 +2487,8 @@ static inline struct ip6_sf_list *igmp6_ + state->dev; + state->dev = state->dev->next) { + struct inet6_dev *idev; ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + idev = in6_dev_get(state->dev); + if (unlikely(idev == NULL)) + continue; +@@ -2515,6 +2528,8 @@ static struct ip6_sf_list *igmp6_mcf_get + state->idev = NULL; + goto out; + } ++ if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) ++ continue; + state->idev = in6_dev_get(state->dev); + if (!state->idev) + continue; +@@ -2657,8 +2672,8 @@ int __init igmp6_init(struct net_proto_f + np->hop_limit = 1; + + #ifdef CONFIG_PROC_FS +- proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops); +- proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); ++ 
proc_glob_fops_create("net/igmp6", S_IRUGO, &igmp6_mc_seq_fops); ++ proc_glob_fops_create("net/mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); + #endif + + return 0; +@@ -2670,7 +2685,7 @@ void igmp6_cleanup(void) + igmp6_socket = NULL; /* for safety */ + + #ifdef CONFIG_PROC_FS +- proc_net_remove("mcfilter6"); +- proc_net_remove("igmp6"); ++ remove_proc_glob_entry("net/mcfilter6", NULL); ++ remove_proc_glob_entry("net/igmp6", NULL); + #endif + } +diff -upr linux-2.6.16.orig/net/ipv6/ndisc.c linux-2.6.16-026test015/net/ipv6/ndisc.c +--- linux-2.6.16.orig/net/ipv6/ndisc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/ndisc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -124,7 +124,7 @@ static struct neigh_ops ndisc_direct_ops + .queue_xmit = dev_queue_xmit, + }; + +-struct neigh_table nd_tbl = { ++struct neigh_table global_nd_tbl = { + .family = AF_INET6, + .entry_size = sizeof(struct neighbour) + sizeof(struct in6_addr), + .key_len = sizeof(struct in6_addr), +@@ -135,7 +135,7 @@ struct neigh_table nd_tbl = { + .proxy_redo = pndisc_redo, + .id = "ndisc_cache", + .parms = { +- .tbl = &nd_tbl, ++ .tbl = &global_nd_tbl, + .base_reachable_time = 30 * HZ, + .retrans_time = 1 * HZ, + .gc_staletime = 60 * HZ, +@@ -1660,7 +1660,9 @@ int __init ndisc_init(struct net_proto_f + * Initialize the neighbour table + */ + +- neigh_table_init(&nd_tbl); ++ get_ve0()->ve_nd_tbl = &global_nd_tbl; ++ if (neigh_table_init(&nd_tbl)) ++ panic("cannot initialize IPv6 NDISC tables\n"); + + #ifdef CONFIG_SYSCTL + neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, +@@ -1682,3 +1684,52 @@ void ndisc_cleanup(void) + sock_release(ndisc_socket); + ndisc_socket = NULL; /* For safety. 
*/ + } ++ ++int ve_ndisc_init(struct ve_struct *ve) ++{ ++ struct ve_struct *old_env; ++ int err; ++ ++ ve->ve_nd_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL); ++ if (ve->ve_nd_tbl == NULL) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ *(ve->ve_nd_tbl) = global_nd_tbl; ++ ve->ve_nd_tbl->parms.tbl = ve->ve_nd_tbl; ++ old_env = set_exec_env(ve); ++ err = neigh_table_init(ve->ve_nd_tbl); ++ if (err) ++ goto out_free; ++#ifdef CONFIG_SYSCTL ++ neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, ++ "ipv6", ++ &ndisc_ifinfo_sysctl_change, ++ &ndisc_ifinfo_sysctl_strategy); ++#endif ++ set_exec_env(old_env); ++ err = 0; ++ ++out: ++ return err; ++ ++out_free: ++ kfree(ve->ve_nd_tbl); ++ ve->ve_nd_tbl = NULL; ++ goto out; ++} ++EXPORT_SYMBOL(ve_ndisc_init); ++ ++void ve_ndisc_fini(struct ve_struct *ve) ++{ ++ if (ve->ve_nd_tbl) { ++#ifdef CONFIG_SYSCTL ++ neigh_sysctl_unregister(&ve->ve_nd_tbl->parms); ++#endif ++ neigh_table_clear(ve->ve_nd_tbl); ++ kfree(ve->ve_nd_tbl); ++ ve->ve_nd_tbl = NULL; ++ } ++} ++EXPORT_SYMBOL(ve_ndisc_fini); +diff -upr linux-2.6.16.orig/net/ipv6/netfilter/ip6_queue.c linux-2.6.16-026test015/net/ipv6/netfilter/ip6_queue.c +--- linux-2.6.16.orig/net/ipv6/netfilter/ip6_queue.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/netfilter/ip6_queue.c 2006-07-04 14:41:39.000000000 +0400 +@@ -540,8 +540,11 @@ ipq_rcv_sk(struct sock *sk, int len) + down(&ipqnl_sem); + + for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { ++ struct ve_struct *env; + skb = skb_dequeue(&sk->sk_receive_queue); ++ env = set_exec_env(VE_OWNER_SKB(skb)); + ipq_rcv_skb(skb); ++ (void)set_exec_env(env); + kfree_skb(skb); + } + +diff -upr linux-2.6.16.orig/net/ipv6/netfilter/ip6_tables.c linux-2.6.16-026test015/net/ipv6/netfilter/ip6_tables.c +--- linux-2.6.16.orig/net/ipv6/netfilter/ip6_tables.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/netfilter/ip6_tables.c 2006-07-04 
14:41:39.000000000 +0400 +@@ -32,9 +32,11 @@ + #include <asm/semaphore.h> + #include <linux/proc_fs.h> + #include <linux/cpumask.h> ++#include <ub/ub_mem.h> + + #include <linux/netfilter_ipv6/ip6_tables.h> + #include <linux/netfilter/x_tables.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +@@ -79,6 +81,14 @@ do { \ + #define inline + #endif + ++#ifdef CONFIG_VE_IPTABLES ++/* include ve.h and define get_exec_env */ ++#include <linux/sched.h> ++#define ve_ip6t_standard_target (get_exec_env()->_ip6t_standard_target) ++#else ++#define ve_ip6t_standard_target &ip6t_standard_target ++#endif ++ + /* + We keep a set of rules for each CPU, so we can avoid write-locking + them in the softirq when updating the counters and therefore +@@ -632,7 +642,7 @@ check_entry(struct ip6t_entry *e, const + } + t->u.kernel.target = target; + +- if (t->u.kernel.target == &ip6t_standard_target) { ++ if (t->u.kernel.target == ve_ip6t_standard_target) { + if (!standard_check(t, size)) { + ret = -EINVAL; + goto cleanup_matches; +@@ -1120,7 +1130,7 @@ do_add_counters(void __user *user, unsig + + write_lock_bh(&t->lock); + private = t->private; +- if (private->number != paddc->num_counters) { ++ if (private->number != tmp.num_counters) { + ret = -EINVAL; + goto unlock_up_free; + } +@@ -1148,7 +1158,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd + { + int ret; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + + switch (cmd) { +@@ -1173,7 +1183,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd + { + int ret; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + + switch (cmd) { +@@ -1271,7 +1281,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd + return ret; + } + +-int ip6t_register_table(struct xt_table *table, ++struct ip6t_table *ip6t_register_table(struct xt_table *table, + const struct ip6t_replace *repl) + { + int ret; +@@ -1282,7 +1292,7 @@ int 
ip6t_register_table(struct xt_table + + newinfo = xt_alloc_table_info(repl->size); + if (!newinfo) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + /* choose the copy on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; +@@ -1295,15 +1305,13 @@ int ip6t_register_table(struct xt_table + repl->underflow); + if (ret != 0) { + xt_free_table_info(newinfo); +- return ret; ++ return ERR_PTR(ret); + } + +- if (xt_register_table(table, &bootstrap, newinfo) != 0) { ++ table = virt_xt_register_table(table, &bootstrap, newinfo); ++ if (IS_ERR(table)) + xt_free_table_info(newinfo); +- return ret; +- } +- +- return 0; ++ return table; + } + + void ip6t_unregister_table(struct xt_table *table) +@@ -1311,7 +1319,7 @@ void ip6t_unregister_table(struct xt_tab + struct xt_table_info *private; + void *loc_cpu_entry; + +- private = xt_unregister_table(table); ++ private = virt_xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; +@@ -1319,6 +1327,29 @@ void ip6t_unregister_table(struct xt_tab + xt_free_table_info(private); + } + ++void ip6t_flush_table(struct xt_table *table) ++{ ++ struct xt_table *t; ++ void *loc_cpu_entry; ++ ++ if (table == NULL) ++ return; ++ ++ t = xt_find_table_lock(AF_INET6, table->name); ++ if (t && !IS_ERR(t)) { ++ struct xt_table_info *private; ++ private = t->private; ++ loc_cpu_entry = private->entries[raw_smp_processor_id()]; ++ IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, ++ cleanup_entry, NULL); ++ if (private->number > private->initial_entries) ++ module_put(t->me); ++ private->size = 0; ++ xt_table_unlock(t); ++ module_put(t->me); ++ } ++} ++ + /* Returns 1 if the type and code is matched by the range, 0 otherwise */ + static inline int + icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, +@@ -1405,36 +1436,93 @@ static struct ip6t_match icmp6_matchstru + .checkentry = &icmp6_checkentry, + }; + 
+-static int __init init(void) ++static int init_ip6tables(void) + { + int ret; + +- xt_proto_init(AF_INET6); ++ if (ve_ip6t_standard_target != NULL) ++ return -EEXIST; + +- /* Noone else will be downing sem now, so we won't sleep */ +- xt_register_target(AF_INET6, &ip6t_standard_target); +- xt_register_target(AF_INET6, &ip6t_error_target); +- xt_register_match(AF_INET6, &icmp6_matchstruct); ++ ret = xt_register_target(AF_INET6, &ip6t_standard_target); ++ if (ret) ++ goto out; ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip6t_standard_target = xt_find_target(AF_INET6, IP6T_STANDARD_TARGET, 0); ++ if (IS_ERR(ve_ip6t_standard_target)) ++ goto out_standard; ++#endif ++ ret = xt_register_target(AF_INET6, &ip6t_error_target); ++ if (ret) ++ goto out_error; ++ ret = xt_register_match(AF_INET6, &icmp6_matchstruct); ++ if (ret) ++ goto out_icmp; ++ ret = xt_proto_init(AF_INET6); ++ if (ret) ++ goto out_proc; ++ return 0; ++ ++out_proc: ++ xt_unregister_match(AF_INET6, &icmp6_matchstruct); ++out_icmp: ++ xt_unregister_target(AF_INET6, &ip6t_error_target); ++out_error: ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip6t_standard_target = NULL; ++out_standard: ++#endif ++ xt_unregister_target(AF_INET6, &ip6t_standard_target); ++out: ++ return ret; ++} ++ ++static void fini_ip6tables(void) ++{ ++ xt_proto_fini(AF_INET6); ++ xt_unregister_match(AF_INET6, &icmp6_matchstruct); ++ xt_unregister_target(AF_INET6, &ip6t_error_target); ++#ifdef CONFIG_VE_IPTABLES ++ ve_ip6t_standard_target = NULL; ++#endif ++ xt_unregister_target(AF_INET6, &ip6t_standard_target); ++} ++ ++static int __init init(void) ++{ ++ int ret; ++ ++ ret = init_ip6tables(); ++ if (ret) ++ goto out; + + /* Register setsockopt */ + ret = nf_register_sockopt(&ip6t_sockopts); + if (ret < 0) { + duprintf("Unable to register sockopts.\n"); +- xt_proto_fini(AF_INET6); +- return ret; ++ goto out_sockopts; + } + ++ KSYMRESOLVE(init_ip6tables); ++ KSYMRESOLVE(fini_ip6tables); ++ KSYMRESOLVE(ip6t_flush_table); ++ KSYMMODRESOLVE(ip6_tables); + 
printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); + return 0; ++ ++out_sockopts: ++ fini_ip6tables(); ++out: ++ return ret; + } + + static void __exit fini(void) + { ++ KSYMMODUNRESOLVE(ip6_tables); ++ KSYMUNRESOLVE(init_ip6tables); ++ KSYMUNRESOLVE(fini_ip6tables); ++ KSYMUNRESOLVE(ip6t_flush_table); + nf_unregister_sockopt(&ip6t_sockopts); +- xt_unregister_match(AF_INET6, &icmp6_matchstruct); +- xt_unregister_target(AF_INET6, &ip6t_error_target); +- xt_unregister_target(AF_INET6, &ip6t_standard_target); +- xt_proto_fini(AF_INET6); ++ fini_ip6tables(); + } + + /* +@@ -1516,6 +1604,7 @@ EXPORT_SYMBOL(ip6t_do_table); + EXPORT_SYMBOL(ip6t_ext_hdr); + EXPORT_SYMBOL(ipv6_find_hdr); + EXPORT_SYMBOL(ip6_masked_addrcmp); ++EXPORT_SYMBOL(ip6t_flush_table); + +-module_init(init); ++subsys_initcall(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv6/netfilter/ip6t_LOG.c linux-2.6.16-026test015/net/ipv6/netfilter/ip6t_LOG.c +--- linux-2.6.16.orig/net/ipv6/netfilter/ip6t_LOG.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/netfilter/ip6t_LOG.c 2006-07-04 14:41:39.000000000 +0400 +@@ -20,6 +20,7 @@ + #include <net/udp.h> + #include <net/tcp.h> + #include <net/ipv6.h> ++#include <linux/nfcalls.h> + #include <linux/netfilter.h> + #include <linux/netfilter_ipv6/ip6_tables.h> + +@@ -488,10 +489,23 @@ static struct nf_logger ip6t_logger = { + .me = THIS_MODULE, + }; + ++int init_ip6table_LOG(void) ++{ ++ return ip6t_register_target(&ip6t_log_reg); ++} ++ ++void fini_ip6table_LOG(void) ++{ ++ ip6t_unregister_target(&ip6t_log_reg); ++} ++ + static int __init init(void) + { +- if (ip6t_register_target(&ip6t_log_reg)) +- return -EINVAL; ++ int err; ++ ++ err = init_ip6table_LOG(); ++ if (err < 0) ++ return err; + if (nf_log_register(PF_INET6, &ip6t_logger) < 0) { + printk(KERN_WARNING "ip6t_LOG: not logging via system console " + "since somebody else already registered for PF_INET6\n"); +@@ -499,13 +513,19 @@ static int __init init(void) 
+ * ip6tables userspace would abort */ + } + ++ KSYMRESOLVE(init_ip6table_LOG); ++ KSYMRESOLVE(fini_ip6table_LOG); ++ KSYMMODRESOLVE(ip6t_LOG); + return 0; + } + + static void __exit fini(void) + { ++ KSYMMODUNRESOLVE(ip6t_LOG); ++ KSYMUNRESOLVE(init_ip6table_LOG); ++ KSYMUNRESOLVE(fini_ip6table_LOG); + nf_log_unregister_logger(&ip6t_logger); +- ip6t_unregister_target(&ip6t_log_reg); ++ fini_ip6table_LOG(); + } + + module_init(init); +diff -upr linux-2.6.16.orig/net/ipv6/netfilter/ip6t_REJECT.c linux-2.6.16-026test015/net/ipv6/netfilter/ip6t_REJECT.c +--- linux-2.6.16.orig/net/ipv6/netfilter/ip6t_REJECT.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/netfilter/ip6t_REJECT.c 2006-07-04 14:41:39.000000000 +0400 +@@ -26,6 +26,7 @@ + #include <net/ip6_checksum.h> + #include <net/ip6_fib.h> + #include <net/ip6_route.h> ++#include <linux/nfcalls.h> + #include <net/flow.h> + #include <linux/netfilter_ipv6/ip6_tables.h> + #include <linux/netfilter_ipv6/ip6t_REJECT.h> +@@ -268,17 +269,39 @@ static struct ip6t_target ip6t_reject_re + .me = THIS_MODULE + }; + +-static int __init init(void) ++int init_ip6table_REJECT(void) + { + if (ip6t_register_target(&ip6t_reject_reg)) + return -EINVAL; + return 0; + } + +-static void __exit fini(void) ++void fini_ip6table_REJECT(void) + { + ip6t_unregister_target(&ip6t_reject_reg); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_ip6table_REJECT(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_ip6table_REJECT); ++ KSYMRESOLVE(fini_ip6table_REJECT); ++ KSYMMODRESOLVE(ip6t_REJECT); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ip6t_REJECT); ++ KSYMUNRESOLVE(init_ip6table_REJECT); ++ KSYMUNRESOLVE(fini_ip6table_REJECT); ++ fini_ip6table_REJECT(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv6/netfilter/ip6t_multiport.c linux-2.6.16-026test015/net/ipv6/netfilter/ip6t_multiport.c +--- 
linux-2.6.16.orig/net/ipv6/netfilter/ip6t_multiport.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/netfilter/ip6t_multiport.c 2006-07-04 14:41:39.000000000 +0400 +@@ -14,6 +14,7 @@ + #include <linux/udp.h> + #include <linux/skbuff.h> + #include <linux/in.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv6/ip6t_multiport.h> + #include <linux/netfilter_ipv6/ip6_tables.h> +@@ -112,15 +113,37 @@ static struct ip6t_match multiport_match + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_ip6table_multiport(void) + { + return ip6t_register_match(&multiport_match); + } + +-static void __exit fini(void) ++void fini_ip6table_multiport(void) + { + ip6t_unregister_match(&multiport_match); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_ip6table_multiport(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_ip6table_multiport); ++ KSYMRESOLVE(fini_ip6table_multiport); ++ KSYMMODRESOLVE(ip6t_multiport); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ip6t_multiport); ++ KSYMUNRESOLVE(init_ip6table_multiport); ++ KSYMUNRESOLVE(fini_ip6table_multiport); ++ fini_ip6table_multiport(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/ipv6/netfilter/ip6table_filter.c linux-2.6.16-026test015/net/ipv6/netfilter/ip6table_filter.c +--- linux-2.6.16.orig/net/ipv6/netfilter/ip6table_filter.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/netfilter/ip6table_filter.c 2006-07-04 14:41:39.000000000 +0400 +@@ -11,12 +11,20 @@ + + #include <linux/module.h> + #include <linux/moduleparam.h> ++#include <linux/nfcalls.h> + #include <linux/netfilter_ipv6/ip6_tables.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); + MODULE_DESCRIPTION("ip6tables filter table"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_packet_filter (get_exec_env()->_ve_ip6t_filter_pf) 
++#else ++#define ve_packet_filter &packet_filter ++#endif ++ + #define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT)) + + /* Standard entry. */ +@@ -43,7 +51,7 @@ static struct + struct ip6t_replace repl; + struct ip6t_standard entries[3]; + struct ip6t_error term; +-} initial_table __initdata ++} initial_table + = { { "filter", FILTER_VALID_HOOKS, 4, + sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), + { [NF_IP6_LOCAL_IN] = 0, +@@ -108,7 +116,7 @@ ip6t_hook(unsigned int hook, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { +- return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); ++ return ip6t_do_table(pskb, hook, in, out, ve_packet_filter, NULL); + } + + static unsigned int +@@ -128,7 +136,7 @@ ip6t_local_out_hook(unsigned int hook, + } + #endif + +- return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); ++ return ip6t_do_table(pskb, hook, in, out, ve_packet_filter, NULL); + } + + static struct nf_hook_ops ip6t_ops[] = { +@@ -159,56 +167,89 @@ static struct nf_hook_ops ip6t_ops[] = { + static int forward = NF_ACCEPT; + module_param(forward, bool, 0000); + +-static int __init init(void) ++int init_ip6table_filter(void) + { + int ret; +- +- if (forward < 0 || forward > NF_MAX_VERDICT) { +- printk("iptables forward must be 0 or 1\n"); +- return -EINVAL; +- } +- +- /* Entry 1 is the FORWARD hook */ +- initial_table.entries[1].target.verdict = -forward - 1; ++ struct ip6t_table *tmp_filter; + + /* Register table */ +- ret = ip6t_register_table(&packet_filter, &initial_table.repl); +- if (ret < 0) +- return ret; ++ tmp_filter = ip6t_register_table(&packet_filter, ++ &initial_table.repl); ++ if (IS_ERR(tmp_filter)) ++ return PTR_ERR(tmp_filter); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_filter = tmp_filter; ++#endif + + /* Register hooks */ +- ret = nf_register_hook(&ip6t_ops[0]); ++ ret = virt_nf_register_hook(&ip6t_ops[0]); + if (ret < 0) + goto cleanup_table; + +- 
ret = nf_register_hook(&ip6t_ops[1]); ++ ret = virt_nf_register_hook(&ip6t_ops[1]); + if (ret < 0) + goto cleanup_hook0; + +- ret = nf_register_hook(&ip6t_ops[2]); ++ ret = virt_nf_register_hook(&ip6t_ops[2]); + if (ret < 0) + goto cleanup_hook1; + + return ret; + + cleanup_hook1: +- nf_unregister_hook(&ip6t_ops[1]); ++ virt_nf_unregister_hook(&ip6t_ops[1]); + cleanup_hook0: +- nf_unregister_hook(&ip6t_ops[0]); ++ virt_nf_unregister_hook(&ip6t_ops[0]); + cleanup_table: +- ip6t_unregister_table(&packet_filter); ++ ip6t_unregister_table(ve_packet_filter); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_filter = NULL; ++#endif + + return ret; + } + +-static void __exit fini(void) ++void fini_ip6table_filter(void) + { + unsigned int i; + + for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++) +- nf_unregister_hook(&ip6t_ops[i]); ++ virt_nf_unregister_hook(&ip6t_ops[i]); + +- ip6t_unregister_table(&packet_filter); ++ ip6t_unregister_table(ve_packet_filter); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_filter = NULL; ++#endif ++} ++ ++static int __init init(void) ++{ ++ int err; ++ ++ if (forward < 0 || forward > NF_MAX_VERDICT) { ++ printk("iptables forward must be 0 or 1\n"); ++ return -EINVAL; ++ } ++ ++ /* Entry 1 is the FORWARD hook */ ++ initial_table.entries[1].target.verdict = -forward - 1; ++ ++ err = init_ip6table_filter(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_ip6table_filter); ++ KSYMRESOLVE(fini_ip6table_filter); ++ KSYMMODRESOLVE(ip6table_filter); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ip6table_filter); ++ KSYMUNRESOLVE(init_ip6table_filter); ++ KSYMUNRESOLVE(fini_ip6table_filter); ++ fini_ip6table_filter(); + } + + module_init(init); +diff -upr linux-2.6.16.orig/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.16-026test015/net/ipv6/netfilter/ip6table_mangle.c +--- linux-2.6.16.orig/net/ipv6/netfilter/ip6table_mangle.c 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16-026test015/net/ipv6/netfilter/ip6table_mangle.c 2006-07-04 14:41:39.000000000 +0400 +@@ -12,6 +12,7 @@ + */ + #include <linux/module.h> + #include <linux/netfilter_ipv6/ip6_tables.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +@@ -53,7 +54,7 @@ static struct + struct ip6t_replace repl; + struct ip6t_standard entries[5]; + struct ip6t_error term; +-} initial_table __initdata ++} initial_table + = { { "mangle", MANGLE_VALID_HOOKS, 6, + sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error), + { [NF_IP6_PRE_ROUTING] = 0, +@@ -130,6 +131,13 @@ static struct ip6t_table packet_mangler + .af = AF_INET6, + }; + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_packet_mangler (get_exec_env()->_ip6t_mangle_table) ++#else ++#define ve_packet_mangler &packet_mangler ++#endif ++ + /* The work comes in here from netfilter.c. */ + static unsigned int + ip6t_route_hook(unsigned int hook, +@@ -138,7 +146,7 @@ ip6t_route_hook(unsigned int hook, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { +- return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); ++ return ip6t_do_table(pskb, hook, in, out, ve_packet_mangler, NULL); + } + + static unsigned int +@@ -174,7 +182,7 @@ ip6t_local_hook(unsigned int hook, + /* flowlabel and prio (includes version, which shouldn't change either */ + flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h); + +- ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); ++ ret = ip6t_do_table(pskb, hook, in, out, ve_packet_mangler, NULL); + + if (ret != NF_DROP && ret != NF_STOLEN + && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) +@@ -228,60 +236,93 @@ static struct nf_hook_ops ip6t_ops[] = { + }, + }; + +-static int __init init(void) ++int init_ip6table_mangle(void) + { + int ret; ++ struct ip6t_table *tmp_mangler; + + /* Register table */ +- ret = ip6t_register_table(&packet_mangler, 
&initial_table.repl); +- if (ret < 0) +- return ret; ++ tmp_mangler = ip6t_register_table(&packet_mangler, ++ &initial_table.repl); ++ if (IS_ERR(tmp_mangler)) ++ return PTR_ERR(tmp_mangler); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_mangler = tmp_mangler; ++#endif + + /* Register hooks */ +- ret = nf_register_hook(&ip6t_ops[0]); ++ ret = virt_nf_register_hook(&ip6t_ops[0]); + if (ret < 0) + goto cleanup_table; + +- ret = nf_register_hook(&ip6t_ops[1]); ++ ret = virt_nf_register_hook(&ip6t_ops[1]); + if (ret < 0) + goto cleanup_hook0; + +- ret = nf_register_hook(&ip6t_ops[2]); ++ ret = virt_nf_register_hook(&ip6t_ops[2]); + if (ret < 0) + goto cleanup_hook1; + +- ret = nf_register_hook(&ip6t_ops[3]); ++ ret = virt_nf_register_hook(&ip6t_ops[3]); + if (ret < 0) + goto cleanup_hook2; + +- ret = nf_register_hook(&ip6t_ops[4]); ++ ret = virt_nf_register_hook(&ip6t_ops[4]); + if (ret < 0) + goto cleanup_hook3; + + return ret; + + cleanup_hook3: +- nf_unregister_hook(&ip6t_ops[3]); ++ virt_nf_unregister_hook(&ip6t_ops[3]); + cleanup_hook2: +- nf_unregister_hook(&ip6t_ops[2]); ++ virt_nf_unregister_hook(&ip6t_ops[2]); + cleanup_hook1: +- nf_unregister_hook(&ip6t_ops[1]); ++ virt_nf_unregister_hook(&ip6t_ops[1]); + cleanup_hook0: +- nf_unregister_hook(&ip6t_ops[0]); ++ virt_nf_unregister_hook(&ip6t_ops[0]); + cleanup_table: +- ip6t_unregister_table(&packet_mangler); ++ ip6t_unregister_table(ve_packet_mangler); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_mangler = NULL; ++#endif + + return ret; + } + +-static void __exit fini(void) ++void fini_ip6table_mangle(void) + { + unsigned int i; + + for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++) +- nf_unregister_hook(&ip6t_ops[i]); ++ virt_nf_unregister_hook(&ip6t_ops[i]); ++ ++ ip6t_unregister_table(ve_packet_mangler); ++#ifdef CONFIG_VE_IPTABLES ++ ve_packet_mangler = NULL; ++#endif ++} ++ ++static int __init init(void) ++{ ++ int err; + +- ip6t_unregister_table(&packet_mangler); ++ err = init_ip6table_mangle(); ++ 
if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_ip6table_mangle); ++ KSYMRESOLVE(fini_ip6table_mangle); ++ KSYMMODRESOLVE(ip6table_mangle); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(ip6table_mangle); ++ KSYMUNRESOLVE(init_ip6table_mangle); ++ KSYMUNRESOLVE(fini_ip6table_mangle); ++ fini_ip6table_mangle(); + } + + module_init(init); +diff -upr linux-2.6.16.orig/net/ipv6/netfilter/ip6table_raw.c linux-2.6.16-026test015/net/ipv6/netfilter/ip6table_raw.c +--- linux-2.6.16.orig/net/ipv6/netfilter/ip6table_raw.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/netfilter/ip6table_raw.c 2006-07-04 14:41:39.000000000 +0400 +@@ -145,11 +145,12 @@ static struct nf_hook_ops ip6t_ops[] = { + static int __init init(void) + { + int ret; ++ struct ip6t_table *tmp; + + /* Register table */ +- ret = ip6t_register_table(&packet_raw, &initial_table.repl); +- if (ret < 0) +- return ret; ++ tmp = ip6t_register_table(&packet_raw, &initial_table.repl); ++ if (IS_ERR(tmp)) ++ return PTR_ERR(tmp); + + /* Register hooks */ + ret = nf_register_hook(&ip6t_ops[0]); +diff -upr linux-2.6.16.orig/net/ipv6/proc.c linux-2.6.16-026test015/net/ipv6/proc.c +--- linux-2.6.16.orig/net/ipv6/proc.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/proc.c 2006-07-04 14:41:39.000000000 +0400 +@@ -25,13 +25,18 @@ + #include <linux/proc_fs.h> + #include <linux/seq_file.h> + #include <linux/stddef.h> ++#include <linux/ve.h> + #include <net/sock.h> + #include <net/tcp.h> + #include <net/transp_v6.h> + #include <net/ipv6.h> + + #ifdef CONFIG_PROC_FS ++#ifdef CONFIG_VE ++#define proc_net_devsnmp6 (get_exec_env()->_proc_net_devsnmp6) ++#else + static struct proc_dir_entry *proc_net_devsnmp6; ++#endif + + static int fold_prot_inuse(struct proto *proto) + { +@@ -164,9 +169,9 @@ static int snmp6_seq_show(struct seq_fil + seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); + snmp6_seq_show_item(seq, (void 
**)idev->stats.icmpv6, snmp6_icmp6_list); + } else { +- snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); +- snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); +- snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); ++ snmp6_seq_show_item(seq, (void **)ve_ipv6_statistics, snmp6_ipstats_list); ++ snmp6_seq_show_item(seq, (void **)ve_icmpv6_statistics, snmp6_icmp6_list); ++ snmp6_seq_show_item(seq, (void **)ve_udp_stats_in6, snmp6_udp6_list); + } + return 0; + } +@@ -229,15 +234,27 @@ int snmp6_unregister_dev(struct inet6_de + return 0; + } + ++int ve_snmp_proc_init(void) ++{ ++ proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net); ++ return proc_net_devsnmp6 == NULL ? -ENOMEM : 0; ++} ++EXPORT_SYMBOL(ve_snmp_proc_init); ++ ++void ve_snmp_proc_fini(void) ++{ ++ proc_net_remove("dev_snmp6"); ++} ++EXPORT_SYMBOL(ve_snmp_proc_fini); ++ + int __init ipv6_misc_proc_init(void) + { + int rc = 0; + +- if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops)) ++ if (!proc_glob_fops_create("net/snmp6", S_IRUGO, &snmp6_seq_fops)) + goto proc_snmp6_fail; + +- proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net); +- if (!proc_net_devsnmp6) ++ if (ve_snmp_proc_init()) + goto proc_dev_snmp6_fail; + + if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops)) +@@ -246,9 +263,9 @@ out: + return rc; + + proc_sockstat6_fail: +- proc_net_remove("dev_snmp6"); ++ ve_snmp_proc_fini(); + proc_dev_snmp6_fail: +- proc_net_remove("snmp6"); ++ remove_proc_glob_entry("net/snmp6", NULL); + proc_snmp6_fail: + rc = -ENOMEM; + goto out; +diff -upr linux-2.6.16.orig/net/ipv6/raw.c linux-2.6.16-026test015/net/ipv6/raw.c +--- linux-2.6.16.orig/net/ipv6/raw.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/raw.c 2006-07-04 14:41:39.000000000 +0400 +@@ -99,6 +99,9 @@ struct sock *__raw_v6_lookup(struct sock + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + continue; + ++ if 
(!ve_accessible_strict(VE_OWNER_SK(sk), get_exec_env())) ++ continue; ++ + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + goto found; +@@ -1046,8 +1049,14 @@ static struct sock *raw6_get_next(struct + do { + sk = sk_next(sk); + try_again: +- ; +- } while (sk && sk->sk_family != PF_INET6); ++ if (!sk) ++ break; ++ if (sk->sk_family != PF_INET6) ++ continue; ++ if (ve_accessible(VE_OWNER_SK(sk), ++ get_exec_env())) ++ break; ++ } while (1); + + if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) { + sk = sk_head(&raw_v6_htable[state->bucket]); +@@ -1166,13 +1175,13 @@ static struct file_operations raw6_seq_f + + int __init raw6_proc_init(void) + { +- if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops)) ++ if (!proc_glob_fops_create("net/raw6", S_IRUGO, &raw6_seq_fops)) + return -ENOMEM; + return 0; + } + + void raw6_proc_exit(void) + { +- proc_net_remove("raw6"); ++ remove_proc_glob_entry("net/raw6", NULL); + } + #endif /* CONFIG_PROC_FS */ +diff -upr linux-2.6.16.orig/net/ipv6/reassembly.c linux-2.6.16-026test015/net/ipv6/reassembly.c +--- linux-2.6.16.orig/net/ipv6/reassembly.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/reassembly.c 2006-07-04 14:41:39.000000000 +0400 +@@ -43,6 +43,7 @@ + #include <linux/icmpv6.h> + #include <linux/random.h> + #include <linux/jhash.h> ++#include <linux/ve_owner.h> + + #include <net/sock.h> + #include <net/snmp.h> +@@ -53,6 +54,7 @@ + #include <net/rawv6.h> + #include <net/ndisc.h> + #include <net/addrconf.h> ++#include <linux/ve_owner.h> + + int sysctl_ip6frag_high_thresh = 256*1024; + int sysctl_ip6frag_low_thresh = 192*1024; +@@ -95,8 +97,12 @@ struct frag_queue + #define FIRST_IN 2 + #define LAST_IN 1 + __u16 nhoffset; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(IP6Q, struct frag_queue, owner_env) ++DCL_VE_OWNER(IP6Q, struct frag_queue, owner_env) ++ + /* Hash table. 
*/ + + #define IP6Q_HASHSZ 64 +@@ -288,6 +294,9 @@ static void ip6_evictor(void) + static void ip6_frag_expire(unsigned long data) + { + struct frag_queue *fq = (struct frag_queue *) data; ++ struct ve_struct *envid; ++ ++ envid = set_exec_env(VE_OWNER_IP6Q(fq)); + + spin_lock(&fq->lock); + +@@ -318,6 +327,8 @@ static void ip6_frag_expire(unsigned lon + out: + spin_unlock(&fq->lock); + fq_put(fq, NULL); ++ ++ (void)set_exec_env(envid); + } + + /* Creation primitives. */ +@@ -336,7 +347,8 @@ static struct frag_queue *ip6_frag_inter + hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { + if (fq->id == fq_in->id && + ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && +- ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { ++ ipv6_addr_equal(&fq_in->daddr, &fq->daddr) && ++ fq->owner_env == get_exec_env()) { + atomic_inc(&fq->refcnt); + write_unlock(&ip6_frag_lock); + fq_in->last_in |= COMPLETE; +@@ -380,6 +392,8 @@ ip6_frag_create(unsigned int hash, u32 i + spin_lock_init(&fq->lock); + atomic_set(&fq->refcnt, 1); + ++ SET_VE_OWNER_IP6Q(fq, get_exec_env()); ++ + return ip6_frag_intern(hash, fq); + + oom: +@@ -398,7 +412,8 @@ fq_find(u32 id, struct in6_addr *src, st + hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { + if (fq->id == id && + ipv6_addr_equal(src, &fq->saddr) && +- ipv6_addr_equal(dst, &fq->daddr)) { ++ ipv6_addr_equal(dst, &fq->daddr) && ++ fq->owner_env == get_exec_env()) { + atomic_inc(&fq->refcnt); + read_unlock(&ip6_frag_lock); + return fq; +@@ -727,6 +742,9 @@ static int ipv6_frag_rcv(struct sk_buff + fq->meat == fq->len) + ret = ip6_frag_reasm(fq, skbp, dev); + ++ if (ret > 0) ++ SET_VE_OWNER_SKB(*skbp, VE_OWNER_SKB(skb)); ++ + spin_unlock(&fq->lock); + fq_put(fq, NULL); + return ret; +@@ -737,6 +755,50 @@ static int ipv6_frag_rcv(struct sk_buff + return -1; + } + ++#ifdef CONFIG_VE ++/* XXX */ ++void ip6_frag_cleanup(struct ve_struct *envid) ++{ ++ int i, progress; ++ ++ local_bh_disable(); ++ do { ++ progress = 0; ++ for (i = 0; i < IP6Q_HASHSZ; 
i++) { ++ struct frag_queue *fq; ++ struct hlist_node *p, *n; ++ ++ if (hlist_empty(&ip6_frag_hash[i])) ++ continue; ++inner_restart: ++ read_lock(&ip6_frag_lock); ++ hlist_for_each_entry_safe(fq, p, n, ++ &ip6_frag_hash[i], list) { ++ if (!ve_accessible_strict( ++ VE_OWNER_IP6Q(fq), ++ envid)) ++ continue; ++ atomic_inc(&fq->refcnt); ++ read_unlock(&ip6_frag_lock); ++ ++ spin_lock(&fq->lock); ++ if (!(fq->last_in&COMPLETE)) ++ fq_kill(fq); ++ spin_unlock(&fq->lock); ++ ++ fq_put(fq, NULL); ++ progress = 1; ++ goto inner_restart; ++ } ++ read_unlock(&ip6_frag_lock); ++ } ++ } while(progress); ++ local_bh_enable(); ++} ++EXPORT_SYMBOL(ip6_frag_cleanup); ++#endif ++ ++ + static struct inet6_protocol frag_protocol = + { + .handler = ipv6_frag_rcv, +diff -upr linux-2.6.16.orig/net/ipv6/route.c linux-2.6.16-026test015/net/ipv6/route.c +--- linux-2.6.16.orig/net/ipv6/route.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/route.c 2006-07-04 14:41:39.000000000 +0400 +@@ -52,7 +52,6 @@ + #include <net/addrconf.h> + #include <net/tcp.h> + #include <linux/rtnetlink.h> +-#include <net/dst.h> + #include <net/xfrm.h> + + #include <asm/uaccess.h> +@@ -113,7 +112,6 @@ struct rt6_info ip6_null_entry = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, +- .dev = &loopback_dev, + .obsolete = -1, + .error = -ENETUNREACH, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, +@@ -128,11 +126,19 @@ struct rt6_info ip6_null_entry = { + .rt6i_ref = ATOMIC_INIT(1), + }; + +-struct fib6_node ip6_routing_table = { +- .leaf = &ip6_null_entry, +- .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, ++struct fib6_table global_fib6_table = { ++ .root = { ++ .leaf = &ip6_null_entry, ++ .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, ++ } + }; + ++#ifdef CONFIG_VE ++#define ip6_routing_table (get_exec_env()->_fib6_table->root) ++#else ++#define ip6_routing_table (global_ip6_routing_table.root) ++#endif ++ + /* Protects all the ip6 fib */ + + DEFINE_RWLOCK(rt6_lock); +@@ -778,7 
+784,7 @@ static int ipv6_get_mtu(struct net_devic + + int ipv6_get_hoplimit(struct net_device *dev) + { +- int hoplimit = ipv6_devconf.hop_limit; ++ int hoplimit = ve_ipv6_devconf.hop_limit; + struct inet6_dev *idev; + + idev = in6_dev_get(dev); +@@ -1421,10 +1427,12 @@ struct rt6_info *addrconf_dst_alloc(stru + rt->rt6i_flags |= RTF_ANYCAST; + else + rt->rt6i_flags |= RTF_LOCAL; +- rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); +- if (rt->rt6i_nexthop == NULL) { ++ rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, rt->rt6i_dev); ++ if (IS_ERR(rt->rt6i_nexthop)) { ++ void *err = rt->rt6i_nexthop; ++ rt->rt6i_nexthop = NULL; + dst_free((struct dst_entry *) rt); +- return ERR_PTR(-ENOMEM); ++ return err; + } + + ipv6_addr_copy(&rt->rt6i_dst.addr, addr); +@@ -1640,8 +1648,12 @@ static int rt6_fill_node(struct sk_buff + goto rtattr_failure; + if (rt->u.dst.neighbour) + RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); +- if (rt->u.dst.dev) +- RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex); ++ if (rt->u.dst.dev) { ++ struct net_device *odev = rt->rt6i_dev; ++ if (rt == &ip6_null_entry) ++ odev = &loopback_dev; ++ RTA_PUT(skb, RTA_OIF, sizeof(int), &odev->ifindex); ++ } + RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric); + ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); + if (rt->rt6i_expires) +@@ -2110,23 +2122,31 @@ void __init ip6_route_init(void) + if (!ip6_dst_ops.kmem_cachep) + panic("cannot create ip6_dst_cache"); + ++#ifdef CONFIG_VE ++ global_fib6_table.owner_env = get_ve0(); ++ get_ve0()->_fib6_table = &global_fib6_table; ++#endif ++ list_add(&global_fib6_table.list, &fib6_table_list); + fib6_init(); + #ifdef CONFIG_PROC_FS +- p = proc_net_create("ipv6_route", 0, rt6_proc_info); +- if (p) ++ p = create_proc_glob_entry("net/ipv6_route", 0, NULL); ++ if (p) { + p->owner = THIS_MODULE; ++ p->get_info = rt6_proc_info; ++ } + + proc_net_fops_create("rt6_stats", S_IRUGO, 
&rt6_stats_seq_fops); + #endif + #ifdef CONFIG_XFRM + xfrm6_init(); + #endif ++ ip6_null_entry.u.dst.dev = &loopback_dev; + } + + void ip6_route_cleanup(void) + { + #ifdef CONFIG_PROC_FS +- proc_net_remove("ipv6_route"); ++ remove_proc_glob_entry("net/ipv6_route", NULL); + proc_net_remove("rt6_stats"); + #endif + #ifdef CONFIG_XFRM +@@ -2136,3 +2156,35 @@ void ip6_route_cleanup(void) + fib6_gc_cleanup(); + kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + } ++ ++int init_ve_route6(struct ve_struct *ve) ++{ ++ struct ve_struct *old_env = set_exec_env(ve); ++ ve->_fib6_table = kzalloc(sizeof(struct fib6_table), GFP_KERNEL_UBC); ++ if (ve->_fib6_table) { ++ ve->_fib6_table->owner_env = ve; ++ ve->_fib6_table->root.leaf = &ip6_null_entry; ++ ve->_fib6_table->root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; ++ write_lock_bh(&rt6_lock); ++ list_add(&ve->_fib6_table->list, &fib6_table_list); ++ write_unlock_bh(&rt6_lock); ++ } ++ set_exec_env(old_env); ++ return ve->_fib6_table ? 0 : -ENOMEM; ++} ++EXPORT_SYMBOL(init_ve_route6); ++ ++void fini_ve_route6(struct ve_struct *ve) ++{ ++ struct ve_struct *old_env = set_exec_env(ve); ++ ++ if (ve->_fib6_table) { ++ rt6_ifdown(NULL); ++ write_lock_bh(&rt6_lock); ++ list_del(&ve->_fib6_table->list); ++ write_unlock_bh(&rt6_lock); ++ kfree(ve->_fib6_table); ++ } ++ set_exec_env(old_env); ++} ++EXPORT_SYMBOL(fini_ve_route6); +diff -upr linux-2.6.16.orig/net/ipv6/tcp_ipv6.c linux-2.6.16-026test015/net/ipv6/tcp_ipv6.c +--- linux-2.6.16.orig/net/ipv6/tcp_ipv6.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/tcp_ipv6.c 2006-07-04 14:41:39.000000000 +0400 +@@ -62,6 +62,8 @@ + #include <net/dsfield.h> + #include <net/timewait_sock.h> + ++#include <ub/ub_tcp.h> ++ + #include <asm/uaccess.h> + + #include <linux/proc_fs.h> +@@ -77,7 +79,7 @@ static void tcp_v6_send_check(struct soc + + static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); + +-static struct inet_connection_sock_af_ops ipv6_mapped; 
++struct inet_connection_sock_af_ops ipv6_mapped; + static struct inet_connection_sock_af_ops ipv6_specific; + + static int tcp_v6_get_port(struct sock *sk, unsigned short snum) +@@ -273,6 +275,8 @@ static int tcp_v6_connect(struct sock *s + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); ++ if (!sysctl_tcp_use_sg) ++ sk->sk_route_caps &= ~NETIF_F_SG; + + icsk->icsk_ext_hdr_len = 0; + if (np->opt) +@@ -933,6 +937,8 @@ static struct sock * tcp_v6_syn_recv_soc + ip6_dst_store(newsk, dst, NULL); + newsk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); ++ if (!sysctl_tcp_use_sg) ++ newsk->sk_route_caps &= ~NETIF_F_SG; + + newtcp6sk = (struct tcp6_sock *)newsk; + inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; +@@ -1040,6 +1046,8 @@ static int tcp_v6_do_rcv(struct sock *sk + struct ipv6_pinfo *np = inet6_sk(sk); + struct tcp_sock *tp; + struct sk_buff *opt_skb = NULL; ++ struct user_beancounter *ub; ++ + + /* Imagine: socket is IPv6. IPv4 packet arrives, + goes to IPv4 receive handler and backlogged. 
+@@ -1052,6 +1060,8 @@ static int tcp_v6_do_rcv(struct sock *sk + if (skb->protocol == htons(ETH_P_IP)) + return tcp_v4_do_rcv(sk, skb); + ++ ub = set_exec_ub(sock_bc(sk)->ub); ++ + if (sk_filter(sk, skb, 0)) + goto discard; + +@@ -1083,7 +1093,7 @@ static int tcp_v6_do_rcv(struct sock *sk + TCP_CHECK_TIMER(sk); + if (opt_skb) + goto ipv6_pktoptions; +- return 0; ++ goto restore_context; + } + + if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb)) +@@ -1104,7 +1114,7 @@ static int tcp_v6_do_rcv(struct sock *sk + goto reset; + if (opt_skb) + __kfree_skb(opt_skb); +- return 0; ++ goto restore_context; + } + } + +@@ -1114,6 +1124,9 @@ static int tcp_v6_do_rcv(struct sock *sk + TCP_CHECK_TIMER(sk); + if (opt_skb) + goto ipv6_pktoptions; ++ ++restore_context: ++ (void)set_exec_ub(ub); + return 0; + + reset: +@@ -1122,7 +1135,7 @@ discard: + if (opt_skb) + __kfree_skb(opt_skb); + kfree_skb(skb); +- return 0; ++ goto restore_context; + csum_err: + TCP_INC_STATS_BH(TCP_MIB_INERRS); + goto discard; +@@ -1154,7 +1167,7 @@ ipv6_pktoptions: + + if (opt_skb) + kfree_skb(opt_skb); +- return 0; ++ goto restore_context; + } + + static int tcp_v6_rcv(struct sk_buff **pskb) +@@ -1315,7 +1328,7 @@ static struct inet_connection_sock_af_op + * TCP over IPv4 via INET6 API + */ + +-static struct inet_connection_sock_af_ops ipv6_mapped = { ++struct inet_connection_sock_af_ops ipv6_mapped = { + .queue_xmit = ip_queue_xmit, + .send_check = tcp_v4_send_check, + .rebuild_header = inet_sk_rebuild_header, +@@ -1329,6 +1342,7 @@ static struct inet_connection_sock_af_op + .addr2sockaddr = inet6_csk_addr2sockaddr, + .sockaddr_len = sizeof(struct sockaddr_in6) + }; ++EXPORT_SYMBOL_GPL(ipv6_mapped); + + + +@@ -1535,7 +1549,7 @@ out: + static struct file_operations tcp6_seq_fops; + static struct tcp_seq_afinfo tcp6_seq_afinfo = { + .owner = THIS_MODULE, +- .name = "tcp6", ++ .name = "net/tcp6", + .family = AF_INET6, + .seq_show = tcp6_seq_show, + .seq_fops = &tcp6_seq_fops, +diff -upr 
linux-2.6.16.orig/net/ipv6/udp.c linux-2.6.16-026test015/net/ipv6/udp.c +--- linux-2.6.16.orig/net/ipv6/udp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/udp.c 2006-07-04 14:41:39.000000000 +0400 +@@ -69,7 +69,9 @@ static int udp_v6_get_port(struct sock * + { + struct sock *sk2; + struct hlist_node *node; ++ struct ve_struct *env; + ++ env = VE_OWNER_SK(sk); + write_lock_bh(&udp_hash_lock); + if (snum == 0) { + int best_size_so_far, best, result, i; +@@ -83,7 +85,7 @@ static int udp_v6_get_port(struct sock * + int size; + struct hlist_head *list; + +- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; ++ list = &udp_hash[udp_hashfn(result, VEID(env))]; + if (hlist_empty(list)) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + +@@ -105,7 +107,7 @@ static int udp_v6_get_port(struct sock * + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); +- if (!udp_lport_inuse(result)) ++ if (!udp_lport_inuse(result, env)) + break; + } + if (i >= (1 << 16) / UDP_HTABLE_SIZE) +@@ -114,9 +116,10 @@ gotit: + udp_port_rover = snum = result; + } else { + sk_for_each(sk2, node, +- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { ++ &udp_hash[udp_hashfn(snum, VEID(env))]) { + if (inet_sk(sk2)->num == snum && + sk2 != sk && ++ ve_accessible_strict(VE_OWNER_SK(sk2), env) && + (!sk2->sk_bound_dev_if || + !sk->sk_bound_dev_if || + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && +@@ -128,7 +131,7 @@ gotit: + + inet_sk(sk)->num = snum; + if (sk_unhashed(sk)) { +- sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]); ++ sk_add_node(sk, &udp_hash[udp_hashfn(snum, VEID(env))]); + sock_prot_inc_use(sk->sk_prot); + } + write_unlock_bh(&udp_hash_lock); +@@ -161,12 +164,15 @@ static struct sock *udp_v6_lookup(struct + struct hlist_node *node; + unsigned short hnum = ntohs(dport); + int badness = -1; ++ struct ve_struct *env; + + read_lock(&udp_hash_lock); +- sk_for_each(sk, node, 
&udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { ++ env = get_exec_env(); ++ sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) { + struct inet_sock *inet = inet_sk(sk); + +- if (inet->num == hnum && sk->sk_family == PF_INET6) { ++ if (inet->num == hnum && sk->sk_family == PF_INET6 && ++ ve_accessible_strict(VE_OWNER_SK(sk), env)) { + struct ipv6_pinfo *np = inet6_sk(sk); + int score = 0; + if (inet->dport) { +@@ -415,7 +421,8 @@ static void udpv6_mcast_deliver(struct u + int dif; + + read_lock(&udp_hash_lock); +- sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); ++ sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest), ++ VEID(VE_OWNER_SKB(skb)))]); + dif = skb->dev->ifindex; + sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + if (!sk) { +@@ -1018,7 +1025,7 @@ static int udp6_seq_show(struct seq_file + static struct file_operations udp6_seq_fops; + static struct udp_seq_afinfo udp6_seq_afinfo = { + .owner = THIS_MODULE, +- .name = "udp6", ++ .name = "net/udp6", + .family = AF_INET6, + .seq_show = udp6_seq_show, + .seq_fops = &udp6_seq_fops, +diff -upr linux-2.6.16.orig/net/ipv6/xfrm6_policy.c linux-2.6.16-026test015/net/ipv6/xfrm6_policy.c +--- linux-2.6.16.orig/net/ipv6/xfrm6_policy.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/ipv6/xfrm6_policy.c 2006-07-04 14:41:36.000000000 +0400 +@@ -191,16 +191,18 @@ error: + static inline void + _decode_session6(struct sk_buff *skb, struct flowi *fl) + { +- u16 offset = sizeof(struct ipv6hdr); ++ u16 offset = skb->h.raw - skb->nh.raw; + struct ipv6hdr *hdr = skb->nh.ipv6h; +- struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); +- u8 nexthdr = skb->nh.ipv6h->nexthdr; ++ struct ipv6_opt_hdr *exthdr; ++ u8 nexthdr = skb->nh.raw[IP6CB(skb)->nhoff]; + + memset(fl, 0, sizeof(struct flowi)); + ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); + ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); + + while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - 
skb->data)) { ++ exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); ++ + switch (nexthdr) { + case NEXTHDR_ROUTING: + case NEXTHDR_HOP: +diff -upr linux-2.6.16.orig/net/netfilter/core.c linux-2.6.16-026test015/net/netfilter/core.c +--- linux-2.6.16.orig/net/netfilter/core.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/core.c 2006-07-04 14:41:39.000000000 +0400 +@@ -32,16 +32,24 @@ + * of skbuffs queued for userspace, and not deregister a hook unless + * this is zero, but that sucks. Now, we simply check when the + * packets come back: if the hook is gone, the packet is discarded. */ ++static DEFINE_SPINLOCK(nf_hook_lock); ++ + struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; + EXPORT_SYMBOL(nf_hooks); +-static DEFINE_SPINLOCK(nf_hook_lock); ++#ifdef CONFIG_VE_IPTABLES ++#define ve_nf_hooks \ ++ ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks)) ++#else ++#define ve_nf_hooks nf_hooks ++#endif ++ + + int nf_register_hook(struct nf_hook_ops *reg) + { + struct list_head *i; + + spin_lock_bh(&nf_hook_lock); +- list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { ++ list_for_each(i, &ve_nf_hooks[reg->pf][reg->hooknum]) { + if (reg->priority < ((struct nf_hook_ops *)i)->priority) + break; + } +@@ -53,6 +61,33 @@ int nf_register_hook(struct nf_hook_ops + } + EXPORT_SYMBOL(nf_register_hook); + ++int virt_nf_register_hook(struct nf_hook_ops *reg) ++{ ++ int ret = 0; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct nf_hook_ops *tmp; ++ ret = -ENOMEM; ++ tmp = kmalloc(sizeof(struct nf_hook_ops), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, reg, sizeof(struct nf_hook_ops)); ++ reg = tmp; ++ } ++ ++ ret = nf_register_hook(reg); ++ if (ret) ++ goto out; ++ ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())) ++ kfree(reg); ++nomem: ++ return ret; ++} ++EXPORT_SYMBOL(virt_nf_register_hook); ++ + void nf_unregister_hook(struct nf_hook_ops *reg) + { + spin_lock_bh(&nf_hook_lock); +@@ -63,6 +98,29 @@ void 
nf_unregister_hook(struct nf_hook_o + } + EXPORT_SYMBOL(nf_unregister_hook); + ++int virt_nf_unregister_hook(struct nf_hook_ops *reg) ++{ ++ struct nf_hook_ops *i; ++ ++ spin_lock_bh(&nf_hook_lock); ++ list_for_each_entry(i, &ve_nf_hooks[reg->pf][reg->hooknum], list) { ++ if (reg->hook == i->hook) { ++ reg = i; ++ break; ++ } ++ } ++ spin_unlock_bh(&nf_hook_lock); ++ if (reg != i) ++ return -ENOENT; ++ ++ nf_unregister_hook(reg); ++ ++ if (!ve_is_super(get_exec_env())) ++ kfree(reg); ++ return 0; ++} ++EXPORT_SYMBOL(virt_nf_unregister_hook); ++ + unsigned int nf_iterate(struct list_head *head, + struct sk_buff **skb, + int hook, +@@ -120,9 +178,9 @@ int nf_hook_slow(int pf, unsigned int ho + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); + +- elem = &nf_hooks[pf][hook]; ++ elem = &ve_nf_hooks[pf][hook]; + next_hook: +- verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, ++ verdict = nf_iterate(&ve_nf_hooks[pf][hook], pskb, hook, indev, + outdev, &elem, okfn, hook_thresh); + if (verdict == NF_ACCEPT || verdict == NF_STOP) { + ret = 1; +@@ -195,13 +253,54 @@ struct proc_dir_entry *proc_net_netfilte + EXPORT_SYMBOL(proc_net_netfilter); + #endif + +-void __init netfilter_init(void) ++void init_nf_hooks(struct list_head (*nh)[NF_MAX_HOOKS]) + { + int i, h; + for (i = 0; i < NPROTO; i++) { + for (h = 0; h < NF_MAX_HOOKS; h++) +- INIT_LIST_HEAD(&nf_hooks[i][h]); ++ INIT_LIST_HEAD(&ve_nf_hooks[i][h]); + } ++} ++ ++int init_netfilter(void) ++{ ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_struct *envid; ++ ++ envid = get_exec_env(); ++ envid->_nf_hooks = kmalloc(sizeof(nf_hooks), GFP_KERNEL); ++ if (envid->_nf_hooks == NULL) ++ return -ENOMEM; ++ ++ /* FIXME: charge ubc */ ++ ++ init_nf_hooks(envid->_nf_hooks); ++ return 0; ++#else ++ init_nf_hooks(nf_hooks); ++ return 0; ++#endif ++} ++EXPORT_SYMBOL(init_netfilter); ++ ++#ifdef CONFIG_VE_IPTABLES ++void fini_netfilter(void) ++{ ++ struct ve_struct *envid; ++ ++ envid = get_exec_env(); ++ 
if (envid->_nf_hooks != NULL) ++ kfree(envid->_nf_hooks); ++ envid->_nf_hooks = NULL; ++ ++ /* FIXME: uncharge ubc */ ++} ++EXPORT_SYMBOL(fini_netfilter); ++#endif ++ ++void __init netfilter_init(void) ++{ ++ init_netfilter(); + + #ifdef CONFIG_PROC_FS + proc_net_netfilter = proc_mkdir("netfilter", proc_net); +@@ -214,3 +313,4 @@ void __init netfilter_init(void) + if (netfilter_log_init() < 0) + panic("cannot initialize nf_log"); + } ++ +diff -upr linux-2.6.16.orig/net/netfilter/nf_conntrack_netlink.c linux-2.6.16-026test015/net/netfilter/nf_conntrack_netlink.c +--- linux-2.6.16.orig/net/netfilter/nf_conntrack_netlink.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/nf_conntrack_netlink.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1641,7 +1641,7 @@ static void __exit ctnetlink_exit(void) + printk("ctnetlink: unregistering from nfnetlink.\n"); + + #ifdef CONFIG_NF_CONNTRACK_EVENTS +- nf_conntrack_unregister_notifier(&ctnl_notifier_exp); ++ nf_conntrack_expect_unregister_notifier(&ctnl_notifier_exp); + nf_conntrack_unregister_notifier(&ctnl_notifier); + #endif + +diff -upr linux-2.6.16.orig/net/netfilter/nf_conntrack_proto_sctp.c linux-2.6.16-026test015/net/netfilter/nf_conntrack_proto_sctp.c +--- linux-2.6.16.orig/net/netfilter/nf_conntrack_proto_sctp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/nf_conntrack_proto_sctp.c 2006-07-04 14:41:36.000000000 +0400 +@@ -240,12 +240,15 @@ static int do_basic_checks(struct nf_con + flag = 1; + } + +- /* Cookie Ack/Echo chunks not the first OR +- Init / Init Ack / Shutdown compl chunks not the only chunks */ +- if ((sch->type == SCTP_CID_COOKIE_ACK ++ /* ++ * Cookie Ack/Echo chunks not the first OR ++ * Init / Init Ack / Shutdown compl chunks not the only chunks ++ * OR zero-length. 
++ */ ++ if (((sch->type == SCTP_CID_COOKIE_ACK + || sch->type == SCTP_CID_COOKIE_ECHO + || flag) +- && count !=0 ) { ++ && count !=0) || !sch->length) { + DEBUGP("Basic checks failed\n"); + return 1; + } +@@ -256,7 +259,7 @@ static int do_basic_checks(struct nf_con + } + + DEBUGP("Basic checks passed\n"); +- return 0; ++ return count == 0; + } + + static int new_state(enum ip_conntrack_dir dir, +diff -upr linux-2.6.16.orig/net/netfilter/nf_queue.c linux-2.6.16-026test015/net/netfilter/nf_queue.c +--- linux-2.6.16.orig/net/netfilter/nf_queue.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/nf_queue.c 2006-07-04 14:41:39.000000000 +0400 +@@ -209,12 +209,12 @@ void nf_reinject(struct sk_buff *skb, st + /* Drop reference to owner of hook which queued us. */ + module_put(info->elem->owner); + +- list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { ++ list_for_each_rcu(i, &ve_nf_hooks[info->pf][info->hook]) { + if (i == elem) + break; + } + +- if (i == &nf_hooks[info->pf][info->hook]) { ++ if (i == &ve_nf_hooks[info->pf][info->hook]) { + /* The module which sent it to userspace is gone. 
*/ + NFDEBUG("%s: module disappeared, dropping packet.\n", + __FUNCTION__); +@@ -235,7 +235,7 @@ void nf_reinject(struct sk_buff *skb, st + + if (verdict == NF_ACCEPT) { + next_hook: +- verdict = nf_iterate(&nf_hooks[info->pf][info->hook], ++ verdict = nf_iterate(&ve_nf_hooks[info->pf][info->hook], + &skb, info->hook, + info->indev, info->outdev, &elem, + info->okfn, INT_MIN); +diff -upr linux-2.6.16.orig/net/netfilter/nf_sockopt.c linux-2.6.16-026test015/net/netfilter/nf_sockopt.c +--- linux-2.6.16.orig/net/netfilter/nf_sockopt.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/nf_sockopt.c 2006-07-04 14:41:39.000000000 +0400 +@@ -80,6 +80,12 @@ static int nf_sockopt(struct sock *sk, i + struct nf_sockopt_ops *ops; + int ret; + ++#ifdef CONFIG_VE_IPTABLES ++ if (!get_exec_env()->_nf_hooks || ++ !get_exec_env()->_ipt_standard_target) ++ return -ENOPROTOOPT; ++#endif ++ + if (down_interruptible(&nf_sockopt_mutex) != 0) + return -EINTR; + +diff -upr linux-2.6.16.orig/net/netfilter/x_tables.c linux-2.6.16-026test015/net/netfilter/x_tables.c +--- linux-2.6.16.orig/net/netfilter/x_tables.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/x_tables.c 2006-07-04 14:41:39.000000000 +0400 +@@ -24,6 +24,10 @@ + + #include <linux/netfilter/x_tables.h> + #include <linux/netfilter_arp.h> ++#include <linux/nfcalls.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); +@@ -38,7 +42,13 @@ struct xt_af { + struct list_head tables; + }; + ++#ifdef CONFIG_VE_IPTABLES ++/* include ve.h and define get_exec_env */ ++#include <linux/sched.h> ++#define xt (get_exec_env()->_xt) ++#else + static struct xt_af *xt; ++#endif + + #ifdef DEBUG_IP_FIREWALL_USER + #define duprintf(format, args...) 
printk(format , ## args) +@@ -52,17 +62,52 @@ enum { + MATCH, + }; + ++#ifdef CONFIG_USER_RESOURCE ++#define UB_NUMXTENT 23 ++static int charge_xtables(struct user_beancounter *ub, unsigned long size) ++{ ++ if (ub == NULL) ++ return 0; ++ return charge_beancounter(ub, UB_NUMXTENT, size, 1); ++} ++static void uncharge_xtables(struct user_beancounter *ub, unsigned long size) ++{ ++ if (ub == NULL) ++ return; ++ uncharge_beancounter(ub, UB_NUMXTENT, size); ++} ++#endif /* CONFIG_USER_RESOURCE */ ++ + /* Registration hooks for targets. */ + int + xt_register_target(int af, struct xt_target *target) + { + int ret; ++ struct module *mod = target->me; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct xt_target *tmp; ++ __module_get(mod); ++ ret = -ENOMEM; ++ tmp = ub_kmalloc(sizeof(struct xt_target), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, target, sizeof(struct xt_target)); ++ target = tmp; ++ } + + ret = down_interruptible(&xt[af].mutex); + if (ret != 0) +- return ret; ++ goto out; + list_add(&target->list, &xt[af].target); + up(&xt[af].mutex); ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())) { ++ kfree(target); ++nomem: ++ module_put(mod); ++ } + return ret; + } + EXPORT_SYMBOL(xt_register_target); +@@ -71,8 +116,21 @@ void + xt_unregister_target(int af, struct xt_target *target) + { + down(&xt[af].mutex); ++ if (!ve_is_super(get_exec_env())) { ++ target = list_named_find(&xt[af].target, target->name); ++ if (!target) { ++ up(&xt[af].mutex); ++ return; ++ } ++ } ++ + LIST_DELETE(&xt[af].target, target); + up(&xt[af].mutex); ++ ++ if (!ve_is_super(get_exec_env())) { ++ module_put(target->me); ++ kfree(target); ++ } + } + EXPORT_SYMBOL(xt_unregister_target); + +@@ -80,14 +138,33 @@ int + xt_register_match(int af, struct xt_match *match) + { + int ret; ++ struct module *mod = match->me; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct xt_match *tmp; ++ __module_get(mod); ++ ret = -ENOMEM; ++ tmp = ub_kmalloc(sizeof(struct xt_match), 
GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, match, sizeof(struct xt_match)); ++ match = tmp; ++ } + + ret = down_interruptible(&xt[af].mutex); + if (ret != 0) +- return ret; ++ goto out; + + list_add(&match->list, &xt[af].match); + up(&xt[af].mutex); + ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())) { ++ kfree(match); ++nomem: ++ module_put(mod); ++ } + return ret; + } + EXPORT_SYMBOL(xt_register_match); +@@ -96,8 +173,21 @@ void + xt_unregister_match(int af, struct xt_match *match) + { + down(&xt[af].mutex); ++ if (!ve_is_super(get_exec_env())) { ++ match = list_named_find(&xt[af].match, match->name); ++ if (!match) { ++ up(&xt[af].mutex); ++ return; ++ } ++ } ++ + LIST_DELETE(&xt[af].match, match); + up(&xt[af].mutex); ++ ++ if (!ve_is_super(get_exec_env())) { ++ module_put(match->me); ++ kfree(match); ++ } + } + EXPORT_SYMBOL(xt_unregister_match); + +@@ -246,7 +336,7 @@ struct xt_table_info *xt_alloc_table_inf + if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages) + return NULL; + +- newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL); ++ newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL_UBC); + if (!newinfo) + return NULL; + +@@ -255,10 +345,10 @@ struct xt_table_info *xt_alloc_table_inf + for_each_cpu(cpu) { + if (size <= PAGE_SIZE) + newinfo->entries[cpu] = kmalloc_node(size, +- GFP_KERNEL, ++ GFP_KERNEL_UBC, + cpu_to_node(cpu)); + else +- newinfo->entries[cpu] = vmalloc_node(size, ++ newinfo->entries[cpu] = ub_vmalloc_node(size, + cpu_to_node(cpu)); + + if (newinfo->entries[cpu] == NULL) { +@@ -315,6 +405,9 @@ xt_replace_table(struct xt_table *table, + int *error) + { + struct xt_table_info *oldinfo, *private; ++#ifdef CONFIG_USER_RESOURCE ++ struct user_beancounter *old_ub, *new_ub; ++#endif + + /* Do the substitution. 
*/ + write_lock_bh(&table->lock); +@@ -328,6 +421,21 @@ xt_replace_table(struct xt_table *table, + return NULL; + } + oldinfo = private; ++ ++#ifdef CONFIG_USER_RESOURCE ++ new_ub = mem_ub(newinfo); ++ if (charge_xtables(new_ub, newinfo->number)) { ++ oldinfo = NULL; ++ write_unlock_bh(&table->lock); ++ *error = -ENOMEM; ++ return NULL; ++ } ++ if (num_counters) { ++ old_ub = mem_ub(oldinfo); ++ uncharge_xtables(old_ub, oldinfo->number); ++ } ++#endif ++ + table->private = newinfo; + newinfo->initial_entries = oldinfo->initial_entries; + write_unlock_bh(&table->lock); +@@ -355,6 +463,7 @@ int xt_register_table(struct xt_table *t + + /* Simplifies replace_table code. */ + table->private = bootstrap; ++ rwlock_init(&table->lock); + if (!xt_replace_table(table, 0, newinfo, &ret)) + goto unlock; + +@@ -364,7 +473,6 @@ int xt_register_table(struct xt_table *t + /* save number of initial entries */ + private->initial_entries = private->number; + +- rwlock_init(&table->lock); + list_prepend(&xt[table->af].tables, table); + + ret = 0; +@@ -374,6 +482,39 @@ int xt_register_table(struct xt_table *t + } + EXPORT_SYMBOL_GPL(xt_register_table); + ++struct xt_table * virt_xt_register_table(struct xt_table *table, ++ struct xt_table_info *bootstrap, ++ struct xt_table_info *newinfo) ++{ ++ int ret; ++ struct module *mod = table->me; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct xt_table *tmp; ++ __module_get(mod); ++ ret = -ENOMEM; ++ tmp = ub_kmalloc(sizeof(struct xt_table), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, table, sizeof(struct xt_table)); ++ table = tmp; ++ } ++ ++ ret = xt_register_table(table, bootstrap, newinfo); ++ if (ret) ++ goto out; ++ ++ return table; ++out: ++ if (!ve_is_super(get_exec_env())) { ++ kfree(table); ++nomem: ++ module_put(mod); ++ } ++ return ERR_PTR(ret); ++} ++EXPORT_SYMBOL_GPL(virt_xt_register_table); ++ + void *xt_unregister_table(struct xt_table *table) + { + struct xt_table_info *private; +@@ -383,10 +524,27 @@ void 
*xt_unregister_table(struct xt_tabl + LIST_DELETE(&xt[table->af].tables, table); + up(&xt[table->af].mutex); + ++#ifdef CONFIG_USER_RESOURCE ++ uncharge_xtables(mem_ub(private), private->number); ++#endif ++ + return private; + } + EXPORT_SYMBOL_GPL(xt_unregister_table); + ++void *virt_xt_unregister_table(struct xt_table *table) ++{ ++ void *ret; ++ ++ ret = xt_unregister_table(table); ++ if (!ve_is_super(get_exec_env())) { ++ module_put(table->me); ++ kfree(table); ++ } ++ return ret; ++} ++EXPORT_SYMBOL_GPL(virt_xt_unregister_table); ++ + #ifdef CONFIG_PROC_FS + static char *xt_proto_prefix[NPROTO] = { + [AF_INET] = "ip", +@@ -597,10 +755,13 @@ void xt_proto_fini(int af) + EXPORT_SYMBOL_GPL(xt_proto_fini); + + +-static int __init xt_init(void) ++int init_xtables(void) + { + int i; + ++ if (xt) ++ return -EEXIST; ++ + xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL); + if (!xt) + return -ENOMEM; +@@ -614,11 +775,34 @@ static int __init xt_init(void) + return 0; + } + +-static void __exit xt_fini(void) ++void fini_xtables(void) + { + kfree(xt); ++ xt = NULL; ++} ++ ++static int __init xt_init(void) ++{ ++ int err; ++ ++ err = init_xtables(); ++ if (err) ++ return err; ++ ++ KSYMRESOLVE(init_xtables); ++ KSYMRESOLVE(fini_xtables); ++ KSYMMODRESOLVE(x_tables); ++ return 0; ++} ++ ++static void __exit xt_fini(void) ++{ ++ KSYMMODUNRESOLVE(x_tables); ++ KSYMUNRESOLVE(init_xtables); ++ KSYMUNRESOLVE(fini_xtables); ++ fini_xtables(); + } + +-module_init(xt_init); ++subsys_initcall(xt_init); + module_exit(xt_fini); + +diff -upr linux-2.6.16.orig/net/netfilter/xt_conntrack.c linux-2.6.16-026test015/net/netfilter/xt_conntrack.c +--- linux-2.6.16.orig/net/netfilter/xt_conntrack.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/xt_conntrack.c 2006-07-04 14:41:39.000000000 +0400 +@@ -20,6 +20,8 @@ + + #include <linux/netfilter/x_tables.h> + #include <linux/netfilter/xt_conntrack.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include 
<linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); +@@ -213,25 +215,145 @@ static int check(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat_to_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_match *pm; ++ struct xt_conntrack_info *pinfo; ++ struct compat_xt_conntrack_info info; ++ u_int16_t msize; ++ ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match))) ++ return -EFAULT; ++ pinfo = (struct xt_conntrack_info *)pm->data; ++ memset(&info, 0, sizeof(struct compat_xt_conntrack_info)); ++ info.statemask = pinfo->statemask; ++ info.statusmask = pinfo->statusmask; ++ memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX * ++ sizeof(struct ip_conntrack_tuple)); ++ memcpy(info.sipmsk, pinfo->sipmsk, ++ IP_CT_DIR_MAX * sizeof(struct in_addr)); ++ memcpy(info.dipmsk, pinfo->dipmsk, ++ IP_CT_DIR_MAX * sizeof(struct in_addr)); ++ info.expires_min = pinfo->expires_min; ++ info.expires_max = pinfo->expires_max; ++ info.flags = pinfo->flags; ++ info.invflags = pinfo->invflags; ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match), ++ &info, sizeof(struct compat_xt_conntrack_info))) ++ return -EFAULT; ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat_from_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_match *pm; ++ struct ipt_entry_match *dstpm; ++ struct compat_xt_conntrack_info *pinfo; ++ struct xt_conntrack_info info; ++ u_int16_t msize; ++ ++ pm = (struct compat_ipt_entry_match *)match; ++ dstpm = (struct ipt_entry_match *)*dstptr; ++ msize = pm->u.user.match_size; ++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match)); ++ pinfo = (struct compat_xt_conntrack_info *)pm->data; ++ memset(&info, 0, sizeof(struct 
xt_conntrack_info)); ++ info.statemask = pinfo->statemask; ++ info.statusmask = pinfo->statusmask; ++ memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX * ++ sizeof(struct ip_conntrack_tuple)); ++ memcpy(info.sipmsk, pinfo->sipmsk, ++ IP_CT_DIR_MAX * sizeof(struct in_addr)); ++ memcpy(info.dipmsk, pinfo->dipmsk, ++ IP_CT_DIR_MAX * sizeof(struct in_addr)); ++ info.expires_min = pinfo->expires_min; ++ info.expires_max = pinfo->expires_max; ++ info.flags = pinfo->flags; ++ info.invflags = pinfo->invflags; ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match), ++ &info, sizeof(struct xt_conntrack_info)); ++ msize += off; ++ dstpm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat(void *match, void **dstptr, int *size, int convert) ++{ ++ int ret, off; ++ ++ off = XT_ALIGN(sizeof(struct xt_conntrack_info)) - ++ COMPAT_XT_ALIGN(sizeof(struct compat_xt_conntrack_info)); ++ switch (convert) { ++ case COMPAT_TO_USER: ++ ret = compat_to_user(match, dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = compat_from_user(match, dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + static struct xt_match conntrack_match = { + .name = "conntrack", + .match = &match, + .checkentry = &check, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_xt_conntrack_match(void) ++{ ++ return xt_register_match(AF_INET, &conntrack_match); ++} ++ ++void fini_xt_conntrack_match(void) ++{ ++ xt_unregister_match(AF_INET, &conntrack_match); ++} ++ + static int __init init(void) + { + int ret; + need_conntrack(); +- ret = xt_register_match(AF_INET, &conntrack_match); +- ++ ret = init_xt_conntrack_match(); ++ if (ret < 0) ++ return ret; ++ ++ KSYMRESOLVE(init_xt_conntrack_match); ++ KSYMRESOLVE(fini_xt_conntrack_match); ++ KSYMMODRESOLVE(xt_conntrack); + return ret; + } + + 
static void __exit fini(void) + { +- xt_unregister_match(AF_INET, &conntrack_match); ++ KSYMMODUNRESOLVE(xt_conntrack); ++ KSYMUNRESOLVE(init_xt_conntrack_match); ++ KSYMUNRESOLVE(fini_xt_conntrack_match); ++ fini_xt_conntrack_match(); + } + + module_init(init); +diff -upr linux-2.6.16.orig/net/netfilter/xt_helper.c linux-2.6.16-026test015/net/netfilter/xt_helper.c +--- linux-2.6.16.orig/net/netfilter/xt_helper.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/xt_helper.c 2006-07-04 14:41:39.000000000 +0400 +@@ -24,6 +24,8 @@ + #endif + #include <linux/netfilter/x_tables.h> + #include <linux/netfilter/xt_helper.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); +@@ -148,23 +150,107 @@ static int check(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat_to_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_match *pm; ++ struct xt_helper_info *pinfo; ++ struct compat_xt_helper_info info; ++ u_int16_t msize; ++ ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match))) ++ return -EFAULT; ++ pinfo = (struct xt_helper_info *)pm->data; ++ memset(&info, 0, sizeof(struct compat_xt_helper_info)); ++ info.invert = pinfo->invert; ++ memcpy(info.name, pinfo->name, 30); ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match), ++ &info, sizeof(struct compat_xt_helper_info))) ++ return -EFAULT; ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat_from_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_match *pm; ++ struct ipt_entry_match *dstpm; ++ struct compat_xt_helper_info *pinfo; ++ struct xt_helper_info info; ++ u_int16_t msize; ++ ++ pm = 
(struct compat_ipt_entry_match *)match; ++ dstpm = (struct ipt_entry_match *)*dstptr; ++ msize = pm->u.user.match_size; ++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match)); ++ pinfo = (struct compat_xt_helper_info *)pm->data; ++ memset(&info, 0, sizeof(struct xt_helper_info)); ++ info.invert = pinfo->invert; ++ memcpy(info.name, pinfo->name, 30); ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match), ++ &info, sizeof(struct xt_helper_info)); ++ msize += off; ++ dstpm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat(void *match, void **dstptr, int *size, int convert) ++{ ++ int ret, off; ++ ++ off = XT_ALIGN(sizeof(struct xt_helper_info)) - ++ COMPAT_XT_ALIGN(sizeof(struct compat_xt_helper_info)); ++ switch (convert) { ++ case COMPAT_TO_USER: ++ ret = compat_to_user(match, dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = compat_from_user(match, dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + static struct xt_match helper_match = { + .name = "helper", + .match = &match, + .checkentry = &check, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + static struct xt_match helper6_match = { + .name = "helper", + .match = &match, + .checkentry = &check, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_xt_helper(void) + { + int ret; +- need_conntrack(); + + ret = xt_register_match(AF_INET, &helper_match); + if (ret < 0) +@@ -177,12 +263,35 @@ static int __init init(void) + return ret; + } + +-static void __exit fini(void) ++void fini_xt_helper(void) + { + xt_unregister_match(AF_INET, &helper_match); + xt_unregister_match(AF_INET6, &helper6_match); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ need_conntrack(); ++ err = init_xt_helper(); ++ if 
(err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_xt_helper); ++ KSYMRESOLVE(fini_xt_helper); ++ KSYMMODRESOLVE(xt_helper); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(xt_helper); ++ KSYMUNRESOLVE(init_xt_helper); ++ KSYMUNRESOLVE(fini_xt_helper); ++ fini_xt_helper(); ++} ++ + module_init(init); + module_exit(fini); + +diff -upr linux-2.6.16.orig/net/netfilter/xt_length.c linux-2.6.16-026test015/net/netfilter/xt_length.c +--- linux-2.6.16.orig/net/netfilter/xt_length.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/xt_length.c 2006-07-04 14:41:39.000000000 +0400 +@@ -13,6 +13,7 @@ + + #include <linux/netfilter/xt_length.h> + #include <linux/netfilter/x_tables.h> ++#include <linux/nfcalls.h> + + MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); + MODULE_DESCRIPTION("IP tables packet length matching module"); +@@ -63,20 +64,38 @@ checkentry(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = XT_ALIGN(sizeof(struct xt_length_info)) - ++ COMPAT_XT_ALIGN(sizeof(struct xt_length_info)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct xt_match length_match = { + .name = "length", + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + static struct xt_match length6_match = { + .name = "length", + .match = &match6, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_xt_length(void) + { + int ret; + ret = xt_register_match(AF_INET, &length_match); +@@ -89,11 +108,33 @@ static int __init init(void) + return ret; + } + +-static void __exit fini(void) ++void fini_xt_length(void) + { + xt_unregister_match(AF_INET, &length_match); + xt_unregister_match(AF_INET6, 
&length6_match); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_xt_length(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_xt_length); ++ KSYMRESOLVE(fini_xt_length); ++ KSYMMODRESOLVE(xt_length); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(xt_length); ++ KSYMUNRESOLVE(init_xt_length); ++ KSYMUNRESOLVE(fini_xt_length); ++ fini_xt_length(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/netfilter/xt_limit.c linux-2.6.16-026test015/net/netfilter/xt_limit.c +--- linux-2.6.16.orig/net/netfilter/xt_limit.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/xt_limit.c 2006-07-04 14:41:39.000000000 +0400 +@@ -17,9 +17,11 @@ + #include <linux/skbuff.h> + #include <linux/spinlock.h> + #include <linux/interrupt.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter/x_tables.h> + #include <linux/netfilter/xt_limit.h> ++#include <linux/netfilter_ipv4/ip_tables.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>"); +@@ -27,6 +29,13 @@ MODULE_DESCRIPTION("iptables rate limit + MODULE_ALIAS("ipt_limit"); + MODULE_ALIAS("ip6t_limit"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ipt_limit_reg (*(get_exec_env()->_ipt_limit_reg)) ++#else ++#define ve_ipt_limit_reg ipt_limit_reg ++#endif ++ + /* The algorithm used is the Simple Token Bucket Filter (TBF) + * see net/sched/sch_tbf.c in the linux source tree + */ +@@ -137,20 +146,108 @@ ipt_limit_checkentry(const char *tablena + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int ipt_limit_compat_to_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_match *pm; ++ struct xt_rateinfo *pinfo; ++ struct compat_xt_rateinfo rinfo; ++ u_int16_t msize; ++ ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match))) ++ return -EFAULT; ++ pinfo = (struct 
xt_rateinfo *)pm->data; ++ memset(&rinfo, 0, sizeof(struct compat_xt_rateinfo)); ++ rinfo.avg = pinfo->avg; ++ rinfo.burst = pinfo->burst; ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match), ++ &rinfo, sizeof(struct compat_xt_rateinfo))) ++ return -EFAULT; ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int ipt_limit_compat_from_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_match *pm; ++ struct ipt_entry_match *dstpm; ++ struct compat_xt_rateinfo *pinfo; ++ struct xt_rateinfo rinfo; ++ u_int16_t msize; ++ ++ pm = (struct compat_ipt_entry_match *)match; ++ dstpm = (struct ipt_entry_match *)*dstptr; ++ msize = pm->u.user.match_size; ++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match)); ++ pinfo = (struct compat_xt_rateinfo *)pm->data; ++ memset(&rinfo, 0, sizeof(struct xt_rateinfo)); ++ rinfo.avg = pinfo->avg; ++ rinfo.burst = pinfo->burst; ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match), ++ &rinfo, sizeof(struct xt_rateinfo)); ++ msize += off; ++ dstpm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int ipt_limit_compat(void *match, void **dstptr, ++ int *size, int convert) ++{ ++ int ret, off; ++ ++ off = XT_ALIGN(sizeof(struct xt_rateinfo)) - ++ COMPAT_XT_ALIGN(sizeof(struct compat_xt_rateinfo)); ++ switch (convert) { ++ case COMPAT_TO_USER: ++ ret = ipt_limit_compat_to_user(match, ++ dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = ipt_limit_compat_from_user(match, ++ dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + static struct xt_match ipt_limit_reg = { + .name = "limit", + .match = ipt_limit_match, + .checkentry = ipt_limit_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = ipt_limit_compat, ++#endif + 
.me = THIS_MODULE, + }; + static struct xt_match limit6_reg = { + .name = "limit", + .match = ipt_limit_match, + .checkentry = ipt_limit_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = ipt_limit_compat, ++#endif + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_xt_limit(void) + { + int ret; + +@@ -165,11 +262,33 @@ static int __init init(void) + return ret; + } + +-static void __exit fini(void) ++void fini_xt_limit(void) + { + xt_unregister_match(AF_INET, &ipt_limit_reg); + xt_unregister_match(AF_INET6, &limit6_reg); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_xt_limit(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_xt_limit); ++ KSYMRESOLVE(fini_xt_limit); ++ KSYMMODRESOLVE(xt_limit); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(xt_limit); ++ KSYMUNRESOLVE(init_xt_limit); ++ KSYMUNRESOLVE(fini_xt_limit); ++ fini_xt_limit(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/netfilter/xt_sctp.c linux-2.6.16-026test015/net/netfilter/xt_sctp.c +--- linux-2.6.16.orig/net/netfilter/xt_sctp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/xt_sctp.c 2006-07-04 14:41:36.000000000 +0400 +@@ -62,7 +62,7 @@ match_packet(const struct sk_buff *skb, + + do { + sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); +- if (sch == NULL) { ++ if (sch == NULL || sch->length == 0) { + duprintf("Dropping invalid SCTP packet.\n"); + *hotdrop = 1; + return 0; +diff -upr linux-2.6.16.orig/net/netfilter/xt_state.c linux-2.6.16-026test015/net/netfilter/xt_state.c +--- linux-2.6.16.orig/net/netfilter/xt_state.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/xt_state.c 2006-07-04 14:41:39.000000000 +0400 +@@ -10,9 +10,11 @@ + + #include <linux/module.h> + #include <linux/skbuff.h> ++#include <linux/nfcalls.h> + #include <net/netfilter/nf_conntrack_compat.h> + #include <linux/netfilter/x_tables.h> + #include 
<linux/netfilter/xt_state.h> ++#include <linux/netfilter_ipv4/ip_tables.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); +@@ -55,10 +57,90 @@ static int check(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat_to_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_match *pm; ++ struct xt_state_info *pinfo; ++ struct compat_xt_state_info info; ++ u_int16_t msize; ++ ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match))) ++ return -EFAULT; ++ pinfo = (struct xt_state_info *)pm->data; ++ memset(&info, 0, sizeof(struct compat_xt_state_info)); ++ info.statemask = pinfo->statemask; ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match), ++ &info, sizeof(struct compat_xt_state_info))) ++ return -EFAULT; ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat_from_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_match *pm; ++ struct ipt_entry_match *dstpm; ++ struct compat_xt_state_info *pinfo; ++ struct xt_state_info info; ++ u_int16_t msize; ++ ++ pm = (struct compat_ipt_entry_match *)match; ++ dstpm = (struct ipt_entry_match *)*dstptr; ++ msize = pm->u.user.match_size; ++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match)); ++ pinfo = (struct compat_xt_state_info *)pm->data; ++ memset(&info, 0, sizeof(struct xt_state_info)); ++ info.statemask = pinfo->statemask; ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match), ++ &info, sizeof(struct xt_state_info)); ++ msize += off; ++ dstpm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat(void *match, void **dstptr, int *size, int convert) ++{ ++ int ret, off; ++ ++ off = XT_ALIGN(sizeof(struct xt_state_info)) - ++ 
COMPAT_XT_ALIGN(sizeof(struct compat_xt_state_info)); ++ switch (convert) { ++ case COMPAT_TO_USER: ++ ret = compat_to_user(match, dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = compat_from_user(match, dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + static struct xt_match state_match = { + .name = "state", + .match = &match, + .checkentry = &check, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + +@@ -66,15 +148,16 @@ static struct xt_match state6_match = { + .name = "state", + .match = &match, + .checkentry = &check, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_xt_state(void) + { + int ret; + +- need_conntrack(); +- + ret = xt_register_match(AF_INET, &state_match); + if (ret < 0) + return ret; +@@ -86,11 +169,34 @@ static int __init init(void) + return ret; + } + +-static void __exit fini(void) ++void fini_xt_state(void) + { + xt_unregister_match(AF_INET, &state_match); + xt_unregister_match(AF_INET6, &state6_match); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ need_conntrack(); ++ err = init_xt_state(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_xt_state); ++ KSYMRESOLVE(fini_xt_state); ++ KSYMMODRESOLVE(xt_state); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(xt_state); ++ KSYMUNRESOLVE(init_xt_state); ++ KSYMUNRESOLVE(fini_xt_state); ++ fini_xt_state(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/netfilter/xt_tcpmss.c linux-2.6.16-026test015/net/netfilter/xt_tcpmss.c +--- linux-2.6.16.orig/net/netfilter/xt_tcpmss.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/xt_tcpmss.c 2006-07-04 14:41:39.000000000 +0400 +@@ -11,6 +11,7 @@ + #include <linux/module.h> + #include <linux/skbuff.h> + 
#include <net/tcp.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter/xt_tcpmss.h> + #include <linux/netfilter/x_tables.h> +@@ -133,10 +134,25 @@ checkentry6(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = XT_ALIGN(sizeof(struct xt_tcpmss_match_info)) - ++ COMPAT_XT_ALIGN(sizeof(struct xt_tcpmss_match_info)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct xt_match tcpmss_match = { + .name = "tcpmss", + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + +@@ -144,11 +160,14 @@ static struct xt_match tcpmss6_match = { + .name = "tcpmss", + .match = &match, + .checkentry = &checkentry6, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + + +-static int __init init(void) ++int init_xt_tcpmss(void) + { + int ret; + ret = xt_register_match(AF_INET, &tcpmss_match); +@@ -162,11 +181,33 @@ static int __init init(void) + return ret; + } + +-static void __exit fini(void) ++void fini_xt_tcpmss(void) + { + xt_unregister_match(AF_INET6, &tcpmss6_match); + xt_unregister_match(AF_INET, &tcpmss_match); + } + ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_xt_tcpmss(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_xt_tcpmss); ++ KSYMRESOLVE(fini_xt_tcpmss); ++ KSYMMODRESOLVE(xt_tcpmss); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(xt_tcpmss); ++ KSYMUNRESOLVE(init_xt_tcpmss); ++ KSYMUNRESOLVE(fini_xt_tcpmss); ++ fini_xt_tcpmss(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/netfilter/xt_tcpudp.c linux-2.6.16-026test015/net/netfilter/xt_tcpudp.c +--- linux-2.6.16.orig/net/netfilter/xt_tcpudp.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netfilter/xt_tcpudp.c 2006-07-04 
14:41:39.000000000 +0400 +@@ -5,6 +5,7 @@ + #include <net/ipv6.h> + #include <net/tcp.h> + #include <net/udp.h> ++#include <linux/nfcalls.h> + #include <linux/netfilter/x_tables.h> + #include <linux/netfilter/xt_tcpudp.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -266,10 +267,35 @@ udp6_checkentry(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int tcp_compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = XT_ALIGN(sizeof(struct xt_tcp)) - ++ COMPAT_XT_ALIGN(sizeof(struct xt_tcp)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++ ++static int udp_compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = XT_ALIGN(sizeof(struct xt_udp)) - ++ COMPAT_XT_ALIGN(sizeof(struct xt_udp)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct xt_match tcp_matchstruct = { + .name = "tcp", + .match = &tcp_match, + .checkentry = &tcp_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &tcp_compat, ++#endif + .me = THIS_MODULE, + }; + static struct xt_match tcp6_matchstruct = { +@@ -283,6 +309,9 @@ static struct xt_match udp_matchstruct = + .name = "udp", + .match = &udp_match, + .checkentry = &udp_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &udp_compat, ++#endif + .me = THIS_MODULE, + }; + static struct xt_match udp6_matchstruct = { +@@ -292,7 +321,7 @@ static struct xt_match udp6_matchstruct + .me = THIS_MODULE, + }; + +-static int __init init(void) ++int init_xt_tcpudp(void) + { + int ret; + ret = xt_register_match(AF_INET, &tcp_matchstruct); +@@ -322,7 +351,7 @@ out_unreg_tcp: + return ret; + } + +-static void __exit fini(void) ++void fini_xt_tcpudp(void) + { + xt_unregister_match(AF_INET6, &udp6_matchstruct); + xt_unregister_match(AF_INET, &udp_matchstruct); +@@ -330,5 +359,27 @@ static void __exit fini(void) + xt_unregister_match(AF_INET, &tcp_matchstruct); + } + ++static int __init init(void) ++{ ++ 
int err; ++ ++ err = init_xt_tcpudp(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_xt_tcpudp); ++ KSYMRESOLVE(fini_xt_tcpudp); ++ KSYMMODRESOLVE(xt_tcpudp); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(xt_tcpudp); ++ KSYMUNRESOLVE(init_xt_tcpudp); ++ KSYMUNRESOLVE(fini_xt_tcpudp); ++ fini_xt_tcpudp(); ++} ++ + module_init(init); + module_exit(fini); +diff -upr linux-2.6.16.orig/net/netlink/af_netlink.c linux-2.6.16-026test015/net/netlink/af_netlink.c +--- linux-2.6.16.orig/net/netlink/af_netlink.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/netlink/af_netlink.c 2006-07-04 14:41:39.000000000 +0400 +@@ -60,27 +60,14 @@ + #include <net/sock.h> + #include <net/scm.h> + #include <net/netlink.h> ++#include <net/netlink_sock.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_net.h> + + #define Nprintk(a...) + #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) + +-struct netlink_sock { +- /* struct sock has to be the first member of netlink_sock */ +- struct sock sk; +- u32 pid; +- u32 dst_pid; +- u32 dst_group; +- u32 flags; +- u32 subscriptions; +- u32 ngroups; +- unsigned long *groups; +- unsigned long state; +- wait_queue_head_t wait; +- struct netlink_callback *cb; +- spinlock_t cb_lock; +- void (*data_ready)(struct sock *sk, int bytes); +- struct module *module; +-}; + + #define NETLINK_KERNEL_SOCKET 0x1 + #define NETLINK_RECV_PKTINFO 0x2 +@@ -209,7 +196,10 @@ static __inline__ struct sock *netlink_l + read_lock(&nl_table_lock); + head = nl_pid_hashfn(hash, pid); + sk_for_each(sk, node, head) { +- if (nlk_sk(sk)->pid == pid) { ++ /* VEs should find sockets, created by kernel */ ++ if ((nlk_sk(sk)->pid == pid) && ++ (!pid || ve_accessible_strict(VE_OWNER_SK(sk), ++ get_exec_env()))){ + sock_hold(sk); + goto found; + } +@@ -309,7 +299,9 @@ static int netlink_insert(struct sock *s + head = nl_pid_hashfn(hash, pid); + len = 0; + sk_for_each(osk, node, head) { +- if (nlk_sk(osk)->pid == pid) ++ 
if ((nlk_sk(sk)->pid == pid) && ++ ve_accessible_strict(VE_OWNER_SK(sk), ++ get_exec_env())) + break; + len++; + } +@@ -362,6 +354,8 @@ static int __netlink_create(struct socke + sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); + if (!sk) + return -ENOMEM; ++ if (ub_other_sock_charge(sk)) ++ goto out_free; + + sock_init_data(sock, sk); + +@@ -372,6 +366,10 @@ static int __netlink_create(struct socke + sk->sk_destruct = netlink_sock_destruct; + sk->sk_protocol = protocol; + return 0; ++ ++out_free: ++ sk_free(sk); ++ return -ENOMEM; + } + + static int netlink_create(struct socket *sock, int protocol) +@@ -425,6 +423,7 @@ static int netlink_release(struct socket + return 0; + + netlink_remove(sk); ++ sock_orphan(sk); + nlk = nlk_sk(sk); + + spin_lock(&nlk->cb_lock); +@@ -439,7 +438,6 @@ static int netlink_release(struct socket + /* OK. Socket is unlinked, and, therefore, + no new packets will arrive */ + +- sock_orphan(sk); + sock->sk = NULL; + wake_up_interruptible_all(&nlk->wait); + +@@ -477,7 +475,7 @@ static int netlink_autobind(struct socke + struct hlist_head *head; + struct sock *osk; + struct hlist_node *node; +- s32 pid = current->tgid; ++ s32 pid = virt_pid(current); + int err; + static s32 rover = -4097; + +@@ -486,7 +484,9 @@ retry: + netlink_table_grab(); + head = nl_pid_hashfn(hash, pid); + sk_for_each(osk, node, head) { +- if (nlk_sk(osk)->pid == pid) { ++ if ((nlk_sk(osk)->pid == pid) && ++ ve_accessible_strict(VE_OWNER_SK(osk), ++ get_exec_env())) { + /* Bind collision, search negative pid values. 
*/ + pid = rover--; + if (rover > -4097) +@@ -511,7 +511,7 @@ retry: + static inline int netlink_capable(struct socket *sock, unsigned int flag) + { + return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) || +- capable(CAP_NET_ADMIN); ++ capable(CAP_VE_NET_ADMIN); + } + + static void +@@ -845,6 +845,9 @@ static inline int do_one_broadcast(struc + !test_bit(p->group - 1, nlk->groups)) + goto out; + ++ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk))) ++ goto out; ++ + if (p->failure) { + netlink_overrun(sk); + goto out; +@@ -942,6 +945,9 @@ static inline int do_one_set_err(struct + !test_bit(p->group - 1, nlk->groups)) + goto out; + ++ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk))) ++ goto out; ++ + sk->sk_err = p->code; + sk->sk_error_report(sk); + out: +@@ -1076,12 +1082,17 @@ static int netlink_sendmsg(struct kiocb + struct sock_iocb *siocb = kiocb_to_siocb(kiocb); + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); +- struct sockaddr_nl *addr=msg->msg_name; ++ struct sockaddr_nl *addr = msg->msg_name; + u32 dst_pid; +- u32 dst_group; + struct sk_buff *skb; + int err; + struct scm_cookie scm; ++ struct sock *dstsk; ++ long timeo; ++ int no_ubc, no_buf; ++ unsigned long chargesize; ++ ++ DECLARE_WAITQUEUE(wait, current); + + if (msg->msg_flags&MSG_OOB) + return -EOPNOTSUPP; +@@ -1092,17 +1103,16 @@ static int netlink_sendmsg(struct kiocb + if (err < 0) + return err; + ++ /* Broadcasts from user to kernel are disabled. 
This is OK ++ * according to ANK */ + if (msg->msg_namelen) { + if (addr->nl_family != AF_NETLINK) + return -EINVAL; + dst_pid = addr->nl_pid; +- dst_group = ffs(addr->nl_groups); +- if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) ++ if (addr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND)) + return -EPERM; +- } else { ++ } else + dst_pid = nlk->dst_pid; +- dst_group = nlk->dst_group; +- } + + if (!nlk->pid) { + err = netlink_autobind(sock); +@@ -1115,12 +1125,12 @@ static int netlink_sendmsg(struct kiocb + goto out; + err = -ENOBUFS; + skb = alloc_skb(len, GFP_KERNEL); +- if (skb==NULL) ++ if (skb == NULL) + goto out; + + NETLINK_CB(skb).pid = nlk->pid; + NETLINK_CB(skb).dst_pid = dst_pid; +- NETLINK_CB(skb).dst_group = dst_group; ++ NETLINK_CB(skb).dst_group = 0; + NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); + memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); + +@@ -1131,25 +1141,88 @@ static int netlink_sendmsg(struct kiocb + */ + + err = -EFAULT; +- if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) { +- kfree_skb(skb); +- goto out; +- } ++ if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) ++ goto out_free; + + err = security_netlink_send(sk, skb); +- if (err) { +- kfree_skb(skb); +- goto out; ++ if (err) ++ goto out_free; ++ ++ timeo = sock_sndtimeo(sk, msg->msg_flags&MSG_DONTWAIT); ++retry: ++ dstsk = netlink_getsockbypid(sk, dst_pid); ++ if (IS_ERR(dstsk)) { ++ err = PTR_ERR(dstsk); ++ goto out_free; + } + +- if (dst_group) { +- atomic_inc(&skb->users); +- netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); ++ nlk = nlk_sk(dstsk); ++#ifdef NL_EMULATE_DEV ++ if (nlk->handler) { ++ skb_orphan(skb); ++ err = nlk->handler(protocol, skb); ++ goto out_put; ++ } ++#endif ++ ++ /* BTW, it could be done once, before the retry loop */ ++ chargesize = skb_charge_fullsize(skb); ++ no_ubc = ub_sock_getwres_other(sk, chargesize); ++ no_buf = atomic_read(&dstsk->sk_rmem_alloc) > 
dstsk->sk_rcvbuf || ++ test_bit(0, &nlk->state); ++ if (no_ubc || no_buf) { ++ wait_queue_head_t *sleep; ++ ++ if (!no_ubc) ++ ub_sock_retwres_other(sk, chargesize, ++ SOCK_MIN_UBCSPACE_CH); ++ err = -EAGAIN; ++ if (timeo == 0) { ++ kfree_skb(skb); ++ goto out_put; ++ } ++ ++ /* wake up comes to different queues */ ++ sleep = no_ubc ? sk->sk_sleep : &nlk->wait; ++ __set_current_state(TASK_INTERRUPTIBLE); ++ add_wait_queue(sleep, &wait); ++ ++ /* this if can't be moved upper because ub_sock_snd_queue_add() ++ * may change task state to TASK_RUNNING */ ++ if (no_ubc) ++ ub_sock_sndqueueadd_other(sk, chargesize); ++ ++ if ((atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf || ++ test_bit(0, &nlk->state) || no_ubc) && ++ !sock_flag(dstsk, SOCK_DEAD)) ++ timeo = schedule_timeout(timeo); ++ ++ __set_current_state(TASK_RUNNING); ++ remove_wait_queue(sleep, &wait); ++ if (no_ubc) ++ ub_sock_sndqueuedel(sk); ++ sock_put(dstsk); ++ ++ if (!signal_pending(current)) ++ goto retry; ++ err = sock_intr_errno(timeo); ++ goto out_free; + } +- err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); + ++ skb_orphan(skb); ++ skb_set_owner_r(skb, dstsk); ++ ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF); ++ skb_queue_tail(&dstsk->sk_receive_queue, skb); ++ dstsk->sk_data_ready(dstsk, len); ++ err = len; ++out_put: ++ sock_put(dstsk); + out: + return err; ++ ++out_free: ++ kfree_skb(skb); ++ return err; + } + + static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, +@@ -1303,6 +1376,10 @@ static int netlink_dump(struct sock *sk) + skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); + if (!skb) + return -ENOBUFS; ++ if (ub_nlrcvbuf_charge(skb, sk) < 0) { ++ kfree_skb(skb); ++ return -EACCES; ++ } + + spin_lock(&nlk->cb_lock); + +@@ -1365,9 +1442,9 @@ int netlink_dump_start(struct sock *ssk, + return -ECONNREFUSED; + } + nlk = nlk_sk(sk); +- /* A dump is in progress... */ ++ /* A dump or destruction is in progress... 
*/ + spin_lock(&nlk->cb_lock); +- if (nlk->cb) { ++ if (nlk->cb || sock_flag(sk, SOCK_DEAD)) { + spin_unlock(&nlk->cb_lock); + netlink_destroy_callback(cb); + sock_put(sk); +@@ -1471,8 +1548,15 @@ void netlink_run_queue(struct sock *sk, + *qlen = skb_queue_len(&sk->sk_receive_queue); + + for (; *qlen; (*qlen)--) { ++ int ret; ++ struct ve_struct *old_env; + skb = skb_dequeue(&sk->sk_receive_queue); +- if (netlink_rcv_skb(skb, cb)) { ++ ++ old_env = set_exec_env(VE_OWNER_SKB(skb)); ++ ret = netlink_rcv_skb(skb, cb); ++ (void)set_exec_env(old_env); ++ ++ if (ret) { + if (skb->len) + skb_queue_head(&sk->sk_receive_queue, skb); + else { +@@ -1740,6 +1824,7 @@ enomem: + + sock_register(&netlink_family_ops); + #ifdef CONFIG_PROC_FS ++ /* FIXME: virtualize before give access from VEs */ + proc_net_fops_create("netlink", 0, &netlink_seq_fops); + #endif + /* The netlink device handler may be needed early. */ +diff -upr linux-2.6.16.orig/net/packet/af_packet.c linux-2.6.16-026test015/net/packet/af_packet.c +--- linux-2.6.16.orig/net/packet/af_packet.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/packet/af_packet.c 2006-07-04 14:41:38.000000000 +0400 +@@ -79,6 +79,8 @@ + #include <linux/module.h> + #include <linux/init.h> + ++#include <ub/ub_net.h> ++ + #ifdef CONFIG_INET + #include <net/inet_common.h> + #endif +@@ -280,7 +282,8 @@ static int packet_rcv_spkt(struct sk_buf + * so that this procedure is noop. 
+ */ + +- if (skb->pkt_type == PACKET_LOOPBACK) ++ if (skb->pkt_type == PACKET_LOOPBACK || ++ !ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk))) + goto out; + + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) +@@ -472,6 +475,9 @@ static int packet_rcv(struct sk_buff *sk + sk = pt->af_packet_priv; + po = pkt_sk(sk); + ++ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk))) ++ goto drop; ++ + skb->dev = dev; + + if (dev->hard_header) { +@@ -531,6 +537,9 @@ static int packet_rcv(struct sk_buff *sk + if (pskb_trim(skb, snaplen)) + goto drop_n_acct; + ++ if (ub_sockrcvbuf_charge(sk, skb)) ++ goto drop_n_acct; ++ + skb_set_owner_r(skb, sk); + skb->dev = NULL; + dst_release(skb->dst); +@@ -581,6 +590,9 @@ static int tpacket_rcv(struct sk_buff *s + sk = pt->af_packet_priv; + po = pkt_sk(sk); + ++ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk))) ++ goto drop; ++ + if (dev->hard_header) { + if (sk->sk_type != SOCK_DGRAM) + skb_push(skb, skb->data - skb->mac.raw); +@@ -630,6 +642,12 @@ static int tpacket_rcv(struct sk_buff *s + if (snaplen > skb->len-skb->data_len) + snaplen = skb->len-skb->data_len; + ++ if (copy_skb && ++ ub_sockrcvbuf_charge(sk, copy_skb)) { ++ spin_lock(&sk->sk_receive_queue.lock); ++ goto ring_is_full; ++ } ++ + spin_lock(&sk->sk_receive_queue.lock); + h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head); + +@@ -1010,6 +1028,8 @@ static int packet_create(struct socket * + sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); + if (sk == NULL) + goto out; ++ if (ub_other_sock_charge(sk)) ++ goto out_free; + + sock->ops = &packet_ops; + #ifdef CONFIG_SOCK_PACKET +@@ -1048,6 +1068,9 @@ static int packet_create(struct socket * + sk_add_node(sk, &packet_sklist); + write_unlock_bh(&packet_sklist_lock); + return(0); ++ ++out_free: ++ sk_free(sk); + out: + return err; + } +@@ -1430,11 +1453,16 @@ static int packet_notifier(struct notifi + struct sock *sk; + struct hlist_node *node; + struct net_device *dev = (struct net_device*)data; ++ 
struct ve_struct *ve; + ++ ve = get_exec_env(); + read_lock(&packet_sklist_lock); + sk_for_each(sk, node, &packet_sklist) { + struct packet_sock *po = pkt_sk(sk); + ++ if (!ve_accessible_strict(VE_OWNER_SK(sk), ve)) ++ continue; ++ + switch (msg) { + case NETDEV_UNREGISTER: + #ifdef CONFIG_PACKET_MULTICAST +@@ -1845,6 +1873,8 @@ static inline struct sock *packet_seq_id + struct hlist_node *node; + + sk_for_each(s, node, &packet_sklist) { ++ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env())) ++ continue; + if (!off--) + return s; + } +@@ -1860,9 +1890,13 @@ static void *packet_seq_start(struct seq + static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) + { + ++*pos; +- return (v == SEQ_START_TOKEN) +- ? sk_head(&packet_sklist) +- : sk_next((struct sock*)v) ; ++ do { ++ v = (v == SEQ_START_TOKEN) ++ ? sk_head(&packet_sklist) ++ : sk_next((struct sock*)v); ++ } while (v != NULL && ++ !ve_accessible(VE_OWNER_SK((struct sock*)v), get_exec_env())); ++ return v; + } + + static void packet_seq_stop(struct seq_file *seq, void *v) +@@ -1918,7 +1952,7 @@ static struct file_operations packet_seq + + static void __exit packet_exit(void) + { +- proc_net_remove("packet"); ++ remove_proc_glob_entry("net/packet", NULL); + unregister_netdevice_notifier(&packet_netdev_notifier); + sock_unregister(PF_PACKET); + proto_unregister(&packet_proto); +@@ -1933,7 +1967,7 @@ static int __init packet_init(void) + + sock_register(&packet_family_ops); + register_netdevice_notifier(&packet_netdev_notifier); +- proc_net_fops_create("packet", 0, &packet_seq_fops); ++ proc_glob_fops_create("net/packet", 0, &packet_seq_fops); + out: + return rc; + } +diff -upr linux-2.6.16.orig/net/sched/sch_cbq.c linux-2.6.16-026test015/net/sched/sch_cbq.c +--- linux-2.6.16.orig/net/sched/sch_cbq.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sched/sch_cbq.c 2006-07-04 14:41:37.000000000 +0400 +@@ -932,8 +932,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int + + if (cl->deficit 
<= 0) { + q->active[prio] = cl; +- cl = cl->next_alive; + cl->deficit += cl->quantum; ++ cl = cl->next_alive; + } + return skb; + +@@ -1109,17 +1109,19 @@ static void cbq_normalize_quanta(struct + + for (h=0; h<16; h++) { + for (cl = q->classes[h]; cl; cl = cl->next) { ++ long mtu; + /* BUGGGG... Beware! This expression suffer of + arithmetic overflows! + */ + if (cl->priority == prio) { +- cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ +- q->quanta[prio]; +- } +- if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) { +- printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum); +- cl->quantum = cl->qdisc->dev->mtu/2 + 1; ++ cl->quantum = (cl->weight * cl->allot) / ++ (q->quanta[prio] / q->nclasses[prio]); + } ++ mtu = cl->qdisc->dev->mtu; ++ if (cl->quantum <= mtu/2) ++ cl->quantum = mtu/2 + 1; ++ else if (cl->quantum > 32*mtu) ++ cl->quantum = 32*mtu; + } + } + } +diff -upr linux-2.6.16.orig/net/sched/sch_generic.c linux-2.6.16-026test015/net/sched/sch_generic.c +--- linux-2.6.16.orig/net/sched/sch_generic.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sched/sch_generic.c 2006-07-04 14:41:38.000000000 +0400 +@@ -97,6 +97,7 @@ int qdisc_restart(struct net_device *dev + + /* Dequeue packet */ + if ((skb = q->dequeue(q)) != NULL) { ++ struct ve_struct *envid; + unsigned nolock = (dev->features & NETIF_F_LLTX); + /* + * When the driver has LLTX set it does its own locking +@@ -107,6 +108,7 @@ int qdisc_restart(struct net_device *dev + * of lock congestion it should return -1 and the packet + * will be requeued. 
+ */ ++ envid = set_exec_env(VE_OWNER_SKB(skb)); + if (!nolock) { + if (!spin_trylock(&dev->xmit_lock)) { + collision: +@@ -121,6 +123,7 @@ int qdisc_restart(struct net_device *dev + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); ++ (void)set_exec_env(envid); + return -1; + } + __get_cpu_var(netdev_rx_stat).cpu_collision++; +@@ -146,6 +149,7 @@ int qdisc_restart(struct net_device *dev + spin_unlock(&dev->xmit_lock); + } + spin_lock(&dev->queue_lock); ++ (void)set_exec_env(envid); + return -1; + } + if (ret == NETDEV_TX_LOCKED && nolock) { +@@ -177,6 +181,7 @@ int qdisc_restart(struct net_device *dev + requeue: + q->ops->requeue(skb, q); + netif_schedule(dev); ++ (void)set_exec_env(envid); + return 1; + } + BUG_ON((int) q->q.qlen < 0); +@@ -625,3 +630,4 @@ EXPORT_SYMBOL(qdisc_reset); + EXPORT_SYMBOL(qdisc_restart); + EXPORT_SYMBOL(qdisc_lock_tree); + EXPORT_SYMBOL(qdisc_unlock_tree); ++EXPORT_SYMBOL(dev_shutdown); +diff -upr linux-2.6.16.orig/net/sched/sch_teql.c linux-2.6.16-026test015/net/sched/sch_teql.c +--- linux-2.6.16.orig/net/sched/sch_teql.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sched/sch_teql.c 2006-07-04 14:41:38.000000000 +0400 +@@ -189,6 +189,9 @@ static int teql_qdisc_init(struct Qdisc + struct teql_master *m = (struct teql_master*)sch->ops; + struct teql_sched_data *q = qdisc_priv(sch); + ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ + if (dev->hard_header_len > m->dev->hard_header_len) + return -EINVAL; + +diff -upr linux-2.6.16.orig/net/sctp/inqueue.c linux-2.6.16-026test015/net/sctp/inqueue.c +--- linux-2.6.16.orig/net/sctp/inqueue.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sctp/inqueue.c 2006-07-04 14:41:36.000000000 +0400 +@@ -149,6 +149,7 @@ struct sctp_chunk *sctp_inq_pop(struct s + /* This is the first chunk in the packet. 
*/ + chunk->singleton = 1; + ch = (sctp_chunkhdr_t *) chunk->skb->data; ++ chunk->data_accepted = 0; + } + + chunk->chunk_hdr = ch; +diff -upr linux-2.6.16.orig/net/sctp/sm_statefuns.c linux-2.6.16-026test015/net/sctp/sm_statefuns.c +--- linux-2.6.16.orig/net/sctp/sm_statefuns.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sctp/sm_statefuns.c 2006-07-04 14:41:36.000000000 +0400 +@@ -636,8 +636,9 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(co + */ + chunk->subh.cookie_hdr = + (struct sctp_signed_cookie *)chunk->skb->data; +- skb_pull(chunk->skb, +- ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t)); ++ if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) - ++ sizeof(sctp_chunkhdr_t))) ++ goto nomem; + + /* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint + * "Z" will reply with a COOKIE ACK chunk after building a TCB +@@ -965,7 +966,8 @@ sctp_disposition_t sctp_sf_beat_8_3(cons + */ + chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data; + paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); +- skb_pull(chunk->skb, paylen); ++ if (!pskb_pull(chunk->skb, paylen)) ++ goto nomem; + + reply = sctp_make_heartbeat_ack(asoc, chunk, + chunk->subh.hb_hdr, paylen); +@@ -1028,6 +1030,12 @@ sctp_disposition_t sctp_sf_backbeat_8_3( + commands); + + hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data; ++ /* Make sure that the length of the parameter is what we expect */ ++ if (ntohs(hbinfo->param_hdr.length) != ++ sizeof(sctp_sender_hb_info_t)) { ++ return SCTP_DISPOSITION_DISCARD; ++ } ++ + from_addr = hbinfo->daddr; + link = sctp_assoc_lookup_paddr(asoc, &from_addr); + +@@ -1860,8 +1868,9 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupc + * are in good shape. 
+ */ + chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data; +- skb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) - +- sizeof(sctp_chunkhdr_t)); ++ if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) - ++ sizeof(sctp_chunkhdr_t))) ++ goto nomem; + + /* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie + * of a duplicate COOKIE ECHO match the Verification Tags of the +@@ -5151,7 +5160,9 @@ static int sctp_eat_data(const struct sc + int tmp; + __u32 tsn; + int account_value; ++ struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; + struct sock *sk = asoc->base.sk; ++ int rcvbuf_over = 0; + + data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; + skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); +@@ -5162,10 +5173,16 @@ static int sctp_eat_data(const struct sc + /* ASSERT: Now skb->data is really the user data. */ + + /* +- * if we are established, and we have used up our receive +- * buffer memory, drop the frame ++ * If we are established, and we have used up our receive buffer ++ * memory, think about droping the frame. ++ * Note that we have an opportunity to improve performance here. ++ * If we accept one chunk from an skbuff, we have to keep all the ++ * memory of that skbuff around until the chunk is read into user ++ * space. Therefore, once we accept 1 chunk we may as well accept all ++ * remaining chunks in the skbuff. The data_accepted flag helps us do ++ * that. 
+ */ +- if (asoc->state == SCTP_STATE_ESTABLISHED) { ++ if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) { + /* + * If the receive buffer policy is 1, then each + * association can allocate up to sk_rcvbuf bytes +@@ -5176,9 +5193,25 @@ static int sctp_eat_data(const struct sc + account_value = atomic_read(&asoc->rmem_alloc); + else + account_value = atomic_read(&sk->sk_rmem_alloc); +- +- if (account_value > sk->sk_rcvbuf) +- return SCTP_IERROR_IGNORE_TSN; ++ if (account_value > sk->sk_rcvbuf) { ++ /* ++ * We need to make forward progress, even when we are ++ * under memory pressure, so we always allow the ++ * next tsn after the ctsn ack point to be accepted. ++ * This lets us avoid deadlocks in which we have to ++ * drop frames that would otherwise let us drain the ++ * receive queue. ++ */ ++ if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn) ++ return SCTP_IERROR_IGNORE_TSN; ++ ++ /* ++ * We're going to accept the frame but we should renege ++ * to make space for it. This will send us down that ++ * path later in this function. ++ */ ++ rcvbuf_over = 1; ++ } + } + + /* Process ECN based congestion. +@@ -5226,6 +5259,7 @@ static int sctp_eat_data(const struct sc + datalen -= sizeof(sctp_data_chunk_t); + + deliver = SCTP_CMD_CHUNK_ULP; ++ chunk->data_accepted = 1; + + /* Think about partial delivery. */ + if ((datalen >= asoc->rwnd) && (!asoc->ulpq.pd_mode)) { +@@ -5242,7 +5276,8 @@ static int sctp_eat_data(const struct sc + * large spill over. + */ + if (!asoc->rwnd || asoc->rwnd_over || +- (datalen > asoc->rwnd + asoc->frag_point)) { ++ (datalen > asoc->rwnd + asoc->frag_point) || ++ rcvbuf_over) { + + /* If this is the next TSN, consider reneging to make + * room. Note: Playing nice with a confused sender. A +@@ -5250,8 +5285,8 @@ static int sctp_eat_data(const struct sc + * space and in the future we may want to detect and + * do more drastic reneging. 
+ */ +- if (sctp_tsnmap_has_gap(&asoc->peer.tsn_map) && +- (sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1) == tsn) { ++ if (sctp_tsnmap_has_gap(map) && ++ (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { + SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn); + deliver = SCTP_CMD_RENEGE; + } else { +diff -upr linux-2.6.16.orig/net/sctp/sm_statetable.c linux-2.6.16-026test015/net/sctp/sm_statetable.c +--- linux-2.6.16.orig/net/sctp/sm_statetable.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sctp/sm_statetable.c 2006-07-04 14:41:36.000000000 +0400 +@@ -366,9 +366,9 @@ const sctp_sm_table_entry_t *sctp_sm_loo + /* SCTP_STATE_EMPTY */ \ + {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + /* SCTP_STATE_CLOSED */ \ +- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ ++ {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + /* SCTP_STATE_COOKIE_WAIT */ \ +- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ ++ {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + /* SCTP_STATE_COOKIE_ECHOED */ \ + {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \ + /* SCTP_STATE_ESTABLISHED */ \ +@@ -380,7 +380,7 @@ const sctp_sm_table_entry_t *sctp_sm_loo + /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ + {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \ + /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ +- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ ++ {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + } /* TYPE_SCTP_ECN_ECNE */ + + #define TYPE_SCTP_ECN_CWR { \ +@@ -401,7 +401,7 @@ const sctp_sm_table_entry_t *sctp_sm_loo + /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ + {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ +- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ ++ {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + } /* TYPE_SCTP_ECN_CWR */ + + #define TYPE_SCTP_SHUTDOWN_COMPLETE { \ +@@ -647,7 +647,7 @@ chunk_event_table_unknown[SCTP_STATE_NUM + /* SCTP_STATE_EMPTY */ \ + {.fn = 
sctp_sf_bug, .name = "sctp_sf_bug"}, \ + /* SCTP_STATE_CLOSED */ \ +- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ ++ {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \ + /* SCTP_STATE_COOKIE_WAIT */ \ + {.fn = sctp_sf_do_prm_requestheartbeat, \ + .name = "sctp_sf_do_prm_requestheartbeat"}, \ +diff -upr linux-2.6.16.orig/net/sctp/ulpqueue.c linux-2.6.16-026test015/net/sctp/ulpqueue.c +--- linux-2.6.16.orig/net/sctp/ulpqueue.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sctp/ulpqueue.c 2006-07-04 14:41:36.000000000 +0400 +@@ -279,6 +279,7 @@ static inline void sctp_ulpq_store_reasm + static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) + { + struct sk_buff *pos; ++ struct sk_buff *new = NULL; + struct sctp_ulpevent *event; + struct sk_buff *pnext, *last; + struct sk_buff *list = skb_shinfo(f_frag)->frag_list; +@@ -297,11 +298,33 @@ static struct sctp_ulpevent *sctp_make_r + */ + if (last) + last->next = pos; +- else +- skb_shinfo(f_frag)->frag_list = pos; ++ else { ++ if (skb_cloned(f_frag)) { ++ /* This is a cloned skb, we can't just modify ++ * the frag_list. We need a new skb to do that. ++ * Instead of calling skb_unshare(), we'll do it ++ * ourselves since we need to delay the free. ++ */ ++ new = skb_copy(f_frag, GFP_ATOMIC); ++ if (!new) ++ return NULL; /* try again later */ ++ ++ new->sk = f_frag->sk; ++ ++ skb_shinfo(new)->frag_list = pos; ++ } else ++ skb_shinfo(f_frag)->frag_list = pos; ++ } + + /* Remove the first fragment from the reassembly queue. 
*/ + __skb_unlink(f_frag, queue); ++ ++ /* if we did unshare, then free the old skb and re-assign */ ++ if (new) { ++ kfree_skb(f_frag); ++ f_frag = new; ++ } ++ + while (pos) { + + pnext = pos->next; +diff -upr linux-2.6.16.orig/net/socket.c linux-2.6.16-026test015/net/socket.c +--- linux-2.6.16.orig/net/socket.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/socket.c 2006-07-04 14:41:39.000000000 +0400 +@@ -84,6 +84,7 @@ + #include <linux/compat.h> + #include <linux/kmod.h> + #include <linux/audit.h> ++#include <linux/in.h> + + #ifdef CONFIG_NET_RADIO + #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ +@@ -1075,6 +1076,49 @@ int sock_wake_async(struct socket *sock, + return 0; + } + ++int vz_security_proto_check(int family, int type, int protocol) ++{ ++#ifdef CONFIG_VE ++ if (ve_is_super(get_exec_env())) ++ return 0; ++ ++ switch (family) { ++ case PF_UNSPEC: ++ case PF_PACKET: ++ case PF_NETLINK: ++ case PF_UNIX: ++ break; ++ case PF_INET: ++ switch (protocol) { ++ case IPPROTO_IP: ++ case IPPROTO_ICMP: ++ case IPPROTO_TCP: ++ case IPPROTO_UDP: ++ case IPPROTO_RAW: ++ break; ++ default: ++ return -EAFNOSUPPORT; ++ } ++ break; ++ case PF_INET6: ++ switch (protocol) { ++ case IPPROTO_IP: ++ case IPPROTO_ICMPV6: ++ case IPPROTO_TCP: ++ case IPPROTO_UDP: ++ case IPPROTO_RAW: ++ break; ++ default: ++ return -EAFNOSUPPORT; ++ } ++ break; ++ default: ++ return -EAFNOSUPPORT; ++ } ++#endif ++ return 0; ++} ++ + static int __sock_create(int family, int type, int protocol, struct socket **res, int kern) + { + int err; +@@ -1102,6 +1146,11 @@ static int __sock_create(int family, int + family = PF_PACKET; + } + ++ /* VZ compatibility layer */ ++ err = vz_security_proto_check(family, type, protocol); ++ if (err < 0) ++ return err; ++ + err = security_socket_create(family, type, protocol, kern); + if (err) + return err; +diff -upr linux-2.6.16.orig/net/sunrpc/clnt.c linux-2.6.16-026test015/net/sunrpc/clnt.c +--- 
linux-2.6.16.orig/net/sunrpc/clnt.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sunrpc/clnt.c 2006-07-04 14:41:38.000000000 +0400 +@@ -168,10 +168,10 @@ rpc_new_client(struct rpc_xprt *xprt, ch + } + + /* save the nodename */ +- clnt->cl_nodelen = strlen(system_utsname.nodename); ++ clnt->cl_nodelen = strlen(ve_utsname.nodename); + if (clnt->cl_nodelen > UNX_MAXNODENAME) + clnt->cl_nodelen = UNX_MAXNODENAME; +- memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen); ++ memcpy(clnt->cl_nodename, ve_utsname.nodename, clnt->cl_nodelen); + return clnt; + + out_no_auth: +diff -upr linux-2.6.16.orig/net/sunrpc/sched.c linux-2.6.16-026test015/net/sunrpc/sched.c +--- linux-2.6.16.orig/net/sunrpc/sched.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sunrpc/sched.c 2006-07-04 14:41:38.000000000 +0400 +@@ -605,7 +605,9 @@ EXPORT_SYMBOL(rpc_exit_task); + static int __rpc_execute(struct rpc_task *task) + { + int status = 0; ++ struct ve_struct *env; + ++ env = set_exec_env(get_ve0()); + dprintk("RPC: %4d rpc_execute flgs %x\n", + task->tk_pid, task->tk_flags); + +@@ -693,6 +695,7 @@ static int __rpc_execute(struct rpc_task + rpc_mark_complete_task(task); + /* Release all resources associated with the task */ + rpc_release_task(task); ++ (void)set_exec_env(env); + return status; + } + +diff -upr linux-2.6.16.orig/net/sunrpc/svcsock.c linux-2.6.16-026test015/net/sunrpc/svcsock.c +--- linux-2.6.16.orig/net/sunrpc/svcsock.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/sunrpc/svcsock.c 2006-07-04 14:41:38.000000000 +0400 +@@ -361,6 +361,9 @@ svc_sendto(struct svc_rqst *rqstp, struc + size_t base = xdr->page_base; + unsigned int pglen = xdr->page_len; + unsigned int flags = MSG_MORE; ++ struct ve_struct *old_env; ++ ++ old_env = set_exec_env(get_ve0()); + + slen = xdr->len; + +@@ -425,6 +428,8 @@ out: + rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len, + 
rqstp->rq_addr.sin_addr.s_addr); + ++ (void)set_exec_env(old_env); ++ + return len; + } + +@@ -437,9 +442,12 @@ svc_recv_available(struct svc_sock *svsk + mm_segment_t oldfs; + struct socket *sock = svsk->sk_sock; + int avail, err; ++ struct ve_struct *old_env; + + oldfs = get_fs(); set_fs(KERNEL_DS); ++ old_env = set_exec_env(get_ve0()); + err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); ++ (void)set_exec_env(old_env); + set_fs(oldfs); + + return (err >= 0)? avail : err; +@@ -454,6 +462,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str + struct msghdr msg; + struct socket *sock; + int len, alen; ++ struct ve_struct *old_env; + + rqstp->rq_addrlen = sizeof(rqstp->rq_addr); + sock = rqstp->rq_sock->sk_sock; +@@ -465,7 +474,9 @@ svc_recvfrom(struct svc_rqst *rqstp, str + + msg.msg_flags = MSG_DONTWAIT; + ++ old_env = set_exec_env(get_ve0()); + len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT); ++ (void)set_exec_env(get_ve0()); + + /* sock_recvmsg doesn't fill in the name/namelen, so we must.. 
+ * possibly we should cache this in the svc_sock structure +@@ -761,17 +772,19 @@ svc_tcp_accept(struct svc_sock *svsk) + const struct proto_ops *ops; + struct svc_sock *newsvsk; + int err, slen; ++ struct ve_struct *old_env; + + dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); + if (!sock) + return; + ++ old_env = set_exec_env(get_ve0()); + err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); + if (err) { + if (err == -ENOMEM) + printk(KERN_WARNING "%s: no more sockets!\n", + serv->sv_name); +- return; ++ goto restore; + } + + dprintk("svc: tcp_accept %p allocated\n", newsock); +@@ -865,6 +878,8 @@ svc_tcp_accept(struct svc_sock *svsk) + + } + ++ (void)set_exec_env(old_env); ++ + if (serv->sv_stats) + serv->sv_stats->nettcpconn++; + +@@ -872,6 +887,8 @@ svc_tcp_accept(struct svc_sock *svsk) + + failed: + sock_release(newsock); ++restore: ++ (void)set_exec_env(old_env); + return; + } + +@@ -1388,6 +1405,7 @@ svc_create_socket(struct svc_serv *serv, + struct socket *sock; + int error; + int type; ++ struct ve_struct *old_env; + + dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n", + serv->sv_program->pg_name, protocol, +@@ -1401,8 +1419,10 @@ svc_create_socket(struct svc_serv *serv, + } + type = (protocol == IPPROTO_UDP)? 
SOCK_DGRAM : SOCK_STREAM; + ++ old_env = set_exec_env(get_ve0()); ++ + if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) +- return error; ++ goto restore; + + if (sin != NULL) { + if (type == SOCK_STREAM) +@@ -1418,12 +1438,16 @@ svc_create_socket(struct svc_serv *serv, + goto bummer; + } + +- if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) ++ if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) { ++ (void)set_exec_env(old_env); + return 0; ++ } + + bummer: + dprintk("svc: svc_create_socket error = %d\n", -error); + sock_release(sock); ++restore: ++ (void)set_exec_env(old_env); + return error; + } + +diff -upr linux-2.6.16.orig/net/unix/af_unix.c linux-2.6.16-026test015/net/unix/af_unix.c +--- linux-2.6.16.orig/net/unix/af_unix.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/unix/af_unix.c 2006-07-04 14:41:38.000000000 +0400 +@@ -118,6 +118,9 @@ + #include <net/checksum.h> + #include <linux/security.h> + ++#include <ub/ub_net.h> ++#include <ub/beancounter.h> ++ + int sysctl_unix_max_dgram_qlen = 10; + + struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +@@ -235,6 +238,8 @@ static struct sock *__unix_find_socket_b + sk_for_each(s, node, &unix_socket_table[hash ^ type]) { + struct unix_sock *u = unix_sk(s); + ++ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env())) ++ continue; + if (u->addr->len == len && + !memcmp(u->addr->name, sunname, len)) + goto found; +@@ -439,7 +444,7 @@ static int unix_listen(struct socket *so + sk->sk_max_ack_backlog = backlog; + sk->sk_state = TCP_LISTEN; + /* set credentials so connect can copy them */ +- sk->sk_peercred.pid = current->tgid; ++ sk->sk_peercred.pid = virt_tgid(current); + sk->sk_peercred.uid = current->euid; + sk->sk_peercred.gid = current->egid; + err = 0; +@@ -553,6 +558,8 @@ static struct sock * unix_create1(struct + sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); + if (!sk) + goto out; ++ if (ub_other_sock_charge(sk)) ++ goto out_sk_free; + + 
atomic_inc(&unix_nr_socks); + +@@ -571,6 +578,9 @@ static struct sock * unix_create1(struct + unix_insert_socket(unix_sockets_unbound, sk); + out: + return sk; ++out_sk_free: ++ sk_free(sk); ++ return NULL; + } + + static int unix_create(struct socket *sock, int protocol) +@@ -676,7 +686,7 @@ static struct sock *unix_find_other(stru + err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); + if (err) + goto fail; +- err = vfs_permission(&nd, MAY_WRITE); ++ err = vfs_permission(&nd, MAY_WRITE, NULL); + if (err) + goto put_fail; + +@@ -932,6 +942,7 @@ static int unix_stream_connect(struct so + int st; + int err; + long timeo; ++ unsigned long chargesize; + + err = unix_mkname(sunaddr, addr_len, &hash); + if (err < 0) +@@ -960,6 +971,10 @@ static int unix_stream_connect(struct so + skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); + if (skb == NULL) + goto out; ++ chargesize = skb_charge_fullsize(skb); ++ if (ub_sock_getwres_other(newsk, chargesize) < 0) ++ goto out; ++ ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF); + + restart: + /* Find listening sock. 
*/ +@@ -1043,7 +1058,7 @@ restart: + unix_peer(newsk) = sk; + newsk->sk_state = TCP_ESTABLISHED; + newsk->sk_type = sk->sk_type; +- newsk->sk_peercred.pid = current->tgid; ++ newsk->sk_peercred.pid = virt_tgid(current); + newsk->sk_peercred.uid = current->euid; + newsk->sk_peercred.gid = current->egid; + newu = unix_sk(newsk); +@@ -1107,7 +1122,7 @@ static int unix_socketpair(struct socket + sock_hold(skb); + unix_peer(ska)=skb; + unix_peer(skb)=ska; +- ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid; ++ ska->sk_peercred.pid = skb->sk_peercred.pid = virt_tgid(current); + ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid; + ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid; + +@@ -1433,6 +1448,16 @@ static int unix_stream_sendmsg(struct ki + + size=len-sent; + ++ if (msg->msg_flags & MSG_DONTWAIT) ++ ub_sock_makewres_other(sk, skb_charge_size(size)); ++ if (sock_bc(sk) != NULL && ++ sock_bc(sk)->poll_reserv >= ++ SOCK_MIN_UBCSPACE && ++ skb_charge_size(size) > ++ sock_bc(sk)->poll_reserv) ++ size = skb_charge_datalen(sock_bc(sk)->poll_reserv); ++ ++ + /* Keep two messages in the pipe so it schedules better */ + if (size > sk->sk_sndbuf / 2 - 64) + size = sk->sk_sndbuf / 2 - 64; +@@ -1444,7 +1469,8 @@ static int unix_stream_sendmsg(struct ki + * Grab a buffer + */ + +- skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err); ++ skb = sock_alloc_send_skb2(sk, size, SOCK_MIN_UBCSPACE, ++ msg->msg_flags&MSG_DONTWAIT, &err); + + if (skb==NULL) + goto out_err; +@@ -1869,6 +1895,7 @@ static unsigned int unix_poll(struct fil + { + struct sock *sk = sock->sk; + unsigned int mask; ++ int no_ub_res; + + poll_wait(file, sk->sk_sleep, wait); + mask = 0; +@@ -1879,6 +1906,10 @@ static unsigned int unix_poll(struct fil + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + ++ no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH); ++ if (no_ub_res) ++ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH); ++ + /* readable? 
*/ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) +@@ -1892,7 +1923,7 @@ static unsigned int unix_poll(struct fil + * we set writable also when the other side has shut down the + * connection. This prevents stuck sockets. + */ +- if (unix_writable(sk)) ++ if (!no_ub_res && unix_writable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +@@ -2044,7 +2075,7 @@ static int __init af_unix_init(void) + + sock_register(&unix_family_ops); + #ifdef CONFIG_PROC_FS +- proc_net_fops_create("unix", 0, &unix_seq_fops); ++ proc_glob_fops_create("net/unix", 0, &unix_seq_fops); + #endif + unix_sysctl_register(); + out: +@@ -2055,7 +2086,7 @@ static void __exit af_unix_exit(void) + { + sock_unregister(PF_UNIX); + unix_sysctl_unregister(); +- proc_net_remove("unix"); ++ remove_proc_glob_entry("net/unix", NULL); + proto_unregister(&unix_proto); + } + +diff -upr linux-2.6.16.orig/net/unix/garbage.c linux-2.6.16-026test015/net/unix/garbage.c +--- linux-2.6.16.orig/net/unix/garbage.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/net/unix/garbage.c 2006-07-04 14:41:39.000000000 +0400 +@@ -76,6 +76,7 @@ + #include <linux/netdevice.h> + #include <linux/file.h> + #include <linux/proc_fs.h> ++#include <linux/module.h> + + #include <net/sock.h> + #include <net/af_unix.h> +@@ -135,7 +136,7 @@ void unix_notinflight(struct file *fp) + atomic_dec(&unix_tot_inflight); + } + } +- ++EXPORT_SYMBOL_GPL(unix_notinflight); + + /* + * Garbage Collector Support Functions +diff -upr linux-2.6.16.orig/security/commoncap.c linux-2.6.16-026test015/security/commoncap.c +--- linux-2.6.16.orig/security/commoncap.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/security/commoncap.c 2006-07-04 14:41:38.000000000 +0400 +@@ -35,7 +35,7 @@ EXPORT_SYMBOL(cap_netlink_send); + + int cap_netlink_recv(struct sk_buff *skb) + { +- if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) ++ if (!cap_raised(NETLINK_CB(skb).eff_cap, 
CAP_VE_NET_ADMIN)) + return -EPERM; + return 0; + } +@@ -197,7 +197,7 @@ int cap_inode_setxattr(struct dentry *de + { + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) && +- !capable(CAP_SYS_ADMIN)) ++ !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN)) + return -EPERM; + return 0; + } +@@ -206,7 +206,7 @@ int cap_inode_removexattr(struct dentry + { + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) && +- !capable(CAP_SYS_ADMIN)) ++ !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN)) + return -EPERM; + return 0; + } +@@ -312,7 +312,7 @@ void cap_task_reparent_to_init (struct t + + int cap_syslog (int type) + { +- if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) ++ if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + return 0; + } +diff -upr linux-2.6.16.orig/security/keys/key.c linux-2.6.16-026test015/security/keys/key.c +--- linux-2.6.16.orig/security/keys/key.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/security/keys/key.c 2006-07-04 14:41:36.000000000 +0400 +@@ -785,6 +785,10 @@ key_ref_t key_create_or_update(key_ref_t + + key_check(keyring); + ++ key_ref = ERR_PTR(-ENOTDIR); ++ if (keyring->type != &key_type_keyring) ++ goto error_2; ++ + down_write(&keyring->sem); + + /* if we're going to allocate a new key, we're going to have +diff -upr linux-2.6.16.orig/security/keys/keyring.c linux-2.6.16-026test015/security/keys/keyring.c +--- linux-2.6.16.orig/security/keys/keyring.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/security/keys/keyring.c 2006-07-04 14:41:36.000000000 +0400 +@@ -437,6 +437,7 @@ EXPORT_SYMBOL(keyring_search); + /* + * search the given keyring only (no recursion) + * - keyring must be locked by caller ++ * - caller must guarantee that the keyring is a keyring + */ + key_ref_t __keyring_search_one(key_ref_t keyring_ref, + const struct key_type *ktype, +diff -upr linux-2.6.16.orig/security/selinux/hooks.c 
linux-2.6.16-026test015/security/selinux/hooks.c +--- linux-2.6.16.orig/security/selinux/hooks.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/security/selinux/hooks.c 2006-07-04 14:41:38.000000000 +0400 +@@ -4167,12 +4167,12 @@ static int selinux_setprocattr(struct ta + struct task_struct *g, *t; + struct mm_struct *mm = p->mm; + read_lock(&tasklist_lock); +- do_each_thread(g, t) ++ do_each_thread_ve(g, t) + if (t->mm == mm && t != p) { + read_unlock(&tasklist_lock); + return -EPERM; + } +- while_each_thread(g, t); ++ while_each_thread_ve(g, t); + read_unlock(&tasklist_lock); + } + +diff -upr linux-2.6.16.orig/security/selinux/ss/mls.c linux-2.6.16-026test015/security/selinux/ss/mls.c +--- linux-2.6.16.orig/security/selinux/ss/mls.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/security/selinux/ss/mls.c 2006-07-04 14:41:36.000000000 +0400 +@@ -264,7 +264,7 @@ int mls_context_to_sid(char oldc, + + if (!selinux_mls_enabled) { + if (def_sid != SECSID_NULL && oldc) +- *scontext += strlen(*scontext); ++ *scontext += strlen(*scontext)+1; + return 0; + } + +diff -upr linux-2.6.16.orig/security/selinux/ss/services.c linux-2.6.16-026test015/security/selinux/ss/services.c +--- linux-2.6.16.orig/security/selinux/ss/services.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/security/selinux/ss/services.c 2006-07-04 14:41:36.000000000 +0400 +@@ -592,6 +592,10 @@ int security_sid_to_context(u32 sid, cha + + *scontext_len = strlen(initial_sid_to_string[sid]) + 1; + scontextp = kmalloc(*scontext_len,GFP_ATOMIC); ++ if (!scontextp) { ++ rc = -ENOMEM; ++ goto out; ++ } + strcpy(scontextp, initial_sid_to_string[sid]); + *scontext = scontextp; + goto out; +diff -upr linux-2.6.16.orig/sound/isa/opti9xx/opti92x-ad1848.c linux-2.6.16-026test015/sound/isa/opti9xx/opti92x-ad1848.c +--- linux-2.6.16.orig/sound/isa/opti9xx/opti92x-ad1848.c 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16-026test015/sound/isa/opti9xx/opti92x-ad1848.c 2006-07-04 14:41:36.000000000 +0400 +@@ -2088,9 +2088,11 @@ static int __init alsa_card_opti9xx_init + int error; + struct platform_device *device; + ++#ifdef CONFIG_PNP + pnp_register_card_driver(&opti9xx_pnpc_driver); + if (snd_opti9xx_pnp_is_probed) + return 0; ++#endif + if (! is_isapnp_selected()) { + error = platform_driver_register(&snd_opti9xx_driver); + if (error < 0) +@@ -2102,7 +2104,9 @@ static int __init alsa_card_opti9xx_init + } + platform_driver_unregister(&snd_opti9xx_driver); + } ++#ifdef CONFIG_PNP + pnp_unregister_card_driver(&opti9xx_pnpc_driver); ++#endif + #ifdef MODULE + printk(KERN_ERR "no OPTi " CHIP_NAME " soundcard found\n"); + #endif +@@ -2115,7 +2119,9 @@ static void __exit alsa_card_opti9xx_exi + platform_device_unregister(snd_opti9xx_platform_device); + platform_driver_unregister(&snd_opti9xx_driver); + } ++#ifdef CONFIG_PNP + pnp_unregister_card_driver(&opti9xx_pnpc_driver); ++#endif + } + + module_init(alsa_card_opti9xx_init) +diff -upr linux-2.6.16.orig/sound/oss/dmasound/tas_common.c linux-2.6.16-026test015/sound/oss/dmasound/tas_common.c +--- linux-2.6.16.orig/sound/oss/dmasound/tas_common.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/sound/oss/dmasound/tas_common.c 2006-07-04 14:41:36.000000000 +0400 +@@ -195,8 +195,8 @@ tas_init(int driver_id, const char *driv + + printk(KERN_INFO "tas driver [%s])\n", driver_name); + +-#ifndef CONFIG_I2C_KEYWEST +- request_module("i2c-keywest"); ++#ifndef CONFIG_I2C_POWERMAC ++ request_module("i2c-powermac"); + #endif + tas_node = find_devices("deq"); + if (tas_node == NULL) +diff -upr linux-2.6.16.orig/sound/pci/hda/patch_realtek.c linux-2.6.16-026test015/sound/pci/hda/patch_realtek.c +--- linux-2.6.16.orig/sound/pci/hda/patch_realtek.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/sound/pci/hda/patch_realtek.c 2006-07-04 14:41:36.000000000 +0400 +@@ -2948,6 +2948,8 @@ static struct 
hda_board_config alc260_cf + { .modelname = "basic", .config = ALC260_BASIC }, + { .pci_subvendor = 0x104d, .pci_subdevice = 0x81bb, + .config = ALC260_BASIC }, /* Sony VAIO */ ++ { .pci_subvendor = 0x152d, .pci_subdevice = 0x0729, ++ .config = ALC260_BASIC }, /* CTL Travel Master U553W */ + { .modelname = "hp", .config = ALC260_HP }, + { .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP }, + { .pci_subvendor = 0x103c, .pci_subdevice = 0x3011, .config = ALC260_HP }, +diff -upr linux-2.6.16.orig/sound/ppc/daca.c linux-2.6.16-026test015/sound/ppc/daca.c +--- linux-2.6.16.orig/sound/ppc/daca.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/sound/ppc/daca.c 2006-07-04 14:41:36.000000000 +0400 +@@ -256,7 +256,7 @@ int __init snd_pmac_daca_init(struct snd + + #ifdef CONFIG_KMOD + if (current->fs->root) +- request_module("i2c-keywest"); ++ request_module("i2c-powermac"); + #endif /* CONFIG_KMOD */ + + mix = kmalloc(sizeof(*mix), GFP_KERNEL); +diff -upr linux-2.6.16.orig/sound/ppc/tumbler.c linux-2.6.16-026test015/sound/ppc/tumbler.c +--- linux-2.6.16.orig/sound/ppc/tumbler.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16-026test015/sound/ppc/tumbler.c 2006-07-04 14:41:36.000000000 +0400 +@@ -1314,7 +1314,7 @@ int __init snd_pmac_tumbler_init(struct + + #ifdef CONFIG_KMOD + if (current->fs->root) +- request_module("i2c-keywest"); ++ request_module("i2c-powermac"); + #endif /* CONFIG_KMOD */ + + mix = kmalloc(sizeof(*mix), GFP_KERNEL); diff --git a/openvz-sources/026.015-r2/0101_diff-merge-2.6.16.24-20060714.patch b/openvz-sources/026.015-r2/0101_diff-merge-2.6.16.24-20060714.patch new file mode 100644 index 0000000..0f43c67 --- /dev/null +++ b/openvz-sources/026.015-r2/0101_diff-merge-2.6.16.24-20060714.patch @@ -0,0 +1,19 @@ +From: OpenVZ team <devel@openvz.org> +Date: Fri, 14 Jul 2006 10:23:43 +0000 (+0400) +Subject: Merged 2.6.16.24 from /linux/kernel/git/stable/linux-2.6.16.y +X-Git-Url: 
http://10.0.101.105/cgi-bin/gitweb.cgi?p=kernel;a=commitdiff;h=9a23ec204b88ab5e678dc3e33fe03d7531167e66 + +Merged 2.6.16.24 from /linux/kernel/git/stable/linux-2.6.16.y +--- + +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1954,7 +1954,7 @@ asmlinkage long sys_prctl(int option, un + error = current->mm->dumpable; + break; + case PR_SET_DUMPABLE: +- if (arg2 < 0 || arg2 > 2) { ++ if (arg2 < 0 || arg2 > 1) { + error = -EINVAL; + break; + } diff --git a/openvz-sources/026.015-r2/0102_procfs-dumpable-race.patch b/openvz-sources/026.015-r2/0102_procfs-dumpable-race.patch new file mode 100644 index 0000000..a02bf91 --- /dev/null +++ b/openvz-sources/026.015-r2/0102_procfs-dumpable-race.patch @@ -0,0 +1,20 @@ +Index: linux-2.6.16-gentoo-r12/fs/proc/base.c +=================================================================== +--- linux-2.6.16-gentoo-r12.orig/fs/proc/base.c ++++ linux-2.6.16-gentoo-r12/fs/proc/base.c +@@ -1367,6 +1367,7 @@ static int pid_revalidate(struct dentry + inode->i_uid = 0; + inode->i_gid = 0; + } ++ inode->i_mode &= ~(S_ISUID | S_ISGID); + security_task_to_inode(task, inode); + return 1; + } +@@ -1394,6 +1395,7 @@ static int tid_fd_revalidate(struct dent + inode->i_uid = 0; + inode->i_gid = 0; + } ++ inode->i_mode &= ~(S_ISUID | S_ISGID); + security_task_to_inode(task, inode); + return 1; + } |