diff options
Diffstat (limited to '4.8.11/1009_linux-4.8.10.patch')
-rw-r--r-- | 4.8.11/1009_linux-4.8.10.patch | 4759 |
1 files changed, 0 insertions, 4759 deletions
diff --git a/4.8.11/1009_linux-4.8.10.patch b/4.8.11/1009_linux-4.8.10.patch deleted file mode 100644 index 1e751e5..0000000 --- a/4.8.11/1009_linux-4.8.10.patch +++ /dev/null @@ -1,4759 +0,0 @@ -diff --git a/Makefile b/Makefile -index c1519ab..7cf2b49 100644 ---- a/Makefile -+++ b/Makefile -@@ -1,6 +1,6 @@ - VERSION = 4 - PATCHLEVEL = 8 --SUBLEVEL = 9 -+SUBLEVEL = 10 - EXTRAVERSION = - NAME = Psychotic Stoned Sheep - -diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h -index 37a315d..a6847fc 100644 ---- a/arch/sparc/include/asm/uaccess_64.h -+++ b/arch/sparc/include/asm/uaccess_64.h -@@ -98,7 +98,6 @@ struct exception_table_entry { - unsigned int insn, fixup; - }; - --void __ret_efault(void); - void __retl_efault(void); - - /* Uh, these should become the main single-value transfer routines.. -@@ -205,55 +204,34 @@ int __get_user_bad(void); - unsigned long __must_check ___copy_from_user(void *to, - const void __user *from, - unsigned long size); --unsigned long copy_from_user_fixup(void *to, const void __user *from, -- unsigned long size); - static inline unsigned long __must_check - copy_from_user(void *to, const void __user *from, unsigned long size) - { -- unsigned long ret; -- - check_object_size(to, size, false); - -- ret = ___copy_from_user(to, from, size); -- if (unlikely(ret)) -- ret = copy_from_user_fixup(to, from, size); -- -- return ret; -+ return ___copy_from_user(to, from, size); - } - #define __copy_from_user copy_from_user - - unsigned long __must_check ___copy_to_user(void __user *to, - const void *from, - unsigned long size); --unsigned long copy_to_user_fixup(void __user *to, const void *from, -- unsigned long size); - static inline unsigned long __must_check - copy_to_user(void __user *to, const void *from, unsigned long size) - { -- unsigned long ret; -- - check_object_size(from, size, true); - -- ret = ___copy_to_user(to, from, size); -- if (unlikely(ret)) -- ret = copy_to_user_fixup(to, from, size); -- return ret; -+ return ___copy_to_user(to, from, size); - } - #define __copy_to_user copy_to_user - - unsigned long __must_check ___copy_in_user(void __user *to, - const void __user *from, - unsigned long size); --unsigned long copy_in_user_fixup(void __user *to, void __user *from, -- unsigned long size); - static inline unsigned long __must_check - copy_in_user(void __user *to, void __user *from, unsigned long size) - { -- unsigned long ret = ___copy_in_user(to, from, size); -- -- if (unlikely(ret)) -- ret = copy_in_user_fixup(to, from, size); -- return ret; -+ return ___copy_in_user(to, from, size); - } - #define __copy_in_user copy_in_user - -diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S -index a076b42..5f1f3ae 100644 ---- a/arch/sparc/kernel/head_64.S -+++ b/arch/sparc/kernel/head_64.S -@@ -922,47 +922,11 @@ prom_tba: .xword 0 - tlb_type: .word 0 /* Must NOT end up in BSS */ - .section ".fixup",#alloc,#execinstr - -- .globl __ret_efault, __retl_efault, __ret_one, __retl_one --ENTRY(__ret_efault) -- ret -- restore %g0, -EFAULT, %o0 --ENDPROC(__ret_efault) -- - ENTRY(__retl_efault) - retl - mov -EFAULT, %o0 - ENDPROC(__retl_efault) - --ENTRY(__retl_one) -- retl -- mov 1, %o0 --ENDPROC(__retl_one) -- --ENTRY(__retl_one_fp) -- VISExitHalf -- retl -- mov 1, %o0 --ENDPROC(__retl_one_fp) -- --ENTRY(__ret_one_asi) -- wr %g0, ASI_AIUS, %asi -- ret -- restore %g0, 1, %o0 --ENDPROC(__ret_one_asi) -- --ENTRY(__retl_one_asi) -- wr %g0, ASI_AIUS, %asi -- retl -- mov 1, %o0 --ENDPROC(__retl_one_asi) -- --ENTRY(__retl_one_asi_fp) -- wr %g0, ASI_AIUS, %asi -- VISExitHalf -- retl -- mov 1, %o0 --ENDPROC(__retl_one_asi_fp) -- - ENTRY(__retl_o1) - retl - mov %o1, %o0 -diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c -index 59bbeff..07933b9 100644 ---- a/arch/sparc/kernel/jump_label.c -+++ b/arch/sparc/kernel/jump_label.c -@@ -13,19 +13,30 @@ - void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) - { -- u32 val; - u32 *insn = (u32 *) (unsigned long) entry->code; -+ u32 val; - - if (type == JUMP_LABEL_JMP) { - s32 off = (s32)entry->target - (s32)entry->code; -+ bool use_v9_branch = false; -+ -+ BUG_ON(off & 3); - - #ifdef CONFIG_SPARC64 -- /* ba,pt %xcc, . + (off << 2) */ -- val = 0x10680000 | ((u32) off >> 2); --#else -- /* ba . + (off << 2) */ -- val = 0x10800000 | ((u32) off >> 2); -+ if (off <= 0xfffff && off >= -0x100000) -+ use_v9_branch = true; - #endif -+ if (use_v9_branch) { -+ /* WDISP19 - target is . + immed << 2 */ -+ /* ba,pt %xcc, . + off */ -+ val = 0x10680000 | (((u32) off >> 2) & 0x7ffff); -+ } else { -+ /* WDISP22 - target is . + immed << 2 */ -+ BUG_ON(off > 0x7fffff); -+ BUG_ON(off < -0x800000); -+ /* ba . + off */ -+ val = 0x10800000 | (((u32) off >> 2) & 0x3fffff); -+ } - } else { - val = 0x01000000; - } -diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c -index 9e034f2..20ffb05 100644 ---- a/arch/sparc/kernel/sparc_ksyms_64.c -+++ b/arch/sparc/kernel/sparc_ksyms_64.c -@@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user); - EXPORT_SYMBOL_GPL(real_hard_smp_processor_id); - - /* from head_64.S */ --EXPORT_SYMBOL(__ret_efault); - EXPORT_SYMBOL(tlb_type); - EXPORT_SYMBOL(sun4v_chip_type); - EXPORT_SYMBOL(prom_root_node); -diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S -index b7d0bd6..69a439f 100644 ---- a/arch/sparc/lib/GENcopy_from_user.S -+++ b/arch/sparc/lib/GENcopy_from_user.S -@@ -3,11 +3,11 @@ - * Copyright (C) 2007 David S. Miller (davem@davemloft.net) - */ - --#define EX_LD(x) \ -+#define EX_LD(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one; \ -+ .word 98b, y; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S -index 780550e..9947427 100644 ---- a/arch/sparc/lib/GENcopy_to_user.S -+++ b/arch/sparc/lib/GENcopy_to_user.S -@@ -3,11 +3,11 @@ - * Copyright (C) 2007 David S. Miller (davem@davemloft.net) - */ - --#define EX_ST(x) \ -+#define EX_ST(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one; \ -+ .word 98b, y; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S -index 89358ee..059ea24 100644 ---- a/arch/sparc/lib/GENmemcpy.S -+++ b/arch/sparc/lib/GENmemcpy.S -@@ -4,21 +4,18 @@ - */ - - #ifdef __KERNEL__ -+#include <linux/linkage.h> - #define GLOBAL_SPARE %g7 - #else - #define GLOBAL_SPARE %g5 - #endif - - #ifndef EX_LD --#define EX_LD(x) x -+#define EX_LD(x,y) x - #endif - - #ifndef EX_ST --#define EX_ST(x) x --#endif -- --#ifndef EX_RETVAL --#define EX_RETVAL(x) x -+#define EX_ST(x,y) x - #endif - - #ifndef LOAD -@@ -45,6 +42,29 @@ - .register %g3,#scratch - - .text -+ -+#ifndef EX_RETVAL -+#define EX_RETVAL(x) x -+ENTRY(GEN_retl_o4_1) -+ add %o4, %o2, %o4 -+ retl -+ add %o4, 1, %o0 -+ENDPROC(GEN_retl_o4_1) -+ENTRY(GEN_retl_g1_8) -+ add %g1, %o2, %g1 -+ retl -+ add %g1, 8, %o0 -+ENDPROC(GEN_retl_g1_8) -+ENTRY(GEN_retl_o2_4) -+ retl -+ add %o2, 4, %o0 -+ENDPROC(GEN_retl_o2_4) -+ENTRY(GEN_retl_o2_1) -+ retl -+ add %o2, 1, %o0 -+ENDPROC(GEN_retl_o2_1) -+#endif -+ - .align 64 - - .globl FUNC_NAME -@@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - sub %g0, %o4, %o4 - sub %o2, %o4, %o2 - 1: subcc %o4, 1, %o4 -- EX_LD(LOAD(ldub, %o1, %g1)) -- EX_ST(STORE(stb, %g1, %o0)) -+ EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1) -+ EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1) - add %o1, 1, %o1 - bne,pt %XCC, 1b - add %o0, 1, %o0 -@@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - andn %o2, 0x7, %g1 - sub %o2, %g1, %o2 - 1: subcc %g1, 0x8, %g1 -- EX_LD(LOAD(ldx, %o1, %g2)) -- EX_ST(STORE(stx, %g2, %o0)) -+ EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8) -+ EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8) - add %o1, 0x8, %o1 - bne,pt %XCC, 1b - add %o0, 0x8, %o0 -@@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 1: - subcc %o2, 4, %o2 -- EX_LD(LOAD(lduw, %o1, %g1)) -- EX_ST(STORE(stw, %g1, %o1 + %o3)) -+ EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4) -+ EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4) - bgu,pt %XCC, 1b - add %o1, 4, %o1 - -@@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - .align 32 - 90: - subcc %o2, 1, %o2 -- EX_LD(LOAD(ldub, %o1, %g1)) -- EX_ST(STORE(stb, %g1, %o1 + %o3)) -+ EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1) -+ EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1) - bgu,pt %XCC, 90b - add %o1, 1, %o1 - retl -diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile -index 3269b02..4f2384a 100644 ---- a/arch/sparc/lib/Makefile -+++ b/arch/sparc/lib/Makefile -@@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o - lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o - lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o - --lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o -+lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o - lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o - - obj-$(CONFIG_SPARC64) += iomap.o -diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S -index d5242b8..b79a699 100644 ---- a/arch/sparc/lib/NG2copy_from_user.S -+++ b/arch/sparc/lib/NG2copy_from_user.S -@@ -3,19 +3,19 @@ - * Copyright (C) 2007 David S. Miller (davem@davemloft.net) - */ - --#define EX_LD(x) \ -+#define EX_LD(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_asi;\ -+ .word 98b, y; \ - .text; \ - .align 4; - --#define EX_LD_FP(x) \ -+#define EX_LD_FP(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_asi_fp;\ -+ .word 98b, y##_fp; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S -index 4e962d9..dcec55f 100644 ---- a/arch/sparc/lib/NG2copy_to_user.S -+++ b/arch/sparc/lib/NG2copy_to_user.S -@@ -3,19 +3,19 @@ - * Copyright (C) 2007 David S. Miller (davem@davemloft.net) - */ - --#define EX_ST(x) \ -+#define EX_ST(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_asi;\ -+ .word 98b, y; \ - .text; \ - .align 4; - --#define EX_ST_FP(x) \ -+#define EX_ST_FP(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_asi_fp;\ -+ .word 98b, y##_fp; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S -index d5f585d..c629dbd 100644 ---- a/arch/sparc/lib/NG2memcpy.S -+++ b/arch/sparc/lib/NG2memcpy.S -@@ -4,6 +4,7 @@ - */ - - #ifdef __KERNEL__ -+#include <linux/linkage.h> - #include <asm/visasm.h> - #include <asm/asi.h> - #define GLOBAL_SPARE %g7 -@@ -32,21 +33,17 @@ - #endif - - #ifndef EX_LD --#define EX_LD(x) x -+#define EX_LD(x,y) x - #endif - #ifndef EX_LD_FP --#define EX_LD_FP(x) x -+#define EX_LD_FP(x,y) x - #endif - - #ifndef EX_ST --#define EX_ST(x) x -+#define EX_ST(x,y) x - #endif - #ifndef EX_ST_FP --#define EX_ST_FP(x) x --#endif -- --#ifndef EX_RETVAL --#define EX_RETVAL(x) x -+#define EX_ST_FP(x,y) x - #endif - - #ifndef LOAD -@@ -140,45 +137,110 @@ - fsrc2 %x6, %f12; \ - fsrc2 %x7, %f14; - #define FREG_LOAD_1(base, x0) \ -- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) -+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) - #define FREG_LOAD_2(base, x0, x1) \ -- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ -- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); -+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); - #define FREG_LOAD_3(base, x0, x1, x2) \ -- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ -- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ -- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); -+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); - #define FREG_LOAD_4(base, x0, x1, x2, x3) \ -- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ -- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ -- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ -- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); -+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); - #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ -- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ -- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ -- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ -- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ -- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); -+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); - #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ -- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ -- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ -- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ -- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ -- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ -- EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); -+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); - #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ -- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ -- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ -- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ -- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ -- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ -- EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ -- EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); -+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ -+ EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); - - .register %g2,#scratch - .register %g3,#scratch - - .text -+#ifndef EX_RETVAL -+#define EX_RETVAL(x) x -+__restore_fp: -+ VISExitHalf -+__restore_asi: -+ retl -+ wr %g0, ASI_AIUS, %asi -+ENTRY(NG2_retl_o2) -+ ba,pt %xcc, __restore_asi -+ mov %o2, %o0 -+ENDPROC(NG2_retl_o2) -+ENTRY(NG2_retl_o2_plus_1) -+ ba,pt %xcc, __restore_asi -+ add %o2, 1, %o0 -+ENDPROC(NG2_retl_o2_plus_1) -+ENTRY(NG2_retl_o2_plus_4) -+ ba,pt %xcc, __restore_asi -+ add %o2, 4, %o0 -+ENDPROC(NG2_retl_o2_plus_4) -+ENTRY(NG2_retl_o2_plus_8) -+ ba,pt %xcc, __restore_asi -+ add %o2, 8, %o0 -+ENDPROC(NG2_retl_o2_plus_8) -+ENTRY(NG2_retl_o2_plus_o4_plus_1) -+ add %o4, 1, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG2_retl_o2_plus_o4_plus_1) -+ENTRY(NG2_retl_o2_plus_o4_plus_8) -+ add %o4, 8, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG2_retl_o2_plus_o4_plus_8) -+ENTRY(NG2_retl_o2_plus_o4_plus_16) -+ add %o4, 16, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG2_retl_o2_plus_o4_plus_16) -+ENTRY(NG2_retl_o2_plus_g1_fp) -+ ba,pt %xcc, __restore_fp -+ add %o2, %g1, %o0 -+ENDPROC(NG2_retl_o2_plus_g1_fp) -+ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) -+ add %g1, 64, %g1 -+ ba,pt %xcc, __restore_fp -+ add %o2, %g1, %o0 -+ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) -+ENTRY(NG2_retl_o2_plus_g1_plus_1) -+ add %g1, 1, %g1 -+ ba,pt %xcc, __restore_asi -+ add %o2, %g1, %o0 -+ENDPROC(NG2_retl_o2_plus_g1_plus_1) -+ENTRY(NG2_retl_o2_and_7_plus_o4) -+ and %o2, 7, %o2 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG2_retl_o2_and_7_plus_o4) -+ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) -+ and %o2, 7, %o2 -+ add %o4, 8, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) -+#endif -+ - .align 64 - - .globl FUNC_NAME -@@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - sub %g0, %o4, %o4 ! bytes to align dst - sub %o2, %o4, %o2 - 1: subcc %o4, 1, %o4 -- EX_LD(LOAD(ldub, %o1, %g1)) -- EX_ST(STORE(stb, %g1, %o0)) -+ EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) -+ EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) - add %o1, 1, %o1 - bne,pt %XCC, 1b - add %o0, 1, %o0 -@@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - nop - /* fall through for 0 < low bits < 8 */ - 110: sub %o4, 64, %g2 -- EX_LD_FP(LOAD_BLK(%g2, %f0)) --1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) -- EX_LD_FP(LOAD_BLK(%o4, %f16)) -+ EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) -+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) -+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) - FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) -- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) -+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) - FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 -@@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 120: sub %o4, 56, %g2 - FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) --1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) -- EX_LD_FP(LOAD_BLK(%o4, %f16)) -+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) -+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) - FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) -- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) -+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) - FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 -@@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 130: sub %o4, 48, %g2 - FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) --1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) -- EX_LD_FP(LOAD_BLK(%o4, %f16)) -+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) -+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) - FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) -- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) -+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) - FREG_MOVE_6(f20, f22, f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 -@@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 140: sub %o4, 40, %g2 - FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) --1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) -- EX_LD_FP(LOAD_BLK(%o4, %f16)) -+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) -+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) - FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) -- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) -+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) - FREG_MOVE_5(f22, f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 -@@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 150: sub %o4, 32, %g2 - FREG_LOAD_4(%g2, f0, f2, f4, f6) --1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) -- EX_LD_FP(LOAD_BLK(%o4, %f16)) -+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) -+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) - FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) -- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) -+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) - FREG_MOVE_4(f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 -@@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 160: sub %o4, 24, %g2 - FREG_LOAD_3(%g2, f0, f2, f4) --1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) -- EX_LD_FP(LOAD_BLK(%o4, %f16)) -+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) -+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) - FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) -- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) -+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) - FREG_MOVE_3(f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 -@@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 170: sub %o4, 16, %g2 - FREG_LOAD_2(%g2, f0, f2) --1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) -- EX_LD_FP(LOAD_BLK(%o4, %f16)) -+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) -+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) - FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) -- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) -+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) - FREG_MOVE_2(f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 -@@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 180: sub %o4, 8, %g2 - FREG_LOAD_1(%g2, f0) --1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) -- EX_LD_FP(LOAD_BLK(%o4, %f16)) -+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) -+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) - FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) -- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) -+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) - FREG_MOVE_1(f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 -@@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - nop - - 190: --1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) -+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) - subcc %g1, 64, %g1 -- EX_LD_FP(LOAD_BLK(%o4, %f0)) -- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) -+ EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) -+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) - add %o4, 64, %o4 - bne,pt %xcc, 1b - LOAD(prefetch, %o4 + 64, #one_read) -@@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - andn %o2, 0xf, %o4 - and %o2, 0xf, %o2 - 1: subcc %o4, 0x10, %o4 -- EX_LD(LOAD(ldx, %o1, %o5)) -+ EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) - add %o1, 0x08, %o1 -- EX_LD(LOAD(ldx, %o1, %g1)) -+ EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) - sub %o1, 0x08, %o1 -- EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) -+ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16) - add %o1, 0x8, %o1 -- EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) -+ EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) - bgu,pt %XCC, 1b - add %o1, 0x8, %o1 - 73: andcc %o2, 0x8, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x8, %o2 -- EX_LD(LOAD(ldx, %o1, %o5)) -- EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) -+ EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) -+ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) - add %o1, 0x8, %o1 - 1: andcc %o2, 0x4, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x4, %o2 -- EX_LD(LOAD(lduw, %o1, %o5)) -- EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) -+ EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) -+ EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) - add %o1, 0x4, %o1 - 1: cmp %o2, 0 - be,pt %XCC, 85f -@@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - sub %o2, %g1, %o2 - - 1: subcc %g1, 1, %g1 -- EX_LD(LOAD(ldub, %o1, %o5)) -- EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) -+ EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) -+ EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) - bgu,pt %icc, 1b - add %o1, 1, %o1 - -@@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 8: mov 64, GLOBAL_SPARE - andn %o1, 0x7, %o1 -- EX_LD(LOAD(ldx, %o1, %g2)) -+ EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) - sub GLOBAL_SPARE, %g1, GLOBAL_SPARE - andn %o2, 0x7, %o4 - sllx %g2, %g1, %g2 - 1: add %o1, 0x8, %o1 -- EX_LD(LOAD(ldx, %o1, %g3)) -+ EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) - subcc %o4, 0x8, %o4 - srlx %g3, GLOBAL_SPARE, %o5 - or %o5, %g2, %o5 -- EX_ST(STORE(stx, %o5, %o0)) -+ EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) - add %o0, 0x8, %o0 - bgu,pt %icc, 1b - sllx %g3, %g1, %g2 -@@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 1: - subcc %o2, 4, %o2 -- EX_LD(LOAD(lduw, %o1, %g1)) -- EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) -+ EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) -+ EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) - bgu,pt %XCC, 1b - add %o1, 4, %o1 - -@@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - .align 32 - 90: - subcc %o2, 1, %o2 -- EX_LD(LOAD(ldub, %o1, %g1)) -- EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) -+ EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) -+ EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) - bgu,pt %XCC, 90b - add %o1, 1, %o1 - retl -diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S -index 2e8ee7a..16a286c 100644 ---- a/arch/sparc/lib/NG4copy_from_user.S -+++ b/arch/sparc/lib/NG4copy_from_user.S -@@ -3,19 +3,19 @@ - * Copyright (C) 2012 David S. Miller (davem@davemloft.net) - */ - --#define EX_LD(x) \ -+#define EX_LD(x, y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_asi;\ -+ .word 98b, y; \ - .text; \ - .align 4; - --#define EX_LD_FP(x) \ -+#define EX_LD_FP(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_asi_fp;\ -+ .word 98b, y##_fp; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S -index be0bf45..6b0276f 100644 ---- a/arch/sparc/lib/NG4copy_to_user.S -+++ b/arch/sparc/lib/NG4copy_to_user.S -@@ -3,19 +3,19 @@ - * Copyright (C) 2012 David S. Miller (davem@davemloft.net) - */ - --#define EX_ST(x) \ -+#define EX_ST(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_asi;\ -+ .word 98b, y; \ - .text; \ - .align 4; - --#define EX_ST_FP(x) \ -+#define EX_ST_FP(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_asi_fp;\ -+ .word 98b, y##_fp; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S -index 8e13ee1..75bb93b 100644 ---- a/arch/sparc/lib/NG4memcpy.S -+++ b/arch/sparc/lib/NG4memcpy.S -@@ -4,6 +4,7 @@ - */ - - #ifdef __KERNEL__ -+#include <linux/linkage.h> - #include <asm/visasm.h> - #include <asm/asi.h> - #define GLOBAL_SPARE %g7 -@@ -46,22 +47,19 @@ - #endif - - #ifndef EX_LD --#define EX_LD(x) x -+#define EX_LD(x,y) x - #endif - #ifndef EX_LD_FP --#define EX_LD_FP(x) x -+#define EX_LD_FP(x,y) x - #endif - - #ifndef EX_ST --#define EX_ST(x) x -+#define EX_ST(x,y) x - #endif - #ifndef EX_ST_FP --#define EX_ST_FP(x) x -+#define EX_ST_FP(x,y) x - #endif - --#ifndef EX_RETVAL --#define EX_RETVAL(x) x --#endif - - #ifndef LOAD - #define LOAD(type,addr,dest) type [addr], dest -@@ -94,6 +92,158 @@ - .register %g3,#scratch - - .text -+#ifndef EX_RETVAL -+#define EX_RETVAL(x) x -+__restore_asi_fp: -+ VISExitHalf -+__restore_asi: -+ retl -+ wr %g0, ASI_AIUS, %asi -+ -+ENTRY(NG4_retl_o2) -+ ba,pt %xcc, __restore_asi -+ mov %o2, %o0 -+ENDPROC(NG4_retl_o2) -+ENTRY(NG4_retl_o2_plus_1) -+ ba,pt %xcc, __restore_asi -+ add %o2, 1, %o0 -+ENDPROC(NG4_retl_o2_plus_1) -+ENTRY(NG4_retl_o2_plus_4) -+ ba,pt %xcc, __restore_asi -+ add %o2, 4, %o0 -+ENDPROC(NG4_retl_o2_plus_4) -+ENTRY(NG4_retl_o2_plus_o5) -+ ba,pt %xcc, __restore_asi -+ add %o2, %o5, %o0 -+ENDPROC(NG4_retl_o2_plus_o5) -+ENTRY(NG4_retl_o2_plus_o5_plus_4) -+ add %o5, 4, %o5 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o5, %o0 -+ENDPROC(NG4_retl_o2_plus_o5_plus_4) -+ENTRY(NG4_retl_o2_plus_o5_plus_8) -+ add %o5, 8, %o5 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o5, %o0 -+ENDPROC(NG4_retl_o2_plus_o5_plus_8) -+ENTRY(NG4_retl_o2_plus_o5_plus_16) -+ add %o5, 16, %o5 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o5, %o0 -+ENDPROC(NG4_retl_o2_plus_o5_plus_16) -+ENTRY(NG4_retl_o2_plus_o5_plus_24) -+ add %o5, 24, %o5 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o5, %o0 -+ENDPROC(NG4_retl_o2_plus_o5_plus_24) -+ENTRY(NG4_retl_o2_plus_o5_plus_32) -+ add %o5, 32, %o5 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o5, %o0 -+ENDPROC(NG4_retl_o2_plus_o5_plus_32) -+ENTRY(NG4_retl_o2_plus_g1) -+ ba,pt %xcc, __restore_asi -+ add %o2, %g1, %o0 -+ENDPROC(NG4_retl_o2_plus_g1) -+ENTRY(NG4_retl_o2_plus_g1_plus_1) -+ add %g1, 1, %g1 -+ ba,pt %xcc, __restore_asi -+ add %o2, %g1, %o0 -+ENDPROC(NG4_retl_o2_plus_g1_plus_1) -+ENTRY(NG4_retl_o2_plus_g1_plus_8) -+ add %g1, 8, %g1 -+ ba,pt %xcc, __restore_asi -+ add %o2, %g1, %o0 -+ENDPROC(NG4_retl_o2_plus_g1_plus_8) -+ENTRY(NG4_retl_o2_plus_o4) -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4) -+ENTRY(NG4_retl_o2_plus_o4_plus_8) -+ add %o4, 8, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_8) -+ENTRY(NG4_retl_o2_plus_o4_plus_16) -+ add %o4, 16, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_16) -+ENTRY(NG4_retl_o2_plus_o4_plus_24) -+ add %o4, 24, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_24) -+ENTRY(NG4_retl_o2_plus_o4_plus_32) -+ add %o4, 32, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_32) -+ENTRY(NG4_retl_o2_plus_o4_plus_40) -+ add %o4, 40, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_40) -+ENTRY(NG4_retl_o2_plus_o4_plus_48) -+ add %o4, 48, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_48) -+ENTRY(NG4_retl_o2_plus_o4_plus_56) -+ add %o4, 56, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_56) -+ENTRY(NG4_retl_o2_plus_o4_plus_64) -+ add %o4, 64, %o4 -+ ba,pt %xcc, __restore_asi -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_64) -+ENTRY(NG4_retl_o2_plus_o4_fp) -+ ba,pt %xcc, __restore_asi_fp -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_fp) -+ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) -+ add %o4, 8, %o4 -+ ba,pt %xcc, __restore_asi_fp -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) -+ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) -+ add %o4, 16, %o4 -+ ba,pt %xcc, __restore_asi_fp -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) -+ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) -+ add %o4, 24, %o4 -+ ba,pt %xcc, __restore_asi_fp -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) -+ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) -+ add %o4, 32, %o4 -+ ba,pt %xcc, __restore_asi_fp -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) -+ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) -+ add %o4, 40, %o4 -+ ba,pt %xcc, __restore_asi_fp -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) -+ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) -+ add %o4, 48, %o4 -+ ba,pt %xcc, __restore_asi_fp -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) -+ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) -+ add %o4, 56, %o4 -+ ba,pt %xcc, __restore_asi_fp -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) -+ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) -+ add %o4, 64, %o4 -+ ba,pt %xcc, __restore_asi_fp -+ add %o2, %o4, %o0 -+ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) -+#endif - .align 64 - - .globl FUNC_NAME -@@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - brz,pt %g1, 51f - sub %o2, %g1, %o2 - --1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) -+ -+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) - add %o1, 1, %o1 - subcc %g1, 1, %g1 - add %o0, 1, %o0 - bne,pt %icc, 1b -- EX_ST(STORE(stb, %g2, %o0 - 0x01)) -+ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) - - 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) - LOAD(prefetch, %o1 + 0x080, #n_reads_strong) -@@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - brz,pt %g1, .Llarge_aligned - sub %o2, %g1, %o2 - --1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) -+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) - add %o1, 8, %o1 - subcc %g1, 8, %g1 - add %o0, 8, %o0 - bne,pt %icc, 1b -- EX_ST(STORE(stx, %g2, %o0 - 0x08)) -+ EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) - - .Llarge_aligned: - /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ - andn %o2, 0x3f, %o4 - sub %o2, %o4, %o2 - --1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) -+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) - add %o1, 0x40, %o1 -- EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) -+ EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) - subcc %o4, 0x40, %o4 -- EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) -- EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) -- EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) -- EX_ST(STORE_INIT(%g1, %o0)) -+ EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) -+ EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) -+ EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) -+ EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) - add %o0, 0x08, %o0 -- EX_ST(STORE_INIT(%g2, %o0)) -+ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) - add %o0, 0x08, %o0 -- EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) -- EX_ST(STORE_INIT(%g3, %o0)) -+ EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) -+ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) - add %o0, 0x08, %o0 -- EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) -- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) -+ EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) -+ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) - add %o0, 0x08, %o0 -- EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) -- EX_ST(STORE_INIT(%o5, %o0)) -+ EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) -+ EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) - add %o0, 0x08, %o0 -- EX_ST(STORE_INIT(%g2, %o0)) -+ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) - add %o0, 0x08, %o0 -- EX_ST(STORE_INIT(%g3, %o0)) -+ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) - add %o0, 0x08, %o0 -- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) -+ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) - add %o0, 0x08, %o0 - bne,pt %icc, 1b - LOAD(prefetch, %o1 + 0x200, #n_reads_strong) -@@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - sub %o2, %o4, %o2 - alignaddr %o1, %g0, %g1 - add %o1, %o4, %o1 -- EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) --1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) -+ EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) -+1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) - subcc %o4, 0x40, %o4 -- EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) -- EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) -- EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) -- EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) -- EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) -- EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) -+ EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) -+ EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) -+ EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) -+ EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) -+ EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) -+ EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) - faligndata %f0, %f2, %f16 -- EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) -+ EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) - faligndata %f2, %f4, %f18 - add %g1, 0x40, %g1 - faligndata %f4, %f6, %f20 -@@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - faligndata %f10, %f12, %f26 - faligndata %f12, %f14, %f28 - faligndata %f14, %f0, %f30 -- EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) -- EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) -- EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) -- EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) -- EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) -- EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) -- EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) -- EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) -+ EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) -+ EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) -+ EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) -+ EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40) -+ EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) -+ EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) -+ EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) -+ EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) - add %o0, 0x40, %o0 - bne,pt %icc, 1b - LOAD(prefetch, %g1 + 0x200, #n_reads_strong) -@@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - andncc %o2, 0x20 - 1, %o5 - be,pn %icc, 2f - sub %o2, %o5, %o2 --1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) -- EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) -- EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) -- EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) -+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) -+ EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) -+ EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) -+ EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) - add %o1, 0x20, %o1 - subcc %o5, 0x20, %o5 -- EX_ST(STORE(stx, %g1, %o0 + 0x00)) -- EX_ST(STORE(stx, %g2, %o0 + 0x08)) -- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) -- EX_ST(STORE(stx, %o4, %o0 + 0x18)) -+ EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) -+ EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) -+ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24) -+ EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) - bne,pt %icc, 1b - add %o0, 0x20, %o0 - 2: andcc %o2, 0x18, %o5 - be,pt %icc, 3f - sub %o2, %o5, %o2 --1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) -+ -+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) - add %o1, 0x08, %o1 - add %o0, 0x08, %o0 - subcc %o5, 0x08, %o5 - bne,pt %icc, 1b -- EX_ST(STORE(stx, %g1, %o0 - 0x08)) -+ EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) - 3: brz,pt %o2, .Lexit - cmp %o2, 0x04 - bl,pn %icc, .Ltiny - nop -- EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) -+ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) - add %o1, 0x04, %o1 - add %o0, 0x04, %o0 - subcc %o2, 0x04, %o2 - bne,pn %icc, .Ltiny -- EX_ST(STORE(stw, %g1, %o0 - 0x04)) -+ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) - ba,a,pt %icc, .Lexit - .Lmedium_unaligned: - /* First get dest 8 byte aligned. */ -@@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - brz,pt %g1, 2f - sub %o2, %g1, %o2 - --1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) -+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) - add %o1, 1, %o1 - subcc %g1, 1, %g1 - add %o0, 1, %o0 - bne,pt %icc, 1b -- EX_ST(STORE(stb, %g2, %o0 - 0x01)) -+ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) - 2: - and %o1, 0x7, %g1 - brz,pn %g1, .Lmedium_noprefetch -@@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - mov 64, %g2 - sub %g2, %g1, %g2 - andn %o1, 0x7, %o1 -- EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) -+ EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) - sllx %o4, %g1, %o4 - andn %o2, 0x08 - 1, %o5 - sub %o2, %o5, %o2 --1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) -+1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) - add %o1, 0x08, %o1 - subcc %o5, 0x08, %o5 - srlx %g3, %g2, GLOBAL_SPARE - or GLOBAL_SPARE, %o4, GLOBAL_SPARE -- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) -+ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) - add %o0, 0x08, %o0 - bne,pt %icc, 1b - sllx %g3, %g1, %o4 -@@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - ba,pt %icc, .Lsmall_unaligned - - .Ltiny: -- EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) -+ EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) - subcc %o2, 1, %o2 - be,pn %icc, .Lexit -- EX_ST(STORE(stb, %g1, %o0 + 0x00)) -- EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) -+ EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) -+ EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) - subcc %o2, 1, %o2 - be,pn %icc, .Lexit -- EX_ST(STORE(stb, %g1, %o0 + 0x01)) -- EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) -+ EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) -+ EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) - ba,pt %icc, .Lexit -- EX_ST(STORE(stb, %g1, %o0 + 0x02)) -+ EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) - - .Lsmall: - andcc %g2, 0x3, %g0 -@@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - andn %o2, 0x4 - 1, %o5 - sub %o2, %o5, %o2 - 1: -- EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) -+ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) - add %o1, 0x04, %o1 - subcc %o5, 0x04, %o5 - add %o0, 0x04, %o0 - bne,pt %icc, 1b -- EX_ST(STORE(stw, %g1, %o0 - 0x04)) -+ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) - brz,pt %o2, .Lexit - nop - ba,a,pt %icc, .Ltiny - - .Lsmall_unaligned: --1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) -+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) - add %o1, 1, %o1 - add %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %icc, 1b -- EX_ST(STORE(stb, %g1, %o0 - 0x01)) -+ EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) - ba,a,pt %icc, .Lexit - .size FUNC_NAME, .-FUNC_NAME -diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S -index 5d1e4d1..9cd42fc 100644 ---- a/arch/sparc/lib/NGcopy_from_user.S -+++ b/arch/sparc/lib/NGcopy_from_user.S -@@ -3,11 +3,11 @@ - * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) - */ - --#define EX_LD(x) \ -+#define EX_LD(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __ret_one_asi;\ -+ .word 98b, y; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S -index ff630dc..5c358af 100644 ---- a/arch/sparc/lib/NGcopy_to_user.S -+++ b/arch/sparc/lib/NGcopy_to_user.S -@@ -3,11 +3,11 @@ - * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) - */ - --#define EX_ST(x) \ -+#define EX_ST(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __ret_one_asi;\ -+ .word 98b, y; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S -index 96a14ca..d88c4ed 100644 ---- a/arch/sparc/lib/NGmemcpy.S -+++ b/arch/sparc/lib/NGmemcpy.S -@@ -4,6 +4,7 @@ - */ - - #ifdef __KERNEL__ -+#include <linux/linkage.h> - #include <asm/asi.h> - #include <asm/thread_info.h> - #define GLOBAL_SPARE %g7 -@@ -27,15 +28,11 @@ - #endif - - #ifndef EX_LD --#define EX_LD(x) x -+#define EX_LD(x,y) x - #endif - - #ifndef EX_ST --#define EX_ST(x) x --#endif -- --#ifndef EX_RETVAL --#define EX_RETVAL(x) x -+#define EX_ST(x,y) x - #endif - - #ifndef LOAD -@@ -79,6 +76,92 @@ - .register %g3,#scratch - - .text -+#ifndef EX_RETVAL -+#define EX_RETVAL(x) x -+__restore_asi: -+ ret -+ wr %g0, ASI_AIUS, %asi -+ restore -+ENTRY(NG_ret_i2_plus_i4_plus_1) -+ ba,pt %xcc, __restore_asi -+ add %i2, %i5, %i0 -+ENDPROC(NG_ret_i2_plus_i4_plus_1) -+ENTRY(NG_ret_i2_plus_g1) -+ ba,pt %xcc, __restore_asi -+ add %i2, %g1, %i0 -+ENDPROC(NG_ret_i2_plus_g1) -+ENTRY(NG_ret_i2_plus_g1_minus_8) -+ sub %g1, 8, %g1 -+ ba,pt %xcc, __restore_asi -+ add %i2, %g1, %i0 -+ENDPROC(NG_ret_i2_plus_g1_minus_8) -+ENTRY(NG_ret_i2_plus_g1_minus_16) -+ sub %g1, 16, %g1 -+ ba,pt %xcc, __restore_asi -+ add %i2, %g1, %i0 -+ENDPROC(NG_ret_i2_plus_g1_minus_16) -+ENTRY(NG_ret_i2_plus_g1_minus_24) -+ sub %g1, 24, %g1 -+ ba,pt %xcc, __restore_asi -+ add %i2, %g1, %i0 -+ENDPROC(NG_ret_i2_plus_g1_minus_24) -+ENTRY(NG_ret_i2_plus_g1_minus_32) -+ sub %g1, 32, %g1 -+ ba,pt %xcc, __restore_asi -+ add %i2, %g1, %i0 -+ENDPROC(NG_ret_i2_plus_g1_minus_32) -+ENTRY(NG_ret_i2_plus_g1_minus_40) -+ sub %g1, 40, %g1 -+ ba,pt %xcc, __restore_asi -+ add %i2, %g1, %i0 -+ENDPROC(NG_ret_i2_plus_g1_minus_40) -+ENTRY(NG_ret_i2_plus_g1_minus_48) -+ sub %g1, 48, %g1 -+ ba,pt %xcc, __restore_asi -+ add %i2, %g1, %i0 -+ENDPROC(NG_ret_i2_plus_g1_minus_48) -+ENTRY(NG_ret_i2_plus_g1_minus_56) -+ sub %g1, 56, %g1 -+ ba,pt %xcc, __restore_asi -+ add %i2, %g1, %i0 -+ENDPROC(NG_ret_i2_plus_g1_minus_56) -+ENTRY(NG_ret_i2_plus_i4) -+ ba,pt %xcc, __restore_asi -+ add %i2, %i4, %i0 -+ENDPROC(NG_ret_i2_plus_i4) -+ENTRY(NG_ret_i2_plus_i4_minus_8) -+ sub %i4, 8, %i4 -+ ba,pt %xcc, __restore_asi -+ add %i2, %i4, %i0 -+ENDPROC(NG_ret_i2_plus_i4_minus_8) -+ENTRY(NG_ret_i2_plus_8) -+ ba,pt %xcc, __restore_asi -+ add %i2, 8, %i0 -+ENDPROC(NG_ret_i2_plus_8) -+ENTRY(NG_ret_i2_plus_4) -+ ba,pt %xcc, __restore_asi -+ add %i2, 4, %i0 -+ENDPROC(NG_ret_i2_plus_4) -+ENTRY(NG_ret_i2_plus_1) -+ ba,pt %xcc, __restore_asi -+ add %i2, 1, %i0 -+ENDPROC(NG_ret_i2_plus_1) -+ENTRY(NG_ret_i2_plus_g1_plus_1) -+ add %g1, 1, %g1 -+ ba,pt %xcc, __restore_asi -+ add %i2, %g1, %i0 -+ENDPROC(NG_ret_i2_plus_g1_plus_1) -+ENTRY(NG_ret_i2) -+ ba,pt %xcc, __restore_asi -+ mov %i2, %i0 -+ENDPROC(NG_ret_i2) -+ENTRY(NG_ret_i2_and_7_plus_i4) -+ and %i2, 7, %i2 -+ ba,pt %xcc, __restore_asi -+ add %i2, %i4, %i0 -+ENDPROC(NG_ret_i2_and_7_plus_i4) -+#endif -+ - .align 64 - - .globl FUNC_NAME -@@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - sub %g0, %i4, %i4 ! bytes to align dst - sub %i2, %i4, %i2 - 1: subcc %i4, 1, %i4 -- EX_LD(LOAD(ldub, %i1, %g1)) -- EX_ST(STORE(stb, %g1, %o0)) -+ EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1) -+ EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1) - add %i1, 1, %i1 - bne,pt %XCC, 1b - add %o0, 1, %o0 -@@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - and %i4, 0x7, GLOBAL_SPARE - sll GLOBAL_SPARE, 3, GLOBAL_SPARE - mov 64, %i5 -- EX_LD(LOAD_TWIN(%i1, %g2, %g3)) -+ EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1) - sub %i5, GLOBAL_SPARE, %i5 - mov 16, %o4 - mov 32, %o5 -@@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - srlx WORD3, PRE_SHIFT, TMP; \ - or WORD2, TMP, WORD2; - --8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) -+8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) - MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) - LOAD(prefetch, %i1 + %i3, #one_read) - -- EX_ST(STORE_INIT(%g2, %o0 + 0x00)) -- EX_ST(STORE_INIT(%g3, %o0 + 0x08)) -+ EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1) -+ EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - -- EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) -+ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) - MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - -- EX_ST(STORE_INIT(%o2, %o0 + 0x10)) -- EX_ST(STORE_INIT(%o3, %o0 + 0x18)) -+ EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) -+ EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - -- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) -+ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) - MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) - -- EX_ST(STORE_INIT(%g2, %o0 + 0x20)) -- EX_ST(STORE_INIT(%g3, %o0 + 0x28)) -+ EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) -+ EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - -- EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) -+ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) - add %i1, 64, %i1 - MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - -- EX_ST(STORE_INIT(%o2, %o0 + 0x30)) -- EX_ST(STORE_INIT(%o3, %o0 + 0x38)) -+ EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) -+ EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) - - subcc %g1, 64, %g1 - bne,pt %XCC, 8b -@@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - ba,pt %XCC, 60f - add %i1, %i4, %i1 - --9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) -+9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) - MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) - LOAD(prefetch, %i1 + %i3, #one_read) - -- EX_ST(STORE_INIT(%g3, %o0 + 0x00)) -- EX_ST(STORE_INIT(%o2, %o0 + 0x08)) -+ EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1) -+ EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - -- EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) -+ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) - MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - -- EX_ST(STORE_INIT(%o3, %o0 + 0x10)) -- EX_ST(STORE_INIT(%g2, %o0 + 0x18)) -+ EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) -+ EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - -- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) -+ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) - MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) - -- EX_ST(STORE_INIT(%g3, %o0 + 0x20)) -- EX_ST(STORE_INIT(%o2, %o0 + 0x28)) -+ EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) -+ EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - -- EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) -+ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) - add %i1, 64, %i1 - MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - -- EX_ST(STORE_INIT(%o3, %o0 + 0x30)) -- EX_ST(STORE_INIT(%g2, %o0 + 0x38)) -+ EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) -+ EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) - - subcc %g1, 64, %g1 - bne,pt %XCC, 9b -@@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - * one twin load ahead, then add 8 back into source when - * we finish the loop. - */ -- EX_LD(LOAD_TWIN(%i1, %o4, %o5)) -+ EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1) - mov 16, %o7 - mov 32, %g2 - mov 48, %g3 - mov 64, %o1 --1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) -+1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) - LOAD(prefetch, %i1 + %o1, #one_read) -- EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line -- EX_ST(STORE_INIT(%o2, %o0 + 0x08)) -- EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) -- EX_ST(STORE_INIT(%o3, %o0 + 0x10)) -- EX_ST(STORE_INIT(%o4, %o0 + 0x18)) -- EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) -- EX_ST(STORE_INIT(%o5, %o0 + 0x20)) -- EX_ST(STORE_INIT(%o2, %o0 + 0x28)) -- EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) -+ EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line -+ EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) -+ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) -+ EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) -+ EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) -+ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) -+ EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) -+ EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) -+ EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48) - add %i1, 64, %i1 -- EX_ST(STORE_INIT(%o3, %o0 + 0x30)) -- EX_ST(STORE_INIT(%o4, %o0 + 0x38)) -+ EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) -+ EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) - subcc %g1, 64, %g1 - bne,pt %XCC, 1b - add %o0, 64, %o0 -@@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - mov 32, %g2 - mov 48, %g3 - mov 64, %o1 --1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) -- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) -+1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1) -+ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) - LOAD(prefetch, %i1 + %o1, #one_read) -- EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line -- EX_ST(STORE_INIT(%o5, %o0 + 0x08)) -- EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) -- EX_ST(STORE_INIT(%o2, %o0 + 0x10)) -- EX_ST(STORE_INIT(%o3, %o0 + 0x18)) -- EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) -+ EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line -+ EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) -+ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) -+ EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) -+ EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) -+ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) - add %i1, 64, %i1 -- EX_ST(STORE_INIT(%o4, %o0 + 0x20)) -- EX_ST(STORE_INIT(%o5, %o0 + 0x28)) -- EX_ST(STORE_INIT(%o2, %o0 + 0x30)) -- EX_ST(STORE_INIT(%o3, %o0 + 0x38)) -+ EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) -+ EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) -+ EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) -+ EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) - subcc %g1, 64, %g1 - bne,pt %XCC, 1b - add %o0, 64, %o0 -@@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - andn %i2, 0xf, %i4 - and %i2, 0xf, %i2 - 1: subcc %i4, 0x10, %i4 -- EX_LD(LOAD(ldx, %i1, %o4)) -+ EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) - add %i1, 0x08, %i1 -- EX_LD(LOAD(ldx, %i1, %g1)) -+ EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) - sub %i1, 0x08, %i1 -- EX_ST(STORE(stx, %o4, %i1 + %i3)) -+ EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) - add %i1, 0x8, %i1 -- EX_ST(STORE(stx, %g1, %i1 + %i3)) -+ EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) - bgu,pt %XCC, 1b - add %i1, 0x8, %i1 - 73: andcc %i2, 0x8, %g0 - be,pt %XCC, 1f - nop - sub %i2, 0x8, %i2 -- EX_LD(LOAD(ldx, %i1, %o4)) -- EX_ST(STORE(stx, %o4, %i1 + %i3)) -+ EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8) -+ EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8) - add %i1, 0x8, %i1 - 1: andcc %i2, 0x4, %g0 - be,pt %XCC, 1f - nop - sub %i2, 0x4, %i2 -- EX_LD(LOAD(lduw, %i1, %i5)) -- EX_ST(STORE(stw, %i5, %i1 + %i3)) -+ EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4) -+ EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4) - add %i1, 0x4, %i1 - 1: cmp %i2, 0 - be,pt %XCC, 85f -@@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - sub %i2, %g1, %i2 - - 1: subcc %g1, 1, %g1 -- EX_LD(LOAD(ldub, %i1, %i5)) -- EX_ST(STORE(stb, %i5, %i1 + %i3)) -+ EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1) -+ EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1) - bgu,pt %icc, 1b - add %i1, 1, %i1 - -@@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - - 8: mov 64, %i3 - andn %i1, 0x7, %i1 -- EX_LD(LOAD(ldx, %i1, %g2)) -+ EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2) - sub %i3, %g1, %i3 - andn %i2, 0x7, %i4 - sllx %g2, %g1, %g2 - 1: add %i1, 0x8, %i1 -- EX_LD(LOAD(ldx, %i1, %g3)) -+ EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4) - subcc %i4, 0x8, %i4 - srlx %g3, %i3, %i5 - or %i5, %g2, %i5 -- EX_ST(STORE(stx, %i5, %o0)) -+ EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4) - add %o0, 0x8, %o0 - bgu,pt %icc, 1b - sllx %g3, %g1, %g2 -@@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - - 1: - subcc %i2, 4, %i2 -- EX_LD(LOAD(lduw, %i1, %g1)) -- EX_ST(STORE(stw, %g1, %i1 + %i3)) -+ EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4) -+ EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4) - bgu,pt %XCC, 1b - add %i1, 4, %i1 - -@@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ - .align 32 - 90: - subcc %i2, 1, %i2 -- EX_LD(LOAD(ldub, %i1, %g1)) -- EX_ST(STORE(stb, %g1, %i1 + %i3)) -+ EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1) -+ EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1) - bgu,pt %XCC, 90b - add %i1, 1, %i1 - ret -diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S -index ecc5692..bb6ff73 100644 ---- a/arch/sparc/lib/U1copy_from_user.S -+++ b/arch/sparc/lib/U1copy_from_user.S -@@ -3,19 +3,19 @@ - * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) - */ - --#define EX_LD(x) \ -+#define EX_LD(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one; \ -+ .word 98b, y; \ - .text; \ - .align 4; - --#define EX_LD_FP(x) \ -+#define EX_LD_FP(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_fp;\ -+ .word 98b, y; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S -index 9eea392..ed92ce73 100644 ---- a/arch/sparc/lib/U1copy_to_user.S -+++ b/arch/sparc/lib/U1copy_to_user.S -@@ -3,19 +3,19 @@ - * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) - */ - --#define EX_ST(x) \ -+#define EX_ST(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one; \ -+ .word 98b, y; \ - .text; \ - .align 4; - --#define EX_ST_FP(x) \ -+#define EX_ST_FP(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_fp;\ -+ .word 98b, y; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S -index 3e6209e..f30d2ab 100644 ---- a/arch/sparc/lib/U1memcpy.S -+++ b/arch/sparc/lib/U1memcpy.S -@@ -5,6 +5,7 @@ - */ - - #ifdef __KERNEL__ -+#include <linux/linkage.h> - #include <asm/visasm.h> - #include <asm/asi.h> - #define GLOBAL_SPARE g7 -@@ -23,21 +24,17 @@ - #endif - - #ifndef EX_LD --#define EX_LD(x) x -+#define EX_LD(x,y) x - #endif - #ifndef EX_LD_FP --#define EX_LD_FP(x) x -+#define EX_LD_FP(x,y) x - #endif - - #ifndef EX_ST --#define EX_ST(x) x -+#define EX_ST(x,y) x - #endif - #ifndef EX_ST_FP --#define EX_ST_FP(x) x --#endif -- --#ifndef EX_RETVAL --#define EX_RETVAL(x) x -+#define EX_ST_FP(x,y) x - #endif - - #ifndef LOAD -@@ -78,53 +75,169 @@ - faligndata %f7, %f8, %f60; \ - faligndata %f8, %f9, %f62; - --#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ -- EX_LD_FP(LOAD_BLK(%src, %fdest)); \ -- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ -- add %src, 0x40, %src; \ -- subcc %len, 0x40, %len; \ -- be,pn %xcc, jmptgt; \ -- add %dest, 0x40, %dest; \ -- --#define LOOP_CHUNK1(src, dest, len, branch_dest) \ -- MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) --#define LOOP_CHUNK2(src, dest, len, branch_dest) \ -- MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) --#define LOOP_CHUNK3(src, dest, len, branch_dest) \ -- MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) -+#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \ -+ EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \ -+ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ -+ add %src, 0x40, %src; \ -+ subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \ -+ be,pn %xcc, jmptgt; \ -+ add %dest, 0x40, %dest; \ -+ -+#define LOOP_CHUNK1(src, dest, branch_dest) \ -+ MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest) -+#define LOOP_CHUNK2(src, dest, branch_dest) \ -+ MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest) -+#define LOOP_CHUNK3(src, dest, branch_dest) \ -+ MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest) - - #define DO_SYNC membar #Sync; - #define STORE_SYNC(dest, fsrc) \ -- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ -+ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ - add %dest, 0x40, %dest; \ - DO_SYNC - - #define STORE_JUMP(dest, fsrc, target) \ -- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ -+ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \ - add %dest, 0x40, %dest; \ - ba,pt %xcc, target; \ - nop; - --#define FINISH_VISCHUNK(dest, f0, f1, left) \ -- subcc %left, 8, %left;\ -- bl,pn %xcc, 95f; \ -- faligndata %f0, %f1, %f48; \ -- EX_ST_FP(STORE(std, %f48, %dest)); \ -+#define FINISH_VISCHUNK(dest, f0, f1) \ -+ subcc %g3, 8, %g3; \ -+ bl,pn %xcc, 95f; \ -+ faligndata %f0, %f1, %f48; \ -+ EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \ - add %dest, 8, %dest; - --#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ -- subcc %left, 8, %left; \ -- bl,pn %xcc, 95f; \ -+#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ -+ subcc %g3, 8, %g3; \ -+ bl,pn %xcc, 95f; \ - fsrc2 %f0, %f1; - --#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ -- UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ -+#define UNEVEN_VISCHUNK(dest, f0, f1) \ -+ UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ - ba,a,pt %xcc, 93f; - - .register %g2,#scratch - .register %g3,#scratch - - .text -+#ifndef EX_RETVAL -+#define EX_RETVAL(x) x -+ENTRY(U1_g1_1_fp) -+ VISExitHalf -+ add %g1, 1, %g1 -+ add %g1, %g2, %g1 -+ retl -+ add %g1, %o2, %o0 -+ENDPROC(U1_g1_1_fp) -+ENTRY(U1_g2_0_fp) -+ VISExitHalf -+ retl -+ add %g2, %o2, %o0 -+ENDPROC(U1_g2_0_fp) -+ENTRY(U1_g2_8_fp) -+ VISExitHalf -+ add %g2, 8, %g2 -+ retl -+ add %g2, %o2, %o0 -+ENDPROC(U1_g2_8_fp) -+ENTRY(U1_gs_0_fp) -+ VISExitHalf -+ add %GLOBAL_SPARE, %g3, %o0 -+ retl -+ add %o0, %o2, %o0 -+ENDPROC(U1_gs_0_fp) -+ENTRY(U1_gs_80_fp) -+ VISExitHalf -+ add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE -+ add %GLOBAL_SPARE, %g3, %o0 -+ retl -+ add %o0, %o2, %o0 -+ENDPROC(U1_gs_80_fp) -+ENTRY(U1_gs_40_fp) -+ VISExitHalf -+ add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE -+ add %GLOBAL_SPARE, %g3, %o0 -+ retl -+ add %o0, %o2, %o0 -+ENDPROC(U1_gs_40_fp) -+ENTRY(U1_g3_0_fp) -+ VISExitHalf -+ retl -+ add %g3, %o2, %o0 -+ENDPROC(U1_g3_0_fp) -+ENTRY(U1_g3_8_fp) -+ VISExitHalf -+ add %g3, 8, %g3 -+ retl -+ add %g3, %o2, %o0 -+ENDPROC(U1_g3_8_fp) -+ENTRY(U1_o2_0_fp) -+ VISExitHalf -+ retl -+ mov %o2, %o0 -+ENDPROC(U1_o2_0_fp) -+ENTRY(U1_o2_1_fp) -+ VISExitHalf -+ retl -+ add %o2, 1, %o0 -+ENDPROC(U1_o2_1_fp) -+ENTRY(U1_gs_0) -+ VISExitHalf -+ retl -+ add %GLOBAL_SPARE, %o2, %o0 -+ENDPROC(U1_gs_0) -+ENTRY(U1_gs_8) -+ VISExitHalf -+ add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE -+ retl -+ add %GLOBAL_SPARE, 0x8, %o0 -+ENDPROC(U1_gs_8) -+ENTRY(U1_gs_10) -+ VISExitHalf -+ add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE -+ retl -+ add %GLOBAL_SPARE, 0x10, %o0 -+ENDPROC(U1_gs_10) -+ENTRY(U1_o2_0) -+ retl -+ mov %o2, %o0 -+ENDPROC(U1_o2_0) -+ENTRY(U1_o2_8) -+ retl -+ add %o2, 8, %o0 -+ENDPROC(U1_o2_8) -+ENTRY(U1_o2_4) -+ retl -+ add %o2, 4, %o0 -+ENDPROC(U1_o2_4) -+ENTRY(U1_o2_1) -+ retl -+ add %o2, 1, %o0 -+ENDPROC(U1_o2_1) -+ENTRY(U1_g1_0) -+ retl -+ add %g1, %o2, %o0 -+ENDPROC(U1_g1_0) -+ENTRY(U1_g1_1) -+ add %g1, 1, %g1 -+ retl -+ add %g1, %o2, %o0 -+ENDPROC(U1_g1_1) -+ENTRY(U1_gs_0_o2_adj) -+ and %o2, 7, %o2 -+ retl -+ add %GLOBAL_SPARE, %o2, %o0 -+ENDPROC(U1_gs_0_o2_adj) -+ENTRY(U1_gs_8_o2_adj) -+ and %o2, 7, %o2 -+ add %GLOBAL_SPARE, 8, %GLOBAL_SPARE -+ retl -+ add %GLOBAL_SPARE, %o2, %o0 -+ENDPROC(U1_gs_8_o2_adj) -+#endif -+ - .align 64 - - .globl FUNC_NAME -@@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - and %g2, 0x38, %g2 - - 1: subcc %g1, 0x1, %g1 -- EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) -- EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) -+ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp) -+ EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp) - bgu,pt %XCC, 1b - add %o1, 0x1, %o1 - -@@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - be,pt %icc, 3f - alignaddr %o1, %g0, %o1 - -- EX_LD_FP(LOAD(ldd, %o1, %f4)) --1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) -+ EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp) -+1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp) - add %o1, 0x8, %o1 - subcc %g2, 0x8, %g2 - faligndata %f4, %f6, %f0 -- EX_ST_FP(STORE(std, %f0, %o0)) -+ EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) - be,pn %icc, 3f - add %o0, 0x8, %o0 - -- EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp) - add %o1, 0x8, %o1 - subcc %g2, 0x8, %g2 - faligndata %f6, %f4, %f0 -- EX_ST_FP(STORE(std, %f0, %o0)) -+ EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) - bne,pt %icc, 1b - add %o0, 0x8, %o0 - -@@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - add %g1, %GLOBAL_SPARE, %g1 - subcc %o2, %g3, %o2 - -- EX_LD_FP(LOAD_BLK(%o1, %f0)) -+ EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp) - add %o1, 0x40, %o1 - add %g1, %g3, %g1 -- EX_LD_FP(LOAD_BLK(%o1, %f16)) -+ EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp) - add %o1, 0x40, %o1 - sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE -- EX_LD_FP(LOAD_BLK(%o1, %f32)) -+ EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp) - add %o1, 0x40, %o1 - - /* There are 8 instances of the unrolled loop, -@@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - .align 64 - 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) -- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) -+ LOOP_CHUNK1(o1, o0, 1f) - FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) -- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) -+ LOOP_CHUNK2(o1, o0, 2f) - FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) -- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) -+ LOOP_CHUNK3(o1, o0, 3f) - ba,pt %xcc, 1b+4 - faligndata %f0, %f2, %f48 - 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) -@@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - STORE_JUMP(o0, f48, 56f) - - 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) -- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) -+ LOOP_CHUNK1(o1, o0, 1f) - FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) -- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) -+ LOOP_CHUNK2(o1, o0, 2f) - FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) -- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) -+ LOOP_CHUNK3(o1, o0, 3f) - ba,pt %xcc, 1b+4 - faligndata %f2, %f4, %f48 - 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) -@@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - STORE_JUMP(o0, f48, 57f) - - 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) -- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) -+ LOOP_CHUNK1(o1, o0, 1f) - FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) -- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) -+ LOOP_CHUNK2(o1, o0, 2f) - FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) -- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) -+ LOOP_CHUNK3(o1, o0, 3f) - ba,pt %xcc, 1b+4 - faligndata %f4, %f6, %f48 - 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) -@@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - STORE_JUMP(o0, f48, 58f) - - 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) -- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) -+ LOOP_CHUNK1(o1, o0, 1f) - FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) -- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) -+ LOOP_CHUNK2(o1, o0, 2f) - FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) -- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) -+ LOOP_CHUNK3(o1, o0, 3f) - ba,pt %xcc, 1b+4 - faligndata %f6, %f8, %f48 - 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) -@@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - STORE_JUMP(o0, f48, 59f) - - 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) -- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) -+ LOOP_CHUNK1(o1, o0, 1f) - FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) -- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) -+ LOOP_CHUNK2(o1, o0, 2f) - FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) -- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) -+ LOOP_CHUNK3(o1, o0, 3f) - ba,pt %xcc, 1b+4 - faligndata %f8, %f10, %f48 - 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) -@@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - STORE_JUMP(o0, f48, 60f) - - 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) -- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) -+ LOOP_CHUNK1(o1, o0, 1f) - FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) -- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) -+ LOOP_CHUNK2(o1, o0, 2f) - FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) -- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) -+ LOOP_CHUNK3(o1, o0, 3f) - ba,pt %xcc, 1b+4 - faligndata %f10, %f12, %f48 - 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) -@@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - STORE_JUMP(o0, f48, 61f) - - 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) -- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) -+ LOOP_CHUNK1(o1, o0, 1f) - FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) -- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) -+ LOOP_CHUNK2(o1, o0, 2f) - FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) -- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) -+ LOOP_CHUNK3(o1, o0, 3f) - ba,pt %xcc, 1b+4 - faligndata %f12, %f14, %f48 - 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) -@@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - STORE_JUMP(o0, f48, 62f) - - 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) -- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) -+ LOOP_CHUNK1(o1, o0, 1f) - FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) -- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) -+ LOOP_CHUNK2(o1, o0, 2f) - FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) -- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) -+ LOOP_CHUNK3(o1, o0, 3f) - ba,pt %xcc, 1b+4 - faligndata %f14, %f16, %f48 - 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) -@@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - STORE_JUMP(o0, f48, 63f) - --40: FINISH_VISCHUNK(o0, f0, f2, g3) --41: FINISH_VISCHUNK(o0, f2, f4, g3) --42: FINISH_VISCHUNK(o0, f4, f6, g3) --43: FINISH_VISCHUNK(o0, f6, f8, g3) --44: FINISH_VISCHUNK(o0, f8, f10, g3) --45: FINISH_VISCHUNK(o0, f10, f12, g3) --46: FINISH_VISCHUNK(o0, f12, f14, g3) --47: UNEVEN_VISCHUNK(o0, f14, f0, g3) --48: FINISH_VISCHUNK(o0, f16, f18, g3) --49: FINISH_VISCHUNK(o0, f18, f20, g3) --50: FINISH_VISCHUNK(o0, f20, f22, g3) --51: FINISH_VISCHUNK(o0, f22, f24, g3) --52: FINISH_VISCHUNK(o0, f24, f26, g3) --53: FINISH_VISCHUNK(o0, f26, f28, g3) --54: FINISH_VISCHUNK(o0, f28, f30, g3) --55: UNEVEN_VISCHUNK(o0, f30, f0, g3) --56: FINISH_VISCHUNK(o0, f32, f34, g3) --57: FINISH_VISCHUNK(o0, f34, f36, g3) --58: FINISH_VISCHUNK(o0, f36, f38, g3) --59: FINISH_VISCHUNK(o0, f38, f40, g3) --60: FINISH_VISCHUNK(o0, f40, f42, g3) --61: FINISH_VISCHUNK(o0, f42, f44, g3) --62: FINISH_VISCHUNK(o0, f44, f46, g3) --63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) -- --93: EX_LD_FP(LOAD(ldd, %o1, %f2)) -+40: FINISH_VISCHUNK(o0, f0, f2) -+41: FINISH_VISCHUNK(o0, f2, f4) -+42: FINISH_VISCHUNK(o0, f4, f6) -+43: FINISH_VISCHUNK(o0, f6, f8) -+44: FINISH_VISCHUNK(o0, f8, f10) -+45: FINISH_VISCHUNK(o0, f10, f12) -+46: FINISH_VISCHUNK(o0, f12, f14) -+47: UNEVEN_VISCHUNK(o0, f14, f0) -+48: FINISH_VISCHUNK(o0, f16, f18) -+49: FINISH_VISCHUNK(o0, f18, f20) -+50: FINISH_VISCHUNK(o0, f20, f22) -+51: FINISH_VISCHUNK(o0, f22, f24) -+52: FINISH_VISCHUNK(o0, f24, f26) -+53: FINISH_VISCHUNK(o0, f26, f28) -+54: FINISH_VISCHUNK(o0, f28, f30) -+55: UNEVEN_VISCHUNK(o0, f30, f0) -+56: FINISH_VISCHUNK(o0, f32, f34) -+57: FINISH_VISCHUNK(o0, f34, f36) -+58: FINISH_VISCHUNK(o0, f36, f38) -+59: FINISH_VISCHUNK(o0, f38, f40) -+60: FINISH_VISCHUNK(o0, f40, f42) -+61: FINISH_VISCHUNK(o0, f42, f44) -+62: FINISH_VISCHUNK(o0, f44, f46) -+63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) -+ -+93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) - add %o1, 8, %o1 - subcc %g3, 8, %g3 - faligndata %f0, %f2, %f8 -- EX_ST_FP(STORE(std, %f8, %o0)) -+ EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) - bl,pn %xcc, 95f - add %o0, 8, %o0 -- EX_LD_FP(LOAD(ldd, %o1, %f0)) -+ EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) - add %o1, 8, %o1 - subcc %g3, 8, %g3 - faligndata %f2, %f0, %f8 -- EX_ST_FP(STORE(std, %f8, %o0)) -+ EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) - bge,pt %xcc, 93b - add %o0, 8, %o0 - - 95: brz,pt %o2, 2f - mov %g1, %o1 - --1: EX_LD_FP(LOAD(ldub, %o1, %o3)) -+1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp) - add %o1, 1, %o1 - subcc %o2, 1, %o2 -- EX_ST_FP(STORE(stb, %o3, %o0)) -+ EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp) - bne,pt %xcc, 1b - add %o0, 1, %o0 - -@@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 72: andn %o2, 0xf, %GLOBAL_SPARE - and %o2, 0xf, %o2 --1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) -- EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) -+1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0) -+ EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0) - subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE -- EX_ST(STORE(stx, %o5, %o1 + %o3)) -+ EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10) - add %o1, 0x8, %o1 -- EX_ST(STORE(stx, %g1, %o1 + %o3)) -+ EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8) - bgu,pt %XCC, 1b - add %o1, 0x8, %o1 - 73: andcc %o2, 0x8, %g0 - be,pt %XCC, 1f - nop -- EX_LD(LOAD(ldx, %o1, %o5)) -+ EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0) - sub %o2, 0x8, %o2 -- EX_ST(STORE(stx, %o5, %o1 + %o3)) -+ EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8) - add %o1, 0x8, %o1 - 1: andcc %o2, 0x4, %g0 - be,pt %XCC, 1f - nop -- EX_LD(LOAD(lduw, %o1, %o5)) -+ EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0) - sub %o2, 0x4, %o2 -- EX_ST(STORE(stw, %o5, %o1 + %o3)) -+ EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4) - add %o1, 0x4, %o1 - 1: cmp %o2, 0 - be,pt %XCC, 85f -@@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - sub %g0, %g1, %g1 - sub %o2, %g1, %o2 - --1: EX_LD(LOAD(ldub, %o1, %o5)) -+1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0) - subcc %g1, 1, %g1 -- EX_ST(STORE(stb, %o5, %o1 + %o3)) -+ EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1) - bgu,pt %icc, 1b - add %o1, 1, %o1 - -@@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 8: mov 64, %o3 - andn %o1, 0x7, %o1 -- EX_LD(LOAD(ldx, %o1, %g2)) -+ EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0) - sub %o3, %g1, %o3 - andn %o2, 0x7, %GLOBAL_SPARE - sllx %g2, %g1, %g2 --1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) -+1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj) - subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE - add %o1, 0x8, %o1 - srlx %g3, %o3, %o5 - or %o5, %g2, %o5 -- EX_ST(STORE(stx, %o5, %o0)) -+ EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj) - add %o0, 0x8, %o0 - bgu,pt %icc, 1b - sllx %g3, %g1, %g2 -@@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - bne,pn %XCC, 90f - sub %o0, %o1, %o3 - --1: EX_LD(LOAD(lduw, %o1, %g1)) -+1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0) - subcc %o2, 4, %o2 -- EX_ST(STORE(stw, %g1, %o1 + %o3)) -+ EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4) - bgu,pt %XCC, 1b - add %o1, 4, %o1 - -@@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - mov EX_RETVAL(%o4), %o0 - - .align 32 --90: EX_LD(LOAD(ldub, %o1, %g1)) -+90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0) - subcc %o2, 1, %o2 -- EX_ST(STORE(stb, %g1, %o1 + %o3)) -+ EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1) - bgu,pt %XCC, 90b - add %o1, 1, %o1 - retl -diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S -index 88ad73d..db73010 100644 ---- a/arch/sparc/lib/U3copy_from_user.S -+++ b/arch/sparc/lib/U3copy_from_user.S -@@ -3,19 +3,19 @@ - * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) - */ - --#define EX_LD(x) \ -+#define EX_LD(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one; \ -+ .word 98b, y; \ - .text; \ - .align 4; - --#define EX_LD_FP(x) \ -+#define EX_LD_FP(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_fp;\ -+ .word 98b, y##_fp; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S -index 845139d..c4ee858 100644 ---- a/arch/sparc/lib/U3copy_to_user.S -+++ b/arch/sparc/lib/U3copy_to_user.S -@@ -3,19 +3,19 @@ - * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) - */ - --#define EX_ST(x) \ -+#define EX_ST(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one; \ -+ .word 98b, y; \ - .text; \ - .align 4; - --#define EX_ST_FP(x) \ -+#define EX_ST_FP(x,y) \ - 98: x; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one_fp;\ -+ .word 98b, y##_fp; \ - .text; \ - .align 4; - -diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S -index 491ee69..54f9870 100644 ---- a/arch/sparc/lib/U3memcpy.S -+++ b/arch/sparc/lib/U3memcpy.S -@@ -4,6 +4,7 @@ - */ - - #ifdef __KERNEL__ -+#include <linux/linkage.h> - #include <asm/visasm.h> - #include <asm/asi.h> - #define GLOBAL_SPARE %g7 -@@ -22,21 +23,17 @@ - #endif - - #ifndef EX_LD --#define EX_LD(x) x -+#define EX_LD(x,y) x - #endif - #ifndef EX_LD_FP --#define EX_LD_FP(x) x -+#define EX_LD_FP(x,y) x - #endif - - #ifndef EX_ST --#define EX_ST(x) x -+#define EX_ST(x,y) x - #endif - #ifndef EX_ST_FP --#define EX_ST_FP(x) x --#endif -- --#ifndef EX_RETVAL --#define EX_RETVAL(x) x -+#define EX_ST_FP(x,y) x - #endif - - #ifndef LOAD -@@ -77,6 +74,87 @@ - */ - - .text -+#ifndef EX_RETVAL -+#define EX_RETVAL(x) x -+__restore_fp: -+ VISExitHalf -+ retl -+ nop -+ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) -+ add %g1, 1, %g1 -+ add %g2, %g1, %g2 -+ ba,pt %xcc, __restore_fp -+ add %o2, %g2, %o0 -+ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) -+ENTRY(U3_retl_o2_plus_g2_fp) -+ ba,pt %xcc, __restore_fp -+ add %o2, %g2, %o0 -+ENDPROC(U3_retl_o2_plus_g2_fp) -+ENTRY(U3_retl_o2_plus_g2_plus_8_fp) -+ add %g2, 8, %g2 -+ ba,pt %xcc, __restore_fp -+ add %o2, %g2, %o0 -+ENDPROC(U3_retl_o2_plus_g2_plus_8_fp) -+ENTRY(U3_retl_o2) -+ retl -+ mov %o2, %o0 -+ENDPROC(U3_retl_o2) -+ENTRY(U3_retl_o2_plus_1) -+ retl -+ add %o2, 1, %o0 -+ENDPROC(U3_retl_o2_plus_1) -+ENTRY(U3_retl_o2_plus_4) -+ retl -+ add %o2, 4, %o0 -+ENDPROC(U3_retl_o2_plus_4) -+ENTRY(U3_retl_o2_plus_8) -+ retl -+ add %o2, 8, %o0 -+ENDPROC(U3_retl_o2_plus_8) -+ENTRY(U3_retl_o2_plus_g1_plus_1) -+ add %g1, 1, %g1 -+ retl -+ add %o2, %g1, %o0 -+ENDPROC(U3_retl_o2_plus_g1_plus_1) -+ENTRY(U3_retl_o2_fp) -+ ba,pt %xcc, __restore_fp -+ mov %o2, %o0 -+ENDPROC(U3_retl_o2_fp) -+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) -+ sll %o3, 6, %o3 -+ add %o3, 0x80, %o3 -+ ba,pt %xcc, __restore_fp -+ add %o2, %o3, %o0 -+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) -+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) -+ sll %o3, 6, %o3 -+ add %o3, 0x40, %o3 -+ ba,pt %xcc, __restore_fp -+ add %o2, %o3, %o0 -+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) -+ENTRY(U3_retl_o2_plus_GS_plus_0x10) -+ add GLOBAL_SPARE, 0x10, GLOBAL_SPARE -+ retl -+ add %o2, GLOBAL_SPARE, %o0 -+ENDPROC(U3_retl_o2_plus_GS_plus_0x10) -+ENTRY(U3_retl_o2_plus_GS_plus_0x08) -+ add GLOBAL_SPARE, 0x08, GLOBAL_SPARE -+ retl -+ add %o2, GLOBAL_SPARE, %o0 -+ENDPROC(U3_retl_o2_plus_GS_plus_0x08) -+ENTRY(U3_retl_o2_and_7_plus_GS) -+ and %o2, 7, %o2 -+ retl -+ add %o2, GLOBAL_SPARE, %o2 -+ENDPROC(U3_retl_o2_and_7_plus_GS) -+ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) -+ add GLOBAL_SPARE, 8, GLOBAL_SPARE -+ and %o2, 7, %o2 -+ retl -+ add %o2, GLOBAL_SPARE, %o2 -+ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) -+#endif -+ - .align 64 - - /* The cheetah's flexible spine, oversized liver, enlarged heart, -@@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - and %g2, 0x38, %g2 - - 1: subcc %g1, 0x1, %g1 -- EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) -- EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) -+ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1) -+ EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1) - bgu,pt %XCC, 1b - add %o1, 0x1, %o1 - -@@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - be,pt %icc, 3f - alignaddr %o1, %g0, %o1 - -- EX_LD_FP(LOAD(ldd, %o1, %f4)) --1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) -+ EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2) -+1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2) - add %o1, 0x8, %o1 - subcc %g2, 0x8, %g2 - faligndata %f4, %f6, %f0 -- EX_ST_FP(STORE(std, %f0, %o0)) -+ EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8) - be,pn %icc, 3f - add %o0, 0x8, %o0 - -- EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2) - add %o1, 0x8, %o1 - subcc %g2, 0x8, %g2 - faligndata %f6, %f4, %f2 -- EX_ST_FP(STORE(std, %f2, %o0)) -+ EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8) - bne,pt %icc, 1b - add %o0, 0x8, %o0 - -@@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - LOAD(prefetch, %o1 + 0x080, #one_read) - LOAD(prefetch, %o1 + 0x0c0, #one_read) - LOAD(prefetch, %o1 + 0x100, #one_read) -- EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2) - LOAD(prefetch, %o1 + 0x140, #one_read) -- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2) - LOAD(prefetch, %o1 + 0x180, #one_read) -- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2) - LOAD(prefetch, %o1 + 0x1c0, #one_read) - faligndata %f0, %f2, %f16 -- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2) - faligndata %f2, %f4, %f18 -- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2) - faligndata %f4, %f6, %f20 -- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2) - faligndata %f6, %f8, %f22 - -- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2) - faligndata %f8, %f10, %f24 -- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2) - faligndata %f10, %f12, %f26 -- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) - - subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE - add %o1, 0x40, %o1 -@@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - .align 64 - 1: -- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) - faligndata %f12, %f14, %f28 -- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) - faligndata %f14, %f0, %f30 -- EX_ST_FP(STORE_BLK(%f16, %o0)) -- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) -+ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) - faligndata %f0, %f2, %f16 - add %o0, 0x40, %o0 - -- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) - faligndata %f2, %f4, %f18 -- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) - faligndata %f4, %f6, %f20 -- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) - subcc %o3, 0x01, %o3 - faligndata %f6, %f8, %f22 -- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80) - - faligndata %f8, %f10, %f24 -- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80) - LOAD(prefetch, %o1 + 0x1c0, #one_read) - faligndata %f10, %f12, %f26 - bg,pt %XCC, 1b -@@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - /* Finally we copy the last full 64-byte block. */ - 2: -- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) - faligndata %f12, %f14, %f28 -- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) - faligndata %f14, %f0, %f30 -- EX_ST_FP(STORE_BLK(%f16, %o0)) -- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) -+ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) - faligndata %f0, %f2, %f16 -- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) - faligndata %f2, %f4, %f18 -- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) - faligndata %f4, %f6, %f20 -- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) - faligndata %f6, %f8, %f22 -- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40) - faligndata %f8, %f10, %f24 - cmp %g1, 0 - be,pt %XCC, 1f - add %o0, 0x40, %o0 -- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40) - 1: faligndata %f10, %f12, %f26 - faligndata %f12, %f14, %f28 - faligndata %f14, %f0, %f30 -- EX_ST_FP(STORE_BLK(%f16, %o0)) -+ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40) - add %o0, 0x40, %o0 - add %o1, 0x40, %o1 - membar #Sync -@@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - sub %o2, %g2, %o2 - be,a,pt %XCC, 1f -- EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2) - --1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) -+1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2) - add %o1, 0x8, %o1 - subcc %g2, 0x8, %g2 - faligndata %f0, %f2, %f8 -- EX_ST_FP(STORE(std, %f8, %o0)) -+ EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) - be,pn %XCC, 2f - add %o0, 0x8, %o0 -- EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) -+ EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2) - add %o1, 0x8, %o1 - subcc %g2, 0x8, %g2 - faligndata %f2, %f0, %f8 -- EX_ST_FP(STORE(std, %f8, %o0)) -+ EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) - bne,pn %XCC, 1b - add %o0, 0x8, %o0 - -@@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - andcc %o2, 0x8, %g0 - be,pt %icc, 1f - nop -- EX_LD(LOAD(ldx, %o1, %o5)) -- EX_ST(STORE(stx, %o5, %o1 + %o3)) -+ EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2) -+ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2) - add %o1, 0x8, %o1 -+ sub %o2, 8, %o2 - - 1: andcc %o2, 0x4, %g0 - be,pt %icc, 1f - nop -- EX_LD(LOAD(lduw, %o1, %o5)) -- EX_ST(STORE(stw, %o5, %o1 + %o3)) -+ EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2) -+ EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2) - add %o1, 0x4, %o1 -+ sub %o2, 4, %o2 - - 1: andcc %o2, 0x2, %g0 - be,pt %icc, 1f - nop -- EX_LD(LOAD(lduh, %o1, %o5)) -- EX_ST(STORE(sth, %o5, %o1 + %o3)) -+ EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2) -+ EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2) - add %o1, 0x2, %o1 -+ sub %o2, 2, %o2 - - 1: andcc %o2, 0x1, %g0 - be,pt %icc, 85f - nop -- EX_LD(LOAD(ldub, %o1, %o5)) -+ EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) - ba,pt %xcc, 85f -- EX_ST(STORE(stb, %o5, %o1 + %o3)) -+ EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) - - .align 64 - 70: /* 16 < len <= 64 */ -@@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - andn %o2, 0xf, GLOBAL_SPARE - and %o2, 0xf, %o2 - 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE -- EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) -- EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) -- EX_ST(STORE(stx, %o5, %o1 + %o3)) -+ EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10) -+ EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10) -+ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10) - add %o1, 0x8, %o1 -- EX_ST(STORE(stx, %g1, %o1 + %o3)) -+ EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08) - bgu,pt %XCC, 1b - add %o1, 0x8, %o1 - 73: andcc %o2, 0x8, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x8, %o2 -- EX_LD(LOAD(ldx, %o1, %o5)) -- EX_ST(STORE(stx, %o5, %o1 + %o3)) -+ EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8) -+ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8) - add %o1, 0x8, %o1 - 1: andcc %o2, 0x4, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x4, %o2 -- EX_LD(LOAD(lduw, %o1, %o5)) -- EX_ST(STORE(stw, %o5, %o1 + %o3)) -+ EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4) -+ EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) - add %o1, 0x4, %o1 - 1: cmp %o2, 0 - be,pt %XCC, 85f -@@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - sub %o2, %g1, %o2 - - 1: subcc %g1, 1, %g1 -- EX_LD(LOAD(ldub, %o1, %o5)) -- EX_ST(STORE(stb, %o5, %o1 + %o3)) -+ EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1) -+ EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1) - bgu,pt %icc, 1b - add %o1, 1, %o1 - -@@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 8: mov 64, %o3 - andn %o1, 0x7, %o1 -- EX_LD(LOAD(ldx, %o1, %g2)) -+ EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) - sub %o3, %g1, %o3 - andn %o2, 0x7, GLOBAL_SPARE - sllx %g2, %g1, %g2 --1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) -+1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS) - subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE - add %o1, 0x8, %o1 - srlx %g3, %o3, %o5 - or %o5, %g2, %o5 -- EX_ST(STORE(stx, %o5, %o0)) -+ EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8) - add %o0, 0x8, %o0 - bgu,pt %icc, 1b - sllx %g3, %g1, %g2 -@@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - - 1: - subcc %o2, 4, %o2 -- EX_LD(LOAD(lduw, %o1, %g1)) -- EX_ST(STORE(stw, %g1, %o1 + %o3)) -+ EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4) -+ EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) - bgu,pt %XCC, 1b - add %o1, 4, %o1 - -@@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - .align 32 - 90: - subcc %o2, 1, %o2 -- EX_LD(LOAD(ldub, %o1, %g1)) -- EX_ST(STORE(stb, %g1, %o1 + %o3)) -+ EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1) -+ EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) - bgu,pt %XCC, 90b - add %o1, 1, %o1 - retl -diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S -index 302c0e6..4c89b48 100644 ---- a/arch/sparc/lib/copy_in_user.S -+++ b/arch/sparc/lib/copy_in_user.S -@@ -8,18 +8,33 @@ - - #define XCC xcc - --#define EX(x,y) \ -+#define EX(x,y,z) \ - 98: x,y; \ - .section __ex_table,"a";\ - .align 4; \ -- .word 98b, __retl_one; \ -+ .word 98b, z; \ - .text; \ - .align 4; - -+#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8) -+#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4) -+#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1) -+ - .register %g2,#scratch - .register %g3,#scratch - - .text -+__retl_o4_plus_8: -+ add %o4, %o2, %o4 -+ retl -+ add %o4, 8, %o0 -+__retl_o2_plus_4: -+ retl -+ add %o2, 4, %o0 -+__retl_o2_plus_1: -+ retl -+ add %o2, 1, %o0 -+ - .align 32 - - /* Don't try to get too fancy here, just nice and -@@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ - andn %o2, 0x7, %o4 - and %o2, 0x7, %o2 - 1: subcc %o4, 0x8, %o4 -- EX(ldxa [%o1] %asi, %o5) -- EX(stxa %o5, [%o0] %asi) -+ EX_O4(ldxa [%o1] %asi, %o5) -+ EX_O4(stxa %o5, [%o0] %asi) - add %o1, 0x8, %o1 - bgu,pt %XCC, 1b - add %o0, 0x8, %o0 -@@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ - be,pt %XCC, 1f - nop - sub %o2, 0x4, %o2 -- EX(lduwa [%o1] %asi, %o5) -- EX(stwa %o5, [%o0] %asi) -+ EX_O2_4(lduwa [%o1] %asi, %o5) -+ EX_O2_4(stwa %o5, [%o0] %asi) - add %o1, 0x4, %o1 - add %o0, 0x4, %o0 - 1: cmp %o2, 0 -@@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ - - 82: - subcc %o2, 4, %o2 -- EX(lduwa [%o1] %asi, %g1) -- EX(stwa %g1, [%o0] %asi) -+ EX_O2_4(lduwa [%o1] %asi, %g1) -+ EX_O2_4(stwa %g1, [%o0] %asi) - add %o1, 4, %o1 - bgu,pt %XCC, 82b - add %o0, 4, %o0 -@@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ - .align 32 - 90: - subcc %o2, 1, %o2 -- EX(lduba [%o1] %asi, %g1) -- EX(stba %g1, [%o0] %asi) -+ EX_O2_1(lduba [%o1] %asi, %g1) -+ EX_O2_1(stba %g1, [%o0] %asi) - add %o1, 1, %o1 - bgu,pt %XCC, 90b - add %o0, 1, %o0 -diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c -deleted file mode 100644 -index ac96ae2..0000000 ---- a/arch/sparc/lib/user_fixup.c -+++ /dev/null -@@ -1,71 +0,0 @@ --/* user_fixup.c: Fix up user copy faults. -- * -- * Copyright (C) 2004 David S. Miller <davem@redhat.com> -- */ -- --#include <linux/compiler.h> --#include <linux/kernel.h> --#include <linux/string.h> --#include <linux/errno.h> --#include <linux/module.h> -- --#include <asm/uaccess.h> -- --/* Calculating the exact fault address when using -- * block loads and stores can be very complicated. -- * -- * Instead of trying to be clever and handling all -- * of the cases, just fix things up simply here. -- */ -- --static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) --{ -- unsigned long fault_addr = current_thread_info()->fault_address; -- unsigned long end = start + size; -- -- if (fault_addr < start || fault_addr >= end) { -- *offset = 0; -- } else { -- *offset = fault_addr - start; -- size = end - fault_addr; -- } -- return size; --} -- --unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) --{ -- unsigned long offset; -- -- size = compute_size((unsigned long) from, size, &offset); -- if (likely(size)) -- memset(to + offset, 0, size); -- -- return size; --} --EXPORT_SYMBOL(copy_from_user_fixup); -- --unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) --{ -- unsigned long offset; -- -- return compute_size((unsigned long) to, size, &offset); --} --EXPORT_SYMBOL(copy_to_user_fixup); -- --unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) --{ -- unsigned long fault_addr = current_thread_info()->fault_address; -- unsigned long start = (unsigned long) to; -- unsigned long end = start + size; -- -- if (fault_addr >= start && fault_addr < end) -- return end - fault_addr; -- -- start = (unsigned long) from; -- end = start + size; -- if (fault_addr >= start && fault_addr < end) -- return end - fault_addr; -- -- return size; --} --EXPORT_SYMBOL(copy_in_user_fixup); -diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c -index f2b7711..e20fbba 100644 ---- a/arch/sparc/mm/tsb.c -+++ b/arch/sparc/mm/tsb.c -@@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr) - return (tag == (vaddr >> 22)); - } - -+static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end) -+{ -+ unsigned long idx; -+ -+ for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) { -+ struct tsb *ent = &swapper_tsb[idx]; -+ unsigned long match = idx << 13; -+ -+ match |= (ent->tag << 22); -+ if (match >= start && match < end) -+ ent->tag = (1UL << TSB_TAG_INVALID_BIT); -+ } -+} -+ - /* TSB flushes need only occur on the processor initiating the address - * space modification, not on each cpu the address space has run on. - * Only the TLB flush needs that treatment. -@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) - { - unsigned long v; - -+ if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES) -+ return flush_tsb_kernel_range_scan(start, end); -+ - for (v = start; v < end; v += PAGE_SIZE) { - unsigned long hash = tsb_hash(v, PAGE_SHIFT, - KERNEL_TSB_NENTRIES); -diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S -index b4f4733..5d2fd6c 100644 ---- a/arch/sparc/mm/ultra.S -+++ b/arch/sparc/mm/ultra.S -@@ -30,7 +30,7 @@ - .text - .align 32 - .globl __flush_tlb_mm --__flush_tlb_mm: /* 18 insns */ -+__flush_tlb_mm: /* 19 insns */ - /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ - ldxa [%o1] ASI_DMMU, %g2 - cmp %g2, %o0 -@@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */ - - .align 32 - .globl __flush_tlb_pending --__flush_tlb_pending: /* 26 insns */ -+__flush_tlb_pending: /* 27 insns */ - /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ - rdpr %pstate, %g7 - sllx %o1, 3, %o1 -@@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */ - - .align 32 - .globl __flush_tlb_kernel_range --__flush_tlb_kernel_range: /* 16 insns */ -+__flush_tlb_kernel_range: /* 31 insns */ - /* %o0=start, %o1=end */ - cmp %o0, %o1 - be,pn %xcc, 2f -+ sub %o1, %o0, %o3 -+ srlx %o3, 18, %o4 -+ brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow - sethi %hi(PAGE_SIZE), %o4 -- sub %o1, %o0, %o3 - sub %o3, %o4, %o3 - or %o0, 0x20, %o0 ! Nucleus - 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP -@@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */ - retl - nop - nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ -+__spitfire_flush_tlb_kernel_range_slow: -+ mov 63 * 8, %o4 -+1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3 -+ andcc %o3, 0x40, %g0 /* _PAGE_L_4U */ -+ bne,pn %xcc, 2f -+ mov TLB_TAG_ACCESS, %o3 -+ stxa %g0, [%o3] ASI_IMMU -+ stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS -+ membar #Sync -+2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3 -+ andcc %o3, 0x40, %g0 -+ bne,pn %xcc, 2f -+ mov TLB_TAG_ACCESS, %o3 -+ stxa %g0, [%o3] ASI_DMMU -+ stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS -+ membar #Sync -+2: sub %o4, 8, %o4 -+ brgez,pt %o4, 1b -+ nop -+ retl -+ nop - - __spitfire_flush_tlb_mm_slow: - rdpr %pstate, %g1 -@@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */ - retl - wrpr %g7, 0x0, %pstate - -+__cheetah_flush_tlb_kernel_range: /* 31 insns */ -+ /* %o0=start, %o1=end */ -+ cmp %o0, %o1 -+ be,pn %xcc, 2f -+ sub %o1, %o0, %o3 -+ srlx %o3, 18, %o4 -+ brnz,pn %o4, 3f -+ sethi %hi(PAGE_SIZE), %o4 -+ sub %o3, %o4, %o3 -+ or %o0, 0x20, %o0 ! Nucleus -+1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP -+ stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP -+ membar #Sync -+ brnz,pt %o3, 1b -+ sub %o3, %o4, %o3 -+2: sethi %hi(KERNBASE), %o3 -+ flush %o3 -+ retl -+ nop -+3: mov 0x80, %o4 -+ stxa %g0, [%o4] ASI_DMMU_DEMAP -+ membar #Sync -+ stxa %g0, [%o4] ASI_IMMU_DEMAP -+ membar #Sync -+ retl -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ - #ifdef DCACHE_ALIASING_POSSIBLE - __cheetah_flush_dcache_page: /* 11 insns */ - sethi %hi(PAGE_OFFSET), %g1 -@@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error: - ret - restore - --__hypervisor_flush_tlb_mm: /* 10 insns */ -+__hypervisor_flush_tlb_mm: /* 19 insns */ - mov %o0, %o2 /* ARG2: mmu context */ - mov 0, %o0 /* ARG0: CPU lists unimplemented */ - mov 0, %o1 /* ARG1: CPU lists unimplemented */ - mov HV_MMU_ALL, %o3 /* ARG3: flags */ - mov HV_FAST_MMU_DEMAP_CTX, %o5 - ta HV_FAST_TRAP -- brnz,pn %o0, __hypervisor_tlb_tl0_error -+ brnz,pn %o0, 1f - mov HV_FAST_MMU_DEMAP_CTX, %o1 - retl - nop -+1: sethi %hi(__hypervisor_tlb_tl0_error), %o5 -+ jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0 -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop - --__hypervisor_flush_tlb_page: /* 11 insns */ -+__hypervisor_flush_tlb_page: /* 22 insns */ - /* %o0 = context, %o1 = vaddr */ - mov %o0, %g2 - mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ -@@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */ - srlx %o0, PAGE_SHIFT, %o0 - sllx %o0, PAGE_SHIFT, %o0 - ta HV_MMU_UNMAP_ADDR_TRAP -- brnz,pn %o0, __hypervisor_tlb_tl0_error -+ brnz,pn %o0, 1f - mov HV_MMU_UNMAP_ADDR_TRAP, %o1 - retl - nop -+1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 -+ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop - --__hypervisor_flush_tlb_pending: /* 16 insns */ -+__hypervisor_flush_tlb_pending: /* 27 insns */ - /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ - sllx %o1, 3, %g1 - mov %o2, %g2 -@@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */ - srlx %o0, PAGE_SHIFT, %o0 - sllx %o0, PAGE_SHIFT, %o0 - ta HV_MMU_UNMAP_ADDR_TRAP -- brnz,pn %o0, __hypervisor_tlb_tl0_error -+ brnz,pn %o0, 1f - mov HV_MMU_UNMAP_ADDR_TRAP, %o1 - brnz,pt %g1, 1b - nop - retl - nop -+1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 -+ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop - --__hypervisor_flush_tlb_kernel_range: /* 16 insns */ -+__hypervisor_flush_tlb_kernel_range: /* 31 insns */ - /* %o0=start, %o1=end */ - cmp %o0, %o1 - be,pn %xcc, 2f -- sethi %hi(PAGE_SIZE), %g3 -- mov %o0, %g1 -- sub %o1, %g1, %g2 -+ sub %o1, %o0, %g2 -+ srlx %g2, 18, %g3 -+ brnz,pn %g3, 4f -+ mov %o0, %g1 -+ sethi %hi(PAGE_SIZE), %g3 - sub %g2, %g3, %g2 - 1: add %g1, %g2, %o0 /* ARG0: virtual address */ - mov 0, %o1 /* ARG1: mmu context */ - mov HV_MMU_ALL, %o2 /* ARG2: flags */ - ta HV_MMU_UNMAP_ADDR_TRAP -- brnz,pn %o0, __hypervisor_tlb_tl0_error -+ brnz,pn %o0, 3f - mov HV_MMU_UNMAP_ADDR_TRAP, %o1 - brnz,pt %g2, 1b - sub %g2, %g3, %g2 - 2: retl - nop -+3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 -+ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 -+ nop -+4: mov 0, %o0 /* ARG0: CPU lists unimplemented */ -+ mov 0, %o1 /* ARG1: CPU lists unimplemented */ -+ mov 0, %o2 /* ARG2: mmu context == nucleus */ -+ mov HV_MMU_ALL, %o3 /* ARG3: flags */ -+ mov HV_FAST_MMU_DEMAP_CTX, %o5 -+ ta HV_FAST_TRAP -+ brnz,pn %o0, 3b -+ mov HV_FAST_MMU_DEMAP_CTX, %o1 -+ retl -+ nop - - #ifdef DCACHE_ALIASING_POSSIBLE - /* XXX Niagara and friends have an 8K cache, so no aliasing is -@@ -394,43 +511,6 @@ tlb_patch_one: - retl - nop - -- .globl cheetah_patch_cachetlbops --cheetah_patch_cachetlbops: -- save %sp, -128, %sp -- -- sethi %hi(__flush_tlb_mm), %o0 -- or %o0, %lo(__flush_tlb_mm), %o0 -- sethi %hi(__cheetah_flush_tlb_mm), %o1 -- or %o1, %lo(__cheetah_flush_tlb_mm), %o1 -- call tlb_patch_one -- mov 19, %o2 -- -- sethi %hi(__flush_tlb_page), %o0 -- or %o0, %lo(__flush_tlb_page), %o0 -- sethi %hi(__cheetah_flush_tlb_page), %o1 -- or %o1, %lo(__cheetah_flush_tlb_page), %o1 -- call tlb_patch_one -- mov 22, %o2 -- -- sethi %hi(__flush_tlb_pending), %o0 -- or %o0, %lo(__flush_tlb_pending), %o0 -- sethi %hi(__cheetah_flush_tlb_pending), %o1 -- or %o1, %lo(__cheetah_flush_tlb_pending), %o1 -- call tlb_patch_one -- mov 27, %o2 -- --#ifdef DCACHE_ALIASING_POSSIBLE -- sethi %hi(__flush_dcache_page), %o0 -- or %o0, %lo(__flush_dcache_page), %o0 -- sethi %hi(__cheetah_flush_dcache_page), %o1 -- or %o1, %lo(__cheetah_flush_dcache_page), %o1 -- call tlb_patch_one -- mov 11, %o2 --#endif /* DCACHE_ALIASING_POSSIBLE */ -- -- ret -- restore -- - #ifdef CONFIG_SMP - /* These are all called by the slaves of a cross call, at - * trap level 1, with interrupts fully disabled. -@@ -447,7 +527,7 @@ cheetah_patch_cachetlbops: - */ - .align 32 - .globl xcall_flush_tlb_mm --xcall_flush_tlb_mm: /* 21 insns */ -+xcall_flush_tlb_mm: /* 24 insns */ - mov PRIMARY_CONTEXT, %g2 - ldxa [%g2] ASI_DMMU, %g3 - srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 -@@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */ - nop - nop - nop -+ nop -+ nop -+ nop - - .globl xcall_flush_tlb_page --xcall_flush_tlb_page: /* 17 insns */ -+xcall_flush_tlb_page: /* 20 insns */ - /* %g5=context, %g1=vaddr */ - mov PRIMARY_CONTEXT, %g4 - ldxa [%g4] ASI_DMMU, %g2 -@@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */ - retry - nop - nop -+ nop -+ nop -+ nop - - .globl xcall_flush_tlb_kernel_range --xcall_flush_tlb_kernel_range: /* 25 insns */ -+xcall_flush_tlb_kernel_range: /* 44 insns */ - sethi %hi(PAGE_SIZE - 1), %g2 - or %g2, %lo(PAGE_SIZE - 1), %g2 - andn %g1, %g2, %g1 - andn %g7, %g2, %g7 - sub %g7, %g1, %g3 -- add %g2, 1, %g2 -+ srlx %g3, 18, %g2 -+ brnz,pn %g2, 2f -+ add %g2, 1, %g2 - sub %g3, %g2, %g3 - or %g1, 0x20, %g1 ! Nucleus - 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP -@@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */ - brnz,pt %g3, 1b - sub %g3, %g2, %g3 - retry -- nop -- nop -+2: mov 63 * 8, %g1 -+1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2 -+ andcc %g2, 0x40, %g0 /* _PAGE_L_4U */ -+ bne,pn %xcc, 2f -+ mov TLB_TAG_ACCESS, %g2 -+ stxa %g0, [%g2] ASI_IMMU -+ stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS -+ membar #Sync -+2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2 -+ andcc %g2, 0x40, %g0 -+ bne,pn %xcc, 2f -+ mov TLB_TAG_ACCESS, %g2 -+ stxa %g0, [%g2] ASI_DMMU -+ stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS -+ membar #Sync -+2: sub %g1, 8, %g1 -+ brgez,pt %g1, 1b -+ nop -+ retry - nop - nop - nop -@@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4: - - retry - -+__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ -+ sethi %hi(PAGE_SIZE - 1), %g2 -+ or %g2, %lo(PAGE_SIZE - 1), %g2 -+ andn %g1, %g2, %g1 -+ andn %g7, %g2, %g7 -+ sub %g7, %g1, %g3 -+ srlx %g3, 18, %g2 -+ brnz,pn %g2, 2f -+ add %g2, 1, %g2 -+ sub %g3, %g2, %g3 -+ or %g1, 0x20, %g1 ! Nucleus -+1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP -+ stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP -+ membar #Sync -+ brnz,pt %g3, 1b -+ sub %g3, %g2, %g3 -+ retry -+2: mov 0x80, %g2 -+ stxa %g0, [%g2] ASI_DMMU_DEMAP -+ membar #Sync -+ stxa %g0, [%g2] ASI_IMMU_DEMAP -+ membar #Sync -+ retry -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ nop -+ - #ifdef DCACHE_ALIASING_POSSIBLE - .align 32 - .globl xcall_flush_dcache_page_cheetah -@@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error: - ba,a,pt %xcc, rtrap - - .globl __hypervisor_xcall_flush_tlb_mm --__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ -+__hypervisor_xcall_flush_tlb_mm: /* 24 insns */ - /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ - mov %o0, %g2 - mov %o1, %g3 -@@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ - mov HV_FAST_MMU_DEMAP_CTX, %o5 - ta HV_FAST_TRAP - mov HV_FAST_MMU_DEMAP_CTX, %g6 -- brnz,pn %o0, __hypervisor_tlb_xcall_error -+ brnz,pn %o0, 1f - mov %o0, %g5 - mov %g2, %o0 - mov %g3, %o1 -@@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ - mov %g7, %o5 - membar #Sync - retry -+1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 -+ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 -+ nop - - .globl __hypervisor_xcall_flush_tlb_page --__hypervisor_xcall_flush_tlb_page: /* 17 insns */ -+__hypervisor_xcall_flush_tlb_page: /* 20 insns */ - /* %g5=ctx, %g1=vaddr */ - mov %o0, %g2 - mov %o1, %g3 -@@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */ - sllx %o0, PAGE_SHIFT, %o0 - ta HV_MMU_UNMAP_ADDR_TRAP - mov HV_MMU_UNMAP_ADDR_TRAP, %g6 -- brnz,a,pn %o0, __hypervisor_tlb_xcall_error -+ brnz,a,pn %o0, 1f - mov %o0, %g5 - mov %g2, %o0 - mov %g3, %o1 - mov %g4, %o2 - membar #Sync - retry -+1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 -+ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 -+ nop - - .globl __hypervisor_xcall_flush_tlb_kernel_range --__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ -+__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */ - /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ - sethi %hi(PAGE_SIZE - 1), %g2 - or %g2, %lo(PAGE_SIZE - 1), %g2 - andn %g1, %g2, %g1 - andn %g7, %g2, %g7 - sub %g7, %g1, %g3 -+ srlx %g3, 18, %g7 - add %g2, 1, %g2 - sub %g3, %g2, %g3 - mov %o0, %g2 - mov %o1, %g4 -- mov %o2, %g7 -+ brnz,pn %g7, 2f -+ mov %o2, %g7 - 1: add %g1, %g3, %o0 /* ARG0: virtual address */ - mov 0, %o1 /* ARG1: mmu context */ - mov HV_MMU_ALL, %o2 /* ARG2: flags */ - ta HV_MMU_UNMAP_ADDR_TRAP - mov HV_MMU_UNMAP_ADDR_TRAP, %g6 -- brnz,pn %o0, __hypervisor_tlb_xcall_error -+ brnz,pn %o0, 1f - mov %o0, %g5 - sethi %hi(PAGE_SIZE), %o2 - brnz,pt %g3, 1b - sub %g3, %o2, %g3 -- mov %g2, %o0 -+5: mov %g2, %o0 - mov %g4, %o1 - mov %g7, %o2 - membar #Sync - retry -+1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 -+ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 -+ nop -+2: mov %o3, %g1 -+ mov %o5, %g3 -+ mov 0, %o0 /* ARG0: CPU lists unimplemented */ -+ mov 0, %o1 /* ARG1: CPU lists unimplemented */ -+ mov 0, %o2 /* ARG2: mmu context == nucleus */ -+ mov HV_MMU_ALL, %o3 /* ARG3: flags */ -+ mov HV_FAST_MMU_DEMAP_CTX, %o5 -+ ta HV_FAST_TRAP -+ mov %g1, %o3 -+ brz,pt %o0, 5b -+ mov %g3, %o5 -+ mov HV_FAST_MMU_DEMAP_CTX, %g6 -+ ba,pt %xcc, 1b -+ clr %g5 - - /* These just get rescheduled to PIL vectors. */ - .globl xcall_call_function -@@ -809,6 +985,58 @@ xcall_kgdb_capture: - - #endif /* CONFIG_SMP */ - -+ .globl cheetah_patch_cachetlbops -+cheetah_patch_cachetlbops: -+ save %sp, -128, %sp -+ -+ sethi %hi(__flush_tlb_mm), %o0 -+ or %o0, %lo(__flush_tlb_mm), %o0 -+ sethi %hi(__cheetah_flush_tlb_mm), %o1 -+ or %o1, %lo(__cheetah_flush_tlb_mm), %o1 -+ call tlb_patch_one -+ mov 19, %o2 -+ -+ sethi %hi(__flush_tlb_page), %o0 -+ or %o0, %lo(__flush_tlb_page), %o0 -+ sethi %hi(__cheetah_flush_tlb_page), %o1 -+ or %o1, %lo(__cheetah_flush_tlb_page), %o1 -+ call tlb_patch_one -+ mov 22, %o2 -+ -+ sethi %hi(__flush_tlb_pending), %o0 -+ or %o0, %lo(__flush_tlb_pending), %o0 -+ sethi %hi(__cheetah_flush_tlb_pending), %o1 -+ or %o1, %lo(__cheetah_flush_tlb_pending), %o1 -+ call tlb_patch_one -+ mov 27, %o2 -+ -+ sethi %hi(__flush_tlb_kernel_range), %o0 -+ or %o0, %lo(__flush_tlb_kernel_range), %o0 -+ sethi %hi(__cheetah_flush_tlb_kernel_range), %o1 -+ or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1 -+ call tlb_patch_one -+ mov 31, %o2 -+ -+#ifdef DCACHE_ALIASING_POSSIBLE -+ sethi %hi(__flush_dcache_page), %o0 -+ or %o0, %lo(__flush_dcache_page), %o0 -+ sethi %hi(__cheetah_flush_dcache_page), %o1 -+ or %o1, %lo(__cheetah_flush_dcache_page), %o1 -+ call tlb_patch_one -+ mov 11, %o2 -+#endif /* DCACHE_ALIASING_POSSIBLE */ -+ -+#ifdef CONFIG_SMP -+ sethi %hi(xcall_flush_tlb_kernel_range), %o0 -+ or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 -+ sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1 -+ or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1 -+ call tlb_patch_one -+ mov 44, %o2 -+#endif /* CONFIG_SMP */ -+ -+ ret -+ restore - - .globl hypervisor_patch_cachetlbops - hypervisor_patch_cachetlbops: -@@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops: - sethi %hi(__hypervisor_flush_tlb_mm), %o1 - or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 - call tlb_patch_one -- mov 10, %o2 -+ mov 19, %o2 - - sethi %hi(__flush_tlb_page), %o0 - or %o0, %lo(__flush_tlb_page), %o0 - sethi %hi(__hypervisor_flush_tlb_page), %o1 - or %o1, %lo(__hypervisor_flush_tlb_page), %o1 - call tlb_patch_one -- mov 11, %o2 -+ mov 22, %o2 - - sethi %hi(__flush_tlb_pending), %o0 - or %o0, %lo(__flush_tlb_pending), %o0 - sethi %hi(__hypervisor_flush_tlb_pending), %o1 - or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 - call tlb_patch_one -- mov 16, %o2 -+ mov 27, %o2 - - sethi %hi(__flush_tlb_kernel_range), %o0 - or %o0, %lo(__flush_tlb_kernel_range), %o0 - sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 - or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 - call tlb_patch_one -- mov 16, %o2 -+ mov 31, %o2 - - #ifdef DCACHE_ALIASING_POSSIBLE - sethi %hi(__flush_dcache_page), %o0 -@@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops: - sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 - or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 - call tlb_patch_one -- mov 21, %o2 -+ mov 24, %o2 - - sethi %hi(xcall_flush_tlb_page), %o0 - or %o0, %lo(xcall_flush_tlb_page), %o0 - sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 - or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 - call tlb_patch_one -- mov 17, %o2 -+ mov 20, %o2 - - sethi %hi(xcall_flush_tlb_kernel_range), %o0 - or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 - sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 - or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 - call tlb_patch_one -- mov 25, %o2 -+ mov 44, %o2 - #endif /* CONFIG_SMP */ - - ret -diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c -index c4751ec..45e87c9 100644 ---- a/drivers/net/ethernet/broadcom/bgmac.c -+++ b/drivers/net/ethernet/broadcom/bgmac.c -@@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, - u32 ctl; - - ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL); -+ -+ /* preserve ONLY bits 16-17 from current hardware value */ -+ ctl &= BGMAC_DMA_RX_ADDREXT_MASK; -+ - if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) { - ctl &= ~BGMAC_DMA_RX_BL_MASK; - ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT; -@@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, - ctl &= ~BGMAC_DMA_RX_PT_MASK; - ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT; - } -- ctl &= BGMAC_DMA_RX_ADDREXT_MASK; - ctl |= BGMAC_DMA_RX_ENABLE; - ctl |= BGMAC_DMA_RX_PARITY_DISABLE; - ctl |= BGMAC_DMA_RX_OVERFLOW_CONT; -diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c -index 505ceaf..2c850a9 100644 ---- a/drivers/net/ethernet/broadcom/bnx2.c -+++ b/drivers/net/ethernet/broadcom/bnx2.c -@@ -49,6 +49,7 @@ - #include <linux/firmware.h> - #include <linux/log2.h> - #include <linux/aer.h> -+#include <linux/crash_dump.h> - - #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE) - #define BCM_CNIC 1 -@@ -4759,15 +4760,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp) - BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR); - } - --static int --bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) -+static void -+bnx2_wait_dma_complete(struct bnx2 *bp) - { - u32 val; -- int i, rc = 0; -- u8 old_port; -+ int i; - -- /* Wait for the current PCI transaction to complete before -- * issuing a reset. */ -+ /* -+ * Wait for the current PCI transaction to complete before -+ * issuing a reset. -+ */ - if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) || - (BNX2_CHIP(bp) == BNX2_CHIP_5708)) { - BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS, -@@ -4791,6 +4793,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) - } - } - -+ return; -+} -+ -+ -+static int -+bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) -+{ -+ u32 val; -+ int i, rc = 0; -+ u8 old_port; -+ -+ /* Wait for the current PCI transaction to complete before -+ * issuing a reset. */ -+ bnx2_wait_dma_complete(bp); -+ - /* Wait for the firmware to tell us it is ok to issue a reset. */ - bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1); - -@@ -6356,6 +6373,10 @@ bnx2_open(struct net_device *dev) - struct bnx2 *bp = netdev_priv(dev); - int rc; - -+ rc = bnx2_request_firmware(bp); -+ if (rc < 0) -+ goto out; -+ - netif_carrier_off(dev); - - bnx2_disable_int(bp); -@@ -6424,6 +6445,7 @@ bnx2_open(struct net_device *dev) - bnx2_free_irq(bp); - bnx2_free_mem(bp); - bnx2_del_napi(bp); -+ bnx2_release_firmware(bp); - goto out; - } - -@@ -8570,12 +8592,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - - pci_set_drvdata(pdev, dev); - -- rc = bnx2_request_firmware(bp); -- if (rc < 0) -- goto error; -- -+ /* -+ * In-flight DMA from 1st kernel could continue going in kdump kernel. -+ * New io-page table has been created before bnx2 does reset at open stage. -+ * We have to wait for the in-flight DMA to complete to avoid it look up -+ * into the newly created io-page table. -+ */ -+ if (is_kdump_kernel()) -+ bnx2_wait_dma_complete(bp); - -- bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET); - memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); - - dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | -@@ -8608,7 +8633,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - return 0; - - error: -- bnx2_release_firmware(bp); - pci_iounmap(pdev, bp->regview); - pci_release_regions(pdev); - pci_disable_device(pdev); -diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c -index d48873b..5cdc96b 100644 ---- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c -+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c -@@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) - - span_entry->used = true; - span_entry->id = index; -- span_entry->ref_count = 0; -+ span_entry->ref_count = 1; - span_entry->local_port = local_port; - return span_entry; - } -@@ -268,6 +268,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) - - span_entry = mlxsw_sp_span_entry_find(port); - if (span_entry) { -+ /* Already exists, just take a reference */ - span_entry->ref_count++; - return span_entry; - } -@@ -278,6 +279,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) - static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_span_entry *span_entry) - { -+ WARN_ON(!span_entry->ref_count); - if (--span_entry->ref_count == 0) - mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); - return 0; -diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c -index 3f5c51d..62514b9 100644 ---- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c -+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c -@@ -777,6 +777,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, - } - } - -+static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) -+{ -+ u8 num_rec, last_rec_index, num_entries; -+ -+ num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); -+ last_rec_index = num_rec - 1; -+ -+ if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM) -+ return false; -+ if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) == -+ MLXSW_REG_RAUHTD_TYPE_IPV6) -+ return true; -+ -+ num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, -+ last_rec_index); -+ if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC) -+ return true; -+ return false; -+} -+ - static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) - { - char *rauhtd_pl; -@@ -803,7 +823,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) - for (i = 0; i < num_rec; i++) - mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, - i); -- } while (num_rec); -+ } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); - rtnl_unlock(); - - kfree(rauhtd_pl); -diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c -index 4c8c60a..fe9e7b1 100644 ---- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c -+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c -@@ -871,6 +871,13 @@ static int stmmac_init_phy(struct net_device *dev) - return -ENODEV; - } - -+ /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid -+ * subsequent PHY polling, make sure we force a link transition if -+ * we have a UP/DOWN/UP transition -+ */ -+ if (phydev->is_pseudo_fixed_link) -+ phydev->irq = PHY_POLL; -+ - pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" - " Link = %d\n", dev->name, phydev->phy_id, phydev->link); - -diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c -index 5c8429f..3a5530d 100644 ---- a/drivers/usb/gadget/function/f_fs.c -+++ b/drivers/usb/gadget/function/f_fs.c -@@ -133,8 +133,60 @@ struct ffs_epfile { - /* - * Buffer for holding data from partial reads which may happen since - * we’re rounding user read requests to a multiple of a max packet size. -+ * -+ * The pointer is initialised with NULL value and may be set by -+ * __ffs_epfile_read_data function to point to a temporary buffer. -+ * -+ * In normal operation, calls to __ffs_epfile_read_buffered will consume -+ * data from said buffer and eventually free it. Importantly, while the -+ * function is using the buffer, it sets the pointer to NULL. This is -+ * all right since __ffs_epfile_read_data and __ffs_epfile_read_buffered -+ * can never run concurrently (they are synchronised by epfile->mutex) -+ * so the latter will not assign a new value to the pointer. -+ * -+ * Meanwhile ffs_func_eps_disable frees the buffer (if the pointer is -+ * valid) and sets the pointer to READ_BUFFER_DROP value. This special -+ * value is crux of the synchronisation between ffs_func_eps_disable and -+ * __ffs_epfile_read_data. -+ * -+ * Once __ffs_epfile_read_data is about to finish it will try to set the -+ * pointer back to its old value (as described above), but seeing as the -+ * pointer is not-NULL (namely READ_BUFFER_DROP) it will instead free -+ * the buffer. -+ * -+ * == State transitions == -+ * -+ * • ptr == NULL: (initial state) -+ * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP -+ * ◦ __ffs_epfile_read_buffered: nop -+ * ◦ __ffs_epfile_read_data allocates temp buffer: go to ptr == buf -+ * ◦ reading finishes: n/a, not in ‘and reading’ state -+ * • ptr == DROP: -+ * ◦ __ffs_epfile_read_buffer_free: nop -+ * ◦ __ffs_epfile_read_buffered: go to ptr == NULL -+ * ◦ __ffs_epfile_read_data allocates temp buffer: free buf, nop -+ * ◦ reading finishes: n/a, not in ‘and reading’ state -+ * • ptr == buf: -+ * ◦ __ffs_epfile_read_buffer_free: free buf, go to ptr == DROP -+ * ◦ __ffs_epfile_read_buffered: go to ptr == NULL and reading -+ * ◦ __ffs_epfile_read_data: n/a, __ffs_epfile_read_buffered -+ * is always called first -+ * ◦ reading finishes: n/a, not in ‘and reading’ state -+ * • ptr == NULL and reading: -+ * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP and reading -+ * ◦ __ffs_epfile_read_buffered: n/a, mutex is held -+ * ◦ __ffs_epfile_read_data: n/a, mutex is held -+ * ◦ reading finishes and … -+ * … all data read: free buf, go to ptr == NULL -+ * … otherwise: go to ptr == buf and reading -+ * • ptr == DROP and reading: -+ * ◦ __ffs_epfile_read_buffer_free: nop -+ * ◦ __ffs_epfile_read_buffered: n/a, mutex is held -+ * ◦ __ffs_epfile_read_data: n/a, mutex is held -+ * ◦ reading finishes: free buf, go to ptr == DROP - */ -- struct ffs_buffer *read_buffer; /* P: epfile->mutex */ -+ struct ffs_buffer *read_buffer; -+#define READ_BUFFER_DROP ((struct ffs_buffer *)ERR_PTR(-ESHUTDOWN)) - - char name[5]; - -@@ -733,25 +785,47 @@ static void ffs_epfile_async_io_complete(struct usb_ep *_ep, - schedule_work(&io_data->work); - } - -+static void __ffs_epfile_read_buffer_free(struct ffs_epfile *epfile) -+{ -+ /* -+ * See comment in struct ffs_epfile for full read_buffer pointer -+ * synchronisation story. -+ */ -+ struct ffs_buffer *buf = xchg(&epfile->read_buffer, READ_BUFFER_DROP); -+ if (buf && buf != READ_BUFFER_DROP) -+ kfree(buf); -+} -+ - /* Assumes epfile->mutex is held. */ - static ssize_t __ffs_epfile_read_buffered(struct ffs_epfile *epfile, - struct iov_iter *iter) - { -- struct ffs_buffer *buf = epfile->read_buffer; -+ /* -+ * Null out epfile->read_buffer so ffs_func_eps_disable does not free -+ * the buffer while we are using it. See comment in struct ffs_epfile -+ * for full read_buffer pointer synchronisation story. -+ */ -+ struct ffs_buffer *buf = xchg(&epfile->read_buffer, NULL); - ssize_t ret; -- if (!buf) -+ if (!buf || buf == READ_BUFFER_DROP) - return 0; - - ret = copy_to_iter(buf->data, buf->length, iter); - if (buf->length == ret) { - kfree(buf); -- epfile->read_buffer = NULL; -- } else if (unlikely(iov_iter_count(iter))) { -+ return ret; -+ } -+ -+ if (unlikely(iov_iter_count(iter))) { - ret = -EFAULT; - } else { - buf->length -= ret; - buf->data += ret; - } -+ -+ if (cmpxchg(&epfile->read_buffer, NULL, buf)) -+ kfree(buf); -+ - return ret; - } - -@@ -780,7 +854,15 @@ static ssize_t __ffs_epfile_read_data(struct ffs_epfile *epfile, - buf->length = data_len; - buf->data = buf->storage; - memcpy(buf->storage, data + ret, data_len); -- epfile->read_buffer = buf; -+ -+ /* -+ * At this point read_buffer is NULL or READ_BUFFER_DROP (if -+ * ffs_func_eps_disable has been called in the meanwhile). See comment -+ * in struct ffs_epfile for full read_buffer pointer synchronisation -+ * story. -+ */ -+ if (unlikely(cmpxchg(&epfile->read_buffer, NULL, buf))) -+ kfree(buf); - - return ret; - } -@@ -1094,8 +1176,7 @@ ffs_epfile_release(struct inode *inode, struct file *file) - - ENTER(); - -- kfree(epfile->read_buffer); -- epfile->read_buffer = NULL; -+ __ffs_epfile_read_buffer_free(epfile); - ffs_data_closed(epfile->ffs); - - return 0; -@@ -1721,24 +1802,20 @@ static void ffs_func_eps_disable(struct ffs_function *func) - unsigned count = func->ffs->eps_count; - unsigned long flags; - -+ spin_lock_irqsave(&func->ffs->eps_lock, flags); - do { -- if (epfile) -- mutex_lock(&epfile->mutex); -- spin_lock_irqsave(&func->ffs->eps_lock, flags); - /* pending requests get nuked */ - if (likely(ep->ep)) - usb_ep_disable(ep->ep); - ++ep; -- spin_unlock_irqrestore(&func->ffs->eps_lock, flags); - - if (epfile) { - epfile->ep = NULL; -- kfree(epfile->read_buffer); -- epfile->read_buffer = NULL; -- mutex_unlock(&epfile->mutex); -+ __ffs_epfile_read_buffer_free(epfile); - ++epfile; - } - } while (--count); -+ spin_unlock_irqrestore(&func->ffs->eps_lock, flags); - } - - static int ffs_func_eps_enable(struct ffs_function *func) -diff --git a/include/net/ip.h b/include/net/ip.h -index 156b0c1..0ccf6da 100644 ---- a/include/net/ip.h -+++ b/include/net/ip.h -@@ -47,7 +47,6 @@ struct inet_skb_parm { - #define IPSKB_REROUTED BIT(4) - #define IPSKB_DOREDIRECT BIT(5) - #define IPSKB_FRAG_PMTU BIT(6) --#define IPSKB_FRAG_SEGS BIT(7) - - u16 frag_max_size; - }; -diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h -index 43a5a0e..b01d5d1 100644 ---- a/include/net/ip6_tunnel.h -+++ b/include/net/ip6_tunnel.h -@@ -145,6 +145,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, - { - int pkt_len, err; - -+ memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); - pkt_len = skb->len - skb_inner_network_offset(skb); - err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); - if (unlikely(net_xmit_eval(err))) -diff --git a/include/net/sock.h b/include/net/sock.h -index 8741988..c26eab9 100644 ---- a/include/net/sock.h -+++ b/include/net/sock.h -@@ -1587,11 +1587,11 @@ static inline void sock_put(struct sock *sk) - void sock_gen_put(struct sock *sk); - - int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, -- unsigned int trim_cap); -+ unsigned int trim_cap, bool refcounted); - static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, - const int nested) - { -- return __sk_receive_skb(sk, skb, nested, 1); -+ return __sk_receive_skb(sk, skb, nested, 1, true); - } - - static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) -diff --git a/include/net/tcp.h b/include/net/tcp.h -index 7717302..0de6989 100644 ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -1164,6 +1164,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) - } - - bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); -+int tcp_filter(struct sock *sk, struct sk_buff *skb); - - #undef STATE_TRACE - -diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h -index 5cd4d4d..9c9c6ad 100644 ---- a/include/uapi/linux/atm_zatm.h -+++ b/include/uapi/linux/atm_zatm.h -@@ -14,7 +14,6 @@ - - #include <linux/atmapi.h> - #include <linux/atmioc.h> --#include <linux/time.h> - - #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) - /* get pool statistics */ -diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c -index 570eeca..ad1bc67 100644 ---- a/kernel/bpf/hashtab.c -+++ b/kernel/bpf/hashtab.c -@@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab) - - hlist_for_each_entry_safe(l, n, head, hash_node) { - hlist_del_rcu(&l->hash_node); -- htab_elem_free(htab, l); -+ if (l->state != HTAB_EXTRA_ELEM_USED) -+ htab_elem_free(htab, l); - } - } - } -diff --git a/net/core/dev.c b/net/core/dev.c -index 44b3ba4..9ce9d72 100644 ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb) - goto out; - } - -- *(__sum16 *)(skb->data + offset) = csum_fold(csum); -+ *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; - out_set_summed: - skb->ip_summed = CHECKSUM_NONE; - out: -diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c -index 52742a0..5550a86 100644 ---- a/net/core/flow_dissector.c -+++ b/net/core/flow_dissector.c -@@ -118,7 +118,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, - struct flow_dissector_key_tags *key_tags; - struct flow_dissector_key_keyid *key_keyid; - u8 ip_proto = 0; -- bool ret = false; -+ bool ret; - - if (!data) { - data = skb->data; -@@ -481,12 +481,17 @@ bool __skb_flow_dissect(const struct sk_buff *skb, - out_good: - ret = true; - --out_bad: -+ key_control->thoff = (u16)nhoff; -+out: - key_basic->n_proto = proto; - key_basic->ip_proto = ip_proto; -- key_control->thoff = (u16)nhoff; - - return ret; -+ -+out_bad: -+ ret = false; -+ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); -+ goto out; - } - EXPORT_SYMBOL(__skb_flow_dissect); - -diff --git a/net/core/sock.c b/net/core/sock.c -index fd7b41e..10acacc 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) - EXPORT_SYMBOL(sock_queue_rcv_skb); - - int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, -- const int nested, unsigned int trim_cap) -+ const int nested, unsigned int trim_cap, bool refcounted) - { - int rc = NET_RX_SUCCESS; - -@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, - - bh_unlock_sock(sk); - out: -- sock_put(sk); -+ if (refcounted) -+ sock_put(sk); - return rc; - discard_and_relse: - kfree_skb(skb); -@@ -1563,6 +1564,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) - RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); - - newsk->sk_err = 0; -+ newsk->sk_err_soft = 0; - newsk->sk_priority = 0; - newsk->sk_incoming_cpu = raw_smp_processor_id(); - atomic64_set(&newsk->sk_cookie, 0); -diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c -index 345a3ae..b567c87 100644 ---- a/net/dccp/ipv4.c -+++ b/net/dccp/ipv4.c -@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) - { - const struct iphdr *iph = (struct iphdr *)skb->data; - const u8 offset = iph->ihl << 2; -- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); -+ const struct dccp_hdr *dh; - struct dccp_sock *dp; - struct inet_sock *inet; - const int type = icmp_hdr(skb)->type; -@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) - int err; - struct net *net = dev_net(skb->dev); - -- if (skb->len < offset + sizeof(*dh) || -- skb->len < offset + __dccp_basic_hdr_len(dh)) { -- __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); -- return; -- } -+ /* Only need dccph_dport & dccph_sport which are the first -+ * 4 bytes in dccp header. -+ * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. -+ */ -+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); -+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); -+ dh = (struct dccp_hdr *)(skb->data + offset); - - sk = __inet_lookup_established(net, &dccp_hashinfo, - iph->daddr, dh->dccph_dport, -@@ -868,7 +870,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) - goto discard_and_relse; - nf_reset(skb); - -- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4); -+ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); - - no_dccp_socket: - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) -diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c -index 3828f94..715e5d1 100644 ---- a/net/dccp/ipv6.c -+++ b/net/dccp/ipv6.c -@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) - { - const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; -- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); -+ const struct dccp_hdr *dh; - struct dccp_sock *dp; - struct ipv6_pinfo *np; - struct sock *sk; -@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - __u64 seq; - struct net *net = dev_net(skb->dev); - -- if (skb->len < offset + sizeof(*dh) || -- skb->len < offset + __dccp_basic_hdr_len(dh)) { -- __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), -- ICMP6_MIB_INERRORS); -- return; -- } -+ /* Only need dccph_dport & dccph_sport which are the first -+ * 4 bytes in dccp header. -+ * Our caller (icmpv6_notify()) already pulled 8 bytes for us. -+ */ -+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); -+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); -+ dh = (struct dccp_hdr *)(skb->data + offset); - - sk = __inet6_lookup_established(net, &dccp_hashinfo, - &hdr->daddr, dh->dccph_dport, -@@ -738,7 +739,8 @@ static int dccp_v6_rcv(struct sk_buff *skb) - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) - goto discard_and_relse; - -- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0; -+ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, -+ refcounted) ? -1 : 0; - - no_dccp_socket: - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) -@@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { - .getsockopt = ipv6_getsockopt, - .addr2sockaddr = inet6_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in6), -+ .bind_conflict = inet6_csk_bind_conflict, - #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ipv6_setsockopt, - .compat_getsockopt = compat_ipv6_getsockopt, -diff --git a/net/dccp/proto.c b/net/dccp/proto.c -index 41e6580..9fe25bf 100644 ---- a/net/dccp/proto.c -+++ b/net/dccp/proto.c -@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout) - __kfree_skb(skb); - } - -+ /* If socket has been already reset kill it. */ -+ if (sk->sk_state == DCCP_CLOSED) -+ goto adjudge_to_death; -+ - if (data_was_unread) { - /* Unread data was tossed, send an appropriate Reset Code */ - DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); -diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c -index e2ffc2a..7ef7031 100644 ---- a/net/ipv4/fib_trie.c -+++ b/net/ipv4/fib_trie.c -@@ -2455,22 +2455,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, - struct key_vector *l, **tp = &iter->tnode; - t_key key; - -- /* use cache location of next-to-find key */ -+ /* use cached location of previously found key */ - if (iter->pos > 0 && pos >= iter->pos) { -- pos -= iter->pos; - key = iter->key; - } else { -- iter->pos = 0; -+ iter->pos = 1; - key = 0; - } - -- while ((l = leaf_walk_rcu(tp, key)) != NULL) { -+ pos -= iter->pos; -+ -+ while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { - key = l->key + 1; - iter->pos++; -- -- if (--pos <= 0) -- break; -- - l = NULL; - - /* handle unlikely case of a key wrap */ -@@ -2479,7 +2476,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, - } - - if (l) -- iter->key = key; /* remember it */ -+ iter->key = l->key; /* remember it */ - else - iter->pos = 0; /* forget it */ - -@@ -2507,7 +2504,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) - return fib_route_get_idx(iter, *pos); - - iter->pos = 0; -- iter->key = 0; -+ iter->key = KEY_MAX; - - return SEQ_START_TOKEN; - } -@@ -2516,7 +2513,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) - { - struct fib_route_iter *iter = seq->private; - struct key_vector *l = NULL; -- t_key key = iter->key; -+ t_key key = iter->key + 1; - - ++*pos; - -@@ -2525,7 +2522,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) - l = leaf_walk_rcu(&iter->tnode, key); - - if (l) { -- iter->key = l->key + 1; -+ iter->key = l->key; - iter->pos++; - } else { - iter->pos = 0; -diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c -index 38abe70..48734ee 100644 ---- a/net/ipv4/icmp.c -+++ b/net/ipv4/icmp.c -@@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net, - fl4->flowi4_proto = IPPROTO_ICMP; - fl4->fl4_icmp_type = type; - fl4->fl4_icmp_code = code; -- fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); -+ fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); - - security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); - rt = __ip_route_output_key_hash(net, fl4, -@@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net, - if (err) - goto relookup_failed; - -- if (inet_addr_type_dev_table(net, skb_in->dev, -+ if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev, - fl4_dec.saddr) == RTN_LOCAL) { - rt2 = __ip_route_output_key(net, &fl4_dec); - if (IS_ERR(rt2)) -diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c -index 8b4ffd2..9f0a7b9 100644 ---- a/net/ipv4/ip_forward.c -+++ b/net/ipv4/ip_forward.c -@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb) - if (opt->is_strictroute && rt->rt_uses_gateway) - goto sr_failed; - -- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; -+ IPCB(skb)->flags |= IPSKB_FORWARDED; - mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); - if (ip_exceeds_mtu(skb, mtu)) { - IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); -diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c -index dde37fb..307daed 100644 ---- a/net/ipv4/ip_output.c -+++ b/net/ipv4/ip_output.c -@@ -223,11 +223,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, - struct sk_buff *segs; - int ret = 0; - -- /* common case: fragmentation of segments is not allowed, -- * or seglen is <= mtu -+ /* common case: seglen is <= mtu - */ -- if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) || -- skb_gso_validate_mtu(skb, mtu)) -+ if (skb_gso_validate_mtu(skb, mtu)) - return ip_finish_output2(net, sk, skb); - - /* Slowpath - GSO segment length is exceeding the dst MTU. -diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c -index 0f227db..afd6b59 100644 ---- a/net/ipv4/ip_tunnel_core.c -+++ b/net/ipv4/ip_tunnel_core.c -@@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, - int pkt_len = skb->len - skb_inner_network_offset(skb); - struct net *net = dev_net(rt->dst.dev); - struct net_device *dev = skb->dev; -- int skb_iif = skb->skb_iif; - struct iphdr *iph; - int err; - -@@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, - skb_dst_set(skb, &rt->dst); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - -- if (skb_iif && !(df & htons(IP_DF))) { -- /* Arrived from an ingress interface, got encapsulated, with -- * fragmentation of encapulating frames allowed. -- * If skb is gso, the resulting encapsulated network segments -- * may exceed dst mtu. -- * Allow IP Fragmentation of segments. -- */ -- IPCB(skb)->flags |= IPSKB_FRAG_SEGS; -- } -- - /* Push down and install the IP header. */ - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); -diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c -index 5f006e1..27089f5 100644 ---- a/net/ipv4/ipmr.c -+++ b/net/ipv4/ipmr.c -@@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, - vif->dev->stats.tx_bytes += skb->len; - } - -- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; -+ IPCB(skb)->flags |= IPSKB_FORWARDED; - - /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally - * not only before forwarding, but after forwarding on all output -diff --git a/net/ipv4/route.c b/net/ipv4/route.c -index 62c3ed0..2f23ef1 100644 ---- a/net/ipv4/route.c -+++ b/net/ipv4/route.c -@@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow - goto reject_redirect; - } - -- n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); -+ n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); -+ if (!n) -+ n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); - if (!IS_ERR(n)) { - if (!(n->nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index ffbb218..c876f5d 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -1145,7 +1145,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) - - err = -EPIPE; - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) -- goto out_err; -+ goto do_error; - - sg = !!(sk->sk_route_caps & NETIF_F_SG); - -@@ -1219,7 +1219,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) - - if (!skb_can_coalesce(skb, i, pfrag->page, - pfrag->offset)) { -- if (i == sysctl_max_skb_frags || !sg) { -+ if (i >= sysctl_max_skb_frags || !sg) { - tcp_mark_push(tp, skb); - goto new_segment; - } -diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c -index 10d728b..ab37c67 100644 ---- a/net/ipv4/tcp_dctcp.c -+++ b/net/ipv4/tcp_dctcp.c -@@ -56,6 +56,7 @@ struct dctcp { - u32 next_seq; - u32 ce_state; - u32 delayed_ack_reserved; -+ u32 loss_cwnd; - }; - - static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ -@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk) - ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); - - ca->delayed_ack_reserved = 0; -+ ca->loss_cwnd = 0; - ca->ce_state = 0; - - dctcp_reset(tp, ca); -@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk) - - static u32 dctcp_ssthresh(struct sock *sk) - { -- const struct dctcp *ca = inet_csk_ca(sk); -+ struct dctcp *ca = inet_csk_ca(sk); - struct tcp_sock *tp = tcp_sk(sk); - -+ ca->loss_cwnd = tp->snd_cwnd; - return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); - } - -@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, - return 0; - } - -+static u32 dctcp_cwnd_undo(struct sock *sk) -+{ -+ const struct dctcp *ca = inet_csk_ca(sk); -+ -+ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -+} -+ - static struct tcp_congestion_ops dctcp __read_mostly = { - .init = dctcp_init, - .in_ack_event = dctcp_update_alpha, - .cwnd_event = dctcp_cwnd_event, - .ssthresh = dctcp_ssthresh, - .cong_avoid = tcp_reno_cong_avoid, -+ .undo_cwnd = dctcp_cwnd_undo, - .set_state = dctcp_state, - .get_info = dctcp_get_info, - .flags = TCP_CONG_NEEDS_ECN, -diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c -index 7158d4f..7b235fa 100644 ---- a/net/ipv4/tcp_ipv4.c -+++ b/net/ipv4/tcp_ipv4.c -@@ -1537,6 +1537,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) - } - EXPORT_SYMBOL(tcp_prequeue); - -+int tcp_filter(struct sock *sk, struct sk_buff *skb) -+{ -+ struct tcphdr *th = (struct tcphdr *)skb->data; -+ unsigned int eaten = skb->len; -+ int err; -+ -+ err = sk_filter_trim_cap(sk, skb, th->doff * 4); -+ if (!err) { -+ eaten -= skb->len; -+ TCP_SKB_CB(skb)->end_seq -= eaten; -+ } -+ return err; -+} -+EXPORT_SYMBOL(tcp_filter); -+ - /* - * From tcp_input.c - */ -@@ -1648,8 +1663,10 @@ int tcp_v4_rcv(struct sk_buff *skb) - - nf_reset(skb); - -- if (sk_filter(sk, skb)) -+ if (tcp_filter(sk, skb)) - goto discard_and_relse; -+ th = (const struct tcphdr *)skb->data; -+ iph = ip_hdr(skb); - - skb->dev = NULL; - -diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c -index bd59c34..7370ad2 100644 ---- a/net/ipv6/icmp.c -+++ b/net/ipv6/icmp.c -@@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - if (__ipv6_addr_needs_scope_id(addr_type)) - iif = skb->dev->ifindex; - else -- iif = l3mdev_master_ifindex(skb->dev); -+ iif = l3mdev_master_ifindex(skb_dst(skb)->dev); - - /* - * Must not send error if the source does not uniquely -diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c -index fc67822..af6a09e 100644 ---- a/net/ipv6/tcp_ipv6.c -+++ b/net/ipv6/tcp_ipv6.c -@@ -1228,7 +1228,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) - if (skb->protocol == htons(ETH_P_IP)) - return tcp_v4_do_rcv(sk, skb); - -- if (sk_filter(sk, skb)) -+ if (tcp_filter(sk, skb)) - goto discard; - - /* -@@ -1455,8 +1455,10 @@ static int tcp_v6_rcv(struct sk_buff *skb) - if (tcp_v6_inbound_md5_hash(sk, skb)) - goto discard_and_relse; - -- if (sk_filter(sk, skb)) -+ if (tcp_filter(sk, skb)) - goto discard_and_relse; -+ th = (const struct tcphdr *)skb->data; -+ hdr = ipv6_hdr(skb); - - skb->dev = NULL; - -diff --git a/net/sctp/socket.c b/net/sctp/socket.c -index baccbf3..7b0e059 100644 ---- a/net/sctp/socket.c -+++ b/net/sctp/socket.c -@@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk, - - timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); - -- err = sctp_wait_for_connect(asoc, &timeo); -- if ((err == 0 || err == -EINPROGRESS) && assoc_id) -+ if (assoc_id) - *assoc_id = asoc->assoc_id; -+ err = sctp_wait_for_connect(asoc, &timeo); -+ /* Note: the asoc may be freed after the return of -+ * sctp_wait_for_connect. -+ */ - - /* Don't free association on exit. */ - asoc = NULL; -@@ -4278,19 +4281,18 @@ static void sctp_shutdown(struct sock *sk, int how) - { - struct net *net = sock_net(sk); - struct sctp_endpoint *ep; -- struct sctp_association *asoc; - - if (!sctp_style(sk, TCP)) - return; - -- if (how & SEND_SHUTDOWN) { -+ ep = sctp_sk(sk)->ep; -+ if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { -+ struct sctp_association *asoc; -+ - sk->sk_state = SCTP_SS_CLOSING; -- ep = sctp_sk(sk)->ep; -- if (!list_empty(&ep->asocs)) { -- asoc = list_entry(ep->asocs.next, -- struct sctp_association, asocs); -- sctp_primitive_SHUTDOWN(net, asoc, NULL); -- } -+ asoc = list_entry(ep->asocs.next, -+ struct sctp_association, asocs); -+ sctp_primitive_SHUTDOWN(net, asoc, NULL); - } - } - -diff --git a/net/socket.c b/net/socket.c -index a1bd161..03bc2c2 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, - if (err) - break; - ++datagrams; -+ if (msg_data_left(&msg_sys)) -+ break; - cond_resched(); - } - -diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c -index f3825b6..f046b77 100644 ---- a/tools/spi/spidev_test.c -+++ b/tools/spi/spidev_test.c -@@ -19,6 +19,7 @@ - #include <getopt.h> - #include <fcntl.h> - #include <sys/ioctl.h> -+#include <linux/ioctl.h> - #include <sys/stat.h> - #include <linux/types.h> - #include <linux/spi/spidev.h> |