commit:     c8c8fca074336deefaa5af1dbf8bf3b62839878e
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Mon Nov 21 14:50:13 2016 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Mon Nov 21 14:50:13 2016 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=c8c8fca0

Linux patch 4.8.10

 0000_README             |    4 +
 1009_linux-4.8.10.patch | 4759 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 4763 insertions(+)

diff --git a/0000_README b/0000_README
index d5af994..13976e7 100644
--- a/0000_README
+++ b/0000_README
@@ -79,6 +79,10 @@ Patch:  1008_linux-4.8.9.patch
 From:   http://www.kernel.org
 Desc:   Linux 4.8.9
 
+Patch:  1009_linux-4.8.10.patch
+From:   http://www.kernel.org
+Desc:   Linux 4.8.10
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

diff --git a/1009_linux-4.8.10.patch b/1009_linux-4.8.10.patch
new file mode 100644
index 0000000..7b1d9cf
--- /dev/null
+++ b/1009_linux-4.8.10.patch
@@ -0,0 +1,4759 @@
+diff --git a/Makefile b/Makefile
+index c1519ab85258..7cf2b4985703 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 8
+-SUBLEVEL = 9
++SUBLEVEL = 10
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+ 
+diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
+index 37a315d0ddd4..a6847fc05a6d 100644
+--- a/arch/sparc/include/asm/uaccess_64.h
++++ b/arch/sparc/include/asm/uaccess_64.h
+@@ -98,7 +98,6 @@ struct exception_table_entry {
+         unsigned int insn, fixup;
+ };
+ 
+-void __ret_efault(void);
+ void __retl_efault(void);
+ 
+ /* Uh, these should become the main single-value transfer routines..
+@@ -205,55 +204,34 @@ int __get_user_bad(void);
+ unsigned long __must_check ___copy_from_user(void *to,
+                                            const void __user *from,
+                                            unsigned long size);
+-unsigned long copy_from_user_fixup(void *to, const void __user *from,
+-                                 unsigned long size);
+ static inline unsigned long __must_check
+ copy_from_user(void *to, const void __user *from, unsigned long size)
+ {
+-      unsigned long ret;
+-
+       check_object_size(to, size, false);
+ 
+-      ret = ___copy_from_user(to, from, size);
+-      if (unlikely(ret))
+-              ret = copy_from_user_fixup(to, from, size);
+-
+-      return ret;
++      return ___copy_from_user(to, from, size);
+ }
+ #define __copy_from_user copy_from_user
+ 
+ unsigned long __must_check ___copy_to_user(void __user *to,
+                                          const void *from,
+                                          unsigned long size);
+-unsigned long copy_to_user_fixup(void __user *to, const void *from,
+-                               unsigned long size);
+ static inline unsigned long __must_check
+ copy_to_user(void __user *to, const void *from, unsigned long size)
+ {
+-      unsigned long ret;
+-
+       check_object_size(from, size, true);
+ 
+-      ret = ___copy_to_user(to, from, size);
+-      if (unlikely(ret))
+-              ret = copy_to_user_fixup(to, from, size);
+-      return ret;
++      return ___copy_to_user(to, from, size);
+ }
+ #define __copy_to_user copy_to_user
+ 
+ unsigned long __must_check ___copy_in_user(void __user *to,
+                                          const void __user *from,
+                                          unsigned long size);
+-unsigned long copy_in_user_fixup(void __user *to, void __user *from,
+-                               unsigned long size);
+ static inline unsigned long __must_check
+ copy_in_user(void __user *to, void __user *from, unsigned long size)
+ {
+-      unsigned long ret = ___copy_in_user(to, from, size);
+-
+-      if (unlikely(ret))
+-              ret = copy_in_user_fixup(to, from, size);
+-      return ret;
++      return ___copy_in_user(to, from, size);
+ }
+ #define __copy_in_user copy_in_user
+ 
+diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
+index a076b4249e62..5f1f3ae21657 100644
+--- a/arch/sparc/kernel/head_64.S
++++ b/arch/sparc/kernel/head_64.S
+@@ -922,47 +922,11 @@ prom_tba:        .xword  0
+ tlb_type:     .word   0       /* Must NOT end up in BSS */
+       .section        ".fixup",#alloc,#execinstr
+ 
+-      .globl  __ret_efault, __retl_efault, __ret_one, __retl_one
+-ENTRY(__ret_efault)
+-      ret
+-       restore %g0, -EFAULT, %o0
+-ENDPROC(__ret_efault)
+-
+ ENTRY(__retl_efault)
+       retl
+        mov    -EFAULT, %o0
+ ENDPROC(__retl_efault)
+ 
+-ENTRY(__retl_one)
+-      retl
+-       mov    1, %o0
+-ENDPROC(__retl_one)
+-
+-ENTRY(__retl_one_fp)
+-      VISExitHalf
+-      retl
+-       mov    1, %o0
+-ENDPROC(__retl_one_fp)
+-
+-ENTRY(__ret_one_asi)
+-      wr      %g0, ASI_AIUS, %asi
+-      ret
+-       restore %g0, 1, %o0
+-ENDPROC(__ret_one_asi)
+-
+-ENTRY(__retl_one_asi)
+-      wr      %g0, ASI_AIUS, %asi
+-      retl
+-       mov    1, %o0
+-ENDPROC(__retl_one_asi)
+-
+-ENTRY(__retl_one_asi_fp)
+-      wr      %g0, ASI_AIUS, %asi
+-      VISExitHalf
+-      retl
+-       mov    1, %o0
+-ENDPROC(__retl_one_asi_fp)
+-
+ ENTRY(__retl_o1)
+       retl
+        mov    %o1, %o0
+diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
+index 59bbeff55024..07933b9e9ce0 100644
+--- a/arch/sparc/kernel/jump_label.c
++++ b/arch/sparc/kernel/jump_label.c
+@@ -13,19 +13,30 @@
+ void arch_jump_label_transform(struct jump_entry *entry,
+                              enum jump_label_type type)
+ {
+-      u32 val;
+       u32 *insn = (u32 *) (unsigned long) entry->code;
++      u32 val;
+ 
+       if (type == JUMP_LABEL_JMP) {
+               s32 off = (s32)entry->target - (s32)entry->code;
++              bool use_v9_branch = false;
++
++              BUG_ON(off & 3);
+ 
+ #ifdef CONFIG_SPARC64
+-              /* ba,pt %xcc, . + (off << 2) */
+-              val = 0x10680000 | ((u32) off >> 2);
+-#else
+-              /* ba . + (off << 2) */
+-              val = 0x10800000 | ((u32) off >> 2);
++              if (off <= 0xfffff && off >= -0x100000)
++                      use_v9_branch = true;
+ #endif
++              if (use_v9_branch) {
++                      /* WDISP19 - target is . + immed << 2 */
++                      /* ba,pt %xcc, . + off */
++                      val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
++              } else {
++                      /* WDISP22 - target is . + immed << 2 */
++                      BUG_ON(off > 0x7fffff);
++                      BUG_ON(off < -0x800000);
++                      /* ba . + off */
++                      val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
++              }
+       } else {
+               val = 0x01000000;
+       }
+diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
+index 9e034f29dcc5..20ffb052fe38 100644
+--- a/arch/sparc/kernel/sparc_ksyms_64.c
++++ b/arch/sparc/kernel/sparc_ksyms_64.c
+@@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user);
+ EXPORT_SYMBOL_GPL(real_hard_smp_processor_id);
+ 
+ /* from head_64.S */
+-EXPORT_SYMBOL(__ret_efault);
+ EXPORT_SYMBOL(tlb_type);
+ EXPORT_SYMBOL(sun4v_chip_type);
+ EXPORT_SYMBOL(prom_root_node);
+diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
+index b7d0bd6b1406..69a439fa2fc1 100644
+--- a/arch/sparc/lib/GENcopy_from_user.S
++++ b/arch/sparc/lib/GENcopy_from_user.S
+@@ -3,11 +3,11 @@
+  * Copyright (C) 2007 David S. Miller ([email protected])
+  */
+ 
+-#define EX_LD(x)              \
++#define EX_LD(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one;  \
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
+index 780550e1afc7..9947427ce354 100644
+--- a/arch/sparc/lib/GENcopy_to_user.S
++++ b/arch/sparc/lib/GENcopy_to_user.S
+@@ -3,11 +3,11 @@
+  * Copyright (C) 2007 David S. Miller ([email protected])
+  */
+ 
+-#define EX_ST(x)              \
++#define EX_ST(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one;  \
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
+index 89358ee94851..059ea24ad73d 100644
+--- a/arch/sparc/lib/GENmemcpy.S
++++ b/arch/sparc/lib/GENmemcpy.S
+@@ -4,21 +4,18 @@
+  */
+ 
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #define GLOBAL_SPARE  %g7
+ #else
+ #define GLOBAL_SPARE  %g5
+ #endif
+ 
+ #ifndef EX_LD
+-#define EX_LD(x)      x
++#define EX_LD(x,y)    x
+ #endif
+ 
+ #ifndef EX_ST
+-#define EX_ST(x)      x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x)  x
++#define EX_ST(x,y)    x
+ #endif
+ 
+ #ifndef LOAD
+@@ -45,6 +42,29 @@
+       .register       %g3,#scratch
+ 
+       .text
++
++#ifndef EX_RETVAL
++#define EX_RETVAL(x)  x
++ENTRY(GEN_retl_o4_1)
++      add     %o4, %o2, %o4
++      retl
++       add    %o4, 1, %o0
++ENDPROC(GEN_retl_o4_1)
++ENTRY(GEN_retl_g1_8)
++      add     %g1, %o2, %g1
++      retl
++       add    %g1, 8, %o0
++ENDPROC(GEN_retl_g1_8)
++ENTRY(GEN_retl_o2_4)
++      retl
++       add    %o2, 4, %o0
++ENDPROC(GEN_retl_o2_4)
++ENTRY(GEN_retl_o2_1)
++      retl
++       add    %o2, 1, %o0
++ENDPROC(GEN_retl_o2_1)
++#endif
++
+       .align          64
+ 
+       .globl  FUNC_NAME
+@@ -73,8 +93,8 @@ FUNC_NAME:   /* %o0=dst, %o1=src, %o2=len */
+       sub             %g0, %o4, %o4
+       sub             %o2, %o4, %o2
+ 1:    subcc           %o4, 1, %o4
+-      EX_LD(LOAD(ldub, %o1, %g1))
+-      EX_ST(STORE(stb, %g1, %o0))
++      EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
++      EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
+       add             %o1, 1, %o1
+       bne,pt          %XCC, 1b
+       add             %o0, 1, %o0
+@@ -82,8 +102,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
+       andn            %o2, 0x7, %g1
+       sub             %o2, %g1, %o2
+ 1:    subcc           %g1, 0x8, %g1
+-      EX_LD(LOAD(ldx, %o1, %g2))
+-      EX_ST(STORE(stx, %g2, %o0))
++      EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
++      EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
+       add             %o1, 0x8, %o1
+       bne,pt          %XCC, 1b
+        add            %o0, 0x8, %o0
+@@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ 
+ 1:
+       subcc           %o2, 4, %o2
+-      EX_LD(LOAD(lduw, %o1, %g1))
+-      EX_ST(STORE(stw, %g1, %o1 + %o3))
++      EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
++      EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
+       bgu,pt          %XCC, 1b
+        add            %o1, 4, %o1
+ 
+@@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+       .align          32
+ 90:
+       subcc           %o2, 1, %o2
+-      EX_LD(LOAD(ldub, %o1, %g1))
+-      EX_ST(STORE(stb, %g1, %o1 + %o3))
++      EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
++      EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
+       bgu,pt          %XCC, 90b
+        add            %o1, 1, %o1
+       retl
+diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
+index 3269b0234093..4f2384a4286a 100644
+--- a/arch/sparc/lib/Makefile
++++ b/arch/sparc/lib/Makefile
+@@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) +=  NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
+ lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
+ lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
+ 
+-lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
++lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
+ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
+ 
+ obj-$(CONFIG_SPARC64) += iomap.o
+diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
+index d5242b8c4f94..b79a6998d87c 100644
+--- a/arch/sparc/lib/NG2copy_from_user.S
++++ b/arch/sparc/lib/NG2copy_from_user.S
+@@ -3,19 +3,19 @@
+  * Copyright (C) 2007 David S. Miller ([email protected])
+  */
+ 
+-#define EX_LD(x)              \
++#define EX_LD(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_asi;\
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+-#define EX_LD_FP(x)           \
++#define EX_LD_FP(x,y)         \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_asi_fp;\
++      .word 98b, y##_fp;      \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
+index 4e962d993b10..dcec55f254ab 100644
+--- a/arch/sparc/lib/NG2copy_to_user.S
++++ b/arch/sparc/lib/NG2copy_to_user.S
+@@ -3,19 +3,19 @@
+  * Copyright (C) 2007 David S. Miller ([email protected])
+  */
+ 
+-#define EX_ST(x)              \
++#define EX_ST(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_asi;\
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+-#define EX_ST_FP(x)           \
++#define EX_ST_FP(x,y)         \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_asi_fp;\
++      .word 98b, y##_fp;      \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
+index d5f585df2f3f..c629dbd121b6 100644
+--- a/arch/sparc/lib/NG2memcpy.S
++++ b/arch/sparc/lib/NG2memcpy.S
+@@ -4,6 +4,7 @@
+  */
+ 
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/visasm.h>
+ #include <asm/asi.h>
+ #define GLOBAL_SPARE  %g7
+@@ -32,21 +33,17 @@
+ #endif
+ 
+ #ifndef EX_LD
+-#define EX_LD(x)      x
++#define EX_LD(x,y)    x
+ #endif
+ #ifndef EX_LD_FP
+-#define EX_LD_FP(x)   x
++#define EX_LD_FP(x,y) x
+ #endif
+ 
+ #ifndef EX_ST
+-#define EX_ST(x)      x
++#define EX_ST(x,y)    x
+ #endif
+ #ifndef EX_ST_FP
+-#define EX_ST_FP(x)   x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x)  x
++#define EX_ST_FP(x,y) x
+ #endif
+ 
+ #ifndef LOAD
+@@ -140,45 +137,110 @@
+       fsrc2           %x6, %f12; \
+       fsrc2           %x7, %f14;
+ #define FREG_LOAD_1(base, x0) \
+-      EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
++      EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
+ #define FREG_LOAD_2(base, x0, x1) \
+-      EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
++      EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_3(base, x0, x1, x2) \
+-      EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
++      EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_4(base, x0, x1, x2, x3) \
+-      EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
++      EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
+-      EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
++      EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
+-      EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
++      EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
+-      EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
+-      EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
++      EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
++      EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
+ 
+       .register       %g2,#scratch
+       .register       %g3,#scratch
+ 
+       .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x)  x
++__restore_fp:
++      VISExitHalf
++__restore_asi:
++      retl
++       wr     %g0, ASI_AIUS, %asi
++ENTRY(NG2_retl_o2)
++      ba,pt   %xcc, __restore_asi
++       mov    %o2, %o0
++ENDPROC(NG2_retl_o2)
++ENTRY(NG2_retl_o2_plus_1)
++      ba,pt   %xcc, __restore_asi
++       add    %o2, 1, %o0
++ENDPROC(NG2_retl_o2_plus_1)
++ENTRY(NG2_retl_o2_plus_4)
++      ba,pt   %xcc, __restore_asi
++       add    %o2, 4, %o0
++ENDPROC(NG2_retl_o2_plus_4)
++ENTRY(NG2_retl_o2_plus_8)
++      ba,pt   %xcc, __restore_asi
++       add    %o2, 8, %o0
++ENDPROC(NG2_retl_o2_plus_8)
++ENTRY(NG2_retl_o2_plus_o4_plus_1)
++      add     %o4, 1, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_plus_o4_plus_1)
++ENTRY(NG2_retl_o2_plus_o4_plus_8)
++      add     %o4, 8, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_plus_o4_plus_8)
++ENTRY(NG2_retl_o2_plus_o4_plus_16)
++      add     %o4, 16, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_plus_o4_plus_16)
++ENTRY(NG2_retl_o2_plus_g1_fp)
++      ba,pt   %xcc, __restore_fp
++       add    %o2, %g1, %o0
++ENDPROC(NG2_retl_o2_plus_g1_fp)
++ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
++      add     %g1, 64, %g1
++      ba,pt   %xcc, __restore_fp
++       add    %o2, %g1, %o0
++ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
++ENTRY(NG2_retl_o2_plus_g1_plus_1)
++      add     %g1, 1, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %g1, %o0
++ENDPROC(NG2_retl_o2_plus_g1_plus_1)
++ENTRY(NG2_retl_o2_and_7_plus_o4)
++      and     %o2, 7, %o2
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_and_7_plus_o4)
++ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
++      and     %o2, 7, %o2
++      add     %o4, 8, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
++#endif
++
+       .align          64
+ 
+       .globl  FUNC_NAME
+@@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+       sub             %g0, %o4, %o4   ! bytes to align dst
+       sub             %o2, %o4, %o2
+ 1:    subcc           %o4, 1, %o4
+-      EX_LD(LOAD(ldub, %o1, %g1))
+-      EX_ST(STORE(stb, %g1, %o0))
++      EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
++      EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
+       add             %o1, 1, %o1
+       bne,pt          %XCC, 1b
+       add             %o0, 1, %o0
+@@ -281,11 +343,11 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+        nop
+       /* fall through for 0 < low bits < 8 */
+ 110:  sub             %o4, 64, %g2
+-      EX_LD_FP(LOAD_BLK(%g2, %f0))
+-1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+-      EX_LD_FP(LOAD_BLK(%o4, %f16))
++      EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
++1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++      EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+       FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
+-      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
+       subcc           %g1, 64, %g1
+       add             %o4, 64, %o4
+@@ -296,10 +358,10 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 120:  sub             %o4, 56, %g2
+       FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
+-1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+-      EX_LD_FP(LOAD_BLK(%o4, %f16))
++1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++      EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+       FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
+-      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
+       subcc           %g1, 64, %g1
+       add             %o4, 64, %o4
+@@ -310,10 +372,10 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 130:  sub             %o4, 48, %g2
+       FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
+-1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+-      EX_LD_FP(LOAD_BLK(%o4, %f16))
++1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++      EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+       FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
+-      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
+       subcc           %g1, 64, %g1
+       add             %o4, 64, %o4
+@@ -324,10 +386,10 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 140:  sub             %o4, 40, %g2
+       FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
+-1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+-      EX_LD_FP(LOAD_BLK(%o4, %f16))
++1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++      EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+       FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
+-      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       FREG_MOVE_5(f22, f24, f26, f28, f30)
+       subcc           %g1, 64, %g1
+       add             %o4, 64, %o4
+@@ -338,10 +400,10 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 150:  sub             %o4, 32, %g2
+       FREG_LOAD_4(%g2, f0, f2, f4, f6)
+-1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+-      EX_LD_FP(LOAD_BLK(%o4, %f16))
++1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++      EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+       FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
+-      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       FREG_MOVE_4(f24, f26, f28, f30)
+       subcc           %g1, 64, %g1
+       add             %o4, 64, %o4
+@@ -352,10 +414,10 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 160:  sub             %o4, 24, %g2
+       FREG_LOAD_3(%g2, f0, f2, f4)
+-1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+-      EX_LD_FP(LOAD_BLK(%o4, %f16))
++1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++      EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+       FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
+-      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       FREG_MOVE_3(f26, f28, f30)
+       subcc           %g1, 64, %g1
+       add             %o4, 64, %o4
+@@ -366,10 +428,10 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 170:  sub             %o4, 16, %g2
+       FREG_LOAD_2(%g2, f0, f2)
+-1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+-      EX_LD_FP(LOAD_BLK(%o4, %f16))
++1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++      EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+       FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
+-      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       FREG_MOVE_2(f28, f30)
+       subcc           %g1, 64, %g1
+       add             %o4, 64, %o4
+@@ -380,10 +442,10 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 180:  sub             %o4, 8, %g2
+       FREG_LOAD_1(%g2, f0)
+-1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+-      EX_LD_FP(LOAD_BLK(%o4, %f16))
++1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++      EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+       FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
+-      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       FREG_MOVE_1(f30)
+       subcc           %g1, 64, %g1
+       add             %o4, 64, %o4
+@@ -393,10 +455,10 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+        nop
+ 
+ 190:
+-1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
++1:    EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       subcc           %g1, 64, %g1
+-      EX_LD_FP(LOAD_BLK(%o4, %f0))
+-      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++      EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
++      EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
+       add             %o4, 64, %o4
+       bne,pt          %xcc, 1b
+        LOAD(prefetch, %o4 + 64, #one_read)
+@@ -423,28 +485,28 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       andn            %o2, 0xf, %o4
+       and             %o2, 0xf, %o2
+ 1:    subcc           %o4, 0x10, %o4
+-      EX_LD(LOAD(ldx, %o1, %o5))
++      EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
+       add             %o1, 0x08, %o1
+-      EX_LD(LOAD(ldx, %o1, %g1))
++      EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
+       sub             %o1, 0x08, %o1
+-      EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
++      EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
+       add             %o1, 0x8, %o1
+-      EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
++      EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
+       bgu,pt          %XCC, 1b
+        add            %o1, 0x8, %o1
+ 73:   andcc           %o2, 0x8, %g0
+       be,pt           %XCC, 1f
+        nop
+       sub             %o2, 0x8, %o2
+-      EX_LD(LOAD(ldx, %o1, %o5))
+-      EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
++      EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
++      EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
+       add             %o1, 0x8, %o1
+ 1:    andcc           %o2, 0x4, %g0
+       be,pt           %XCC, 1f
+        nop
+       sub             %o2, 0x4, %o2
+-      EX_LD(LOAD(lduw, %o1, %o5))
+-      EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
++      EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
++      EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
+       add             %o1, 0x4, %o1
+ 1:    cmp             %o2, 0
+       be,pt           %XCC, 85f
+@@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+       sub             %o2, %g1, %o2
+ 
+ 1:    subcc           %g1, 1, %g1
+-      EX_LD(LOAD(ldub, %o1, %o5))
+-      EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
++      EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
++      EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
+       bgu,pt          %icc, 1b
+        add            %o1, 1, %o1
+ 
+@@ -477,16 +539,16 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 8:    mov             64, GLOBAL_SPARE
+       andn            %o1, 0x7, %o1
+-      EX_LD(LOAD(ldx, %o1, %g2))
++      EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
+       sub             GLOBAL_SPARE, %g1, GLOBAL_SPARE
+       andn            %o2, 0x7, %o4
+       sllx            %g2, %g1, %g2
+ 1:    add             %o1, 0x8, %o1
+-      EX_LD(LOAD(ldx, %o1, %g3))
++      EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
+       subcc           %o4, 0x8, %o4
+       srlx            %g3, GLOBAL_SPARE, %o5
+       or              %o5, %g2, %o5
+-      EX_ST(STORE(stx, %o5, %o0))
++      EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
+       add             %o0, 0x8, %o0
+       bgu,pt          %icc, 1b
+        sllx           %g3, %g1, %g2
+@@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ 
+ 1:
+       subcc           %o2, 4, %o2
+-      EX_LD(LOAD(lduw, %o1, %g1))
+-      EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
++      EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
++      EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
+       bgu,pt          %XCC, 1b
+        add            %o1, 4, %o1
+ 
+@@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+       .align          32
+ 90:
+       subcc           %o2, 1, %o2
+-      EX_LD(LOAD(ldub, %o1, %g1))
+-      EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
++      EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
++      EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
+       bgu,pt          %XCC, 90b
+        add            %o1, 1, %o1
+       retl
+diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
+index 2e8ee7ad07a9..16a286c1a528 100644
+--- a/arch/sparc/lib/NG4copy_from_user.S
++++ b/arch/sparc/lib/NG4copy_from_user.S
+@@ -3,19 +3,19 @@
+  * Copyright (C) 2012 David S. Miller ([email protected])
+  */
+ 
+-#define EX_LD(x)              \
++#define EX_LD(x, y)           \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_asi;\
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+-#define EX_LD_FP(x)           \
++#define EX_LD_FP(x,y)         \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_asi_fp;\
++      .word 98b, y##_fp;      \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
+index be0bf4590df8..6b0276ffc858 100644
+--- a/arch/sparc/lib/NG4copy_to_user.S
++++ b/arch/sparc/lib/NG4copy_to_user.S
+@@ -3,19 +3,19 @@
+  * Copyright (C) 2012 David S. Miller ([email protected])
+  */
+ 
+-#define EX_ST(x)              \
++#define EX_ST(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_asi;\
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+-#define EX_ST_FP(x)           \
++#define EX_ST_FP(x,y)         \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_asi_fp;\
++      .word 98b, y##_fp;      \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
+index 8e13ee1f4454..75bb93b1437f 100644
+--- a/arch/sparc/lib/NG4memcpy.S
++++ b/arch/sparc/lib/NG4memcpy.S
+@@ -4,6 +4,7 @@
+  */
+ 
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/visasm.h>
+ #include <asm/asi.h>
+ #define GLOBAL_SPARE  %g7
+@@ -46,22 +47,19 @@
+ #endif
+ 
+ #ifndef EX_LD
+-#define EX_LD(x)      x
++#define EX_LD(x,y)    x
+ #endif
+ #ifndef EX_LD_FP
+-#define EX_LD_FP(x)   x
++#define EX_LD_FP(x,y) x
+ #endif
+ 
+ #ifndef EX_ST
+-#define EX_ST(x)      x
++#define EX_ST(x,y)    x
+ #endif
+ #ifndef EX_ST_FP
+-#define EX_ST_FP(x)   x
++#define EX_ST_FP(x,y) x
+ #endif
+ 
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x)  x
+-#endif
+ 
+ #ifndef LOAD
+ #define LOAD(type,addr,dest)  type [addr], dest
+@@ -94,6 +92,158 @@
+       .register       %g3,#scratch
+ 
+       .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x)  x
++__restore_asi_fp:
++      VISExitHalf
++__restore_asi:
++      retl
++       wr     %g0, ASI_AIUS, %asi
++
++ENTRY(NG4_retl_o2)
++      ba,pt   %xcc, __restore_asi
++       mov    %o2, %o0
++ENDPROC(NG4_retl_o2)
++ENTRY(NG4_retl_o2_plus_1)
++      ba,pt   %xcc, __restore_asi
++       add    %o2, 1, %o0
++ENDPROC(NG4_retl_o2_plus_1)
++ENTRY(NG4_retl_o2_plus_4)
++      ba,pt   %xcc, __restore_asi
++       add    %o2, 4, %o0
++ENDPROC(NG4_retl_o2_plus_4)
++ENTRY(NG4_retl_o2_plus_o5)
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5)
++ENTRY(NG4_retl_o2_plus_o5_plus_4)
++      add     %o5, 4, %o5
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_4)
++ENTRY(NG4_retl_o2_plus_o5_plus_8)
++      add     %o5, 8, %o5
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_8)
++ENTRY(NG4_retl_o2_plus_o5_plus_16)
++      add     %o5, 16, %o5
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_16)
++ENTRY(NG4_retl_o2_plus_o5_plus_24)
++      add     %o5, 24, %o5
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_24)
++ENTRY(NG4_retl_o2_plus_o5_plus_32)
++      add     %o5, 32, %o5
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_32)
++ENTRY(NG4_retl_o2_plus_g1)
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %g1, %o0
++ENDPROC(NG4_retl_o2_plus_g1)
++ENTRY(NG4_retl_o2_plus_g1_plus_1)
++      add     %g1, 1, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %g1, %o0
++ENDPROC(NG4_retl_o2_plus_g1_plus_1)
++ENTRY(NG4_retl_o2_plus_g1_plus_8)
++      add     %g1, 8, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %g1, %o0
++ENDPROC(NG4_retl_o2_plus_g1_plus_8)
++ENTRY(NG4_retl_o2_plus_o4)
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4)
++ENTRY(NG4_retl_o2_plus_o4_plus_8)
++      add     %o4, 8, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_8)
++ENTRY(NG4_retl_o2_plus_o4_plus_16)
++      add     %o4, 16, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_16)
++ENTRY(NG4_retl_o2_plus_o4_plus_24)
++      add     %o4, 24, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_24)
++ENTRY(NG4_retl_o2_plus_o4_plus_32)
++      add     %o4, 32, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_32)
++ENTRY(NG4_retl_o2_plus_o4_plus_40)
++      add     %o4, 40, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_40)
++ENTRY(NG4_retl_o2_plus_o4_plus_48)
++      add     %o4, 48, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_48)
++ENTRY(NG4_retl_o2_plus_o4_plus_56)
++      add     %o4, 56, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_56)
++ENTRY(NG4_retl_o2_plus_o4_plus_64)
++      add     %o4, 64, %o4
++      ba,pt   %xcc, __restore_asi
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_64)
++ENTRY(NG4_retl_o2_plus_o4_fp)
++      ba,pt   %xcc, __restore_asi_fp
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
++      add     %o4, 8, %o4
++      ba,pt   %xcc, __restore_asi_fp
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
++      add     %o4, 16, %o4
++      ba,pt   %xcc, __restore_asi_fp
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
++      add     %o4, 24, %o4
++      ba,pt   %xcc, __restore_asi_fp
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
++      add     %o4, 32, %o4
++      ba,pt   %xcc, __restore_asi_fp
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
++      add     %o4, 40, %o4
++      ba,pt   %xcc, __restore_asi_fp
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
++      add     %o4, 48, %o4
++      ba,pt   %xcc, __restore_asi_fp
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
++      add     %o4, 56, %o4
++      ba,pt   %xcc, __restore_asi_fp
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
++      add     %o4, 64, %o4
++      ba,pt   %xcc, __restore_asi_fp
++       add    %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
++#endif
+       .align          64
+ 
+       .globl  FUNC_NAME
+@@ -124,12 +274,13 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       brz,pt          %g1, 51f
+        sub            %o2, %g1, %o2
+ 
+-1:    EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
++
++1:    EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
+       add             %o1, 1, %o1
+       subcc           %g1, 1, %g1
+       add             %o0, 1, %o0
+       bne,pt          %icc, 1b
+-       EX_ST(STORE(stb, %g2, %o0 - 0x01))
++       EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
+ 
+ 51:   LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
+       LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
+@@ -154,43 +305,43 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       brz,pt          %g1, .Llarge_aligned
+        sub            %o2, %g1, %o2
+ 
+-1:    EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
++1:    EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
+       add             %o1, 8, %o1
+       subcc           %g1, 8, %g1
+       add             %o0, 8, %o0
+       bne,pt          %icc, 1b
+-       EX_ST(STORE(stx, %g2, %o0 - 0x08))
++       EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
+ 
+ .Llarge_aligned:
+       /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
+       andn            %o2, 0x3f, %o4
+       sub             %o2, %o4, %o2
+ 
+-1:    EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
++1:    EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
+       add             %o1, 0x40, %o1
+-      EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
++      EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
+       subcc           %o4, 0x40, %o4
+-      EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
+-      EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
+-      EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
+-      EX_ST(STORE_INIT(%g1, %o0))
++      EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
++      EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
++      EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
++      EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
+       add             %o0, 0x08, %o0
+-      EX_ST(STORE_INIT(%g2, %o0))
++      EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
+       add             %o0, 0x08, %o0
+-      EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
+-      EX_ST(STORE_INIT(%g3, %o0))
++      EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
++      EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
+       add             %o0, 0x08, %o0
+-      EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
+-      EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
++      EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
++      EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
+       add             %o0, 0x08, %o0
+-      EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
+-      EX_ST(STORE_INIT(%o5, %o0))
++      EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
++      EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
+       add             %o0, 0x08, %o0
+-      EX_ST(STORE_INIT(%g2, %o0))
++      EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
+       add             %o0, 0x08, %o0
+-      EX_ST(STORE_INIT(%g3, %o0))
++      EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
+       add             %o0, 0x08, %o0
+-      EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
++      EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
+       add             %o0, 0x08, %o0
+       bne,pt          %icc, 1b
+        LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
+@@ -216,17 +367,17 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       sub             %o2, %o4, %o2
+       alignaddr       %o1, %g0, %g1
+       add             %o1, %o4, %o1
+-      EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
+-1:    EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
++      EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
++1:    EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
+       subcc           %o4, 0x40, %o4
+-      EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
+-      EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
+-      EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
+-      EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
+-      EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
+-      EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
++      EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
++      EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
++      EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
++      EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
++      EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
++      EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
+       faligndata      %f0, %f2, %f16
+-      EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
++      EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
+       faligndata      %f2, %f4, %f18
+       add             %g1, 0x40, %g1
+       faligndata      %f4, %f6, %f20
+@@ -235,14 +386,14 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       faligndata      %f10, %f12, %f26
+       faligndata      %f12, %f14, %f28
+       faligndata      %f14, %f0, %f30
+-      EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
+-      EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
+-      EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
+-      EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
+-      EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
+-      EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
+-      EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
+-      EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
++      EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
++      EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
++      EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
++      EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
++      EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
++      EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
++      EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
++      EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
+       add             %o0, 0x40, %o0
+       bne,pt          %icc, 1b
+        LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
+@@ -270,37 +421,38 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       andncc          %o2, 0x20 - 1, %o5
+       be,pn           %icc, 2f
+        sub            %o2, %o5, %o2
+-1:    EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+-      EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
+-      EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
+-      EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
++1:    EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
++      EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
++      EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
++      EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
+       add             %o1, 0x20, %o1
+       subcc           %o5, 0x20, %o5
+-      EX_ST(STORE(stx, %g1, %o0 + 0x00))
+-      EX_ST(STORE(stx, %g2, %o0 + 0x08))
+-      EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
+-      EX_ST(STORE(stx, %o4, %o0 + 0x18))
++      EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
++      EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
++      EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
++      EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
+       bne,pt          %icc, 1b
+        add            %o0, 0x20, %o0
+ 2:    andcc           %o2, 0x18, %o5
+       be,pt           %icc, 3f
+        sub            %o2, %o5, %o2
+-1:    EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
++
++1:    EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
+       add             %o1, 0x08, %o1
+       add             %o0, 0x08, %o0
+       subcc           %o5, 0x08, %o5
+       bne,pt          %icc, 1b
+-       EX_ST(STORE(stx, %g1, %o0 - 0x08))
++       EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
+ 3:    brz,pt          %o2, .Lexit
+        cmp            %o2, 0x04
+       bl,pn           %icc, .Ltiny
+        nop
+-      EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
++      EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
+       add             %o1, 0x04, %o1
+       add             %o0, 0x04, %o0
+       subcc           %o2, 0x04, %o2
+       bne,pn          %icc, .Ltiny
+-       EX_ST(STORE(stw, %g1, %o0 - 0x04))
++       EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
+       ba,a,pt         %icc, .Lexit
+ .Lmedium_unaligned:
+       /* First get dest 8 byte aligned.  */
+@@ -309,12 +461,12 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       brz,pt          %g1, 2f
+        sub            %o2, %g1, %o2
+ 
+-1:    EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
++1:    EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
+       add             %o1, 1, %o1
+       subcc           %g1, 1, %g1
+       add             %o0, 1, %o0
+       bne,pt          %icc, 1b
+-       EX_ST(STORE(stb, %g2, %o0 - 0x01))
++       EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
+ 2:
+       and             %o1, 0x7, %g1
+       brz,pn          %g1, .Lmedium_noprefetch
+@@ -322,16 +474,16 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       mov             64, %g2
+       sub             %g2, %g1, %g2
+       andn            %o1, 0x7, %o1
+-      EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
++      EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
+       sllx            %o4, %g1, %o4
+       andn            %o2, 0x08 - 1, %o5
+       sub             %o2, %o5, %o2
+-1:    EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
++1:    EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
+       add             %o1, 0x08, %o1
+       subcc           %o5, 0x08, %o5
+       srlx            %g3, %g2, GLOBAL_SPARE
+       or              GLOBAL_SPARE, %o4, GLOBAL_SPARE
+-      EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
++      EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
+       add             %o0, 0x08, %o0
+       bne,pt          %icc, 1b
+        sllx           %g3, %g1, %o4
+@@ -342,17 +494,17 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       ba,pt           %icc, .Lsmall_unaligned
+ 
+ .Ltiny:
+-      EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
++      EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
+       subcc           %o2, 1, %o2
+       be,pn           %icc, .Lexit
+-       EX_ST(STORE(stb, %g1, %o0 + 0x00))
+-      EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
++       EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
++      EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
+       subcc           %o2, 1, %o2
+       be,pn           %icc, .Lexit
+-       EX_ST(STORE(stb, %g1, %o0 + 0x01))
+-      EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
++       EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
++      EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
+       ba,pt           %icc, .Lexit
+-       EX_ST(STORE(stb, %g1, %o0 + 0x02))
++       EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
+ 
+ .Lsmall:
+       andcc           %g2, 0x3, %g0
+@@ -360,22 +512,22 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+        andn           %o2, 0x4 - 1, %o5
+       sub             %o2, %o5, %o2
+ 1:
+-      EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
++      EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
+       add             %o1, 0x04, %o1
+       subcc           %o5, 0x04, %o5
+       add             %o0, 0x04, %o0
+       bne,pt          %icc, 1b
+-       EX_ST(STORE(stw, %g1, %o0 - 0x04))
++       EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
+       brz,pt          %o2, .Lexit
+        nop
+       ba,a,pt         %icc, .Ltiny
+ 
+ .Lsmall_unaligned:
+-1:    EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
++1:    EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
+       add             %o1, 1, %o1
+       add             %o0, 1, %o0
+       subcc           %o2, 1, %o2
+       bne,pt          %icc, 1b
+-       EX_ST(STORE(stb, %g1, %o0 - 0x01))
++       EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
+       ba,a,pt         %icc, .Lexit
+       .size           FUNC_NAME, .-FUNC_NAME
+diff --git a/arch/sparc/lib/NGcopy_from_user.S 
b/arch/sparc/lib/NGcopy_from_user.S
+index 5d1e4d1ac21e..9cd42fcbc781 100644
+--- a/arch/sparc/lib/NGcopy_from_user.S
++++ b/arch/sparc/lib/NGcopy_from_user.S
+@@ -3,11 +3,11 @@
+  * Copyright (C) 2006, 2007 David S. Miller ([email protected])
+  */
+ 
+-#define EX_LD(x)              \
++#define EX_LD(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __ret_one_asi;\
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
+index ff630dcb273c..5c358afd464e 100644
+--- a/arch/sparc/lib/NGcopy_to_user.S
++++ b/arch/sparc/lib/NGcopy_to_user.S
+@@ -3,11 +3,11 @@
+  * Copyright (C) 2006, 2007 David S. Miller ([email protected])
+  */
+ 
+-#define EX_ST(x)              \
++#define EX_ST(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __ret_one_asi;\
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
+index 96a14caf6966..d88c4ed50a00 100644
+--- a/arch/sparc/lib/NGmemcpy.S
++++ b/arch/sparc/lib/NGmemcpy.S
+@@ -4,6 +4,7 @@
+  */
+ 
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/asi.h>
+ #include <asm/thread_info.h>
+ #define GLOBAL_SPARE  %g7
+@@ -27,15 +28,11 @@
+ #endif
+ 
+ #ifndef EX_LD
+-#define EX_LD(x)      x
++#define EX_LD(x,y)    x
+ #endif
+ 
+ #ifndef EX_ST
+-#define EX_ST(x)      x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x)  x
++#define EX_ST(x,y)    x
+ #endif
+ 
+ #ifndef LOAD
+@@ -79,6 +76,92 @@
+       .register       %g3,#scratch
+ 
+       .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x)  x
++__restore_asi:
++      ret
++      wr      %g0, ASI_AIUS, %asi
++       restore
++ENTRY(NG_ret_i2_plus_i4_plus_1)
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %i5, %i0
++ENDPROC(NG_ret_i2_plus_i4_plus_1)
++ENTRY(NG_ret_i2_plus_g1)
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1)
++ENTRY(NG_ret_i2_plus_g1_minus_8)
++      sub     %g1, 8, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_8)
++ENTRY(NG_ret_i2_plus_g1_minus_16)
++      sub     %g1, 16, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_16)
++ENTRY(NG_ret_i2_plus_g1_minus_24)
++      sub     %g1, 24, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_24)
++ENTRY(NG_ret_i2_plus_g1_minus_32)
++      sub     %g1, 32, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_32)
++ENTRY(NG_ret_i2_plus_g1_minus_40)
++      sub     %g1, 40, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_40)
++ENTRY(NG_ret_i2_plus_g1_minus_48)
++      sub     %g1, 48, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_48)
++ENTRY(NG_ret_i2_plus_g1_minus_56)
++      sub     %g1, 56, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_56)
++ENTRY(NG_ret_i2_plus_i4)
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %i4, %i0
++ENDPROC(NG_ret_i2_plus_i4)
++ENTRY(NG_ret_i2_plus_i4_minus_8)
++      sub     %i4, 8, %i4
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %i4, %i0
++ENDPROC(NG_ret_i2_plus_i4_minus_8)
++ENTRY(NG_ret_i2_plus_8)
++      ba,pt   %xcc, __restore_asi
++       add    %i2, 8, %i0
++ENDPROC(NG_ret_i2_plus_8)
++ENTRY(NG_ret_i2_plus_4)
++      ba,pt   %xcc, __restore_asi
++       add    %i2, 4, %i0
++ENDPROC(NG_ret_i2_plus_4)
++ENTRY(NG_ret_i2_plus_1)
++      ba,pt   %xcc, __restore_asi
++       add    %i2, 1, %i0
++ENDPROC(NG_ret_i2_plus_1)
++ENTRY(NG_ret_i2_plus_g1_plus_1)
++      add     %g1, 1, %g1
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_plus_1)
++ENTRY(NG_ret_i2)
++      ba,pt   %xcc, __restore_asi
++       mov    %i2, %i0
++ENDPROC(NG_ret_i2)
++ENTRY(NG_ret_i2_and_7_plus_i4)
++      and     %i2, 7, %i2
++      ba,pt   %xcc, __restore_asi
++       add    %i2, %i4, %i0
++ENDPROC(NG_ret_i2_and_7_plus_i4)
++#endif
++
+       .align          64
+ 
+       .globl  FUNC_NAME
+@@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+       sub             %g0, %i4, %i4   ! bytes to align dst
+       sub             %i2, %i4, %i2
+ 1:    subcc           %i4, 1, %i4
+-      EX_LD(LOAD(ldub, %i1, %g1))
+-      EX_ST(STORE(stb, %g1, %o0))
++      EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
++      EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
+       add             %i1, 1, %i1
+       bne,pt          %XCC, 1b
+       add             %o0, 1, %o0
+@@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+       and             %i4, 0x7, GLOBAL_SPARE
+       sll             GLOBAL_SPARE, 3, GLOBAL_SPARE
+       mov             64, %i5
+-      EX_LD(LOAD_TWIN(%i1, %g2, %g3))
++      EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
+       sub             %i5, GLOBAL_SPARE, %i5
+       mov             16, %o4
+       mov             32, %o5
+@@ -178,31 +261,31 @@ FUNC_NAME:       /* %i0=dst, %i1=src, %i2=len */
+       srlx            WORD3, PRE_SHIFT, TMP; \
+       or              WORD2, TMP, WORD2;
+ 
+-8:    EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
++8:    EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
+       MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
+       LOAD(prefetch, %i1 + %i3, #one_read)
+ 
+-      EX_ST(STORE_INIT(%g2, %o0 + 0x00))
+-      EX_ST(STORE_INIT(%g3, %o0 + 0x08))
++      EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
++      EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+ 
+-      EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
++      EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
+       MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
+ 
+-      EX_ST(STORE_INIT(%o2, %o0 + 0x10))
+-      EX_ST(STORE_INIT(%o3, %o0 + 0x18))
++      EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
++      EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+ 
+-      EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
++      EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+       MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
+ 
+-      EX_ST(STORE_INIT(%g2, %o0 + 0x20))
+-      EX_ST(STORE_INIT(%g3, %o0 + 0x28))
++      EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
++      EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+ 
+-      EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
++      EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
+       add             %i1, 64, %i1
+       MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
+ 
+-      EX_ST(STORE_INIT(%o2, %o0 + 0x30))
+-      EX_ST(STORE_INIT(%o3, %o0 + 0x38))
++      EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
++      EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+ 
+       subcc           %g1, 64, %g1
+       bne,pt          %XCC, 8b
+@@ -211,31 +294,31 @@ FUNC_NAME:       /* %i0=dst, %i1=src, %i2=len */
+       ba,pt           %XCC, 60f
+        add            %i1, %i4, %i1
+ 
+-9:    EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
++9:    EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
+       MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
+       LOAD(prefetch, %i1 + %i3, #one_read)
+ 
+-      EX_ST(STORE_INIT(%g3, %o0 + 0x00))
+-      EX_ST(STORE_INIT(%o2, %o0 + 0x08))
++      EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
++      EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+ 
+-      EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
++      EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
+       MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
+ 
+-      EX_ST(STORE_INIT(%o3, %o0 + 0x10))
+-      EX_ST(STORE_INIT(%g2, %o0 + 0x18))
++      EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
++      EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+ 
+-      EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
++      EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+       MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
+ 
+-      EX_ST(STORE_INIT(%g3, %o0 + 0x20))
+-      EX_ST(STORE_INIT(%o2, %o0 + 0x28))
++      EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
++      EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+ 
+-      EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
++      EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
+       add             %i1, 64, %i1
+       MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
+ 
+-      EX_ST(STORE_INIT(%o3, %o0 + 0x30))
+-      EX_ST(STORE_INIT(%g2, %o0 + 0x38))
++      EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
++      EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+ 
+       subcc           %g1, 64, %g1
+       bne,pt          %XCC, 9b
+@@ -249,25 +332,25 @@ FUNC_NAME:       /* %i0=dst, %i1=src, %i2=len */
+        * one twin load ahead, then add 8 back into source when
+        * we finish the loop.
+        */
+-      EX_LD(LOAD_TWIN(%i1, %o4, %o5))
++      EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
+       mov     16, %o7
+       mov     32, %g2
+       mov     48, %g3
+       mov     64, %o1
+-1:    EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
++1:    EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
+       LOAD(prefetch, %i1 + %o1, #one_read)
+-      EX_ST(STORE_INIT(%o5, %o0 + 0x00))      ! initializes cache line
+-      EX_ST(STORE_INIT(%o2, %o0 + 0x08))
+-      EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
+-      EX_ST(STORE_INIT(%o3, %o0 + 0x10))
+-      EX_ST(STORE_INIT(%o4, %o0 + 0x18))
+-      EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
+-      EX_ST(STORE_INIT(%o5, %o0 + 0x20))
+-      EX_ST(STORE_INIT(%o2, %o0 + 0x28))
+-      EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
++      EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1)   ! initializes 
cache line
++      EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
++      EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
++      EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
++      EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
++      EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
++      EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
++      EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
++      EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
+       add             %i1, 64, %i1
+-      EX_ST(STORE_INIT(%o3, %o0 + 0x30))
+-      EX_ST(STORE_INIT(%o4, %o0 + 0x38))
++      EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
++      EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+       subcc           %g1, 64, %g1
+       bne,pt          %XCC, 1b
+        add            %o0, 64, %o0
+@@ -282,20 +365,20 @@ FUNC_NAME:       /* %i0=dst, %i1=src, %i2=len */
+       mov     32, %g2
+       mov     48, %g3
+       mov     64, %o1
+-1:    EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
+-      EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
++1:    EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
++      EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
+       LOAD(prefetch, %i1 + %o1, #one_read)
+-      EX_ST(STORE_INIT(%o4, %o0 + 0x00))      ! initializes cache line
+-      EX_ST(STORE_INIT(%o5, %o0 + 0x08))
+-      EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
+-      EX_ST(STORE_INIT(%o2, %o0 + 0x10))
+-      EX_ST(STORE_INIT(%o3, %o0 + 0x18))
+-      EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
++      EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1)   ! initializes 
cache line
++      EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
++      EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
++      EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
++      EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
++      EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+       add     %i1, 64, %i1
+-      EX_ST(STORE_INIT(%o4, %o0 + 0x20))
+-      EX_ST(STORE_INIT(%o5, %o0 + 0x28))
+-      EX_ST(STORE_INIT(%o2, %o0 + 0x30))
+-      EX_ST(STORE_INIT(%o3, %o0 + 0x38))
++      EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
++      EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
++      EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
++      EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+       subcc   %g1, 64, %g1
+       bne,pt  %XCC, 1b
+        add    %o0, 64, %o0
+@@ -321,28 +404,28 @@ FUNC_NAME:       /* %i0=dst, %i1=src, %i2=len */
+       andn            %i2, 0xf, %i4
+       and             %i2, 0xf, %i2
+ 1:    subcc           %i4, 0x10, %i4
+-      EX_LD(LOAD(ldx, %i1, %o4))
++      EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
+       add             %i1, 0x08, %i1
+-      EX_LD(LOAD(ldx, %i1, %g1))
++      EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
+       sub             %i1, 0x08, %i1
+-      EX_ST(STORE(stx, %o4, %i1 + %i3))
++      EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
+       add             %i1, 0x8, %i1
+-      EX_ST(STORE(stx, %g1, %i1 + %i3))
++      EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
+       bgu,pt          %XCC, 1b
+        add            %i1, 0x8, %i1
+ 73:   andcc           %i2, 0x8, %g0
+       be,pt           %XCC, 1f
+        nop
+       sub             %i2, 0x8, %i2
+-      EX_LD(LOAD(ldx, %i1, %o4))
+-      EX_ST(STORE(stx, %o4, %i1 + %i3))
++      EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
++      EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
+       add             %i1, 0x8, %i1
+ 1:    andcc           %i2, 0x4, %g0
+       be,pt           %XCC, 1f
+        nop
+       sub             %i2, 0x4, %i2
+-      EX_LD(LOAD(lduw, %i1, %i5))
+-      EX_ST(STORE(stw, %i5, %i1 + %i3))
++      EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
++      EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
+       add             %i1, 0x4, %i1
+ 1:    cmp             %i2, 0
+       be,pt           %XCC, 85f
+@@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+       sub             %i2, %g1, %i2
+ 
+ 1:    subcc           %g1, 1, %g1
+-      EX_LD(LOAD(ldub, %i1, %i5))
+-      EX_ST(STORE(stb, %i5, %i1 + %i3))
++      EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
++      EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
+       bgu,pt          %icc, 1b
+        add            %i1, 1, %i1
+ 
+@@ -375,16 +458,16 @@ FUNC_NAME:       /* %i0=dst, %i1=src, %i2=len */
+ 
+ 8:    mov             64, %i3
+       andn            %i1, 0x7, %i1
+-      EX_LD(LOAD(ldx, %i1, %g2))
++      EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
+       sub             %i3, %g1, %i3
+       andn            %i2, 0x7, %i4
+       sllx            %g2, %g1, %g2
+ 1:    add             %i1, 0x8, %i1
+-      EX_LD(LOAD(ldx, %i1, %g3))
++      EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
+       subcc           %i4, 0x8, %i4
+       srlx            %g3, %i3, %i5
+       or              %i5, %g2, %i5
+-      EX_ST(STORE(stx, %i5, %o0))
++      EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
+       add             %o0, 0x8, %o0
+       bgu,pt          %icc, 1b
+        sllx           %g3, %g1, %g2
+@@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ 
+ 1:
+       subcc           %i2, 4, %i2
+-      EX_LD(LOAD(lduw, %i1, %g1))
+-      EX_ST(STORE(stw, %g1, %i1 + %i3))
++      EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
++      EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
+       bgu,pt          %XCC, 1b
+        add            %i1, 4, %i1
+ 
+@@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+       .align          32
+ 90:
+       subcc           %i2, 1, %i2
+-      EX_LD(LOAD(ldub, %i1, %g1))
+-      EX_ST(STORE(stb, %g1, %i1 + %i3))
++      EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
++      EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
+       bgu,pt          %XCC, 90b
+        add            %i1, 1, %i1
+       ret
+diff --git a/arch/sparc/lib/U1copy_from_user.S 
b/arch/sparc/lib/U1copy_from_user.S
+index ecc5692fa2b4..bb6ff73229e3 100644
+--- a/arch/sparc/lib/U1copy_from_user.S
++++ b/arch/sparc/lib/U1copy_from_user.S
+@@ -3,19 +3,19 @@
+  * Copyright (C) 1999, 2000, 2004 David S. Miller ([email protected])
+  */
+ 
+-#define EX_LD(x)              \
++#define EX_LD(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one;  \
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+-#define EX_LD_FP(x)           \
++#define EX_LD_FP(x,y)         \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_fp;\
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
+index 9eea392e44d4..ed92ce739558 100644
+--- a/arch/sparc/lib/U1copy_to_user.S
++++ b/arch/sparc/lib/U1copy_to_user.S
+@@ -3,19 +3,19 @@
+  * Copyright (C) 1999, 2000, 2004 David S. Miller ([email protected])
+  */
+ 
+-#define EX_ST(x)              \
++#define EX_ST(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one;  \
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+-#define EX_ST_FP(x)           \
++#define EX_ST_FP(x,y)         \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_fp;\
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
+index 3e6209ebb7d7..f30d2ab2c371 100644
+--- a/arch/sparc/lib/U1memcpy.S
++++ b/arch/sparc/lib/U1memcpy.S
+@@ -5,6 +5,7 @@
+  */
+ 
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/visasm.h>
+ #include <asm/asi.h>
+ #define GLOBAL_SPARE  g7
+@@ -23,21 +24,17 @@
+ #endif
+ 
+ #ifndef EX_LD
+-#define EX_LD(x)      x
++#define EX_LD(x,y)    x
+ #endif
+ #ifndef EX_LD_FP
+-#define EX_LD_FP(x)   x
++#define EX_LD_FP(x,y) x
+ #endif
+ 
+ #ifndef EX_ST
+-#define EX_ST(x)      x
++#define EX_ST(x,y)    x
+ #endif
+ #ifndef EX_ST_FP
+-#define EX_ST_FP(x)   x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x)  x
++#define EX_ST_FP(x,y) x
+ #endif
+ 
+ #ifndef LOAD
+@@ -78,53 +75,169 @@
+       faligndata              %f7, %f8, %f60;                 \
+       faligndata              %f8, %f9, %f62;
+ 
+-#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)  \
+-      EX_LD_FP(LOAD_BLK(%src, %fdest));                               \
+-      EX_ST_FP(STORE_BLK(%fsrc, %dest));                              \
+-      add                     %src, 0x40, %src;               \
+-      subcc                   %len, 0x40, %len;               \
+-      be,pn                   %xcc, jmptgt;                   \
+-       add                    %dest, 0x40, %dest;             \
+-
+-#define LOOP_CHUNK1(src, dest, len, branch_dest)              \
+-      MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
+-#define LOOP_CHUNK2(src, dest, len, branch_dest)              \
+-      MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
+-#define LOOP_CHUNK3(src, dest, len, branch_dest)              \
+-      MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
++#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt)                       
\
++      EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp);                  \
++      EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);                 \
++      add                     %src, 0x40, %src;                       \
++      subcc                   %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE;     \
++      be,pn                   %xcc, jmptgt;                           \
++       add                    %dest, 0x40, %dest;                     \
++
++#define LOOP_CHUNK1(src, dest, branch_dest)           \
++      MAIN_LOOP_CHUNK(src, dest, f0,  f48, branch_dest)
++#define LOOP_CHUNK2(src, dest, branch_dest)           \
++      MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
++#define LOOP_CHUNK3(src, dest, branch_dest)           \
++      MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
+ 
+ #define DO_SYNC                       membar  #Sync;
+ #define STORE_SYNC(dest, fsrc)                                \
+-      EX_ST_FP(STORE_BLK(%fsrc, %dest));                      \
++      EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
+       add                     %dest, 0x40, %dest;     \
+       DO_SYNC
+ 
+ #define STORE_JUMP(dest, fsrc, target)                        \
+-      EX_ST_FP(STORE_BLK(%fsrc, %dest));                      \
++      EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
+       add                     %dest, 0x40, %dest;     \
+       ba,pt                   %xcc, target;           \
+        nop;
+ 
+-#define FINISH_VISCHUNK(dest, f0, f1, left)   \
+-      subcc                   %left, 8, %left;\
+-      bl,pn                   %xcc, 95f;      \
+-       faligndata             %f0, %f1, %f48; \
+-      EX_ST_FP(STORE(std, %f48, %dest));              \
++#define FINISH_VISCHUNK(dest, f0, f1)                 \
++      subcc                   %g3, 8, %g3;            \
++      bl,pn                   %xcc, 95f;              \
++       faligndata             %f0, %f1, %f48;         \
++      EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp);  \
+       add                     %dest, 8, %dest;
+ 
+-#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)      \
+-      subcc                   %left, 8, %left;        \
+-      bl,pn                   %xcc, 95f;              \
++#define UNEVEN_VISCHUNK_LAST(dest, f0, f1)    \
++      subcc                   %g3, 8, %g3;    \
++      bl,pn                   %xcc, 95f;      \
+        fsrc2                  %f0, %f1;
+ 
+-#define UNEVEN_VISCHUNK(dest, f0, f1, left)           \
+-      UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)        \
++#define UNEVEN_VISCHUNK(dest, f0, f1)         \
++      UNEVEN_VISCHUNK_LAST(dest, f0, f1)      \
+       ba,a,pt                 %xcc, 93f;
+ 
+       .register       %g2,#scratch
+       .register       %g3,#scratch
+ 
+       .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x)  x
++ENTRY(U1_g1_1_fp)
++      VISExitHalf
++      add             %g1, 1, %g1
++      add             %g1, %g2, %g1
++      retl
++       add            %g1, %o2, %o0
++ENDPROC(U1_g1_1_fp)
++ENTRY(U1_g2_0_fp)
++      VISExitHalf
++      retl
++       add            %g2, %o2, %o0
++ENDPROC(U1_g2_0_fp)
++ENTRY(U1_g2_8_fp)
++      VISExitHalf
++      add             %g2, 8, %g2
++      retl
++       add            %g2, %o2, %o0
++ENDPROC(U1_g2_8_fp)
++ENTRY(U1_gs_0_fp)
++      VISExitHalf
++      add             %GLOBAL_SPARE, %g3, %o0
++      retl
++       add            %o0, %o2, %o0
++ENDPROC(U1_gs_0_fp)
++ENTRY(U1_gs_80_fp)
++      VISExitHalf
++      add             %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
++      add             %GLOBAL_SPARE, %g3, %o0
++      retl
++       add            %o0, %o2, %o0
++ENDPROC(U1_gs_80_fp)
++ENTRY(U1_gs_40_fp)
++      VISExitHalf
++      add             %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
++      add             %GLOBAL_SPARE, %g3, %o0
++      retl
++       add            %o0, %o2, %o0
++ENDPROC(U1_gs_40_fp)
++ENTRY(U1_g3_0_fp)
++      VISExitHalf
++      retl
++       add            %g3, %o2, %o0
++ENDPROC(U1_g3_0_fp)
++ENTRY(U1_g3_8_fp)
++      VISExitHalf
++      add             %g3, 8, %g3
++      retl
++       add            %g3, %o2, %o0
++ENDPROC(U1_g3_8_fp)
++ENTRY(U1_o2_0_fp)
++      VISExitHalf
++      retl
++       mov            %o2, %o0
++ENDPROC(U1_o2_0_fp)
++ENTRY(U1_o2_1_fp)
++      VISExitHalf
++      retl
++       add            %o2, 1, %o0
++ENDPROC(U1_o2_1_fp)
++ENTRY(U1_gs_0)
++      VISExitHalf
++      retl
++       add            %GLOBAL_SPARE, %o2, %o0
++ENDPROC(U1_gs_0)
++ENTRY(U1_gs_8)
++      VISExitHalf
++      add             %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
++      retl
++       add            %GLOBAL_SPARE, 0x8, %o0
++ENDPROC(U1_gs_8)
++ENTRY(U1_gs_10)
++      VISExitHalf
++      add             %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
++      retl
++       add            %GLOBAL_SPARE, 0x10, %o0
++ENDPROC(U1_gs_10)
++ENTRY(U1_o2_0)
++      retl
++       mov            %o2, %o0
++ENDPROC(U1_o2_0)
++ENTRY(U1_o2_8)
++      retl
++       add            %o2, 8, %o0
++ENDPROC(U1_o2_8)
++ENTRY(U1_o2_4)
++      retl
++       add            %o2, 4, %o0
++ENDPROC(U1_o2_4)
++ENTRY(U1_o2_1)
++      retl
++       add            %o2, 1, %o0
++ENDPROC(U1_o2_1)
++ENTRY(U1_g1_0)
++      retl
++       add            %g1, %o2, %o0
++ENDPROC(U1_g1_0)
++ENTRY(U1_g1_1)
++      add             %g1, 1, %g1
++      retl
++       add            %g1, %o2, %o0
++ENDPROC(U1_g1_1)
++ENTRY(U1_gs_0_o2_adj)
++      and             %o2, 7, %o2
++      retl
++       add            %GLOBAL_SPARE, %o2, %o0
++ENDPROC(U1_gs_0_o2_adj)
++ENTRY(U1_gs_8_o2_adj)
++      and             %o2, 7, %o2
++      add             %GLOBAL_SPARE, 8, %GLOBAL_SPARE
++      retl
++       add            %GLOBAL_SPARE, %o2, %o0
++ENDPROC(U1_gs_8_o2_adj)
++#endif
++
+       .align          64
+ 
+       .globl          FUNC_NAME
+@@ -166,8 +279,8 @@ FUNC_NAME:         /* %o0=dst, %o1=src, %o2=len */
+        and            %g2, 0x38, %g2
+ 
+ 1:    subcc           %g1, 0x1, %g1
+-      EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
+-      EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
++      EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
++      EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
+       bgu,pt          %XCC, 1b
+        add            %o1, 0x1, %o1
+ 
+@@ -178,20 +291,20 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       be,pt           %icc, 3f
+        alignaddr      %o1, %g0, %o1
+ 
+-      EX_LD_FP(LOAD(ldd, %o1, %f4))
+-1:    EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
++      EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
++1:    EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
+       add             %o1, 0x8, %o1
+       subcc           %g2, 0x8, %g2
+       faligndata      %f4, %f6, %f0
+-      EX_ST_FP(STORE(std, %f0, %o0))
++      EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
+       be,pn           %icc, 3f
+        add            %o0, 0x8, %o0
+ 
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
+       add             %o1, 0x8, %o1
+       subcc           %g2, 0x8, %g2
+       faligndata      %f6, %f4, %f0
+-      EX_ST_FP(STORE(std, %f0, %o0))
++      EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
+       bne,pt          %icc, 1b
+        add            %o0, 0x8, %o0
+ 
+@@ -214,13 +327,13 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       add             %g1, %GLOBAL_SPARE, %g1
+       subcc           %o2, %g3, %o2
+ 
+-      EX_LD_FP(LOAD_BLK(%o1, %f0))
++      EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
+       add             %o1, 0x40, %o1
+       add             %g1, %g3, %g1
+-      EX_LD_FP(LOAD_BLK(%o1, %f16))
++      EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
+       add             %o1, 0x40, %o1
+       sub             %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
+-      EX_LD_FP(LOAD_BLK(%o1, %f32))
++      EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
+       add             %o1, 0x40, %o1
+ 
+       /* There are 8 instances of the unrolled loop,
+@@ -240,11 +353,11 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+ 
+       .align          64
+ 1:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
+-      LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++      LOOP_CHUNK1(o1, o0, 1f)
+       FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
+-      LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++      LOOP_CHUNK2(o1, o0, 2f)
+       FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
+-      LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++      LOOP_CHUNK3(o1, o0, 3f)
+       ba,pt           %xcc, 1b+4
+        faligndata     %f0, %f2, %f48
+ 1:    FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
+@@ -261,11 +374,11 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       STORE_JUMP(o0, f48, 56f)
+ 
+ 1:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
+-      LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++      LOOP_CHUNK1(o1, o0, 1f)
+       FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
+-      LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++      LOOP_CHUNK2(o1, o0, 2f)
+       FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
+-      LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++      LOOP_CHUNK3(o1, o0, 3f)
+       ba,pt           %xcc, 1b+4
+        faligndata     %f2, %f4, %f48
+ 1:    FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
+@@ -282,11 +395,11 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       STORE_JUMP(o0, f48, 57f)
+ 
+ 1:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
+-      LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++      LOOP_CHUNK1(o1, o0, 1f)
+       FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
+-      LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++      LOOP_CHUNK2(o1, o0, 2f)
+       FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
+-      LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++      LOOP_CHUNK3(o1, o0, 3f)
+       ba,pt           %xcc, 1b+4
+        faligndata     %f4, %f6, %f48
+ 1:    FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
+@@ -303,11 +416,11 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       STORE_JUMP(o0, f48, 58f)
+ 
+ 1:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
+-      LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++      LOOP_CHUNK1(o1, o0, 1f)
+       FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
+-      LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++      LOOP_CHUNK2(o1, o0, 2f)
+       FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 
+-      LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++      LOOP_CHUNK3(o1, o0, 3f)
+       ba,pt           %xcc, 1b+4
+        faligndata     %f6, %f8, %f48
+ 1:    FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
+@@ -324,11 +437,11 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       STORE_JUMP(o0, f48, 59f)
+ 
+ 1:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
+-      LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++      LOOP_CHUNK1(o1, o0, 1f)
+       FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
+-      LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++      LOOP_CHUNK2(o1, o0, 2f)
+       FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
+-      LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++      LOOP_CHUNK3(o1, o0, 3f)
+       ba,pt           %xcc, 1b+4
+        faligndata     %f8, %f10, %f48
+ 1:    FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
+@@ -345,11 +458,11 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       STORE_JUMP(o0, f48, 60f)
+ 
+ 1:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
+-      LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++      LOOP_CHUNK1(o1, o0, 1f)
+       FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
+-      LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++      LOOP_CHUNK2(o1, o0, 2f)
+       FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
+-      LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++      LOOP_CHUNK3(o1, o0, 3f)
+       ba,pt           %xcc, 1b+4
+        faligndata     %f10, %f12, %f48
+ 1:    FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
+@@ -366,11 +479,11 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       STORE_JUMP(o0, f48, 61f)
+ 
+ 1:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
+-      LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++      LOOP_CHUNK1(o1, o0, 1f)
+       FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
+-      LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++      LOOP_CHUNK2(o1, o0, 2f)
+       FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
+-      LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++      LOOP_CHUNK3(o1, o0, 3f)
+       ba,pt           %xcc, 1b+4
+        faligndata     %f12, %f14, %f48
+ 1:    FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
+@@ -387,11 +500,11 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       STORE_JUMP(o0, f48, 62f)
+ 
+ 1:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
+-      LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++      LOOP_CHUNK1(o1, o0, 1f)
+       FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
+-      LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++      LOOP_CHUNK2(o1, o0, 2f)
+       FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
+-      LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++      LOOP_CHUNK3(o1, o0, 3f)
+       ba,pt           %xcc, 1b+4
+        faligndata     %f14, %f16, %f48
+ 1:    FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
+@@ -407,53 +520,53 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+       FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
+       STORE_JUMP(o0, f48, 63f)
+ 
+-40:   FINISH_VISCHUNK(o0, f0,  f2,  g3)
+-41:   FINISH_VISCHUNK(o0, f2,  f4,  g3)
+-42:   FINISH_VISCHUNK(o0, f4,  f6,  g3)
+-43:   FINISH_VISCHUNK(o0, f6,  f8,  g3)
+-44:   FINISH_VISCHUNK(o0, f8,  f10, g3)
+-45:   FINISH_VISCHUNK(o0, f10, f12, g3)
+-46:   FINISH_VISCHUNK(o0, f12, f14, g3)
+-47:   UNEVEN_VISCHUNK(o0, f14, f0,  g3)
+-48:   FINISH_VISCHUNK(o0, f16, f18, g3)
+-49:   FINISH_VISCHUNK(o0, f18, f20, g3)
+-50:   FINISH_VISCHUNK(o0, f20, f22, g3)
+-51:   FINISH_VISCHUNK(o0, f22, f24, g3)
+-52:   FINISH_VISCHUNK(o0, f24, f26, g3)
+-53:   FINISH_VISCHUNK(o0, f26, f28, g3)
+-54:   FINISH_VISCHUNK(o0, f28, f30, g3)
+-55:   UNEVEN_VISCHUNK(o0, f30, f0,  g3)
+-56:   FINISH_VISCHUNK(o0, f32, f34, g3)
+-57:   FINISH_VISCHUNK(o0, f34, f36, g3)
+-58:   FINISH_VISCHUNK(o0, f36, f38, g3)
+-59:   FINISH_VISCHUNK(o0, f38, f40, g3)
+-60:   FINISH_VISCHUNK(o0, f40, f42, g3)
+-61:   FINISH_VISCHUNK(o0, f42, f44, g3)
+-62:   FINISH_VISCHUNK(o0, f44, f46, g3)
+-63:   UNEVEN_VISCHUNK_LAST(o0, f46, f0,  g3)
+-
+-93:   EX_LD_FP(LOAD(ldd, %o1, %f2))
++40:   FINISH_VISCHUNK(o0, f0,  f2)
++41:   FINISH_VISCHUNK(o0, f2,  f4)
++42:   FINISH_VISCHUNK(o0, f4,  f6)
++43:   FINISH_VISCHUNK(o0, f6,  f8)
++44:   FINISH_VISCHUNK(o0, f8,  f10)
++45:   FINISH_VISCHUNK(o0, f10, f12)
++46:   FINISH_VISCHUNK(o0, f12, f14)
++47:   UNEVEN_VISCHUNK(o0, f14, f0)
++48:   FINISH_VISCHUNK(o0, f16, f18)
++49:   FINISH_VISCHUNK(o0, f18, f20)
++50:   FINISH_VISCHUNK(o0, f20, f22)
++51:   FINISH_VISCHUNK(o0, f22, f24)
++52:   FINISH_VISCHUNK(o0, f24, f26)
++53:   FINISH_VISCHUNK(o0, f26, f28)
++54:   FINISH_VISCHUNK(o0, f28, f30)
++55:   UNEVEN_VISCHUNK(o0, f30, f0)
++56:   FINISH_VISCHUNK(o0, f32, f34)
++57:   FINISH_VISCHUNK(o0, f34, f36)
++58:   FINISH_VISCHUNK(o0, f36, f38)
++59:   FINISH_VISCHUNK(o0, f38, f40)
++60:   FINISH_VISCHUNK(o0, f40, f42)
++61:   FINISH_VISCHUNK(o0, f42, f44)
++62:   FINISH_VISCHUNK(o0, f44, f46)
++63:   UNEVEN_VISCHUNK_LAST(o0, f46, f0)
++
++93:   EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
+       add             %o1, 8, %o1
+       subcc           %g3, 8, %g3
+       faligndata      %f0, %f2, %f8
+-      EX_ST_FP(STORE(std, %f8, %o0))
++      EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
+       bl,pn           %xcc, 95f
+        add            %o0, 8, %o0
+-      EX_LD_FP(LOAD(ldd, %o1, %f0))
++      EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
+       add             %o1, 8, %o1
+       subcc           %g3, 8, %g3
+       faligndata      %f2, %f0, %f8
+-      EX_ST_FP(STORE(std, %f8, %o0))
++      EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
+       bge,pt          %xcc, 93b
+        add            %o0, 8, %o0
+ 
+ 95:   brz,pt          %o2, 2f
+        mov            %g1, %o1
+ 
+-1:    EX_LD_FP(LOAD(ldub, %o1, %o3))
++1:    EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
+       add             %o1, 1, %o1
+       subcc           %o2, 1, %o2
+-      EX_ST_FP(STORE(stb, %o3, %o0))
++      EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
+       bne,pt          %xcc, 1b
+        add            %o0, 1, %o0
+ 
+@@ -469,27 +582,27 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+ 
+ 72:   andn            %o2, 0xf, %GLOBAL_SPARE
+       and             %o2, 0xf, %o2
+-1:    EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
+-      EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
++1:    EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
++      EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
+       subcc           %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
+-      EX_ST(STORE(stx, %o5, %o1 + %o3))
++      EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
+       add             %o1, 0x8, %o1
+-      EX_ST(STORE(stx, %g1, %o1 + %o3))
++      EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
+       bgu,pt          %XCC, 1b
+        add            %o1, 0x8, %o1
+ 73:   andcc           %o2, 0x8, %g0
+       be,pt           %XCC, 1f
+        nop
+-      EX_LD(LOAD(ldx, %o1, %o5))
++      EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
+       sub             %o2, 0x8, %o2
+-      EX_ST(STORE(stx, %o5, %o1 + %o3))
++      EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
+       add             %o1, 0x8, %o1
+ 1:    andcc           %o2, 0x4, %g0
+       be,pt           %XCC, 1f
+        nop
+-      EX_LD(LOAD(lduw, %o1, %o5))
++      EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
+       sub             %o2, 0x4, %o2
+-      EX_ST(STORE(stw, %o5, %o1 + %o3))
++      EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
+       add             %o1, 0x4, %o1
+ 1:    cmp             %o2, 0
+       be,pt           %XCC, 85f
+@@ -503,9 +616,9 @@ FUNC_NAME:         /* %o0=dst, %o1=src, %o2=len */
+        sub            %g0, %g1, %g1
+       sub             %o2, %g1, %o2
+ 
+-1:    EX_LD(LOAD(ldub, %o1, %o5))
++1:    EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
+       subcc           %g1, 1, %g1
+-      EX_ST(STORE(stb, %o5, %o1 + %o3))
++      EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
+       bgu,pt          %icc, 1b
+        add            %o1, 1, %o1
+ 
+@@ -521,16 +634,16 @@ FUNC_NAME:               /* %o0=dst, %o1=src, %o2=len */
+ 
+ 8:    mov             64, %o3
+       andn            %o1, 0x7, %o1
+-      EX_LD(LOAD(ldx, %o1, %g2))
++      EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
+       sub             %o3, %g1, %o3
+       andn            %o2, 0x7, %GLOBAL_SPARE
+       sllx            %g2, %g1, %g2
+-1:    EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
++1:    EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
+       subcc           %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
+       add             %o1, 0x8, %o1
+       srlx            %g3, %o3, %o5
+       or              %o5, %g2, %o5
+-      EX_ST(STORE(stx, %o5, %o0))
++      EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
+       add             %o0, 0x8, %o0
+       bgu,pt          %icc, 1b
+        sllx           %g3, %g1, %g2
+@@ -548,9 +661,9 @@ FUNC_NAME:         /* %o0=dst, %o1=src, %o2=len */
+       bne,pn          %XCC, 90f
+        sub            %o0, %o1, %o3
+ 
+-1:    EX_LD(LOAD(lduw, %o1, %g1))
++1:    EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
+       subcc           %o2, 4, %o2
+-      EX_ST(STORE(stw, %g1, %o1 + %o3))
++      EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
+       bgu,pt          %XCC, 1b
+        add            %o1, 4, %o1
+ 
+@@ -558,9 +671,9 @@ FUNC_NAME:         /* %o0=dst, %o1=src, %o2=len */
+        mov            EX_RETVAL(%o4), %o0
+ 
+       .align          32
+-90:   EX_LD(LOAD(ldub, %o1, %g1))
++90:   EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
+       subcc           %o2, 1, %o2
+-      EX_ST(STORE(stb, %g1, %o1 + %o3))
++      EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
+       bgu,pt          %XCC, 90b
+        add            %o1, 1, %o1
+       retl
+diff --git a/arch/sparc/lib/U3copy_from_user.S 
b/arch/sparc/lib/U3copy_from_user.S
+index 88ad73d86fe4..db73010a1af8 100644
+--- a/arch/sparc/lib/U3copy_from_user.S
++++ b/arch/sparc/lib/U3copy_from_user.S
+@@ -3,19 +3,19 @@
+  * Copyright (C) 1999, 2000, 2004 David S. Miller ([email protected])
+  */
+ 
+-#define EX_LD(x)              \
++#define EX_LD(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one;  \
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+-#define EX_LD_FP(x)           \
++#define EX_LD_FP(x,y)         \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_fp;\
++      .word 98b, y##_fp;      \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
+index 845139d75537..c4ee858e352a 100644
+--- a/arch/sparc/lib/U3copy_to_user.S
++++ b/arch/sparc/lib/U3copy_to_user.S
+@@ -3,19 +3,19 @@
+  * Copyright (C) 1999, 2000, 2004 David S. Miller ([email protected])
+  */
+ 
+-#define EX_ST(x)              \
++#define EX_ST(x,y)            \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one;  \
++      .word 98b, y;           \
+       .text;                  \
+       .align 4;
+ 
+-#define EX_ST_FP(x)           \
++#define EX_ST_FP(x,y)         \
+ 98:   x;                      \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one_fp;\
++      .word 98b, y##_fp;      \
+       .text;                  \
+       .align 4;
+ 
+diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
+index 491ee69e4995..54f98706b03b 100644
+--- a/arch/sparc/lib/U3memcpy.S
++++ b/arch/sparc/lib/U3memcpy.S
+@@ -4,6 +4,7 @@
+  */
+ 
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/visasm.h>
+ #include <asm/asi.h>
+ #define GLOBAL_SPARE  %g7
+@@ -22,21 +23,17 @@
+ #endif
+ 
+ #ifndef EX_LD
+-#define EX_LD(x)      x
++#define EX_LD(x,y)    x
+ #endif
+ #ifndef EX_LD_FP
+-#define EX_LD_FP(x)   x
++#define EX_LD_FP(x,y) x
+ #endif
+ 
+ #ifndef EX_ST
+-#define EX_ST(x)      x
++#define EX_ST(x,y)    x
+ #endif
+ #ifndef EX_ST_FP
+-#define EX_ST_FP(x)   x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x)  x
++#define EX_ST_FP(x,y) x
+ #endif
+ 
+ #ifndef LOAD
+@@ -77,6 +74,87 @@
+        */
+ 
+       .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x)  x
++__restore_fp:
++      VISExitHalf
++      retl
++       nop
++ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
++      add     %g1, 1, %g1
++      add     %g2, %g1, %g2
++      ba,pt   %xcc, __restore_fp
++       add    %o2, %g2, %o0
++ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
++ENTRY(U3_retl_o2_plus_g2_fp)
++      ba,pt   %xcc, __restore_fp
++       add    %o2, %g2, %o0
++ENDPROC(U3_retl_o2_plus_g2_fp)
++ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
++      add     %g2, 8, %g2
++      ba,pt   %xcc, __restore_fp
++       add    %o2, %g2, %o0
++ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
++ENTRY(U3_retl_o2)
++      retl
++       mov    %o2, %o0
++ENDPROC(U3_retl_o2)
++ENTRY(U3_retl_o2_plus_1)
++      retl
++       add    %o2, 1, %o0
++ENDPROC(U3_retl_o2_plus_1)
++ENTRY(U3_retl_o2_plus_4)
++      retl
++       add    %o2, 4, %o0
++ENDPROC(U3_retl_o2_plus_4)
++ENTRY(U3_retl_o2_plus_8)
++      retl
++       add    %o2, 8, %o0
++ENDPROC(U3_retl_o2_plus_8)
++ENTRY(U3_retl_o2_plus_g1_plus_1)
++      add     %g1, 1, %g1
++      retl
++       add    %o2, %g1, %o0
++ENDPROC(U3_retl_o2_plus_g1_plus_1)
++ENTRY(U3_retl_o2_fp)
++      ba,pt   %xcc, __restore_fp
++       mov    %o2, %o0
++ENDPROC(U3_retl_o2_fp)
++ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
++      sll     %o3, 6, %o3
++      add     %o3, 0x80, %o3
++      ba,pt   %xcc, __restore_fp
++       add    %o2, %o3, %o0
++ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
++ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
++      sll     %o3, 6, %o3
++      add     %o3, 0x40, %o3
++      ba,pt   %xcc, __restore_fp
++       add    %o2, %o3, %o0
++ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
++ENTRY(U3_retl_o2_plus_GS_plus_0x10)
++      add     GLOBAL_SPARE, 0x10, GLOBAL_SPARE
++      retl
++       add    %o2, GLOBAL_SPARE, %o0
++ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
++ENTRY(U3_retl_o2_plus_GS_plus_0x08)
++      add     GLOBAL_SPARE, 0x08, GLOBAL_SPARE
++      retl
++       add    %o2, GLOBAL_SPARE, %o0
++ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
++ENTRY(U3_retl_o2_and_7_plus_GS)
++      and     %o2, 7, %o2
++      retl
++       add    %o2, GLOBAL_SPARE, %o2
++ENDPROC(U3_retl_o2_and_7_plus_GS)
++ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
++      add     GLOBAL_SPARE, 8, GLOBAL_SPARE
++      and     %o2, 7, %o2
++      retl
++       add    %o2, GLOBAL_SPARE, %o2
++ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
++#endif
++
+       .align          64
+ 
+       /* The cheetah's flexible spine, oversized liver, enlarged heart,
+@@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+        and            %g2, 0x38, %g2
+ 
+ 1:    subcc           %g1, 0x1, %g1
+-      EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
+-      EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
++      EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
++      EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), 
U3_retl_o2_plus_g2_plus_g1_plus_1)
+       bgu,pt          %XCC, 1b
+        add            %o1, 0x1, %o1
+ 
+@@ -138,20 +216,20 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       be,pt           %icc, 3f
+        alignaddr      %o1, %g0, %o1
+ 
+-      EX_LD_FP(LOAD(ldd, %o1, %f4))
+-1:    EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
++      EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
++1:    EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
+       add             %o1, 0x8, %o1
+       subcc           %g2, 0x8, %g2
+       faligndata      %f4, %f6, %f0
+-      EX_ST_FP(STORE(std, %f0, %o0))
++      EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
+       be,pn           %icc, 3f
+        add            %o0, 0x8, %o0
+ 
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
+       add             %o1, 0x8, %o1
+       subcc           %g2, 0x8, %g2
+       faligndata      %f6, %f4, %f2
+-      EX_ST_FP(STORE(std, %f2, %o0))
++      EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
+       bne,pt          %icc, 1b
+        add            %o0, 0x8, %o0
+ 
+@@ -161,25 +239,25 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       LOAD(prefetch, %o1 + 0x080, #one_read)
+       LOAD(prefetch, %o1 + 0x0c0, #one_read)
+       LOAD(prefetch, %o1 + 0x100, #one_read)
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
+       LOAD(prefetch, %o1 + 0x140, #one_read)
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
+       LOAD(prefetch, %o1 + 0x180, #one_read)
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
+       LOAD(prefetch, %o1 + 0x1c0, #one_read)
+       faligndata      %f0, %f2, %f16
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
+       faligndata      %f2, %f4, %f18
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
+       faligndata      %f4, %f6, %f20
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
+       faligndata      %f6, %f8, %f22
+ 
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
+       faligndata      %f8, %f10, %f24
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
+       faligndata      %f10, %f12, %f26
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
+ 
+       subcc           GLOBAL_SPARE, 0x80, GLOBAL_SPARE
+       add             %o1, 0x40, %o1
+@@ -190,26 +268,26 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+       .align          64
+ 1:
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), 
U3_retl_o2_plus_o3_sll_6_plus_0x80)
+       faligndata      %f12, %f14, %f28
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), 
U3_retl_o2_plus_o3_sll_6_plus_0x80)
+       faligndata      %f14, %f0, %f30
+-      EX_ST_FP(STORE_BLK(%f16, %o0))
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
++      EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
++      EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       faligndata      %f0, %f2, %f16
+       add             %o0, 0x40, %o0
+ 
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       faligndata      %f2, %f4, %f18
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       faligndata      %f4, %f6, %f20
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       subcc           %o3, 0x01, %o3
+       faligndata      %f6, %f8, %f22
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), 
U3_retl_o2_plus_o3_sll_6_plus_0x80)
+ 
+       faligndata      %f8, %f10, %f24
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), 
U3_retl_o2_plus_o3_sll_6_plus_0x80)
+       LOAD(prefetch, %o1 + 0x1c0, #one_read)
+       faligndata      %f10, %f12, %f26
+       bg,pt           %XCC, 1b
+@@ -217,29 +295,29 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+       /* Finally we copy the last full 64-byte block. */
+ 2:
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), 
U3_retl_o2_plus_o3_sll_6_plus_0x80)
+       faligndata      %f12, %f14, %f28
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), 
U3_retl_o2_plus_o3_sll_6_plus_0x80)
+       faligndata      %f14, %f0, %f30
+-      EX_ST_FP(STORE_BLK(%f16, %o0))
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
++      EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
++      EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       faligndata      %f0, %f2, %f16
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       faligndata      %f2, %f4, %f18
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       faligndata      %f4, %f6, %f20
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       faligndata      %f6, %f8, %f22
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       faligndata      %f8, %f10, %f24
+       cmp             %g1, 0
+       be,pt           %XCC, 1f
+        add            %o0, 0x40, %o0
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), 
U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ 1:    faligndata      %f10, %f12, %f26
+       faligndata      %f12, %f14, %f28
+       faligndata      %f14, %f0, %f30
+-      EX_ST_FP(STORE_BLK(%f16, %o0))
++      EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+       add             %o0, 0x40, %o0
+       add             %o1, 0x40, %o1
+       membar          #Sync
+@@ -259,20 +337,20 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+       sub             %o2, %g2, %o2
+       be,a,pt         %XCC, 1f
+-       EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
++       EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
+ 
+-1:    EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
++1:    EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
+       add             %o1, 0x8, %o1
+       subcc           %g2, 0x8, %g2
+       faligndata      %f0, %f2, %f8
+-      EX_ST_FP(STORE(std, %f8, %o0))
++      EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
+       be,pn           %XCC, 2f
+        add            %o0, 0x8, %o0
+-      EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
++      EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
+       add             %o1, 0x8, %o1
+       subcc           %g2, 0x8, %g2
+       faligndata      %f2, %f0, %f8
+-      EX_ST_FP(STORE(std, %f8, %o0))
++      EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
+       bne,pn          %XCC, 1b
+        add            %o0, 0x8, %o0
+ 
+@@ -292,30 +370,33 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+        andcc          %o2, 0x8, %g0
+       be,pt           %icc, 1f
+        nop
+-      EX_LD(LOAD(ldx, %o1, %o5))
+-      EX_ST(STORE(stx, %o5, %o1 + %o3))
++      EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
++      EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
+       add             %o1, 0x8, %o1
++      sub             %o2, 8, %o2
+ 
+ 1:    andcc           %o2, 0x4, %g0
+       be,pt           %icc, 1f
+        nop
+-      EX_LD(LOAD(lduw, %o1, %o5))
+-      EX_ST(STORE(stw, %o5, %o1 + %o3))
++      EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
++      EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
+       add             %o1, 0x4, %o1
++      sub             %o2, 4, %o2
+ 
+ 1:    andcc           %o2, 0x2, %g0
+       be,pt           %icc, 1f
+        nop
+-      EX_LD(LOAD(lduh, %o1, %o5))
+-      EX_ST(STORE(sth, %o5, %o1 + %o3))
++      EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
++      EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
+       add             %o1, 0x2, %o1
++      sub             %o2, 2, %o2
+ 
+ 1:    andcc           %o2, 0x1, %g0
+       be,pt           %icc, 85f
+        nop
+-      EX_LD(LOAD(ldub, %o1, %o5))
++      EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
+       ba,pt           %xcc, 85f
+-       EX_ST(STORE(stb, %o5, %o1 + %o3))
++       EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
+ 
+       .align          64
+ 70: /* 16 < len <= 64 */
+@@ -326,26 +407,26 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+       andn            %o2, 0xf, GLOBAL_SPARE
+       and             %o2, 0xf, %o2
+ 1:    subcc           GLOBAL_SPARE, 0x10, GLOBAL_SPARE
+-      EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
+-      EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
+-      EX_ST(STORE(stx, %o5, %o1 + %o3))
++      EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
++      EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
++      EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
+       add             %o1, 0x8, %o1
+-      EX_ST(STORE(stx, %g1, %o1 + %o3))
++      EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
+       bgu,pt          %XCC, 1b
+        add            %o1, 0x8, %o1
+ 73:   andcc           %o2, 0x8, %g0
+       be,pt           %XCC, 1f
+        nop
+       sub             %o2, 0x8, %o2
+-      EX_LD(LOAD(ldx, %o1, %o5))
+-      EX_ST(STORE(stx, %o5, %o1 + %o3))
++      EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
++      EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
+       add             %o1, 0x8, %o1
+ 1:    andcc           %o2, 0x4, %g0
+       be,pt           %XCC, 1f
+        nop
+       sub             %o2, 0x4, %o2
+-      EX_LD(LOAD(lduw, %o1, %o5))
+-      EX_ST(STORE(stw, %o5, %o1 + %o3))
++      EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
++      EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
+       add             %o1, 0x4, %o1
+ 1:    cmp             %o2, 0
+       be,pt           %XCC, 85f
+@@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+       sub             %o2, %g1, %o2
+ 
+ 1:    subcc           %g1, 1, %g1
+-      EX_LD(LOAD(ldub, %o1, %o5))
+-      EX_ST(STORE(stb, %o5, %o1 + %o3))
++      EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
++      EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
+       bgu,pt          %icc, 1b
+        add            %o1, 1, %o1
+ 
+@@ -378,16 +459,16 @@ FUNC_NAME:       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 8:    mov             64, %o3
+       andn            %o1, 0x7, %o1
+-      EX_LD(LOAD(ldx, %o1, %g2))
++      EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
+       sub             %o3, %g1, %o3
+       andn            %o2, 0x7, GLOBAL_SPARE
+       sllx            %g2, %g1, %g2
+-1:    EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
++1:    EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
+       subcc           GLOBAL_SPARE, 0x8, GLOBAL_SPARE
+       add             %o1, 0x8, %o1
+       srlx            %g3, %o3, %o5
+       or              %o5, %g2, %o5
+-      EX_ST(STORE(stx, %o5, %o0))
++      EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
+       add             %o0, 0x8, %o0
+       bgu,pt          %icc, 1b
+        sllx           %g3, %g1, %g2
+@@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ 
+ 1:
+       subcc           %o2, 4, %o2
+-      EX_LD(LOAD(lduw, %o1, %g1))
+-      EX_ST(STORE(stw, %g1, %o1 + %o3))
++      EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
++      EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
+       bgu,pt          %XCC, 1b
+        add            %o1, 4, %o1
+ 
+@@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+       .align          32
+ 90:
+       subcc           %o2, 1, %o2
+-      EX_LD(LOAD(ldub, %o1, %g1))
+-      EX_ST(STORE(stb, %g1, %o1 + %o3))
++      EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
++      EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
+       bgu,pt          %XCC, 90b
+        add            %o1, 1, %o1
+       retl
+diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
+index 302c0e60dc2c..4c89b486fa0d 100644
+--- a/arch/sparc/lib/copy_in_user.S
++++ b/arch/sparc/lib/copy_in_user.S
+@@ -8,18 +8,33 @@
+ 
+ #define XCC xcc
+ 
+-#define EX(x,y)                       \
++#define EX(x,y,z)             \
+ 98:   x,y;                    \
+       .section __ex_table,"a";\
+       .align 4;               \
+-      .word 98b, __retl_one;  \
++      .word 98b, z;           \
+       .text;                  \
+       .align 4;
+ 
++#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
++#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
++#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
++
+       .register       %g2,#scratch
+       .register       %g3,#scratch
+ 
+       .text
++__retl_o4_plus_8:
++      add     %o4, %o2, %o4
++      retl
++       add    %o4, 8, %o0
++__retl_o2_plus_4:
++      retl
++       add    %o2, 4, %o0
++__retl_o2_plus_1:
++      retl
++       add    %o2, 1, %o0
++
+       .align  32
+ 
+       /* Don't try to get too fancy here, just nice and
+@@ -44,8 +59,8 @@ ENTRY(___copy_in_user)       /* %o0=dst, %o1=src, %o2=len */
+       andn            %o2, 0x7, %o4
+       and             %o2, 0x7, %o2
+ 1:    subcc           %o4, 0x8, %o4
+-      EX(ldxa [%o1] %asi, %o5)
+-      EX(stxa %o5, [%o0] %asi)
++      EX_O4(ldxa [%o1] %asi, %o5)
++      EX_O4(stxa %o5, [%o0] %asi)
+       add             %o1, 0x8, %o1
+       bgu,pt          %XCC, 1b
+        add            %o0, 0x8, %o0
+@@ -53,8 +68,8 @@ ENTRY(___copy_in_user)       /* %o0=dst, %o1=src, %o2=len */
+       be,pt           %XCC, 1f
+        nop
+       sub             %o2, 0x4, %o2
+-      EX(lduwa [%o1] %asi, %o5)
+-      EX(stwa %o5, [%o0] %asi)
++      EX_O2_4(lduwa [%o1] %asi, %o5)
++      EX_O2_4(stwa %o5, [%o0] %asi)
+       add             %o1, 0x4, %o1
+       add             %o0, 0x4, %o0
+ 1:    cmp             %o2, 0
+@@ -70,8 +85,8 @@ ENTRY(___copy_in_user)       /* %o0=dst, %o1=src, %o2=len */
+ 
+ 82:
+       subcc           %o2, 4, %o2
+-      EX(lduwa [%o1] %asi, %g1)
+-      EX(stwa %g1, [%o0] %asi)
++      EX_O2_4(lduwa [%o1] %asi, %g1)
++      EX_O2_4(stwa %g1, [%o0] %asi)
+       add             %o1, 4, %o1
+       bgu,pt          %XCC, 82b
+        add            %o0, 4, %o0
+@@ -82,8 +97,8 @@ ENTRY(___copy_in_user)       /* %o0=dst, %o1=src, %o2=len */
+       .align  32
+ 90:
+       subcc           %o2, 1, %o2
+-      EX(lduba [%o1] %asi, %g1)
+-      EX(stba %g1, [%o0] %asi)
++      EX_O2_1(lduba [%o1] %asi, %g1)
++      EX_O2_1(stba %g1, [%o0] %asi)
+       add             %o1, 1, %o1
+       bgu,pt          %XCC, 90b
+        add            %o0, 1, %o0
+diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
+deleted file mode 100644
+index ac96ae236709..000000000000
+--- a/arch/sparc/lib/user_fixup.c
++++ /dev/null
+@@ -1,71 +0,0 @@
+-/* user_fixup.c: Fix up user copy faults.
+- *
+- * Copyright (C) 2004 David S. Miller <[email protected]>
+- */
+-
+-#include <linux/compiler.h>
+-#include <linux/kernel.h>
+-#include <linux/string.h>
+-#include <linux/errno.h>
+-#include <linux/module.h>
+-
+-#include <asm/uaccess.h>
+-
+-/* Calculating the exact fault address when using
+- * block loads and stores can be very complicated.
+- *
+- * Instead of trying to be clever and handling all
+- * of the cases, just fix things up simply here.
+- */
+-
+-static unsigned long compute_size(unsigned long start, unsigned long size, 
unsigned long *offset)
+-{
+-      unsigned long fault_addr = current_thread_info()->fault_address;
+-      unsigned long end = start + size;
+-
+-      if (fault_addr < start || fault_addr >= end) {
+-              *offset = 0;
+-      } else {
+-              *offset = fault_addr - start;
+-              size = end - fault_addr;
+-      }
+-      return size;
+-}
+-
+-unsigned long copy_from_user_fixup(void *to, const void __user *from, 
unsigned long size)
+-{
+-      unsigned long offset;
+-
+-      size = compute_size((unsigned long) from, size, &offset);
+-      if (likely(size))
+-              memset(to + offset, 0, size);
+-
+-      return size;
+-}
+-EXPORT_SYMBOL(copy_from_user_fixup);
+-
+-unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned 
long size)
+-{
+-      unsigned long offset;
+-
+-      return compute_size((unsigned long) to, size, &offset);
+-}
+-EXPORT_SYMBOL(copy_to_user_fixup);
+-
+-unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned 
long size)
+-{
+-      unsigned long fault_addr = current_thread_info()->fault_address;
+-      unsigned long start = (unsigned long) to;
+-      unsigned long end = start + size;
+-
+-      if (fault_addr >= start && fault_addr < end)
+-              return end - fault_addr;
+-
+-      start = (unsigned long) from;
+-      end = start + size;
+-      if (fault_addr >= start && fault_addr < end)
+-              return end - fault_addr;
+-
+-      return size;
+-}
+-EXPORT_SYMBOL(copy_in_user_fixup);
+diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
+index f2b77112e9d8..e20fbbafb0b0 100644
+--- a/arch/sparc/mm/tsb.c
++++ b/arch/sparc/mm/tsb.c
+@@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned 
long vaddr)
+       return (tag == (vaddr >> 22));
+ }
+ 
++static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long 
end)
++{
++      unsigned long idx;
++
++      for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
++              struct tsb *ent = &swapper_tsb[idx];
++              unsigned long match = idx << 13;
++
++              match |= (ent->tag << 22);
++              if (match >= start && match < end)
++                      ent->tag = (1UL << TSB_TAG_INVALID_BIT);
++      }
++}
++
+ /* TSB flushes need only occur on the processor initiating the address
+  * space modification, not on each cpu the address space has run on.
+  * Only the TLB flush needs that treatment.
+@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned 
long end)
+ {
+       unsigned long v;
+ 
++      if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
++              return flush_tsb_kernel_range_scan(start, end);
++
+       for (v = start; v < end; v += PAGE_SIZE) {
+               unsigned long hash = tsb_hash(v, PAGE_SHIFT,
+                                             KERNEL_TSB_NENTRIES);
+diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
+index b4f4733abc6e..5d2fd6cd3189 100644
+--- a/arch/sparc/mm/ultra.S
++++ b/arch/sparc/mm/ultra.S
+@@ -30,7 +30,7 @@
+       .text
+       .align          32
+       .globl          __flush_tlb_mm
+-__flush_tlb_mm:               /* 18 insns */
++__flush_tlb_mm:               /* 19 insns */
+       /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
+       ldxa            [%o1] ASI_DMMU, %g2
+       cmp             %g2, %o0
+@@ -81,7 +81,7 @@ __flush_tlb_page:    /* 22 insns */
+ 
+       .align          32
+       .globl          __flush_tlb_pending
+-__flush_tlb_pending:  /* 26 insns */
++__flush_tlb_pending:  /* 27 insns */
+       /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+       rdpr            %pstate, %g7
+       sllx            %o1, 3, %o1
+@@ -113,12 +113,14 @@ __flush_tlb_pending:     /* 26 insns */
+ 
+       .align          32
+       .globl          __flush_tlb_kernel_range
+-__flush_tlb_kernel_range:     /* 16 insns */
++__flush_tlb_kernel_range:     /* 31 insns */
+       /* %o0=start, %o1=end */
+       cmp             %o0, %o1
+       be,pn           %xcc, 2f
++       sub            %o1, %o0, %o3
++      srlx            %o3, 18, %o4
++      brnz,pn         %o4, __spitfire_flush_tlb_kernel_range_slow
+        sethi          %hi(PAGE_SIZE), %o4
+-      sub             %o1, %o0, %o3
+       sub             %o3, %o4, %o3
+       or              %o0, 0x20, %o0          ! Nucleus
+ 1:    stxa            %g0, [%o0 + %o3] ASI_DMMU_DEMAP
+@@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */
+       retl
+        nop
+       nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++
++__spitfire_flush_tlb_kernel_range_slow:
++      mov             63 * 8, %o4
++1:    ldxa            [%o4] ASI_ITLB_DATA_ACCESS, %o3
++      andcc           %o3, 0x40, %g0                  /* _PAGE_L_4U */
++      bne,pn          %xcc, 2f
++       mov            TLB_TAG_ACCESS, %o3
++      stxa            %g0, [%o3] ASI_IMMU
++      stxa            %g0, [%o4] ASI_ITLB_DATA_ACCESS
++      membar          #Sync
++2:    ldxa            [%o4] ASI_DTLB_DATA_ACCESS, %o3
++      andcc           %o3, 0x40, %g0
++      bne,pn          %xcc, 2f
++       mov            TLB_TAG_ACCESS, %o3
++      stxa            %g0, [%o3] ASI_DMMU
++      stxa            %g0, [%o4] ASI_DTLB_DATA_ACCESS
++      membar          #Sync
++2:    sub             %o4, 8, %o4
++      brgez,pt        %o4, 1b
++       nop
++      retl
++       nop
+ 
+ __spitfire_flush_tlb_mm_slow:
+       rdpr            %pstate, %g1
+@@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending:      /* 27 insns */
+       retl
+        wrpr           %g7, 0x0, %pstate
+ 
++__cheetah_flush_tlb_kernel_range:     /* 31 insns */
++      /* %o0=start, %o1=end */
++      cmp             %o0, %o1
++      be,pn           %xcc, 2f
++       sub            %o1, %o0, %o3
++      srlx            %o3, 18, %o4
++      brnz,pn         %o4, 3f
++       sethi          %hi(PAGE_SIZE), %o4
++      sub             %o3, %o4, %o3
++      or              %o0, 0x20, %o0          ! Nucleus
++1:    stxa            %g0, [%o0 + %o3] ASI_DMMU_DEMAP
++      stxa            %g0, [%o0 + %o3] ASI_IMMU_DEMAP
++      membar          #Sync
++      brnz,pt         %o3, 1b
++       sub            %o3, %o4, %o3
++2:    sethi           %hi(KERNBASE), %o3
++      flush           %o3
++      retl
++       nop
++3:    mov             0x80, %o4
++      stxa            %g0, [%o4] ASI_DMMU_DEMAP
++      membar          #Sync
++      stxa            %g0, [%o4] ASI_IMMU_DEMAP
++      membar          #Sync
++      retl
++       nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++
+ #ifdef DCACHE_ALIASING_POSSIBLE
+ __cheetah_flush_dcache_page: /* 11 insns */
+       sethi           %hi(PAGE_OFFSET), %g1
+@@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
+       ret
+        restore
+ 
+-__hypervisor_flush_tlb_mm: /* 10 insns */
++__hypervisor_flush_tlb_mm: /* 19 insns */
+       mov             %o0, %o2        /* ARG2: mmu context */
+       mov             0, %o0          /* ARG0: CPU lists unimplemented */
+       mov             0, %o1          /* ARG1: CPU lists unimplemented */
+       mov             HV_MMU_ALL, %o3 /* ARG3: flags */
+       mov             HV_FAST_MMU_DEMAP_CTX, %o5
+       ta              HV_FAST_TRAP
+-      brnz,pn         %o0, __hypervisor_tlb_tl0_error
++      brnz,pn         %o0, 1f
+        mov            HV_FAST_MMU_DEMAP_CTX, %o1
+       retl
+        nop
++1:    sethi           %hi(__hypervisor_tlb_tl0_error), %o5
++      jmpl            %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
++       nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
+ 
+-__hypervisor_flush_tlb_page: /* 11 insns */
++__hypervisor_flush_tlb_page: /* 22 insns */
+       /* %o0 = context, %o1 = vaddr */
+       mov             %o0, %g2
+       mov             %o1, %o0              /* ARG0: vaddr + IMMU-bit */
+@@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
+       srlx            %o0, PAGE_SHIFT, %o0
+       sllx            %o0, PAGE_SHIFT, %o0
+       ta              HV_MMU_UNMAP_ADDR_TRAP
+-      brnz,pn         %o0, __hypervisor_tlb_tl0_error
++      brnz,pn         %o0, 1f
+        mov            HV_MMU_UNMAP_ADDR_TRAP, %o1
+       retl
+        nop
++1:    sethi           %hi(__hypervisor_tlb_tl0_error), %o2
++      jmpl            %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
++       nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
+ 
+-__hypervisor_flush_tlb_pending: /* 16 insns */
++__hypervisor_flush_tlb_pending: /* 27 insns */
+       /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+       sllx            %o1, 3, %g1
+       mov             %o2, %g2
+@@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
+       srlx            %o0, PAGE_SHIFT, %o0
+       sllx            %o0, PAGE_SHIFT, %o0
+       ta              HV_MMU_UNMAP_ADDR_TRAP
+-      brnz,pn         %o0, __hypervisor_tlb_tl0_error
++      brnz,pn         %o0, 1f
+        mov            HV_MMU_UNMAP_ADDR_TRAP, %o1
+       brnz,pt         %g1, 1b
+        nop
+       retl
+        nop
++1:    sethi           %hi(__hypervisor_tlb_tl0_error), %o2
++      jmpl            %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
++       nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
+ 
+-__hypervisor_flush_tlb_kernel_range: /* 16 insns */
++__hypervisor_flush_tlb_kernel_range: /* 31 insns */
+       /* %o0=start, %o1=end */
+       cmp             %o0, %o1
+       be,pn           %xcc, 2f
+-       sethi          %hi(PAGE_SIZE), %g3
+-      mov             %o0, %g1
+-      sub             %o1, %g1, %g2
++       sub            %o1, %o0, %g2
++      srlx            %g2, 18, %g3
++      brnz,pn         %g3, 4f
++       mov            %o0, %g1
++      sethi           %hi(PAGE_SIZE), %g3
+       sub             %g2, %g3, %g2
+ 1:    add             %g1, %g2, %o0   /* ARG0: virtual address */
+       mov             0, %o1          /* ARG1: mmu context */
+       mov             HV_MMU_ALL, %o2 /* ARG2: flags */
+       ta              HV_MMU_UNMAP_ADDR_TRAP
+-      brnz,pn         %o0, __hypervisor_tlb_tl0_error
++      brnz,pn         %o0, 3f
+        mov            HV_MMU_UNMAP_ADDR_TRAP, %o1
+       brnz,pt         %g2, 1b
+        sub            %g2, %g3, %g2
+ 2:    retl
+        nop
++3:    sethi           %hi(__hypervisor_tlb_tl0_error), %o2
++      jmpl            %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
++       nop
++4:    mov             0, %o0          /* ARG0: CPU lists unimplemented */
++      mov             0, %o1          /* ARG1: CPU lists unimplemented */
++      mov             0, %o2          /* ARG2: mmu context == nucleus */
++      mov             HV_MMU_ALL, %o3 /* ARG3: flags */
++      mov             HV_FAST_MMU_DEMAP_CTX, %o5
++      ta              HV_FAST_TRAP
++      brnz,pn         %o0, 3b
++       mov            HV_FAST_MMU_DEMAP_CTX, %o1
++      retl
++       nop
+ 
+ #ifdef DCACHE_ALIASING_POSSIBLE
+       /* XXX Niagara and friends have an 8K cache, so no aliasing is
+@@ -394,43 +511,6 @@ tlb_patch_one:
+       retl
+        nop
+ 
+-      .globl          cheetah_patch_cachetlbops
+-cheetah_patch_cachetlbops:
+-      save            %sp, -128, %sp
+-
+-      sethi           %hi(__flush_tlb_mm), %o0
+-      or              %o0, %lo(__flush_tlb_mm), %o0
+-      sethi           %hi(__cheetah_flush_tlb_mm), %o1
+-      or              %o1, %lo(__cheetah_flush_tlb_mm), %o1
+-      call            tlb_patch_one
+-       mov            19, %o2
+-
+-      sethi           %hi(__flush_tlb_page), %o0
+-      or              %o0, %lo(__flush_tlb_page), %o0
+-      sethi           %hi(__cheetah_flush_tlb_page), %o1
+-      or              %o1, %lo(__cheetah_flush_tlb_page), %o1
+-      call            tlb_patch_one
+-       mov            22, %o2
+-
+-      sethi           %hi(__flush_tlb_pending), %o0
+-      or              %o0, %lo(__flush_tlb_pending), %o0
+-      sethi           %hi(__cheetah_flush_tlb_pending), %o1
+-      or              %o1, %lo(__cheetah_flush_tlb_pending), %o1
+-      call            tlb_patch_one
+-       mov            27, %o2
+-
+-#ifdef DCACHE_ALIASING_POSSIBLE
+-      sethi           %hi(__flush_dcache_page), %o0
+-      or              %o0, %lo(__flush_dcache_page), %o0
+-      sethi           %hi(__cheetah_flush_dcache_page), %o1
+-      or              %o1, %lo(__cheetah_flush_dcache_page), %o1
+-      call            tlb_patch_one
+-       mov            11, %o2
+-#endif /* DCACHE_ALIASING_POSSIBLE */
+-
+-      ret
+-       restore
+-
+ #ifdef CONFIG_SMP
+       /* These are all called by the slaves of a cross call, at
+        * trap level 1, with interrupts fully disabled.
+@@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
+        */
+       .align          32
+       .globl          xcall_flush_tlb_mm
+-xcall_flush_tlb_mm:   /* 21 insns */
++xcall_flush_tlb_mm:   /* 24 insns */
+       mov             PRIMARY_CONTEXT, %g2
+       ldxa            [%g2] ASI_DMMU, %g3
+       srlx            %g3, CTX_PGSZ1_NUC_SHIFT, %g4
+@@ -469,9 +549,12 @@ xcall_flush_tlb_mm:       /* 21 insns */
+       nop
+       nop
+       nop
++      nop
++      nop
++      nop
+ 
+       .globl          xcall_flush_tlb_page
+-xcall_flush_tlb_page: /* 17 insns */
++xcall_flush_tlb_page: /* 20 insns */
+       /* %g5=context, %g1=vaddr */
+       mov             PRIMARY_CONTEXT, %g4
+       ldxa            [%g4] ASI_DMMU, %g2
+@@ -490,15 +573,20 @@ xcall_flush_tlb_page:    /* 17 insns */
+       retry
+       nop
+       nop
++      nop
++      nop
++      nop
+ 
+       .globl          xcall_flush_tlb_kernel_range
+-xcall_flush_tlb_kernel_range: /* 25 insns */
++xcall_flush_tlb_kernel_range: /* 44 insns */
+       sethi           %hi(PAGE_SIZE - 1), %g2
+       or              %g2, %lo(PAGE_SIZE - 1), %g2
+       andn            %g1, %g2, %g1
+       andn            %g7, %g2, %g7
+       sub             %g7, %g1, %g3
+-      add             %g2, 1, %g2
++      srlx            %g3, 18, %g2
++      brnz,pn         %g2, 2f
++       add            %g2, 1, %g2
+       sub             %g3, %g2, %g3
+       or              %g1, 0x20, %g1          ! Nucleus
+ 1:    stxa            %g0, [%g1 + %g3] ASI_DMMU_DEMAP
+@@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range:     /* 25 insns */
+       brnz,pt         %g3, 1b
+        sub            %g3, %g2, %g3
+       retry
+-      nop
+-      nop
++2:    mov             63 * 8, %g1
++1:    ldxa            [%g1] ASI_ITLB_DATA_ACCESS, %g2
++      andcc           %g2, 0x40, %g0                  /* _PAGE_L_4U */
++      bne,pn          %xcc, 2f
++       mov            TLB_TAG_ACCESS, %g2
++      stxa            %g0, [%g2] ASI_IMMU
++      stxa            %g0, [%g1] ASI_ITLB_DATA_ACCESS
++      membar          #Sync
++2:    ldxa            [%g1] ASI_DTLB_DATA_ACCESS, %g2
++      andcc           %g2, 0x40, %g0
++      bne,pn          %xcc, 2f
++       mov            TLB_TAG_ACCESS, %g2
++      stxa            %g0, [%g2] ASI_DMMU
++      stxa            %g0, [%g1] ASI_DTLB_DATA_ACCESS
++      membar          #Sync
++2:    sub             %g1, 8, %g1
++      brgez,pt        %g1, 1b
++       nop
++      retry
+       nop
+       nop
+       nop
+@@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
+ 
+       retry
+ 
++__cheetah_xcall_flush_tlb_kernel_range:       /* 44 insns */
++      sethi           %hi(PAGE_SIZE - 1), %g2
++      or              %g2, %lo(PAGE_SIZE - 1), %g2
++      andn            %g1, %g2, %g1
++      andn            %g7, %g2, %g7
++      sub             %g7, %g1, %g3
++      srlx            %g3, 18, %g2
++      brnz,pn         %g2, 2f
++       add            %g2, 1, %g2
++      sub             %g3, %g2, %g3
++      or              %g1, 0x20, %g1          ! Nucleus
++1:    stxa            %g0, [%g1 + %g3] ASI_DMMU_DEMAP
++      stxa            %g0, [%g1 + %g3] ASI_IMMU_DEMAP
++      membar          #Sync
++      brnz,pt         %g3, 1b
++       sub            %g3, %g2, %g3
++      retry
++2:    mov             0x80, %g2
++      stxa            %g0, [%g2] ASI_DMMU_DEMAP
++      membar          #Sync
++      stxa            %g0, [%g2] ASI_IMMU_DEMAP
++      membar          #Sync
++      retry
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++      nop
++
+ #ifdef DCACHE_ALIASING_POSSIBLE
+       .align          32
+       .globl          xcall_flush_dcache_page_cheetah
+@@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
+       ba,a,pt %xcc, rtrap
+ 
+       .globl          __hypervisor_xcall_flush_tlb_mm
+-__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
++__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
+       /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
+       mov             %o0, %g2
+       mov             %o1, %g3
+@@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
+       mov             HV_FAST_MMU_DEMAP_CTX, %o5
+       ta              HV_FAST_TRAP
+       mov             HV_FAST_MMU_DEMAP_CTX, %g6
+-      brnz,pn         %o0, __hypervisor_tlb_xcall_error
++      brnz,pn         %o0, 1f
+        mov            %o0, %g5
+       mov             %g2, %o0
+       mov             %g3, %o1
+@@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
+       mov             %g7, %o5
+       membar          #Sync
+       retry
++1:    sethi           %hi(__hypervisor_tlb_xcall_error), %g4
++      jmpl            %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
++       nop
+ 
+       .globl          __hypervisor_xcall_flush_tlb_page
+-__hypervisor_xcall_flush_tlb_page: /* 17 insns */
++__hypervisor_xcall_flush_tlb_page: /* 20 insns */
+       /* %g5=ctx, %g1=vaddr */
+       mov             %o0, %g2
+       mov             %o1, %g3
+@@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
+       sllx            %o0, PAGE_SHIFT, %o0
+       ta              HV_MMU_UNMAP_ADDR_TRAP
+       mov             HV_MMU_UNMAP_ADDR_TRAP, %g6
+-      brnz,a,pn       %o0, __hypervisor_tlb_xcall_error
++      brnz,a,pn       %o0, 1f
+        mov            %o0, %g5
+       mov             %g2, %o0
+       mov             %g3, %o1
+       mov             %g4, %o2
+       membar          #Sync
+       retry
++1:    sethi           %hi(__hypervisor_tlb_xcall_error), %g4
++      jmpl            %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
++       nop
+ 
+       .globl          __hypervisor_xcall_flush_tlb_kernel_range
+-__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
++__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
+       /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
+       sethi           %hi(PAGE_SIZE - 1), %g2
+       or              %g2, %lo(PAGE_SIZE - 1), %g2
+       andn            %g1, %g2, %g1
+       andn            %g7, %g2, %g7
+       sub             %g7, %g1, %g3
++      srlx            %g3, 18, %g7
+       add             %g2, 1, %g2
+       sub             %g3, %g2, %g3
+       mov             %o0, %g2
+       mov             %o1, %g4
+-      mov             %o2, %g7
++      brnz,pn         %g7, 2f
++       mov            %o2, %g7
+ 1:    add             %g1, %g3, %o0   /* ARG0: virtual address */
+       mov             0, %o1          /* ARG1: mmu context */
+       mov             HV_MMU_ALL, %o2 /* ARG2: flags */
+       ta              HV_MMU_UNMAP_ADDR_TRAP
+       mov             HV_MMU_UNMAP_ADDR_TRAP, %g6
+-      brnz,pn         %o0, __hypervisor_tlb_xcall_error
++      brnz,pn         %o0, 1f
+        mov            %o0, %g5
+       sethi           %hi(PAGE_SIZE), %o2
+       brnz,pt         %g3, 1b
+        sub            %g3, %o2, %g3
+-      mov             %g2, %o0
++5:    mov             %g2, %o0
+       mov             %g4, %o1
+       mov             %g7, %o2
+       membar          #Sync
+       retry
++1:    sethi           %hi(__hypervisor_tlb_xcall_error), %g4
++      jmpl            %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
++       nop
++2:    mov             %o3, %g1
++      mov             %o5, %g3
++      mov             0, %o0          /* ARG0: CPU lists unimplemented */
++      mov             0, %o1          /* ARG1: CPU lists unimplemented */
++      mov             0, %o2          /* ARG2: mmu context == nucleus */
++      mov             HV_MMU_ALL, %o3 /* ARG3: flags */
++      mov             HV_FAST_MMU_DEMAP_CTX, %o5
++      ta              HV_FAST_TRAP
++      mov             %g1, %o3
++      brz,pt          %o0, 5b
++       mov            %g3, %o5
++      mov             HV_FAST_MMU_DEMAP_CTX, %g6
++      ba,pt           %xcc, 1b
++       clr            %g5
+ 
+       /* These just get rescheduled to PIL vectors. */
+       .globl          xcall_call_function
+@@ -809,6 +985,58 @@ xcall_kgdb_capture:
+ 
+ #endif /* CONFIG_SMP */
+ 
++      .globl          cheetah_patch_cachetlbops
++cheetah_patch_cachetlbops:
++      save            %sp, -128, %sp
++
++      sethi           %hi(__flush_tlb_mm), %o0
++      or              %o0, %lo(__flush_tlb_mm), %o0
++      sethi           %hi(__cheetah_flush_tlb_mm), %o1
++      or              %o1, %lo(__cheetah_flush_tlb_mm), %o1
++      call            tlb_patch_one
++       mov            19, %o2
++
++      sethi           %hi(__flush_tlb_page), %o0
++      or              %o0, %lo(__flush_tlb_page), %o0
++      sethi           %hi(__cheetah_flush_tlb_page), %o1
++      or              %o1, %lo(__cheetah_flush_tlb_page), %o1
++      call            tlb_patch_one
++       mov            22, %o2
++
++      sethi           %hi(__flush_tlb_pending), %o0
++      or              %o0, %lo(__flush_tlb_pending), %o0
++      sethi           %hi(__cheetah_flush_tlb_pending), %o1
++      or              %o1, %lo(__cheetah_flush_tlb_pending), %o1
++      call            tlb_patch_one
++       mov            27, %o2
++
++      sethi           %hi(__flush_tlb_kernel_range), %o0
++      or              %o0, %lo(__flush_tlb_kernel_range), %o0
++      sethi           %hi(__cheetah_flush_tlb_kernel_range), %o1
++      or              %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
++      call            tlb_patch_one
++       mov            31, %o2
++
++#ifdef DCACHE_ALIASING_POSSIBLE
++      sethi           %hi(__flush_dcache_page), %o0
++      or              %o0, %lo(__flush_dcache_page), %o0
++      sethi           %hi(__cheetah_flush_dcache_page), %o1
++      or              %o1, %lo(__cheetah_flush_dcache_page), %o1
++      call            tlb_patch_one
++       mov            11, %o2
++#endif /* DCACHE_ALIASING_POSSIBLE */
++
++#ifdef CONFIG_SMP
++      sethi           %hi(xcall_flush_tlb_kernel_range), %o0
++      or              %o0, %lo(xcall_flush_tlb_kernel_range), %o0
++      sethi           %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
++      or              %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
++      call            tlb_patch_one
++       mov            44, %o2
++#endif /* CONFIG_SMP */
++
++      ret
++       restore
+ 
+       .globl          hypervisor_patch_cachetlbops
+ hypervisor_patch_cachetlbops:
+@@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
+       sethi           %hi(__hypervisor_flush_tlb_mm), %o1
+       or              %o1, %lo(__hypervisor_flush_tlb_mm), %o1
+       call            tlb_patch_one
+-       mov            10, %o2
++       mov            19, %o2
+ 
+       sethi           %hi(__flush_tlb_page), %o0
+       or              %o0, %lo(__flush_tlb_page), %o0
+       sethi           %hi(__hypervisor_flush_tlb_page), %o1
+       or              %o1, %lo(__hypervisor_flush_tlb_page), %o1
+       call            tlb_patch_one
+-       mov            11, %o2
++       mov            22, %o2
+ 
+       sethi           %hi(__flush_tlb_pending), %o0
+       or              %o0, %lo(__flush_tlb_pending), %o0
+       sethi           %hi(__hypervisor_flush_tlb_pending), %o1
+       or              %o1, %lo(__hypervisor_flush_tlb_pending), %o1
+       call            tlb_patch_one
+-       mov            16, %o2
++       mov            27, %o2
+ 
+       sethi           %hi(__flush_tlb_kernel_range), %o0
+       or              %o0, %lo(__flush_tlb_kernel_range), %o0
+       sethi           %hi(__hypervisor_flush_tlb_kernel_range), %o1
+       or              %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
+       call            tlb_patch_one
+-       mov            16, %o2
++       mov            31, %o2
+ 
+ #ifdef DCACHE_ALIASING_POSSIBLE
+       sethi           %hi(__flush_dcache_page), %o0
+@@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
+       sethi           %hi(__hypervisor_xcall_flush_tlb_mm), %o1
+       or              %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
+       call            tlb_patch_one
+-       mov            21, %o2
++       mov            24, %o2
+ 
+       sethi           %hi(xcall_flush_tlb_page), %o0
+       or              %o0, %lo(xcall_flush_tlb_page), %o0
+       sethi           %hi(__hypervisor_xcall_flush_tlb_page), %o1
+       or              %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
+       call            tlb_patch_one
+-       mov            17, %o2
++       mov            20, %o2
+ 
+       sethi           %hi(xcall_flush_tlb_kernel_range), %o0
+       or              %o0, %lo(xcall_flush_tlb_kernel_range), %o0
+       sethi           %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
+       or              %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
+       call            tlb_patch_one
+-       mov            25, %o2
++       mov            44, %o2
+ #endif /* CONFIG_SMP */
+ 
+       ret
+diff --git a/drivers/net/ethernet/broadcom/bgmac.c 
b/drivers/net/ethernet/broadcom/bgmac.c
+index c4751ece76f6..45e87c9cc828 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
+       u32 ctl;
+ 
+       ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
++
++      /* preserve ONLY bits 16-17 from current hardware value */
++      ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
++
+       if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) {
+               ctl &= ~BGMAC_DMA_RX_BL_MASK;
+               ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
+@@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
+               ctl &= ~BGMAC_DMA_RX_PT_MASK;
+               ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
+       }
+-      ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
+       ctl |= BGMAC_DMA_RX_ENABLE;
+       ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
+       ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
+diff --git a/drivers/net/ethernet/broadcom/bnx2.c 
b/drivers/net/ethernet/broadcom/bnx2.c
+index 505ceaf451e2..2c850a92ab15 100644
+--- a/drivers/net/ethernet/broadcom/bnx2.c
++++ b/drivers/net/ethernet/broadcom/bnx2.c
+@@ -49,6 +49,7 @@
+ #include <linux/firmware.h>
+ #include <linux/log2.h>
+ #include <linux/aer.h>
++#include <linux/crash_dump.h>
+ 
+ #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
+ #define BCM_CNIC 1
+@@ -4759,15 +4760,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp)
+       BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR);
+ }
+ 
+-static int
+-bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
++static void
++bnx2_wait_dma_complete(struct bnx2 *bp)
+ {
+       u32 val;
+-      int i, rc = 0;
+-      u8 old_port;
++      int i;
+ 
+-      /* Wait for the current PCI transaction to complete before
+-       * issuing a reset. */
++      /*
++       * Wait for the current PCI transaction to complete before
++       * issuing a reset.
++       */
+       if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) ||
+           (BNX2_CHIP(bp) == BNX2_CHIP_5708)) {
+               BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS,
+@@ -4791,6 +4793,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
+               }
+       }
+ 
++      return;
++}
++
++
++static int
++bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
++{
++      u32 val;
++      int i, rc = 0;
++      u8 old_port;
++
++      /* Wait for the current PCI transaction to complete before
++       * issuing a reset. */
++      bnx2_wait_dma_complete(bp);
++
+       /* Wait for the firmware to tell us it is ok to issue a reset. */
+       bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1);
+ 
+@@ -6356,6 +6373,10 @@ bnx2_open(struct net_device *dev)
+       struct bnx2 *bp = netdev_priv(dev);
+       int rc;
+ 
++      rc = bnx2_request_firmware(bp);
++      if (rc < 0)
++              goto out;
++
+       netif_carrier_off(dev);
+ 
+       bnx2_disable_int(bp);
+@@ -6424,6 +6445,7 @@ open_err:
+       bnx2_free_irq(bp);
+       bnx2_free_mem(bp);
+       bnx2_del_napi(bp);
++      bnx2_release_firmware(bp);
+       goto out;
+ }
+ 
+@@ -8570,12 +8592,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
+ 
+       pci_set_drvdata(pdev, dev);
+ 
+-      rc = bnx2_request_firmware(bp);
+-      if (rc < 0)
+-              goto error;
+-
++      /*
++       * In-flight DMA from 1st kernel could continue going in kdump kernel.
++       * New io-page table has been created before bnx2 does reset at open 
stage.
++       * We have to wait for the in-flight DMA to complete to avoid it look up
++       * into the newly created io-page table.
++       */
++      if (is_kdump_kernel())
++              bnx2_wait_dma_complete(bp);
+ 
+-      bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
+       memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
+ 
+       dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
+@@ -8608,7 +8633,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
+       return 0;
+ 
+ error:
+-      bnx2_release_firmware(bp);
+       pci_iounmap(pdev, bp->regview);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+index d48873bcbddf..5cdc96bdd444 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
+ 
+       span_entry->used = true;
+       span_entry->id = index;
+-      span_entry->ref_count = 0;
++      span_entry->ref_count = 1;
+       span_entry->local_port = local_port;
+       return span_entry;
+ }
+@@ -268,6 +268,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct 
mlxsw_sp_port *port)
+ 
+       span_entry = mlxsw_sp_span_entry_find(port);
+       if (span_entry) {
++              /* Already exists, just take a reference */
+               span_entry->ref_count++;
+               return span_entry;
+       }
+@@ -278,6 +279,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct 
mlxsw_sp_port *port)
+ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_span_entry *span_entry)
+ {
++      WARN_ON(!span_entry->ref_count);
+       if (--span_entry->ref_count == 0)
+               mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
+       return 0;
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+index 3f5c51da6d3e..62514b9bf988 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+@@ -777,6 +777,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct 
mlxsw_sp *mlxsw_sp,
+       }
+ }
+ 
++static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
++{
++      u8 num_rec, last_rec_index, num_entries;
++
++      num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
++      last_rec_index = num_rec - 1;
++
++      if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
++              return false;
++      if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
++          MLXSW_REG_RAUHTD_TYPE_IPV6)
++              return true;
++
++      num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
++                                                              last_rec_index);
++      if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
++              return true;
++      return false;
++}
++
+ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+ {
+       char *rauhtd_pl;
+@@ -803,7 +823,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct 
mlxsw_sp *mlxsw_sp)
+               for (i = 0; i < num_rec; i++)
+                       mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
+                                                         i);
+-      } while (num_rec);
++      } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
+       rtnl_unlock();
+ 
+       kfree(rauhtd_pl);
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index 4c8c60af7985..fe9e7b1979b8 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -871,6 +871,13 @@ static int stmmac_init_phy(struct net_device *dev)
+               return -ENODEV;
+       }
+ 
++      /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid
++       * subsequent PHY polling, make sure we force a link transition if
++       * we have a UP/DOWN/UP transition
++       */
++      if (phydev->is_pseudo_fixed_link)
++              phydev->irq = PHY_POLL;
++
+       pr_debug("stmmac_init_phy:  %s: attached to PHY (UID 0x%x)"
+                " Link = %d\n", dev->name, phydev->phy_id, phydev->link);
+ 
+diff --git a/drivers/usb/gadget/function/f_fs.c 
b/drivers/usb/gadget/function/f_fs.c
+index 5c8429f23a89..3a5530d0511b 100644
+--- a/drivers/usb/gadget/function/f_fs.c
++++ b/drivers/usb/gadget/function/f_fs.c
+@@ -133,8 +133,60 @@ struct ffs_epfile {
+       /*
+        * Buffer for holding data from partial reads which may happen since
+        * we’re rounding user read requests to a multiple of a max packet size.
++       *
++       * The pointer is initialised with NULL value and may be set by
++       * __ffs_epfile_read_data function to point to a temporary buffer.
++       *
++       * In normal operation, calls to __ffs_epfile_read_buffered will consume
++       * data from said buffer and eventually free it.  Importantly, while the
++       * function is using the buffer, it sets the pointer to NULL.  This is
++       * all right since __ffs_epfile_read_data and __ffs_epfile_read_buffered
++       * can never run concurrently (they are synchronised by epfile->mutex)
++       * so the latter will not assign a new value to the pointer.
++       *
++       * Meanwhile ffs_func_eps_disable frees the buffer (if the pointer is
++       * valid) and sets the pointer to READ_BUFFER_DROP value.  This special
++       * value is crux of the synchronisation between ffs_func_eps_disable and
++       * __ffs_epfile_read_data.
++       *
++       * Once __ffs_epfile_read_data is about to finish it will try to set the
++       * pointer back to its old value (as described above), but seeing as the
++       * pointer is not-NULL (namely READ_BUFFER_DROP) it will instead free
++       * the buffer.
++       *
++       * == State transitions ==
++       *
++       * • ptr == NULL:  (initial state)
++       *   ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP
++       *   ◦ __ffs_epfile_read_buffered:    nop
++       *   ◦ __ffs_epfile_read_data allocates temp buffer: go to ptr == buf
++       *   ◦ reading finishes:              n/a, not in ‘and reading’ state
++       * • ptr == DROP:
++       *   ◦ __ffs_epfile_read_buffer_free: nop
++       *   ◦ __ffs_epfile_read_buffered:    go to ptr == NULL
++       *   ◦ __ffs_epfile_read_data allocates temp buffer: free buf, nop
++       *   ◦ reading finishes:              n/a, not in ‘and reading’ state
++       * • ptr == buf:
++       *   ◦ __ffs_epfile_read_buffer_free: free buf, go to ptr == DROP
++       *   ◦ __ffs_epfile_read_buffered:    go to ptr == NULL and reading
++       *   ◦ __ffs_epfile_read_data:        n/a, __ffs_epfile_read_buffered
++       *                                    is always called first
++       *   ◦ reading finishes:              n/a, not in ‘and reading’ state
++       * • ptr == NULL and reading:
++       *   ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP and reading
++       *   ◦ __ffs_epfile_read_buffered:    n/a, mutex is held
++       *   ◦ __ffs_epfile_read_data:        n/a, mutex is held
++       *   ◦ reading finishes and …
++       *     … all data read:               free buf, go to ptr == NULL
++       *     … otherwise:                   go to ptr == buf and reading
++       * • ptr == DROP and reading:
++       *   ◦ __ffs_epfile_read_buffer_free: nop
++       *   ◦ __ffs_epfile_read_buffered:    n/a, mutex is held
++       *   ◦ __ffs_epfile_read_data:        n/a, mutex is held
++       *   ◦ reading finishes:              free buf, go to ptr == DROP
+        */
+-      struct ffs_buffer               *read_buffer;   /* P: epfile->mutex */
++      struct ffs_buffer               *read_buffer;
++#define READ_BUFFER_DROP ((struct ffs_buffer *)ERR_PTR(-ESHUTDOWN))
+ 
+       char                            name[5];
+ 
+@@ -733,25 +785,47 @@ static void ffs_epfile_async_io_complete(struct usb_ep 
*_ep,
+       schedule_work(&io_data->work);
+ }
+ 
++static void __ffs_epfile_read_buffer_free(struct ffs_epfile *epfile)
++{
++      /*
++       * See comment in struct ffs_epfile for full read_buffer pointer
++       * synchronisation story.
++       */
++      struct ffs_buffer *buf = xchg(&epfile->read_buffer, READ_BUFFER_DROP);
++      if (buf && buf != READ_BUFFER_DROP)
++              kfree(buf);
++}
++
+ /* Assumes epfile->mutex is held. */
+ static ssize_t __ffs_epfile_read_buffered(struct ffs_epfile *epfile,
+                                         struct iov_iter *iter)
+ {
+-      struct ffs_buffer *buf = epfile->read_buffer;
++      /*
++       * Null out epfile->read_buffer so ffs_func_eps_disable does not free
++       * the buffer while we are using it.  See comment in struct ffs_epfile
++       * for full read_buffer pointer synchronisation story.
++       */
++      struct ffs_buffer *buf = xchg(&epfile->read_buffer, NULL);
+       ssize_t ret;
+-      if (!buf)
++      if (!buf || buf == READ_BUFFER_DROP)
+               return 0;
+ 
+       ret = copy_to_iter(buf->data, buf->length, iter);
+       if (buf->length == ret) {
+               kfree(buf);
+-              epfile->read_buffer = NULL;
+-      } else if (unlikely(iov_iter_count(iter))) {
++              return ret;
++      }
++
++      if (unlikely(iov_iter_count(iter))) {
+               ret = -EFAULT;
+       } else {
+               buf->length -= ret;
+               buf->data += ret;
+       }
++
++      if (cmpxchg(&epfile->read_buffer, NULL, buf))
++              kfree(buf);
++
+       return ret;
+ }
+ 
+@@ -780,7 +854,15 @@ static ssize_t __ffs_epfile_read_data(struct ffs_epfile 
*epfile,
+       buf->length = data_len;
+       buf->data = buf->storage;
+       memcpy(buf->storage, data + ret, data_len);
+-      epfile->read_buffer = buf;
++
++      /*
++       * At this point read_buffer is NULL or READ_BUFFER_DROP (if
++       * ffs_func_eps_disable has been called in the meanwhile).  See comment
++       * in struct ffs_epfile for full read_buffer pointer synchronisation
++       * story.
++       */
++      if (unlikely(cmpxchg(&epfile->read_buffer, NULL, buf)))
++              kfree(buf);
+ 
+       return ret;
+ }
+@@ -1094,8 +1176,7 @@ ffs_epfile_release(struct inode *inode, struct file 
*file)
+ 
+       ENTER();
+ 
+-      kfree(epfile->read_buffer);
+-      epfile->read_buffer = NULL;
++      __ffs_epfile_read_buffer_free(epfile);
+       ffs_data_closed(epfile->ffs);
+ 
+       return 0;
+@@ -1721,24 +1802,20 @@ static void ffs_func_eps_disable(struct ffs_function 
*func)
+       unsigned count            = func->ffs->eps_count;
+       unsigned long flags;
+ 
++      spin_lock_irqsave(&func->ffs->eps_lock, flags);
+       do {
+-              if (epfile)
+-                      mutex_lock(&epfile->mutex);
+-              spin_lock_irqsave(&func->ffs->eps_lock, flags);
+               /* pending requests get nuked */
+               if (likely(ep->ep))
+                       usb_ep_disable(ep->ep);
+               ++ep;
+-              spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
+ 
+               if (epfile) {
+                       epfile->ep = NULL;
+-                      kfree(epfile->read_buffer);
+-                      epfile->read_buffer = NULL;
+-                      mutex_unlock(&epfile->mutex);
++                      __ffs_epfile_read_buffer_free(epfile);
+                       ++epfile;
+               }
+       } while (--count);
++      spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
+ }
+ 
+ static int ffs_func_eps_enable(struct ffs_function *func)
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 156b0c11b524..0ccf6daf6f56 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -47,7 +47,6 @@ struct inet_skb_parm {
+ #define IPSKB_REROUTED                BIT(4)
+ #define IPSKB_DOREDIRECT      BIT(5)
+ #define IPSKB_FRAG_PMTU               BIT(6)
+-#define IPSKB_FRAG_SEGS               BIT(7)
+ 
+       u16                     frag_max_size;
+ };
+diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
+index 43a5a0e4524c..b01d5d1d7439 100644
+--- a/include/net/ip6_tunnel.h
++++ b/include/net/ip6_tunnel.h
+@@ -145,6 +145,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct 
sk_buff *skb,
+ {
+       int pkt_len, err;
+ 
++      memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+       pkt_len = skb->len - skb_inner_network_offset(skb);
+       err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
+       if (unlikely(net_xmit_eval(err)))
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 8741988e6880..c26eab962ec7 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1587,11 +1587,11 @@ static inline void sock_put(struct sock *sk)
+ void sock_gen_put(struct sock *sk);
+ 
+ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested,
+-                   unsigned int trim_cap);
++                   unsigned int trim_cap, bool refcounted);
+ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+                                const int nested)
+ {
+-      return __sk_receive_skb(sk, skb, nested, 1);
++      return __sk_receive_skb(sk, skb, nested, 1, true);
+ }
+ 
+ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 7717302cab91..0de698940793 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1164,6 +1164,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
+ }
+ 
+ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
++int tcp_filter(struct sock *sk, struct sk_buff *skb);
+ 
+ #undef STATE_TRACE
+ 
+diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h
+index 5cd4d4d2dd1d..9c9c6ad55f14 100644
+--- a/include/uapi/linux/atm_zatm.h
++++ b/include/uapi/linux/atm_zatm.h
+@@ -14,7 +14,6 @@
+ 
+ #include <linux/atmapi.h>
+ #include <linux/atmioc.h>
+-#include <linux/time.h>
+ 
+ #define ZATM_GETPOOL  _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
+                                               /* get pool statistics */
+diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
+index 570eeca7bdfa..ad1bc67aff1b 100644
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab)
+ 
+               hlist_for_each_entry_safe(l, n, head, hash_node) {
+                       hlist_del_rcu(&l->hash_node);
+-                      htab_elem_free(htab, l);
++                      if (l->state != HTAB_EXTRA_ELEM_USED)
++                              htab_elem_free(htab, l);
+               }
+       }
+ }
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 44b3ba462ba1..9ce9d7284ea7 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb)
+                       goto out;
+       }
+ 
+-      *(__sum16 *)(skb->data + offset) = csum_fold(csum);
++      *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
+ out_set_summed:
+       skb->ip_summed = CHECKSUM_NONE;
+ out:
+diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
+index 52742a02814f..5550a86f7264 100644
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -118,7 +118,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
+       struct flow_dissector_key_tags *key_tags;
+       struct flow_dissector_key_keyid *key_keyid;
+       u8 ip_proto = 0;
+-      bool ret = false;
++      bool ret;
+ 
+       if (!data) {
+               data = skb->data;
+@@ -481,12 +481,17 @@ ip_proto_again:
+ out_good:
+       ret = true;
+ 
+-out_bad:
++      key_control->thoff = (u16)nhoff;
++out:
+       key_basic->n_proto = proto;
+       key_basic->ip_proto = ip_proto;
+-      key_control->thoff = (u16)nhoff;
+ 
+       return ret;
++
++out_bad:
++      ret = false;
++      key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
++      goto out;
+ }
+ EXPORT_SYMBOL(__skb_flow_dissect);
+ 
+diff --git a/net/core/sock.c b/net/core/sock.c
+index fd7b41edf1ce..10acaccca5c8 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff 
*skb)
+ EXPORT_SYMBOL(sock_queue_rcv_skb);
+ 
+ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+-                   const int nested, unsigned int trim_cap)
++                   const int nested, unsigned int trim_cap, bool refcounted)
+ {
+       int rc = NET_RX_SUCCESS;
+ 
+@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+ 
+       bh_unlock_sock(sk);
+ out:
+-      sock_put(sk);
++      if (refcounted)
++              sock_put(sk);
+       return rc;
+ discard_and_relse:
+       kfree_skb(skb);
+@@ -1563,6 +1564,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const 
gfp_t priority)
+               RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+ 
+               newsk->sk_err      = 0;
++              newsk->sk_err_soft = 0;
+               newsk->sk_priority = 0;
+               newsk->sk_incoming_cpu = raw_smp_processor_id();
+               atomic64_set(&newsk->sk_cookie, 0);
+diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
+index 345a3aeb8c7e..b567c8725aea 100644
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
+ {
+       const struct iphdr *iph = (struct iphdr *)skb->data;
+       const u8 offset = iph->ihl << 2;
+-      const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
++      const struct dccp_hdr *dh;
+       struct dccp_sock *dp;
+       struct inet_sock *inet;
+       const int type = icmp_hdr(skb)->type;
+@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
+       int err;
+       struct net *net = dev_net(skb->dev);
+ 
+-      if (skb->len < offset + sizeof(*dh) ||
+-          skb->len < offset + __dccp_basic_hdr_len(dh)) {
+-              __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+-              return;
+-      }
++      /* Only need dccph_dport & dccph_sport which are the first
++       * 4 bytes in dccp header.
++       * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
++       */
++      BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
++      BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
++      dh = (struct dccp_hdr *)(skb->data + offset);
+ 
+       sk = __inet_lookup_established(net, &dccp_hashinfo,
+                                      iph->daddr, dh->dccph_dport,
+@@ -868,7 +870,7 @@ lookup:
+               goto discard_and_relse;
+       nf_reset(skb);
+ 
+-      return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
++      return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
+ 
+ no_dccp_socket:
+       if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
+index 3828f94b234c..715e5d1dc107 100644
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
+                       u8 type, u8 code, int offset, __be32 info)
+ {
+       const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+-      const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
++      const struct dccp_hdr *dh;
+       struct dccp_sock *dp;
+       struct ipv6_pinfo *np;
+       struct sock *sk;
+@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
+       __u64 seq;
+       struct net *net = dev_net(skb->dev);
+ 
+-      if (skb->len < offset + sizeof(*dh) ||
+-          skb->len < offset + __dccp_basic_hdr_len(dh)) {
+-              __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+-                                ICMP6_MIB_INERRORS);
+-              return;
+-      }
++      /* Only need dccph_dport & dccph_sport which are the first
++       * 4 bytes in dccp header.
++       * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
++       */
++      BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
++      BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
++      dh = (struct dccp_hdr *)(skb->data + offset);
+ 
+       sk = __inet6_lookup_established(net, &dccp_hashinfo,
+                                       &hdr->daddr, dh->dccph_dport,
+@@ -738,7 +739,8 @@ lookup:
+       if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+               goto discard_and_relse;
+ 
+-      return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
++      return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
++                              refcounted) ? -1 : 0;
+ 
+ no_dccp_socket:
+       if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+@@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops 
dccp_ipv6_mapped = {
+       .getsockopt        = ipv6_getsockopt,
+       .addr2sockaddr     = inet6_csk_addr2sockaddr,
+       .sockaddr_len      = sizeof(struct sockaddr_in6),
++      .bind_conflict     = inet6_csk_bind_conflict,
+ #ifdef CONFIG_COMPAT
+       .compat_setsockopt = compat_ipv6_setsockopt,
+       .compat_getsockopt = compat_ipv6_getsockopt,
+diff --git a/net/dccp/proto.c b/net/dccp/proto.c
+index 41e65804ddf5..9fe25bf63296 100644
+--- a/net/dccp/proto.c
++++ b/net/dccp/proto.c
+@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
+               __kfree_skb(skb);
+       }
+ 
++      /* If socket has been already reset kill it. */
++      if (sk->sk_state == DCCP_CLOSED)
++              goto adjudge_to_death;
++
+       if (data_was_unread) {
+               /* Unread data was tossed, send an appropriate Reset Code */
+               DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index e2ffc2a5c7db..7ef703102dca 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -2455,22 +2455,19 @@ static struct key_vector *fib_route_get_idx(struct 
fib_route_iter *iter,
+       struct key_vector *l, **tp = &iter->tnode;
+       t_key key;
+ 
+-      /* use cache location of next-to-find key */
++      /* use cached location of previously found key */
+       if (iter->pos > 0 && pos >= iter->pos) {
+-              pos -= iter->pos;
+               key = iter->key;
+       } else {
+-              iter->pos = 0;
++              iter->pos = 1;
+               key = 0;
+       }
+ 
+-      while ((l = leaf_walk_rcu(tp, key)) != NULL) {
++      pos -= iter->pos;
++
++      while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
+               key = l->key + 1;
+               iter->pos++;
+-
+-              if (--pos <= 0)
+-                      break;
+-
+               l = NULL;
+ 
+               /* handle unlikely case of a key wrap */
+@@ -2479,7 +2476,7 @@ static struct key_vector *fib_route_get_idx(struct 
fib_route_iter *iter,
+       }
+ 
+       if (l)
+-              iter->key = key;        /* remember it */
++              iter->key = l->key;     /* remember it */
+       else
+               iter->pos = 0;          /* forget it */
+ 
+@@ -2507,7 +2504,7 @@ static void *fib_route_seq_start(struct seq_file *seq, 
loff_t *pos)
+               return fib_route_get_idx(iter, *pos);
+ 
+       iter->pos = 0;
+-      iter->key = 0;
++      iter->key = KEY_MAX;
+ 
+       return SEQ_START_TOKEN;
+ }
+@@ -2516,7 +2513,7 @@ static void *fib_route_seq_next(struct seq_file *seq, 
void *v, loff_t *pos)
+ {
+       struct fib_route_iter *iter = seq->private;
+       struct key_vector *l = NULL;
+-      t_key key = iter->key;
++      t_key key = iter->key + 1;
+ 
+       ++*pos;
+ 
+@@ -2525,7 +2522,7 @@ static void *fib_route_seq_next(struct seq_file *seq, 
void *v, loff_t *pos)
+               l = leaf_walk_rcu(&iter->tnode, key);
+ 
+       if (l) {
+-              iter->key = l->key + 1;
++              iter->key = l->key;
+               iter->pos++;
+       } else {
+               iter->pos = 0;
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index 38abe70e595f..48734ee6293f 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
+       fl4->flowi4_proto = IPPROTO_ICMP;
+       fl4->fl4_icmp_type = type;
+       fl4->fl4_icmp_code = code;
+-      fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
++      fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
+ 
+       security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
+       rt = __ip_route_output_key_hash(net, fl4,
+@@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
+       if (err)
+               goto relookup_failed;
+ 
+-      if (inet_addr_type_dev_table(net, skb_in->dev,
++      if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
+                                    fl4_dec.saddr) == RTN_LOCAL) {
+               rt2 = __ip_route_output_key(net, &fl4_dec);
+               if (IS_ERR(rt2))
+diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
+index 8b4ffd216839..9f0a7b96646f 100644
+--- a/net/ipv4/ip_forward.c
++++ b/net/ipv4/ip_forward.c
+@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
+       if (opt->is_strictroute && rt->rt_uses_gateway)
+               goto sr_failed;
+ 
+-      IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
++      IPCB(skb)->flags |= IPSKB_FORWARDED;
+       mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+       if (ip_exceeds_mtu(skb, mtu)) {
+               IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index dde37fb340bf..307daed9a4b9 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -223,11 +223,9 @@ static int ip_finish_output_gso(struct net *net, struct 
sock *sk,
+       struct sk_buff *segs;
+       int ret = 0;
+ 
+-      /* common case: fragmentation of segments is not allowed,
+-       * or seglen is <= mtu
++      /* common case: seglen is <= mtu
+        */
+-      if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
+-            skb_gso_validate_mtu(skb, mtu))
++      if (skb_gso_validate_mtu(skb, mtu))
+               return ip_finish_output2(net, sk, skb);
+ 
+       /* Slowpath -  GSO segment length is exceeding the dst MTU.
+diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
+index 0f227db0e9ac..afd6b5968caf 100644
+--- a/net/ipv4/ip_tunnel_core.c
++++ b/net/ipv4/ip_tunnel_core.c
+@@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, 
struct sk_buff *skb,
+       int pkt_len = skb->len - skb_inner_network_offset(skb);
+       struct net *net = dev_net(rt->dst.dev);
+       struct net_device *dev = skb->dev;
+-      int skb_iif = skb->skb_iif;
+       struct iphdr *iph;
+       int err;
+ 
+@@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, 
struct sk_buff *skb,
+       skb_dst_set(skb, &rt->dst);
+       memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ 
+-      if (skb_iif && !(df & htons(IP_DF))) {
+-              /* Arrived from an ingress interface, got encapsulated, with
+-               * fragmentation of encapulating frames allowed.
+-               * If skb is gso, the resulting encapsulated network segments
+-               * may exceed dst mtu.
+-               * Allow IP Fragmentation of segments.
+-               */
+-              IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
+-      }
+-
+       /* Push down and install the IP header. */
+       skb_push(skb, sizeof(struct iphdr));
+       skb_reset_network_header(skb);
+diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
+index 5f006e13de56..27089f5ebbb1 100644
+--- a/net/ipv4/ipmr.c
++++ b/net/ipv4/ipmr.c
+@@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct 
mr_table *mrt,
+               vif->dev->stats.tx_bytes += skb->len;
+       }
+ 
+-      IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
++      IPCB(skb)->flags |= IPSKB_FORWARDED;
+ 
+       /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
+        * not only before forwarding, but after forwarding on all output
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 62c3ed0b7556..2f23ef1a8486 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct 
sk_buff *skb, struct flow
+                       goto reject_redirect;
+       }
+ 
+-      n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
++      n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
++      if (!n)
++              n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
+       if (!IS_ERR(n)) {
+               if (!(n->nud_state & NUD_VALID)) {
+                       neigh_event_send(n, NULL);
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index ffbb218de520..c876f5ddc86c 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1145,7 +1145,7 @@ restart:
+ 
+       err = -EPIPE;
+       if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+-              goto out_err;
++              goto do_error;
+ 
+       sg = !!(sk->sk_route_caps & NETIF_F_SG);
+ 
+@@ -1219,7 +1219,7 @@ new_segment:
+ 
+                       if (!skb_can_coalesce(skb, i, pfrag->page,
+                                             pfrag->offset)) {
+-                              if (i == sysctl_max_skb_frags || !sg) {
++                              if (i >= sysctl_max_skb_frags || !sg) {
+                                       tcp_mark_push(tp, skb);
+                                       goto new_segment;
+                               }
+diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
+index 10d728b6804c..ab37c6775630 100644
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -56,6 +56,7 @@ struct dctcp {
+       u32 next_seq;
+       u32 ce_state;
+       u32 delayed_ack_reserved;
++      u32 loss_cwnd;
+ };
+ 
+ static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
+@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
+               ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
+ 
+               ca->delayed_ack_reserved = 0;
++              ca->loss_cwnd = 0;
+               ca->ce_state = 0;
+ 
+               dctcp_reset(tp, ca);
+@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
+ 
+ static u32 dctcp_ssthresh(struct sock *sk)
+ {
+-      const struct dctcp *ca = inet_csk_ca(sk);
++      struct dctcp *ca = inet_csk_ca(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+ 
++      ca->loss_cwnd = tp->snd_cwnd;
+       return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 
2U);
+ }
+ 
+@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, 
int *attr,
+       return 0;
+ }
+ 
++static u32 dctcp_cwnd_undo(struct sock *sk)
++{
++      const struct dctcp *ca = inet_csk_ca(sk);
++
++      return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
++}
++
+ static struct tcp_congestion_ops dctcp __read_mostly = {
+       .init           = dctcp_init,
+       .in_ack_event   = dctcp_update_alpha,
+       .cwnd_event     = dctcp_cwnd_event,
+       .ssthresh       = dctcp_ssthresh,
+       .cong_avoid     = tcp_reno_cong_avoid,
++      .undo_cwnd      = dctcp_cwnd_undo,
+       .set_state      = dctcp_state,
+       .get_info       = dctcp_get_info,
+       .flags          = TCP_CONG_NEEDS_ECN,
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 7158d4f8dae4..7b235fa12903 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1537,6 +1537,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
+ }
+ EXPORT_SYMBOL(tcp_prequeue);
+ 
++int tcp_filter(struct sock *sk, struct sk_buff *skb)
++{
++      struct tcphdr *th = (struct tcphdr *)skb->data;
++      unsigned int eaten = skb->len;
++      int err;
++
++      err = sk_filter_trim_cap(sk, skb, th->doff * 4);
++      if (!err) {
++              eaten -= skb->len;
++              TCP_SKB_CB(skb)->end_seq -= eaten;
++      }
++      return err;
++}
++EXPORT_SYMBOL(tcp_filter);
++
+ /*
+  *    From tcp_input.c
+  */
+@@ -1648,8 +1663,10 @@ process:
+ 
+       nf_reset(skb);
+ 
+-      if (sk_filter(sk, skb))
++      if (tcp_filter(sk, skb))
+               goto discard_and_relse;
++      th = (const struct tcphdr *)skb->data;
++      iph = ip_hdr(skb);
+ 
+       skb->dev = NULL;
+ 
+diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
+index bd59c343d35f..7370ad2e693a 100644
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 
code, __u32 info,
+       if (__ipv6_addr_needs_scope_id(addr_type))
+               iif = skb->dev->ifindex;
+       else
+-              iif = l3mdev_master_ifindex(skb->dev);
++              iif = l3mdev_master_ifindex(skb_dst(skb)->dev);
+ 
+       /*
+        *      Must not send error if the source does not uniquely
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index fc67822c42e0..af6a09efad5b 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1228,7 +1228,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff 
*skb)
+       if (skb->protocol == htons(ETH_P_IP))
+               return tcp_v4_do_rcv(sk, skb);
+ 
+-      if (sk_filter(sk, skb))
++      if (tcp_filter(sk, skb))
+               goto discard;
+ 
+       /*
+@@ -1455,8 +1455,10 @@ process:
+       if (tcp_v6_inbound_md5_hash(sk, skb))
+               goto discard_and_relse;
+ 
+-      if (sk_filter(sk, skb))
++      if (tcp_filter(sk, skb))
+               goto discard_and_relse;
++      th = (const struct tcphdr *)skb->data;
++      hdr = ipv6_hdr(skb);
+ 
+       skb->dev = NULL;
+ 
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c
+index baccbf3c1c60..7b0e059bf13b 100644
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk,
+ 
+       timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
+ 
+-      err = sctp_wait_for_connect(asoc, &timeo);
+-      if ((err == 0 || err == -EINPROGRESS) && assoc_id)
++      if (assoc_id)
+               *assoc_id = asoc->assoc_id;
++      err = sctp_wait_for_connect(asoc, &timeo);
++      /* Note: the asoc may be freed after the return of
++       * sctp_wait_for_connect.
++       */
+ 
+       /* Don't free association on exit. */
+       asoc = NULL;
+@@ -4278,19 +4281,18 @@ static void sctp_shutdown(struct sock *sk, int how)
+ {
+       struct net *net = sock_net(sk);
+       struct sctp_endpoint *ep;
+-      struct sctp_association *asoc;
+ 
+       if (!sctp_style(sk, TCP))
+               return;
+ 
+-      if (how & SEND_SHUTDOWN) {
++      ep = sctp_sk(sk)->ep;
++      if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
++              struct sctp_association *asoc;
++
+               sk->sk_state = SCTP_SS_CLOSING;
+-              ep = sctp_sk(sk)->ep;
+-              if (!list_empty(&ep->asocs)) {
+-                      asoc = list_entry(ep->asocs.next,
+-                                        struct sctp_association, asocs);
+-                      sctp_primitive_SHUTDOWN(net, asoc, NULL);
+-              }
++              asoc = list_entry(ep->asocs.next,
++                                struct sctp_association, asocs);
++              sctp_primitive_SHUTDOWN(net, asoc, NULL);
+       }
+ }
+ 
+diff --git a/net/socket.c b/net/socket.c
+index a1bd16106625..03bc2c289c94 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, 
unsigned int vlen,
+               if (err)
+                       break;
+               ++datagrams;
++              if (msg_data_left(&msg_sys))
++                      break;
+               cond_resched();
+       }
+ 
+diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
+index f3825b676e38..f046b77cfefe 100644
+--- a/tools/spi/spidev_test.c
++++ b/tools/spi/spidev_test.c
+@@ -19,6 +19,7 @@
+ #include <getopt.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
++#include <linux/ioctl.h>
+ #include <sys/stat.h>
+ #include <linux/types.h>
+ #include <linux/spi/spidev.h>

Reply via email to