This adds macro definitions and a bit of code that allow doubleword
instructions to be used properly on mips64. These changes are taken
directly from the Linux sources. Also switch some o32-specific register
definitions to generic ones so that this code actually compiles for the
n64 ABI.

While at it, this also removes an unused macro definition.

Signed-off-by: Denis Orlov <[email protected]>
---
 arch/mips/Kconfig      |  1 -
 arch/mips/lib/memcpy.S | 37 ++++++++++++++++++++++++++++++++-----
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ab8c8cf176..de2f539cc1 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -376,7 +376,6 @@ config NMON_HELP
 
 config MIPS_OPTIMIZED_STRING_FUNCTIONS
        bool "use assembler optimized string functions"
-       depends on !64BIT
        default y
        help
          Say yes here to use assembler optimized memcpy / memset functions.
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index cee0319dcf..5c01dbdcd3 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -20,7 +20,26 @@
 #define src a1
 #define len a2
 
-#define LOADK lw /* No exception */
+#ifdef CONFIG_64BIT
+
+#define LOAD(reg, addr)                ld reg, addr
+#define LOADL(reg, addr)       ldl reg, addr
+#define LOADR(reg, addr)       ldr reg, addr
+#define STOREL(reg, addr)      sdl reg, addr
+#define STORER(reg, addr)      sdr reg, addr
+#define STORE(reg, addr)       sd reg, addr
+#define ADD    daddu
+#define SUB    dsubu
+#define SRL    dsrl
+#define SRA    dsra
+#define SLL    dsll
+#define SLLV   dsllv
+#define SRLV   dsrlv
+#define NBYTES 8
+#define LOG_NBYTES 3
+
+#else
+
 #define LOAD(reg, addr)                lw reg, addr
 #define LOADL(reg, addr)       lwl reg, addr
 #define LOADR(reg, addr)       lwr reg, addr
@@ -37,6 +56,8 @@
 #define NBYTES 4
 #define LOG_NBYTES 2
 
+#endif /* CONFIG_64BIT */
+
 #define LOADB(reg, addr)       lb reg, addr
 #define STOREB(reg, addr)      sb reg, addr
 
@@ -101,8 +122,8 @@ LEAF(memcpy)                                        /* a0=dst a1=src a2=len */
        LOAD(t2, UNIT(2)(src))
        LOAD(t3, UNIT(3)(src))
        SUB     len, len, 8*NBYTES
-       LOAD(t4, UNIT(4)(src))
-       LOAD(t7, UNIT(5)(src))
+       LOAD(ta0, UNIT(4)(src))
+       LOAD(ta3, UNIT(5)(src))
        STORE(t0, UNIT(0)(dst))
        STORE(t1, UNIT(1)(dst))
        LOAD(t0, UNIT(6)(src))
@@ -111,8 +132,8 @@ LEAF(memcpy)                                        /* a0=dst a1=src a2=len */
        ADD     dst, dst, 8*NBYTES
        STORE(t2, UNIT(-6)(dst))
        STORE(t3, UNIT(-5)(dst))
-       STORE(t4, UNIT(-4)(dst))
-       STORE(t7, UNIT(-3)(dst))
+       STORE(ta0, UNIT(-4)(dst))
+       STORE(ta3, UNIT(-3)(dst))
        STORE(t0, UNIT(-2)(dst))
        STORE(t1, UNIT(-1)(dst))
        bne     len, rem, 1b
@@ -263,6 +284,12 @@ LEAF(memcpy)                                       /* a0=dst a1=src a2=len */
 
        COPY_BYTE(0)
        COPY_BYTE(1)
+#ifdef CONFIG_64BIT
+       COPY_BYTE(2)
+       COPY_BYTE(3)
+       COPY_BYTE(4)
+       COPY_BYTE(5)
+#endif
        LOADB(t0, NBYTES-2(src))
        SUB     len, len, 1
        jr      ra
-- 
2.41.0


Reply via email to