Today LOAD_REG_IMMEDIATE() is a basic #define which loads all
parts on a value into a register, including the parts that are NUL.

This means always 2 instructions on PPC32 and always 5 instructions
on PPC64. And those instructions cannot run in parallele as they are
updating the same register.

Ex: LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in:

3c 20 00 00     lis     r1,0
60 21 00 00     ori     r1,r1,0
78 21 07 c6     rldicr  r1,r1,32,31
64 21 00 00     oris    r1,r1,0
60 21 40 00     ori     r1,r1,16384

Rewrite LOAD_REG_IMMEDIATE() with GAS macro in order to skip
the parts that are NUL.

Rename existing LOAD_REG_IMMEDIATE() as LOAD_REG_IMMEDIATE_SYM()
and use that one for loading value of symbols which are not known
at compile time.

Now LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in:

38 20 40 00     li      r1,16384

Signed-off-by: Christophe Leroy <christophe.le...@c-s.fr>
---
 v2: Fixed the test from (\x) & 0xffffffff to (\x) >= 0x80000000 || (\x) < 
-0x80000000 in __LOAD_REG_IMMEDIATE()

 arch/powerpc/include/asm/ppc_asm.h   | 42 +++++++++++++++++++++++++++++++-----
 arch/powerpc/kernel/exceptions-64e.S | 10 ++++-----
 arch/powerpc/kernel/head_64.S        |  2 +-
 3 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index e0637730a8e7..bc1385b2f0aa 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -311,13 +311,43 @@ GLUE(.,name):
        addis   reg,reg,(name - 0b)@ha;         \
        addi    reg,reg,(name - 0b)@l;
 
-#ifdef __powerpc64__
-#ifdef HAVE_AS_ATHIGH
+#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
 #define __AS_ATHIGH high
 #else
 #define __AS_ATHIGH h
 #endif
-#define LOAD_REG_IMMEDIATE(reg,expr)           \
+
+.macro __LOAD_REG_IMMEDIATE_32 r, x
+       .if (\x) >= 0x8000 || (\x) < -0x8000
+               lis \r, (\x)@__AS_ATHIGH
+               .if (\x) & 0xffff != 0
+                       ori \r, \r, (\x)@l
+               .endif
+       .else
+               li \r, (\x)@l
+       .endif
+.endm
+
+.macro __LOAD_REG_IMMEDIATE r, x
+       .if (\x) >= 0x80000000 || (\x) < -0x80000000
+               __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
+               rldicr  \r, \r, 32, 31
+               .if (\x) & 0xffff0000 != 0
+                       oris \r, \r, (\x)@__AS_ATHIGH
+               .endif
+               .if (\x) & 0xffff != 0
+                       oris \r, \r, (\x)@l
+               .endif
+       .else
+               __LOAD_REG_IMMEDIATE_32 \r, \x
+       .endif
+.endm
+
+#ifdef __powerpc64__
+
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr)       \
        lis     reg,(expr)@highest;             \
        ori     reg,reg,(expr)@higher;  \
        rldicr  reg,reg,32,31;          \
@@ -335,11 +365,13 @@ GLUE(.,name):
 
 #else /* 32-bit */
 
-#define LOAD_REG_IMMEDIATE(reg,expr)           \
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr)               \
        lis     reg,(expr)@ha;          \
        addi    reg,reg,(expr)@l;
 
-#define LOAD_REG_ADDR(reg,name)                LOAD_REG_IMMEDIATE(reg, name)
+#define LOAD_REG_ADDR(reg,name)                LOAD_REG_IMMEDIATE_SYM(reg, 
name)
 
 #define LOAD_REG_ADDRBASE(reg, name)   lis     reg,name@ha
 #define ADDROFF(name)                  name@l
diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..898aae6da167 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -751,8 +751,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
        ld      r14,interrupt_base_book3e@got(r15)
        ld      r15,__end_interrupts@got(r15)
 #else
-       LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
-       LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+       LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
+       LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
 #endif
        cmpld   cr0,r10,r14
        cmpld   cr1,r10,r15
@@ -821,8 +821,8 @@ kernel_dbg_exc:
        ld      r14,interrupt_base_book3e@got(r15)
        ld      r15,__end_interrupts@got(r15)
 #else
-       LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
-       LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+       LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
+       LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
 #endif
        cmpld   cr0,r10,r14
        cmpld   cr1,r10,r15
@@ -1449,7 +1449,7 @@ a2_tlbinit_code_start:
 a2_tlbinit_after_linear_map:
 
        /* Now we branch the new virtual address mapped by this entry */
-       LOAD_REG_IMMEDIATE(r3,1f)
+       LOAD_REG_IMMEDIATE_SYM(r3,1f)
        mtctr   r3
        bctr
 
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 91d297e696dd..1fd44761e997 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -635,7 +635,7 @@ __after_prom_start:
        sub     r5,r5,r11
 #else
        /* just copy interrupts */
-       LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+       LOAD_REG_IMMEDIATE_SYM(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
 #endif
        b       5f
 3:
-- 
2.13.3

Reply via email to