On Sep 17, 2015, at 7:33 PM, James Morse wrote:

Hi James and Will,

> Hi Will,
> 
> On 16/09/15 12:25, Will Deacon wrote:
>> On Sun, Sep 13, 2015 at 03:42:17PM +0100, Jungseok Lee wrote:
>>> diff --git a/arch/arm64/include/asm/thread_info.h 
>>> b/arch/arm64/include/asm/thread_info.h
>>> index dcd06d1..44839c0 100644
>>> --- a/arch/arm64/include/asm/thread_info.h
>>> +++ b/arch/arm64/include/asm/thread_info.h
>>> @@ -73,8 +73,11 @@ static inline struct thread_info 
>>> *current_thread_info(void) __attribute_const__;
>>> 
>>> static inline struct thread_info *current_thread_info(void)
>>> {
>>> -   return (struct thread_info *)
>>> -           (current_stack_pointer & ~(THREAD_SIZE - 1));
>>> +   unsigned long sp_el0;
>>> +
>>> +   asm volatile("mrs %0, sp_el0" : "=r" (sp_el0));
>>> +
>>> +   return (struct thread_info *)(sp_el0 & ~(THREAD_SIZE - 1));
>> 
>> This looks like it will generate worse code than our current implementation,
>> thanks to the asm volatile. Maybe just add something like a global
>> current_stack_pointer_el0?
> 
> Like current_stack_pointer does?:
>> register unsigned long current_stack_pointer_el0 asm ("sp_el0");
> 
> Unfortunately the compiler won't accept this, as it doesn't like the
> register name, it also won't accept instructions in this asm string.
> 
> Dropping the 'volatile' has the desired effect[0]. This would only cause a
> problem over a call to cpu_switch_to(), which writes to sp_el0, but also
> saves/restores the callee-saved registers, so they will always be consistent.
> 
> 
> James
> 
> 
> 
> 
> [0] A fictitious example printk:
>> printk("%p%p%u%p", get_fs(), current_thread_info(),
>>       smp_processor_id(), current);
> 
> With this patch compiles to:
> 5f8:   d5384101        mrs     x1, sp_el0
> 5fc:   d5384100        mrs     x0, sp_el0
> 600:   d5384103        mrs     x3, sp_el0
> 604:   d5384104        mrs     x4, sp_el0
> 608:   9272c484        and     x4, x4, #0xffffffffffffc000
> 60c:   9272c463        and     x3, x3, #0xffffffffffffc000
> 610:   9272c421        and     x1, x1, #0xffffffffffffc000
> 614:   aa0403e2        mov     x2, x4
> 618:   90000000        adrp    x0, 0 <do_bad>
> 61c:   f9400884        ldr     x4, [x4,#16]
> 620:   91000000        add     x0, x0, #0x0
> 624:   b9401c63        ldr     w3, [x3,#28]
> 628:   f9400421        ldr     x1, [x1,#8]
> 62c:   94000000        bl      0 <printk>
> 
> Removing the volatile:
> 5e4:   d5384102        mrs     x2, sp_el0
> 5e8:   f9400844        ldr     x4, [x2,#16]
> 5ec:   91000000        add     x0, x0, #0x0
> 5f0:   b9401c43        ldr     w3, [x2,#28]
> 5f4:   f9400441        ldr     x1, [x2,#8]
> 5f8:   94000000        bl      0 <printk>
> 
> 


As Will pointed out, if "worse" means "bigger text size", the change generates
worse code than the current implementation. Data based on System.map is as
follows.

GCC version: aarch64-linux-gnu-gcc (Linaro GCC 2014.11) 4.9.3 20141031 
(prerelease)

[1] 4.3-rc1 
ffffffc000080000 T _text
ffffffc0007f1524 R _etext

[2] 4.3-rc1 + this patch
ffffffc000080000 T _text
ffffffc0007f8504 R _etext

[3] 4.3-rc1 + this patch + the following hunk
ffffffc000080000 T _text
ffffffc0007ef514 R _etext

diff --git a/arch/arm64/include/asm/thread_info.h 
b/arch/arm64/include/asm/thread_info.h
index 44839c0..4ab08a1 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -77,7 +77,7 @@ static inline struct thread_info *current_thread_info(void)
 
        asm volatile("mrs %0, sp_el0" : "=r" (sp_el0));
 
-       return (struct thread_info *)(sp_el0 & ~(THREAD_SIZE - 1));
+       return (struct thread_info *)sp_el0;
 }
 
 #define thread_saved_pc(tsk)   \
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c156540..314ac81 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -88,7 +88,8 @@
 
        .if     \el == 0
        mrs     x21, sp_el0
-       get_thread_info \el, tsk                // Ensure MDSCR_EL1.SS is clear,
+       mov     tsk, sp
+       and     tsk, tsk, #~(THREAD_SIZE - 1)   // Ensure MDSCR_EL1.SS is clear,
        ldr     x19, [tsk, #TI_FLAGS]           // since we can unmask debug
        disable_step_tsk x19, x20               // exceptions when scheduling.
        .else
@@ -105,8 +106,7 @@
        .if     \el == 0
        mvn     x21, xzr
        str     x21, [sp, #S_SYSCALLNO]
-       mov     x25, sp
-       msr     sp_el0, x25
+       msr     sp_el0, tsk
        .endif
 
        /*
@@ -165,13 +165,8 @@ alternative_endif
        eret                                    // return to kernel
        .endm
 
-       .macro  get_thread_info, el, rd
-       .if     \el == 0
-       mov     \rd, sp
-       .else
+       .macro  get_thread_info, rd
        mrs     \rd, sp_el0
-       .endif
-       and     \rd, \rd, #~(THREAD_SIZE - 1)   // bottom of thread stack
        .endm
 
        .macro  get_irq_stack
@@ -400,7 +395,7 @@ el1_irq:
        irq_handler
 
 #ifdef CONFIG_PREEMPT
-       get_thread_info 1, tsk
+       get_thread_info tsk
        ldr     w24, [tsk, #TI_PREEMPT]         // get preempt count
        cbnz    w24, 1f                         // preempt count != 0
        ldr     x0, [tsk, #TI_FLAGS]            // get flags
@@ -636,6 +631,7 @@ ENTRY(cpu_switch_to)
        ldp     x29, x9, [x8], #16
        ldr     lr, [x8]
        mov     sp, x9
+       and     x9, x9, #~(THREAD_SIZE - 1)
        msr     sp_el0, x9
        ret
 ENDPROC(cpu_switch_to)
@@ -695,7 +691,7 @@ ENTRY(ret_from_fork)
        cbz     x19, 1f                         // not a kernel thread
        mov     x0, x20
        blr     x19
-1:     get_thread_info 1, tsk
+1:     get_thread_info tsk
        b       ret_to_user
 ENDPROC(ret_from_fork)
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index cb13290..213df0b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -442,6 +442,7 @@ __mmap_switched:
 2:
        adr_l   sp, initial_sp, x4
        mov     x4, sp
+       and     x4, x4, #~(THREAD_SIZE - 1)
        msr     sp_el0, x4
        str_l   x21, __fdt_pointer, x5          // Save FDT pointer
        str_l   x24, memstart_addr, x6          // Save PHYS_OFFSET
@@ -615,6 +616,7 @@ ENDPROC(secondary_startup)
 ENTRY(__secondary_switched)
        ldr     x0, [x21]                       // get secondary_data.stack
        mov     sp, x0
+       and     x0, x0, #~(THREAD_SIZE - 1)
        msr     sp_el0, x0
        mov     x29, #0
        b       secondary_start_kernel

If the struct thread_info address is directly stored into sp_el0, we can avoid
the masking operation in many places. It helps to decrease the kernel text size.
This idea comes from James's comment on the v1 patch.

Best Regards
Jungseok Lee
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to