Hi André,

On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
> Am 23.04.2013 11:15, schrieb Will Deacon:
> > You could introduce `get' tls functions, which don't do anything for CPUs
> > without the relevant registers.
> 
> Before i have another round of testing and patch formatting/sending,
> what about the untested patch below?

Ok. Comments inline.

> diff --git a/arch/arm/include/asm/thread_info.h 
> b/arch/arm/include/asm/thread_info.h
> index cddda1f..bb5b48d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>       struct cpu_context_save cpu_context;    /* cpu context */
>       __u32                   syscall;        /* syscall number */
>       __u8                    used_cp[16];    /* thread used copro */
> -     unsigned long           tp_value;
> +     unsigned long           tp_value[2];
>  #ifdef CONFIG_CRUNCH
>       struct crunch_state     crunchstate;
>  #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..1c10163 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,13 +2,30 @@
>  #define __ASMARM_TLS_H
>  
>  #ifdef __ASSEMBLY__
> +     .macro get_tls2_none, tp, tmp1
> +     .endm

Cosmetic, but these are really horrible macro names.

> +     .macro get_tls2_v6k, tp, tmp1
> +     mrc     p15, 0, \tmp1, c13, c0, 2               @ get user r/w TLS 
> register
> +     str     \tmp1, [\tp, #4]
> +     .endm
> +
> +     .macro get_tls2_v6, tp, tmp1
> +     ldr     \tmp1, =elf_hwcap
> +     ldr     \tmp1, [\tmp1, #0]
> +     tst     \tmp1, #HWCAP_TLS               @ hardware TLS available?
> +     mrcne   p15, 0, \tmp1, c13, c0, 2               @ get user r/w TLS 
> register
> +     strne   \tmp1, [\tp, #4]

You could factor out some of this hwcap checking now that it's used by both
set and get.

> +     .endm
> +
> +
>       .macro set_tls_none, tp, tmp1, tmp2
>       .endm
>  
>       .macro set_tls_v6k, tp, tmp1, tmp2
> -     mcr     p15, 0, \tp, c13, c0, 3         @ set TLS register
> -     mov     \tmp1, #0
> -     mcr     p15, 0, \tmp1, c13, c0, 2       @ clear user r/w TLS register
> +     ldrd    \tmp1, \tmp2, [\tp]
> +     mcr     p15, 0, \tmp1, c13, c0, 3       @ set user r/o TLS register
> +     mcr     p15, 0, \tmp2, c13, c0, 2       @ set user r/w TLS register
>       .endm
>  
>       .macro set_tls_v6, tp, tmp1, tmp2
> @@ -16,33 +33,39 @@
>       ldr     \tmp1, [\tmp1, #0]
>       mov     \tmp2, #0xffff0fff
>       tst     \tmp1, #HWCAP_TLS               @ hardware TLS available?
> -     mcrne   p15, 0, \tp, c13, c0, 3         @ yes, set TLS register
> -     movne   \tmp1, #0
> -     mcrne   p15, 0, \tmp1, c13, c0, 2       @ clear user r/w TLS register
> -     streq   \tp, [\tmp2, #-15]              @ set TLS value at 0xffff0ff0
> +     ldrdne  \tmp1, \tmp2, [\tp]

Does this work for big-endian CPUs?

> +     ldreq   \tmp1, [\tp]
> +     mcrne   p15, 0, \tmp1, c13, c0, 3       @ yes, set user r/o TLS register
> +     mcrne   p15, 0, \tmp2, c13, c0, 2       @ set user r/w TLS register
> +     streq   \tmp1, [\tmp2, #-15]            @ set TLS value at 0xffff0ff0
>       .endm
>  
>       .macro set_tls_software, tp, tmp1, tmp2
> -     mov     \tmp1, #0xffff0fff
> -     str     \tp, [\tmp1, #-15]              @ set TLS value at 0xffff0ff0
> +     ldr     \tmp1, [\tp]
> +     mov     \tmp2, #0xffff0fff
> +     str     \tmp1, [\tmp2, #-15]            @ set TLS value at 0xffff0ff0
>       .endm
>  #endif
>  
>  #ifdef CONFIG_TLS_REG_EMUL
>  #define tls_emu              1
>  #define has_tls_reg          1
> +#define get_tls2             get_tls2_none
>  #define set_tls              set_tls_none
>  #elif defined(CONFIG_CPU_V6)
>  #define tls_emu              0
>  #define has_tls_reg          (elf_hwcap & HWCAP_TLS)
> +#define get_tls2             get_tls2_v6
>  #define set_tls              set_tls_v6
>  #elif defined(CONFIG_CPU_32v6K)
>  #define tls_emu              0
>  #define has_tls_reg          1
> +#define get_tls2             get_tls2_v6k
>  #define set_tls              set_tls_v6k
>  #else
>  #define tls_emu              0
>  #define has_tls_reg          0
> +#define get_tls2             get_tls2_none
>  #define set_tls              set_tls_software
>  #endif
>  
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..097686b 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,7 +728,7 @@ ENTRY(__switch_to)
>   UNWIND(.fnstart     )
>   UNWIND(.cantunwind  )
>       add     ip, r1, #TI_CPU_SAVE
> -     ldr     r3, [r2, #TI_TP_VALUE]
> +     add     r3, r1, #TI_TP_VALUE
>   ARM(        stmia   ip!, {r4 - sl, fp, sp, lr} )    @ Store most regs on 
> stack
>   THUMB(      stmia   ip!, {r4 - sl, fp}         )    @ Store most regs on 
> stack
>   THUMB(      str     sp, [ip], #4               )
> @@ -736,6 +736,8 @@ ENTRY(__switch_to)
>  #ifdef CONFIG_CPU_USE_DOMAINS
>       ldr     r6, [r2, #TI_CPU_DOMAIN]
>  #endif
> +     get_tls2        r3, r4
> +     add     r3, r2, #TI_TP_VALUE
>       set_tls r3, r4, r5
>  #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>       ldr     r7, [r2, #TI_TASK]
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..6138eb1 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -395,7 +395,8 @@ copy_thread(unsigned long clone_flags, unsigned long 
> stack_start,
>       clear_ptrace_hw_breakpoint(p);
>  
>       if (clone_flags & CLONE_SETTLS)
> -             thread->tp_value = childregs->ARM_r3;
> +             thread->tp_value[0] = childregs->ARM_r3;
> +     thread->tp_value[1] = current_thread_info()->tp_value[1];
>

This still isn't correct. Imagine the following sequence of events:

  - Task foo writes its TPIDRURW register from userspace and then issues a
    fork() system call. No context switch occurs between these two events.

  - We start creating the child task, bar, and end up in copy_thread, where
    current_thread_info() is foo's struct thread_info, which still contains
    the *old* TPIDRURW value.

  - We copy out the stale value into bar, which is then scheduled with an
    old TPIDRURW value.

The solution is to reload the value sitting in the register in copy_thread,
rather than relying on the thread_info being up-to-date. That's why I
previously suggested not using asm macros for the getters.

>       thread_notify(THREAD_NOTIFY_COPY, thread);
>  
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 03deeff..2bc1514 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
>  #endif
>  
>               case PTRACE_GET_THREAD_AREA:
> -                     ret = put_user(task_thread_info(child)->tp_value,
> +                     ret = put_user(task_thread_info(child)->tp_value[0],
>                                      datap);
>                       break;

I'm guessing debuggers don't care about the new TLS value, or do we need a
new ptrace request?

Will
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to