Re: [RFC PATCH 3/8] powerpc/64s: put the per-cpu data_offset in r14
On Wed, 20 Dec 2017 18:53:24 +0100 Gabriel Paubert wrote:

> On Thu, Dec 21, 2017 at 12:52:01AM +1000, Nicholas Piggin wrote:
> > Shifted left by 16 bits, so the low 16 bits of r14 remain available.
> > This allows per-cpu pointers to be dereferenced with a single extra
> > shift whereas previously it was a load and add.
> > ---
> >  arch/powerpc/include/asm/paca.h   |  5 +++++
> >  arch/powerpc/include/asm/percpu.h |  2 +-
> >  arch/powerpc/kernel/entry_64.S    |  5 -----
> >  arch/powerpc/kernel/head_64.S     |  5 +----
> >  arch/powerpc/kernel/setup_64.c    | 11 +++++++++--
> >  5 files changed, 16 insertions(+), 12 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> > index cd6a9a010895..4dd4ac69e84f 100644
> > --- a/arch/powerpc/include/asm/paca.h
> > +++ b/arch/powerpc/include/asm/paca.h
> > @@ -35,6 +35,11 @@
> >
> >  register struct paca_struct *local_paca asm("r13");
> >  #ifdef CONFIG_PPC_BOOK3S
> > +/*
> > + * The top 32-bits of r14 is used as the per-cpu offset, shifted by PAGE_SHIFT.
>
> Top 32, really? It's 48 in later comments.

Yep, I used 32 to start with but it wasn't enough. Will fix.

Thanks,
Nick
Re: [RFC PATCH 3/8] powerpc/64s: put the per-cpu data_offset in r14
On Thu, Dec 21, 2017 at 12:52:01AM +1000, Nicholas Piggin wrote:
> Shifted left by 16 bits, so the low 16 bits of r14 remain available.
> This allows per-cpu pointers to be dereferenced with a single extra
> shift whereas previously it was a load and add.
> ---
>  arch/powerpc/include/asm/paca.h   |  5 +++++
>  arch/powerpc/include/asm/percpu.h |  2 +-
>  arch/powerpc/kernel/entry_64.S    |  5 -----
>  arch/powerpc/kernel/head_64.S     |  5 +----
>  arch/powerpc/kernel/setup_64.c    | 11 +++++++++--
>  5 files changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index cd6a9a010895..4dd4ac69e84f 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -35,6 +35,11 @@
>
>  register struct paca_struct *local_paca asm("r13");
>  #ifdef CONFIG_PPC_BOOK3S
> +/*
> + * The top 32-bits of r14 is used as the per-cpu offset, shifted by PAGE_SHIFT.

Top 32, really? It's 48 in later comments.

	Gabriel

> + * The per-cpu could be moved completely to vmalloc space if we had large
> + * vmalloc page mapping? (no, must access it in real mode).
> + */
>  register u64 local_r14 asm("r14");
>  #endif
>
> diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h
> index dce863a7635c..1e0d79d30eac 100644
> --- a/arch/powerpc/include/asm/percpu.h
> +++ b/arch/powerpc/include/asm/percpu.h
> @@ -12,7 +12,7 @@
>
>  #include <asm/paca.h>
>
> -#define __my_cpu_offset local_paca->data_offset
> +#define __my_cpu_offset (local_r14 >> 16)
>
>  #endif /* CONFIG_SMP */
>  #endif /* __powerpc64__ */
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 592e4b36065f..6b0e3ac311e8 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -262,11 +262,6 @@ system_call_exit:
>  BEGIN_FTR_SECTION
>  	stdcx.	r0,0,r1			/* to clear the reservation */
>  END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
> -	LOAD_REG_IMMEDIATE(r10, 0xdeadbeefULL << 32)
> -	mfspr	r11,SPRN_PIR
> -	or	r10,r10,r11
> -	tdne	r10,r14
> -
>  	andi.	r6,r8,MSR_PR
>  	ld	r4,_LINK(r1)
>
> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
> index 5a9ec06eab14..cdb710f43681 100644
> --- a/arch/powerpc/kernel/head_64.S
> +++ b/arch/powerpc/kernel/head_64.S
> @@ -413,10 +413,7 @@ generic_secondary_common_init:
>  	b	kexec_wait		/* next kernel might do better */
>
>  2:	SET_PACA(r13)
> -	LOAD_REG_IMMEDIATE(r14, 0xdeadbeef << 32)
> -	mfspr	r3,SPRN_PIR
> -	or	r14,r14,r3
> -	std	r14,PACA_R14(r13)
> +	ld	r14,PACA_R14(r13)
>
>  #ifdef CONFIG_PPC_BOOK3E
>  	addi	r12,r13,PACA_EXTLB	/* and TLB exc frame in another */
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 9a4c5bf35d92..f4a96ebb523a 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -192,8 +192,8 @@ static void __init fixup_boot_paca(void)
>  	get_paca()->data_offset = 0;
>  	/* Mark interrupts disabled in PACA */
>  	irq_soft_mask_set(IRQ_SOFT_MASK_STD);
> -	/* Set r14 and paca_r14 to debug value */
> -	get_paca()->r14 = (0xdeadbeefULL << 32) | mfspr(SPRN_PIR);
> +	/* Set r14 and paca_r14 to zero */
> +	get_paca()->r14 = 0;
>  	local_r14 = get_paca()->r14;
>  }
>
> @@ -761,7 +761,14 @@ void __init setup_per_cpu_areas(void)
>  	for_each_possible_cpu(cpu) {
>  		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
>  		paca[cpu].data_offset = __per_cpu_offset[cpu];
> +
> +		BUG_ON(paca[cpu].data_offset & (PAGE_SIZE-1));
> +		BUG_ON(paca[cpu].data_offset >= (1UL << (64 - 16)));
> +
> +		/* The top 48 bits are used for per-cpu data */
> +		paca[cpu].r14 |= paca[cpu].data_offset << 16;
>  	}
> +	local_r14 = paca[smp_processor_id()].r14;
>  }
>  #endif
>
> --
> 2.15.0
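To make the packing concrete for readers following the 32-vs-48-bit point above, here is a minimal C sketch of what the patch does (the helper names and the <linux/types.h> include are mine, purely for illustration, not part of the patch): data_offset is page aligned and checked to be below 1UL << 48, so shifting it left by 16 fills the top 48 bits of r14 while leaving the low 16 bits free, and shifting right by 16 recovers it, matching the new __my_cpu_offset.

#include <linux/types.h>

/* Illustrative only: pack a page-aligned per-cpu offset into the top
 * 48 bits of r14 while preserving whatever lives in the low 16 bits. */
static inline u64 r14_pack_offset(u64 r14, u64 data_offset)
{
	return (r14 & 0xffffUL) | (data_offset << 16);
}

/* Recover the offset again; mirrors the new __my_cpu_offset definition. */
static inline u64 r14_unpack_offset(u64 r14)
{
	return r14 >> 16;
}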
[RFC PATCH 3/8] powerpc/64s: put the per-cpu data_offset in r14
Shifted left by 16 bits, so the low 16 bits of r14 remain available.
This allows per-cpu pointers to be dereferenced with a single extra
shift whereas previously it was a load and add.
---
 arch/powerpc/include/asm/paca.h   |  5 +++++
 arch/powerpc/include/asm/percpu.h |  2 +-
 arch/powerpc/kernel/entry_64.S    |  5 -----
 arch/powerpc/kernel/head_64.S     |  5 +----
 arch/powerpc/kernel/setup_64.c    | 11 +++++++++--
 5 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index cd6a9a010895..4dd4ac69e84f 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -35,6 +35,11 @@
 
 register struct paca_struct *local_paca asm("r13");
 #ifdef CONFIG_PPC_BOOK3S
+/*
+ * The top 32-bits of r14 is used as the per-cpu offset, shifted by PAGE_SHIFT.
+ * The per-cpu could be moved completely to vmalloc space if we had large
+ * vmalloc page mapping? (no, must access it in real mode).
+ */
 register u64 local_r14 asm("r14");
 #endif
 
diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h
index dce863a7635c..1e0d79d30eac 100644
--- a/arch/powerpc/include/asm/percpu.h
+++ b/arch/powerpc/include/asm/percpu.h
@@ -12,7 +12,7 @@
 
 #include <asm/paca.h>
 
-#define __my_cpu_offset local_paca->data_offset
+#define __my_cpu_offset (local_r14 >> 16)
 
 #endif /* CONFIG_SMP */
 #endif /* __powerpc64__ */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 592e4b36065f..6b0e3ac311e8 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -262,11 +262,6 @@ system_call_exit:
 BEGIN_FTR_SECTION
 	stdcx.	r0,0,r1			/* to clear the reservation */
 END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-	LOAD_REG_IMMEDIATE(r10, 0xdeadbeefULL << 32)
-	mfspr	r11,SPRN_PIR
-	or	r10,r10,r11
-	tdne	r10,r14
-
 	andi.	r6,r8,MSR_PR
 	ld	r4,_LINK(r1)
 
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 5a9ec06eab14..cdb710f43681 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -413,10 +413,7 @@ generic_secondary_common_init:
 	b	kexec_wait		/* next kernel might do better */
 
 2:	SET_PACA(r13)
-	LOAD_REG_IMMEDIATE(r14, 0xdeadbeef << 32)
-	mfspr	r3,SPRN_PIR
-	or	r14,r14,r3
-	std	r14,PACA_R14(r13)
+	ld	r14,PACA_R14(r13)
 
 #ifdef CONFIG_PPC_BOOK3E
 	addi	r12,r13,PACA_EXTLB	/* and TLB exc frame in another */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 9a4c5bf35d92..f4a96ebb523a 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -192,8 +192,8 @@ static void __init fixup_boot_paca(void)
 	get_paca()->data_offset = 0;
 	/* Mark interrupts disabled in PACA */
 	irq_soft_mask_set(IRQ_SOFT_MASK_STD);
-	/* Set r14 and paca_r14 to debug value */
-	get_paca()->r14 = (0xdeadbeefULL << 32) | mfspr(SPRN_PIR);
+	/* Set r14 and paca_r14 to zero */
+	get_paca()->r14 = 0;
 	local_r14 = get_paca()->r14;
 }
 
@@ -761,7 +761,14 @@ void __init setup_per_cpu_areas(void)
 	for_each_possible_cpu(cpu) {
 		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
 		paca[cpu].data_offset = __per_cpu_offset[cpu];
+
+		BUG_ON(paca[cpu].data_offset & (PAGE_SIZE-1));
+		BUG_ON(paca[cpu].data_offset >= (1UL << (64 - 16)));
+
+		/* The top 48 bits are used for per-cpu data */
+		paca[cpu].r14 |= paca[cpu].data_offset << 16;
 	}
+	local_r14 = paca[smp_processor_id()].r14;
 }
 #endif

-- 
2.15.0
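As a rough sketch of the code-generation difference the changelog describes (the function names below are illustrative only and not from the patch; local_paca and local_r14 come from asm/paca.h): with the old definition a per-cpu dereference loads data_offset from the PACA through r13 and then adds it, whereas with the new definition the offset is already in r14 and only a right shift is needed before the add.

#include <linux/types.h>

/* Old scheme (sketch): one load from the PACA plus one add. */
static inline void *my_cpu_ptr_old(void *base)
{
	return (char *)base + local_paca->data_offset;
}

/* New scheme (sketch): the offset sits in the top 48 bits of r14,
 * so a single right shift replaces the load before the add. */
static inline void *my_cpu_ptr_new(void *base)
{
	return (char *)base + (local_r14 >> 16);
}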