Re: [PATCH v5 17/22] powerpc/syscall: Do not check unsupported scv vector on PPC32

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 4:13 pm:
> 
> 
> Le 09/02/2021 à 03:00, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> Only PPC64 has scv. No need to check the 0x7ff0 trap on PPC32.
>>> For that, add a helper trap_is_unsupported_scv() similar to
>>> trap_is_scv().
>>>
>>> And ignore the scv parameter in syscall_exit_prepare (saves 14 cycles,
>>> 346 => 332 cycles).
>>>
>>> Signed-off-by: Christophe Leroy 
>>> ---
>>> v5: Added a helper trap_is_unsupported_scv()
>>> ---
>>>   arch/powerpc/include/asm/ptrace.h | 5 +
>>>   arch/powerpc/kernel/entry_32.S| 1 -
>>>   arch/powerpc/kernel/interrupt.c   | 7 +--
>>>   3 files changed, 10 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/ptrace.h 
>>> b/arch/powerpc/include/asm/ptrace.h
>>> index 58f9dc060a7b..2c842b11a924 100644
>>> --- a/arch/powerpc/include/asm/ptrace.h
>>> +++ b/arch/powerpc/include/asm/ptrace.h
>>> @@ -229,6 +229,11 @@ static inline bool trap_is_scv(struct pt_regs *regs)
>>> return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000);
>>>   }
>>>   
>>> +static inline bool trap_is_unsupported_scv(struct pt_regs *regs)
>>> +{
>>> +   return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0);
>>> +}
>> 
>> This change is good.
>> 
>>> +
>>>   static inline bool trap_is_syscall(struct pt_regs *regs)
>>>   {
>>> return (trap_is_scv(regs) || TRAP(regs) == 0xc00);
>>> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
>>> index cffe58e63356..7c824e8928d0 100644
>>> --- a/arch/powerpc/kernel/entry_32.S
>>> +++ b/arch/powerpc/kernel/entry_32.S
>>> @@ -344,7 +344,6 @@ transfer_to_syscall:
>>>   
>>>   ret_from_syscall:
>>> addir4,r1,STACK_FRAME_OVERHEAD
>>> -   li  r5,0
>>> bl  syscall_exit_prepare
>> 
>> For this one, I think it would be nice to do the "right" thing and make
>> the function prototypes different on !64S. They could then declare a
>> local const bool scv = 0.
>> 
>> We could have syscall_exit_prepare and syscall_exit_prepare_maybe_scv
>> or something like that, 64s can use the latter one and the former can be
>> a wrapper that passes constant 0 for scv. Then we don't have different
>> prototypes for the same function, but you just have to make the 32-bit
>> version static inline and the 64-bit version exported to asm.
> 
> You can't call a static inline function from ASM; I don't understand you.

I mean

#ifdef CONFIG_PPC_BOOK3S_64
notrace unsigned long syscall_exit_prepare_scv(unsigned long r3,
   struct pt_regs *regs,
   long scv)
#else
static inline long syscall_exit_prepare_scv(unsigned long r3,
   struct pt_regs *regs,
   long scv)
#endif

#ifndef CONFIG_PPC_BOOK3S_64
notrace unsigned long syscall_exit_prepare(unsigned long r3,
   struct pt_regs *regs)
{
return syscall_exit_prepare_scv(r3, regs, 0);
}
#endif


> 
> What exactly is wrong for you here ? Is it the fact that we leave scv
> random, or is it the IS_ENABLED() below ?

That scv arg is random. I know the generated code would be essentially no
different and there is no possibility of tracing, but I would just prefer
to call the C "correctly" if possible.

> I don't mind keeping the 'li r5,0' before calling the function if you
> find it cleaner; the real performance gain is in setting scv to 0 below
> for PPC32 (and maybe it should be set to zero for book3e/64 too ?).

Yes 64e would like this optimisation.

Thanks,
Nick
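
A fleshed-out version of the fragment above, for readers following along:
only the function names come from the mail, the bodies are an illustrative
sketch rather than the actual kernel code.

	#ifdef CONFIG_PPC_BOOK3S_64
	/* Called directly from 64s asm, which passes a real scv value. */
	notrace unsigned long syscall_exit_prepare_scv(unsigned long r3,
						       struct pt_regs *regs,
						       long scv)
	#else
	/*
	 * Inlined into the wrapper below; scv is the compile-time constant 0,
	 * so every scv-conditional path folds away.
	 */
	static inline unsigned long syscall_exit_prepare_scv(unsigned long r3,
							     struct pt_regs *regs,
							     long scv)
	#endif
	{
		/* ... common syscall exit work ... */
		return r3;
	}

	#ifndef CONFIG_PPC_BOOK3S_64
	/* Called from 32-bit asm, which then no longer needs to load r5. */
	notrace unsigned long syscall_exit_prepare(unsigned long r3,
						   struct pt_regs *regs)
	{
		return syscall_exit_prepare_scv(r3, regs, 0);
	}
	#endif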


Re: [PATCH v5 09/22] powerpc/syscall: Make interrupt.c buildable on PPC32

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 4:02 pm:
> 
> 
> Le 09/02/2021 à 02:27, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> To allow building interrupt.c on PPC32, ifdef out PPC64-specific
>>> code or use helpers which are available on both PPC32 and PPC64.
>>>
>>> Modify Makefile to always build interrupt.o
>>>
>>> Signed-off-by: Christophe Leroy 
>>> ---
>>> v5:
>>> - Also for interrupt exit preparation
>>> - Opted out kuap-related code; ppc32 keeps it in ASM for the time being
>>> ---
>>>   arch/powerpc/kernel/Makefile|  4 ++--
>>>   arch/powerpc/kernel/interrupt.c | 31 ---
>>>   2 files changed, 26 insertions(+), 9 deletions(-)
>>>
> 
>>> diff --git a/arch/powerpc/kernel/interrupt.c 
>>> b/arch/powerpc/kernel/interrupt.c
>>> index d6be4f9a67e5..2dac4d2bb1cf 100644
>>> --- a/arch/powerpc/kernel/interrupt.c
>>> +++ b/arch/powerpc/kernel/interrupt.c
>>> @@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
>>> BUG_ON(!(regs->msr & MSR_RI));
>>> BUG_ON(!(regs->msr & MSR_PR));
>>> BUG_ON(!FULL_REGS(regs));
>>> -   BUG_ON(regs->softe != IRQS_ENABLED);
>>> +   BUG_ON(arch_irq_disabled_regs(regs));
>>>   
>>>   #ifdef CONFIG_PPC_PKEY
>>> if (mmu_has_feature(MMU_FTR_PKEY)) {
>>> @@ -65,7 +65,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
>>> isync();
>>> } else
>>>   #endif
>>> +#ifdef CONFIG_PPC64
>>> kuap_check_amr();
>>> +#endif
>> 
>> Wouldn't mind trying to get rid of these ifdefs at some point, but
>> there's some kuap / keys changes going on recently so I'm happy enough
>> to let this settle then look at whether we can refactor.
> 
> I have a follow-up series that implements interrupt entries/exits in C
> and removes all kuap assembly; I will likely release it as RFC later today.
> 
>> 
>>>   
>>> account_cpu_user_entry();
>>>   
>>> @@ -318,7 +323,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>>> return ret;
>>>   }
>>>   
>>> -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
>>> +#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
>>>   notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
>>>   {
>>>   #ifdef CONFIG_PPC_BOOK3E
>> 
>> Why are you building this for 32? I don't mind if it's just to keep
>> things similar and make it build for now, but you're not using it yet,
>> right?
> 
> The series using that will follow; I thought it would be worth doing this at once.

Yeah that's fine by me then.

Thanks,
Nick
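
One way to later drop ifdefs like the CONFIG_PPC64 guard around
kuap_check_amr() above is a header stub, along these lines (a sketch
assuming no 32-bit implementation exists yet; this is not part of the patch):

	#ifdef CONFIG_PPC64
	void kuap_check_amr(void);	/* real 64-bit version lives elsewhere */
	#else
	static inline void kuap_check_amr(void) { }	/* nothing to check on ppc32 */
	#endif

The call site then compiles unconditionally and the empty call vanishes
on PPC32.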


Re: [PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 4:18 pm:
> 
> 
> Le 09/02/2021 à 02:11, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> regs->softe doesn't exist on PPC32.
>>>
>>> Add irq_soft_mask_regs_set_state() helper to set regs->softe.
>>> This helper is a no-op on PPC32.
>>>
>>> Signed-off-by: Christophe Leroy 
>>> ---
>> 
>> You could do the same with the kuap_ functions to change some ifdefs
>> to IS_ENABLED.
>> 
>> That's just my preference but if you prefer this way I guess that's
>> okay.
>> 
> 
> 
> That's also my preference on the long term.
> 
> Here it is ephemeral: I have a follow-up series implementing interrupt
> exit/entry in C and getting rid of all the assembly kuap, hence getting
> rid of those ifdefs.

I thought it might have been because you hate ifdefs more than most :)
 
> The issue I see when using IS_ENABLED() is that you have to indent to
> the right, and then you interfere with the file history and 'git blame'.

Valid point if it's just going to indent back the other way in your next 
series.

> Thanks for reviewing my series, and looking forward to your feedback on
> the interrupt entry/exit series that I will likely release later today.

Cool, I'm eager to see them.

Thanks,
Nick


Re: [PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 3:57 pm:
> 
> 
> Le 09/02/2021 à 02:11, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> regs->softe doesn't exist on PPC32.
>>>
>>> Add irq_soft_mask_regs_set_state() helper to set regs->softe.
>>> This helper is a no-op on PPC32.
>>>
>>> Signed-off-by: Christophe Leroy 
>>> ---
>>>   arch/powerpc/include/asm/hw_irq.h | 11 +--
>>>   1 file changed, 9 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/hw_irq.h 
>>> b/arch/powerpc/include/asm/hw_irq.h
>>> index 614957f74cee..ed0c3b049dfd 100644
>>> --- a/arch/powerpc/include/asm/hw_irq.h
>>> +++ b/arch/powerpc/include/asm/hw_irq.h
>>> @@ -38,6 +38,8 @@
>>>   #define PACA_IRQ_MUST_HARD_MASK   (PACA_IRQ_EE)
>>>   #endif
>>>   
>>> +#endif /* CONFIG_PPC64 */
>>> +
>>>   /*
>>>* flags for paca->irq_soft_mask
>>>*/
>>> @@ -46,8 +48,6 @@
>>>   #define IRQS_PMI_DISABLED 2
>>>   #define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED)
>>>   
>>> -#endif /* CONFIG_PPC64 */
>>> -
>>>   #ifndef __ASSEMBLY__
>>>   
>>>   #ifdef CONFIG_PPC64
>>> @@ -287,6 +287,10 @@ extern void irq_set_pending_from_srr1(unsigned long 
>>> srr1);
>>>   
>>>   extern void force_external_irq_replay(void);
>>>   
>>> +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, 
>>> unsigned long val)
>>> +{
>>> +   regs->softe = val;
>>> +}
>>>   #else /* CONFIG_PPC64 */
>>>   
>>>   static inline unsigned long arch_local_save_flags(void)
>>> @@ -355,6 +359,9 @@ static inline bool arch_irq_disabled_regs(struct 
>>> pt_regs *regs)
>>>   
>>>   static inline void may_hard_irq_enable(void) { }
>>>   
>>> +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, 
>>> unsigned long val)
>>> +{
>>> +}
>>>   #endif /* CONFIG_PPC64 */
>>>   
>>>   #define ARCH_IRQ_INIT_FLAGS   IRQ_NOREQUEST
>> 
>> What I don't like about this where you use it is it kind of pollutes
>> the ppc32 path with this function which is not valid to use.
>> 
>> I would prefer if you had this purely so it could compile with:
>> 
>>	if (IS_ENABLED(CONFIG_PPC64))
>>irq_soft_mask_regs_set_state(regs, blah);
>> 
>> And then you could make the ppc32 version cause a link error if it did
>> not get eliminated at compile time (e.g., call an undefined function).
>> 
>> You could do the same with the kuap_ functions to change some ifdefs
>> to IS_ENABLED.
>> 
>> That's just my preference but if you prefer this way I guess that's
>> okay.
> 
> I see you didn't change your mind since last April :)
> 
> I'll see what I can do.

If you have more patches in the works and will do some cleanup passes I 
don't mind so much.

Thanks,
Nick
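
The link-error trick described above can be sketched as follows; the
*_not_possible stub name is hypothetical:

	#ifdef CONFIG_PPC64
	static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs,
							unsigned long val)
	{
		regs->softe = val;
	}
	#else
	/*
	 * Deliberately never defined anywhere: if a PPC32 call survives to
	 * link time, the build fails instead of silently doing nothing.
	 */
	extern void irq_soft_mask_regs_set_state_not_possible(void);

	static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs,
							unsigned long val)
	{
		irq_soft_mask_regs_set_state_not_possible();
	}
	#endif

Callers guard with if (IS_ENABLED(CONFIG_PPC64)), so on PPC32 the branch
is dead code and the undefined symbol is eliminated before the linker
ever sees it.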


Re: [PATCH v7 32/42] powerpc/64: context tracking move to interrupt wrappers

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 3:49 pm:
> 
> 
> Le 30/01/2021 à 14:08, Nicholas Piggin a écrit :
>> This moves exception_enter/exit calls to wrapper functions for
>> synchronous interrupts. More interrupt handlers are covered by
>> this than previously.
> 
> Why did you enclose everything in #ifdef CONFIG_PPC64 ? As far as I
> understand, before this patch exception_enter() and exception_exit()
> were also called on PPC32.

PPC32 never selects CONTEXT_TRACKING AFAIKS, but I'm not sure. I worried 
ctx_state would not be no-oped, but if it's all inlined into the same
function then maybe the compiler will eliminate it.

On the other hand I may move some of the wrapper into its own function 
if that helps code size, but we can do something about it then...

Hmm, end result is it shouldn't matter for PPC32 at the moment.
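
For reference, when CONTEXT_TRACKING is not selected the helpers reduce
to stubs roughly like the following (paraphrased, not the verbatim
header), so an inlined prev_state should be a constant 0 the compiler
can fold away:

	static inline enum ctx_state exception_enter(void) { return 0; }
	static inline void exception_exit(enum ctx_state prev_ctx) { }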

Thanks,
Nick

> 
> Christophe
> 
> 
>> 
>> Signed-off-by: Nicholas Piggin 
>> ---
>>   arch/powerpc/include/asm/interrupt.h  |  9 
>>   arch/powerpc/kernel/traps.c   | 74 ++-
>>   arch/powerpc/mm/book3s64/hash_utils.c |  3 --
>>   arch/powerpc/mm/fault.c   |  9 +---
>>   4 files changed, 27 insertions(+), 68 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/interrupt.h 
>> b/arch/powerpc/include/asm/interrupt.h
>> index 488bdd5bd922..e65ce3e2b071 100644
>> --- a/arch/powerpc/include/asm/interrupt.h
>> +++ b/arch/powerpc/include/asm/interrupt.h
>> @@ -7,10 +7,16 @@
>>   #include 
>>   
>>   struct interrupt_state {
>> +#ifdef CONFIG_PPC64
>> +enum ctx_state ctx_state;
>> +#endif
>>   };
>>   
>>   static inline void interrupt_enter_prepare(struct pt_regs *regs, struct 
>> interrupt_state *state)
>>   {
>> +#ifdef CONFIG_PPC64
>> +state->ctx_state = exception_enter();
>> +#endif
>>   }
>>   
>>   /*
>> @@ -29,6 +35,9 @@ static inline void interrupt_enter_prepare(struct pt_regs 
>> *regs, struct interrup
>>*/
>>   static inline void interrupt_exit_prepare(struct pt_regs *regs, struct 
>> interrupt_state *state)
>>   {
>> +#ifdef CONFIG_PPC64
>> +exception_exit(state->ctx_state);
>> +#endif
>>   }
>>   
>>   static inline void interrupt_async_enter_prepare(struct pt_regs *regs, 
>> struct interrupt_state *state)
>> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
>> index da488e62fb5f..21fd14828827 100644
>> --- a/arch/powerpc/kernel/traps.c
>> +++ b/arch/powerpc/kernel/traps.c
>> @@ -1087,41 +1087,28 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
>>   
>>   DEFINE_INTERRUPT_HANDLER(unknown_exception)
>>   {
>> -enum ctx_state prev_state = exception_enter();
>> -
>>  printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
>> regs->nip, regs->msr, regs->trap);
>>   
>>  _exception(SIGTRAP, regs, TRAP_UNK, 0);
>> -
>> -exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
>>   {
>> -enum ctx_state prev_state = exception_enter();
>> -
>>  printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
>> regs->nip, regs->msr, regs->trap);
>>   
>>  _exception(SIGTRAP, regs, TRAP_UNK, 0);
>> -
>> -exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
>>   {
>> -enum ctx_state prev_state = exception_enter();
>> -
>>  if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
>>  5, SIGTRAP) == NOTIFY_STOP)
>> -goto bail;
>> +return;
>>  if (debugger_iabr_match(regs))
>> -goto bail;
>> +return;
>>  _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
>> -
>> -bail:
>> -exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(RunModeException)
>> @@ -1131,8 +1118,6 @@ DEFINE_INTERRUPT_HANDLER(RunModeException)
>>   
>>   DEFINE_INTERRUPT_HANDLER(single_step_exception)
>>   {
>> -enum ctx_state prev_state = exception_enter();
>> -
>>  clear_single_step(regs);
>>  clear_br_trace(regs);
>>   
>> @@ -1141,14 +1126,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
>>   
>>  if (notify_die(DIE_SSTEP, "single_step", regs, 5,
>>  5, SIGTRAP) == NOTIFY_STOP)
>> -goto bail;
>> +return;
>>  if (debugger_sstep(regs))
>> -goto bail;
>> +return;
>>   
>>  _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
>> -
>> -bail:
>> -exception_exit(prev_state);
>>   }
>>   NOKPROBE_SYMBOL(single_step_exception);
>>   
>> @@ -1476,7 +1458,6 @@ static inline int emulate_math(struct pt_regs *regs) { 
>> return -1; }
>>   
>>   DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   {
>> -enum ctx_state prev_state = exception_enter();
>>  unsigned int reason = get_reason(regs);
>>   
>>  /* We can now get here via a FP Unavailable exception if the core
>> @@ -1485,22 +1466,22 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)

Re: [RFC PATCH v3 0/6] Restricted DMA

2021-02-08 Thread Claire Chang
v4 here: https://lore.kernel.org/patchwork/cover/1378113/


[PATCH v4 14/14] of: Add plumbing for restricted DMA pool

2021-02-08 Thread Claire Chang
If a device is not behind an IOMMU, we look up the device node and set
up the restricted DMA when the restricted-dma-pool property is present.

Signed-off-by: Claire Chang 
---
 drivers/of/address.c| 25 +
 drivers/of/device.c |  3 +++
 drivers/of/of_private.h |  5 +
 3 files changed, 33 insertions(+)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 73ddf2540f3f..b6093c9b135d 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1094,3 +1095,27 @@ bool of_dma_is_coherent(struct device_node *np)
return false;
 }
 EXPORT_SYMBOL_GPL(of_dma_is_coherent);
+
+int of_dma_set_restricted_buffer(struct device *dev)
+{
+   struct device_node *node;
+   int count, i;
+
+   if (!dev->of_node)
+   return 0;
+
+   count = of_property_count_elems_of_size(dev->of_node, "memory-region",
+   sizeof(phandle));
+   for (i = 0; i < count; i++) {
+   node = of_parse_phandle(dev->of_node, "memory-region", i);
+   /* There might be multiple memory regions, but only one
+* restricted-dma-pool region is allowed.
+*/
+   if (of_device_is_compatible(node, "restricted-dma-pool") &&
+   of_device_is_available(node))
+   return of_reserved_mem_device_init_by_idx(
+   dev, dev->of_node, i);
+   }
+
+   return 0;
+}
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 1122daa8e273..38c631f1fafa 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -186,6 +186,9 @@ int of_dma_configure_id(struct device *dev, struct 
device_node *np,
 
arch_setup_dma_ops(dev, dma_start, size, iommu, coherent);
 
+   if (!iommu)
+   return of_dma_set_restricted_buffer(dev);
+
return 0;
 }
 EXPORT_SYMBOL_GPL(of_dma_configure_id);
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index d9e6a324de0a..28a2dfa197ba 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -161,12 +161,17 @@ struct bus_dma_region;
 #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA)
 int of_dma_get_range(struct device_node *np,
const struct bus_dma_region **map);
+int of_dma_set_restricted_buffer(struct device *dev);
 #else
 static inline int of_dma_get_range(struct device_node *np,
const struct bus_dma_region **map)
 {
return -ENODEV;
 }
+static inline int of_dma_set_restricted_buffer(struct device *dev)
+{
+   return -ENODEV;
+}
 #endif
 
 #endif /* _LINUX_OF_PRIVATE_H */
-- 
2.30.0.478.g8a0d178c01-goog



[PATCH v4 13/14] dt-bindings: of: Add restricted DMA pool

2021-02-08 Thread Claire Chang
Introduce the new compatible string, restricted-dma-pool, for restricted
DMA. One can specify the address and length of the restricted DMA memory
region via a restricted-dma-pool child node of the reserved-memory node.

Signed-off-by: Claire Chang 
---
 .../reserved-memory/reserved-memory.txt   | 24 +++
 1 file changed, 24 insertions(+)

diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
index e8d3096d922c..fc9a12c2f679 100644
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
@@ -51,6 +51,20 @@ compatible (optional) - standard definition
   used as a shared pool of DMA buffers for a set of devices. It can
   be used by an operating system to instantiate the necessary pool
   management subsystem if necessary.
+- restricted-dma-pool: This indicates a region of memory meant to be
+  used as a pool of restricted DMA buffers for a set of devices. The
+  memory region would be the only region accessible to those devices.
+  When using this, the no-map and reusable properties must not be set,
+  so the operating system can create a virtual mapping that will be used
+  for synchronization. The main purpose for restricted DMA is to
+  mitigate the lack of DMA access control on systems without an IOMMU,
+  which could result in the DMA accessing the system memory at
+  unexpected times and/or unexpected addresses, possibly leading to data
+  leakage or corruption. The feature on its own provides a basic level
+  of protection against the DMA overwriting buffer contents at
+  unexpected times. However, to protect against general data leakage and
+  system memory corruption, the system needs to provide a way to lock
+  down the memory access, e.g., an MPU.
 - vendor specific string in the form ,[-]
 no-map (optional) - empty property
 - Indicates the operating system must not create a virtual mapping
@@ -120,6 +134,11 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
	compatible = "acme,multimedia-memory";
	reg = <0x77000000 0x4000000>;
};
+
+   restricted_dma_mem_reserved: restricted_dma_mem_reserved {
+   compatible = "restricted-dma-pool";
+   reg = <0x50000000 0x400000>;
+   };
};
 
/* ... */
@@ -138,4 +157,9 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
	memory-region = <&multimedia_reserved>;
/* ... */
};
+
+   pcie_device: pcie_device@0,0 {
+   memory-region = <&restricted_dma_mem_reserved>;
+   /* ... */
+   };
 };
-- 
2.30.0.478.g8a0d178c01-goog



[PATCH v4 12/14] swiotlb: Add restricted DMA alloc/free support.

2021-02-08 Thread Claire Chang
Add the functions, dev_swiotlb_{alloc,free} to support the memory
allocation from restricted DMA pool.

Signed-off-by: Claire Chang 
---
 include/linux/swiotlb.h |  2 ++
 kernel/dma/direct.c | 30 ++
 kernel/dma/swiotlb.c| 34 ++
 3 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b9f2a250c8da..2cd39e102915 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -74,6 +74,8 @@ extern enum swiotlb_force swiotlb_force;
 #ifdef CONFIG_DMA_RESTRICTED_POOL
 bool is_swiotlb_force(struct device *dev);
 bool is_dev_swiotlb_force(struct device *dev);
+struct page *dev_swiotlb_alloc(struct device *dev, size_t size, gfp_t gfp);
+bool dev_swiotlb_free(struct device *dev, struct page *page, size_t size);
 #else
 static inline bool is_swiotlb_force(struct device *dev)
 {
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index a76a1a2f24da..f9a9321f7559 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "direct.h"
 
@@ -77,6 +78,10 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t 
phys, size_t size)
 
 static void __dma_direct_free_pages(struct device *dev, struct page *page, 
size_t size)
 {
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   if (dev_swiotlb_free(dev, page, size))
+   return;
+#endif
dma_free_contiguous(dev, page, size);
 }
 
@@ -89,6 +94,12 @@ static struct page *__dma_direct_alloc_pages(struct device 
*dev, size_t size,
 
WARN_ON_ONCE(!PAGE_ALIGNED(size));
 
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   page = dev_swiotlb_alloc(dev, size, gfp);
+   if (page)
+   return page;
+#endif
+
gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
   &phys_limit);
page = dma_alloc_contiguous(dev, size, gfp);
@@ -147,7 +158,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
gfp |= __GFP_NOWARN;
 
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-   !force_dma_unencrypted(dev)) {
+   !force_dma_unencrypted(dev) && !is_dev_swiotlb_force(dev)) {
page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
if (!page)
return NULL;
@@ -160,8 +171,8 @@ void *dma_direct_alloc(struct device *dev, size_t size,
}
 
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
-   !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-   !dev_is_dma_coherent(dev))
+   !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
+   !is_dev_swiotlb_force(dev))
return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
 
/*
@@ -171,7 +182,9 @@ void *dma_direct_alloc(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
!gfpflags_allow_blocking(gfp) &&
(force_dma_unencrypted(dev) ||
-(IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev))))
+(IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ !dev_is_dma_coherent(dev))) &&
+   !is_dev_swiotlb_force(dev))
return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
/* we always manually zero the memory once we are done */
@@ -252,15 +265,15 @@ void dma_direct_free(struct device *dev, size_t size,
unsigned int page_order = get_order(size);
 
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-   !force_dma_unencrypted(dev)) {
+   !force_dma_unencrypted(dev) && !is_dev_swiotlb_force(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
dma_free_contiguous(dev, cpu_addr, size);
return;
}
 
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
-   !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-   !dev_is_dma_coherent(dev)) {
+   !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
+   !is_dev_swiotlb_force(dev)) {
arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
return;
}
@@ -288,7 +301,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, 
size_t size,
void *ret;
 
if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
-   force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp))
+   force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp) &&
+   !is_dev_swiotlb_force(dev))
return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
page = __dma_direct_alloc_pages(dev, size, gfp);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index fd9c1bd183ac..8b77fd64199e 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -836,6 +836,40 @@ late_initcall(swiotlb_create_default_debugfs);
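
The kernel/dma/swiotlb.c half of this diff is truncated in the archive.
Building on swiotlb_tbl_find_free_region() and swiotlb_tbl_release_region()
from patch 09, the two helpers plausibly look like this sketch (not the
verbatim patch):

	#ifdef CONFIG_DMA_RESTRICTED_POOL
	struct page *dev_swiotlb_alloc(struct device *dev, size_t size, gfp_t gfp)
	{
		struct swiotlb *swiotlb = dev->dev_swiotlb;
		phys_addr_t tlb_addr;
		int index;

		/* Only devices with a restricted pool allocate from it;
		 * gfp is unused here because the pool is preallocated. */
		if (!swiotlb)
			return NULL;

		index = swiotlb_tbl_find_free_region(dev,
				phys_to_dma_unencrypted(dev, swiotlb->start),
				size, 0);
		if (index < 0)
			return NULL;

		tlb_addr = swiotlb->start + (index << IO_TLB_SHIFT);
		return pfn_to_page(PHYS_PFN(tlb_addr));
	}

	bool dev_swiotlb_free(struct device *dev, struct page *page, size_t size)
	{
		struct swiotlb *swiotlb = dev->dev_swiotlb;
		phys_addr_t tlb_addr = page_to_phys(page);

		/* Not from this device's restricted pool: let the normal
		 * path free it. */
		if (!swiotlb || tlb_addr < swiotlb->start || tlb_addr >= swiotlb->end)
			return false;

		swiotlb_tbl_release_region(dev,
				(tlb_addr - swiotlb->start) >> IO_TLB_SHIFT, size);
		return true;
	}
	#endif /* CONFIG_DMA_RESTRICTED_POOL */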
 

[PATCH v4 11/14] swiotlb: Add is_dev_swiotlb_force()

2021-02-08 Thread Claire Chang
Add is_dev_swiotlb_force(), which returns true if the device has a
restricted DMA pool (i.e., dev->dev_swiotlb is set).

Signed-off-by: Claire Chang 
---
 include/linux/swiotlb.h | 9 +
 kernel/dma/swiotlb.c| 5 +
 2 files changed, 14 insertions(+)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 76f86c684524..b9f2a250c8da 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,11 +73,16 @@ extern enum swiotlb_force swiotlb_force;
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
 bool is_swiotlb_force(struct device *dev);
+bool is_dev_swiotlb_force(struct device *dev);
 #else
 static inline bool is_swiotlb_force(struct device *dev)
 {
return unlikely(swiotlb_force == SWIOTLB_FORCE);
 }
+static inline bool is_dev_swiotlb_force(struct device *dev)
+{
+   return false;
+}
 #endif /* CONFIG_DMA_RESTRICTED_POOL */
 
 bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr);
@@ -93,6 +98,10 @@ static inline bool is_swiotlb_force(struct device *dev)
 {
return false;
 }
+static inline bool is_dev_swiotlb_force(struct device *dev)
+{
+   return false;
+}
 static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
return false;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f64cbe6e84cc..fd9c1bd183ac 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -841,6 +841,11 @@ bool is_swiotlb_force(struct device *dev)
return unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dev_swiotlb;
 }
 
+bool is_dev_swiotlb_force(struct device *dev)
+{
+   return dev->dev_swiotlb;
+}
+
 static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
struct device *dev)
 {
-- 
2.30.0.478.g8a0d178c01-goog



[PATCH v4 10/14] dma-direct: Add a new wrapper __dma_direct_free_pages()

2021-02-08 Thread Claire Chang
Add a new wrapper __dma_direct_free_pages() that will be useful later
for dev_swiotlb_free().

Signed-off-by: Claire Chang 
---
 kernel/dma/direct.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 30ccbc08e229..a76a1a2f24da 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -75,6 +75,11 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t 
phys, size_t size)
min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
 }
 
+static void __dma_direct_free_pages(struct device *dev, struct page *page, 
size_t size)
+{
+   dma_free_contiguous(dev, page, size);
+}
+
 static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp_t gfp)
 {
@@ -237,7 +242,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
return NULL;
}
 out_free_pages:
-   dma_free_contiguous(dev, page, size);
+   __dma_direct_free_pages(dev, page, size);
return NULL;
 }
 
@@ -273,7 +278,7 @@ void dma_direct_free(struct device *dev, size_t size,
else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
arch_dma_clear_uncached(cpu_addr, size);
 
-   dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
+   __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size);
 }
 
 struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
@@ -310,7 +315,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, 
size_t size,
*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
return page;
 out_free_pages:
-   dma_free_contiguous(dev, page, size);
+   __dma_direct_free_pages(dev, page, size);
return NULL;
 }
 
@@ -329,7 +334,7 @@ void dma_direct_free_pages(struct device *dev, size_t size,
if (force_dma_unencrypted(dev))
set_memory_encrypted((unsigned long)vaddr, 1 << page_order);
 
-   dma_free_contiguous(dev, page, size);
+   __dma_direct_free_pages(dev, page, size);
 }
 
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
-- 
2.30.0.478.g8a0d178c01-goog



[PATCH v4 09/14] swiotlb: Refactor swiotlb_tbl_{map,unmap}_single

2021-02-08 Thread Claire Chang
Refactor swiotlb_tbl_{map,unmap}_single to make the code reusable for
dev_swiotlb_{alloc,free}.

Signed-off-by: Claire Chang 
---
 kernel/dma/swiotlb.c | 116 ++-
 1 file changed, 71 insertions(+), 45 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 6fdebde8fb1f..f64cbe6e84cc 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -509,14 +509,12 @@ static void swiotlb_bounce(phys_addr_t orig_addr, 
phys_addr_t tlb_addr,
}
 }
 
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
-   size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs)
+static int swiotlb_tbl_find_free_region(struct device *hwdev,
+   dma_addr_t tbl_dma_addr,
+   size_t alloc_size, unsigned long attrs)
 {
struct swiotlb *swiotlb = get_swiotlb(hwdev);
-   dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, swiotlb->start);
unsigned long flags;
-   phys_addr_t tlb_addr;
unsigned int nslots, stride, index, wrap;
int i;
unsigned long mask;
@@ -531,15 +529,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, 
phys_addr_t orig_addr,
 #endif
panic("Can not allocate SWIOTLB buffer earlier and can't now 
provide you with the DMA bounce buffer");
 
-   if (mem_encrypt_active())
-   pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
-
-   if (mapping_size > alloc_size) {
-   dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
- mapping_size, alloc_size);
-   return (phys_addr_t)DMA_MAPPING_ERROR;
-   }
-
mask = dma_get_seg_boundary(hwdev);
 
tbl_dma_addr &= mask;
@@ -601,7 +590,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, 
phys_addr_t orig_addr,
swiotlb->list[i] = 0;
	for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && swiotlb->list[i]; i--)
swiotlb->list[i] = ++count;
-   tlb_addr = swiotlb->start + (index << IO_TLB_SHIFT);
 
/*
 * Update the indices to avoid searching in the next
@@ -624,45 +612,20 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, 
phys_addr_t orig_addr,
if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
			 alloc_size, swiotlb->nslabs, tmp_io_tlb_used);
-   return (phys_addr_t)DMA_MAPPING_ERROR;
+   return -ENOMEM;
+
 found:
swiotlb->used += nslots;
	spin_unlock_irqrestore(&swiotlb->lock, flags);
 
-   /*
-* Save away the mapping from the original address to the DMA address.
-* This is needed when we sync the memory.  Then we sync the buffer if
-* needed.
-*/
-   for (i = 0; i < nslots; i++)
-   swiotlb->orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-   swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
-   return tlb_addr;
+   return index;
 }
 
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
- */
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
- size_t mapping_size, size_t alloc_size,
- enum dma_data_direction dir, unsigned long attrs)
+static void swiotlb_tbl_release_region(struct device *hwdev, int index, size_t size)
 {
struct swiotlb *swiotlb = get_swiotlb(hwdev);
unsigned long flags;
-   int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-   int index = (tlb_addr - swiotlb->start) >> IO_TLB_SHIFT;
-   phys_addr_t orig_addr = swiotlb->orig_addr[index];
-
-   /*
-* First, sync the memory before unmapping the entry
-*/
-   if (orig_addr != INVALID_PHYS_ADDR &&
-   !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-   swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+   int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 
/*
 * Return the buffer to the free list by setting the corresponding
@@ -694,6 +657,69 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, 
phys_addr_t tlb_addr,
	spin_unlock_irqrestore(&swiotlb->lock, flags);
 }
 
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
+  size_t mapping_size, 

[PATCH v4 08/14] swiotlb: Use restricted DMA pool if available

2021-02-08 Thread Claire Chang
Regardless of swiotlb setting, the restricted DMA pool is preferred if
available.

The restricted DMA pools provide a basic level of protection against the
DMA overwriting buffer contents at unexpected times. However, to protect
against general data leakage and system memory corruption, the system
needs to provide a way to lock down the memory access, e.g., MPU.

Signed-off-by: Claire Chang 
---
 include/linux/swiotlb.h | 13 +
 kernel/dma/direct.h |  2 +-
 kernel/dma/swiotlb.c| 20 +---
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index f13a52a97382..76f86c684524 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -71,6 +71,15 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
 
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+bool is_swiotlb_force(struct device *dev);
+#else
+static inline bool is_swiotlb_force(struct device *dev)
+{
+   return unlikely(swiotlb_force == SWIOTLB_FORCE);
+}
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
+
 bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr);
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
@@ -80,6 +89,10 @@ phys_addr_t get_swiotlb_start(struct device *dev);
 void __init swiotlb_adjust_size(unsigned long new_size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
+static inline bool is_swiotlb_force(struct device *dev)
+{
+   return false;
+}
 static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
return false;
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 7b83b1595989..b011db1b625d 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -87,7 +87,7 @@ static inline dma_addr_t dma_direct_map_page(struct device 
*dev,
phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t dma_addr = phys_to_dma(dev, phys);
 
-   if (unlikely(swiotlb_force == SWIOTLB_FORCE))
+   if (is_swiotlb_force(dev))
return swiotlb_map(dev, phys, size, dir, attrs);
 
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e22e7ae75f1c..6fdebde8fb1f 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -40,6 +40,7 @@
 #include 
 #endif
 #ifdef CONFIG_DMA_RESTRICTED_POOL
+#include 
 #include 
 #include 
 #include 
@@ -109,6 +110,10 @@ static struct swiotlb default_swiotlb;
 
 static inline struct swiotlb *get_swiotlb(struct device *dev)
 {
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   if (dev && dev->dev_swiotlb)
+   return dev->dev_swiotlb;
+#endif
	return &default_swiotlb;
 }
 
@@ -508,7 +513,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, 
phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs)
 {
-   struct swiotlb *swiotlb = &default_swiotlb;
+   struct swiotlb *swiotlb = get_swiotlb(hwdev);
	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, swiotlb->start);
unsigned long flags;
phys_addr_t tlb_addr;
@@ -519,7 +524,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, 
phys_addr_t orig_addr,
unsigned long max_slots;
unsigned long tmp_io_tlb_used;
 
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   if (no_iotlb_memory && !hwdev->dev_swiotlb)
+#else
if (no_iotlb_memory)
+#endif
panic("Can not allocate SWIOTLB buffer earlier and can't now 
provide you with the DMA bounce buffer");
 
if (mem_encrypt_active())
@@ -641,7 +650,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, 
phys_addr_t tlb_addr,
  size_t mapping_size, size_t alloc_size,
  enum dma_data_direction dir, unsigned long attrs)
 {
-   struct swiotlb *swiotlb = &default_swiotlb;
+   struct swiotlb *swiotlb = get_swiotlb(hwdev);
	unsigned long flags;
	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
int index = (tlb_addr - swiotlb->start) >> IO_TLB_SHIFT;
@@ -689,7 +698,7 @@ void swiotlb_tbl_sync_single(struct device *hwdev, 
phys_addr_t tlb_addr,
 size_t size, enum dma_data_direction dir,
 enum dma_sync_target target)
 {
-   struct swiotlb *swiotlb = &default_swiotlb;
+   struct swiotlb *swiotlb = get_swiotlb(hwdev);
int index = (tlb_addr - swiotlb->start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = swiotlb->orig_addr[index];
 
@@ -801,6 +810,11 @@ late_initcall(swiotlb_create_default_debugfs);
 #endif
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
+bool is_swiotlb_force(struct device *dev)
+{
+   return unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dev_swiotlb;
+}
+
 static int rmem_swiotlb_device_init(struct reserved_mem *rmem,

[PATCH v4 07/14] swiotlb: Update swiotlb API to gain a struct device argument

2021-02-08 Thread Claire Chang
Introduce the get_swiotlb() getter and update all callers of
is_swiotlb_active(), is_swiotlb_buffer() and get_swiotlb_start() to gain
a struct device argument.

Signed-off-by: Claire Chang 
---
 drivers/iommu/dma-iommu.c | 12 ++--
 drivers/xen/swiotlb-xen.c |  4 ++--
 include/linux/swiotlb.h   | 10 +-
 kernel/dma/direct.c   |  8 
 kernel/dma/direct.h   |  6 +++---
 kernel/dma/swiotlb.c  | 23 +--
 6 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f659395e7959..abdbe14472cc 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -503,7 +503,7 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, 
dma_addr_t dma_addr,
 
__iommu_dma_unmap(dev, dma_addr, size);
 
-   if (unlikely(is_swiotlb_buffer(phys)))
+   if (unlikely(is_swiotlb_buffer(dev, phys)))
swiotlb_tbl_unmap_single(dev, phys, size,
iova_align(iovad, size), dir, attrs);
 }
@@ -580,7 +580,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
}
 
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-   if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys))
+   if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(dev, phys))
swiotlb_tbl_unmap_single(dev, phys, org_size,
aligned_size, dir, attrs);
 
@@ -753,7 +753,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(phys, size, dir);
 
-   if (is_swiotlb_buffer(phys))
+   if (is_swiotlb_buffer(dev, phys))
swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
 }
 
@@ -766,7 +766,7 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-   if (is_swiotlb_buffer(phys))
+   if (is_swiotlb_buffer(dev, phys))
swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE);
 
if (!dev_is_dma_coherent(dev))
@@ -787,7 +787,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 
-   if (is_swiotlb_buffer(sg_phys(sg)))
+   if (is_swiotlb_buffer(dev, sg_phys(sg)))
swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
dir, SYNC_FOR_CPU);
}
@@ -804,7 +804,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
return;
 
for_each_sg(sgl, sg, nelems, i) {
-   if (is_swiotlb_buffer(sg_phys(sg)))
+   if (is_swiotlb_buffer(dev, sg_phys(sg)))
swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
dir, SYNC_FOR_DEVICE);
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 91f8c68d1a9b..f424d46756b1 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -192,8 +192,8 @@ int __ref xen_swiotlb_init(int verbose, bool early)
/*
 * IO TLB memory already allocated. Just use it.
 */
-   if (is_swiotlb_active()) {
-   xen_io_tlb_start = phys_to_virt(get_swiotlb_start());
+   if (is_swiotlb_active(NULL)) {
+   xen_io_tlb_start = phys_to_virt(get_swiotlb_start(NULL));
goto end;
}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 041611bf3c2a..f13a52a97382 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -71,16 +71,16 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
 
-bool is_swiotlb_buffer(phys_addr_t paddr);
+bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr);
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
-bool is_swiotlb_active(void);
-phys_addr_t get_swiotlb_start(void);
+bool is_swiotlb_active(struct device *dev);
+phys_addr_t get_swiotlb_start(struct device *dev);
 void __init swiotlb_adjust_size(unsigned long new_size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
return false;
 }
@@ -96,7 +96,7 @@ static inline size_t swiotlb_max_mapping_size(struct device 
*dev)
return SIZE_MAX;
 }
 
-static inline bool is_swiotlb_active(void)
+static inline bool is_swiotlb_active(struct device *dev)
 {
return false;
 }
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 

[PATCH v4 06/14] swiotlb: Add restricted DMA pool

2021-02-08 Thread Claire Chang
Add the initialization function to create restricted DMA pools from
matching reserved-memory nodes.

Signed-off-by: Claire Chang 
---
 include/linux/device.h |  4 ++
 kernel/dma/swiotlb.c   | 94 +-
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index 7619a84f8ce4..08d440627b93 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -415,6 +415,7 @@ struct dev_links_info {
  * @dma_pools: Dma pools (if dma'ble device).
  * @dma_mem:   Internal for coherent mem override.
  * @cma_area:  Contiguous memory area for dma allocations
+ * @dev_swiotlb: Internal for swiotlb override.
  * @archdata:  For arch-specific additions.
  * @of_node:   Associated device tree node.
  * @fwnode:Associated device node supplied by platform firmware.
@@ -517,6 +518,9 @@ struct device {
 #ifdef CONFIG_DMA_CMA
struct cma *cma_area;   /* contiguous memory area for dma
   allocations */
+#endif
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   struct swiotlb *dev_swiotlb;
 #endif
/* arch specific additions */
struct dev_archdata archdata;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index dc37951c6924..3a17451c5981 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -39,6 +39,13 @@
 #ifdef CONFIG_DEBUG_FS
 #include 
 #endif
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+#include 
+#include 
+#include 
+#include 
+#include 
+#endif
 
 #include 
 #include 
@@ -75,7 +82,8 @@ enum swiotlb_force swiotlb_force;
  *  range check to see if the memory was in fact allocated by this
  *  API.
 * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and
- *  @end. This is command line adjustable via setup_io_tlb_npages.
+ *  @end. For default swiotlb, this is command line adjustable via
+ *  setup_io_tlb_npages.
  * @used:   The number of used IO TLB block.
  * @list:   The free list describing the number of free entries available
  *  from each index.
@@ -780,3 +788,87 @@ static int __init swiotlb_create_default_debugfs(void)
 
 late_initcall(swiotlb_create_default_debugfs);
 #endif
+
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
+   struct device *dev)
+{
+   struct swiotlb *swiotlb = rmem->priv;
+   int ret;
+
+   if (dev->dev_swiotlb)
+   return -EBUSY;
+
+   /* Since multiple devices can share the same pool, the private data,
+* swiotlb struct, will be initialized by the first device attached
+* to it.
+*/
+   if (!swiotlb) {
+   swiotlb = kzalloc(sizeof(*swiotlb), GFP_KERNEL);
+   if (!swiotlb)
+   return -ENOMEM;
+#ifdef CONFIG_ARM
+   unsigned long pfn = PHYS_PFN(rmem->base);
+
+   if (!PageHighMem(pfn_to_page(pfn))) {
+   ret = -EINVAL;
+   goto cleanup;
+   }
+#endif /* CONFIG_ARM */
+
+   ret = swiotlb_init_tlb_pool(swiotlb, rmem->base, rmem->size);
+   if (ret)
+   goto cleanup;
+
+   rmem->priv = swiotlb;
+   }
+
+#ifdef CONFIG_DEBUG_FS
+   swiotlb_create_debugfs(swiotlb, rmem->name, default_swiotlb.debugfs);
+#endif /* CONFIG_DEBUG_FS */
+
+   dev->dev_swiotlb = swiotlb;
+
+   return 0;
+
+cleanup:
+   kfree(swiotlb);
+
+   return ret;
+}
+
+static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
+   struct device *dev)
+{
+   if (!dev)
+   return;
+
+#ifdef CONFIG_DEBUG_FS
+   debugfs_remove_recursive(dev->dev_swiotlb->debugfs);
+#endif /* CONFIG_DEBUG_FS */
+   dev->dev_swiotlb = NULL;
+}
+
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+   .device_init = rmem_swiotlb_device_init,
+   .device_release = rmem_swiotlb_device_release,
+};
+
+static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+{
+   unsigned long node = rmem->fdt_node;
+
+   if (of_get_flat_dt_prop(node, "reusable", NULL) ||
+   of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
+   of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
+   of_get_flat_dt_prop(node, "no-map", NULL))
+   return -EINVAL;
+
+   rmem->ops = &rmem_swiotlb_ops;
+   pr_info("Reserved memory: created device swiotlb memory pool at %pa, size %ld MiB\n",
+   &rmem->base, (unsigned long)rmem->size / SZ_1M);
+   return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
-- 
2.30.0.478.g8a0d178c01-goog



[PATCH v4 05/14] swiotlb: Add DMA_RESTRICTED_POOL

2021-02-08 Thread Claire Chang
Add a new kconfig symbol, DMA_RESTRICTED_POOL, for restricted DMA pool.

Signed-off-by: Claire Chang 
---
 kernel/dma/Kconfig | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 479fc145acfc..97ff9f8dd3c8 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -83,6 +83,20 @@ config SWIOTLB
bool
select NEED_DMA_MAP_STATE
 
+config DMA_RESTRICTED_POOL
+   bool "DMA Restricted Pool"
+   depends on OF && OF_RESERVED_MEM
+   select SWIOTLB
+   help
+ This enables support for restricted DMA pools which provide a level of
+ DMA memory protection on systems with limited hardware protection
+ capabilities, such as those lacking an IOMMU.
+
+ For more information see
+ <Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt>
+ and <kernel/dma/swiotlb.c>.
+ If unsure, say "n".
+
 #
 # Should be selected if we can mmap non-coherent mappings to userspace.
 # The only thing that is really required is a way to set an uncached bit
-- 
2.30.0.478.g8a0d178c01-goog



[PATCH v4 04/14] swiotlb: Refactor swiotlb_late_init_with_tbl

2021-02-08 Thread Claire Chang
Refactor swiotlb_late_init_with_tbl to make the code reusable for
restricted DMA pool initialization.

Signed-off-by: Claire Chang 
---
 kernel/dma/swiotlb.c | 65 
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 28b7bfe7a2a8..dc37951c6924 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -353,20 +353,21 @@ static void swiotlb_cleanup(void)
max_segment = 0;
 }
 
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+static int swiotlb_init_tlb_pool(struct swiotlb *swiotlb, phys_addr_t start,
+   size_t size)
 {
-   struct swiotlb *swiotlb = &default_swiotlb;
-   unsigned long i, bytes;
+   unsigned long i;
+   void *vaddr = phys_to_virt(start);
 
-   bytes = nslabs << IO_TLB_SHIFT;
+   size = ALIGN(size, 1 << IO_TLB_SHIFT);
+   swiotlb->nslabs = size >> IO_TLB_SHIFT;
+   swiotlb->nslabs = ALIGN(swiotlb->nslabs, IO_TLB_SEGSIZE);
 
-   swiotlb->nslabs = nslabs;
-   swiotlb->start = virt_to_phys(tlb);
-   swiotlb->end = swiotlb->start + bytes;
+   swiotlb->start = start;
+   swiotlb->end = swiotlb->start + size;
 
-   set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-   memset(tlb, 0, bytes);
+   set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
+   memset(vaddr, 0, size);
 
/*
 * Allocate and initialize the free list array.  This array is used
@@ -390,13 +391,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
swiotlb->orig_addr[i] = INVALID_PHYS_ADDR;
}
swiotlb->index = 0;
-   no_iotlb_memory = false;
-
-   swiotlb_print_info();
 
-   late_alloc = 1;
-
-   swiotlb_set_max_segment(swiotlb->nslabs << IO_TLB_SHIFT);
	spin_lock_init(&swiotlb->lock);
 
return 0;
@@ -410,6 +405,27 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
return -ENOMEM;
 }
 
+int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+{
+   struct swiotlb *swiotlb = &default_swiotlb;
+   unsigned long bytes = nslabs << IO_TLB_SHIFT;
+   int ret;
+
+   ret = swiotlb_init_tlb_pool(swiotlb, virt_to_phys(tlb), bytes);
+   if (ret)
+   return ret;
+
+   no_iotlb_memory = false;
+
+   swiotlb_print_info();
+
+   late_alloc = 1;
+
+   swiotlb_set_max_segment(bytes);
+
+   return 0;
+}
+
 void __init swiotlb_exit(void)
 {
	struct swiotlb *swiotlb = &default_swiotlb;
@@ -747,17 +763,20 @@ phys_addr_t get_swiotlb_start(void)
 }
 
 #ifdef CONFIG_DEBUG_FS
-
-static int __init swiotlb_create_debugfs(void)
+static void swiotlb_create_debugfs(struct swiotlb *swiotlb, const char *name,
+  struct dentry *node)
 {
-   struct swiotlb *swiotlb = &default_swiotlb;
-
-   swiotlb->debugfs = debugfs_create_dir("swiotlb", NULL);
+   swiotlb->debugfs = debugfs_create_dir(name, node);
debugfs_create_ulong("io_tlb_nslabs", 0400, swiotlb->debugfs, 
>nslabs);
debugfs_create_ulong("io_tlb_used", 0400, swiotlb->debugfs, 
>used);
-   return 0;
 }
 
-late_initcall(swiotlb_create_debugfs);
+static int __init swiotlb_create_default_debugfs(void)
+{
+   swiotlb_create_debugfs(&default_swiotlb, "swiotlb", NULL);
+
+   return 0;
+}
 
+late_initcall(swiotlb_create_default_debugfs);
 #endif
-- 
2.30.0.478.g8a0d178c01-goog



[PATCH v4 03/14] swiotlb: Add struct swiotlb

2021-02-08 Thread Claire Chang
Added a new struct, swiotlb, as the IO TLB memory pool descriptor, and
moved the relevant global variables into that struct.
This will be useful later to allow for restricted DMA pools.

Signed-off-by: Claire Chang 
---
 kernel/dma/swiotlb.c | 327 +++
 1 file changed, 172 insertions(+), 155 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 678490d39e55..28b7bfe7a2a8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -61,33 +61,43 @@
  * allocate a contiguous 1MB, we're probably in trouble anyway.
  */
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
 
 enum swiotlb_force swiotlb_force;
 
 /*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static phys_addr_t io_tlb_start, io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * The number of used IO TLB block
- */
-static unsigned long io_tlb_used;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
+ * struct swiotlb - Software IO TLB Memory Pool Descriptor
+ *
+ * @start:  The start address of the swiotlb memory pool. Used to do a quick
+ *  range check to see if the memory was in fact allocated by this
+ *  API.
+ * @end:    The end address of the swiotlb memory pool. Used to do a quick
+ *  range check to see if the memory was in fact allocated by this
+ *  API.
+ * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and
+ *  @end. This is command line adjustable via setup_io_tlb_npages.
+ * @used:   The number of used IO TLB block.
+ * @list:   The free list describing the number of free entries available
+ *  from each index.
+ * @index:  The index to start searching in the next round.
+ * @orig_addr:  The original address corresponding to a mapped entry for the
+ *  sync operations.
+ * @lock:   The lock to protect the above data structures in the map and
+ *  unmap calls.
+ * @debugfs: The dentry to debugfs.
  */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
+struct swiotlb {
+   phys_addr_t start;
+   phys_addr_t end;
+   unsigned long nslabs;
+   unsigned long used;
+   unsigned int *list;
+   unsigned int index;
+   phys_addr_t *orig_addr;
+   spinlock_t lock;
+   struct dentry *debugfs;
+};
+static struct swiotlb default_swiotlb;
 
 /*
  * Max segment that we can provide which (if pages are contiguous) will
@@ -95,27 +105,17 @@ static unsigned int io_tlb_index;
  */
 static unsigned int max_segment;
 
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
 static int late_alloc;
 
 static int __init
 setup_io_tlb_npages(char *str)
 {
+   struct swiotlb *swiotlb = &default_swiotlb;
+
if (isdigit(*str)) {
-   io_tlb_nslabs = simple_strtoul(str, &str, 0);
+   swiotlb->nslabs = simple_strtoul(str, &str, 0);
/* avoid tail segment of size < IO_TLB_SEGSIZE */
-   io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+   swiotlb->nslabs = ALIGN(swiotlb->nslabs, IO_TLB_SEGSIZE);
}
if (*str == ',')
++str;
@@ -123,7 +123,7 @@ setup_io_tlb_npages(char *str)
swiotlb_force = SWIOTLB_FORCE;
} else if (!strcmp(str, "noforce")) {
swiotlb_force = SWIOTLB_NO_FORCE;
-   io_tlb_nslabs = 1;
+   swiotlb->nslabs = 1;
}
 
return 0;
@@ -134,7 +134,7 @@ static bool no_iotlb_memory;
 
 unsigned long swiotlb_nr_tbl(void)
 {
-   return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs;
+   return unlikely(no_iotlb_memory) ? 0 : default_swiotlb.nslabs;
 }
 EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 
@@ -156,13 +156,14 @@ unsigned long swiotlb_size_or_default(void)
 {
unsigned long size;
 
-   size = io_tlb_nslabs << IO_TLB_SHIFT;
+   size = default_swiotlb.nslabs << IO_TLB_SHIFT;
 
return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
 void __init swiotlb_adjust_size(unsigned long new_size)
 {
+   struct swiotlb *swiotlb = &default_swiotlb;
unsigned long size;
 
/*
@@ -170,10 +171,10 @@ void __init swiotlb_adjust_size(unsigned long new_size)
 * architectures such as those supporting memory encryption to
 * adjust/expand SWIOTLB size for their use.
 */
-  

[PATCH v4 02/14] swiotlb: Move is_swiotlb_buffer() to swiotlb.c

2021-02-08 Thread Claire Chang
Move is_swiotlb_buffer() to swiotlb.c and make io_tlb_{start,end}
static, so we can entirely hide struct swiotlb inside of swiotlb.c in
the following patches.

Signed-off-by: Claire Chang 
---
 include/linux/swiotlb.h | 7 +--
 kernel/dma/swiotlb.c| 7 ++-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 83200f3b042a..041611bf3c2a 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -70,13 +70,8 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
-extern phys_addr_t io_tlb_start, io_tlb_end;
-
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
-{
-   return paddr >= io_tlb_start && paddr < io_tlb_end;
-}
 
+bool is_swiotlb_buffer(phys_addr_t paddr);
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e180211f6ad9..678490d39e55 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -69,7 +69,7 @@ enum swiotlb_force swiotlb_force;
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
-phys_addr_t io_tlb_start, io_tlb_end;
+static phys_addr_t io_tlb_start, io_tlb_end;
 
 /*
  * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
@@ -719,6 +719,11 @@ bool is_swiotlb_active(void)
return io_tlb_end != 0;
 }
 
+bool is_swiotlb_buffer(phys_addr_t paddr)
+{
+   return paddr >= io_tlb_start && paddr < io_tlb_end;
+}
+
 phys_addr_t get_swiotlb_start(void)
 {
return io_tlb_start;
-- 
2.30.0.478.g8a0d178c01-goog



[PATCH v4 01/14] swiotlb: Remove external access to io_tlb_start

2021-02-08 Thread Claire Chang
Add a new function, get_swiotlb_start(), and remove external access to
io_tlb_start, so we can entirely hide struct swiotlb inside of swiotlb.c
in the following patches.

Signed-off-by: Claire Chang 
---
 arch/powerpc/platforms/pseries/svm.c | 4 ++--
 drivers/xen/swiotlb-xen.c| 4 ++--
 include/linux/swiotlb.h  | 1 +
 kernel/dma/swiotlb.c | 5 +
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/svm.c 
b/arch/powerpc/platforms/pseries/svm.c
index 7b739cc7a8a9..c10c51d49f3d 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -55,8 +55,8 @@ void __init svm_swiotlb_init(void)
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
return;
 
-   if (io_tlb_start)
-   memblock_free_early(io_tlb_start,
+   if (vstart)
+   memblock_free_early(vstart,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
panic("SVM: Cannot allocate SWIOTLB buffer");
 }
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 2b385c1b4a99..91f8c68d1a9b 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -192,8 +192,8 @@ int __ref xen_swiotlb_init(int verbose, bool early)
/*
 * IO TLB memory already allocated. Just use it.
 */
-   if (io_tlb_start != 0) {
-   xen_io_tlb_start = phys_to_virt(io_tlb_start);
+   if (is_swiotlb_active()) {
+   xen_io_tlb_start = phys_to_virt(get_swiotlb_start());
goto end;
}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index d9c9fc9ca5d2..83200f3b042a 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -81,6 +81,7 @@ void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
 bool is_swiotlb_active(void);
+phys_addr_t get_swiotlb_start(void);
 void __init swiotlb_adjust_size(unsigned long new_size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7c42df6e6100..e180211f6ad9 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -719,6 +719,11 @@ bool is_swiotlb_active(void)
return io_tlb_end != 0;
 }
 
+phys_addr_t get_swiotlb_start(void)
+{
+   return io_tlb_start;
+}
+
 #ifdef CONFIG_DEBUG_FS
 
 static int __init swiotlb_create_debugfs(void)
--

This can be dropped once Christoph's swiotlb cleanups have landed.
https://lore.kernel.org/linux-iommu/20210207160934.2955931-1-...@lst.de/T/#m7124f29b6076d462101fcff6433295157621da09
 

2.30.0.478.g8a0d178c01-goog



[PATCH v4 00/14] Restricted DMA

2021-02-08 Thread Claire Chang
This series implements mitigations for lack of DMA access control on
systems without an IOMMU, which could result in the DMA accessing the
system memory at unexpected times and/or unexpected addresses, possibly
leading to data leakage or corruption.

For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
not behind an IOMMU. As PCI-e, by design, gives the device full access to
system memory, a vulnerability in the Wi-Fi firmware could easily escalate
to a full system exploit (remote Wi-Fi exploits: [1a], [1b], which show a
full chain of exploits; [2], [3]).

To mitigate the security concerns, we introduce restricted DMA. Restricted
DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
specially allocated region and does memory allocation from the same region.
The feature on its own provides a basic level of protection against the DMA
overwriting buffer contents at unexpected times. However, to protect
against general data leakage and system memory corruption, the system needs
to provide a way to restrict the DMA to a predefined memory region (this is
usually done at firmware level, e.g. MPU in ATF on some ARM platforms [4]).
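
In a nutshell, the invariant being enforced looks like the sketch below
(the struct and helper names are invented for illustration; they are not
the actual API added by this series):

struct restricted_dma_pool {
        phys_addr_t start;      /* base of the firmware-protected region */
        phys_addr_t end;        /* device may only DMA within [start, end) */
};

/* Streaming DMA to buffers outside the pool must be bounced through it. */
static bool paddr_in_restricted_pool(struct restricted_dma_pool *pool,
                                     phys_addr_t paddr)
{
        return paddr >= pool->start && paddr < pool->end;
}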

[1a] 
https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html
[1b] 
https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html
[2] https://blade.tencent.com/en/advisories/qualpwn/
[3] 
https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/
[4] 
https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/mediatek/mt8183/drivers/emi_mpu/emi_mpu.c#L132

Claire Chang (14):
  swiotlb: Remove external access to io_tlb_start
  swiotlb: Move is_swiotlb_buffer() to swiotlb.c
  swiotlb: Add struct swiotlb
  swiotlb: Refactor swiotlb_late_init_with_tbl
  swiotlb: Add DMA_RESTRICTED_POOL
  swiotlb: Add restricted DMA pool
  swiotlb: Update swiotlb API to gain a struct device argument
  swiotlb: Use restricted DMA pool if available
  swiotlb: Refactor swiotlb_tbl_{map,unmap}_single
  dma-direct: Add a new wrapper __dma_direct_free_pages()
  swiotlb: Add is_dev_swiotlb_force()
  swiotlb: Add restricted DMA alloc/free support.
  dt-bindings: of: Add restricted DMA pool
  of: Add plumbing for restricted DMA pool

 .../reserved-memory/reserved-memory.txt   |  24 +
 arch/powerpc/platforms/pseries/svm.c  |   4 +-
 drivers/iommu/dma-iommu.c |  12 +-
 drivers/of/address.c  |  25 +
 drivers/of/device.c   |   3 +
 drivers/of/of_private.h   |   5 +
 drivers/xen/swiotlb-xen.c |   4 +-
 include/linux/device.h|   4 +
 include/linux/swiotlb.h   |  32 +-
 kernel/dma/Kconfig|  14 +
 kernel/dma/direct.c   |  51 +-
 kernel/dma/direct.h   |   8 +-
 kernel/dma/swiotlb.c  | 636 --
 13 files changed, 582 insertions(+), 240 deletions(-)

-- 

v4:
  - Fix spinlock bad magic
  - Use rmem->name for debugfs entry
  - Address the comments in v3

v3:
  Using only one reserved memory region for both streaming DMA and memory
  allocation.
  https://lore.kernel.org/patchwork/cover/1360992/

v2:
  Building on top of swiotlb.
  https://lore.kernel.org/patchwork/cover/1280705/

v1:
  Using dma_map_ops.
  https://lore.kernel.org/patchwork/cover/1271660/

2.30.0.478.g8a0d178c01-goog



Re: [PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe

2021-02-08 Thread Christophe Leroy




On 09/02/2021 at 02:11, Nicholas Piggin wrote:

Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:

regs->softe doesn't exist on PPC32.

Add irq_soft_mask_regs_set_state() helper to set regs->softe.
This helper is a no-op on PPC32.

Signed-off-by: Christophe Leroy 
---


You could do the same with the kuap_ functions to change some ifdefs
to IS_ENABLED.

That's just my preference but if you prefer this way I guess that's
okay.




That's also my preference on the long term.

Here it is ephemeral: I have a follow-up series implementing interrupt entry/exit in C and getting 
rid of all the assembly kuap, hence getting rid of those ifdefs.


The issue I see when using IS_ENABLED() is that you have to indent to the right, and then you 
interfere with the file history and 'git blame'.
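
For illustration, the two styles being contrasted, using kuap_check_amr()
from this series as the example:

/* ifdef style: no extra indentation, but preprocessor noise */
#ifdef CONFIG_PPC64
        kuap_check_amr();
#endif

/* IS_ENABLED() style: plain C the compiler folds away, but the call
 * gains an indentation level, which later shows up in 'git blame' */
        if (IS_ENABLED(CONFIG_PPC64))
                kuap_check_amr();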


Thanks for reviewing this series; I look forward to your feedback on the interrupt entry/exit 
series that I will likely release later today.


Christophe


Re: [PATCH v5 17/22] powerpc/syscall: Do not check unsupported scv vector on PPC32

2021-02-08 Thread Christophe Leroy




On 09/02/2021 at 03:00, Nicholas Piggin wrote:

Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:

Only PPC64 has scv. No need to check the 0x7ff0 trap on PPC32.
For that, add a helper trap_is_unsupported_scv() similar to
trap_is_scv().

And ignore the scv parameter in syscall_exit_prepare (saves 14 cycles,
346 => 332 cycles)

Signed-off-by: Christophe Leroy 
---
v5: Added a helper trap_is_unsupported_scv()
---
  arch/powerpc/include/asm/ptrace.h | 5 +
  arch/powerpc/kernel/entry_32.S| 1 -
  arch/powerpc/kernel/interrupt.c   | 7 +--
  3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h 
b/arch/powerpc/include/asm/ptrace.h
index 58f9dc060a7b..2c842b11a924 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -229,6 +229,11 @@ static inline bool trap_is_scv(struct pt_regs *regs)
return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000);
  }
  
+static inline bool trap_is_unsupported_scv(struct pt_regs *regs)

+{
+   return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0);
+}


This change is good.


+
  static inline bool trap_is_syscall(struct pt_regs *regs)
  {
return (trap_is_scv(regs) || TRAP(regs) == 0xc00);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index cffe58e63356..7c824e8928d0 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -344,7 +344,6 @@ transfer_to_syscall:
  
  ret_from_syscall:

addir4,r1,STACK_FRAME_OVERHEAD
-   li  r5,0
bl  syscall_exit_prepare


For this one, I think it would be nice to do the "right" thing and make
the function prototypes different on !64S. They could then declare a
local const bool scv = 0.

We could have syscall_exit_prepare and syscall_exit_prepare_maybe_scv
or something like that, 64s can use the latter one and the former can be
a wrapper that passes constant 0 for scv. Then we don't have different
prototypes for the same function, but you just have to make the 32-bit
version static inline and the 64-bit version exported to asm.


You can't call a static inline function from ASM, I don't understand you.

What is wrong for you really here ? Is that the fact we leave scv random, or is that the below 
IS_ENABLED() ?


I don't mind keeping the 'li r5,0' before calling the function if you find it cleaner, the real 
performance gain is with setting scv to 0 below for PPC32 (and maybe it should be set to zero for 
book3e/64 too ?).


Another solution would be to replace (!scv) by (!scv || !IS_ENABLED(CONFIG_PPC_BOOK3S_64)) in the 
two places it is used in syscall_exit_prepare().


Any preference ?

Thanks
Christophe


@@ -224,6 +224,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
unsigned long ti_flags;
unsigned long ret = 0;
  
+	if (IS_ENABLED(CONFIG_PPC32))

+   scv = 0;
+
CT_WARN_ON(ct_state() == CONTEXT_USER);
  
  #ifdef CONFIG_PPC64

--
2.25.0




Re: [PATCH v5 09/22] powerpc/syscall: Make interrupt.c buildable on PPC32

2021-02-08 Thread Christophe Leroy




On 09/02/2021 at 02:27, Nicholas Piggin wrote:

Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:

To allow building interrupt.c on PPC32, ifdef out specific PPC64
code or use helpers which are available on both PPC32 and PPC64.

Modify Makefile to always build interrupt.o

Signed-off-by: Christophe Leroy 
---
v5:
- Also for interrupt exit preparation
- Opted out kuap related code, ppc32 keeps it in ASM for the time being
---
  arch/powerpc/kernel/Makefile|  4 ++--
  arch/powerpc/kernel/interrupt.c | 31 ---
  2 files changed, 26 insertions(+), 9 deletions(-)




diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index d6be4f9a67e5..2dac4d2bb1cf 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
BUG_ON(!FULL_REGS(regs));
-   BUG_ON(regs->softe != IRQS_ENABLED);
+   BUG_ON(arch_irq_disabled_regs(regs));
  
  #ifdef CONFIG_PPC_PKEY

if (mmu_has_feature(MMU_FTR_PKEY)) {
@@ -65,7 +65,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
isync();
} else
  #endif
+#ifdef CONFIG_PPC64
kuap_check_amr();
+#endif


Wouldn't mind trying to get rid of these ifdefs at some point, but
there's some kuap / keys changes going on recently so I'm happy enough
to let this settle then look at whether we can refactor.


I have a follow-up series that implements interrupt entry/exit in C and removes all kuap 
assembly; I will likely release it as RFC later today.




  
  	account_cpu_user_entry();
  
@@ -318,7 +323,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,

return ret;
  }
  
-#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */

+#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
  notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, 
unsigned long msr)
  {
  #ifdef CONFIG_PPC_BOOK3E


Why are you building this for 32? I don't mind if it's just to keep
things similar and make it build for now, but you're not using it yet,
right?


The series using that will follow; I thought it would be worth doing this all at 
once.

  
Reviewed-by: Nicholas Piggin 




Re: [PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe

2021-02-08 Thread Christophe Leroy




On 09/02/2021 at 02:11, Nicholas Piggin wrote:

Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:

regs->softe doesn't exist on PPC32.

Add irq_soft_mask_regs_set_state() helper to set regs->softe.
This helper is a no-op on PPC32.

Signed-off-by: Christophe Leroy 
---
  arch/powerpc/include/asm/hw_irq.h | 11 +--
  1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index 614957f74cee..ed0c3b049dfd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -38,6 +38,8 @@
  #define PACA_IRQ_MUST_HARD_MASK   (PACA_IRQ_EE)
  #endif
  
+#endif /* CONFIG_PPC64 */

+
  /*
   * flags for paca->irq_soft_mask
   */
@@ -46,8 +48,6 @@
  #define IRQS_PMI_DISABLED 2
  #define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED)
  
-#endif /* CONFIG_PPC64 */

-
  #ifndef __ASSEMBLY__
  
  #ifdef CONFIG_PPC64

@@ -287,6 +287,10 @@ extern void irq_set_pending_from_srr1(unsigned long srr1);
  
  extern void force_external_irq_replay(void);
  
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)

+{
+   regs->softe = val;
+}
  #else /* CONFIG_PPC64 */
  
  static inline unsigned long arch_local_save_flags(void)

@@ -355,6 +359,9 @@ static inline bool arch_irq_disabled_regs(struct pt_regs 
*regs)
  
  static inline void may_hard_irq_enable(void) { }
  
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)

+{
+}
  #endif /* CONFIG_PPC64 */
  
  #define ARCH_IRQ_INIT_FLAGS	IRQ_NOREQUEST


What I don't like about this is that, where you use it, it kind of
pollutes the ppc32 path with a function which is not valid to use there.

I would prefer if you had this purely so it could compile with:

   if (IS_ENABLED(CONFIG_PPC64)))
   irq_soft_mask_regs_set_state(regs, blah);

And then you could make the ppc32 cause a link error if it did not
get eliminated at compile time (e.g., call an undefined function).

You could do the same with the kuap_ functions to change some ifdefs
to IS_ENABLED.

That's just my preference but if you prefer this way I guess that's
okay.


I see you didn't change your mind since last April :)

I'll see what I can do.

Christophe


Re: [PATCH v7 32/42] powerpc/64: context tracking move to interrupt wrappers

2021-02-08 Thread Christophe Leroy




On 30/01/2021 at 14:08, Nicholas Piggin wrote:

This moves exception_enter/exit calls to wrapper functions for
synchronous interrupts. More interrupt handlers are covered by
this than previously.


Why did you enclose everything in #ifdef CONFIG_PPC64? As far as I understand, before this patch 
exception_enter() and exception_exit() were also called on PPC32.


Christophe
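
For reference, when CONFIG_CONTEXT_TRACKING is not set the generic header
already folds these calls away, roughly as below (paraphrased from
include/linux/context_tracking.h, not from this patch), so the #ifdef
only changes codegen on kernels with context tracking built in:

#ifndef CONFIG_CONTEXT_TRACKING
#define exception_enter()               0
#define exception_exit(prev_ctx)        do { } while (false)
#endif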




Signed-off-by: Nicholas Piggin 
---
  arch/powerpc/include/asm/interrupt.h  |  9 
  arch/powerpc/kernel/traps.c   | 74 ++-
  arch/powerpc/mm/book3s64/hash_utils.c |  3 --
  arch/powerpc/mm/fault.c   |  9 +---
  4 files changed, 27 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index 488bdd5bd922..e65ce3e2b071 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -7,10 +7,16 @@
  #include 
  
  struct interrupt_state {

+#ifdef CONFIG_PPC64
+   enum ctx_state ctx_state;
+#endif
  };
  
  static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)

  {
+#ifdef CONFIG_PPC64
+   state->ctx_state = exception_enter();
+#endif
  }
  
  /*

@@ -29,6 +35,9 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
   */
  static inline void interrupt_exit_prepare(struct pt_regs *regs, struct 
interrupt_state *state)
  {
+#ifdef CONFIG_PPC64
+   exception_exit(state->ctx_state);
+#endif
  }
  
  static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index da488e62fb5f..21fd14828827 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1087,41 +1087,28 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
  
  DEFINE_INTERRUPT_HANDLER(unknown_exception)

  {
-   enum ctx_state prev_state = exception_enter();
-
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
   regs->nip, regs->msr, regs->trap);
  
  	_exception(SIGTRAP, regs, TRAP_UNK, 0);

-
-   exception_exit(prev_state);
  }
  
  DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)

  {
-   enum ctx_state prev_state = exception_enter();
-
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
   regs->nip, regs->msr, regs->trap);
  
  	_exception(SIGTRAP, regs, TRAP_UNK, 0);

-
-   exception_exit(prev_state);
  }
  
  DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)

  {
-   enum ctx_state prev_state = exception_enter();
-
if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
-   goto bail;
+   return;
if (debugger_iabr_match(regs))
-   goto bail;
+   return;
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
-
-bail:
-   exception_exit(prev_state);
  }
  
  DEFINE_INTERRUPT_HANDLER(RunModeException)

@@ -1131,8 +1118,6 @@ DEFINE_INTERRUPT_HANDLER(RunModeException)
  
  DEFINE_INTERRUPT_HANDLER(single_step_exception)

  {
-   enum ctx_state prev_state = exception_enter();
-
clear_single_step(regs);
clear_br_trace(regs);
  
@@ -1141,14 +1126,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
  
  	if (notify_die(DIE_SSTEP, "single_step", regs, 5,

5, SIGTRAP) == NOTIFY_STOP)
-   goto bail;
+   return;
if (debugger_sstep(regs))
-   goto bail;
+   return;
  
  	_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);

-
-bail:
-   exception_exit(prev_state);
  }
  NOKPROBE_SYMBOL(single_step_exception);
  
@@ -1476,7 +1458,6 @@ static inline int emulate_math(struct pt_regs *regs) { return -1; }
  
  DEFINE_INTERRUPT_HANDLER(program_check_exception)

  {
-   enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
  
  	/* We can now get here via a FP Unavailable exception if the core

@@ -1485,22 +1466,22 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
if (reason & REASON_FP) {
/* IEEE FP exception */
parse_fpe(regs);
-   goto bail;
+   return;
}
if (reason & REASON_TRAP) {
unsigned long bugaddr;
/* Debugger is first in line to stop recursive faults in
 * rcu_lock, notify_die, or atomic_notifier_call_chain */
if (debugger_bpt(regs))
-   goto bail;
+   return;
  
  		if (kprobe_handler(regs))

-   goto bail;
+   return;
  
  		/* trap exception */

if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
== NOTIFY_STOP)
-   goto bail;
+  

Re: [PATCH v5 20/22] powerpc/syscall: Avoid storing 'current' in another pointer

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> By saving the pointer pointing to thread_info.flags, gcc copies r2
> in a non-volatile register.
> 
> We know 'current' doesn't change, so avoid that intermediate pointer.
> 
> Reduces null_syscall benchmark by 2 cycles (322 => 320 cycles)
> 
> On PPC64, gcc seems to know that 'current' is not changing, and it keeps
> it in a non-volatile register to avoid multiple reads of 'current' from the paca.
> 
> Signed-off-by: Christophe Leroy 

What if you did this?

---
 arch/powerpc/include/asm/current.h | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/current.h 
b/arch/powerpc/include/asm/current.h
index bbfb94800415..59ab327972a5 100644
--- a/arch/powerpc/include/asm/current.h
+++ b/arch/powerpc/include/asm/current.h
@@ -23,16 +23,19 @@ static inline struct task_struct *get_current(void)
 
return task;
 }
-#define currentget_current()
 
 #else
 
-/*
- * We keep `current' in r2 for speed.
- */
-register struct task_struct *current asm ("r2");
+static inline struct task_struct *get_current(void)
+{
+   register struct task_struct *task asm ("r2");
+
+   return task;
+}
 
 #endif
 
+#define currentget_current()
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_CURRENT_H */
-- 


Re: [PATCH v5 19/22] powerpc/syscall: Optimise checks in beginning of system_call_exception()

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> Combine all tests of regs->msr into a single logical one.

Okay by me, unless we choose to do the config option and put these all
under it. I think I would prefer that, because sometimes the registers
are in a state where you can't easily see what the values in the
expression were. In this case it doesn't matter so much, because they
should be in regs in the interrupt frame.

Thanks,
Nick

> 
> Before the patch:
> 
>0: 81 6a 00 84 lwz r11,132(r10)
>4: 90 6a 00 88 stw r3,136(r10)
>8: 69 60 00 02 xorir0,r11,2
>c: 54 00 ff fe rlwinm  r0,r0,31,31,31
>   10: 0f 00 00 00 twnei   r0,0
>   14: 69 63 40 00 xorir3,r11,16384
>   18: 54 63 97 fe rlwinm  r3,r3,18,31,31
>   1c: 0f 03 00 00 twnei   r3,0
>   20: 69 6b 80 00 xorir11,r11,32768
>   24: 55 6b 8f fe rlwinm  r11,r11,17,31,31
>   28: 0f 0b 00 00 twnei   r11,0
> 
> After the patch:
> 
>0: 81 6a 00 84 lwz r11,132(r10)
>4: 90 6a 00 88 stw r3,136(r10)
>8: 7d 6b 58 f8 not r11,r11
>c: 71 6b c0 02 andi.   r11,r11,49154
>   10: 0f 0b 00 00 twnei   r11,0
> 
> 6 cycles less on powerpc 8xx (328 => 322 cycles).
> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/kernel/interrupt.c | 10 +++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 55e1aa18cdb9..8c38e8c95be2 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -28,6 +28,7 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>  unsigned long r0, struct pt_regs *regs)
>  {
>   syscall_fn f;
> + unsigned long expected_msr;
>  
>   regs->orig_gpr3 = r3;
>  
> @@ -39,10 +40,13 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>  
>   trace_hardirqs_off(); /* finish reconciling */
>  
> + expected_msr = MSR_PR;
>   if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
> - BUG_ON(!(regs->msr & MSR_RI));
> - BUG_ON(!(regs->msr & MSR_PR));
> - BUG_ON(arch_irq_disabled_regs(regs));
> + expected_msr |= MSR_RI;
> + if (IS_ENABLED(CONFIG_PPC32))
> + expected_msr |= MSR_EE;
> + BUG_ON((regs->msr & expected_msr) ^ expected_msr);
> + BUG_ON(IS_ENABLED(CONFIG_PPC64) && arch_irq_disabled_regs(regs));
>  
>  #ifdef CONFIG_PPC_PKEY
>   if (mmu_has_feature(MMU_FTR_PKEY)) {
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 18/22] powerpc/syscall: Remove FULL_REGS verification in system_call_exception

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> For book3s/64, FULL_REGS() is 'true' at all times, so the test is moot.
> For others, non-volatile registers are saved unconditionally.
> 
> So the verification is pointless.
> 
> Should that ever not hold, it would be caught anyway by the
> CHECK_FULL_REGS() in copy_thread(), as we have removed the
> special versions ppc_fork() and friends.
> 
> null_syscall benchmark reduced by 4 cycles (332 => 328 cycles)

I wonder if we should rather make a CONFIG option for a bunch of these
simpler debug checks here (and also in interrupt exit, wrappers, etc.)
rather than remove them entirely.
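
Something like the below, perhaps (a sketch only; the config symbol name
is invented here):

#ifdef CONFIG_PPC_INTERRUPT_SANITY_CHECKS
#define interrupt_debug_bug_on(cond)    BUG_ON(cond)
#else
#define interrupt_debug_bug_on(cond)    do { } while (0)
#endif

        interrupt_debug_bug_on(!FULL_REGS(regs));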

Thanks,
Nick

> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/kernel/interrupt.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 8fafca727b8b..55e1aa18cdb9 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -42,7 +42,6 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>   if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
>   BUG_ON(!(regs->msr & MSR_RI));
>   BUG_ON(!(regs->msr & MSR_PR));
> - BUG_ON(!FULL_REGS(regs));
>   BUG_ON(arch_irq_disabled_regs(regs));
>  
>  #ifdef CONFIG_PPC_PKEY
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 17/22] powerpc/syscall: Do not check unsupported scv vector on PPC32

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> Only PPC64 has scv. No need to check the 0x7ff0 trap on PPC32.
> For that, add a helper trap_is_unsupported_scv() similar to
> trap_is_scv().
> 
> And ignore the scv parameter in syscall_exit_prepare (saves 14 cycles,
> 346 => 332 cycles)
> 
> Signed-off-by: Christophe Leroy 
> ---
> v5: Added a helper trap_is_unsupported_scv()
> ---
>  arch/powerpc/include/asm/ptrace.h | 5 +
>  arch/powerpc/kernel/entry_32.S| 1 -
>  arch/powerpc/kernel/interrupt.c   | 7 +--
>  3 files changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/ptrace.h 
> b/arch/powerpc/include/asm/ptrace.h
> index 58f9dc060a7b..2c842b11a924 100644
> --- a/arch/powerpc/include/asm/ptrace.h
> +++ b/arch/powerpc/include/asm/ptrace.h
> @@ -229,6 +229,11 @@ static inline bool trap_is_scv(struct pt_regs *regs)
>   return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000);
>  }
>  
> +static inline bool trap_is_unsupported_scv(struct pt_regs *regs)
> +{
> + return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0);
> +}

This change is good.

> +
>  static inline bool trap_is_syscall(struct pt_regs *regs)
>  {
>   return (trap_is_scv(regs) || TRAP(regs) == 0xc00);
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> index cffe58e63356..7c824e8928d0 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -344,7 +344,6 @@ transfer_to_syscall:
>  
>  ret_from_syscall:
>   addir4,r1,STACK_FRAME_OVERHEAD
> - li  r5,0
>   bl  syscall_exit_prepare

For this one, I think it would be nice to do the "right" thing and make 
the function prototypes different on !64S. They could then declare a
local const bool scv = 0.

We could have syscall_exit_prepare and syscall_exit_prepare_maybe_scv
or something like that, 64s can use the latter one and the former can be
a wrapper that passes constant 0 for scv. Then we don't have different
prototypes for the same function, but you just have to make the 32-bit
version static inline and the 64-bit version exported to asm.

Thanks,
Nick

>  #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
>   /* If the process has its own DBCR0 value, load it up.  The internal
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 205902052112..8fafca727b8b 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -88,7 +88,7 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>   local_irq_enable();
>  
>   if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
> - if (unlikely(regs->trap == 0x7ff0)) {
> + if (unlikely(trap_is_unsupported_scv(regs))) {
>   /* Unsupported scv vector */
>   _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
>   return regs->gpr[3];
> @@ -111,7 +111,7 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>   r8 = regs->gpr[8];
>  
>   } else if (unlikely(r0 >= NR_syscalls)) {
> - if (unlikely(regs->trap == 0x7ff0)) {
> + if (unlikely(trap_is_unsupported_scv(regs))) {
>   /* Unsupported scv vector */
>   _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
>   return regs->gpr[3];
> @@ -224,6 +224,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long 
> r3,
>   unsigned long ti_flags;
>   unsigned long ret = 0;
>  
> + if (IS_ENABLED(CONFIG_PPC32))
> + scv = 0;
> +
>   CT_WARN_ON(ct_state() == CONTEXT_USER);
>  
>  #ifdef CONFIG_PPC64
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 16/22] powerpc/syscall: Avoid stack frame in likely part of system_call_exception()

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> When r3 is not modified, reload it from regs->orig_r3 to free
> volatile registers. This avoids a stack frame for the likely part
> of system_call_exception()

This doesn't avoid the stack frame on my 64s build, but it does reduce
one non-volatile register save/restore. With quite a bit more register
pressure reduction, 64s can avoid the stack frame as well.

It's a cool trick, but quite code- and compiler-specific, so I don't know
how worthwhile it is to keep, considering we're calling out into random
kernel C code after this.

Maybe just keep it PPC32-specific for the moment; we will have to do more
tuning for 64, and we have other stuff to do there first.
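
i.e. roughly this, as a PPC32-only variant of the hunk below (a sketch
only, not a tested change):

        } else if (IS_ENABLED(CONFIG_PPC32)) {
                /* Restore r3 from orig_gpr3 to free up a volatile reg */
                r3 = regs->orig_gpr3;
        }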

If you are happy to make it 32-bit only then

Reviewed-by: Nicholas Piggin 

> 
> Before the patch:
> 
> c000b4d4 :
> c000b4d4: 7c 08 02 a6 mflrr0
> c000b4d8: 94 21 ff e0 stwur1,-32(r1)
> c000b4dc: 93 e1 00 1c stw r31,28(r1)
> c000b4e0: 90 01 00 24 stw r0,36(r1)
> c000b4e4: 90 6a 00 88 stw r3,136(r10)
> c000b4e8: 81 6a 00 84 lwz r11,132(r10)
> c000b4ec: 69 6b 00 02 xorir11,r11,2
> c000b4f0: 55 6b ff fe rlwinm  r11,r11,31,31,31
> c000b4f4: 0f 0b 00 00 twnei   r11,0
> c000b4f8: 81 6a 00 a0 lwz r11,160(r10)
> c000b4fc: 55 6b 07 fe clrlwi  r11,r11,31
> c000b500: 0f 0b 00 00 twnei   r11,0
> c000b504: 7c 0c 42 e6 mftbr0
> c000b508: 83 e2 00 08 lwz r31,8(r2)
> c000b50c: 81 82 00 28 lwz r12,40(r2)
> c000b510: 90 02 00 24 stw r0,36(r2)
> c000b514: 7d 8c f8 50 subfr12,r12,r31
> c000b518: 7c 0c 02 14 add r0,r12,r0
> c000b51c: 90 02 00 08 stw r0,8(r2)
> c000b520: 7c 10 13 a6 mtspr   80,r0
> c000b524: 81 62 00 70 lwz r11,112(r2)
> c000b528: 71 60 86 91 andi.   r0,r11,34449
> c000b52c: 40 82 00 34 bne c000b560 
> c000b530: 2b 89 01 b6 cmplwi  cr7,r9,438
> c000b534: 41 9d 00 64 bgt cr7,c000b598 
> 
> c000b538: 3d 40 c0 5c lis r10,-16292
> c000b53c: 55 29 10 3a rlwinm  r9,r9,2,0,29
> c000b540: 39 4a 41 e8 addir10,r10,16872
> c000b544: 80 01 00 24 lwz r0,36(r1)
> c000b548: 7d 2a 48 2e lwzxr9,r10,r9
> c000b54c: 7c 08 03 a6 mtlrr0
> c000b550: 7d 29 03 a6 mtctr   r9
> c000b554: 83 e1 00 1c lwz r31,28(r1)
> c000b558: 38 21 00 20 addir1,r1,32
> c000b55c: 4e 80 04 20 bctr
> 
> After the patch:
> 
> c000b4d4 :
> c000b4d4: 81 6a 00 84 lwz r11,132(r10)
> c000b4d8: 90 6a 00 88 stw r3,136(r10)
> c000b4dc: 69 6b 00 02 xorir11,r11,2
> c000b4e0: 55 6b ff fe rlwinm  r11,r11,31,31,31
> c000b4e4: 0f 0b 00 00 twnei   r11,0
> c000b4e8: 80 6a 00 a0 lwz r3,160(r10)
> c000b4ec: 54 63 07 fe clrlwi  r3,r3,31
> c000b4f0: 0f 03 00 00 twnei   r3,0
> c000b4f4: 7d 6c 42 e6 mftbr11
> c000b4f8: 81 82 00 08 lwz r12,8(r2)
> c000b4fc: 80 02 00 28 lwz r0,40(r2)
> c000b500: 91 62 00 24 stw r11,36(r2)
> c000b504: 7c 00 60 50 subfr0,r0,r12
> c000b508: 7d 60 5a 14 add r11,r0,r11
> c000b50c: 91 62 00 08 stw r11,8(r2)
> c000b510: 7c 10 13 a6 mtspr   80,r0
> c000b514: 80 62 00 70 lwz r3,112(r2)
> c000b518: 70 6b 86 91 andi.   r11,r3,34449
> c000b51c: 40 82 00 28 bne c000b544 
> c000b520: 2b 89 01 b6 cmplwi  cr7,r9,438
> c000b524: 41 9d 00 84 bgt cr7,c000b5a8 
> 
> c000b528: 80 6a 00 88 lwz r3,136(r10)
> c000b52c: 3d 40 c0 5c lis r10,-16292
> c000b530: 55 29 10 3a rlwinm  r9,r9,2,0,29
> c000b534: 39 4a 41 e4 addir10,r10,16868
> c000b538: 7d 2a 48 2e lwzxr9,r10,r9
> c000b53c: 7d 29 03 a6 mtctr   r9
> c000b540: 4e 80 04 20 bctr
> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/kernel/interrupt.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 107ec39f05cb..205902052112 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -117,6 +117,9 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>   return regs->gpr[3];
>   }
>   return -ENOSYS;
> + } else {
> + /* Restore r3 from orig_gpr3 to free up a volatile reg */
> + r3 = regs->orig_gpr3;
>   }
>  
>   /* May be faster to do array_index_nospec? */
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 12/22] powerpc/syscall: Change condition to check MSR_RI

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> In system_call_exception(), MSR_RI also needs to be checked on 8xx.
> Only booke and 40x don't have MSR_RI.

Reviewed-by: Nicholas Piggin 

...
> 
> Signed-off-by: Christophe Leroy 
> ---
> v5: Also in interrupt exit prepare
> ---
>  arch/powerpc/kernel/interrupt.c | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 1a2dec49f811..107ec39f05cb 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>  
>   trace_hardirqs_off(); /* finish reconciling */
>  
> - if (IS_ENABLED(CONFIG_PPC_BOOK3S))
> + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
>   BUG_ON(!(regs->msr & MSR_RI));
>   BUG_ON(!(regs->msr & MSR_PR));
>   BUG_ON(!FULL_REGS(regs));
> @@ -338,7 +338,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct 
> pt_regs *regs, unsigned
>   unsigned long flags;
>   unsigned long ret = 0;
>  
> - if (IS_ENABLED(CONFIG_PPC_BOOK3S))
> + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
>   BUG_ON(!(regs->msr & MSR_RI));
>   BUG_ON(!(regs->msr & MSR_PR));
>   BUG_ON(!FULL_REGS(regs));
> @@ -436,7 +436,8 @@ notrace unsigned long 
> interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
>   unsigned long amr;
>  #endif
>  
> - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
> + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
> + unlikely(!(regs->msr & MSR_RI)))
>   unrecoverable_exception(regs);
>   BUG_ON(regs->msr & MSR_PR);
>   BUG_ON(!FULL_REGS(regs));
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 11/22] powerpc/syscall: Save r3 in regs->orig_r3

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> Save r3 in regs->orig_r3 in system_call_exception()
> 
> Signed-off-by: Christophe Leroy 

Reviewed-by: Nicholas Piggin 

> ---
> v5: Removed the assembly one on SCV type system call
> ---
>  arch/powerpc/kernel/entry_64.S  | 2 --
>  arch/powerpc/kernel/interrupt.c | 2 ++
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 33ddfeef4fe9..a91c2def165d 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -108,7 +108,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
>   li  r11,\trapnr
>   std r11,_TRAP(r1)
>   std r12,_CCR(r1)
> - std r3,ORIG_GPR3(r1)
>   addir10,r1,STACK_FRAME_OVERHEAD
>   ld  r11,exception_marker@toc(r2)
>   std r11,-16(r10)/* "regshere" marker */
> @@ -278,7 +277,6 @@ END_BTB_FLUSH_SECTION
>   std r10,_LINK(r1)
>   std r11,_TRAP(r1)
>   std r12,_CCR(r1)
> - std r3,ORIG_GPR3(r1)
>   addir10,r1,STACK_FRAME_OVERHEAD
>   ld  r11,exception_marker@toc(r2)
>   std r11,-16(r10)/* "regshere" marker */
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 46fd195ca659..1a2dec49f811 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -29,6 +29,8 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>  {
>   syscall_fn f;
>  
> + regs->orig_gpr3 = r3;
> +
>   if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
>   BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
>  
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 10/22] powerpc/syscall: Use is_compat_task()

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> Instead of directly comparing task flags with _TIF_32BIT, use
> is_compat_task(). The advantage is that it returns 0 on PPC32
> although _TIF_32BIT is always set.
> 
> Signed-off-by: Christophe Leroy 

Reviewed-by: Nicholas Piggin 
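
For reference, the PPC32 case works because with CONFIG_COMPAT unset the
generic header reduces the helper to a constant, roughly (paraphrased
from the linux/compat.h fallback, not from this patch):

#ifndef CONFIG_COMPAT
#define is_compat_task() (0)
#endif

so the unlikely() branch is discarded at compile time on PPC32.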


> ---
>  arch/powerpc/kernel/interrupt.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 2dac4d2bb1cf..46fd195ca659 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -2,6 +2,8 @@
>  
>  #include 
>  #include 
> +#include 
> +
>  #include 
>  #include 
>  #include 
> @@ -118,7 +120,7 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>   /* May be faster to do array_index_nospec? */
>   barrier_nospec();
>  
> - if (unlikely(is_32bit_task())) {
> + if (unlikely(is_compat_task())) {
>   f = (void *)compat_sys_call_table[r0];
>  
>   r3 &= 0xULL;
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 09/22] powerpc/syscall: Make interrupt.c buildable on PPC32

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> To allow building interrupt.c on PPC32, ifdef out specific PPC64
> code or use helpers which are available on both PPC32 and PPC64.
> 
> Modify Makefile to always build interrupt.o
> 
> Signed-off-by: Christophe Leroy 
> ---
> v5:
> - Also for interrupt exit preparation
> - Opted out kuap related code, ppc32 keeps it in ASM for the time being
> ---
>  arch/powerpc/kernel/Makefile|  4 ++--
>  arch/powerpc/kernel/interrupt.c | 31 ---
>  2 files changed, 26 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index 26ff8c6e06b7..163755b1cef4 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -57,10 +57,10 @@ obj-y := cputable.o 
> syscalls.o \
>  prom.o traps.o setup-common.o \
>  udbg.o misc.o io.o misc_$(BITS).o \
>  of_platform.o prom_parse.o firmware.o \
> -hw_breakpoint_constraints.o
> +hw_breakpoint_constraints.o interrupt.o
>  obj-y+= ptrace/
>  obj-$(CONFIG_PPC64)  += setup_64.o \
> -paca.o nvram_64.o note.o interrupt.o
> +paca.o nvram_64.o note.o
>  obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o
>  obj-$(CONFIG_VDSO32) += vdso32_wrapper.o
>  obj-$(CONFIG_PPC_WATCHDOG)   += watchdog.o
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index d6be4f9a67e5..2dac4d2bb1cf 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>   BUG_ON(!(regs->msr & MSR_RI));
>   BUG_ON(!(regs->msr & MSR_PR));
>   BUG_ON(!FULL_REGS(regs));
> - BUG_ON(regs->softe != IRQS_ENABLED);
> + BUG_ON(arch_irq_disabled_regs(regs));
>  
>  #ifdef CONFIG_PPC_PKEY
>   if (mmu_has_feature(MMU_FTR_PKEY)) {
> @@ -65,7 +65,9 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>   isync();
>   } else
>  #endif
> +#ifdef CONFIG_PPC64
>   kuap_check_amr();
> +#endif

Wouldn't mind trying to get rid of these ifdefs at some point, but 
there's some kuap / keys changes going on recently so I'm happy enough 
to let this settle then look at whether we can refactor.

>  
>   account_cpu_user_entry();
>  
> @@ -77,7 +79,7 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>* frame, or if the unwinder was taught the first stack frame always
>* returns to user with IRQS_ENABLED, this store could be avoided!
>*/
> - regs->softe = IRQS_ENABLED;
> + irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
>  
>   local_irq_enable();
>  
> @@ -151,6 +153,7 @@ static notrace inline bool 
> __prep_irq_for_enabled_exit(bool clear_ri)
>   __hard_EE_RI_disable();
>   else
>   __hard_irq_disable();
> +#ifdef CONFIG_PPC64
>   if (unlikely(lazy_irq_pending_nocheck())) {
>   /* Took an interrupt, may have more exit work to do. */
>   if (clear_ri)
> @@ -162,7 +165,7 @@ static notrace inline bool 
> __prep_irq_for_enabled_exit(bool clear_ri)
>   }
>   local_paca->irq_happened = 0;
>   irq_soft_mask_set(IRQS_ENABLED);
> -
> +#endif
>   return true;
>  }
>  

Do we prefer a blank line before the return, except in trivial cases?

> @@ -216,7 +219,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long 
> r3,
>  
>   CT_WARN_ON(ct_state() == CONTEXT_USER);
>  
> +#ifdef CONFIG_PPC64
>   kuap_check_amr();
> +#endif
>  
>   regs->result = r3;
>  
> @@ -309,7 +314,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long 
> r3,
>  
>   account_cpu_user_exit();
>  
> -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
> +#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */
>   /*
>* We do this at the end so that we do context switch with KERNEL AMR
>*/
> @@ -318,7 +323,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long 
> r3,
>   return ret;
>  }
>  
> -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
> +#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
>  notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, 
> unsigned long msr)
>  {
>  #ifdef CONFIG_PPC_BOOK3E

Why are you building this for 32? I don't mind if it's just to keep 
things similar and make it build for now, but you're not using it yet,
right?
 
Reviewed-by: Nicholas Piggin 

> @@ -333,14 +338,16 @@ notrace unsigned long 
> interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
>   BUG_ON(!(regs->msr & MSR_RI));
>   BUG_ON(!(regs->msr & 

Re: [PATCH v5 08/22] powerpc/syscall: Rename syscall_64.c into interrupt.c

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> syscall_64.c will be reused almost as is for PPC32.
> 
> As this file also contains functions to handle other types
> of interrupts, rename it to interrupt.c.
> 
> Signed-off-by: Christophe Leroy 

Reviewed-by: Nicholas Piggin 

> ---
>  arch/powerpc/kernel/Makefile  | 2 +-
>  arch/powerpc/kernel/{syscall_64.c => interrupt.c} | 0
>  2 files changed, 1 insertion(+), 1 deletion(-)
>  rename arch/powerpc/kernel/{syscall_64.c => interrupt.c} (100%)
> 
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index c173efd66c00..26ff8c6e06b7 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -60,7 +60,7 @@ obj-y   := cputable.o 
> syscalls.o \
>  hw_breakpoint_constraints.o
>  obj-y+= ptrace/
>  obj-$(CONFIG_PPC64)  += setup_64.o \
> -paca.o nvram_64.o note.o syscall_64.o
> +paca.o nvram_64.o note.o interrupt.o
>  obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o
>  obj-$(CONFIG_VDSO32) += vdso32_wrapper.o
>  obj-$(CONFIG_PPC_WATCHDOG)   += watchdog.o
> diff --git a/arch/powerpc/kernel/syscall_64.c 
> b/arch/powerpc/kernel/interrupt.c
> similarity index 100%
> rename from arch/powerpc/kernel/syscall_64.c
> rename to arch/powerpc/kernel/interrupt.c
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 07/22] powerpc/irq: Add stub irq_soft_mask_return() for PPC32

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> To allow building syscall_64.c smoothly on PPC32, add a stub version
> of irq_soft_mask_return().
> 
> Signed-off-by: Christophe Leroy 

Same kind of comment as the other soft mask stuff. Again, not a big deal,
but there might be a way to improve it. For example, make a
debug_syscall_entry(regs) function that ppc64 could put the soft mask
checks into.
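
Roughly like this (a sketch of the idea only, not a tested patch):

static inline void debug_syscall_entry(struct pt_regs *regs)
{
#ifdef CONFIG_PPC64
        if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
                BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
#endif
}

PPC32 would then not need the irq_soft_mask_return() stub at all.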

No big deal; if you don't make any changes now I might see about doing
something like that after your series goes in.

Reviewed-by: Nicholas Piggin 

> ---
>  arch/powerpc/include/asm/hw_irq.h | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/hw_irq.h 
> b/arch/powerpc/include/asm/hw_irq.h
> index 4739f61e632c..56a98936a6a9 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -330,6 +330,11 @@ static inline void irq_soft_mask_regs_set_state(struct 
> pt_regs *regs, unsigned l
>  }
>  #else /* CONFIG_PPC64 */
>  
> +static inline notrace unsigned long irq_soft_mask_return(void)
> +{
> + return 0;
> +}
> +
>  static inline unsigned long arch_local_save_flags(void)
>  {
>   return mfmsr();
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 06/22] powerpc/irq: Rework helpers that manipulate MSR[EE/RI]

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> In preparation for porting PPC32 to C syscall entry/exit,
> rewrite the following helpers as static inline functions and
> add support for PPC32 in them:
>   __hard_irq_enable()
>   __hard_irq_disable()
>   __hard_EE_RI_disable()
>   __hard_RI_enable()
> 
> Then use them in PPC32 version of arch_local_irq_disable()
> and arch_local_irq_enable() to avoid code duplication.
> 

Reviewed-by: Nicholas Piggin 

> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/include/asm/hw_irq.h | 75 +--
>  arch/powerpc/include/asm/reg.h|  1 +
>  2 files changed, 52 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/hw_irq.h 
> b/arch/powerpc/include/asm/hw_irq.h
> index ed0c3b049dfd..4739f61e632c 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -50,6 +50,55 @@
>  
>  #ifndef __ASSEMBLY__
>  
> +static inline void __hard_irq_enable(void)
> +{
> + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
> + wrtee(MSR_EE);
> + else if (IS_ENABLED(CONFIG_PPC_8xx))
> + wrtspr(SPRN_EIE);
> + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
> + __mtmsrd(MSR_EE | MSR_RI, 1);
> + else
> + mtmsr(mfmsr() | MSR_EE);
> +}
> +
> +static inline void __hard_irq_disable(void)
> +{
> + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
> + wrtee(0);
> + else if (IS_ENABLED(CONFIG_PPC_8xx))
> + wrtspr(SPRN_EID);
> + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
> + __mtmsrd(MSR_RI, 1);
> + else
> + mtmsr(mfmsr() & ~MSR_EE);
> +}
> +
> +static inline void __hard_EE_RI_disable(void)
> +{
> + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
> + wrtee(0);
> + else if (IS_ENABLED(CONFIG_PPC_8xx))
> + wrtspr(SPRN_NRI);
> + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
> + __mtmsrd(0, 1);
> + else
> + mtmsr(mfmsr() & ~(MSR_EE | MSR_RI));
> +}
> +
> +static inline void __hard_RI_enable(void)
> +{
> + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
> + return;
> +
> + if (IS_ENABLED(CONFIG_PPC_8xx))
> + wrtspr(SPRN_EID);
> + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
> + __mtmsrd(MSR_RI, 1);
> + else
> + mtmsr(mfmsr() | MSR_RI);
> +}
> +
>  #ifdef CONFIG_PPC64
>  #include 
>  
> @@ -212,18 +261,6 @@ static inline bool arch_irqs_disabled(void)
>  
>  #endif /* CONFIG_PPC_BOOK3S */
>  
> -#ifdef CONFIG_PPC_BOOK3E
> -#define __hard_irq_enable()  wrtee(MSR_EE)
> -#define __hard_irq_disable() wrtee(0)
> -#define __hard_EE_RI_disable()   wrtee(0)
> -#define __hard_RI_enable()   do { } while (0)
> -#else
> -#define __hard_irq_enable()  __mtmsrd(MSR_EE|MSR_RI, 1)
> -#define __hard_irq_disable() __mtmsrd(MSR_RI, 1)
> -#define __hard_EE_RI_disable()   __mtmsrd(0, 1)
> -#define __hard_RI_enable()   __mtmsrd(MSR_RI, 1)
> -#endif
> -
>  #define hard_irq_disable()   do {\
>   unsigned long flags;\
>   __hard_irq_disable();   \
> @@ -322,22 +359,12 @@ static inline unsigned long arch_local_irq_save(void)
>  
>  static inline void arch_local_irq_disable(void)
>  {
> - if (IS_ENABLED(CONFIG_BOOKE))
> - wrtee(0);
> - else if (IS_ENABLED(CONFIG_PPC_8xx))
> - wrtspr(SPRN_EID);
> - else
> - mtmsr(mfmsr() & ~MSR_EE);
> + __hard_irq_disable();
>  }
>  
>  static inline void arch_local_irq_enable(void)
>  {
> - if (IS_ENABLED(CONFIG_BOOKE))
> - wrtee(MSR_EE);
> - else if (IS_ENABLED(CONFIG_PPC_8xx))
> - wrtspr(SPRN_EIE);
> - else
> - mtmsr(mfmsr() | MSR_EE);
> + __hard_irq_enable();
>  }
>  
>  static inline bool arch_irqs_disabled_flags(unsigned long flags)
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index c5a3e856191c..bc4305ba00d0 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -1375,6 +1375,7 @@
>  #define mtmsr(v) asm volatile("mtmsr %0" : \
>: "r" ((unsigned long)(v)) \
>: "memory")
> +#define __mtmsrd(v, l)   BUILD_BUG()
>  #define __MTMSR  "mtmsr"
>  #endif
>  
> -- 
> 2.25.0
> 
> 


Re: [PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> regs->softe doesn't exist on PPC32.
> 
> Add irq_soft_mask_regs_set_state() helper to set regs->softe.
> This helper is a no-op on PPC32.
> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/include/asm/hw_irq.h | 11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/hw_irq.h 
> b/arch/powerpc/include/asm/hw_irq.h
> index 614957f74cee..ed0c3b049dfd 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -38,6 +38,8 @@
>  #define PACA_IRQ_MUST_HARD_MASK  (PACA_IRQ_EE)
>  #endif
>  
> +#endif /* CONFIG_PPC64 */
> +
>  /*
>   * flags for paca->irq_soft_mask
>   */
> @@ -46,8 +48,6 @@
>  #define IRQS_PMI_DISABLED2
>  #define IRQS_ALL_DISABLED(IRQS_DISABLED | IRQS_PMI_DISABLED)
>  
> -#endif /* CONFIG_PPC64 */
> -
>  #ifndef __ASSEMBLY__
>  
>  #ifdef CONFIG_PPC64
> @@ -287,6 +287,10 @@ extern void irq_set_pending_from_srr1(unsigned long 
> srr1);
>  
>  extern void force_external_irq_replay(void);
>  
> +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, 
> unsigned long val)
> +{
> + regs->softe = val;
> +}
>  #else /* CONFIG_PPC64 */
>  
>  static inline unsigned long arch_local_save_flags(void)
> @@ -355,6 +359,9 @@ static inline bool arch_irq_disabled_regs(struct pt_regs 
> *regs)
>  
>  static inline void may_hard_irq_enable(void) { }
>  
> +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, 
> unsigned long val)
> +{
> +}
>  #endif /* CONFIG_PPC64 */
>  
>  #define ARCH_IRQ_INIT_FLAGS  IRQ_NOREQUEST

What I don't like about this is that, where you use it, it kind of
pollutes the ppc32 path with a function which is not valid to use there.

I would prefer if you had this purely so it could compile with:

  if (IS_ENABLED(CONFIG_PPC64)))
  irq_soft_mask_regs_set_state(regs, blah);

And then you could make the ppc32 cause a link error if it did not
get eliminated at compile time (e.g., call an undefined function).
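
For example (a sketch of the link-error trick):

#ifdef CONFIG_PPC64
static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
{
        regs->softe = val;
}
#else
/* Declared but never defined: any call that the compiler fails to
 * eliminate through the IS_ENABLED(CONFIG_PPC64) check breaks the link. */
void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val);
#endif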

You could do the same with the kuap_ functions to change some ifdefs
to IS_ENABLED.

That's just my preference but if you prefer this way I guess that's
okay.

Thanks,
Nick


Re: [PATCH v5 00/22] powerpc/32: Implement C syscall entry/exit

2021-02-08 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
> This series implements C syscall entry/exit for PPC32. It reuses
> the work already done for PPC64.
> 
> This series is based on today's merge-test 
> (b6f72fc05389e3fc694bf5a5fa1bbd33f61879e0)
> 
> In terms on performance we have the following number of cycles on an
> 8xx running null_syscall benchmark:
> - mainline: 296 cycles
> - after patch 4: 283 cycles
> - after patch 16: 304 cycles
> - after patch 17: 348 cycles
> - at the end of the series: 320 cycles
> 
> So in summary, we have a performance degradation of 8% on null_syscall.
> 
> I think it is not a big degradation; it is worth it.

I guess it's 13% from 283. But it's very nice to use the shared C code.

There might be a few more percent speedup in there we can find later.

Thanks,
Nick



Re: [PATCH v2 2/2] memblock: do not start bottom-up allocations with kernel_end

2021-02-08 Thread Thiago Jung Bauermann


Mike Rapoport  writes:

> On Sat, Jan 23, 2021 at 06:09:11PM -0800, Andrew Morton wrote:
>> On Fri, 22 Jan 2021 01:37:14 -0300 Thiago Jung Bauermann 
>>  wrote:
>> 
>> > Mike Rapoport  writes:
>> > 
>> > > > Signed-off-by: Roman Gushchin 
>> > > 
>> > > Reviewed-by: Mike Rapoport 
>> > 
>> > I've seen a couple of spurious triggers of the WARN_ONCE() removed by this
>> > patch. This happens on some ppc64le bare metal (powernv) server machines 
>> > with
>> > CONFIG_SWIOTLB=y and crashkernel=4G, as described in a candidate patch I 
>> > posted
>> > to solve this issue in a different way:
>> > 
>> > https://lore.kernel.org/linuxppc-dev/20201218062103.76102-1-bauer...@linux.ibm.com/
>> > 
>> > Since this patch solves that problem, is it possible to include it in the 
>> > next
>> > feasible v5.11-rcX, with the following tag?
>> 
>> We could do this, if we're confident that this patch doesn't depend on
>> [1/2] "mm: cma: allocate cma areas bottom-up"?  I think it is...
>
> I think it does not depend on CMA bottom-up allocation; it's rather the other
> way around: without this, CMA bottom-up allocation could fail with KASLR
> enabled.

I noticed that this patch is now upstream as:

2dcb39645441 memblock: do not start bottom-up allocations with kernel_end

> Still, this patch may need updates to the way x86 does early reservations:
>
> https://lore.kernel.org/lkml/20210115083255.12744-1-r...@kernel.org

... but the patches from this link still aren't. Isn't this a potential
problem for x86?

The patch series on the link above is now superseded by v2:

https://lore.kernel.org/linux-mm/20210128105711.10428-1-r...@kernel.org/

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


[PATCH v5 05/10] powerpc: dts: akebono: Harmonize EHCI/OHCI DT nodes name

2021-02-08 Thread Serge Semin
In accordance with the Generic EHCI/OHCI bindings, the corresponding node
name is supposed to comply with the Generic USB HCD DT schema, which
requires the USB nodes to have a name acceptable to the regexp:
"^usb(@.*)?". Make sure the "generic-ehci"- and "generic-ohci"-compatible
nodes are correctly named.

Signed-off-by: Serge Semin 
Acked-by: Krzysztof Kozlowski 
---
 arch/powerpc/boot/dts/akebono.dts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/boot/dts/akebono.dts 
b/arch/powerpc/boot/dts/akebono.dts
index df18f8dc4642..343326c30380 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -126,7 +126,7 @@ SATA0: sata@301 {
interrupts = <93 2>;
};
 
-   EHCI0: ehci@3001000 {
+   EHCI0: usb@3001000 {
compatible = "ibm,476gtr-ehci", "generic-ehci";
reg = <0x300 0x1000 0x0 0x1>;
interrupt-parent = <&MPIC>;
@@ -140,14 +140,14 @@ SD0: sd@300 {
interrupt-parent = <&MPIC>;
};
 
-   OHCI0: ohci@3001001 {
+   OHCI0: usb@3001001 {
compatible = "ibm,476gtr-ohci", "generic-ohci";
reg = <0x300 0x1001 0x0 0x1>;
interrupt-parent = <&MPIC>;
interrupts = <89 1>;
};
 
-   OHCI1: ohci@3001002 {
+   OHCI1: usb@3001002 {
compatible = "ibm,476gtr-ohci", "generic-ohci";
reg = <0x300 0x1002 0x0 0x1>;
interrupt-parent = <&MPIC>;
-- 
2.29.2



[PATCH v5 00/10] dt-bindings: usb: Harmonize xHCI/EHCI/OHCI/DWC3 nodes name

2021-02-08 Thread Serge Semin
As the subject states, this series is an attempt to harmonize the xHCI,
EHCI, OHCI and DWC USB3 DT nodes with the DT schema introduced in the
framework of the patchset [1].

Firstly, as Krzysztof suggested, we've deprecated support of DWC USB3
controllers with the "synopsys,"-vendor-prefix compatible string in favor
of the ones with the valid "snps," prefix. This is done in all the DTS
files that happened to define such nodes.

Secondly, we suggest fixing the snps,quirk-frame-length-adjustment
property declaration in the Amlogic meson-g12-common.dtsi DTS file, since
it has been erroneously declared as boolean while having the uint32 type.
Neil said it was OK to initialize that property with the value 0x20.

Thirdly, the main part of the patchset concerns fixing the xHCI, EHCI/OHCI
and DWC USB3 DT node names since, in accordance with their DT schema, the
corresponding node name is supposed to comply with the Generic USB HCD DT
schema, which requires the USB nodes to have a name acceptable to the
regexp: "^usb(@.*)?". Such a requirement was applicable even before we
introduced the new DT schema in [1], but as we can see it hasn't been
strictly followed in a lot of the DTS files. Since the DT schema is now
available, the automated DTS validation shall make sure that the rule
isn't violated.

Note most of these patches were part of the last three patches of [1].
But since there is no way to have them merged in a combined manner, I had
to move them to a dedicated series and split them up so that they can be
accepted by the corresponding subsystem maintainers one by one.

[1] Link: 
https://lore.kernel.org/linux-usb/20201014101402.18271-1-sergey.se...@baikalelectronics.ru/
Changelog v1:
- As Krzysztof suggested I've created a script which checked whether the
  node names had been also updated in all the depended dts files. As a
  result I found two more files which should have been also modified:
  arch/arc/boot/dts/{axc003.dtsi,axc003_idu.dtsi}
- Correct the USB DWC3 nodes name found in
  arch/arm64/boot/dts/apm/{apm-storm.dtsi,apm-shadowcat.dtsi} too.

Link: 
https://lore.kernel.org/linux-usb/20201020115959.2658-1-sergey.se...@baikalelectronics.ru
Changelog v2:
- Drop the patch:
  [PATCH 01/29] usb: dwc3: Discard synopsys,dwc3 compatibility string
  and get back the one which marks the "synopsys,dwc3" compatible string
  as deprecated into the DT schema related series.
- Drop the patches:
  [PATCH 03/29] arm: dts: am437x: Correct DWC USB3 compatible string
  [PATCH 04/29] arm: dts: exynos: Correct DWC USB3 compatible string
  [PATCH 07/29] arm: dts: bcm53x: Harmonize EHCI/OHCI DT nodes name
  [PATCH 08/29] arm: dts: stm32: Harmonize EHCI/OHCI DT nodes name
  [PATCH 16/29] arm: dts: bcm5301x: Harmonize xHCI DT nodes name
  [PATCH 19/29] arm: dts: exynos: Harmonize DWC USB3 DT nodes name
  [PATCH 21/29] arm: dts: ls1021a: Harmonize DWC USB3 DT nodes name
  [PATCH 22/29] arm: dts: omap5: Harmonize DWC USB3 DT nodes name
  [PATCH 24/29] arm64: dts: allwinner: h6: Harmonize DWC USB3 DT nodes name
  [PATCH 26/29] arm64: dts: exynos: Harmonize DWC USB3 DT nodes name
  [PATCH 27/29] arm64: dts: layerscape: Harmonize DWC USB3 DT nodes name
  since they have been applied to the corresponding maintainers repos.
- Fix drivers/usb/dwc3/dwc3-qcom.c to be looking for the "usb@"-prefixed
  sub-node and falling back to the "dwc3@"-prefixed one on failure.

Link: 
https://lore.kernel.org/linux-usb/2020091552.15593-1-sergey.se...@baikalelectronics.ru
Changelog v3:
- Drop the patches:
  [PATCH v2 04/18] arm: dts: hisi-x5hd2: Harmonize EHCI/OHCI DT nodes name
  [PATCH v2 06/18] arm64: dts: hisi: Harmonize EHCI/OHCI DT nodes name
  [PATCH v2 07/18] mips: dts: jz47x: Harmonize EHCI/OHCI DT nodes name
  [PATCH v2 08/18] mips: dts: sead3: Harmonize EHCI/OHCI DT nodes name
  [PATCH v2 09/18] mips: dts: ralink: mt7628a: Harmonize EHCI/OHCI DT nodes name
  [PATCH v2 11/18] arm64: dts: marvell: cp11x: Harmonize xHCI DT nodes name
  [PATCH v2 12/18] arm: dts: marvell: armada-375: Harmonize DWC USB3 DT nodes 
name
  [PATCH v2 16/18] arm64: dts: hi3660: Harmonize DWC USB3 DT nodes name
  since they have been applied to the corresponding maintainers repos.

Link: 
https://lore.kernel.org/linux-usb/20201205155621.3045-1-sergey.se...@baikalelectronics.ru
Changelog v4:
- Just resend.

Link: 
https://lore.kernel.org/linux-usb/20201210091756.18057-1-sergey.se...@baikalelectronics.ru/
Changelog v5:
- Drop the patch:
  [PATCH v4 02/10] arm64: dts: amlogic: meson-g12: Set FL-adj property value
  since it has been applied to the corresponding maintainers repos.
- Get back the patch:
  [PATCH 21/29] arm: dts: ls1021a: Harmonize DWC USB3 DT nodes name
  as it has been missing in the kernel 5.11-rc7
- Rebase onto the kernel 5.11-rc7

Cc: Vineet Gupta 
Cc: Rafal Milecki 
Cc: Wei Xu 
Cc: Thomas Bogendoerfer 
Cc: Michael Ellerman 
Cc: Jason Cooper 
Cc: Santosh Shilimkar 
Cc: Shawn Guo 
Cc: Benoit Cousson 
Cc: Patrice Chotard 
Cc: Maxime Ripard 
Cc: Khuong Dinh 
Cc: Andy 

Re: [PATCH] ASoC: fsl: constify static snd_soc_dai_ops structs

2021-02-08 Thread Mark Brown
On Sat, 6 Feb 2021 23:58:49 +0100, Rikard Falkeborn wrote:
> The only usage of these is to assign their address to the 'ops' field in
> the snd_soc_dai_driver struct, which is a pointer to const. Make them
> const to allow the compiler to put them in read-only memory.

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[1/1] ASoC: fsl: constify static snd_soc_dai_ops structs
  commit: 38d89a564847048c0f6fe53a829d15edb4f21da3

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Re: [PATCH v4 11/23] powerpc/syscall: Rename syscall_64.c into syscall.c

2021-02-08 Thread Christophe Leroy




On 26/01/2021 at 11:21, Nicholas Piggin wrote:

Excerpts from Christophe Leroy's message of January 26, 2021 12:48 am:

syscall_64.c will be reused almost as is for PPC32.

Rename it syscall.c


Could you rename it to interrupt.c instead? A system call is an
interrupt, and the file now also has code to return from other
interrupts as well, and it matches the new asm/interrupt.h from
the interrupts series.



Done in v5

Christophe


Re: [PATCH v4 14/23] powerpc/syscall: Save r3 in regs->orig_r3

2021-02-08 Thread Christophe Leroy




On 26/01/2021 at 11:18, Nicholas Piggin wrote:

Excerpts from Christophe Leroy's message of January 26, 2021 12:48 am:

Save r3 in regs->orig_r3 in system_call_exception()

Signed-off-by: Christophe Leroy 
---
  arch/powerpc/kernel/entry_64.S | 1 -
  arch/powerpc/kernel/syscall.c  | 2 ++
  2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index aa1af139d947..a562a4240aa6 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -278,7 +278,6 @@ END_BTB_FLUSH_SECTION
std r10,_LINK(r1)
std r11,_TRAP(r1)
std r12,_CCR(r1)
-   std r3,ORIG_GPR3(r1)
addir10,r1,STACK_FRAME_OVERHEAD
ld  r11,exception_marker@toc(r2)
std r11,-16(r10)/* "regshere" marker */


This misses system_call_vectored.


Oops yes, this patch was cooked before scv was introduced. Fixed in v5.

Thanks
Christophe


Re: [PATCH v4 20/23] powerpc/syscall: Do not check unsupported scv vector on PPC32

2021-02-08 Thread Christophe Leroy




On 26/01/2021 at 11:16, Nicholas Piggin wrote:

Excerpts from Christophe Leroy's message of January 26, 2021 12:48 am:

Only PPC64 has scv. No need to check the 0x7ff0 trap on PPC32.

And ignore the scv parameter in syscall_exit_prepare (Save 14 cycles
346 => 332 cycles)

Signed-off-by: Christophe Leroy 
---
  arch/powerpc/kernel/entry_32.S | 1 -
  arch/powerpc/kernel/syscall.c  | 7 +--
  2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 9922a04650f7..6ae9c7bcb06c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -343,7 +343,6 @@ transfer_to_syscall:
  
  ret_from_syscall:

addi    r4,r1,STACK_FRAME_OVERHEAD
-   li  r5,0
bl  syscall_exit_prepare
  #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
/* If the process has its own DBCR0 value, load it up.  The internal
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 476909b11051..30f8a397a522 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -86,7 +86,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
local_irq_enable();
  
  	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {

-   if (unlikely(regs->trap == 0x7ff0)) {
+   if (IS_ENABLED(CONFIG_PPC64) && unlikely(regs->trap == 0x7ff0)) 
{
/* Unsupported scv vector */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
return regs->gpr[3];
@@ -109,7 +109,7 @@ notrace long system_call_exception(long r3, long r4, long 
r5,
r8 = regs->gpr[8];
  
  	} else if (unlikely(r0 >= NR_syscalls)) {

-   if (unlikely(regs->trap == 0x7ff0)) {
+   if (IS_ENABLED(CONFIG_PPC64) && unlikely(regs->trap == 0x7ff0)) 
{


Perhaps this could be hidden behind a function like trap_is_scv()?

trap_is_unsupported_scv() ?



Ok, I did that in v5

Thanks
Christophe


Re: [RFC PATCH 1/3] powerpc/lib: implement strlen() in assembly for PPC64

2021-02-08 Thread Christophe Leroy




On 05/07/2018 at 10:53, Christophe Leroy wrote:

The generic implementation of strlen() reads strings byte by byte.

This patch implements strlen() in assembly, reading entire words at a
time, in the same spirit as what some other arches and glibc do.

strlen() selftest on an  provides the following values:

Before the patch (ie with the generic strlen() in lib/string.c):

After the patch:



I think we should implement it in C using word-at-a-time
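
Something along these lines, as a rough shape (an untested sketch that
assumes a word-aligned string and a 64-bit unsigned long; a real version
needs a byte-by-byte prologue for the unaligned head):

    #include <stddef.h>

    /* non-zero iff 'x' contains at least one 0x00 byte */
    static inline unsigned long has_zero(unsigned long x)
    {
        return (x - 0x0101010101010101UL) & ~x & 0x8080808080808080UL;
    }

    size_t my_strlen(const char *s)   /* stands in for strlen() */
    {
        const char *start = s;
        const unsigned long *p = (const unsigned long *)s;

        /* skip whole words that contain no zero byte */
        while (!has_zero(*p))
            p++;
        /* locate the exact NUL inside the final word */
        for (s = (const char *)p; *s; s++)
            ;
        return s - start;
    }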

Christophe




Signed-off-by: Christophe Leroy 
---
  This series applies on top of the PPC32 strlen optimisation series

  Untested

  arch/powerpc/include/asm/string.h |  3 +-
  arch/powerpc/lib/Makefile |  4 +-
  arch/powerpc/lib/strlen_64.S  | 88 +++
  3 files changed, 91 insertions(+), 4 deletions(-)
  create mode 100644 arch/powerpc/lib/strlen_64.S

diff --git a/arch/powerpc/include/asm/string.h 
b/arch/powerpc/include/asm/string.h
index 1647de15a31e..8fdcb532de72 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -13,6 +13,7 @@
  #define __HAVE_ARCH_MEMCHR
  #define __HAVE_ARCH_MEMSET16
  #define __HAVE_ARCH_MEMCPY_FLUSHCACHE
+#define __HAVE_ARCH_STRLEN
  
  extern char * strcpy(char *,const char *);

  extern char * strncpy(char *,const char *, __kernel_size_t);
@@ -50,8 +51,6 @@ static inline void *memset64(uint64_t *p, uint64_t v, 
__kernel_size_t n)
return __memset64(p, v, n * 8);
  }
  #else
-#define __HAVE_ARCH_STRLEN
-
  extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
  #endif
  #endif /* __KERNEL__ */
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 670286808928..93706b4cdbde 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -12,7 +12,7 @@ CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
  
  obj-y += string.o alloc.o code-patching.o feature-fixups.o
  
-obj-$(CONFIG_PPC32)	+= div64.o copy_32.o crtsavres.o strlen_32.o

+obj-$(CONFIG_PPC32)+= div64.o copy_32.o crtsavres.o
  
  # See corresponding test in arch/powerpc/Makefile

  # 64-bit linker creates .sfpr on demand for final link (vmlinux),
@@ -33,7 +33,7 @@ obj64-$(CONFIG_ALTIVEC)   += vmx-helper.o
  obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
  
  obj-y			+= checksum_$(BITS).o checksum_wrappers.o \

-  string_$(BITS).o memcmp_$(BITS).o
+  string_$(BITS).o memcmp_$(BITS).o strlen_$(BITS).o
  
  obj-y			+= sstep.o ldstfp.o quad.o

  obj64-y   += quad.o
diff --git a/arch/powerpc/lib/strlen_64.S b/arch/powerpc/lib/strlen_64.S
new file mode 100644
index ..c9704f2b697d
--- /dev/null
+++ b/arch/powerpc/lib/strlen_64.S
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * strlen() for PPC64
+ *
+ * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
+ *
+ * Inspired from glibc implementation
+ */
+#include 
+#include 
+#include 
+
+   .text
+
+/*
+ * Algorithm:
+ *
+ * 1) Given a word 'x', we can test to see if it contains any 0 bytes
+ *    by subtracting 0x01010101, and seeing if any of the high bits of each
+ *    byte changed from 0 to 1. This works because the least significant
+ *    0 byte must have had no incoming carry (otherwise it's not the least
+ *    significant), so it is 0x00 - 0x01 == 0xff. For all other
+ *    byte values, either they have the high bit set initially, or when
+ *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
+ *    have their high bit set. The expression here is
+ *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
+ *    there were no 0x00 bytes in the word.  You get 0x80 in bytes that
+ *    match, but possibly false 0x80 matches in the next more significant
+ *    byte to a true match due to carries.  For little-endian this is
+ *    of no consequence since the least significant match is the one
+ *    we're interested in, but big-endian needs method 2 to find which
+ *    byte matches.
+ * 2) Given a word 'x', we can test to see _which_ byte was zero by
+ *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
+ *    This produces 0x80 in each byte that was zero, and 0x00 in all
+ *    the other bytes. The '| ~0x80808080' clears the low 7 bits in each
+ *    byte, and the '| x' part ensures that bytes with the high bit set
+ *    produce 0x00. The addition will carry into the high bit of each byte
+ *    iff that byte had one of its low 7 bits set. We can then just see
+ *    which was the most significant bit set and divide by 8 to find how
+ *    many to add to the index.
+ *    This is from the book 'The PowerPC Compiler Writer's Guide',
+ *    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
+ */
+
+_GLOBAL(strlen)
+   andi.   r0, r3, 7
+   lis r7, 0x0101
+   addi    r10, r3, -8
+   addic   r7, r7, 0x0101  /* r7 = 0x01010101 (lomagic) & clear XER[CA] */
+   rldimi  

Re: [PATCH 2/8] powerpc/signal: Add unsafe_copy_{vsx, fpr}_from_user()

2021-02-08 Thread Christophe Leroy




On 08/02/2021 at 18:14, Christopher M. Riedl wrote:

On Sun Feb 7, 2021 at 4:12 AM CST, Christophe Leroy wrote:



On 06/02/2021 at 18:39, Christopher M. Riedl wrote:

On Sat Feb 6, 2021 at 10:32 AM CST, Christophe Leroy wrote:



On 20/10/2020 at 04:01, Christopher M. Riedl wrote:

On Fri Oct 16, 2020 at 10:48 AM CDT, Christophe Leroy wrote:



On 15/10/2020 at 17:01, Christopher M. Riedl wrote:

Reuse the "safe" implementation from signal.c except for calling
unsafe_copy_from_user() to copy into a local buffer. Unlike the
unsafe_copy_{vsx,fpr}_to_user() functions the "copy from" functions
cannot use unsafe_get_user() directly to bypass the local buffer since
doing so significantly reduces signal handling performance.


Why can't the functions use unsafe_get_user(), why does it significantly
reduces signal handling
performance ? How much significant ? I would expect that not going
through an intermediate memory
area would be more efficient



Here is a comparison, 'unsafe-signal64-regs' avoids the intermediate buffer:

|  | hash   | radix  |
|  | -- | -- |
| linuxppc/next| 289014 | 158408 |
| unsafe-signal64  | 298506 | 253053 |
| unsafe-signal64-regs | 254898 | 220831 |

I have not figured out the 'why' yet. As you mentioned in your series,
technically calling __copy_tofrom_user() is overkill for these
operations. The only obvious difference between unsafe_put_user() and
unsafe_get_user() is that we don't have asm-goto for the 'get' variant.
Instead we wrap with unsafe_op_wrap() which inserts a conditional and
then goto to the label.

Implementations:

#define unsafe_copy_fpr_from_user(task, from, label)   do {\
   struct task_struct *__t = task; \
   u64 __user *buf = (u64 __user *)from;   \
   int i;  \
   \
   for (i = 0; i < ELF_NFPREG - 1; i++)\
   unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \
   unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label);\
} while (0)

#define unsafe_copy_vsx_from_user(task, from, label)   do {\
   struct task_struct *__t = task; \
   u64 __user *buf = (u64 __user *)from;   \
   int i;  \
   \
   for (i = 0; i < ELF_NVSRHALFREG ; i++)  \
   
unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
   &buf[i], label);\
} while (0)
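
For context, the unsafe_op_wrap() mentioned above boils down to this
pattern (simplified from the powerpc uaccess headers of that era; the
helper names here are approximate):

    /* put: asm-goto lets the fixup code branch straight to 'label' */
    /* get: no asm-goto with output operands yet, so wrap a primitive
     * that returns an error code in an explicit test-and-goto */
    #define unsafe_op_wrap(op, label)  do { if (unlikely(op)) goto label; } while (0)
    #define unsafe_get_user(x, p, e)   unsafe_op_wrap(__get_user_allowed(x, p), e)

so every unsafe_get_user() pays for a conditional branch that
unsafe_put_user() avoids.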



Do you have CONFIG_PROVE_LOCKING or CONFIG_DEBUG_ATOMIC_SLEEP enabled in
your config ?


I don't have these set in my config (ppc64le_defconfig). I think I
figured this out - the reason for the lower signal throughput is the
barrier_nospec() in __get_user_nocheck(). When looping we incur that
cost on every iteration. Commenting it out results in signal performance
of ~316K w/ hash on the unsafe-signal64-regs branch. Obviously the
barrier is there for a reason but it is quite costly.
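
Schematically, the cost difference is between this (the barrier is hidden
inside __get_user_nocheck(), so it runs once per register):

    for (i = 0; i < ELF_NFPREG - 1; i++) {
        barrier_nospec();   /* effectively executed inside each unsafe_get_user() */
        unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label);
    }

and hoisting it so it runs once per user-access window, which is roughly
what the patch linked below does:

    barrier_nospec();       /* once, when the window is opened */
    for (i = 0; i < ELF_NFPREG - 1; i++)
        unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label);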


Interesting.

Can you try with the patch I just sent out
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/c72f014730823b413528e90ab6c4d3bcb79f8497.1612692067.git.christophe.le...@csgroup.eu/


Yeah that patch solves the problem. Using unsafe_get_user() in a loop is
actually faster on radix than using the intermediary buffer step. A
summary of results below (unsafe-signal64-v6 uses unsafe_get_user() and
avoids the local buffer):

 |  | hash   | radix  |
 |  | -- | -- |
 | unsafe-signal64-v5   | 194533 | 230089 |
 | unsafe-signal64-v6   | 176739 | 202840 |
 | unsafe-signal64-v5+barrier patch | 203037 | 234936 |
 | unsafe-signal64-v6+barrier patch | 205484 | 241030 |

I am still expecting some comments/feedback on my v5 before sending out
v6. Should I include your patch in my series as well?



My patch is now flagged "under review" in patchwork, so I suppose Michael picked
it up already.

Christophe


Re: [PATCH 2/8] powerpc/signal: Add unsafe_copy_{vsx,fpr}_from_user()

2021-02-08 Thread Christopher M. Riedl
On Sun Feb 7, 2021 at 4:12 AM CST, Christophe Leroy wrote:
>
>
> On 06/02/2021 at 18:39, Christopher M. Riedl wrote:
> > On Sat Feb 6, 2021 at 10:32 AM CST, Christophe Leroy wrote:
> >>
> >>
> >> On 20/10/2020 at 04:01, Christopher M. Riedl wrote:
> >>> On Fri Oct 16, 2020 at 10:48 AM CDT, Christophe Leroy wrote:
> 
> 
>  On 15/10/2020 at 17:01, Christopher M. Riedl wrote:
> > Reuse the "safe" implementation from signal.c except for calling
> > unsafe_copy_from_user() to copy into a local buffer. Unlike the
> > unsafe_copy_{vsx,fpr}_to_user() functions the "copy from" functions
> > cannot use unsafe_get_user() directly to bypass the local buffer since
> > doing so significantly reduces signal handling performance.
> 
>  Why can't the functions use unsafe_get_user(), why does it significantly
>  reduces signal handling
>  performance ? How much significant ? I would expect that not going
>  through an intermediate memory
>  area would be more efficient
> 
> >>>
> >>> Here is a comparison, 'unsafe-signal64-regs' avoids the intermediate 
> >>> buffer:
> >>>
> >>>   |  | hash   | radix  |
> >>>   |  | -- | -- |
> >>>   | linuxppc/next| 289014 | 158408 |
> >>>   | unsafe-signal64  | 298506 | 253053 |
> >>>   | unsafe-signal64-regs | 254898 | 220831 |
> >>>
> >>> I have not figured out the 'why' yet. As you mentioned in your series,
> >>> technically calling __copy_tofrom_user() is overkill for these
> >>> operations. The only obvious difference between unsafe_put_user() and
> >>> unsafe_get_user() is that we don't have asm-goto for the 'get' variant.
> >>> Instead we wrap with unsafe_op_wrap() which inserts a conditional and
> >>> then goto to the label.
> >>>
> >>> Implementations:
> >>>
> >>>   #define unsafe_copy_fpr_from_user(task, from, label)   do {\
> >>>  struct task_struct *__t = task; \
> >>>  u64 __user *buf = (u64 __user *)from;   \
> >>>  int i;  \
> >>>  \
> >>>  for (i = 0; i < ELF_NFPREG - 1; i++)\
> >>>  unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \
> >>>  unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label);\
> >>>   } while (0)
> >>>
> >>>   #define unsafe_copy_vsx_from_user(task, from, label)   do {\
> >>>  struct task_struct *__t = task; \
> >>>  u64 __user *buf = (u64 __user *)from;   \
> >>>  int i;  \
> >>>  \
> >>>  for (i = 0; i < ELF_NVSRHALFREG ; i++)  \
> >>>  
> >>> unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
> >>>  &buf[i], label);\
> >>>   } while (0)
> >>>
> >>
> >> Do you have CONFIG_PROVE_LOCKING or CONFIG_DEBUG_ATOMIC_SLEEP enabled in
> >> your config ?
> > 
> > I don't have these set in my config (ppc64le_defconfig). I think I
> > figured this out - the reason for the lower signal throughput is the
> > barrier_nospec() in __get_user_nocheck(). When looping we incur that
> > cost on every iteration. Commenting it out results in signal performance
> > of ~316K w/ hash on the unsafe-signal64-regs branch. Obviously the
> > barrier is there for a reason but it is quite costly.
>
> Interesting.
>
> Can you try with the patch I just sent out
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/c72f014730823b413528e90ab6c4d3bcb79f8497.1612692067.git.christophe.le...@csgroup.eu/

Yeah that patch solves the problem. Using unsafe_get_user() in a loop is
actually faster on radix than using the intermediary buffer step. A
summary of results below (unsafe-signal64-v6 uses unsafe_get_user() and
avoids the local buffer):

|  | hash   | radix  |
|  | -- | -- |
| unsafe-signal64-v5   | 194533 | 230089 |
| unsafe-signal64-v6   | 176739 | 202840 |
| unsafe-signal64-v5+barrier patch | 203037 | 234936 |
| unsafe-signal64-v6+barrier patch | 205484 | 241030 |

I am still expecting some comments/feedback on my v5 before sending out
v6. Should I include your patch in my series as well?

>
> Thanks
> Christophe



Re: [PATCH v2 1/1] powerpc/kvm: Save Timebase Offset to fix sched_clock() while running guest code.

2021-02-08 Thread Leonardo Bras
Hello Nick,

On Sat, 2021-02-06 at 13:03 +1000, Nicholas Piggin wrote:
> Excerpts from Leonardo Bras's message of February 5, 2021 5:01 pm:
> > Hey Nick, thanks for reviewing :)
> > 
> > On Fri, 2021-02-05 at 16:28 +1000, Nicholas Piggin wrote:
> > > Excerpts from Leonardo Bras's message of February 5, 2021 4:06 pm:
> > > > Before guest entry, the TBU40 register is changed to reflect the guest timebase.
> > > > After exiting the guest, the register is reverted to its original value.
> > > > 
> > > > If one tries to get the timestamp from host between those changes, it
> > > > will present an incorrect value.
> > > > 
> > > > An example would be trying to add a tracepoint in
> > > > kvmppc_guest_entry_inject_int(), which depending on last tracepoint
> > > > acquired could actually cause the host to crash.
> > > > 
> > > > Save the Timebase Offset to PACA and use it on sched_clock() to always
> > > > get the correct timestamp.
> > > 
> > > Ouch. Not sure how reasonable it is to half switch into guest registers 
> > > and expect to call into the wider kernel, fixing things up as we go. 
> > > What if mftb is used in other places?
> > 
> > IIUC, the CPU is not supposed to call anything as host between guest
> > entry and guest exit, except guest-related cases, like
> 
> When I say "call", I'm including tracing in that. If a function is not 
> marked as no trace, then it will call into the tracing subsystem.
> 
> > kvmppc_guest_entry_inject_int(), but anyway, if something calls mftb it
> > will still get the same value as before.
> 
> Right, so it'll be out of whack again.
> 
> > This is only supposed to change stuff that depends on sched_clock, like
> > Tracepoints, that can happen in those exceptions.
> 
> If they depend on sched_clock that's one thing. Do they definitely have 
> no dependencies on mftb from other calls?

We could change that on get_tb() or mftb() @ timebase.h, which would
have a broader reach, but would not reach any mftb from asm code.

> > > Especially as it doesn't seem like there is a reason that function _has_
> > > to be called after the timebase is switched to guest, that's just how 
> > > the code is structured.
> > 
> > Correct, but if called, like in rb routines, used by tracepoints, the
> > difference between last tb and current (lower) tb may cause the CPU to
> > trap PROGRAM exception, crashing host. 
> 
> Yes, so I agree with Michael any function that is involved when we begin 
> to switch into guest context (or have not completed switching back to 
> host going the other way) should be marked as no trace (noinstr even, 
> perhaps).

Sure, that would avoid having to get paca->tb_offset for every mftb()
called, and avoid inconsistencies when different ways to get time are
used in code.

On the other hand, it would make it very hard to debug functions like
kvmppc_guest_entry_inject_int() as I am doing right now.

> 
> > > As a local hack to work out a bug okay. If you really need it upstream 
> > > could you put it under a debug config option?
> > 
> > You mean something that is automatically selected whenever those
> > configs are enabled? 
> > 
> > CONFIG_TRACEPOINT && CONFIG_KVM_BOOK3S_HANDLER && CONFIG_PPC_BOOK3S_64
> > 
> > Or something the user need to select himself in menuconfig?
> 
> Yeah I meant a default n thing under powerpc kernel debugging somewhere.

So, IIUC all we can do is split this in 2 changes:
1 - Adding notrace to those functions
2 - Introducing a kernel debug config that reverts (1) and 'fixes' mftb

If that's correct, I have some ideas we can use. 

For debug option, should we add the offset on get_tb() or mftb()?

Another option would be adding this tb_offset only in the routines
used by tracing. That would probably mean having to add a function
in arch-generic code, but it is still an option.
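
For the debug option, a get_tb()-level hook could look something like this
sketch (the Kconfig symbol and the tb_offset paca field are placeholders
here, not existing symbols):

    static inline u64 get_tb_debug(void)
    {
        u64 tb = mftb();

    #ifdef CONFIG_PPC_DEBUG_GUEST_TB    /* hypothetical debug option */
        /* compensate for the guest timebase while inside the KVM
         * entry/exit window, so host timestamps stay monotonic */
        tb -= get_paca()->tb_offset;
    #endif
        return tb;
    }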

What do you think?

> 
> Thanks,
> Nick

Thank you!
Leonardo Bras



[PATCH v5 22/22] powerpc/32: Handle bookE debugging in C in syscall entry/exit

2021-02-08 Thread Christophe Leroy
The handling of SPRN_DBCR0 and other registers can easily
be done in C instead of ASM.

Signed-off-by: Christophe Leroy 
---
v5: New
---
 arch/powerpc/include/asm/reg_booke.h |  3 +++
 arch/powerpc/kernel/entry_32.S   |  7 ---
 arch/powerpc/kernel/head_32.h| 15 --
 arch/powerpc/kernel/head_booke.h | 19 --
 arch/powerpc/kernel/interrupt.c  | 29 +++-
 5 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/include/asm/reg_booke.h 
b/arch/powerpc/include/asm/reg_booke.h
index 262782f08fd4..17b8dcd9a40d 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -691,6 +691,9 @@
 #define mttmr(rn, v)   asm volatile(MTTMR(rn, %0) : \
 : "r" ((unsigned long)(v)) \
 : "memory")
+
+extern unsigned long global_dbcr0[];
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_POWERPC_REG_BOOKE_H__ */
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index a574201b0eb6..8dea4d3b1d06 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -342,13 +342,6 @@ transfer_to_syscall:
 ret_from_syscall:
addi    r4,r1,STACK_FRAME_OVERHEAD
bl  syscall_exit_prepare
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-   /* If the process has its own DBCR0 value, load it up.  The internal
-  debug mode bit tells us that dbcr0 should be loaded. */
-   lwz r0,THREAD+THREAD_DBCR0(r2)
-   andis.  r10,r0,DBCR0_IDM@h
-   bnel-   load_dbcr0
-#endif
 #ifdef CONFIG_PPC_47x
lis r4,icache_44x_need_flush@ha
lwz r5,icache_44x_need_flush@l(r4)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 5001c6ecc3ec..961b1ce3b6bf 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -153,21 +153,6 @@
SAVE_4GPRS(3, r11)
SAVE_2GPRS(7, r11)
addi    r2,r12,-THREAD
-#if defined(CONFIG_40x)
-   /* Check to see if the dbcr0 register is set up to debug.  Use the
-  internal debug mode bit to do this. */
-   lwz r12,THREAD_DBCR0(r12)
-   andis.  r12,r12,DBCR0_IDM@h
-   beq+3f
-   /* From user and task is ptraced - load up global dbcr0 */
-   li  r12,-1  /* clear all pending debug events */
-   mtspr   SPRN_DBSR,r12
-   lis r11,global_dbcr0@ha
-   addi    r11,r11,global_dbcr0@l
-   lwz r12,0(r11)
-   mtspr   SPRN_DBCR0,r12
-3:
-#endif
b   transfer_to_syscall /* jump to handler */
 .endm
 
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 5f565232b99d..47857795f50a 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -130,25 +130,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
SAVE_2GPRS(7, r11)
 
addi    r2,r10,-THREAD
-   /* Check to see if the dbcr0 register is set up to debug.  Use the
-  internal debug mode bit to do this. */
-   lwz r12,THREAD_DBCR0(r10)
-   andis.  r12,r12,DBCR0_IDM@h
-   beq+3f
-   /* From user and task is ptraced - load up global dbcr0 */
-   li  r12,-1  /* clear all pending debug events */
-   mtspr   SPRN_DBSR,r12
-   lis r11,global_dbcr0@ha
-   addi    r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
-   lwz r10, TASK_CPU(r2)
-   slwi    r10, r10, 2
-   add r11, r11, r10
-#endif
-   lwz r12,0(r11)
-   mtspr   SPRN_DBCR0,r12
-
-3:
b   transfer_to_syscall /* jump to handler */
 .endm
 
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index c89a8eac3e24..6111acf61373 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -76,6 +76,13 @@ notrace long system_call_exception(long r3, long r4, long r5,
kuap_check_amr();
 #endif
 
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+   if (IS_ENABLED(CONFIG_PPC32) && unlikely(current->thread.debug.dbcr0 & 
DBCR0_IDM)) {
+   mtspr(SPRN_DBSR, -1);
+   mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]);
+   }
+#endif
+
account_cpu_user_entry();
 
account_stolen_time();
@@ -324,6 +331,22 @@ notrace unsigned long syscall_exit_prepare(unsigned long 
r3,
local_paca->tm_scratch = regs->msr;
 #endif
 
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+   if (unlikely(current->thread.debug.dbcr0 & DBCR0_IDM)) {
+   /*
+* Check to see if the dbcr0 register is set up to debug.
+* Use the internal debug mode bit to do this.
+*/
+   mtmsr(mfmsr() & ~MSR_DE);
+   if (IS_ENABLED(CONFIG_PPC32)) {
+   isync();
+   global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
+ 

[PATCH v5 21/22] powerpc/32: Remove the counter in global_dbcr0

2021-02-08 Thread Christophe Leroy
global_dbcr0 has two parts: 4 bytes to save/restore the
value of SPRN_DBCR0, and 4 bytes that are incremented/decremented
every time something saves/loads the above value.

This counter is only incremented/decremented, its value is never
used and never read.

Remove the counter and divide the size of global_dbcr0 by 2.

Signed-off-by: Christophe Leroy 
---
v5: New
---
 arch/powerpc/kernel/entry_32.S   | 12 +++-
 arch/powerpc/kernel/head_32.h|  3 ---
 arch/powerpc/kernel/head_booke.h |  5 +
 3 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 7c824e8928d0..a574201b0eb6 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -175,14 +175,11 @@ transfer_to_handler:
addi    r11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
lwz r9,TASK_CPU(r2)
-   slwi    r9,r9,3
+   slwi    r9,r9,2
add r11,r11,r9
 #endif
lwz r12,0(r11)
mtspr   SPRN_DBCR0,r12
-   lwz r12,4(r11)
-   addi    r12,r12,-1
-   stw r12,4(r11)
 #endif
 
b   3f
@@ -980,14 +977,11 @@ load_dbcr0:
addi    r11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
lwz r9,TASK_CPU(r2)
-   slwi    r9,r9,3
+   slwi    r9,r9,2
add r11,r11,r9
 #endif
stw r10,0(r11)
mtspr   SPRN_DBCR0,r0
-   lwz r10,4(r11)
-   addi    r10,r10,1
-   stw r10,4(r11)
li  r11,-1
mtspr   SPRN_DBSR,r11   /* clear all pending debug events */
blr
@@ -996,7 +990,7 @@ load_dbcr0:
.align  4
.global global_dbcr0
 global_dbcr0:
-   .space  8*NR_CPUS
+   .space  4*NR_CPUS
.previous
 #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
 
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 282d8fd443a9..5001c6ecc3ec 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -166,9 +166,6 @@
addi    r11,r11,global_dbcr0@l
lwz r12,0(r11)
mtspr   SPRN_DBCR0,r12
-   lwz r12,4(r11)
-   addi    r12,r12,-1
-   stw r12,4(r11)
 3:
 #endif
b   transfer_to_syscall /* jump to handler */
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index bfbd240cc8a2..5f565232b99d 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -142,14 +142,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
addi    r11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
lwz r10, TASK_CPU(r2)
-   slwi    r10, r10, 3
+   slwi    r10, r10, 2
add r11, r11, r10
 #endif
lwz r12,0(r11)
mtspr   SPRN_DBCR0,r12
-   lwz r12,4(r11)
-   addi    r12,r12,-1
-   stw r12,4(r11)
 
 3:
b   transfer_to_syscall /* jump to handler */
-- 
2.25.0



[PATCH v5 20/22] powerpc/syscall: Avoid storing 'current' in another pointer

2021-02-08 Thread Christophe Leroy
By saving a pointer to thread_info.flags, gcc copies r2
into a non-volatile register.

We know 'current' doesn't change, so avoid that intermediate pointer.

Reduces null_syscall benchmark by 2 cycles (322 => 320 cycles)

On PPC64, gcc seems to know that 'current' is not changing, and it keeps
it in a non-volatile register to avoid multiple reads of 'current' in the paca.
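
The difference, in schematic C mirroring the diff below:

    /* before: the address is computed once and has to stay live in a
     * non-volatile register across the calls made by this function */
    unsigned long *ti_flagsp = &current_thread_info()->flags;
    ti_flags = READ_ONCE(*ti_flagsp);

    /* after: the address is rematerialized from r2 at each use, so no
     * extra non-volatile register is tied up */
    ti_flags = READ_ONCE(current_thread_info()->flags);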

Signed-off-by: Christophe Leroy 
---
v5: Also in interrupt exit prepare
---
 arch/powerpc/kernel/interrupt.c | 28 +++-
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 8c38e8c95be2..c89a8eac3e24 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -223,7 +223,6 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
   struct pt_regs *regs,
   long scv)
 {
-   unsigned long *ti_flagsp = &current_thread_info()->flags;
unsigned long ti_flags;
unsigned long ret = 0;
 
@@ -241,7 +240,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
/* Check whether the syscall is issued inside a restartable sequence */
rseq_syscall(regs);
 
-   ti_flags = *ti_flagsp;
+   ti_flags = current_thread_info()->flags;
 
if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL {
@@ -255,7 +254,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
ret = _TIF_RESTOREALL;
else
regs->gpr[3] = r3;
-   clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+   clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
} else {
regs->gpr[3] = r3;
}
@@ -268,7 +267,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
local_irq_disable();
 
 again:
-   ti_flags = READ_ONCE(*ti_flagsp);
+   ti_flags = READ_ONCE(current_thread_info()->flags);
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
local_irq_enable();
if (ti_flags & _TIF_NEED_RESCHED) {
@@ -284,7 +283,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
do_notify_resume(regs, ti_flags);
}
local_irq_disable();
-   ti_flags = READ_ONCE(*ti_flagsp);
+   ti_flags = READ_ONCE(current_thread_info()->flags);
}
 
if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
@@ -339,10 +338,6 @@ notrace unsigned long syscall_exit_prepare(unsigned long 
r3,
 #ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, 
unsigned long msr)
 {
-#ifdef CONFIG_PPC_BOOK3E
-   struct thread_struct *ts = &current->thread;
-#endif
-   unsigned long *ti_flagsp = &current_thread_info()->flags;
unsigned long ti_flags;
unsigned long flags;
unsigned long ret = 0;
@@ -365,7 +360,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct 
pt_regs *regs, unsigned
local_irq_save(flags);
 
 again:
-   ti_flags = READ_ONCE(*ti_flagsp);
+   ti_flags = READ_ONCE(current_thread_info()->flags);
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
local_irq_enable(); /* returning to user: may enable */
if (ti_flags & _TIF_NEED_RESCHED) {
@@ -376,7 +371,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct 
pt_regs *regs, unsigned
do_notify_resume(regs, ti_flags);
}
local_irq_disable();
-   ti_flags = READ_ONCE(*ti_flagsp);
+   ti_flags = READ_ONCE(current_thread_info()->flags);
}
 
if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
@@ -407,13 +402,13 @@ notrace unsigned long interrupt_exit_user_prepare(struct 
pt_regs *regs, unsigned
}
 
 #ifdef CONFIG_PPC_BOOK3E
-   if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
+   if (unlikely(current->thread.debug.dbcr0 & DBCR0_IDM)) {
/*
 * Check to see if the dbcr0 register is set up to debug.
 * Use the internal debug mode bit to do this.
 */
mtmsr(mfmsr() & ~MSR_DE);
-   mtspr(SPRN_DBCR0, ts->debug.dbcr0);
+   mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
mtspr(SPRN_DBSR, -1);
}
 #endif
@@ -438,7 +433,6 @@ void preempt_schedule_irq(void);
 
 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, 
unsigned long msr)
 {
-   unsigned long *ti_flagsp = &current_thread_info()->flags;
unsigned long flags;
unsigned long ret = 0;
 #ifdef CONFIG_PPC64
@@ -461,8 +455,8 @@ notrace 

[PATCH v5 19/22] powerpc/syscall: Optimise checks in beginning of system_call_exception()

2021-02-08 Thread Christophe Leroy
Combine all tests of regs->msr into a single logical one.

Before the patch:

   0:   81 6a 00 84 lwz r11,132(r10)
   4:   90 6a 00 88 stw r3,136(r10)
   8:   69 60 00 02 xori    r0,r11,2
   c:   54 00 ff fe rlwinm  r0,r0,31,31,31
  10:   0f 00 00 00 twnei   r0,0
  14:   69 63 40 00 xori    r3,r11,16384
  18:   54 63 97 fe rlwinm  r3,r3,18,31,31
  1c:   0f 03 00 00 twnei   r3,0
  20:   69 6b 80 00 xori    r11,r11,32768
  24:   55 6b 8f fe rlwinm  r11,r11,17,31,31
  28:   0f 0b 00 00 twnei   r11,0

After the patch:

   0:   81 6a 00 84 lwz r11,132(r10)
   4:   90 6a 00 88 stw r3,136(r10)
   8:   7d 6b 58 f8 not r11,r11
   c:   71 6b c0 02 andi.   r11,r11,49154
  10:   0f 0b 00 00 twnei   r11,0

6 cycles less on powerpc 8xx (328 => 322 cycles).
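
The transformation relies on (msr & mask) ^ mask being non-zero if and
only if at least one bit of mask is clear in msr, so the separate
BUG_ON()s collapse into a single trap (minimal demonstration; MSR_EE is
only added to the mask on PPC32, as in the patch):

    unsigned long expected_msr = MSR_PR | MSR_RI | MSR_EE;

    /* equivalent to one BUG_ON(!(regs->msr & bit)) per bit, but
     * compiles down to a single not + andi. + twnei sequence */
    BUG_ON((regs->msr & expected_msr) ^ expected_msr);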

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/interrupt.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 55e1aa18cdb9..8c38e8c95be2 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -28,6 +28,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
   unsigned long r0, struct pt_regs *regs)
 {
syscall_fn f;
+   unsigned long expected_msr;
 
regs->orig_gpr3 = r3;
 
@@ -39,10 +40,13 @@ notrace long system_call_exception(long r3, long r4, long 
r5,
 
trace_hardirqs_off(); /* finish reconciling */
 
+   expected_msr = MSR_PR;
if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
-   BUG_ON(!(regs->msr & MSR_RI));
-   BUG_ON(!(regs->msr & MSR_PR));
-   BUG_ON(arch_irq_disabled_regs(regs));
+   expected_msr |= MSR_RI;
+   if (IS_ENABLED(CONFIG_PPC32))
+   expected_msr |= MSR_EE;
+   BUG_ON((regs->msr & expected_msr) ^ expected_msr);
+   BUG_ON(IS_ENABLED(CONFIG_PPC64) && arch_irq_disabled_regs(regs));
 
 #ifdef CONFIG_PPC_PKEY
if (mmu_has_feature(MMU_FTR_PKEY)) {
-- 
2.25.0



[PATCH v5 18/22] powerpc/syscall: Remove FULL_REGS verification in system_call_exception

2021-02-08 Thread Christophe Leroy
For book3s/64, FULL_REGS() is 'true' at all times, so the test is moot.
For others, non-volatile registers are saved unconditionally.

So the verification is pointless.

Should one fail to do it, it would anyway be caught by the
CHECK_FULL_REGS() in copy_thread(), as we have removed the
special versions ppc_fork() and friends.

null_syscall benchmark reduction 4 cycles (332 => 328 cycles)

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/interrupt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 8fafca727b8b..55e1aa18cdb9 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -42,7 +42,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
-   BUG_ON(!FULL_REGS(regs));
BUG_ON(arch_irq_disabled_regs(regs));
 
 #ifdef CONFIG_PPC_PKEY
-- 
2.25.0



[PATCH v5 17/22] powerpc/syscall: Do not check unsupported scv vector on PPC32

2021-02-08 Thread Christophe Leroy
Only PPC64 has scv. No need to check the 0x7ff0 trap on PPC32.
For that, add a helper trap_is_unsupported_scv() similar to
trap_is_scv().

And ignore the scv parameter in syscall_exit_prepare (Save 14 cycles
346 => 332 cycles)

Signed-off-by: Christophe Leroy 
---
v5: Added a helper trap_is_unsupported_scv()
---
 arch/powerpc/include/asm/ptrace.h | 5 +
 arch/powerpc/kernel/entry_32.S| 1 -
 arch/powerpc/kernel/interrupt.c   | 7 +--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h 
b/arch/powerpc/include/asm/ptrace.h
index 58f9dc060a7b..2c842b11a924 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -229,6 +229,11 @@ static inline bool trap_is_scv(struct pt_regs *regs)
return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000);
 }
 
+static inline bool trap_is_unsupported_scv(struct pt_regs *regs)
+{
+   return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0);
+}
+
 static inline bool trap_is_syscall(struct pt_regs *regs)
 {
return (trap_is_scv(regs) || TRAP(regs) == 0xc00);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index cffe58e63356..7c824e8928d0 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -344,7 +344,6 @@ transfer_to_syscall:
 
 ret_from_syscall:
addi    r4,r1,STACK_FRAME_OVERHEAD
-   li  r5,0
bl  syscall_exit_prepare
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
/* If the process has its own DBCR0 value, load it up.  The internal
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 205902052112..8fafca727b8b 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -88,7 +88,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
local_irq_enable();
 
if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
-   if (unlikely(regs->trap == 0x7ff0)) {
+   if (unlikely(trap_is_unsupported_scv(regs))) {
/* Unsupported scv vector */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
return regs->gpr[3];
@@ -111,7 +111,7 @@ notrace long system_call_exception(long r3, long r4, long 
r5,
r8 = regs->gpr[8];
 
} else if (unlikely(r0 >= NR_syscalls)) {
-   if (unlikely(regs->trap == 0x7ff0)) {
+   if (unlikely(trap_is_unsupported_scv(regs))) {
/* Unsupported scv vector */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
return regs->gpr[3];
@@ -224,6 +224,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
unsigned long ti_flags;
unsigned long ret = 0;
 
+   if (IS_ENABLED(CONFIG_PPC32))
+   scv = 0;
+
CT_WARN_ON(ct_state() == CONTEXT_USER);
 
 #ifdef CONFIG_PPC64
-- 
2.25.0



[PATCH v5 16/22] powerpc/syscall: Avoid stack frame in likely part of system_call_exception()

2021-02-08 Thread Christophe Leroy
When r3 is not modified, reload it from regs->orig_r3 to free up a
volatile register. This avoids a stack frame for the likely part
of system_call_exception().

Before the patch:

c000b4d4 <system_call_exception>:
c000b4d4:   7c 08 02 a6 mflr    r0
c000b4d8:   94 21 ff e0 stwu    r1,-32(r1)
c000b4dc:   93 e1 00 1c stw r31,28(r1)
c000b4e0:   90 01 00 24 stw r0,36(r1)
c000b4e4:   90 6a 00 88 stw r3,136(r10)
c000b4e8:   81 6a 00 84 lwz r11,132(r10)
c000b4ec:   69 6b 00 02 xori    r11,r11,2
c000b4f0:   55 6b ff fe rlwinm  r11,r11,31,31,31
c000b4f4:   0f 0b 00 00 twnei   r11,0
c000b4f8:   81 6a 00 a0 lwz r11,160(r10)
c000b4fc:   55 6b 07 fe clrlwi  r11,r11,31
c000b500:   0f 0b 00 00 twnei   r11,0
c000b504:   7c 0c 42 e6 mftb    r0
c000b508:   83 e2 00 08 lwz r31,8(r2)
c000b50c:   81 82 00 28 lwz r12,40(r2)
c000b510:   90 02 00 24 stw r0,36(r2)
c000b514:   7d 8c f8 50 subf    r12,r12,r31
c000b518:   7c 0c 02 14 add r0,r12,r0
c000b51c:   90 02 00 08 stw r0,8(r2)
c000b520:   7c 10 13 a6 mtspr   80,r0
c000b524:   81 62 00 70 lwz r11,112(r2)
c000b528:   71 60 86 91 andi.   r0,r11,34449
c000b52c:   40 82 00 34 bne c000b560 
c000b530:   2b 89 01 b6 cmplwi  cr7,r9,438
c000b534:   41 9d 00 64 bgt cr7,c000b598 

c000b538:   3d 40 c0 5c lis r10,-16292
c000b53c:   55 29 10 3a rlwinm  r9,r9,2,0,29
c000b540:   39 4a 41 e8 addi    r10,r10,16872
c000b544:   80 01 00 24 lwz r0,36(r1)
c000b548:   7d 2a 48 2e lwzx    r9,r10,r9
c000b54c:   7c 08 03 a6 mtlr    r0
c000b550:   7d 29 03 a6 mtctr   r9
c000b554:   83 e1 00 1c lwz r31,28(r1)
c000b558:   38 21 00 20 addi    r1,r1,32
c000b55c:   4e 80 04 20 bctr

After the patch:

c000b4d4 <system_call_exception>:
c000b4d4:   81 6a 00 84 lwz r11,132(r10)
c000b4d8:   90 6a 00 88 stw r3,136(r10)
c000b4dc:   69 6b 00 02 xori    r11,r11,2
c000b4e0:   55 6b ff fe rlwinm  r11,r11,31,31,31
c000b4e4:   0f 0b 00 00 twnei   r11,0
c000b4e8:   80 6a 00 a0 lwz r3,160(r10)
c000b4ec:   54 63 07 fe clrlwi  r3,r3,31
c000b4f0:   0f 03 00 00 twnei   r3,0
c000b4f4:   7d 6c 42 e6 mftb    r11
c000b4f8:   81 82 00 08 lwz r12,8(r2)
c000b4fc:   80 02 00 28 lwz r0,40(r2)
c000b500:   91 62 00 24 stw r11,36(r2)
c000b504:   7c 00 60 50 subf    r0,r0,r12
c000b508:   7d 60 5a 14 add r11,r0,r11
c000b50c:   91 62 00 08 stw r11,8(r2)
c000b510:   7c 10 13 a6 mtspr   80,r0
c000b514:   80 62 00 70 lwz r3,112(r2)
c000b518:   70 6b 86 91 andi.   r11,r3,34449
c000b51c:   40 82 00 28 bne c000b544 
c000b520:   2b 89 01 b6 cmplwi  cr7,r9,438
c000b524:   41 9d 00 84 bgt cr7,c000b5a8 

c000b528:   80 6a 00 88 lwz r3,136(r10)
c000b52c:   3d 40 c0 5c lis r10,-16292
c000b530:   55 29 10 3a rlwinm  r9,r9,2,0,29
c000b534:   39 4a 41 e4 addi    r10,r10,16868
c000b538:   7d 2a 48 2e lwzx    r9,r10,r9
c000b53c:   7d 29 03 a6 mtctr   r9
c000b540:   4e 80 04 20 bctr

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/interrupt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 107ec39f05cb..205902052112 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -117,6 +117,9 @@ notrace long system_call_exception(long r3, long r4, long 
r5,
return regs->gpr[3];
}
return -ENOSYS;
+   } else {
+   /* Restore r3 from orig_gpr3 to free up a volatile reg */
+   r3 = regs->orig_gpr3;
}
 
/* May be faster to do array_index_nospec? */
-- 
2.25.0



[PATCH v5 15/22] powerpc/32: Remove verification of MSR_PR on syscall in the ASM entry

2021-02-08 Thread Christophe Leroy
system_call_exception() checks MSR_PR and BUGs if a syscall
is issued from kernel mode.

No need to handle it anymore from the ASM entry code.

null_syscall reduction 2 cycles (348 => 346 cycles)

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 30 --
 arch/powerpc/kernel/head_32.h|  3 ---
 arch/powerpc/kernel/head_booke.h |  3 ---
 3 files changed, 36 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index bbf7ecea6fe0..cffe58e63356 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -421,36 +421,6 @@ ret_from_kernel_thread:
li  r3,0
b   ret_from_syscall
 
-   /*
-* System call was called from kernel. We get here with SRR1 in r9.
-* Mark the exception as recoverable once we have retrieved SRR0,
-* trap a warning and return ENOSYS with CR[SO] set.
-*/
-   .globl  ret_from_kernel_syscall
-ret_from_kernel_syscall:
-   mfspr   r9, SPRN_SRR0
-   mfspr   r10, SPRN_SRR1
-#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE)
-   LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR|MSR_DR))
-   mtmsr   r11
-#endif
-
-0: trap
-   EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-
-   li  r3, ENOSYS
-   crset   so
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
-   mtspr   SPRN_NRI, r0
-#endif
-   mtspr   SPRN_SRR0, r9
-   mtspr   SPRN_SRR1, r10
-   rfi
-#ifdef CONFIG_40x
-   b . /* Prevent prefetch past rfi */
-#endif
-_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall)
-
 /*
  * Top-level page fault handling.
  * This is in assembler because if do_page_fault tells us that
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index fea7fe00a690..282d8fd443a9 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -118,8 +118,6 @@
 .macro SYSCALL_ENTRY trapno
mfspr   r9, SPRN_SRR1
mfspr   r10, SPRN_SRR0
-   andi.   r11, r9, MSR_PR
-   beq-99f
LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions 
*/
lis r12, 1f@h
ori r12, r12, 1f@l
@@ -174,7 +172,6 @@
 3:
 #endif
b   transfer_to_syscall /* jump to handler */
-99:b   ret_from_kernel_syscall
 .endm
 
 .macro save_dar_dsisr_on_stack reg1, reg2, sp
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index db931f1167aa..bfbd240cc8a2 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -106,10 +106,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 #endif
mfspr   r9, SPRN_SRR1
BOOKE_CLEAR_BTB(r11)
-   andi.   r11, r9, MSR_PR
lwz r11, TASK_STACK - THREAD(r10)
rlwinm  r12,r12,0,4,2   /* Clear SO bit in CR */
-   beq-99f
ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
stw r12, _CCR(r11)  /* save various registers */
mflrr12
@@ -155,7 +153,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 
 3:
b   transfer_to_syscall /* jump to handler */
-99:b   ret_from_kernel_syscall
 .endm
 
 /* To handle the additional exception priority levels on 40x and Book-E
-- 
2.25.0



[PATCH v5 13/22] powerpc/32: Always save non volatile GPRs at syscall entry

2021-02-08 Thread Christophe Leroy
In preparation for porting syscall entry/exit to C, unconditionally
save non-volatile general purpose registers.

Commit 965dd3ad3076 ("powerpc/64/syscall: Remove non-volatile GPR save
optimisation") provides detailed explanation.

This increases the number of cycles by 24 cycles on 8xx with
null_syscall benchmark (280 => 304 cycles)

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 46 +---
 arch/powerpc/kernel/head_32.h|  2 +-
 arch/powerpc/kernel/head_booke.h |  2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl | 20 +++
 4 files changed, 8 insertions(+), 62 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index b1e36602c013..97dc28a68465 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -351,6 +351,7 @@ trace_syscall_entry_irq_off:
 
.globl  transfer_to_syscall
 transfer_to_syscall:
+   SAVE_NVGPRS(r1)
 #ifdef CONFIG_PPC_BOOK3S_32
kuep_lock r11, r12
 #endif
@@ -614,51 +615,6 @@ ret_from_kernel_syscall:
 #endif
 _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall)
 
-/*
- * The fork/clone functions need to copy the full register set into
- * the child process. Therefore we need to save all the nonvolatile
- * registers (r13 - r31) before calling the C code.
- */
-   .globl  ppc_fork
-ppc_fork:
-   SAVE_NVGPRS(r1)
-   lwz r0,_TRAP(r1)
-   rlwinm  r0,r0,0,0,30    /* clear LSB to indicate full */
-   stw r0,_TRAP(r1)    /* register set saved */
-   b   sys_fork
-
-   .globl  ppc_vfork
-ppc_vfork:
-   SAVE_NVGPRS(r1)
-   lwz r0,_TRAP(r1)
-   rlwinm  r0,r0,0,0,30    /* clear LSB to indicate full */
-   stw r0,_TRAP(r1)    /* register set saved */
-   b   sys_vfork
-
-   .globl  ppc_clone
-ppc_clone:
-   SAVE_NVGPRS(r1)
-   lwz r0,_TRAP(r1)
-   rlwinm  r0,r0,0,0,30    /* clear LSB to indicate full */
-   stw r0,_TRAP(r1)    /* register set saved */
-   b   sys_clone
-
-   .globl  ppc_clone3
-ppc_clone3:
-   SAVE_NVGPRS(r1)
-   lwz r0,_TRAP(r1)
-   rlwinm  r0,r0,0,0,30    /* clear LSB to indicate full */
-   stw r0,_TRAP(r1)    /* register set saved */
-   b   sys_clone3
-
-   .globl  ppc_swapcontext
-ppc_swapcontext:
-   SAVE_NVGPRS(r1)
-   lwz r0,_TRAP(r1)
-   rlwinm  r0,r0,0,0,30    /* clear LSB to indicate full */
-   stw r0,_TRAP(r1)    /* register set saved */
-   b   sys_swapcontext
-
 /*
  * Top-level page fault handling.
  * This is in assembler because if do_page_fault tells us that
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 24dc326e0d56..7b12736ec546 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -148,7 +148,7 @@
stw r2,GPR2(r11)
addi    r10,r10,STACK_FRAME_REGS_MARKER@l
stw r9,_MSR(r11)
-   li  r2, \trapno + 1
+   li  r2, \trapno
stw r10,8(r11)
stw r2,_TRAP(r11)
SAVE_GPR(0, r11)
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index b3c502c503a0..626e716576ce 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -124,7 +124,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
stw r2,GPR2(r11)
addi    r12, r12, STACK_FRAME_REGS_MARKER@l
stw r9,_MSR(r11)
-   li  r2, \trapno + 1
+   li  r2, \trapno
stw r12, 8(r11)
stw r2,_TRAP(r11)
SAVE_GPR(0, r11)
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
b/arch/powerpc/kernel/syscalls/syscall.tbl
index f744eb5cba88..96b2157f0371 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -9,9 +9,7 @@
 #
0  nospu   restart_syscall sys_restart_syscall
1  nospu   exit    sys_exit
-2  32  fork    ppc_fork    sys_fork
-2  64  fork    sys_fork
-2  spu fork    sys_ni_syscall
+2  nospu   fork    sys_fork
3  common  read    sys_read
4  common  write   sys_write
5  common  open    sys_open    compat_sys_open
@@ -160,9 +158,7 @@
119    32  sigreturn   sys_sigreturn   compat_sys_sigreturn
119    64  sigreturn   sys_ni_syscall
119    spu sigreturn   sys_ni_syscall
-120    32  clone   ppc_clone   sys_clone
-120    64  clone   sys_clone
-120    spu clone

[PATCH v5 14/22] powerpc/syscall: implement system call entry/exit logic in C for PPC32

2021-02-08 Thread Christophe Leroy
This is a port of the PPC64 syscall entry/exit logic in C to PPC32.

Performancewise on 8xx:
Before : 304 cycles on null_syscall
After  : 348 cycles on null_syscall

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 224 +--
 arch/powerpc/kernel/head_32.h|  18 ---
 arch/powerpc/kernel/head_booke.h |  17 ---
 3 files changed, 30 insertions(+), 229 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 97dc28a68465..bbf7ecea6fe0 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -329,117 +329,23 @@ stack_ovf:
 _ASM_NOKPROBE_SYMBOL(stack_ovf)
 #endif
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-trace_syscall_entry_irq_off:
-   /*
-* Syscall shouldn't happen while interrupts are disabled,
-* so let's do a warning here.
-*/
-0: trap
-   EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-   bl  trace_hardirqs_on
-
-   /* Now enable for real */
-   LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
-   mtmsr   r10
-
-   REST_GPR(0, r1)
-   REST_4GPRS(3, r1)
-   REST_2GPRS(7, r1)
-   b   DoSyscall
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
.globl  transfer_to_syscall
 transfer_to_syscall:
SAVE_NVGPRS(r1)
 #ifdef CONFIG_PPC_BOOK3S_32
kuep_lock r11, r12
 #endif
-#ifdef CONFIG_TRACE_IRQFLAGS
-   andi.   r12,r9,MSR_EE
-   beq-    trace_syscall_entry_irq_off
-#endif /* CONFIG_TRACE_IRQFLAGS */
 
-/*
- * Handle a system call.
- */
-   .stabs  "arch/powerpc/kernel/",N_SO,0,0,0f
-   .stabs  "entry_32.S",N_SO,0,0,0f
-0:
-
-_GLOBAL(DoSyscall)
-   stw r3,ORIG_GPR3(r1)
-   li  r12,0
-   stw r12,RESULT(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
-   /* Make sure interrupts are enabled */
-   mfmsr   r11
-   andi.   r12,r11,MSR_EE
-   /* We came in with interrupts disabled, we WARN and mark them enabled
-* for lockdep now */
-0: tweqi   r12, 0
-   EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-#endif /* CONFIG_TRACE_IRQFLAGS */
-   lwz r11,TI_FLAGS(r2)
-   andi.   r11,r11,_TIF_SYSCALL_DOTRACE
-   bne-    syscall_dotrace
-syscall_dotrace_cont:
-   cmplwi  0,r0,NR_syscalls
-   lis r10,sys_call_table@h
-   ori r10,r10,sys_call_table@l
-   slwi    r0,r0,2
-   bge-    66f
-
-   barrier_nospec_asm
-   /*
-* Prevent the load of the handler below (based on the user-passed
-* system call number) being speculatively executed until the test
-* against NR_syscalls and branch to .66f above has
-* committed.
-*/
+   /* Calling convention has r9 = orig r0, r10 = regs */
+   addi    r10,r1,STACK_FRAME_OVERHEAD
+   mr  r9,r0
+   stw r10,THREAD+PT_REGS(r2)
+   bl  system_call_exception
 
-   lwzx    r10,r10,r0  /* Fetch system call handler [ptr] */
-   mtlr    r10
-   addi    r9,r1,STACK_FRAME_OVERHEAD
-   PPC440EP_ERR42
-   blrl/* Call handler */
-   .globl  ret_from_syscall
 ret_from_syscall:
-#ifdef CONFIG_DEBUG_RSEQ
-   /* Check whether the syscall is issued inside a restartable sequence */
-   stw r3,GPR3(r1)
-   addi    r3,r1,STACK_FRAME_OVERHEAD
-   bl  rseq_syscall
-   lwz r3,GPR3(r1)
-#endif
-   mr  r6,r3
-   /* disable interrupts so current_thread_info()->flags can't change */
-   LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)  /* doesn't include MSR_EE */
-   /* Note: We don't bother telling lockdep about it */
-   mtmsr   r10
-   lwz r9,TI_FLAGS(r2)
-   li  r8,-MAX_ERRNO
-   andi.   
r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
-   bne-    syscall_exit_work
-   cmplw   0,r3,r8
-   blt+    syscall_exit_cont
-   lwz r11,_CCR(r1)    /* Load CR */
-   neg r3,r3
-   oris    r11,r11,0x1000  /* Set SO bit in CR */
-   stw r11,_CCR(r1)
-syscall_exit_cont:
-   lwz r8,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
-   /* If we are going to return from the syscall with interrupts
-* off, we trace that here. It shouldn't normally happen.
-*/
-   andi.   r10,r8,MSR_EE
-   bne+    1f
-   stw r3,GPR3(r1)
-   bl  trace_hardirqs_off
-   lwz r3,GPR3(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
+   addi    r4,r1,STACK_FRAME_OVERHEAD
+   li  r5,0
+   bl  syscall_exit_prepare
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
/* If the process has its own DBCR0 value, load it up.  The internal
   debug mode bit tells us that dbcr0 should be loaded. */
@@ -453,12 +359,6 @@ syscall_exit_cont:
cmplwi  cr0,r5,0
bne-2f
 #endif /* CONFIG_PPC_47x */
-1:
-BEGIN_FTR_SECTION
-   lwarx   r7,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
-   stwcx.  

[PATCH v5 12/22] powerpc/syscall: Change condition to check MSR_RI

2021-02-08 Thread Christophe Leroy
In system_call_exception(), MSR_RI also needs to be checked on 8xx.
Only booke and 40x don't have MSR_RI.
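
The same two-option test now appears three times in interrupt.c. A possible
follow-up, purely illustrative and not part of this patch, would be to factor
it into a small helper:

/* hypothetical helper, not in the tree: true if the CPU implements MSR_RI */
static inline bool cpu_has_msr_ri(void)
{
	return !IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x);
}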

Signed-off-by: Christophe Leroy 
---
v5: Also in interrupt exit prepare
---
 arch/powerpc/kernel/interrupt.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 1a2dec49f811..107ec39f05cb 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 
trace_hardirqs_off(); /* finish reconciling */
 
-   if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+   if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
BUG_ON(!FULL_REGS(regs));
@@ -338,7 +338,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
unsigned long flags;
unsigned long ret = 0;
 
-   if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+   if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
BUG_ON(!FULL_REGS(regs));
@@ -436,7 +436,8 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
unsigned long amr;
 #endif
 
-   if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
+   if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
+   unlikely(!(regs->msr & MSR_RI)))
unrecoverable_exception(regs);
BUG_ON(regs->msr & MSR_PR);
BUG_ON(!FULL_REGS(regs));
-- 
2.25.0



[PATCH v5 09/22] powerpc/syscall: Make interrupt.c buildable on PPC32

2021-02-08 Thread Christophe Leroy
To allow building interrupt.c on PPC32, ifdef out PPC64-specific
code or use helpers which are available on both PPC32 and PPC64.

Modify the Makefile to always build interrupt.o.
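
One helper this relies on is arch_irq_disabled_regs(), which replaces the
direct regs->softe access in the BUG_ON below. As a simplified sketch (not
the exact kernel source), it behaves roughly like:

static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
{
#ifdef CONFIG_PPC64
	return regs->softe;		/* soft-mask state saved at entry */
#else
	return !(regs->msr & MSR_EE);	/* PPC32 has no soft mask, check MSR */
#endif
}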

Signed-off-by: Christophe Leroy 
---
v5:
- Also for interrupt exit preparation
- Left kuap related code out; ppc32 keeps it in ASM for the time being
---
 arch/powerpc/kernel/Makefile|  4 ++--
 arch/powerpc/kernel/interrupt.c | 31 ---
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 26ff8c6e06b7..163755b1cef4 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -57,10 +57,10 @@ obj-y   := cputable.o syscalls.o \
   prom.o traps.o setup-common.o \
   udbg.o misc.o io.o misc_$(BITS).o \
   of_platform.o prom_parse.o firmware.o \
-  hw_breakpoint_constraints.o
+  hw_breakpoint_constraints.o interrupt.o
 obj-y  += ptrace/
 obj-$(CONFIG_PPC64)+= setup_64.o \
-  paca.o nvram_64.o note.o interrupt.o
+  paca.o nvram_64.o note.o
 obj-$(CONFIG_COMPAT)   += sys_ppc32.o signal_32.o
 obj-$(CONFIG_VDSO32)   += vdso32_wrapper.o
 obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index d6be4f9a67e5..2dac4d2bb1cf 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
BUG_ON(!FULL_REGS(regs));
-   BUG_ON(regs->softe != IRQS_ENABLED);
+   BUG_ON(arch_irq_disabled_regs(regs));
 
 #ifdef CONFIG_PPC_PKEY
if (mmu_has_feature(MMU_FTR_PKEY)) {
@@ -65,7 +65,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
isync();
} else
 #endif
+#ifdef CONFIG_PPC64
kuap_check_amr();
+#endif
 
account_cpu_user_entry();
 
@@ -77,7 +79,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 * frame, or if the unwinder was taught the first stack frame always
 * returns to user with IRQS_ENABLED, this store could be avoided!
 */
-   regs->softe = IRQS_ENABLED;
+   irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
 
local_irq_enable();
 
@@ -151,6 +153,7 @@ static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri)
__hard_EE_RI_disable();
else
__hard_irq_disable();
+#ifdef CONFIG_PPC64
if (unlikely(lazy_irq_pending_nocheck())) {
/* Took an interrupt, may have more exit work to do. */
if (clear_ri)
@@ -162,7 +165,7 @@ static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri)
}
local_paca->irq_happened = 0;
irq_soft_mask_set(IRQS_ENABLED);
-
+#endif
return true;
 }
 
@@ -216,7 +219,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 
CT_WARN_ON(ct_state() == CONTEXT_USER);
 
+#ifdef CONFIG_PPC64
kuap_check_amr();
+#endif
 
regs->result = r3;
 
@@ -309,7 +314,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 
account_cpu_user_exit();
 
-#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
+#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */
/*
 * We do this at the end so that we do context switch with KERNEL AMR
 */
@@ -318,7 +323,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
return ret;
 }
 
-#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
+#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
 {
 #ifdef CONFIG_PPC_BOOK3E
@@ -333,14 +338,16 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
BUG_ON(!FULL_REGS(regs));
-   BUG_ON(regs->softe != IRQS_ENABLED);
+   BUG_ON(arch_irq_disabled_regs(regs));
CT_WARN_ON(ct_state() == CONTEXT_USER);
 
/*
 * We don't need to restore AMR on the way back to userspace for KUAP.
 * AMR can only have been unlocked if we interrupted the kernel.
 */
+#ifdef CONFIG_PPC64
kuap_check_amr();
+#endif
 
local_irq_save(flags);
 
@@ -407,7 +414,9 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
/*
 * We do this at the end so that we do context switch with KERNEL AMR
 */
+#ifdef CONFIG_PPC64
 

[PATCH v5 10/22] powerpc/syscall: Use is_compat_task()

2021-02-08 Thread Christophe Leroy
Instead of directly comparing task flags with _TIF_32BIT, use
is_compat_task(). The advantage is that it returns 0 on PPC32
although _TIF_32BIT is always set there.
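
The gain comes from is_compat_task() collapsing to a compile-time constant
when CONFIG_COMPAT is unset, so the compat branch is discarded entirely on
PPC32. Conceptually, as a simplified sketch of the generic contract rather
than the exact implementation:

#ifdef CONFIG_COMPAT
static inline int is_compat_task(void)
{
	return test_thread_flag(TIF_32BIT);	/* 32-bit task on a 64-bit kernel */
}
#else
static inline int is_compat_task(void)
{
	return 0;	/* constant 0 lets the compiler drop the compat path */
}
#endif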

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/interrupt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 2dac4d2bb1cf..46fd195ca659 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -2,6 +2,8 @@
 
 #include 
 #include 
+#include 
+
 #include 
 #include 
 #include 
@@ -118,7 +120,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
/* May be faster to do array_index_nospec? */
barrier_nospec();
 
-   if (unlikely(is_32bit_task())) {
+   if (unlikely(is_compat_task())) {
f = (void *)compat_sys_call_table[r0];
 
r3 &= 0xULL;
-- 
2.25.0



[PATCH v5 07/22] powerpc/irq: Add stub irq_soft_mask_return() for PPC32

2021-02-08 Thread Christophe Leroy
To allow building syscall_64.c smoothly on PPC32, add a stub version
of irq_soft_mask_return().
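
Returning 0 from the stub is safe: the only caller on the shared syscall
path wraps the comparison in a config option that cannot be set on PPC32,
so the whole check is compiled out, as in:

	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);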

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/hw_irq.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 4739f61e632c..56a98936a6a9 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -330,6 +330,11 @@ static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned l
 }
 #else /* CONFIG_PPC64 */
 
+static inline notrace unsigned long irq_soft_mask_return(void)
+{
+   return 0;
+}
+
 static inline unsigned long arch_local_save_flags(void)
 {
return mfmsr();
-- 
2.25.0



[PATCH v5 11/22] powerpc/syscall: Save r3 in regs->orig_r3

2021-02-08 Thread Christophe Leroy
Save r3 in regs->orig_r3 in system_call_exception()
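
For context, regs->orig_gpr3 is what the syscall-restart path uses to
recover the first argument once r3 holds the return value. Schematically,
as an illustrative sketch of the restart logic elsewhere in the kernel
(not part of this patch):

	/* on -ERESTARTSYS and friends, arrange for the syscall to be redone */
	regs->gpr[3] = regs->orig_gpr3;	/* restore the clobbered first argument */
	regs->nip -= 4;			/* back up NIP to re-execute the sc */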

Signed-off-by: Christophe Leroy 
---
v5: Removed the assembly store on SCV-type system calls
---
 arch/powerpc/kernel/entry_64.S  | 2 --
 arch/powerpc/kernel/interrupt.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 33ddfeef4fe9..a91c2def165d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -108,7 +108,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
li  r11,\trapnr
std r11,_TRAP(r1)
std r12,_CCR(r1)
-   std r3,ORIG_GPR3(r1)
addir10,r1,STACK_FRAME_OVERHEAD
ld  r11,exception_marker@toc(r2)
std r11,-16(r10)/* "regshere" marker */
@@ -278,7 +277,6 @@ END_BTB_FLUSH_SECTION
std r10,_LINK(r1)
std r11,_TRAP(r1)
std r12,_CCR(r1)
-   std r3,ORIG_GPR3(r1)
addir10,r1,STACK_FRAME_OVERHEAD
ld  r11,exception_marker@toc(r2)
std r11,-16(r10)/* "regshere" marker */
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 46fd195ca659..1a2dec49f811 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -29,6 +29,8 @@ notrace long system_call_exception(long r3, long r4, long r5,
 {
syscall_fn f;
 
+   regs->orig_gpr3 = r3;
+
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
 
-- 
2.25.0



[PATCH v5 08/22] powerpc/syscall: Rename syscall_64.c into interrupt.c

2021-02-08 Thread Christophe Leroy
syscall_64.c will be reused almost as-is for PPC32.

As this file also contains functions that handle other types
of interrupts, rename it to interrupt.c.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/Makefile  | 2 +-
 arch/powerpc/kernel/{syscall_64.c => interrupt.c} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename arch/powerpc/kernel/{syscall_64.c => interrupt.c} (100%)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index c173efd66c00..26ff8c6e06b7 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -60,7 +60,7 @@ obj-y := cputable.o syscalls.o \
   hw_breakpoint_constraints.o
 obj-y  += ptrace/
 obj-$(CONFIG_PPC64)+= setup_64.o \
-  paca.o nvram_64.o note.o syscall_64.o
+  paca.o nvram_64.o note.o interrupt.o
 obj-$(CONFIG_COMPAT)   += sys_ppc32.o signal_32.o
 obj-$(CONFIG_VDSO32)   += vdso32_wrapper.o
 obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/interrupt.c
similarity index 100%
rename from arch/powerpc/kernel/syscall_64.c
rename to arch/powerpc/kernel/interrupt.c
-- 
2.25.0



[PATCH v5 06/22] powerpc/irq: Rework helpers that manipulate MSR[EE/RI]

2021-02-08 Thread Christophe Leroy
In preparation for porting PPC32 to C syscall entry/exit,
rewrite the following helpers as static inline functions and
add support for PPC32 in them:
__hard_irq_enable()
__hard_irq_disable()
__hard_EE_RI_disable()
__hard_RI_enable()

Then use them in PPC32 version of arch_local_irq_disable()
and arch_local_irq_enable() to avoid code duplication.
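
Note that on PPC32 __mtmsrd() is defined to BUILD_BUG(); that is safe
because every IS_ENABLED(CONFIG_PPC_BOOK3S_64) branch using it is
constant-folded away on 32-bit before the BUILD_BUG() can fire. On 8xx,
for instance, __hard_irq_enable() reduces to just (an illustrative
reduction, not literal source):

static inline void __hard_irq_enable(void)
{
	wrtspr(SPRN_EIE);	/* 8xx: set MSR_EE via a special register write */
}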

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/hw_irq.h | 75 +--
 arch/powerpc/include/asm/reg.h|  1 +
 2 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index ed0c3b049dfd..4739f61e632c 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -50,6 +50,55 @@
 
 #ifndef __ASSEMBLY__
 
+static inline void __hard_irq_enable(void)
+{
+   if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+   wrtee(MSR_EE);
+   else if (IS_ENABLED(CONFIG_PPC_8xx))
+   wrtspr(SPRN_EIE);
+   else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+   __mtmsrd(MSR_EE | MSR_RI, 1);
+   else
+   mtmsr(mfmsr() | MSR_EE);
+}
+
+static inline void __hard_irq_disable(void)
+{
+   if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+   wrtee(0);
+   else if (IS_ENABLED(CONFIG_PPC_8xx))
+   wrtspr(SPRN_EID);
+   else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+   __mtmsrd(MSR_RI, 1);
+   else
+   mtmsr(mfmsr() & ~MSR_EE);
+}
+
+static inline void __hard_EE_RI_disable(void)
+{
+   if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+   wrtee(0);
+   else if (IS_ENABLED(CONFIG_PPC_8xx))
+   wrtspr(SPRN_NRI);
+   else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+   __mtmsrd(0, 1);
+   else
+   mtmsr(mfmsr() & ~(MSR_EE | MSR_RI));
+}
+
+static inline void __hard_RI_enable(void)
+{
+   if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+   return;
+
+   if (IS_ENABLED(CONFIG_PPC_8xx))
+   wrtspr(SPRN_EID);
+   else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+   __mtmsrd(MSR_RI, 1);
+   else
+   mtmsr(mfmsr() | MSR_RI);
+}
+
 #ifdef CONFIG_PPC64
 #include 
 
@@ -212,18 +261,6 @@ static inline bool arch_irqs_disabled(void)
 
 #endif /* CONFIG_PPC_BOOK3S */
 
-#ifdef CONFIG_PPC_BOOK3E
-#define __hard_irq_enable()wrtee(MSR_EE)
-#define __hard_irq_disable()   wrtee(0)
-#define __hard_EE_RI_disable() wrtee(0)
-#define __hard_RI_enable() do { } while (0)
-#else
-#define __hard_irq_enable()__mtmsrd(MSR_EE|MSR_RI, 1)
-#define __hard_irq_disable()   __mtmsrd(MSR_RI, 1)
-#define __hard_EE_RI_disable() __mtmsrd(0, 1)
-#define __hard_RI_enable() __mtmsrd(MSR_RI, 1)
-#endif
-
 #define hard_irq_disable() do {\
unsigned long flags;\
__hard_irq_disable();   \
@@ -322,22 +359,12 @@ static inline unsigned long arch_local_irq_save(void)
 
 static inline void arch_local_irq_disable(void)
 {
-   if (IS_ENABLED(CONFIG_BOOKE))
-   wrtee(0);
-   else if (IS_ENABLED(CONFIG_PPC_8xx))
-   wrtspr(SPRN_EID);
-   else
-   mtmsr(mfmsr() & ~MSR_EE);
+   __hard_irq_disable();
 }
 
 static inline void arch_local_irq_enable(void)
 {
-   if (IS_ENABLED(CONFIG_BOOKE))
-   wrtee(MSR_EE);
-   else if (IS_ENABLED(CONFIG_PPC_8xx))
-   wrtspr(SPRN_EIE);
-   else
-   mtmsr(mfmsr() | MSR_EE);
+   __hard_irq_enable();
 }
 
 static inline bool arch_irqs_disabled_flags(unsigned long flags)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c5a3e856191c..bc4305ba00d0 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1375,6 +1375,7 @@
 #define mtmsr(v)   asm volatile("mtmsr %0" : \
 : "r" ((unsigned long)(v)) \
 : "memory")
+#define __mtmsrd(v, l) BUILD_BUG()
 #define __MTMSR"mtmsr"
 #endif
 
-- 
2.25.0



[PATCH v5 02/22] powerpc/32: Always enable data translation on syscall entry

2021-02-08 Thread Christophe Leroy
If the code can use a stack in the VM area, it can also use a
stack in linear space.

Simplify the code by removing the old non-VMAP stack code on PPC32
for syscalls.

That means data translation is now re-enabled early in
syscall entry in all cases, not only when using VMAP stacks.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h| 23 +--
 arch/powerpc/kernel/head_booke.h |  2 --
 2 files changed, 1 insertion(+), 24 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index a2f72c966baf..fdc07beab844 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -118,7 +118,6 @@
 .macro SYSCALL_ENTRY trapno
mfspr   r12,SPRN_SPRG_THREAD
mfspr   r9, SPRN_SRR1
-#ifdef CONFIG_VMAP_STACK
mfspr   r11, SPRN_SRR0
mtctr   r11
andi.   r11, r9, MSR_PR
@@ -126,30 +125,16 @@
lwz r1,TASK_STACK-THREAD(r12)
beq-99f
addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
-   li  r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+   LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */
mtmsr   r10
isync
tovirt(r12, r12)
stw r11,GPR1(r1)
stw r11,0(r1)
mr  r11, r1
-#else
-   andi.   r11, r9, MSR_PR
-   lwz r11,TASK_STACK-THREAD(r12)
-   beq-99f
-   addir11, r11, THREAD_SIZE - INT_FRAME_SIZE
-   tophys(r11, r11)
-   stw r1,GPR1(r11)
-   stw r1,0(r11)
-   tovirt(r1, r11) /* set new kernel sp */
-#endif
mflr r10
stw r10, _LINK(r11)
-#ifdef CONFIG_VMAP_STACK
mfctr   r10
-#else
-   mfspr   r10,SPRN_SRR0
-#endif
stw r10,_NIP(r11)
mfcr r10
rlwinm  r10,r10,0,4,2   /* Clear SO bit in CR */
@@ -157,11 +142,7 @@
 #ifdef CONFIG_40x
rlwinm  r9,r9,0,14,12   /* clear MSR_WE (necessary?) */
 #else
-#ifdef CONFIG_VMAP_STACK
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */
-#else
-   LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
-#endif
mtmsr   r10 /* (except for mach check in rtas) */
 #endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -190,7 +171,6 @@
li  r12,-1  /* clear all pending debug events */
mtspr   SPRN_DBSR,r12
lis r11,global_dbcr0@ha
-   tophys(r11,r11)
addir11,r11,global_dbcr0@l
lwz r12,0(r11)
mtspr   SPRN_DBCR0,r12
@@ -200,7 +180,6 @@
 #endif
 
 3:
-   tovirt_novmstack r2, r2 /* set r2 to current */
lis r11, transfer_to_syscall@h
ori r11, r11, transfer_to_syscall@l
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index bf33af714d11..706cd9368992 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -144,7 +144,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
li  r12,-1  /* clear all pending debug events */
mtspr   SPRN_DBSR,r12
lis r11,global_dbcr0@ha
-   tophys(r11,r11)
addir11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
lwz r10, TASK_CPU(r2)
@@ -158,7 +157,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
stw r12,4(r11)
 
 3:
-   tovirt(r2, r2)  /* set r2 to current */
lis r11, transfer_to_syscall@h
ori r11, r11, transfer_to_syscall@l
 #ifdef CONFIG_TRACE_IRQFLAGS
-- 
2.25.0



[PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe

2021-02-08 Thread Christophe Leroy
regs->softe doesn't exist on PPC32.

Add an irq_soft_mask_regs_set_state() helper to set regs->softe.
This helper is a no-op on PPC32, so common code can call it
unconditionally instead of touching regs->softe directly.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/hw_irq.h | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 614957f74cee..ed0c3b049dfd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -38,6 +38,8 @@
 #define PACA_IRQ_MUST_HARD_MASK(PACA_IRQ_EE)
 #endif
 
+#endif /* CONFIG_PPC64 */
+
 /*
  * flags for paca->irq_soft_mask
  */
@@ -46,8 +48,6 @@
 #define IRQS_PMI_DISABLED  2
 #define IRQS_ALL_DISABLED  (IRQS_DISABLED | IRQS_PMI_DISABLED)
 
-#endif /* CONFIG_PPC64 */
-
 #ifndef __ASSEMBLY__
 
 #ifdef CONFIG_PPC64
@@ -287,6 +287,10 @@ extern void irq_set_pending_from_srr1(unsigned long srr1);
 
 extern void force_external_irq_replay(void);
 
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
+{
+   regs->softe = val;
+}
 #else /* CONFIG_PPC64 */
 
 static inline unsigned long arch_local_save_flags(void)
@@ -355,6 +359,9 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
 
 static inline void may_hard_irq_enable(void) { }
 
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
+{
+}
 #endif /* CONFIG_PPC64 */
 
 #define ARCH_IRQ_INIT_FLAGSIRQ_NOREQUEST
-- 
2.25.0



[PATCH v5 00/22] powerpc/32: Implement C syscall entry/exit

2021-02-08 Thread Christophe Leroy
This series implements C syscall entry/exit for PPC32. It reuses
the work already done for PPC64.

This series is based on today's merge-test 
(b6f72fc05389e3fc694bf5a5fa1bbd33f61879e0)

In terms of performance, we have the following cycle counts on an
8xx running the null_syscall benchmark:
- mainline: 296 cycles
- after patch 4: 283 cycles
- after patch 16: 304 cycles
- after patch 17: 348 cycles
- at the end of the series: 320 cycles

So in summary, we have a performance degradation of 8% on null_syscall
(296 => 320 cycles, i.e. (320 - 296) / 296 = 8.1%).

I think it is not a big degradation, and it is worth it.

v4 was the first mature version.

v5:
- Comments from Nick
- Converted booke DBCR0 handling to C
- Removed conversion of KUAP restore to C (will be done as part of porting
interrupt entry/exit to C)
- Added a few more preparatory changes ahead of the interrupt entry/exit
conversion to C which will follow

Christophe Leroy (22):
  powerpc/32s: Add missing call to kuep_lock on syscall entry
  powerpc/32: Always enable data translation on syscall entry
  powerpc/32: On syscall entry, enable instruction translation at the
same time as data
  powerpc/32: Reorder instructions to avoid using CTR in syscall entry
  powerpc/irq: Add helper to set regs->softe
  powerpc/irq: Rework helpers that manipulate MSR[EE/RI]
  powerpc/irq: Add stub irq_soft_mask_return() for PPC32
  powerpc/syscall: Rename syscall_64.c into interrupt.c
  powerpc/syscall: Make interrupt.c buildable on PPC32
  powerpc/syscall: Use is_compat_task()
  powerpc/syscall: Save r3 in regs->orig_r3
  powerpc/syscall: Change condition to check MSR_RI
  powerpc/32: Always save non volatile GPRs at syscall entry
  powerpc/syscall: implement system call entry/exit logic in C for PPC32
  powerpc/32: Remove verification of MSR_PR on syscall in the ASM entry
  powerpc/syscall: Avoid stack frame in likely part of
system_call_exception()
  powerpc/syscall: Do not check unsupported scv vector on PPC32
  powerpc/syscall: Remove FULL_REGS verification in
system_call_exception
  powerpc/syscall: Optimise checks in beginning of
system_call_exception()
  powerpc/syscall: Avoid storing 'current' in another pointer
  powerpc/32: Remove the counter in global_dbcr0
  powerpc/32: Handle bookE debugging in C in syscall entry/exit

 arch/powerpc/include/asm/hw_irq.h |  91 +++--
 arch/powerpc/include/asm/ptrace.h |   5 +
 arch/powerpc/include/asm/reg.h|   1 +
 arch/powerpc/include/asm/reg_booke.h  |   3 +
 arch/powerpc/kernel/Makefile  |   4 +-
 arch/powerpc/kernel/entry_32.S| 321 ++
 arch/powerpc/kernel/entry_64.S|   2 -
 arch/powerpc/kernel/head_32.h |  96 +-
 arch/powerpc/kernel/head_booke.h  |  51 +--
 .../kernel/{syscall_64.c => interrupt.c}  | 120 +--
 arch/powerpc/kernel/syscalls/syscall.tbl  |  20 +-
 11 files changed, 218 insertions(+), 496 deletions(-)
 rename arch/powerpc/kernel/{syscall_64.c => interrupt.c} (80%)

-- 
2.25.0



[PATCH v5 04/22] powerpc/32: Reorder instructions to avoid using CTR in syscall entry

2021-02-08 Thread Christophe Leroy
Now that we are using rfi instead of mtmsr to reactivate the MMU, it is
possible to reorder instructions and avoid the need to stash SRR0
in CTR.

null_syscall on 8xx is reduced by 3 cycles (283 => 280 cycles).
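
SRR0 had to be parked in CTR because it is reloaded to program the rfi;
reading it into r10 before SRR0 is reused and storing it to _NIP once the
stack frame exists removes the mtctr/mfctr round trip. Schematically
(a sketch mirroring the hunk below):

	mfspr	r10, SPRN_SRR0		/* grab user NIP before SRR0 is reused */
	mtspr	SPRN_SRR0, r12		/* SRR0 now targets the 1: label */
	rfi
1:	...
	stw	r10, _NIP(r11)		/* user NIP saved, CTR left untouched */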

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4029c51dce5d..24dc326e0d56 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -116,30 +116,28 @@
 .endm
 
 .macro SYSCALL_ENTRY trapno
-   mfspr   r12,SPRN_SPRG_THREAD
mfspr   r9, SPRN_SRR1
-   mfspr   r11, SPRN_SRR0
-   mtctr   r11
+   mfspr   r10, SPRN_SRR0
andi.   r11, r9, MSR_PR
+   beq-99f
+   LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */
+   lis r12, 1f@h
+   ori r12, r12, 1f@l
+   mtspr   SPRN_SRR1, r11
+   mtspr   SPRN_SRR0, r12
+   mfspr   r12,SPRN_SPRG_THREAD
mr  r11, r1
lwz r1,TASK_STACK-THREAD(r12)
-   beq-99f
+   tovirt(r12, r12)
addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
-   LOAD_REG_IMMEDIATE(r10, MSR_KERNEL) /* can take exceptions */
-   mtspr   SPRN_SRR1, r10
-   lis r10, 1f@h
-   ori r10, r10, 1f@l
-   mtspr   SPRN_SRR0, r10
rfi
 1:
-   tovirt(r12, r12)
stw r11,GPR1(r1)
stw r11,0(r1)
mr  r11, r1
+   stw r10,_NIP(r11)
mflr r10
stw r10, _LINK(r11)
-   mfctr   r10
-   stw r10,_NIP(r11)
mfcr r10
rlwinm  r10,r10,0,4,2   /* Clear SO bit in CR */
stw r10,_CCR(r11)   /* save registers */
-- 
2.25.0



[PATCH v5 03/22] powerpc/32: On syscall entry, enable instruction translation at the same time as data

2021-02-08 Thread Christophe Leroy
On 40x and 8xx, kernel text is pinned.
On book3s/32, kernel text is mapped by BATs.

Enable instruction translation at the same time as data translation; it
makes things simpler.

MSR_RI can also be set at the same time because srr0/srr1 are already
saved and r1 is set properly.

On booke, translation is always on, so in the end all PPC32
platforms have translation on early.

This reduces the null_syscall benchmark by 13 cycles on 8xx
(296 => 283 cycles).
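
The key property used here is that rfi atomically copies SRR1 into MSR and
branches to SRR0, so a single instruction both enables translation (and RI)
and transfers control. Schematically, mirroring the hunk below:

	LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)	/* target MSR: IR, DR and RI set */
	mtspr	SPRN_SRR1, r10
	lis	r10, 1f@h
	ori	r10, r10, 1f@l			/* target NIP: the 1: label */
	mtspr	SPRN_SRR0, r10
	rfi					/* MSR <- SRR1; NIP <- SRR0 */
1: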

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h| 26 +-
 arch/powerpc/kernel/head_booke.h |  7 ++-
 2 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index fdc07beab844..4029c51dce5d 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -125,9 +125,13 @@
lwz r1,TASK_STACK-THREAD(r12)
beq-99f
addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
-   LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */
-   mtmsr   r10
-   isync
+   LOAD_REG_IMMEDIATE(r10, MSR_KERNEL) /* can take exceptions */
+   mtspr   SPRN_SRR1, r10
+   lis r10, 1f@h
+   ori r10, r10, 1f@l
+   mtspr   SPRN_SRR0, r10
+   rfi
+1:
tovirt(r12, r12)
stw r11,GPR1(r1)
stw r11,0(r1)
@@ -141,9 +145,6 @@
stw r10,_CCR(r11)   /* save registers */
 #ifdef CONFIG_40x
rlwinm  r9,r9,0,14,12   /* clear MSR_WE (necessary?) */
-#else
-   LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */
-   mtmsr   r10 /* (except for mach check in rtas) */
 #endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
stw r2,GPR2(r11)
@@ -180,8 +181,6 @@
 #endif
 
 3:
-   lis r11, transfer_to_syscall@h
-   ori r11, r11, transfer_to_syscall@l
 #ifdef CONFIG_TRACE_IRQFLAGS
/*
 * If MSR is changing we need to keep interrupts disabled at this point
@@ -193,15 +192,8 @@
 #else
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
 #endif
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
-   mtspr   SPRN_NRI, r0
-#endif
-   mtspr   SPRN_SRR1,r10
-   mtspr   SPRN_SRR0,r11
-   rfi /* jump to handler, enable MMU */
-#ifdef CONFIG_40x
-   b . /* Prevent prefetch past rfi */
-#endif
+   mtmsr   r10
+   b   transfer_to_syscall /* jump to handler */
99: b   ret_from_kernel_syscall
 .endm
 
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 706cd9368992..b3c502c503a0 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -157,8 +157,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
stw r12,4(r11)
 
 3:
-   lis r11, transfer_to_syscall@h
-   ori r11, r11, transfer_to_syscall@l
 #ifdef CONFIG_TRACE_IRQFLAGS
/*
 * If MSR is changing we need to keep interrupts disabled at this point
@@ -172,9 +170,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
lis r10, (MSR_KERNEL | MSR_EE)@h
ori r10, r10, (MSR_KERNEL | MSR_EE)@l
 #endif
-   mtspr   SPRN_SRR1,r10
-   mtspr   SPRN_SRR0,r11
-   rfi /* jump to handler, enable MMU */
+   mtmsr   r10
+   b   transfer_to_syscall /* jump to handler */
99: b   ret_from_kernel_syscall
 .endm
 
-- 
2.25.0



[PATCH v5 01/22] powerpc/32s: Add missing call to kuep_lock on syscall entry

2021-02-08 Thread Christophe Leroy
Userspace execution protection and fast syscall entry were implemented
independently of each other and were both merged in kernel 5.2,
leaving syscall entry without userspace execution protection.

On syscall entry, execution of user space memory must be
locked in the same way as on exception entry.

Fixes: b86fb88855ea ("powerpc/32: implement fast entry for syscalls on non BOOKE")
Cc: sta...@vger.kernel.org
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index b102b40c4988..b1e36602c013 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -351,6 +351,9 @@ trace_syscall_entry_irq_off:
 
.globl  transfer_to_syscall
 transfer_to_syscall:
+#ifdef CONFIG_PPC_BOOK3S_32
+   kuep_lock r11, r12
+#endif
 #ifdef CONFIG_TRACE_IRQFLAGS
andi.   r12,r9,MSR_EE
beq-trace_syscall_entry_irq_off
-- 
2.25.0



[PATCH 2/2] powerpc/uaccess: Move might_fault() into user_access_begin()

2021-02-08 Thread Michael Ellerman
We have a might_fault() check in __unsafe_put_user_goto(), but that is
dangerous as it potentially calls lots of code while user access is
enabled.

It also triggers the check Alexey added to the irq restore path to
catch cases like that:

  WARNING: CPU: 30 PID: 1 at arch/powerpc/include/asm/book3s/64/kup.h:324 arch_local_irq_restore+0x160/0x190
  NIP arch_local_irq_restore+0x160/0x190
  LR  lock_is_held_type+0x140/0x200
  Call Trace:
0xc0007f392ff8 (unreliable)
___might_sleep+0x180/0x320
__might_fault+0x50/0xe0
filldir64+0x2d0/0x5d0
call_filldir+0xc8/0x180
ext4_readdir+0x948/0xb40
iterate_dir+0x1ec/0x240
sys_getdents64+0x80/0x290
system_call_exception+0x160/0x280
system_call_common+0xf0/0x27c

So remove the might_fault() check from unsafe_put_user().

Any call to unsafe_put_user() must be inside a region that's had user
access enabled with user_access_begin(), so move the might_fault() in
there. That also allows us to drop the is_kernel_addr() test, because
there should be no code using user_access_begin() in order to access a
kernel address.
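
With this change the canonical calling pattern pays might_fault() once per
region instead of once per store. An illustrative caller (not taken from
this patch):

static int put_one_word(u32 __user *p, u32 val)
{
	if (!user_access_begin(p, sizeof(*p)))	/* might_fault() fires here */
		return -EFAULT;
	unsafe_put_user(val, p, efault);	/* no per-store might_fault() */
	user_access_end();
	return 0;
efault:
	user_access_end();
	return -EFAULT;
}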

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/uaccess.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 70347ee34c94..71640eca7341 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -214,8 +214,6 @@ do { \
 #define __unsafe_put_user_goto(x, ptr, size, label)\
 do {   \
__typeof__(*(ptr)) __user *__pu_addr = (ptr);   \
-   if (!is_kernel_addr((unsigned long)__pu_addr))  \
-   might_fault();  \
__chk_user_ptr(ptr);\
__put_user_size_goto((x), __pu_addr, (size), label);\
 } while (0)
@@ -494,6 +492,8 @@ extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
 
 static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
 {
+   might_fault();
+
if (unlikely(!access_ok(ptr, len)))
return false;
allow_read_write_user((void __user *)ptr, ptr, len);
-- 
2.25.1



[PATCH 1/2] powerpc/uaccess: Simplify unsafe_put_user() implementation

2021-02-08 Thread Michael Ellerman
Currently unsafe_put_user() expands to __put_user_goto(), which
expands to __put_user_nocheck_goto().

There are no other uses of __put_user_nocheck_goto(), and although
there are some other uses of __put_user_goto() those could just use
unsafe_put_user().

Every layer of indirection introduces the possibility that some code
is calling that layer, and makes keeping track of the required
semantics at each point more complicated.

So drop __put_user_goto(), and rename __put_user_nocheck_goto() to
__unsafe_put_user_goto(). The "nocheck" is implied by "unsafe".

Replace the few uses of __put_user_goto() with unsafe_put_user().
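
The resulting expansion chain is one layer shorter, and every remaining
name signals the unsafe contract:

/* before: unsafe_put_user() -> __put_user_goto() -> __put_user_nocheck_goto() */
/* after:  unsafe_put_user() -> __unsafe_put_user_goto()                       */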

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/uaccess.h | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index efa4be2fa951..70347ee34c94 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -52,8 +52,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size)
__get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
 #define __put_user(x, ptr) \
__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-#define __put_user_goto(x, ptr, label) \
-   __put_user_nocheck_goto((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
 
 #define __get_user_allowed(x, ptr) \
__get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
@@ -213,7 +211,7 @@ do { \
}   \
 } while (0)
 
-#define __put_user_nocheck_goto(x, ptr, size, label)   \
+#define __unsafe_put_user_goto(x, ptr, size, label)\
 do {   \
__typeof__(*(ptr)) __user *__pu_addr = (ptr);   \
if (!is_kernel_addr((unsigned long)__pu_addr))  \
@@ -530,7 +528,8 @@ user_write_access_begin(const void __user *ptr, size_t len)
 
 #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
 #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
-#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
+#define unsafe_put_user(x, p, e) \
+   __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e)
 
 #define unsafe_copy_from_user(d, s, l, e) \
unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
@@ -543,17 +542,17 @@ do { \
 	int _i; \
 	\
 	for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \
-		__put_user_goto(*(long*)(_src + _i), (long __user *)(_dst + _i), e);\
+		unsafe_put_user(*(long*)(_src + _i), (long __user *)(_dst + _i), e); \
 	if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \
-		__put_user_goto(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e);	\
+		unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \
 		_i += 4; \
 	} \
 	if (_len & 2) { \
-		__put_user_goto(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e);	\
+		unsafe_put_user(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \
 		_i += 2; \
 	} \
 	if (_len & 1) \
-		__put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\
+		unsafe_put_user(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e); \
 } while (0)
 
 #define HAVE_GET_KERNEL_NOFAULT
-- 
2.25.1



Re: [PATCH v7 07/42] powerpc/fsl_booke/32: CacheLockingException remove args

2021-02-08 Thread Michael Ellerman
Gautham R Shenoy  writes:
> On Sat, Jan 30, 2021 at 11:08:17PM +1000, Nicholas Piggin wrote:
>> Like other interrupt handler conversions, switch to getting registers
>> from the pt_regs argument.
>> 
>> Signed-off-by: Nicholas Piggin 
>> ---
>>  arch/powerpc/kernel/head_fsl_booke.S | 6 +++---
>>  arch/powerpc/kernel/traps.c  | 5 +++--
>>  2 files changed, 6 insertions(+), 5 deletions(-)
>> 
>> diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
>> index fdd4d274c245..0d4d9a6fcca1 100644
>> --- a/arch/powerpc/kernel/head_fsl_booke.S
>> +++ b/arch/powerpc/kernel/head_fsl_booke.S
>> @@ -364,12 +364,12 @@ interrupt_base:
>>  /* Data Storage Interrupt */
>>  START_EXCEPTION(DataStorage)
>>  NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
>> -mfspr   r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
>> +mfspr   r5,SPRN_ESR /* Grab the ESR, save it3 */
>  ^^
> Sorry for the nitpick.. Should be "/* Grab the ESR, save it */"

Thanks, I fixed it up.

cheers


Re: module loader dead code removal and cleanups v3

2021-02-08 Thread Jessica Yu

+++ Christoph Hellwig [02/02/21 13:13 +0100]:

Hi all,

this series removes support for long term unused export types and
cleans up various loose ends in the module loader.

Changes since v2:
- clean up klp_find_object_symbol a bit
- remove the now unused module_assert_mutex helper

Changes since v1:
- move struct symsearch to module.c
- rework drm to not call find_module at all
- allow RCU-sched locking for find_module
- keep find_module as a public API instead of module_loaded
- update a few comments and commit logs

Diffstat:


Queued on modules-next (along with the updated patch 10).

Thanks everyone,

Jessica


Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction

2021-02-08 Thread Ravi Bangoria




On 2/4/21 6:38 PM, Naveen N. Rao wrote:

On 2021/02/04 04:17PM, Ravi Bangoria wrote:

Don't allow Uprobe on 2nd word of a prefixed instruction. As per
ISA 3.1, prefixed instruction should not cross 64-byte boundary.
So don't allow Uprobe on such prefixed instruction as well.

There are two ways probed instruction is changed in mapped pages.
First, when Uprobe is activated, it searches for all the relevant
pages and replace instruction in them. In this case, if we notice
that probe is on the 2nd word of prefixed instruction, error out
directly. Second, when Uprobe is already active and user maps a
relevant page via mmap(), instruction is replaced via mmap() code
path. But because Uprobe is invalid, entire mmap() operation can
not be stopped. In this case just print an error and continue.

Signed-off-by: Ravi Bangoria 
---
v1: http://lore.kernel.org/r/20210119091234.76317-1-ravi.bango...@linux.ibm.com
v1->v2:
   - Instead of introducing new arch hook from verify_opcode(), use
 existing hook arch_uprobe_analyze_insn().
   - Add explicit check for prefixed instruction crossing 64-byte
 boundary. If probe is on such instruction, throw an error.

  arch/powerpc/kernel/uprobes.c | 66 ++-
  1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index e8a63713e655..485d19a2a31f 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -7,6 +7,7 @@
   * Adapted from the x86 port by Ananth N Mavinakayanahalli 
   */
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -28,6 +29,69 @@ bool is_trap_insn(uprobe_opcode_t *insn)
return (is_trap(*insn));
  }
  
+#ifdef CONFIG_PPC64

+static int get_instr(struct mm_struct *mm, unsigned long addr, u32 *instr)
+{
+   struct page *page;
+   struct vm_area_struct *vma;
+   void *kaddr;
+   unsigned int gup_flags = FOLL_FORCE | FOLL_SPLIT_PMD;
+
+   if (get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL) <= 0)
+   return -EINVAL;
+
+   kaddr = kmap_atomic(page);
+   *instr = *((u32 *)(kaddr + (addr & ~PAGE_MASK)));
+   kunmap_atomic(kaddr);
+   put_page(page);
+   return 0;
+}
+
+static int validate_prefixed_instr(struct mm_struct *mm, unsigned long addr)
+{
+   struct ppc_inst inst;
+   u32 prefix, suffix;
+
+   /*
+* No need to check if addr is pointing to beginning of the
+* page. Even if probe is on a suffix of page-unaligned
+* prefixed instruction, hw will raise exception and kernel
+* will send SIGBUS.
+*/
+   if (!(addr & ~PAGE_MASK))
+   return 0;
+
+   if (get_instr(mm, addr, &prefix) < 0)
+   return -EINVAL;
+   if (get_instr(mm, addr + 4, &suffix) < 0)
+   return -EINVAL;
+
+   inst = ppc_inst_prefix(prefix, suffix);
+   if (ppc_inst_prefixed(inst) && (addr & 0x3F) == 0x3C) {
+   printk_ratelimited("Cannot register a uprobe on 64 byte "

^^ pr_info_ratelimited()

It should be sufficient to check the primary opcode to determine if it
is a prefixed instruction. You don't have to read the suffix. I see that
we don't have a helper to do this currently, so you could do:

if (ppc_inst_primary_opcode(ppc_inst(prefix)) == 1)


Ok.



In the future, it might be worthwhile to add IS_PREFIX() as a macro
similar to IS_MTMSRD() if there are more such uses.

Along with this, if you also add the below to the start of this
function, you can get rid of the #ifdef:

if (!IS_ENABLED(CONFIG_PPC64))
return 0;


Yeah this is better.

Thanks for the review, Naveen!
Ravi


Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction

2021-02-08 Thread Ravi Bangoria




On 2/4/21 9:42 PM, Naveen N. Rao wrote:

On 2021/02/04 06:38PM, Naveen N. Rao wrote:

On 2021/02/04 04:17PM, Ravi Bangoria wrote:

Don't allow Uprobe on 2nd word of a prefixed instruction. As per
ISA 3.1, prefixed instruction should not cross 64-byte boundary.
So don't allow Uprobe on such prefixed instruction as well.

There are two ways probed instruction is changed in mapped pages.
First, when Uprobe is activated, it searches for all the relevant
pages and replace instruction in them. In this case, if we notice
that probe is on the 2nd word of prefixed instruction, error out
directly. Second, when Uprobe is already active and user maps a
relevant page via mmap(), instruction is replaced via mmap() code
path. But because Uprobe is invalid, entire mmap() operation can
not be stopped. In this case just print an error and continue.

Signed-off-by: Ravi Bangoria 
---
v1: http://lore.kernel.org/r/20210119091234.76317-1-ravi.bango...@linux.ibm.com
v1->v2:
   - Instead of introducing new arch hook from verify_opcode(), use
 existing hook arch_uprobe_analyze_insn().
   - Add explicit check for prefixed instruction crossing 64-byte
 boundary. If probe is on such instruction, throw an error.

  arch/powerpc/kernel/uprobes.c | 66 ++-
  1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index e8a63713e655..485d19a2a31f 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -7,6 +7,7 @@
   * Adapted from the x86 port by Ananth N Mavinakayanahalli 
   */
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -28,6 +29,69 @@ bool is_trap_insn(uprobe_opcode_t *insn)
return (is_trap(*insn));
  }
  
+#ifdef CONFIG_PPC64

+static int get_instr(struct mm_struct *mm, unsigned long addr, u32 *instr)
+{
+   struct page *page;
+   struct vm_area_struct *vma;
+   void *kaddr;
+   unsigned int gup_flags = FOLL_FORCE | FOLL_SPLIT_PMD;
+
+   if (get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL) <= 0)
+   return -EINVAL;
+
+   kaddr = kmap_atomic(page);
+   *instr = *((u32 *)(kaddr + (addr & ~PAGE_MASK)));
+   kunmap_atomic(kaddr);
+   put_page(page);
+   return 0;
+}
+
+static int validate_prefixed_instr(struct mm_struct *mm, unsigned long addr)
+{
+   struct ppc_inst inst;
+   u32 prefix, suffix;
+
+   /*
+* No need to check if addr is pointing to beginning of the
+* page. Even if probe is on a suffix of page-unaligned
+* prefixed instruction, hw will raise exception and kernel
+* will send SIGBUS.
+*/
+   if (!(addr & ~PAGE_MASK))
+   return 0;
+
+   if (get_instr(mm, addr, &prefix) < 0)
+   return -EINVAL;
+   if (get_instr(mm, addr + 4, &suffix) < 0)
+   return -EINVAL;
+
+   inst = ppc_inst_prefix(prefix, suffix);
+   if (ppc_inst_prefixed(inst) && (addr & 0x3F) == 0x3C) {
+   printk_ratelimited("Cannot register a uprobe on 64 byte "

^^ pr_info_ratelimited()

It should be sufficient to check the primary opcode to determine if it
is a prefixed instruction. You don't have to read the suffix. I see that
we don't have a helper to do this currently, so you could do:

if (ppc_inst_primary_opcode(ppc_inst(prefix)) == 1)


Seeing the kprobes code, I realized that we have to check for another
scenario (Thanks, Jordan!). If this is the suffix of a prefixed
instruction for which a uprobe has already been installed, then the
previous word will be a 'trap' instruction. You need to check if there
is a uprobe at the previous word, and if the original instruction there
was a prefix instruction.


Yes, this patch will fail to detect such a scenario. I think I should
read the instruction directly from the file, like copy_insn() does.
With that, I'll get the original instruction rather than a 'trap'.

I'll think more along this line.

Ravi


Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction

2021-02-08 Thread Ravi Bangoria




On 2/4/21 6:45 PM, Naveen N. Rao wrote:

On 2021/02/04 04:19PM, Ravi Bangoria wrote:



On 2/4/21 4:17 PM, Ravi Bangoria wrote:

Don't allow Uprobe on 2nd word of a prefixed instruction. As per
ISA 3.1, prefixed instruction should not cross 64-byte boundary.
So don't allow Uprobe on such prefixed instruction as well.

There are two ways probed instruction is changed in mapped pages.
First, when Uprobe is activated, it searches for all the relevant
pages and replace instruction in them. In this case, if we notice
that probe is on the 2nd word of prefixed instruction, error out
directly. Second, when Uprobe is already active and user maps a
relevant page via mmap(), instruction is replaced via mmap() code
path. But because Uprobe is invalid, entire mmap() operation can
not be stopped. In this case just print an error and continue.


@mpe,

arch_uprobe_analyze_insn() can return early if
cpu_has_feature(CPU_FTR_ARCH_31) is not set. But that will
miss out a rare scenario of user running binary with prefixed
instruction on p10 predecessors. Please let me know if I
should add cpu_has_feature(CPU_FTR_ARCH_31) or not.


The check you are adding is very specific to prefixed instructions, so
it makes sense to add a cpu feature check for v3.1.

On older processors, those are invalid instructions like any other. The
instruction emulation infrastructure will refuse to emulate it and the
instruction will be single stepped.


Sure will add it.

Ravi


Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction

2021-02-08 Thread Ravi Bangoria




On 2/6/21 11:36 PM, Oleg Nesterov wrote:

On 02/04, Ravi Bangoria wrote:


+static int get_instr(struct mm_struct *mm, unsigned long addr, u32 *instr)
+{
+   struct page *page;
+   struct vm_area_struct *vma;
+   void *kaddr;
+   unsigned int gup_flags = FOLL_FORCE | FOLL_SPLIT_PMD;
+
+   if (get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL) <= 0)
+   return -EINVAL;


"vma" is not used,


Ok.


and I don't think you need FOLL_SPLIT_PMD.

Isn't it needed if the target page is a hugepage?


Otherwise I can't really comment this ppc-specific change.

To be honest, I don't even understand why we need this fix. Sure, the
breakpoint in the middle of a 64-bit insn won't work, but why do we care? The
user should know what he is doing.


That's a valid point. This patch is to protect the user from doing
an invalid thing.

Though, there is one minor scenario where this patch will help. If
the original prefixed instruction crosses a 64-byte boundary and the
user probes it, the uprobe infrastructure will emulate such an
instruction transparently, without notifying the user that the
instruction is improperly aligned. (A prefixed instruction is 8 bytes,
a 4-byte prefix plus a 4-byte suffix, so a prefix at offset 0x3C within
a 64-byte block places the suffix across the boundary; this is exactly
what the (addr & 0x3F) == 0x3C check in the patch catches.)


Not to mention we can't really trust get_user_pages() in that this page
can be modified by mm owner or debugger...


As Naveen pointed out, there might be an existing uprobe on the prefix
which this patch will fail to detect. So I'm thinking of reading
the instruction directly from the file-backed page (like copy_insn()
does), in which case I won't use get_user_pages().

Thanks Oleg for the review!

Ravi


[PATCH] selftests/powerpc: remove unneeded semicolon

2021-02-08 Thread Yang Li
Eliminate the following coccicheck warning:
./tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c:327:4-5: Unneeded
semicolon

Reported-by: Abaci Robot 
Signed-off-by: Yang Li 
---
 tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
index 02dffb6..b099753 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -324,7 +324,7 @@ int compress_file(int argc, char **argv, void *handle)
fprintf(stderr, "error: cannot progress; ");
fprintf(stderr, "too many faults\n");
exit(-1);
-   };
+   }
}
 
fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
-- 
1.8.3.1



Re: [PATCH v7 07/42] powerpc/fsl_booke/32: CacheLockingException remove args

2021-02-08 Thread Gautham R Shenoy
On Sat, Jan 30, 2021 at 11:08:17PM +1000, Nicholas Piggin wrote:
> Like other interrupt handler conversions, switch to getting registers
> from the pt_regs argument.
> 
> Signed-off-by: Nicholas Piggin 
> ---
>  arch/powerpc/kernel/head_fsl_booke.S | 6 +++---
>  arch/powerpc/kernel/traps.c  | 5 +++--
>  2 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
> index fdd4d274c245..0d4d9a6fcca1 100644
> --- a/arch/powerpc/kernel/head_fsl_booke.S
> +++ b/arch/powerpc/kernel/head_fsl_booke.S
> @@ -364,12 +364,12 @@ interrupt_base:
>   /* Data Storage Interrupt */
>   START_EXCEPTION(DataStorage)
>   NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
> - mfspr   r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
> + mfspr   r5,SPRN_ESR /* Grab the ESR, save it3 */
 ^^
Sorry for the nitpick.. Should be "/* Grab the ESR, save it */"


>   stw r5,_ESR(r11)
> - mfspr   r4,SPRN_DEAR/* Grab the DEAR, save it, pass arg2 */
> + mfspr   r4,SPRN_DEAR/* Grab the DEAR, save it */
> + stw r4, _DEAR(r11)
>   andis.  r10,r5,(ESR_ILK|ESR_DLK)@h
>   bne 1f
> - stw r4, _DEAR(r11)
>   EXC_XFER_LITE(0x0300, handle_page_fault)
>  1:
>   addir3,r1,STACK_FRAME_OVERHEAD
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 3ec7b443fe6b..1c77b1a8f7c9 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -2062,9 +2062,10 @@ void altivec_assist_exception(struct pt_regs *regs)
>  #endif /* CONFIG_ALTIVEC */
> 
>  #ifdef CONFIG_FSL_BOOKE
> -void CacheLockingException(struct pt_regs *regs, unsigned long address,
> -unsigned long error_code)
> +void CacheLockingException(struct pt_regs *regs)
>  {
> + unsigned long error_code = regs->dsisr;
> +
>   /* We treat cache locking instructions from the user
>* as priv ops, in the future we could try to do
>* something smarter
> -- 
> 2.23.0
> 


[PATCH] crypto: sha: remove unneeded semicolon

2021-02-08 Thread Yang Li
Eliminate the following coccicheck warning:
./arch/powerpc/crypto/sha1-spe-glue.c:110:2-3: Unneeded semicolon

Reported-by: Abaci Robot 
Signed-off-by: Yang Li 
---
 arch/powerpc/crypto/sha1-spe-glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
index b1e577c..88e8ea7 100644
--- a/arch/powerpc/crypto/sha1-spe-glue.c
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -107,7 +107,7 @@ static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
 
src += bytes;
len -= bytes;
-   };
+   }
 
memcpy((char *)sctx->buffer, src, len);
return 0;
-- 
1.8.3.1