[PATCH 6/6] powerpc/64s: Clear gprs on interrupt routine entry

2022-05-31 Thread Rohan McLure
Zero GPRs r0, r2-r11, r14-r31 on entry into the kernel for all
other interrupt sources to limit influence of user-space values
in potential speculation gadgets. The remaining gprs are overwritten by
entry macros to interrupt handlers, irrespective of whether or not a
given handler consumes these register values.

Prior to this commit, r14-r31 are restored on a per-interrupt basis at
exit, but now they are always restored. Remove explicit REST_NVGPRS
invocations on interrupt entry and simplify exit logic.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/exceptions-64s.S | 19 +++
 arch/powerpc/kernel/interrupt_64.S   |  9 ++---
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 102896fc6a86..8e2c1c924a4d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -502,6 +502,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
std r10,0(r1)   /* make stack chain pointer */
std r0,GPR0(r1) /* save r0 in stackframe*/
std r10,GPR1(r1)/* save r1 in stackframe*/
+   ZERO_GPR(0)
 
/* Mark our [H]SRRs valid for return */
li  r10,1
@@ -538,14 +539,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld  r10,IAREA+EX_R10(r13)
std r9,GPR9(r1)
std r10,GPR10(r1)
+   ZERO_GPRS(9, 10)
ld  r9,IAREA+EX_R11(r13)/* move r11 - r13 to stackframe */
ld  r10,IAREA+EX_R12(r13)
ld  r11,IAREA+EX_R13(r13)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
+   ZERO_GPR(11) /* keep r12 ([H]SRR1/MSR), r13 (PACA) for interrupt 
routine */
 
SAVE_NVGPRS(r1)
+   ZERO_NVGPRS()
 
.if IDAR
.if IISIDE
@@ -577,8 +581,9 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld  r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
-   std r2,GPR2(r1) /* save r2 in stackframe*/
+   SAVE_GPR(2, r1) /* save r2 in stackframe*/
SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe   */
+   ZERO_GPRS(2, 8)
mflrr9  /* Get LR, later save to stack  */
ld  r2,PACATOC(r13) /* get kernel TOC into r2   */
std r9,_LINK(r1)
@@ -696,6 +701,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlrr9
ld  r9,_CCR(r1)
mtcrr9
+   REST_NVGPRS(r1)
REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
@@ -1368,11 +1374,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
b   interrupt_return_srr
 
 1: bl  do_break
-   /*
-* do_break() may have changed the NV GPRS while handling a breakpoint.
-* If so, we need to restore them with their updated values.
-*/
-   REST_NVGPRS(r1)
b   interrupt_return_srr
 
 
@@ -1598,7 +1599,6 @@ EXC_COMMON_BEGIN(alignment_common)
GEN_COMMON alignment
addir3,r1,STACK_FRAME_OVERHEAD
bl  alignment_exception
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b   interrupt_return_srr
 
 
@@ -1708,7 +1708,6 @@ EXC_COMMON_BEGIN(program_check_common)
 .Ldo_program_check:
addir3,r1,STACK_FRAME_OVERHEAD
bl  program_check_exception
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b   interrupt_return_srr
 
 
@@ -2139,7 +2138,6 @@ EXC_COMMON_BEGIN(emulation_assist_common)
GEN_COMMON emulation_assist
addir3,r1,STACK_FRAME_OVERHEAD
bl  emulation_assist_interrupt
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b   interrupt_return_hsrr
 
 
@@ -2457,7 +2455,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
GEN_COMMON facility_unavailable
addir3,r1,STACK_FRAME_OVERHEAD
bl  facility_unavailable_exception
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b   interrupt_return_srr
 
 
@@ -2485,7 +2482,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
GEN_COMMON h_facility_unavailable
addir3,r1,STACK_FRAME_OVERHEAD
bl  facility_unavailable_exception
-   REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
b   interrupt_return_hsrr
 
 
@@ -2711,7 +2707,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
addir3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_ALTIVEC
bl  altivec_assist_exception
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 #else
bl  unknown_exception
 #endif
diff --git a/arch/powerpc/kernel/interrupt_64.S 
b/arch/powerpc/kernel/interrupt_64.S
index 92740d9889a3..3c742c07f4b6 100644
--- 

[PATCH 5/6] powerpc: Move syscall handler prototypes to header

2022-05-31 Thread Rohan McLure
Since some power syscall handlers call into other syscall handlers with
the usual in-register calling convention, emit symbols for both
conventions when required. The prototypes for handlers supporting
in-register parameters should exist in a header rather than immediately
preceding their usage.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/include/asm/syscalls.h | 16 
 arch/powerpc/kernel/syscalls.c  |  7 ---
 arch/powerpc/kernel/vdso.c  |  3 +--
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/syscalls.h 
b/arch/powerpc/include/asm/syscalls.h
index 75d8e1822caf..1e5f2ddcabe0 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -43,6 +43,22 @@
 
 struct rtas_args;
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+#include 
+asmlinkage long sys_ni_syscall(void);
+#ifdef CONFIG_PPC32
+asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
+asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
+  fd_set __user *exp,
+  struct __kernel_old_timeval __user *tvp);
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+asmlinkage long sys_personality(unsigned int personality);
+#endif /* CONFIG_PPC64 */
+
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
 asmlinkage long sys_mmap(unsigned long addr, size_t len,
 unsigned long prot, unsigned long flags,
 unsigned long fd, off_t offset);
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index c64cdb5c4435..6107bdd5dad1 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -64,11 +64,6 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
 }
 
 #ifdef CONFIG_PPC32
-asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
-asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
-  fd_set __user *exp,
-  struct __kernel_old_timeval __user *tvp);
-
 /*
  * Due to some executables calling the wrong select we sometimes
  * get wrong args.  This determines how the args are being passed
@@ -87,8 +82,6 @@ PPC_SYSCALL_DEFINE(5, long, ppc_select,
 #endif
 
 #ifdef CONFIG_PPC64
-asmlinkage long sys_personality(unsigned int personality);
-
 PPC_SYSCALL_DEFINE(1, long, ppc64_personality, unsigned long, personality)
 {
long ret;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index dcf57c07dbad..8a56e290fcaf 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -22,6 +22,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -40,8 +41,6 @@
 extern char vdso32_start, vdso32_end;
 extern char vdso64_start, vdso64_end;
 
-asmlinkage long sys_ni_syscall(void);
-
 /*
  * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
  * Once the early boot kernel code no longer needs to muck around
-- 
2.34.1



[PATCH 4/6] powerpc: Fix comment, use clear and restore macros

2022-05-31 Thread Rohan McLure
Only r10 is saved to the PACA. Reflect this in the inline comment.

Replace instructions for restoring gprs from the stack and clearing them
with the REST_GPRS and ZERO_GPRS convenience macros.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/exceptions-64s.S |  2 +-
 arch/powerpc/kernel/interrupt_64.S   | 13 +++--
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index b66dd6f775a4..102896fc6a86 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -281,7 +281,7 @@ BEGIN_FTR_SECTION
mfspr   r9,SPRN_PPR
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
-   std r10,IAREA+EX_R10(r13)   /* save r10 - r12 */
+   std r10,IAREA+EX_R10(r13)   /* save r10 */
.if ICFAR
 BEGIN_FTR_SECTION
mfspr   r10,SPRN_CFAR
diff --git a/arch/powerpc/kernel/interrupt_64.S 
b/arch/powerpc/kernel/interrupt_64.S
index e601ed999798..92740d9889a3 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -152,17 +152,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Could zero these as per ABI, but we may consider a stricter ABI
 * which preserves these if libc implementations can benefit, so
 * restore them for now until further measurement is done. */
-   ld  r0,GPR0(r1)
-   ld  r4,GPR4(r1)
-   ld  r5,GPR5(r1)
-   ld  r6,GPR6(r1)
-   ld  r7,GPR7(r1)
-   ld  r8,GPR8(r1)
+   REST_GPR(0, r1)
+   REST_GPRS(4, 8, r1)
/* Zero volatile regs that may contain sensitive kernel data */
-   li  r9,0
-   li  r10,0
-   li  r11,0
-   li  r12,0
+   ZERO_GPRS(9, 12)
mtspr   SPRN_XER,r0
 
/*
-- 
2.34.1



[PATCH 3/6] powerpc: Make syscalls save and restore gprs

2022-05-31 Thread Rohan McLure
Clears user state in gprs to reduce the influence of user registers on
speculation within kernel syscall handlers.

Remove conditional branches on result of `syscall_exit_prepare` to
restore non-volatile gprs, as these registers are always cleared and
hence always must be restored.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/interrupt_64.S | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt_64.S 
b/arch/powerpc/kernel/interrupt_64.S
index b11c2bd84827..e601ed999798 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -108,6 +108,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 * but this is the best we can do.
 */
 
+   ZERO_GPRS(5, 12)
+   ZERO_NVGPRS()
+
/* Calling convention has r3 = orig r0, r4 = regs */
mr  r3,r0
bl  system_call_exception
@@ -138,6 +141,7 @@ BEGIN_FTR_SECTION
HMT_MEDIUM_LOW
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
+   REST_NVGPRS(r1)
cmpdi   r3,0
bne .Lsyscall_vectored_\name\()_restore_regs
 
@@ -180,7 +184,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld  r4,_LINK(r1)
ld  r5,_XER(r1)
 
-   REST_NVGPRS(r1)
ld  r0,GPR0(r1)
mtcrr2
mtctr   r3
@@ -308,6 +311,9 @@ END_BTB_FLUSH_SECTION
wrteei  1
 #endif
 
+   ZERO_GPRS(5, 12)
+   ZERO_NVGPRS()
+
/* Calling convention has r3 = orig r0, r4 = regs */
mr  r3,r0
bl  system_call_exception
@@ -350,6 +356,7 @@ BEGIN_FTR_SECTION
stdcx.  r0,0,r1 /* to clear the reservation */
 END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
+   REST_NVGPRS(r1)
cmpdi   r3,0
bne .Lsyscall_restore_regs
/* Zero volatile regs that may contain sensitive kernel data */
@@ -377,7 +384,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 .Lsyscall_restore_regs:
ld  r3,_CTR(r1)
ld  r4,_XER(r1)
-   REST_NVGPRS(r1)
mtctr   r3
mtspr   SPRN_XER,r4
REST_GPR(0, r1)
@@ -445,7 +451,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
bl  interrupt_exit_user_prepare
cmpdi   r3,0
bne-.Lrestore_nvgprs_\srr
-.Lrestore_nvgprs_\srr\()_cont:
+   .Lrestore_nvgprs_\srr\()_cont:
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
 #ifdef CONFIG_PPC_BOOK3S
 .Linterrupt_return_\srr\()_user_rst_start:
-- 
2.34.1



[PATCH 2/6] powerpc: Provide syscall wrapper

2022-05-31 Thread Rohan McLure
Syscall wrapper implemented as per s390, x86, arm64, providing the
option for gprs to be cleared on entry to the kernel, reducing caller
influence on speculation within syscall routines. The wrapper
is a macro that emits syscall handler implementations with parameters
passed by stack pointer.

For platforms supporting this syscall wrapper, emit symbols with usual
in-register parameters (`sys...`) to support calls to syscall handlers
from within the kernel.

Syscalls are wrapped on all platforms except the Cell processor. SPUs require
access to syscall prototypes, which are omitted with ARCH_HAS_SYSCALL_WRAPPER
enabled.

Co-developed-by: Andrew Donnellan 
Signed-off-by: Andrew Donnellan 
Signed-off-by: Rohan McLure 
---
 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/include/asm/interrupt.h   |  3 +-
 arch/powerpc/include/asm/syscall_wrapper.h | 92 ++
 arch/powerpc/include/asm/syscalls.h| 83 +--
 arch/powerpc/kernel/entry_32.S |  6 +-
 arch/powerpc/kernel/interrupt.c| 35 
 arch/powerpc/kernel/interrupt_64.S | 30 +++
 arch/powerpc/kernel/sys_ppc32.c| 50 +++-
 arch/powerpc/kernel/syscalls.c | 19 +++--
 arch/powerpc/kernel/systbl.S   | 21 +
 arch/powerpc/kernel/vdso.c |  2 +
 11 files changed, 255 insertions(+), 87 deletions(-)
 create mode 100644 arch/powerpc/include/asm/syscall_wrapper.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 174edabb74fa..e58287a70061 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,6 +137,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX   if (PPC_BOOK3S || PPC_8xx || 
40x) && !HIBERNATION
select ARCH_HAS_STRICT_KERNEL_RWX   if FSL_BOOKE && !HIBERNATION && 
!RANDOMIZE_BASE
select ARCH_HAS_STRICT_MODULE_RWX   if ARCH_HAS_STRICT_KERNEL_RWX
+   select ARCH_HAS_SYSCALL_WRAPPER if !SPU_BASE
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UBSAN_SANITIZE_ALL
diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index f964ef5c57d8..8e8949e4db7a 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -636,8 +636,7 @@ static inline void interrupt_cond_local_irq_enable(struct 
pt_regs *regs)
local_irq_enable();
 }
 
-long system_call_exception(long r3, long r4, long r5, long r6, long r7, long 
r8,
-  unsigned long r0, struct pt_regs *regs);
+long system_call_exception(unsigned long r0, struct pt_regs *regs);
 notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs 
*regs, long scv);
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/syscall_wrapper.h 
b/arch/powerpc/include/asm/syscall_wrapper.h
new file mode 100644
index ..23da22b081e4
--- /dev/null
+++ b/arch/powerpc/include/asm/syscall_wrapper.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - powerpc specific wrappers to syscall definitions
+ *
+ * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
+ */
+
+#ifndef __ASM_SYSCALL_WRAPPER_H
+#define __ASM_SYSCALL_WRAPPER_H
+
+struct pt_regs;
+
+#define SC_POWERPC_REGS_TO_ARGS(x, ...)\
+   __MAP(x,__SC_ARGS   \
+ ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5]\
+ ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
+
+#ifdef CONFIG_COMPAT
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)   
\
+   asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs); 
\
+   ALLOW_ERROR_INJECTION(__powerpc_compat_sys##name, ERRNO);   
\
+   static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));  
\
+   static inline long 
__do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));   \
+   asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs)  
\
+   {   
\
+   return 
__se_compat_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));   \
+   }   
\
+   static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))   
\
+   {   
\
+   return 
__do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\
+   }   
\
+   static inline long 

[PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears

2022-05-31 Thread Rohan McLure
Macros for saving registers to and restoring registers from the stack
exist. Provide a macro for simply zeroing a range of gprs, or an
individual gpr.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/include/asm/ppc_asm.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index 4dea2d963738..3fb37a9767f7 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -33,6 +33,19 @@
.endr
 .endm
 
+/*
+ * Simplification of OP_REGS, for an arbitrary right hand operand.
+ *
+ *   op  reg, rhs
+ */
+.macro BINOP_REGS op, rhs, start, end
+   .Lreg=\start
+   .rept (\end - \start + 1)
+   \op .Lreg, \rhs
+   .Lreg=.Lreg+1
+   .endr
+.endm
+
 /*
  * Macros for storing registers into and loading registers from
  * exception frames.
@@ -49,6 +62,10 @@
 #define REST_NVGPRS(base)  REST_GPRS(13, 31, base)
 #endif
 
+#define ZERO_GPRS(start, end)  BINOP_REGS li, 0, start, end
+#define ZERO_NVGPRS()  ZERO_GPRS(14, 31)
+#define ZERO_GPR(n)ZERO_GPRS(n, n)
+
 #define SAVE_GPR(n, base)  SAVE_GPRS(n, n, base)
 #define REST_GPR(n, base)  REST_GPRS(n, n, base)
 
-- 
2.34.1



[PATCH kernel] powerpc/pseries/iommu: Print ibm,query-pe-dma-windows parameters

2022-05-31 Thread Alexey Kardashevskiy
PowerVM has a stricter policy about allocating TCEs for LPARs and
often there is not enough TCEs for 1:1 mapping, this adds the supported
numbers into dev_info() to help analyzing bugreports.

Signed-off-by: Alexey Kardashevskiy 
---

A PowerVM admin can enable "enlarged IO capacity" for a passed
though PCI device but there is no way from inside LPAR to know if that
worked or how many more TCEs became available.

---
 arch/powerpc/platforms/pseries/iommu.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/iommu.c 
b/arch/powerpc/platforms/pseries/iommu.c
index 7639e7355df2..84edc8d730e1 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1022,9 +1022,6 @@ static int query_ddw(struct pci_dev *dev, const u32 
*ddw_avail,
 
ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
cfg_addr, BUID_HI(buid), BUID_LO(buid));
-   dev_info(>dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned 
%d\n",
-ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
-BUID_LO(buid), ret);
 
switch (out_sz) {
case 5:
@@ -1042,6 +1039,11 @@ static int query_ddw(struct pci_dev *dev, const u32 
*ddw_avail,
break;
}
 
+   dev_info(>dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, 
lb=%llx ps=%x wn=%d\n",
+ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+BUID_LO(buid), ret, query->largest_available_block,
+query->page_size, query->windows_available);
+
return ret;
 }
 
-- 
2.30.2



Re: [PATCH] powerpc/64: Drop ppc_inst_as_str()

2022-05-31 Thread Bagas Sanjaya
On 6/1/22 10:03, Bagas Sanjaya wrote:
>>
>> Reported-by: Bagas Sanjaya 
>> Reported-by: Petr Mladek 
>> Signed-off-by: Michael Ellerman 
> 
> The arch/powerpc/kernel/trace/ftrace.c builds successfully, however
> there is also other build error for which I have reported at [1].
> 
> Thanks.
> 
> Tested-by: Bagas Sanjaya 
> 
> [1]: https://lore.kernel.org/linuxppc-dev/ypbucprm61rli...@debian.me/
> 

Oops, I forget to mention: is this patch meant for backporting into
stable tree? I don't see Fixes: tag and Cc: sta...@vger.kernel.org
in the patch.

-- 
An old man doll... just what I always wanted! - Clara


Re: [PATCH] powerpc/64: Drop ppc_inst_as_str()

2022-05-31 Thread Bagas Sanjaya
Hi,

On Tue, May 31, 2022 at 04:59:36PM +1000, Michael Ellerman wrote:
> The ppc_inst_as_str() macro tries to make printing variable length,
> aka "prefixed", instructions convenient. It mostly succeeds, but it does
> hide an on-stack buffer, which triggers stack protector.
> 
> More problematically it doesn't compile at all with GCC 12, due to the
> fact that it returns the char buffer declared inside the macro:
> 
>   arch/powerpc/kernel/trace/ftrace.c: In function '__ftrace_modify_call':
>   ./include/linux/printk.h:475:44: error: using a dangling pointer to '__str' 
> [-Werror=dangling-pointer=]
> 475 | #define printk(fmt, ...) printk_index_wrap(_printk, fmt, 
> ##__VA_ARGS__)
> ...
>   arch/powerpc/kernel/trace/ftrace.c:567:17: note: in expansion of macro 
> 'pr_err'
> 567 | pr_err("Not expected bl: opcode is %s\n", 
> ppc_inst_as_str(op));
> | ^~
>   ./arch/powerpc/include/asm/inst.h:156:14: note: '__str' declared here
> 156 | char __str[PPC_INST_STR_LEN];   \
> |  ^
> 
> This could be fixed by having the caller declare the buffer, but in some
> places there'd need to be two buffers. In all cases where
> ppc_inst_as_str() is used the output is not really meant for user
> consumption, it's almost always indicative of a kernel bug.
> 
> A simpler solution is to just print the value as an unsigned long. For
> normal instructions the output is identical. For prefixed instructions
> the value is printed as a single 64-bit quantity, whereas previously the
> low half was printed first. But that is good enough for debug output,
> especially as prefixed instructions will be rare in practice.
> 
> Old:
>   c070  6042  ori r2,r2,0
>   c074  0411 e580fb00 .long 0xe580fb000411
> 
> New:
>   c010f90c  6042  ori r2,r2,0
>   c010f910  e580fb000411  .long 0xe580fb000411
> 
> Reported-by: Bagas Sanjaya 
> Reported-by: Petr Mladek 
> Signed-off-by: Michael Ellerman 

The arch/powerpc/kernel/trace/ftrace.c builds successfully, however
there is also other build error for which I have reported at [1].

Thanks.

Tested-by: Bagas Sanjaya 

[1]: https://lore.kernel.org/linuxppc-dev/ypbucprm61rli...@debian.me/

-- 
An old man doll... just what I always wanted! - Clara


outside array bounds error on ppc64_defconfig, GCC 12.1.0

2022-05-31 Thread Bagas Sanjaya
Hi,

I'm trying to verify Drop ppc_inst_as_str() patch on [1] by performing
ppc64_defconfig build with powerpc64-unknown-linux-gnu-gcc (GCC 12.1.0).
The patch is applied on top of powerpc tree, next branch.

I got outside array bounds error:

  CC  arch/powerpc/kernel/dbell.o
In function 'do_byte_reverse',
inlined from 'do_vec_store' at arch/powerpc/lib/sstep.c:722:3,
inlined from 'emulate_loadstore' at arch/powerpc/lib/sstep.c:3509:9:
arch/powerpc/lib/sstep.c:286:25: error: array subscript [3, 4] is outside array 
bounds of 'union [1]' [-Werror=array-bounds]
  286 | up[0] = byterev_8(up[3]);
  | ^~~~
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:708:11: note: at offset [24, 39] into object 'u' of 
size 16
  708 | } u;
  |   ^
In function 'do_byte_reverse',
inlined from 'do_vec_store' at arch/powerpc/lib/sstep.c:722:3,
inlined from 'emulate_loadstore' at arch/powerpc/lib/sstep.c:3509:9:
arch/powerpc/lib/sstep.c:287:23: error: array subscript [3, 4] is outside array 
bounds of 'union [1]' [-Werror=array-bounds]
  287 | up[3] = tmp;
  | ~~^
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:708:11: note: at offset [24, 39] into object 'u' of 
size 16
  708 | } u;
  |   ^
In function 'do_byte_reverse',
inlined from 'do_vec_store' at arch/powerpc/lib/sstep.c:722:3,
inlined from 'emulate_loadstore' at arch/powerpc/lib/sstep.c:3509:9:
arch/powerpc/lib/sstep.c:288:23: error: array subscript 2 is outside array 
bounds of 'union [1]' [-Werror=array-bounds]
  288 | tmp = byterev_8(up[2]);
  |   ^~~~
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:708:11: note: at offset 16 into object 'u' of size 16
  708 | } u;
  |   ^
In function 'do_byte_reverse',
inlined from 'do_vec_store' at arch/powerpc/lib/sstep.c:722:3,
inlined from 'emulate_loadstore' at arch/powerpc/lib/sstep.c:3509:9:
arch/powerpc/lib/sstep.c:289:23: error: array subscript 2 is outside array 
bounds of 'union [1]' [-Werror=array-bounds]
  289 | up[2] = byterev_8(up[1]);
  | ~~^~
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:708:11: note: at offset 16 into object 'u' of size 16
  708 | } u;
  |   ^
In function 'do_byte_reverse',
inlined from 'do_vec_load' at arch/powerpc/lib/sstep.c:691:3,
inlined from 'emulate_loadstore' at arch/powerpc/lib/sstep.c:3438:9:
arch/powerpc/lib/sstep.c:286:25: error: array subscript [3, 4] is outside array 
bounds of 'u8[16]' {aka 'unsigned char[16]'} [-Werror=array-bounds]
  286 | up[0] = byterev_8(up[3]);
  | ^~~~
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
  681 | } u = {};
  |   ^
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
In function 'do_byte_reverse',
inlined from 'do_vec_load' at arch/powerpc/lib/sstep.c:691:3,
inlined from 'emulate_loadstore' at arch/powerpc/lib/sstep.c:3438:9:
arch/powerpc/lib/sstep.c:287:23: error: array subscript [3, 4] is outside array 
bounds of 'u8[16]' {aka 'unsigned char[16]'} [-Werror=array-bounds]
  287 | up[3] = tmp;
  | ~~^
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
  681 | } u = {};
  |   ^
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into object 'u' of 
size 16
In function 'do_byte_reverse',
inlined from 'do_vec_load' at arch/powerpc/lib/sstep.c:691:3,
inlined from 'emulate_loadstore' at arch/powerpc/lib/sstep.c:3438:9:
arch/powerpc/lib/sstep.c:288:23: error: array subscript 2 is outside array 
bounds of 'u8[16]' {aka 'unsigned char[16]'} [-Werror=array-bounds]
  288 | tmp = byterev_8(up[2]);
  |   ^~~~
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:681:11: note: at offset 16 into object 'u' of size 16
  681 | 

Re: [FSL P50x0] Keyboard and mouse don't work anymore after the devicetree updates for 5.19

2022-05-31 Thread Christian Zigotzky
On 31. May 2022, at 15:46, Rob Herring  wrote:

Do you have a dmesg log?

The other way to fix is creating a IRQ resource and adding it to the
child device resources.

Rob

——

Rob,

Do you mean a dmesg from the boot loop?
The other way is a good idea.

Cheers,
Christian


Re: [PATCH] xen: replace xen_remap() with memremap()

2022-05-31 Thread Stefano Stabellini
On Mon, 30 May 2022, Juergen Gross wrote:
> xen_remap() is used to establish mappings for frames not under direct
> control of the kernel: for Xenstore and console ring pages, and for
> grant pages of non-PV guests.
> 
> Today xen_remap() is defined to use ioremap() on x86 (doing uncached
> mappings), and ioremap_cache() on Arm (doing cached mappings).
> 
> Uncached mappings for those use cases are bad for performance, so they
> should be avoided if possible. As all use cases of xen_remap() don't
> require uncached mappings (the mapped area is always physical RAM),
> a mapping using the standard WB cache mode is fine.
> 
> As sparse is flagging some of the xen_remap() use cases to be not
> appropriate for iomem(), as the result is not annotated with the
> __iomem modifier, eliminate xen_remap() completely and replace all
> use cases with memremap() specifying the MEMREMAP_WB caching mode.
> 
> xen_unmap() can be replaced with memunmap().
> 
> Reported-by: kernel test robot 
> Signed-off-by: Juergen Gross 

Acked-by: Stefano Stabellini 


> ---
>  arch/x86/include/asm/xen/page.h   | 3 ---
>  drivers/tty/hvc/hvc_xen.c | 2 +-
>  drivers/xen/grant-table.c | 6 +++---
>  drivers/xen/xenbus/xenbus_probe.c | 8 
>  include/xen/arm/page.h| 3 ---
>  5 files changed, 8 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index 1fc67df50014..fa9ec20783fa 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -347,9 +347,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr);
>  void make_lowmem_page_readonly(void *vaddr);
>  void make_lowmem_page_readwrite(void *vaddr);
>  
> -#define xen_remap(cookie, size) ioremap((cookie), (size))
> -#define xen_unmap(cookie) iounmap((cookie))
> -
>  static inline bool xen_arch_need_swiotlb(struct device *dev,
>phys_addr_t phys,
>dma_addr_t dev_addr)
> diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
> index ebaf7500f48f..7c23112dc923 100644
> --- a/drivers/tty/hvc/hvc_xen.c
> +++ b/drivers/tty/hvc/hvc_xen.c
> @@ -253,7 +253,7 @@ static int xen_hvm_console_init(void)
>   if (r < 0 || v == 0)
>   goto err;
>   gfn = v;
> - info->intf = xen_remap(gfn << XEN_PAGE_SHIFT, XEN_PAGE_SIZE);
> + info->intf = memremap(gfn << XEN_PAGE_SHIFT, XEN_PAGE_SIZE, 
> MEMREMAP_WB);
>   if (info->intf == NULL)
>   goto err;
>   info->vtermno = HVC_COOKIE;
> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
> index 1a1aec0a88a1..2f4f0ed5d8f8 100644
> --- a/drivers/xen/grant-table.c
> +++ b/drivers/xen/grant-table.c
> @@ -632,7 +632,7 @@ int gnttab_setup_auto_xlat_frames(phys_addr_t addr)
>   if (xen_auto_xlat_grant_frames.count)
>   return -EINVAL;
>  
> - vaddr = xen_remap(addr, XEN_PAGE_SIZE * max_nr_gframes);
> + vaddr = memremap(addr, XEN_PAGE_SIZE * max_nr_gframes, MEMREMAP_WB);
>   if (vaddr == NULL) {
>   pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n",
>   );
> @@ -640,7 +640,7 @@ int gnttab_setup_auto_xlat_frames(phys_addr_t addr)
>   }
>   pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
>   if (!pfn) {
> - xen_unmap(vaddr);
> + memunmap(vaddr);
>   return -ENOMEM;
>   }
>   for (i = 0; i < max_nr_gframes; i++)
> @@ -659,7 +659,7 @@ void gnttab_free_auto_xlat_frames(void)
>   if (!xen_auto_xlat_grant_frames.count)
>   return;
>   kfree(xen_auto_xlat_grant_frames.pfn);
> - xen_unmap(xen_auto_xlat_grant_frames.vaddr);
> + memunmap(xen_auto_xlat_grant_frames.vaddr);
>  
>   xen_auto_xlat_grant_frames.pfn = NULL;
>   xen_auto_xlat_grant_frames.count = 0;
> diff --git a/drivers/xen/xenbus/xenbus_probe.c 
> b/drivers/xen/xenbus/xenbus_probe.c
> index d367f2bd2b93..58b732dcbfb8 100644
> --- a/drivers/xen/xenbus/xenbus_probe.c
> +++ b/drivers/xen/xenbus/xenbus_probe.c
> @@ -752,8 +752,8 @@ static void xenbus_probe(void)
>   xenstored_ready = 1;
>  
>   if (!xen_store_interface) {
> - xen_store_interface = xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
> - XEN_PAGE_SIZE);
> + xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
> +XEN_PAGE_SIZE, MEMREMAP_WB);
>   /*
>* Now it is safe to free the IRQ used for xenstore late
>* initialization. No need to unbind: it is about to be
> @@ -1009,8 +1009,8 @@ static int __init xenbus_init(void)
>  #endif
>   xen_store_gfn = (unsigned long)v;
>   xen_store_interface =
> - xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
> -   

Re: [PATCH] powerpc/64: Drop ppc_inst_as_str()

2022-05-31 Thread Segher Boessenkool
Hi!

On Tue, May 31, 2022 at 04:59:36PM +1000, Michael Ellerman wrote:
> More problematically it doesn't compile at all with GCC 12, due to the
> fact that it returns the char buffer declared inside the macro:

It returns a pointer to a buffer on stack.  It is not valid C to access
that buffer after the function has returned (and indeed it does not
work, in general).

> A simpler solution is to just print the value as an unsigned long. For
> normal instructions the output is identical. For prefixed instructions
> the value is printed as a single 64-bit quantity, whereas previously the
> low half was printed first. But that is good enough for debug output,
> especially as prefixed instructions will be rare in practice.

Prefixed insns might be somewhat rare currently, but it will not stay
that way.

It is not hard to fix the problem here?  The only tricky part is that
ppc_inst_as_ulong swaps the two halves for LE, for as far as I can see
no reason at all :-(

If it didn't it would be easy to detect prefixed insns (because they
then are guaranteed to be > 0x), and it is easy to print them
with a space between the two opcodes, with a utility function:

void print_insn_bytes_nicely(unsigned long insn)
{
if (insn > 0xffffffff)
printf("%08x ", insn >> 32);
printf("%08x", insn & 0xffffffff);
}

or something like that.


Segher


Re: [PATCH v5] mm: Avoid unnecessary page fault retires on shared memory types

2022-05-31 Thread Heiko Carstens
Michal Simek , Thomas Bogendoerfer , 
linux-par...@vger.kernel.org, Max Filippov , 
linux-ker...@vger.kernel.org, Dinh Nguyen , 
linux-ri...@lists.infradead.org, Palmer Dabbelt , Sven 
Schnelle , Guo Ren , 
linux-hexa...@vger.kernel.org, Ivan Kokshaysky , 
Johannes Berg , linuxppc-dev@lists.ozlabs.org, 
"David S . Miller" 
Errors-To: linuxppc-dev-bounces+archive=mail-archive@lists.ozlabs.org
Sender: "Linuxppc-dev" 


On Mon, May 30, 2022 at 02:34:50PM -0400, Peter Xu wrote:
> I observed that for each of the shared file-backed page faults, we're very
> likely to retry one more time for the 1st write fault upon no page.  It's
> because we'll need to release the mmap lock for dirty rate limit purpose
> with balance_dirty_pages_ratelimited() (in fault_dirty_shared_page()).
> 
> Then after that throttling we return VM_FAULT_RETRY.
> 
> We did that probably because VM_FAULT_RETRY is the only way we can return
> to the fault handler at that time telling it we've released the mmap lock.
> 
> However that's not ideal because it's very likely the fault does not need
> to be retried at all since the pgtable was well installed before the
> throttling, so the next continuous fault (including taking mmap read lock,
> walk the pgtable, etc.) could be in most cases unnecessary.
> 
> It's not only slowing down page faults for shared file-backed, but also add
> more mmap lock contention which is in most cases not needed at all.
> 
> To observe this, one could try to write to some shmem page and look at
> "pgfault" value in /proc/vmstat, then we should expect 2 counts for each
> shmem write simply because we retried, and vm event "pgfault" will capture
> that.
> 
> To make it more efficient, add a new VM_FAULT_COMPLETED return code just to
> show that we've completed the whole fault and released the lock.  It's also
> a hint that we should very possibly not need another fault immediately on
> this page because we've just completed it.
> 
> This patch provides a ~12% perf boost on my aarch64 test VM with a simple
> program sequentially dirtying 400MB shmem file being mmap()ed and these are
> the time it needs:
> 
>   Before: 650.980 ms (+-1.94%)
>   After:  569.396 ms (+-1.38%)
> 
> I believe it could help more than that.
> 
> We need some special care on GUP and the s390 pgfault handler (for gmap
> code before returning from pgfault), the rest changes in the page fault
> handlers should be relatively straightforward.
> 
> Another thing to mention is that mm_account_fault() does take this new
> fault as a generic fault to be accounted, unlike VM_FAULT_RETRY.
> 
> I explicitly didn't touch hmm_vma_fault() and break_ksm() because they do
> not handle VM_FAULT_RETRY even with existing code, so I'm literally keeping
> them as-is.
> 
> Acked-by: Geert Uytterhoeven 
> Acked-by: Peter Zijlstra (Intel) 
> Acked-by: Johannes Weiner 
> Acked-by: Vineet Gupta 
> Acked-by: Guo Ren 
> Acked-by: Max Filippov 
> Acked-by: Christian Borntraeger 
> Acked-by: Michael Ellerman  (powerpc)
> Acked-by: Catalin Marinas 
> Reviewed-by: Alistair Popple 
> Reviewed-by: Ingo Molnar 
> Signed-off-by: Peter Xu 
> ---
...
>  arch/s390/mm/fault.c  | 12 
> diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
> index e173b6187ad5..973dcd05c293 100644
> --- a/arch/s390/mm/fault.c
> +++ b/arch/s390/mm/fault.c
> @@ -433,6 +433,17 @@ static inline vm_fault_t do_exception(struct pt_regs 
> *regs, int access)
>   goto out_up;
>   goto out;
>   }
> +
> + /* The fault is fully completed (including releasing mmap lock) */
> + if (fault & VM_FAULT_COMPLETED) {
> + if (gmap) {
> + mmap_read_lock(mm);
> + goto out_gmap;
> + }
> + fault = 0;
> + goto out;
> + }
> +
>   if (unlikely(fault & VM_FAULT_ERROR))
>   goto out_up;
>  
> @@ -452,6 +463,7 @@ static inline vm_fault_t do_exception(struct pt_regs 
> *regs, int access)
>   mmap_read_lock(mm);
>   goto retry;
>   }
> +out_gmap:
>   if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
>   address =  __gmap_link(gmap, current->thread.gmap_addr,
>  address);

FWIW:
Acked-by: Heiko Carstens 


Re: [PATCH v5] mm: Avoid unnecessary page fault retires on shared memory types

2022-05-31 Thread Russell King (Oracle)
Michal Simek , Thomas Bogendoerfer , 
linux-par...@vger.kernel.org, Max Filippov , 
linux-ker...@vger.kernel.org, Dinh Nguyen , 
linux-ri...@lists.infradead.org, Palmer Dabbelt , Sven 
Schnelle , Guo Ren , 
linux-hexa...@vger.kernel.org, Ivan Kokshaysky , 
Johannes Berg , linuxppc-dev@lists.ozlabs.org, 
"David S . Miller" 
Errors-To: linuxppc-dev-bounces+archive=mail-archive@lists.ozlabs.org
Sender: "Linuxppc-dev" 


On Mon, May 30, 2022 at 02:34:50PM -0400, Peter Xu wrote:
> I observed that for each of the shared file-backed page faults, we're very
> likely to retry one more time for the 1st write fault upon no page.  It's
> because we'll need to release the mmap lock for dirty rate limit purpose
> with balance_dirty_pages_ratelimited() (in fault_dirty_shared_page()).
> 
> Then after that throttling we return VM_FAULT_RETRY.
> 
> We did that probably because VM_FAULT_RETRY is the only way we can return
> to the fault handler at that time telling it we've released the mmap lock.
> 
> However that's not ideal because it's very likely the fault does not need
> to be retried at all since the pgtable was well installed before the
> throttling, so the next continuous fault (including taking mmap read lock,
> walk the pgtable, etc.) could be in most cases unnecessary.
> 
> It's not only slowing down page faults for shared file-backed, but also add
> more mmap lock contention which is in most cases not needed at all.
> 
> To observe this, one could try to write to some shmem page and look at
> "pgfault" value in /proc/vmstat, then we should expect 2 counts for each
> shmem write simply because we retried, and vm event "pgfault" will capture
> that.
> 
> To make it more efficient, add a new VM_FAULT_COMPLETED return code just to
> show that we've completed the whole fault and released the lock.  It's also
> a hint that we should very possibly not need another fault immediately on
> this page because we've just completed it.
> 
> This patch provides a ~12% perf boost on my aarch64 test VM with a simple
> program sequentially dirtying 400MB shmem file being mmap()ed and these are
> the time it needs:
> 
>   Before: 650.980 ms (+-1.94%)
>   After:  569.396 ms (+-1.38%)
> 
> I believe it could help more than that.
> 
> We need some special care on GUP and the s390 pgfault handler (for gmap
> code before returning from pgfault), the rest changes in the page fault
> handlers should be relatively straightforward.
> 
> Another thing to mention is that mm_account_fault() does take this new
> fault as a generic fault to be accounted, unlike VM_FAULT_RETRY.
> 
> I explicitly didn't touch hmm_vma_fault() and break_ksm() because they do
> not handle VM_FAULT_RETRY even with existing code, so I'm literally keeping
> them as-is.
> 
> Acked-by: Geert Uytterhoeven 
> Acked-by: Peter Zijlstra (Intel) 
> Acked-by: Johannes Weiner 
> Acked-by: Vineet Gupta 
> Acked-by: Guo Ren 
> Acked-by: Max Filippov 
> Acked-by: Christian Borntraeger 
> Acked-by: Michael Ellerman  (powerpc)
> Acked-by: Catalin Marinas 
> Reviewed-by: Alistair Popple 
> Reviewed-by: Ingo Molnar 
> Signed-off-by: Peter Xu 

For:

> diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
> index a062e07516dd..46cccd6bf705 100644
> --- a/arch/arm/mm/fault.c
> +++ b/arch/arm/mm/fault.c
> @@ -322,6 +322,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, 
> struct pt_regs *regs)
>   return 0;
>   }
>  
> + /* The fault is fully completed (including releasing mmap lock) */
> + if (fault & VM_FAULT_COMPLETED)
> + return 0;
> +
>   if (!(fault & VM_FAULT_ERROR)) {
>   if (fault & VM_FAULT_RETRY) {
>   flags |= FAULT_FLAG_TRIED;

Acked-by: Russell King (Oracle) 

Thanks!

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!


Re: [PATCH] xen: replace xen_remap() with memremap()

2022-05-31 Thread Boris Ostrovsky



On 5/30/22 4:26 AM, Juergen Gross wrote:

xen_remap() is used to establish mappings for frames not under direct
control of the kernel: for Xenstore and console ring pages, and for
grant pages of non-PV guests.

Today xen_remap() is defined to use ioremap() on x86 (doing uncached
mappings), and ioremap_cache() on Arm (doing cached mappings).

Uncached mappings for those use cases are bad for performance, so they
should be avoided if possible. As all use cases of xen_remap() don't
require uncached mappings (the mapped area is always physical RAM),
a mapping using the standard WB cache mode is fine.

As sparse is flagging some of the xen_remap() use cases to be not
appropriate for iomem(), as the result is not annotated with the
__iomem modifier, eliminate xen_remap() completely and replace all
use cases with memremap() specifying the MEMREMAP_WB caching mode.

xen_unmap() can be replaced with memunmap().

Reported-by: kernel test robot 
Signed-off-by: Juergen Gross 




Reviewed-by: Boris Ostrovsky 



[PATCH 2/2] powerpc/irq: Perform stack_overflow detection after switching to IRQ stack

2022-05-31 Thread Christophe Leroy
When KASAN is enabled, as shown by the Oops below, the 2k limit is not
enough to allow stack dump after a stack overflow detection when
CONFIG_DEBUG_STACKOVERFLOW is selected:

do_IRQ: stack overflow: 1984
CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1
Call Trace:
Oops: Kernel stack overflow, sig: 11 [#1]
BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac
Modules linked in: sr_mod cdrom radeon(+) ohci_pci(+) hwmon 
i2c_algo_bit drm_ttm_helper ttm drm_dp_helper snd_aoa_i2sbus snd_aoa_soundbus 
snd_pcm ehci_pci snd_timer ohci_hcd snd ssb ehci_hcd 8250_pci soundcore 
drm_kms_helper pcmcia 8250 pcmcia_core syscopyarea usbcore sysfillrect 
8250_base sysimgblt serial_mctrl_gpio fb_sys_fops usb_common pkcs8_key_parser 
fuse drm drm_panel_orientation_quirks configfs
CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1
NIP:  c02e5558 LR: c07eb3bc CTR: c07f46a8
REGS: e7fe9f50 TRAP:    Not tainted  (5.18.0-gentoo-PMacG4)
MSR:  1032   CR: 44a14824  XER: 2000

GPR00: c07eb3bc eaa1c000 c26baea0 eaa1c0a0 0008  c07eb3bc 
eaa1c010
GPR08: eaa1c0a8 04f3f3f3 f1f1f1f1 c07f4c84 44a14824 0080f7e4 0005 
0010
GPR16: 0025 eaa1c154 eaa1c158 c0dbad64 0020 fd543810 eaa1c0a0 
eaa1c29e
GPR24: c0dbad44 c0db8740 05ff fd543802 eaa1c150 c0c9a3c0 eaa1c0a0 
c0c9a3c0
NIP [c02e5558] kasan_check_range+0xc/0x2b4
LR [c07eb3bc] format_decode+0x80/0x604
Call Trace:
[eaa1c000] [c07eb3bc] format_decode+0x80/0x604 (unreliable)
[eaa1c070] [c07f4dac] vsnprintf+0x128/0x938
[eaa1c110] [c07f5788] sprintf+0xa0/0xc0
[eaa1c180] [c0154c1c] __sprint_symbol.constprop.0+0x170/0x198
[eaa1c230] [c07ee71c] symbol_string+0xf8/0x260
[eaa1c430] [c07f46d0] pointer+0x15c/0x710
[eaa1c4b0] [c07f4fbc] vsnprintf+0x338/0x938
[eaa1c550] [c00e8fa0] vprintk_store+0x2a8/0x678
[eaa1c690] [c00e94e4] vprintk_emit+0x174/0x378
[eaa1c6d0] [c00ea008] _printk+0x9c/0xc0
[eaa1c750] [c000ca94] show_stack+0x21c/0x260
[eaa1c7a0] [c07d0bd4] dump_stack_lvl+0x60/0x90
[eaa1c7c0] [c0009234] __do_IRQ+0x170/0x174
[eaa1c800] [c0009258] do_IRQ+0x20/0x34
[eaa1c820] [c00045b4] HardwareInterrupt_virt+0x108/0x10c
...

As the detection is asynchronously performed at IRQs, we could be long
after the limit has been crossed, so increasing the limit would not
solve the problem completely.

In order to be sure that there is enough stack space for the stack
dump, do it after the switch to the IRQ stack. That way it is sure
that the stack is large enough, unless the IRQ stack has been
overfilled in which case the end of life is close.

Reported-by: Erhard Furtner 
Cc: Arnd Bergmann 
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/irq.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 370434f6c316..1de081db50be 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -184,10 +184,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
return sum;
 }
 
-static inline void check_stack_overflow(void)
+static inline void check_stack_overflow(unsigned long sp)
 {
-   long sp;
-
if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW))
return;
 
@@ -221,12 +219,14 @@ static __always_inline void call_do_softirq(const void 
*sp)
 
 DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq);
 
-static void __do_irq(struct pt_regs *regs)
+static void __do_irq(struct pt_regs *regs, unsigned long oldsp)
 {
unsigned int irq;
 
trace_irq_entry(regs);
 
+   check_stack_overflow(oldsp);
+
/*
 * Query the platform PIC for the interrupt & ack it.
 *
@@ -254,6 +254,7 @@ static __always_inline void call_do_irq(struct pt_regs 
*regs, void *sp)
/* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
asm volatile (
 PPC_STLU " %%r1, %[offset](%[sp])  ;"
+   "mr %%r4, %%r1  ;"
"mr %%r1, %[sp] ;"
"bl %[callee]   ;"
 PPC_LL "   %%r1, 0(%%r1)   ;"
@@ -279,11 +280,9 @@ void __do_IRQ(struct pt_regs *regs)
irqsp = hardirq_ctx[raw_smp_processor_id()];
sirqsp = softirq_ctx[raw_smp_processor_id()];
 
-   check_stack_overflow();
-
/* Already there ? */
if (unlikely(cursp == irqsp || cursp == sirqsp)) {
-   __do_irq(regs);
+   __do_irq(regs, current_stack_pointer);
set_irq_regs(old_regs);
return;
}
-- 
2.35.3



[PATCH 1/2] powerpc/irq: Make __do_irq() static

2022-05-31 Thread Christophe Leroy
Since commit 48cf12d88969 ("powerpc/irq: Inline call_do_irq() and
call_do_softirq()"), __do_irq() is not used outside irq.c

Reorder functions and make __do_irq() static and
drop the declaration in irq.h.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/irq.h |  1 -
 arch/powerpc/kernel/irq.c  | 46 +-
 2 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 13f0409dd617..5c1516a5ba8f 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -54,7 +54,6 @@ extern void *softirq_ctx[NR_CPUS];
 
 void __do_IRQ(struct pt_regs *regs);
 extern void __init init_IRQ(void);
-extern void __do_irq(struct pt_regs *regs);
 
 int irq_choose_cpu(const struct cpumask *mask);
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 873e6dffb868..370434f6c316 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -219,31 +219,9 @@ static __always_inline void call_do_softirq(const void *sp)
);
 }
 
-static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
-{
-   register unsigned long r3 asm("r3") = (unsigned long)regs;
-
-   /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
-   asm volatile (
-PPC_STLU " %%r1, %[offset](%[sp])  ;"
-   "mr %%r1, %[sp] ;"
-   "bl %[callee]   ;"
-PPC_LL "   %%r1, 0(%%r1)   ;"
-: // Outputs
-  "+r" (r3)
-: // Inputs
-  [sp] "b" (sp), [offset] "i" (THREAD_SIZE - 
STACK_FRAME_OVERHEAD),
-  [callee] "i" (__do_irq)
-: // Clobbers
-  "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
-  "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-  "r11", "r12"
-   );
-}
-
 DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq);
 
-void __do_irq(struct pt_regs *regs)
+static void __do_irq(struct pt_regs *regs)
 {
unsigned int irq;
 
@@ -269,6 +247,28 @@ void __do_irq(struct pt_regs *regs)
trace_irq_exit(regs);
 }
 
+static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+   register unsigned long r3 asm("r3") = (unsigned long)regs;
+
+   /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
+   asm volatile (
+PPC_STLU " %%r1, %[offset](%[sp])  ;"
+   "mr %%r1, %[sp] ;"
+   "bl %[callee]   ;"
+PPC_LL "   %%r1, 0(%%r1)   ;"
+: // Outputs
+  "+r" (r3)
+: // Inputs
+  [sp] "b" (sp), [offset] "i" (THREAD_SIZE - 
STACK_FRAME_OVERHEAD),
+  [callee] "i" (__do_irq)
+: // Clobbers
+  "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+  "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+  "r11", "r12"
+   );
+}
+
 void __do_IRQ(struct pt_regs *regs)
 {
struct pt_regs *old_regs = set_irq_regs(regs);
-- 
2.35.3



Re: [FSL P50x0] Keyboard and mouse don't work anymore after the devicetree updates for 5.19

2022-05-31 Thread Christian Zigotzky



> On 31. May 2022, at 15:46, Rob Herring  wrote:
> 
> On Mon, May 30, 2022 at 12:26 AM Christian Zigotzky
>  wrote:
>> 
>>> On 27 May 2022 at 04:23 pm, Rob Herring wrote:
>>> The issue is in drivers/usb/host/fsl-mph-dr-of.c which copies the
>>> resources to a child platform device. Can you try the following
>>> change:
>>> 
>>> diff --git a/drivers/usb/host/fsl-mph-dr-of.c 
>>> b/drivers/usb/host/fsl-mph-dr-of.c
>>> index 44a7e58a26e3..47d9b7be60da 100644
>>> --- a/drivers/usb/host/fsl-mph-dr-of.c
>>> +++ b/drivers/usb/host/fsl-mph-dr-of.c
>>> @@ -80,8 +80,6 @@ static struct platform_device *fsl_usb2_device_register(
>>> const char *name, int id)
>>>  {
>>> struct platform_device *pdev;
>>> -   const struct resource *res = ofdev->resource;
>>> -   unsigned int num = ofdev->num_resources;
>>> int retval;
>>> 
>>> pdev = platform_device_alloc(name, id);
>>> @@ -106,11 +104,7 @@ static struct platform_device 
>>> *fsl_usb2_device_register(
>>> if (retval)
>>> goto error;
>>> 
>>> -   if (num) {
>>> -   retval = platform_device_add_resources(pdev, res, num);
>>> -   if (retval)
>>> -   goto error;
>>> -   }
>>> +   pdev->dev.of_node = ofdev->dev.of_node;
>>> 
>>> retval = platform_device_add(pdev);
>>> if (retval)
>> Hello Rob,
>> 
>> Thanks a lot for your patch! Unfortunately, this leads to a boot loop.
>> Do you have another idea?
> 
> Do you have a dmesg log?

From the boot loop?

> 
> The other way to fix is creating a IRQ resource and adding it to the
> child device resources.

Good idea.
> 
> Rob



Re: [PATCH] RISC-V: Prepare dropping week attribute from arch_kexec_apply_relocations[_add]

2022-05-31 Thread Palmer Dabbelt

On Mon, 30 May 2022 22:58:03 PDT (-0700), u.kleine-koe...@pengutronix.de wrote:

On Mon, May 30, 2022 at 04:11:27PM -0700, Palmer Dabbelt wrote:

On Mon, 30 May 2022 12:41:33 PDT (-0700), u.kleine-koe...@pengutronix.de wrote:
> Hello,
> 
> On Mon, May 30, 2022 at 11:58:16AM -0700, Palmer Dabbelt wrote:

> > On Mon, 30 May 2022 00:42:02 PDT (-0700), u.kleine-koe...@pengutronix.de 
wrote:
> > > Without this change arch/riscv/kernel/elf_kexec.c fails to compile once
> > > commit 233c1e6c319c ("kexec_file: drop weak attribute from
> > > arch_kexec_apply_relocations[_add]") is also contained in the tree.
> > > This currently happens in next-20220527.
> > > > Prepare the RISC-V similar to the s390 adaption done in
> > 233c1e6c319c.
> > > This is safe to do on top of the riscv change even without the change to
> > > arch_kexec_apply_relocations.
> > > > Fixes: 838b3e28488f ("RISC-V: Load purgatory in kexec_file")
> > > Looks-good-to: liaochang (A) 
> > > Signed-off-by: Uwe Kleine-König 
> > > ---
> > > > On Mon, May 30, 2022 at 09:43:26AM +0800, liaochang (A) wrote:
> > > > > I can confirm that doing
> > > > > > diff --git a/arch/riscv/include/asm/kexec.h
> > > > b/arch/riscv/include/asm/kexec.h
> > > > > index 206217b23301..eee260e8ab30 100644
> > > > > --- a/arch/riscv/include/asm/kexec.h
> > > > > +++ b/arch/riscv/include/asm/kexec.h
> > > > > @@ -55,6 +55,13 @@ extern riscv_kexec_method riscv_kexec_norelocate;
> > > > >  >  #ifdef CONFIG_KEXEC_FILE
> > > > >  extern const struct kexec_file_ops elf_kexec_ops;
> > > > > +
> > > > > +struct purgatory_info;
> > > > > +int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
> > > > > +   Elf_Shdr *section,
> > > > > +   const Elf_Shdr *relsec,
> > > > > +   const Elf_Shdr *symtab);
> > > > > +#define arch_kexec_apply_relocations_add 
arch_kexec_apply_relocations_add
> > > > >  #endif
> > > > >  >  #endif
> > > > > > LGTM, you could send a fixup patch to riscv, thanks.
> > > > > > > > on top of 838b3e28488f results in a compilable tree. And
> > when
> > > > merging
> > > > > 233c1e6c319c into this, it is still building.
> > > > > > I'm not enough into kexec (and riscv) to judge if this is
> > > > sensible, or
> > > > > create a useful commit log but the obvious way forward is to apply the
> > > > > above patch to the riscv tree before it hits Linus' tree.
> > > > Ok, here comes a patch with a generic commit log.
> > > > @riscv people: If you prefer, squash it into 838b3e28488f.
> > 
> > Sorry, just saw this after I sent my version of the fix.  They're the same,

> > but do you mind sending a full-on patch so I can merge it?
> 
> Sorry, I don't understand your request. I found

> 
https://lore.kernel.org/linux-riscv/20220530180408.16239-1-pal...@rivosinc.com/
> 
> but I don't know what a full-on patch is and what stops you merging my

> patch.
> 
> Is it that it's in reply to a patch series and b4 fails to fetch the

> right patch and you ask to send it in a new thread?

Ya, with the reply bits in there my mail merge (which unfortunately isn't
b4, I haven't gotten around to converting yet) got tripped up.  It's kind of
easy to for me to screw something up trying to pull bits out of replies and
such, but I think this one was pretty simple (looks like maybe some PGP or
MIME thing was the issue).

I just put 

on my staging branch, it looks OK to me but LMK if there's a problem.


there is indeed a problem, but caused by me: If you are ready to rewrite
it, please do s/week/weak/ in the subject line. Otherwise looks good to
me.


Ah, sorry, I missed that too.  I was hoping to send a PR this morning, 
I'd usually just rewrite and delay this one but given it's necessary to 
make the post-merge build work I'm going to just send it up with the 
spelling error.




Best regards and thanks,
Uwe

--
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | https://www.pengutronix.de/ |


Re: [FSL P50x0] Keyboard and mouse don't work anymore after the devicetree updates for 5.19

2022-05-31 Thread Rob Herring
On Mon, May 30, 2022 at 12:26 AM Christian Zigotzky
 wrote:
>
> On 27 May 2022 at 04:23 pm, Rob Herring wrote:
> > The issue is in drivers/usb/host/fsl-mph-dr-of.c which copies the
> > resources to a child platform device. Can you try the following
> > change:
> >
> > diff --git a/drivers/usb/host/fsl-mph-dr-of.c 
> > b/drivers/usb/host/fsl-mph-dr-of.c
> > index 44a7e58a26e3..47d9b7be60da 100644
> > --- a/drivers/usb/host/fsl-mph-dr-of.c
> > +++ b/drivers/usb/host/fsl-mph-dr-of.c
> > @@ -80,8 +80,6 @@ static struct platform_device *fsl_usb2_device_register(
> >  const char *name, int id)
> >   {
> >  struct platform_device *pdev;
> > -   const struct resource *res = ofdev->resource;
> > -   unsigned int num = ofdev->num_resources;
> >  int retval;
> >
> >  pdev = platform_device_alloc(name, id);
> > @@ -106,11 +104,7 @@ static struct platform_device 
> > *fsl_usb2_device_register(
> >  if (retval)
> >  goto error;
> >
> > -   if (num) {
> > -   retval = platform_device_add_resources(pdev, res, num);
> > -   if (retval)
> > -   goto error;
> > -   }
> > +   pdev->dev.of_node = ofdev->dev.of_node;
> >
> >  retval = platform_device_add(pdev);
> >  if (retval)
> Hello Rob,
>
> Thanks a lot for your patch! Unfortunately, this leads to a boot loop.
> Do you have another idea?

Do you have a dmesg log?

The other way to fix is creating a IRQ resource and adding it to the
child device resources.

Rob


Re: [PATCH] powerpc/Kconfig: Force THREAD_SHIFT to at least 14 with KASAN

2022-05-31 Thread Christophe Leroy


Le 31/05/2022 à 09:41, Christoph Hellwig a écrit :
> On Tue, May 31, 2022 at 04:16:19PM +1000, Michael Ellerman wrote:
>> I was thinking of doing it in C, similar to the way arm64 handles it.
>>
>> Something like below. It means we always double the stack size when
>> KASAN is enabled. I think it's preferable, as it will always work
>> regardless of whether the user has an old .config (or is bisectting)?
> 
> Is there any reason to even offer the Kconfig?  It is super cryptic and
> just picking the right value directly in the header would seem much
> more sensible:

Looks like it  was added by commit 476134070c03 ("powerpc: Move 
THREAD_SHIFT config to Kconfig") in 2017.
https://github.com/torvalds/linux/commit/476134070c037820bd909ff6e43e0d3eae33f376

Seems like powerpc is the only architecture with it configurable via 
Kconfig.

I'd be inclined to reverting that commit, and then I'd do something 
similar to arm64:

#ifdef CONFIG_KASAN
#define KASAN_THREAD_SHIFT  1
#else
#define KASAN_THREAD_SHIFT  0
#endif

#if defined(CONFIG_PPC_256K_PAGES)
#define MIN_THREAD_SHIFT(15 + KASAN_THREAD_SHIFT)
#elif defined(CONFIG_PPC64)
#define MIN_THREAD_SHIFT(14 + KASAN_THREAD_SHIFT)
#else
#define MIN_THREAD_SHIFT(13 + KASAN_THREAD_SHIFT)
#endif

#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT)
#define THREAD_SHIFTPAGE_SHIFT
#else
#define THREAD_SHIFTMIN_THREAD_SHIFT
#endif


And maybe the CONFIG_PPC_256K_PAGES case is already big enough and 
doesn't require the + 1 for KASAN.

> 
> #if defined(CONFIG_PPC_256K_PAGES)
> #define MIN_THREAD_SHIFT  15
> #elif defined(CONFIG_PPC64)
> #define MIN_THREAD_SHIFT  14
> #else
> #define MIN_THREAD_SHIFT  13
> #endif
> 
> #ifdef CONFIG_KASAN
> #define THREAD_SHIFT  (MIN_THREAD_SHIFT + 1)
> #else
> #define THREAD_SHIFT  MIN_THREAD_SHIFT
> #endif
> 
> #if defined(CONFIG_VMAP_STACK) && THREAD_SHIFT < PAGE_SHIFT
> #undef THREAD_SHIFT
> #define THREAD_SHIFT  PAGE_SHIFT
> #endif

[PATCH] powerpc/papr_scm: use dev_get_drvdata

2022-05-31 Thread Haowen Bai
Eliminate direct accesses to the driver_data field.

Signed-off-by: Haowen Bai 
---
 arch/powerpc/platforms/pseries/papr_scm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index 181b855b3050..4e793e6e7d11 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -354,7 +354,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, 
struct device *dev,
 {
struct papr_scm_perf_stat *stat;
struct papr_scm_perf_stats *stats;
-   struct papr_scm_priv *p = (struct papr_scm_priv *)dev->driver_data;
+   struct papr_scm_priv *p = dev_get_drvdata(dev);
int rc, size;
 
/* Allocate request buffer enough to hold single performance stat */
-- 
2.7.4



Re: [PATCH] powerpc/perf: Give generic PMU a nice name

2022-05-31 Thread Madhavan Srinivasan



On 5/26/22 12:07 PM, Joel Stanley wrote:

When booting on a machine that uses the compat pmu driver we see this:

  [0.071192] GENERIC_COMPAT performance monitor hardware support registered

Sorry that was my mistake.
I agree having it as ISAv3 is better.

Maddy



Which is a bit shouty. Give it a nicer name.

Signed-off-by: Joel Stanley 
---

Other options:

  - ISAv3 (because it is relevant for PowerISA 3.0B and beyond, see the
comment in init_generic_compat_pmu)

  - Generic Compat (same, but less shouty)

  arch/powerpc/perf/generic-compat-pmu.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/generic-compat-pmu.c 
b/arch/powerpc/perf/generic-compat-pmu.c
index f3db88aee4dd..5be5a5ebaf42 100644
--- a/arch/powerpc/perf/generic-compat-pmu.c
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -292,7 +292,7 @@ static int generic_compute_mmcr(u64 event[], int n_ev,
  }
  
  static struct power_pmu generic_compat_pmu = {

-   .name   = "GENERIC_COMPAT",
+   .name   = "Architected",
.n_counter  = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
.test_adder = ISA207_TEST_ADDER,


Re: [PATCH] powerpc/Kconfig: Force THREAD_SHIFT to at least 14 with KASAN

2022-05-31 Thread Christoph Hellwig
On Tue, May 31, 2022 at 04:16:19PM +1000, Michael Ellerman wrote:
> I was thinking of doing it in C, similar to the way arm64 handles it.
> 
> Something like below. It means we always double the stack size when
> KASAN is enabled. I think it's preferable, as it will always work
> regardless of whether the user has an old .config (or is bisectting)?

Is there any reason to even offer the Kconfig?  It is super cryptic and
just picking the right value directly in the header would seem much
more sensible:

#if defined(CONFIG_PPC_256K_PAGES)
#define MIN_THREAD_SHIFT15
#elif defined(CONFIG_PPC64)
#define MIN_THREAD_SHIFT14
#else
#define MIN_THREAD_SHIFT13
#endif

#ifdef CONFIG_KASAN
#define THREAD_SHIFT(MIN_THREAD_SHIFT + 1)
#else
#define THREAD_SHIFTMIN_THREAD_SHIFT
#endif

#if defined(CONFIG_VMAP_STACK) && THREAD_SHIFT < PAGE_SHIFT
#undef THREAD_SHIFT
#define THREAD_SHIFTPAGE_SHIFT
#endif


[PATCH] powerpc/64: Drop ppc_inst_as_str()

2022-05-31 Thread Michael Ellerman
The ppc_inst_as_str() macro tries to make printing variable length,
aka "prefixed", instructions convenient. It mostly succeeds, but it does
hide an on-stack buffer, which triggers stack protector.

More problematically it doesn't compile at all with GCC 12, due to the
fact that it returns the char buffer declared inside the macro:

  arch/powerpc/kernel/trace/ftrace.c: In function '__ftrace_modify_call':
  ./include/linux/printk.h:475:44: error: using a dangling pointer to '__str' 
[-Werror=dangling-pointer=]
475 | #define printk(fmt, ...) printk_index_wrap(_printk, fmt, 
##__VA_ARGS__)
...
  arch/powerpc/kernel/trace/ftrace.c:567:17: note: in expansion of macro 
'pr_err'
567 | pr_err("Not expected bl: opcode is %s\n", 
ppc_inst_as_str(op));
| ^~
  ./arch/powerpc/include/asm/inst.h:156:14: note: '__str' declared here
156 | char __str[PPC_INST_STR_LEN];   \
|  ^

This could be fixed by having the caller declare the buffer, but in some
places there'd need to be two buffers. In all cases where
ppc_inst_as_str() is used the output is not really meant for user
consumption, it's almost always indicative of a kernel bug.

A simpler solution is to just print the value as an unsigned long. For
normal instructions the output is identical. For prefixed instructions
the value is printed as a single 64-bit quantity, whereas previously the
low half was printed first. But that is good enough for debug output,
especially as prefixed instructions will be rare in practice.

Old:
  c070  6042  ori r2,r2,0
  c074  0411 e580fb00 .long 0xe580fb000411

New:
  c010f90c  6042  ori r2,r2,0
  c010f910  e580fb000411  .long 0xe580fb000411

Reported-by: Bagas Sanjaya 
Reported-by: Petr Mladek 
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/inst.h  | 19 ---
 arch/powerpc/kernel/kprobes.c|  2 +-
 arch/powerpc/kernel/trace/ftrace.c   | 24 +---
 arch/powerpc/lib/test_emulate_step.c |  6 +++---
 arch/powerpc/xmon/xmon.c |  2 +-
 5 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index b49aae9f6f27..684d3f453282 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -139,25 +139,6 @@ static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x)
*(u64 *)ptr = ppc_inst_as_ulong(x);
 }
 
-#define PPC_INST_STR_LEN sizeof(" ")
-
-static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x)
-{
-   if (ppc_inst_prefixed(x))
-   sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x));
-   else
-   sprintf(str, "%08x", ppc_inst_val(x));
-
-   return str;
-}
-
-#define ppc_inst_as_str(x) \
-({ \
-   char __str[PPC_INST_STR_LEN];   \
-   __ppc_inst_as_str(__str, x);\
-   __str;  \
-})
-
 static inline int __copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
 {
unsigned int val, suffix;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 1c97c0f177ae..912d4f8a13be 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -269,7 +269,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs 
*regs)
 * So, we should never get here... but, its still
 * good to catch them, just in case...
 */
-   printk("Can't step on instruction %s\n", ppc_inst_as_str(insn));
+   printk("Can't step on instruction %08lx\n", 
ppc_inst_as_ulong(insn));
BUG();
} else {
/*
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 2a893e06e4f1..cab67b5120b9 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -69,8 +69,8 @@ ftrace_modify_code(unsigned long ip, ppc_inst_t old, 
ppc_inst_t new)
 
/* Make sure it is what we expect it to be */
if (!ppc_inst_equal(replaced, old)) {
-   pr_err("%p: replaced (%s) != old (%s)",
-   (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old));
+   pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip,
+  ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old));
return -EINVAL;
}
 
@@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod,
 
/* Make sure that that this is still a 24bit jump */
if (!is_bl_op(op)) {
-   pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
+   pr_err("Not expected bl: opcode is %08lx\n", 
ppc_inst_as_ulong(op));
return -EINVAL;
}
 
@@ -159,8 +159,8 @@ 

Re: [PATCH] powerpc/irq: Increase stack_overflow detection limit when KASAN is enabled

2022-05-31 Thread Christophe Leroy


Le 31/05/2022 à 08:21, Michael Ellerman a écrit :
> Christophe Leroy  writes:
>> When KASAN is enabled, as shown by the Oops below, the 2k limit is not
>> enough to allow stack dump after a stack overflow detection when
>> CONFIG_DEBUG_STACKOVERFLOW is selected:
>>
>>  do_IRQ: stack overflow: 1984
>>  CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1
>>  Call Trace:
>>  Oops: Kernel stack overflow, sig: 11 [#1]
>>  BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac
>>  Modules linked in: sr_mod cdrom radeon(+) ohci_pci(+) hwmon 
>> i2c_algo_bit drm_ttm_helper ttm drm_dp_helper snd_aoa_i2sbus 
>> snd_aoa_soundbus snd_pcm ehci_pci snd_timer ohci_hcd snd ssb ehci_hcd 
>> 8250_pci soundcore drm_kms_helper pcmcia 8250 pcmcia_core syscopyarea 
>> usbcore sysfillrect 8250_base sysimgblt serial_mctrl_gpio fb_sys_fops 
>> usb_common pkcs8_key_parser fuse drm drm_panel_orientation_quirks configfs
>>  CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1
>>  NIP:  c02e5558 LR: c07eb3bc CTR: c07f46a8
>>  REGS: e7fe9f50 TRAP:    Not tainted  (5.18.0-gentoo-PMacG4)
>>  MSR:  1032   CR: 44a14824  XER: 2000
>>
>>  GPR00: c07eb3bc eaa1c000 c26baea0 eaa1c0a0 0008  c07eb3bc 
>> eaa1c010
>>  GPR08: eaa1c0a8 04f3f3f3 f1f1f1f1 c07f4c84 44a14824 0080f7e4 0005 
>> 0010
>>  GPR16: 0025 eaa1c154 eaa1c158 c0dbad64 0020 fd543810 eaa1c0a0 
>> eaa1c29e
>>  GPR24: c0dbad44 c0db8740 05ff fd543802 eaa1c150 c0c9a3c0 eaa1c0a0 
>> c0c9a3c0
>>  NIP [c02e5558] kasan_check_range+0xc/0x2b4
>>  LR [c07eb3bc] format_decode+0x80/0x604
>>  Call Trace:
>>  [eaa1c000] [c07eb3bc] format_decode+0x80/0x604 (unreliable)
>>  [eaa1c070] [c07f4dac] vsnprintf+0x128/0x938
>>  [eaa1c110] [c07f5788] sprintf+0xa0/0xc0
>>  [eaa1c180] [c0154c1c] __sprint_symbol.constprop.0+0x170/0x198
>>  [eaa1c230] [c07ee71c] symbol_string+0xf8/0x260
>>  [eaa1c430] [c07f46d0] pointer+0x15c/0x710
>>  [eaa1c4b0] [c07f4fbc] vsnprintf+0x338/0x938
>>  [eaa1c550] [c00e8fa0] vprintk_store+0x2a8/0x678
>>  [eaa1c690] [c00e94e4] vprintk_emit+0x174/0x378
>>  [eaa1c6d0] [c00ea008] _printk+0x9c/0xc0
>>  [eaa1c750] [c000ca94] show_stack+0x21c/0x260
>>  [eaa1c7a0] [c07d0bd4] dump_stack_lvl+0x60/0x90
>>  [eaa1c7c0] [c0009234] __do_IRQ+0x170/0x174
>>  [eaa1c800] [c0009258] do_IRQ+0x20/0x34
>>  [eaa1c820] [c00045b4] HardwareInterrupt_virt+0x108/0x10c
> 
> Is this actually caused by KASAN? There's no stack frames in there that
> are KASAN related AFAICS.

Yes, but enabling KASAN often increases the size of functions.

And by the way here you have NIP in kasan_check_range()

But I can try to perform some more tests.

> 
> Seems like the 2K limit is never going to be enough even if KASAN is not
> enabled. Presumably we just haven't noticed because we don't trigger the
> check unless KASAN is enabled.

I think what triggers the Oops really is VMAP_STACK. Without VMAP_STACK 
we just silently overwrite other memory.

> 
>> ...
>>
>> Increase the limit to 3k when KASAN is enabled.
>>
>> While at it remove the 'inline' keyword for check_stack_overflow().
>> This function is called only once so it will be inlined regardless.
> 
> I'd rather that was a separate change, in case it has some unintended
> effect.

ok

> 
>> Reported-by: Erhard Furtner 
>> Cc: Arnd Bergmann 
>> Signed-off-by: Christophe Leroy 
>> ---
>>   arch/powerpc/kernel/irq.c | 16 ++--
>>   1 file changed, 10 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
>> index 873e6dffb868..5ff4cf69fc2f 100644
>> --- a/arch/powerpc/kernel/irq.c
>> +++ b/arch/powerpc/kernel/irq.c
>> @@ -53,6 +53,7 @@
>>   #include 
>>   #include 
>>   #include 
>> +#include 
>>   
>>   #include 
>>   #include 
>> @@ -184,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
>>  return sum;
>>   }
>>   
>> -static inline void check_stack_overflow(void)
>> +static void check_stack_overflow(void)
>>   {
>>  long sp;
>>   
>> @@ -193,11 +194,14 @@ static inline void check_stack_overflow(void)
>>
> 
> Wouldn't it be cleaner to just do:
> 
> #ifdef CONFIG_KASAN
> #define STACK_CHECK_LIMIT (3 * 1024)
> #else
> #define STACK_CHECK_LIMIT (2 * 1024)
> #endif

Well, as you think 2k is not enough even without KASAN, then we should 
just increase it to 3k ?

In the meantime I was thinking about moving the test into __do_irq(), so 
that it will be done on IRQ stack. That would ease things unless we 
overfill the IRQ stack itself.

Because even if we put a detection limit at 3 or 4k, as the detection is 
asynchronous we still have a risk that the stack fills well beyond the 
limit, leaving us unable to perform the stack dump within the remaining 
stack space.

> 
>>  sp = current_stack_pointer & (THREAD_SIZE - 1);
>>   
>> -/* check for stack overflow: is there less than 2KB free? */
>> 

Re: [PATCH] powerpc/Kconfig: Force THREAD_SHIFT to at least 14 with KASAN

2022-05-31 Thread Christophe Leroy


Le 31/05/2022 à 08:16, Michael Ellerman a écrit :
> Christophe Leroy  writes:
>> Although 14 is the default THREAD_SHIFT when KASAN is selected,
>> taking an old config may keep 13 when CONFIG_EXPERT is selected.
>>
>> Force it to 14 as a minimum when KASAN is selected.
>>
>> Also default to 15 when KASAN on PPC64.
>>
>> Reported-by: Erhard Furtner 
>> Signed-off-by: Christophe Leroy 
>> ---
>>   arch/powerpc/Kconfig | 4 +++-
>>   1 file changed, 3 insertions(+), 1 deletion(-)
> 
> I was thinking of doing it in C, similar to the way arm64 handles it.
> 
> Something like below. It means we always double the stack size when
> KASAN is enabled. I think it's preferable, as it will always work
> regardless of whether the user has an old .config (or is bisecting)?

Yes good idea.

I leave it to you then.

You can therefore also remove the default value for KASAN in Kconfig.

Christophe

> 
> cheers
> 
> diff --git a/arch/powerpc/include/asm/thread_info.h 
> b/arch/powerpc/include/asm/thread_info.h
> index 125328d1b980..c9735f93f8e6 100644
> --- a/arch/powerpc/include/asm/thread_info.h
> +++ b/arch/powerpc/include/asm/thread_info.h
> @@ -14,12 +14,17 @@
>   
>   #ifdef __KERNEL__
>   
> -#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT
> -#define THREAD_SHIFT PAGE_SHIFT
> +#ifdef CONFIG_KASAN
> +#define THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1)
>   #else
>   #define THREAD_SHIFTCONFIG_THREAD_SHIFT
>   #endif
>   
> +#if defined(CONFIG_VMAP_STACK) && THREAD_SHIFT < PAGE_SHIFT
> +#undef THREAD_SHIFT
> +#define THREAD_SHIFT PAGE_SHIFT
> +#endif
> +
>   #define THREAD_SIZE (1 << THREAD_SHIFT)
>   
>   /*
> 
> 

Re: [PATCH v1 0/4] Kill the time spent in patch_instruction()

2022-05-31 Thread Christophe Leroy


Le 17/05/2022 à 14:37, Michael Ellerman a écrit :
> Christophe Leroy  writes:
>> Le 15/05/2022 à 12:28, Michael Ellerman a écrit :
>>> On Tue, 22 Mar 2022 16:40:17 +0100, Christophe Leroy wrote:
 This series reduces by 70% the time required to activate
 ftrace on an 8xx with CONFIG_STRICT_KERNEL_RWX.

 Measure is performed in function ftrace_replace_code() using mftb()
 around the loop.

 With the series,
 - Without CONFIG_STRICT_KERNEL_RWX, 416000 TB ticks are measured.
 - With CONFIG_STRICT_KERNEL_RWX, 546000 TB ticks are measured.

 [...]
>>>
>>> Patches 1, 3 and 4 applied to powerpc/next.
>>>
>>> [1/4] powerpc/code-patching: Don't call is_vmalloc_or_module_addr() without 
>>> CONFIG_MODULES
>>> 
>>> https://git.kernel.org/powerpc/c/cb3ac45214c03852430979a43180371a44b74596
>>> [3/4] powerpc/code-patching: Use jump_label for testing freed initmem
>>> 
>>> https://git.kernel.org/powerpc/c/b033767848c4115e486b1a51946de3bee2ac0fa6
>>> [4/4] powerpc/code-patching: Use jump_label to check if poking_init() is 
>>> done
>>> 
>>> https://git.kernel.org/powerpc/c/1751289268ef959db68b0b6f798d904d6403309a
>>>
>>
>> Patch 2 was the keystone of this series. What happened to it ?
> 
> It broke on 64-bit. I think I know why but I haven't had time to test
> it. Will try and get it fixed in the next day or two.
> 

You didn't find any solution at the end, or didn't have time ?

What was the problem exactly ? I made a quick try on QEMU and it was 
working as expected.

Christophe

Re: [PATCH] powerpc/irq: Increase stack_overflow detection limit when KASAN is enabled

2022-05-31 Thread Michael Ellerman
Christophe Leroy  writes:
> When KASAN is enabled, as shown by the Oops below, the 2k limit is not
> enough to allow stack dump after a stack overflow detection when
> CONFIG_DEBUG_STACKOVERFLOW is selected:
>
>   do_IRQ: stack overflow: 1984
>   CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1
>   Call Trace:
>   Oops: Kernel stack overflow, sig: 11 [#1]
>   BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac
>   Modules linked in: sr_mod cdrom radeon(+) ohci_pci(+) hwmon 
> i2c_algo_bit drm_ttm_helper ttm drm_dp_helper snd_aoa_i2sbus snd_aoa_soundbus 
> snd_pcm ehci_pci snd_timer ohci_hcd snd ssb ehci_hcd 8250_pci soundcore 
> drm_kms_helper pcmcia 8250 pcmcia_core syscopyarea usbcore sysfillrect 
> 8250_base sysimgblt serial_mctrl_gpio fb_sys_fops usb_common pkcs8_key_parser 
> fuse drm drm_panel_orientation_quirks configfs
>   CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1
>   NIP:  c02e5558 LR: c07eb3bc CTR: c07f46a8
>   REGS: e7fe9f50 TRAP:    Not tainted  (5.18.0-gentoo-PMacG4)
>   MSR:  1032   CR: 44a14824  XER: 2000
>
>   GPR00: c07eb3bc eaa1c000 c26baea0 eaa1c0a0 0008  c07eb3bc 
> eaa1c010
>   GPR08: eaa1c0a8 04f3f3f3 f1f1f1f1 c07f4c84 44a14824 0080f7e4 0005 
> 0010
>   GPR16: 0025 eaa1c154 eaa1c158 c0dbad64 0020 fd543810 eaa1c0a0 
> eaa1c29e
>   GPR24: c0dbad44 c0db8740 05ff fd543802 eaa1c150 c0c9a3c0 eaa1c0a0 
> c0c9a3c0
>   NIP [c02e5558] kasan_check_range+0xc/0x2b4
>   LR [c07eb3bc] format_decode+0x80/0x604
>   Call Trace:
>   [eaa1c000] [c07eb3bc] format_decode+0x80/0x604 (unreliable)
>   [eaa1c070] [c07f4dac] vsnprintf+0x128/0x938
>   [eaa1c110] [c07f5788] sprintf+0xa0/0xc0
>   [eaa1c180] [c0154c1c] __sprint_symbol.constprop.0+0x170/0x198
>   [eaa1c230] [c07ee71c] symbol_string+0xf8/0x260
>   [eaa1c430] [c07f46d0] pointer+0x15c/0x710
>   [eaa1c4b0] [c07f4fbc] vsnprintf+0x338/0x938
>   [eaa1c550] [c00e8fa0] vprintk_store+0x2a8/0x678
>   [eaa1c690] [c00e94e4] vprintk_emit+0x174/0x378
>   [eaa1c6d0] [c00ea008] _printk+0x9c/0xc0
>   [eaa1c750] [c000ca94] show_stack+0x21c/0x260
>   [eaa1c7a0] [c07d0bd4] dump_stack_lvl+0x60/0x90
>   [eaa1c7c0] [c0009234] __do_IRQ+0x170/0x174
>   [eaa1c800] [c0009258] do_IRQ+0x20/0x34
>   [eaa1c820] [c00045b4] HardwareInterrupt_virt+0x108/0x10c

Is this actually caused by KASAN? There's no stack frames in there that
are KASAN related AFAICS.

Seems like the 2K limit is never going to be enough even if KASAN is not
enabled. Presumably we just haven't noticed because we don't trigger the
check unless KASAN is enabled.

> ...
>
> Increase the limit to 3k when KASAN is enabled.
>
> While at it remove the 'inline' keyword for check_stack_overflow().
> This function is called only once so it will be inlined regardless.

I'd rather that was a separate change, in case it has some unintended
effect.

> Reported-by: Erhard Furtner 
> Cc: Arnd Bergmann 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/kernel/irq.c | 16 ++--
>  1 file changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index 873e6dffb868..5ff4cf69fc2f 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -53,6 +53,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -184,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
>   return sum;
>  }
>  
> -static inline void check_stack_overflow(void)
> +static void check_stack_overflow(void)
>  {
>   long sp;
>  
> @@ -193,11 +194,14 @@ static inline void check_stack_overflow(void)
>

Wouldn't it be cleaner to just do:

#ifdef CONFIG_KASAN
#define STACK_CHECK_LIMIT (3 * 1024)
#else
#define STACK_CHECK_LIMIT (2 * 1024)
#endif

>   sp = current_stack_pointer & (THREAD_SIZE - 1);
>  
> - /* check for stack overflow: is there less than 2KB free? */
> - if (unlikely(sp < 2048)) {
 
+   if (unlikely(sp < STACK_CHECK_LIMIT)) {
 
And then the code could stay as it is?

cheers

> - pr_err("do_IRQ: stack overflow: %ld\n", sp);
> - dump_stack();
> - }
> + /* check for stack overflow: is there less than 2/3KB free? */
> + if (!IS_ENABLED(KASAN) && likely(sp >= SZ_2K))
> + return;
> + if (IS_ENABLED(KASAN) && likely(sp >= SZ_2K + SZ_1K))
> + return;
> +
> + pr_err("do_IRQ: stack overflow: %ld\n", sp);
> + dump_stack();
>  }


Re: [RFC PATCH 4/4] objtool/powerpc: Add --mcount specific implementation

2022-05-31 Thread Christophe Leroy




Le 25/05/2022 à 19:27, Christophe Leroy a écrit :



Le 24/05/2022 à 15:33, Christophe Leroy a écrit :



Le 24/05/2022 à 13:00, Sathvika Vasireddy a écrit :



+{
+    switch (elf->ehdr.e_machine) {
+    case EM_X86_64:
+    return R_X86_64_64;
+    case EM_PPC64:
+    return R_PPC64_ADDR64;
+    default:
+    WARN("unknown machine...");
+    exit(-1);
+    }
+}

Wouldn't it be better to make that function arch specific ?


This is so that we can support cross architecture builds.




I'm not sure I follow you here.

This is only based on the target, it doesn't depend on the build host so
I can't see the link with cross arch builds.

The same as you have arch_decode_instruction(), you could have
arch_elf_reloc_type_long()
It would make sense indeed, because there is no point in supporting X86
relocation when you don't support X86 instruction decoding.



Could simply be some macro defined in 
tools/objtool/arch/powerpc/include/arch/elf.h and 
tools/objtool/arch/x86/include/arch/elf.h


The x86 version would be:

#define R_ADDR(elf) R_X86_64_64

And the powerpc version would be:

#define R_ADDR(elf) (elf->ehdr.e_machine == EM_PPC64 ? R_PPC64_ADDR64 : 
R_PPC_ADDR32)




Well, looking once more, and taking into account the patch from Chen 
https://lore.kernel.org/lkml/20220531020744.236970-4-chenzhong...@huawei.com/


It would be easier to just define two macros:

#define R_ABS64 R_PPC64_ADDR64
#define R_ABS32 R_PPC_ADDR32

And then in the caller, as we know the size, do something like

size == sizeof(u64) ? R_ABS64 : R_ABS32;

Christophe


Re: [PATCH] powerpc/Kconfig: Force THREAD_SHIFT to at least 14 with KASAN

2022-05-31 Thread Michael Ellerman
Christophe Leroy  writes:
> Although 14 is the default THREAD_SHIFT when KASAN is selected,
> taking an old config may keep 13 when CONFIG_EXPERT is selected.
>
> Force it to 14 as a minimum when KASAN is selected.
>
> Also default to 15 when KASAN on PPC64.
>
> Reported-by: Erhard Furtner 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/Kconfig | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)

I was thinking of doing it in C, similar to the way arm64 handles it.

Something like below. It means we always double the stack size when
KASAN is enabled. I think it's preferable, as it will always work
regardless of whether the user has an old .config (or is bisecting)?

cheers

diff --git a/arch/powerpc/include/asm/thread_info.h 
b/arch/powerpc/include/asm/thread_info.h
index 125328d1b980..c9735f93f8e6 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -14,12 +14,17 @@
 
 #ifdef __KERNEL__
 
-#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT
-#define THREAD_SHIFT   PAGE_SHIFT
+#ifdef CONFIG_KASAN
+#define THREAD_SHIFT   (CONFIG_THREAD_SHIFT + 1)
 #else
 #define THREAD_SHIFT   CONFIG_THREAD_SHIFT
 #endif
 
+#if defined(CONFIG_VMAP_STACK) && THREAD_SHIFT < PAGE_SHIFT
+#undef THREAD_SHIFT
+#define THREAD_SHIFT   PAGE_SHIFT
+#endif
+
 #define THREAD_SIZE(1 << THREAD_SHIFT)
 
 /*




Fwd: [PATCH 0/4] objtool: Reorganize x86 arch-specific code

2022-05-31 Thread Christophe Leroy
All available at 
https://lore.kernel.org/lkml/20220531020744.236970-1-chenzhong...@huawei.com/T/#t

Chen, can you please copy linuxppc-dev list next time, as powerpc is 
interested in objtool.


 Message transféré 
Sujet : [PATCH 0/4] objtool: Reorganize x86 arch-specific code
Date : Tue, 31 May 2022 10:07:40 +0800
De : Chen Zhongjin 
Pour : linux-ker...@vger.kernel.org, linux-a...@vger.kernel.org, 
x...@kernel.org
Copie à : jpoim...@redhat.com, pet...@infradead.org, 
madve...@linux.microsoft.com, t...@linutronix.de, mi...@redhat.com, 
b...@alien8.de, h...@zytor.com, a...@arndb.de, a...@linux-foundation.org, 
andreyk...@gmail.com, wangkefeng.w...@huawei.com, 
andrealm...@collabora.com, mhira...@kernel.org, mcg...@kernel.org, 
christophe.le...@csgroup.eu, dmitry.torok...@gmail.com, 
yangtie...@loongson.cn, dave.han...@linux.intel.com

This patch set reorganize current x86 related code in objtool, [1-3] move
arch-specific to arch files and extract some common codes and [4] fixes
a cross-compile problem.

It make objtool more arch-generic, which makes other patches on different
architectures easier to be reviewed and merged.

Tested on x86 with unwind on kernel and module context.

Rebased to:
tip/objtool/core:22682a07acc3 (objtool: Fix objtool regression on x32 
systems)

Chen Zhongjin (2):
   objtool: Add generic symbol for relocation type
   objtool: Specify host-arch for making LIBSUBCMD

Madhavan T. Venkataraman (2):
   objtool: Make ORC type code arch-specific
   objtool: Make ORC init and lookup code arch-generic

  arch/x86/include/asm/unwind.h |   5 -
  arch/x86/kernel/module.c  |   7 +-
  arch/x86/kernel/unwind_orc.c  | 256 +
  arch/x86/kernel/vmlinux.lds.S |   2 +-
  .../asm => include/asm-generic}/orc_lookup.h  |  42 +++
  kernel/Makefile   |   2 +
  kernel/orc_lookup.c   | 261 ++
  tools/objtool/Makefile|   2 +-
  tools/objtool/arch/x86/Build  |   1 +
  tools/objtool/arch/x86/include/arch/elf.h |   5 +-
  tools/objtool/arch/x86/orc.c  | 137 +
  tools/objtool/arch/x86/special.c  |   5 +-
  tools/objtool/check.c |  12 +-
  tools/objtool/include/objtool/orc.h   |  17 ++
  tools/objtool/orc_dump.c  |  59 +---
  tools/objtool/orc_gen.c   |  79 +-
  16 files changed, 491 insertions(+), 401 deletions(-)
  rename {arch/x86/include/asm => include/asm-generic}/orc_lookup.h (51%)
  create mode 100644 kernel/orc_lookup.c
  create mode 100644 tools/objtool/arch/x86/orc.c
  create mode 100644 tools/objtool/include/objtool/orc.h

-- 
2.17.1