[PATCH 1/2] powerpc: Introduce POWER10_DD1 feature

2020-10-19 Thread Ravi Bangoria
The POWER10_DD1 feature flag will be needed when adding
conditional code that applies only to Power10 DD1.
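
A sketch of how the flag is then consumed (this mirrors the use in
patch 2/2 of this series):

    /* Only take the DD1-specific path when the feature bit is set. */
    if (cpu_has_feature(CPU_FTR_POWER10_DD1))
            handle_p10dd1_spurious_exception(info, hit, ea);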

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/cputable.h | 8 ++--
 arch/powerpc/kernel/dt_cpu_ftrs.c   | 3 +++
 arch/powerpc/kernel/prom.c  | 9 +
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 93bc70d4c9a1..d486f56c0d33 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -216,6 +216,7 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTR_P9_RADIX_PREFETCH_BUG	LONG_ASM_CONST(0x0002000000000000)
 #define CPU_FTR_ARCH_31			LONG_ASM_CONST(0x0004000000000000)
 #define CPU_FTR_DAWR1			LONG_ASM_CONST(0x0008000000000000)
+#define CPU_FTR_POWER10_DD1		LONG_ASM_CONST(0x0010000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -479,6 +480,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \
CPU_FTR_DAWR | CPU_FTR_DAWR1)
+#define CPU_FTRS_POWER10_DD1   (CPU_FTRS_POWER10 | CPU_FTR_POWER10_DD1)
 #define CPU_FTRS_CELL  (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -497,14 +499,16 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTRS_POSSIBLE  \
(CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
 CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | CPU_FTRS_POWER9 | \
-	 CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10)
+	 CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10 | \
+	 CPU_FTRS_POWER10_DD1)
 #else
 #define CPU_FTRS_POSSIBLE  \
(CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
 CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
 CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \
 CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \
-	 CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10)
+	 CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10 | \
+	 CPU_FTRS_POWER10_DD1)
 #endif /* CONFIG_CPU_LITTLE_ENDIAN */
 #endif
 #else
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 1098863e17ee..b2327f2967ff 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -811,6 +811,9 @@ static __init void cpufeatures_cpu_quirks(void)
}
 
update_tlbie_feature_flag(version);
+
+   if ((version & 0xffffffff) == 0x00800100)
+   cur_cpu_spec->cpu_features |= CPU_FTR_POWER10_DD1;
 }
 
 static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index c1545f22c077..c778c81284f7 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -305,6 +305,14 @@ static void __init check_cpu_feature_properties(unsigned long node)
}
 }
 
+static void __init fixup_cpu_features(void)
+{
+   unsigned long version = mfspr(SPRN_PVR);
+
+   if ((version & 0xffffffff) == 0x00800100)
+   cur_cpu_spec->cpu_features |= CPU_FTR_POWER10_DD1;
+}
+
 static int __init early_init_dt_scan_cpus(unsigned long node,
  const char *uname, int depth,
  void *data)
@@ -378,6 +386,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 
check_cpu_feature_properties(node);
check_cpu_pa_features(node);
+   fixup_cpu_features();
}
 
identical_pvr_fixup(node);
-- 
2.25.1



[PATCH 2/2] powerpc/watchpoint: Workaround P10 DD1 issue with VSX-32 byte instructions

2020-10-19 Thread Ravi Bangoria
POWER10 DD1 has an issue where it generates watchpoint exceptions when it
shouldn't. The conditions where this occurs are:

 - octword op
 - ending address of DAWR range is less than starting address of op
 - those addresses need to be in the same or in two consecutive 512B
   blocks
 - 'op address + 64B' generates an address that has a carry into bit
   52 (crosses 2K boundary)

Handle such spurious exceptions by treating them as extraneous and
emulating/single-stepping the instruction without generating an event.
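
Illustrated as a standalone predicate (a sketch only, not part of the
diff below):

    /* Sketch: do a DAWR range ending at hw_end and an octword access
     * at ea fall into the DD1 false-positive window described above?
     * The three checks mirror handle_p10dd1_spurious_exception(). */
    static bool p10dd1_spurious_match(unsigned long hw_end, unsigned long ea)
    {
            /* DAWR range must end strictly below the start of the op */
            if ((hw_end - 1) >= ea)
                    return false;
            /* same or two consecutive 512B blocks (one 1K-aligned window) */
            if (((hw_end - 1) >> 10) != (ea >> 10))
                    return false;
            /* op address + 64B must carry into bit 52 (cross 2K boundary) */
            if ((ea & 0x800) == ((ea + 64) & 0x800))
                    return false;
            return true;
    }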

Signed-off-by: Ravi Bangoria 
---

Dependency: VSX-32 byte emulation support patches
  https://lore.kernel.org/r/20201011050908.72173-1-ravi.bango...@linux.ibm.com

 arch/powerpc/kernel/hw_breakpoint.c | 69 -
 1 file changed, 67 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index f4e8f21046f5..4514745d27c3 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -499,6 +499,11 @@ static bool is_larx_stcx_instr(int type)
return type == LARX || type == STCX;
 }
 
+static bool is_octword_vsx_instr(int type, int size)
+{
+   return ((type == LOAD_VSX || type == STORE_VSX) && size == 32);
+}
+
 /*
  * We've failed in reliably handling the hw-breakpoint. Unregister
  * it and throw a warning message to let the user know about it.
@@ -549,6 +554,60 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
return true;
 }
 
+static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info,
+					     int *hit, unsigned long ea)
+{
+   int i;
+   unsigned long hw_start_addr;
+   unsigned long hw_end_addr;
+
+   /*
+* Handle spurious exception only when any bp_per_reg is set.
+* Otherwise this might be created by xmon and not actually a
+* spurious exception.
+*/
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (!info[i])
+   continue;
+
+		hw_start_addr = ALIGN_DOWN(info[i]->address, HW_BREAKPOINT_SIZE);
+		hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE);
+
+   /*
+* Ending address of DAWR range is less than starting
+* address of op.
+*/
+   if ((hw_end_addr - 1) >= ea)
+   continue;
+
+   /*
+* Those addresses need to be in the same or in two
+* consecutive 512B blocks;
+*/
+   if (((hw_end_addr - 1) >> 10) != (ea >> 10))
+   continue;
+
+   /*
+* 'op address + 64B' generates an address that has a
+* carry into bit 52 (crosses 2K boundary).
+*/
+   if ((ea & 0x800) == ((ea + 64) & 0x800))
+   continue;
+
+   break;
+   }
+
+   if (i == nr_wp_slots())
+   return;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (info[i]) {
+   hit[i] = 1;
+   info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+   }
+   }
+}
+
 int hw_breakpoint_handler(struct die_args *args)
 {
bool err = false;
@@ -607,8 +666,14 @@ int hw_breakpoint_handler(struct die_args *args)
goto reset;
 
if (!nr_hit) {
-   rc = NOTIFY_DONE;
-   goto out;
+   if (cpu_has_feature(CPU_FTR_POWER10_DD1) &&
+   !IS_ENABLED(CONFIG_PPC_8xx) &&
+   is_octword_vsx_instr(type, size)) {
+   handle_p10dd1_spurious_exception(info, hit, ea);
+   } else {
+   rc = NOTIFY_DONE;
+   goto out;
+   }
}
 
/*
-- 
2.25.1



[powerpc:merge] BUILD SUCCESS 96b5a60d059984a2f0eaef90e97f59ac4a76bff4

2020-10-19 Thread kernel test robot
mips workpad_defconfig
mips loongson1b_defconfig
powerpc   currituck_defconfig
powerpc  iss476-smp_defconfig
arm spear3xx_defconfig
m68k m5407c3_defconfig
sh se7343_defconfig
arc nsimosci_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
m68k allmodconfig
m68k defconfig
m68k allyesconfig
nios2 defconfig
arc allyesconfig
nds32 allnoconfig
c6x allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
parisc allyesconfig
s390 defconfig
i386 allyesconfig
sparc allyesconfig
sparc defconfig
i386 defconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a004-20201019
x86_64   randconfig-a002-20201019
x86_64   randconfig-a006-20201019
x86_64   randconfig-a003-20201019
x86_64   randconfig-a005-20201019
x86_64   randconfig-a001-20201019
i386 randconfig-a006-20201019
i386 randconfig-a005-20201019
i386 randconfig-a001-20201019
i386 randconfig-a003-20201019
i386 randconfig-a004-20201019
i386 randconfig-a002-20201019
i386 randconfig-a015-20201019
i386 randconfig-a013-20201019
i386 randconfig-a016-20201019
i386 randconfig-a012-20201019
i386 randconfig-a011-20201019
i386 randconfig-a014-20201019
riscv nommu_k210_defconfig
riscv allyesconfig
riscv nommu_virt_defconfig
riscv allnoconfig
riscv defconfig
riscv rv32_defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a016-20201019
x86_64   randconfig-a015-20201019
x86_64   randconfig-a012-20201019
x86_64   randconfig-a013-20201019
x86_64   randconfig-a011-20201019
x86_64   randconfig-a014-20201019

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[powerpc:next-test] BUILD SUCCESS c9818c0abfb0c3500684bb2bc75981123d63134d

2020-10-19 Thread kernel test robot
powerpc ps3_defconfig
sh  rsk7269_defconfig
arm mvebu_v7_defconfig
m68k mvme147_defconfig
arc allyesconfig
sh se7724_defconfig
powerpc ppc40x_defconfig
powerpc mpc85xx_cds_defconfig
sh se7750_defconfig
sh kfr2r09_defconfig
powerpc mpc512x_defconfig
powerpc socrates_defconfig
c6x evmc6678_defconfig
powerpc skiroot_defconfig
arm zeus_defconfig
powerpc katmai_defconfig
mips maltaup_xpa_defconfig
arm tegra_defconfig
powerpc mpc866_ads_defconfig
mips nlm_xlp_defconfig
sh rsk7264_defconfig
sh magicpanelr2_defconfig
powerpc mpc8272_ads_defconfig
sh sdk7786_defconfig
arm oxnas_v6_defconfig
arc nsimosci_hs_defconfig
arm vt8500_v6_v7_defconfig
mips workpad_defconfig
mips loongson1b_defconfig
h8300 edosk2674_defconfig
powerpc   currituck_defconfig
powerpc  iss476-smp_defconfig
arm spear3xx_defconfig
arm colibri_pxa300_defconfig
powerpc ge_imp3a_defconfig
mips allmodconfig
mips mpc30x_defconfig
alpha defconfig
mips tb0219_defconfig
sh urquell_defconfig
nios2 10m50_defconfig
sparc64 defconfig
arm omap1_defconfig
mips vocore2_defconfig
sh edosk7705_defconfig
m68k m5407c3_defconfig
sh se7343_defconfig
arc nsimosci_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
m68k defconfig
m68k allyesconfig
nds32 allnoconfig
c6x allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
parisc allyesconfig
s390 defconfig
i386 allyesconfig
sparc allyesconfig
i386 defconfig
mips allyesconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a004-20201019
x86_64   randconfig-a002-20201019
x86_64   randconfig-a006-20201019
x86_64   randconfig-a003-20201019
x86_64   randconfig-a005-20201019
x86_64   randconfig-a001-20201019
i386 randconfig-a006-20201019
i386 randconfig-a005-20201019
i386 randconfig-a001-20201019
i386 randconfig-a003-20201019
i386 randconfig-a004-20201019
i386 randconfig-a002-20201019
i386 randconfig-a015-20201019
i386 randconfig-a013-20201019
i386 randconfig-a016-20201019
i386 randconfig-a012-20201019
i386 randconfig-a011-20201019
i386 randconfig-a014-20201019
riscv nommu_k210_defconfig
riscv allyesconfig
riscv allnoconfig
riscv defconfig
riscv rv32_defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a016-20201019
x86_64   randconfig-a015-20201019
x86_64   randconfig-a012-20201019
x86_64   randconfig-a013-20201019
x86_64   randconfig-a011-20201019
x86_64   randconfig-a014-20201019

[powerpc:fixes] BUILD SUCCESS 358ab796ce78ba271a6ff82834183ffb2cb68c4c

2020-10-19 Thread kernel test robot
mips tb0226_defconfig
sh shx3_defconfig
arm efm32_defconfig
powerpc amigaone_defconfig
powerpc powernv_defconfig
sh j2_defconfig
mips cavium_octeon_defconfig
arm omap2plus_defconfig
sh ecovec24-romimage_defconfig
arm   cns3420vb_defconfig
arm s5pv210_defconfig
powerpc ps3_defconfig
sh  rsk7269_defconfig
arm mvebu_v7_defconfig
m68k mvme147_defconfig
arc  allyesconfig
sh   se7724_defconfig
powerpc  ppc40x_defconfig
powerpc mpc85xx_cds_defconfig
sh   se7750_defconfig
sh  kfr2r09_defconfig
powerpc mpc512x_defconfig
powerpc socrates_defconfig
c6x evmc6678_defconfig
powerpc skiroot_defconfig
arm zeus_defconfig
powerpc katmai_defconfig
mips maltaup_xpa_defconfig
arm tegra_defconfig
powerpc mpc866_ads_defconfig
mips nlm_xlp_defconfig
sh rsk7264_defconfig
sh magicpanelr2_defconfig
powerpc mpc8272_ads_defconfig
sh sdk7786_defconfig
arm oxnas_v6_defconfig
arc nsimosci_hs_defconfig
arm vt8500_v6_v7_defconfig
mips workpad_defconfig
mips loongson1b_defconfig
h8300 edosk2674_defconfig
powerpc currituck_defconfig
powerpc iss476-smp_defconfig
arm spear3xx_defconfig
arm colibri_pxa300_defconfig
powerpc ge_imp3a_defconfig
mips allmodconfig
mips mpc30x_defconfig
alpha   defconfig
mips tb0219_defconfig
sh  urquell_defconfig
nios2 10m50_defconfig
sparc64 defconfig
arm   omap1_defconfig
mips vocore2_defconfig
sh edosk7705_defconfig
powerpc akebono_defconfig
mips bigsur_defconfig
sh dreamcast_defconfig
m68k m5407c3_defconfig
sh se7343_defconfig
arc nsimosci_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
m68k defconfig
m68k allyesconfig
nds32 allnoconfig
c6x allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
parisc allyesconfig
s390 defconfig
i386 allyesconfig
sparc allyesconfig
i386 defconfig
mips allyesconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a004-20201019
x86_64   randconfig-a002-20201019
x86_64   randconfig-a006-20201019
x86_64   randconfig-a003-20201019
x86_64   randconfig-a005-20201019
x86_64   randconfig-a001-20201019
i386 randconfig-a006-20201019
i386 randconfig-a005-20201019
i386 randconfig-a001-20201019
i386 randconfig-a003-20201019
i386 randconfig-a004-20201019
i386 randconfig-a002-20201019
i386 randconfig-a015-20201019
i386 randconfig-a013-20201019
i386 randconfig-a016-20201019
i386 randconfig-a012-20201019
i386 randconfig-a011-20201019
i386 randconfig-a014-20201019
riscv nommu_k210_defconfig
riscv allyesconfig
riscv allnoconfig
riscv

Re: [PATCH 6/8] powerpc/signal64: Replace setup_trampoline() w/ unsafe_setup_trampoline()

2020-10-19 Thread Christophe Leroy




On 20/10/2020 at 04:42, Christopher M. Riedl wrote:

On Fri Oct 16, 2020 at 10:56 AM CDT, Christophe Leroy wrote:



On 15/10/2020 at 17:01, Christopher M. Riedl wrote:

From: Daniel Axtens 

Previously setup_trampoline() performed a costly KUAP switch on every
uaccess operation. These repeated uaccess switches cause a significant
drop in signal handling performance.

Rewrite setup_trampoline() to assume that a userspace write access
window is open. Replace all uaccess functions with their 'unsafe'
versions to avoid the repeated uaccess switches.

Signed-off-by: Daniel Axtens 
Signed-off-by: Christopher M. Riedl 
---
   arch/powerpc/kernel/signal_64.c | 32 +++-
   1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index bd92064e5576..6d4f7a5c4fbf 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -600,30 +600,33 @@ static long restore_tm_sigcontexts(struct task_struct 
*tsk,
   /*
* Setup the trampoline code on the stack
*/
-static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
+#define unsafe_setup_trampoline(syscall, tramp, e) \
+   unsafe_op_wrap(__unsafe_setup_trampoline(syscall, tramp), e)
+static long notrace __unsafe_setup_trampoline(unsigned int syscall,
+   unsigned int __user *tramp)
   {
int i;
-   long err = 0;
   
	/* bctrl # call the handler */
-   err |= __put_user(PPC_INST_BCTRL, &tramp[0]);
+   unsafe_put_user(PPC_INST_BCTRL, &tramp[0], err);
	/* addi r1, r1, __SIGNAL_FRAMESIZE  # Pop the dummy stackframe */
-   err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
- (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]);
+   unsafe_put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
+ (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1], err);
	/* li r0, __NR_[rt_]sigreturn| */
-   err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]);
+   unsafe_put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2], err);
	/* sc */
-   err |= __put_user(PPC_INST_SC, &tramp[3]);
+   unsafe_put_user(PPC_INST_SC, &tramp[3], err);

	/* Minimal traceback info */
	for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
-   err |= __put_user(0, &tramp[i]);
+   unsafe_put_user(0, &tramp[i], err);

-	if (!err)
-   flush_icache_range((unsigned long) &tramp[0],
-  (unsigned long) &tramp[TRAMP_SIZE]);
+   flush_icache_range((unsigned long)&tramp[0],
+  (unsigned long)&tramp[TRAMP_SIZE]);


This flush should be done outside the user_write_access block.



Hmm, I suppose that means setup_trampoline() cannot be completely
"unsafe". I'll see if I can re-arrange the code which calls this
function to avoid an additional uaccess block instead and push the
start()/end() into setup_trampoline() directly.


I think we shouldn't put too much effort into setup_trampoline(). Nowadays 99.999% of applications use
the VDSO. Using the trampoline on the stack requires unmapping the VDSO and remapping the stack RW. That's
really a corner case; I think it would be good enough to call it outside the main access begin/end
block and let it do its own access_begin/end, along the lines of the sketch below.
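
For instance (untested sketch):

	/* main signal-frame writes under a single uaccess window */
	if (!user_write_access_begin(frame, sizeof(*frame)))
		goto badframe;
	/* ... unsafe_put_user() etc. ... */
	user_write_access_end();

	/* rare no-VDSO path opens its own window */
	if (!vdso64_rt_sigtramp || !tsk->mm->context.vdso_base) {
		if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
			return -EFAULT;
		err |= __unsafe_setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
		user_write_access_end();
		if (err)
			goto badframe;
	}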


This corner-case functionality can be tested using the sigreturn_vdso selftest in
selftests/powerpc/signal/

Christophe



   
-	return err;
+   return 0;
+err:
+   return 1;
   }
   
   /*

@@ -888,7 +891,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) {
regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp;
} else {
-   err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
+   if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
+   return -EFAULT;
+   err |= __unsafe_setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
+   user_write_access_end();
	if (err)
		goto badframe;
	regs->nip = (unsigned long) &frame->tramp[0];



Christophe




Re: [PATCH 1/8] powerpc/uaccess: Add unsafe_copy_from_user

2020-10-19 Thread Christopher M. Riedl
On Fri Oct 16, 2020 at 10:17 AM CDT, Christophe Leroy wrote:
>
>
> On 15/10/2020 at 17:01, Christopher M. Riedl wrote:
> > Implement raw_copy_from_user_allowed() which assumes that userspace read
> > access is open. Use this new function to implement raw_copy_from_user().
> > Finally, wrap the new function to follow the usual "unsafe_" convention
> > of taking a label argument. The new raw_copy_from_user_allowed() calls
> > __copy_tofrom_user() internally, but this is still safe to call in user
> > access blocks formed with user_*_access_begin()/user_*_access_end()
> > since asm functions are not instrumented for tracing.
>
> Would objtool accept that if it was implemented on powerpc ?
>
> __copy_tofrom_user() is a function which is optimised for larger memory
> copies (using dcbz, etc ...)
> Do we need such an optimisation for unsafe_copy_from_user() ? Or can we
> do a simple loop as done for
> unsafe_copy_to_user() instead ?

I tried using a simple loop based on your unsafe_copy_to_user()
implementation. Similar to the copy_{vsx,fpr}_from_user() results there
is a hit to signal handling performance. The results with the loop are
in the 'unsafe-signal64-copy' column:

|  | hash   | radix  |
|  | -- | -- |
| linuxppc/next| 289014 | 158408 |
| unsafe-signal64  | 298506 | 253053 |
| unsafe-signal64-copy | 197029 | 177002 |

Similar to the copy_{vsx,fpr}_from_user() patch I don't fully understand
why this performs so badly yet.

Implementation:

#define unsafe_copy_from_user(d, s, l, e)				\
do {									\
	u8 *_dst = (u8 *)(d);						\
	const u8 __user *_src = (u8 __user *)(s);			\
	size_t _len = (l);						\
	int _i;								\
									\
	for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \
		unsafe_get_user(*(long *)(_dst + _i), (long __user *)(_src + _i), e); \
	if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) {			\
		unsafe_get_user(*(u32 *)(_dst + _i), (u32 __user *)(_src + _i), e); \
		_i += 4;						\
	}								\
	if (_len & 2) {							\
		unsafe_get_user(*(u16 *)(_dst + _i), (u16 __user *)(_src + _i), e); \
		_i += 2;						\
	}								\
	if (_len & 1)							\
		unsafe_get_user(*(u8 *)(_dst + _i), (u8 __user *)(_src + _i), e); \
} while (0)

>
> Christophe
>
> > 
> > Signed-off-by: Christopher M. Riedl 
> > ---
> >   arch/powerpc/include/asm/uaccess.h | 28 +++-
> >   1 file changed, 19 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> > index 26781b044932..66940b4eb692 100644
> > --- a/arch/powerpc/include/asm/uaccess.h
> > +++ b/arch/powerpc/include/asm/uaccess.h
> > @@ -418,38 +418,45 @@ raw_copy_in_user(void __user *to, const void __user 
> > *from, unsigned long n)
> >   }
> >   #endif /* __powerpc64__ */
> >   
> > -static inline unsigned long raw_copy_from_user(void *to,
> > -   const void __user *from, unsigned long n)
> > +static inline unsigned long
> > +raw_copy_from_user_allowed(void *to, const void __user *from, unsigned long n)
> >   {
> > -   unsigned long ret;
> > if (__builtin_constant_p(n) && (n <= 8)) {
> > -   ret = 1;
> > +   unsigned long ret = 1;
> >   
> > switch (n) {
> > case 1:
> > barrier_nospec();
> > -   __get_user_size(*(u8 *)to, from, 1, ret);
> > +   __get_user_size_allowed(*(u8 *)to, from, 1, ret);
> > break;
> > case 2:
> > barrier_nospec();
> > -   __get_user_size(*(u16 *)to, from, 2, ret);
> > +   __get_user_size_allowed(*(u16 *)to, from, 2, ret);
> > break;
> > case 4:
> > barrier_nospec();
> > -   __get_user_size(*(u32 *)to, from, 4, ret);
> > +   __get_user_size_allowed(*(u32 *)to, from, 4, ret);
> > break;
> > case 8:
> > barrier_nospec();
> > -  

Re: [PATCH 8/8] powerpc/signal64: Rewrite rt_sigreturn() to minimise uaccess switches

2020-10-19 Thread Christopher M. Riedl
On Fri Oct 16, 2020 at 11:07 AM CDT, Christophe Leroy wrote:
>
>
> On 15/10/2020 at 17:01, Christopher M. Riedl wrote:
> > From: Daniel Axtens 
> > 
> > Add uaccess blocks and use the 'unsafe' versions of functions doing user
> > access where possible to reduce the number of times uaccess has to be
> > opened/closed.
> > 
> > Signed-off-by: Daniel Axtens 
> > Signed-off-by: Christopher M. Riedl 
> > ---
> >   arch/powerpc/kernel/signal_64.c | 23 +++
> >   1 file changed, 15 insertions(+), 8 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/signal_64.c 
> > b/arch/powerpc/kernel/signal_64.c
> > index 3b97e3681a8f..0f4ff7a5bfc1 100644
> > --- a/arch/powerpc/kernel/signal_64.c
> > +++ b/arch/powerpc/kernel/signal_64.c
> > @@ -779,18 +779,22 @@ SYSCALL_DEFINE0(rt_sigreturn)
> >  */
> > regs->msr &= ~MSR_TS_MASK;
> >   
> > -   if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
> > +   if (!user_read_access_begin(uc, sizeof(*uc)))
> > goto badframe;
> > +
> > +   unsafe_get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR], badframe_block);
> > +
> > if (MSR_TM_ACTIVE(msr)) {
> > /* We recheckpoint on return. */
> > struct ucontext __user *uc_transact;
> >   
> > /* Trying to start TM on non TM system */
> > if (!cpu_has_feature(CPU_FTR_TM))
> > -   goto badframe;
> > +   goto badframe_block;
> > +
> > +   unsafe_get_user(uc_transact, &uc->uc_link, badframe_block);
> > +   user_read_access_end();
>
> user_access_end() only in the if branch ?
>
> >   
> > -   if (__get_user(uc_transact, &uc->uc_link))
> > -   goto badframe;
> > if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
> >&uc_transact->uc_mcontext))
> > goto badframe;
> > @@ -810,12 +814,13 @@ SYSCALL_DEFINE0(rt_sigreturn)
> >  * causing a TM bad thing.
> >  */
> > current->thread.regs->msr &= ~MSR_TS_MASK;
> > +
> > +#ifndef CONFIG_PPC_TRANSACTIONAL_MEM
> > if (!user_read_access_begin(uc, sizeof(*uc)))
>
> The matching user_read_access_end() is not in the same #ifndef? That's
> dirty and hard to follow.
> Can you re-organise the code to avoid all this nesting?

Yes, thanks for pointing this out. I really wanted to avoid changing too
much of the logic inside these functions. But I suppose I ended up
creating a mess - I will fix this in the next spin.

>
> > -   return -EFAULT;
> > -   if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) {
> > -   user_read_access_end();
> > goto badframe;
> > -   }
> > +#endif
> > +   unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
> > + badframe_block);
> > user_read_access_end();
> > }
> >   
> > @@ -825,6 +830,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
> > set_thread_flag(TIF_RESTOREALL);
> > return 0;
> >   
> > +badframe_block:
> > +   user_read_access_end();
> >   badframe:
> > signal_fault(current, regs, "rt_sigreturn", uc);
> >   
> > 
>
> Christophe



Re: [PATCH 7/8] powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches

2020-10-19 Thread Christopher M. Riedl
On Fri Oct 16, 2020 at 11:00 AM CDT, Christophe Leroy wrote:
>
>
> On 15/10/2020 at 17:01, Christopher M. Riedl wrote:
> > From: Daniel Axtens 
> > 
> > Add uaccess blocks and use the 'unsafe' versions of functions doing user
> > access where possible to reduce the number of times uaccess has to be
> > opened/closed.
> > 
> > There is no 'unsafe' version of copy_siginfo_to_user, so move it
> > slightly to allow for a "longer" uaccess block.
> > 
> > Signed-off-by: Daniel Axtens 
> > Signed-off-by: Christopher M. Riedl 
> > ---
> >   arch/powerpc/kernel/signal_64.c | 54 -
> >   1 file changed, 27 insertions(+), 27 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/signal_64.c 
> > b/arch/powerpc/kernel/signal_64.c
> > index 6d4f7a5c4fbf..3b97e3681a8f 100644
> > --- a/arch/powerpc/kernel/signal_64.c
> > +++ b/arch/powerpc/kernel/signal_64.c
> > @@ -843,46 +843,42 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t 
> > *set,
> > /* Save the thread's msr before get_tm_stackpointer() changes it */
> > unsigned long msr = regs->msr;
> >   #endif
> > -
> > frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
> > -   if (!access_ok(frame, sizeof(*frame)))
> > +   if (!user_write_access_begin(frame, sizeof(*frame)))
> > goto badframe;
> >   
> > -   err |= __put_user(&frame->info, &frame->pinfo);
> > -   err |= __put_user(&frame->uc, &frame->puc);
> > -   err |= copy_siginfo_to_user(&frame->info, &ksig->info);
> > -   if (err)
> > -   goto badframe;
> > +   unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
> > +   unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
> >   
> > /* Create the ucontext.  */
> > -   err |= __put_user(0, &frame->uc.uc_flags);
> > -   err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
> > +   unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
> > +   unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
> > +
> >   #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> > if (MSR_TM_ACTIVE(msr)) {
> > /* The ucontext_t passed to userland points to the second
> >  * ucontext_t (for transactional state) with its uc_link ptr.
> >  */
> > -   err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
> > +   unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link,
> > +   badframe_block);
> > +   user_write_access_end();
>
> Whaou. Doing this inside an #ifdef sequence is dirty.
> Can you reorganise the code to avoid that, and to avoid nesting
> #ifdef/#endif and the if/else, as I did in signal32?

Hopefully yes - next spin!

>
> > err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
> > &frame->uc_transact.uc_mcontext,
> > tsk, ksig->sig, NULL,
> > (unsigned long)ksig->ka.sa.sa_handler,
> > msr);
> > +   if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
> > +   goto badframe;
> > +
> > } else
> >   #endif
> > {
> > -   err |= __put_user(0, &frame->uc.uc_link);
> > -
> > -   if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
> > -   return -EFAULT;
> > -   err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
> > -   ksig->sig, NULL,
> > -   (unsigned long)ksig->ka.sa.sa_handler, 1);
> > -   user_write_access_end();
> > +   unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
> > +   unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
> > +   NULL, (unsigned long)ksig->ka.sa.sa_handler,
> > +   1, badframe_block);
> > }
> > -   err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
> > -   if (err)
> > -   goto badframe;
> > +
> > +   unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
> >   
> > /* Make sure signal handler doesn't get spurious FP exceptions */
> > tsk->thread.fp_state.fpscr = 0;
> > @@ -891,15 +887,17 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t 
> > *set,
> > if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) {
> > regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp;
> > } else {
> > -   if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
> > -   return -EFAULT;
> > -   err |= __unsafe_setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
> > -   user_write_access_end();
> > -   if (err)
> > -   goto badframe;
> > +   unsafe_setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0],
> > +   badframe_block);
> > regs->nip = (unsigned long) &frame->tramp[0];
> > }
> >   
> > +   user_write_access_end();
> > +
> > +   /* Save the siginfo outside of the unsafe block. */
> > +   if 

Re: [PATCH 6/8] powerpc/signal64: Replace setup_trampoline() w/ unsafe_setup_trampoline()

2020-10-19 Thread Christopher M. Riedl
On Fri Oct 16, 2020 at 10:56 AM CDT, Christophe Leroy wrote:
>
>
> On 15/10/2020 at 17:01, Christopher M. Riedl wrote:
> > From: Daniel Axtens 
> > 
> > Previously setup_trampoline() performed a costly KUAP switch on every
> > uaccess operation. These repeated uaccess switches cause a significant
> > drop in signal handling performance.
> > 
> > Rewrite setup_trampoline() to assume that a userspace write access
> > window is open. Replace all uaccess functions with their 'unsafe'
> > versions to avoid the repeated uaccess switches.
> > 
> > Signed-off-by: Daniel Axtens 
> > Signed-off-by: Christopher M. Riedl 
> > ---
> >   arch/powerpc/kernel/signal_64.c | 32 +++-
> >   1 file changed, 19 insertions(+), 13 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/signal_64.c 
> > b/arch/powerpc/kernel/signal_64.c
> > index bd92064e5576..6d4f7a5c4fbf 100644
> > --- a/arch/powerpc/kernel/signal_64.c
> > +++ b/arch/powerpc/kernel/signal_64.c
> > @@ -600,30 +600,33 @@ static long restore_tm_sigcontexts(struct task_struct 
> > *tsk,
> >   /*
> >* Setup the trampoline code on the stack
> >*/
> > -static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
> > +#define unsafe_setup_trampoline(syscall, tramp, e) \
> > +   unsafe_op_wrap(__unsafe_setup_trampoline(syscall, tramp), e)
> > +static long notrace __unsafe_setup_trampoline(unsigned int syscall,
> > +   unsigned int __user *tramp)
> >   {
> > int i;
> > -   long err = 0;
> >   
> > /* bctrl # call the handler */
> > -   err |= __put_user(PPC_INST_BCTRL, [0]);
> > +   unsafe_put_user(PPC_INST_BCTRL, [0], err);
> > /* addi r1, r1, __SIGNAL_FRAMESIZE  # Pop the dummy stackframe */
> > -   err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
> > - (__SIGNAL_FRAMESIZE & 0x), [1]);
> > +   unsafe_put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
> > + (__SIGNAL_FRAMESIZE & 0x), [1], err);
> > /* li r0, __NR_[rt_]sigreturn| */
> > -   err |= __put_user(PPC_INST_ADDI | (syscall & 0x), [2]);
> > +   unsafe_put_user(PPC_INST_ADDI | (syscall & 0x), [2], err);
> > /* sc */
> > -   err |= __put_user(PPC_INST_SC, [3]);
> > +   unsafe_put_user(PPC_INST_SC, [3], err);
> >   
> > /* Minimal traceback info */
> > for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
> > -   err |= __put_user(0, [i]);
> > +   unsafe_put_user(0, [i], err);
> >   
> > -   if (!err)
> > -   flush_icache_range((unsigned long) [0],
> > -  (unsigned long) [TRAMP_SIZE]);
> > +   flush_icache_range((unsigned long)[0],
> > +  (unsigned long)[TRAMP_SIZE]);
>
> This flush should be done outside the user_write_access block.
>

Hmm, I suppose that means setup_trampoline() cannot be completely
"unsafe". I'll see if I can re-arrange the code which calls this
function to avoid an additional uaccess block instead and push the
start()/end() into setup_trampoline() directly.

> >   
> > -   return err;
> > +   return 0;
> > +err:
> > +   return 1;
> >   }
> >   
> >   /*
> > @@ -888,7 +891,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t 
> > *set,
> > if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) {
> > regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp;
> > } else {
> > -   err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
> > +   if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
> > +   return -EFAULT;
> > +   err |= __unsafe_setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
> > +   user_write_access_end();
> > if (err)
> > goto badframe;
> > regs->nip = (unsigned long) &frame->tramp[0];
> > 
>
> Christophe



Re: [PATCH 2/8] powerpc/signal: Add unsafe_copy_{vsx,fpr}_from_user()

2020-10-19 Thread Christopher M. Riedl
On Fri Oct 16, 2020 at 10:48 AM CDT, Christophe Leroy wrote:
>
>
> On 15/10/2020 at 17:01, Christopher M. Riedl wrote:
> > Reuse the "safe" implementation from signal.c except for calling
> > unsafe_copy_from_user() to copy into a local buffer. Unlike the
> > unsafe_copy_{vsx,fpr}_to_user() functions the "copy from" functions
> > cannot use unsafe_get_user() directly to bypass the local buffer since
> > doing so significantly reduces signal handling performance.
>
> Why can't the functions use unsafe_get_user()? Why does it significantly
> reduce signal handling performance, and by how much? I would expect that
> not going through an intermediate memory area would be more efficient.
>

Here is a comparison, 'unsafe-signal64-regs' avoids the intermediate buffer:

|  | hash   | radix  |
|  | -- | -- |
| linuxppc/next| 289014 | 158408 |
| unsafe-signal64  | 298506 | 253053 |
| unsafe-signal64-regs | 254898 | 220831 |

I have not figured out the 'why' yet. As you mentioned in your series,
technically calling __copy_tofrom_user() is overkill for these
operations. The only obvious difference between unsafe_put_user() and
unsafe_get_user() is that we don't have asm-goto for the 'get' variant.
Instead we wrap with unsafe_op_wrap(), which inserts a conditional and
then a goto to the label (sketched below).
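
For reference, the pattern is roughly this (illustrative sketch of the
powerpc uaccess.h of that time, not verbatim):

    /* put side: asm goto branches straight to the label on fault,
     * so there is no result value to test on the fast path */
    #define unsafe_put_user(x, p, e)   __put_user_goto(x, p, e)

    /* get side: no asm goto with outputs yet, so every access tests
     * a result and conditionally jumps to the label */
    #define unsafe_op_wrap(op, label)  do { if (unlikely(op)) goto label; } while (0)
    #define unsafe_get_user(x, p, e)   unsafe_op_wrap(__get_user_allowed(x, p), e)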

Implementations:

#define unsafe_copy_fpr_from_user(task, from, label)   do {\
	struct task_struct *__t = task; \
	u64 __user *buf = (u64 __user *)from;   \
	int i;  \
	\
	for (i = 0; i < ELF_NFPREG - 1; i++)\
		unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \
	unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label);\
} while (0)

#define unsafe_copy_vsx_from_user(task, from, label)   do {\
	struct task_struct *__t = task; \
	u64 __user *buf = (u64 __user *)from;   \
	int i;  \
	\
	for (i = 0; i < ELF_NVSRHALFREG ; i++)  \
		unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
				&buf[i], label);\
} while (0)

> Christophe
>
>
> > 
> > Signed-off-by: Christopher M. Riedl 
> > ---
> >   arch/powerpc/kernel/signal.h | 33 +
> >   1 file changed, 33 insertions(+)
> > 
> > diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
> > index 2559a681536e..e9aaeac0da37 100644
> > --- a/arch/powerpc/kernel/signal.h
> > +++ b/arch/powerpc/kernel/signal.h
> > @@ -53,6 +53,33 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
> > &buf[i], label);\
> >   } while (0)
> >   
> > +#define unsafe_copy_fpr_from_user(task, from, label)   do {	\
> > +   struct task_struct *__t = task; \
> > +   u64 __user *__f = (u64 __user *)from;   \
> > +   u64 buf[ELF_NFPREG];\
> > +   int i;  \
> > +   \
> > +   unsafe_copy_from_user(buf, __f, ELF_NFPREG * sizeof(double),\
> > +   label); \
> > +   for (i = 0; i < ELF_NFPREG - 1; i++)\
> > +   __t->thread.TS_FPR(i) = buf[i]; \
> > +   __t->thread.fp_state.fpscr = buf[i];\
> > +} while (0)
> > +
> > +#define unsafe_copy_vsx_from_user(task, from, label)   do {	\
> > +   struct task_struct *__t = task; \
> > +   u64 __user *__f = (u64 __user *)from;   \
> > +   u64 buf[ELF_NVSRHALFREG];   \
> > +   int i;  \
> > +   \
> > +   unsafe_copy_from_user(buf, __f, \
> > +   ELF_NVSRHALFREG * sizeof(double),   \
> > +   label); \
> > +   for (i = 0; i < ELF_NVSRHALFREG ; i++)  \
> > +   __t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 

[PATCH v3] soc: fsl: dpio: Get the cpumask through cpumask_of(cpu)

2020-10-19 Thread Yi Wang
From: Hao Si 

The local variable 'cpumask_t mask' lives in stack memory, and its address
is assigned to 'desc->affinity' in 'irq_set_affinity_hint()'.
But the memory area where this variable is located is at risk of being
modified after the function returns.

During LTP testing, the following error was generated:

Unable to handle kernel paging request at virtual address 12e9b790
Mem abort info:
  ESR = 0x9607
  Exception class = DABT (current EL), IL = 32 bits
  SET = 0, FnV = 0
  EA = 0, S1PTW = 0
Data abort info:
  ISV = 0, ISS = 0x0007
  CM = 0, WnR = 0
swapper pgtable: 4k pages, 48-bit VAs, pgdp = 75ac5e07
[12e9b790] pgd=0027dbffe003, pud=0027dbffd003,
pmd=0027b6d61003, pte=
Internal error: Oops: 9607 [#1] PREEMPT SMP
Modules linked in: xt_conntrack
Process read_all (pid: 20171, stack limit = 0x44ea4095)
CPU: 14 PID: 20171 Comm: read_all Tainted: GB   W
Hardware name: NXP Layerscape LX2160ARDB (DT)
pstate: 8085 (Nzcv daIf -PAN -UAO)
pc : irq_affinity_hint_proc_show+0x54/0xb0
lr : irq_affinity_hint_proc_show+0x4c/0xb0
sp : 1138bc10
x29: 1138bc10 x28: d131d1e0
x27: 007000c0 x26: 8025b9480dc0
x25: 8025b9480da8 x24: 03ff
x23: 8027334f8300 x22: 80272e97d000
x21: 80272e97d0b0 x20: 8025b9480d80
x19: 09a49000 x18: 
x17:  x16: 
x15:  x14: 
x13:  x12: 0040
x11:  x10: 802735b79b88
x9 :  x8 : 
x7 : 09a49848 x6 : 0003
x5 :  x4 : 08157d6c
x3 : 1138bc10 x2 : 12e9b790
x1 :  x0 : 
Call trace:
 irq_affinity_hint_proc_show+0x54/0xb0
 seq_read+0x1b0/0x440
 proc_reg_read+0x80/0xd8
 __vfs_read+0x60/0x178
 vfs_read+0x94/0x150
 ksys_read+0x74/0xf0
 __arm64_sys_read+0x24/0x30
 el0_svc_common.constprop.0+0xd8/0x1a0
 el0_svc_handler+0x34/0x88
 el0_svc+0x10/0x14
Code: f9001bbf 943e0732 f94066c2 b462 (f9400041)
---[ end trace b495bdcb0b3b732b ]---
Kernel panic - not syncing: Fatal exception
SMP: stopping secondary CPUs
SMP: failed to stop secondary CPUs 0,2-4,6,8,11,13-15
Kernel Offset: disabled
CPU features: 0x0,21006008
Memory Limit: none
---[ end Kernel panic - not syncing: Fatal exception ]---

Fix it by using 'cpumask_of(cpu)' to get the cpumask.
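
The before/after, sketched with trimmed context:

	/* Broken: 'mask' lives on the stack, but irq_set_affinity_hint()
	 * keeps the pointer around for later /proc reads. */
	cpumask_t mask;
	cpumask_clear(&mask);
	cpumask_set_cpu(cpu, &mask);
	irq_set_affinity_hint(irq->msi_desc->irq, &mask);

	/* Fixed: cpumask_of(cpu) points into the static cpu_bit_bitmap
	 * table, so the stored pointer never goes stale. */
	irq_set_affinity_hint(irq->msi_desc->irq, cpumask_of(cpu));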

Signed-off-by: Hao Si 
Signed-off-by: Lin Chen 
Signed-off-by: Yi Wang 
---
v3: Use cpumask_of(cpu) to get the pre-defined cpumask in the static 
cpu_bit_bitmap array.
v2: Place 'cpumask_t mask' in the driver's private data and while at it, 
rename it to cpu_mask.

 drivers/soc/fsl/dpio/dpio-driver.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/soc/fsl/dpio/dpio-driver.c b/drivers/soc/fsl/dpio/dpio-driver.c
index 7b642c3..7f397b4 100644
--- a/drivers/soc/fsl/dpio/dpio-driver.c
+++ b/drivers/soc/fsl/dpio/dpio-driver.c
@@ -95,7 +95,6 @@ static int register_dpio_irq_handlers(struct fsl_mc_device *dpio_dev, int cpu)
 {
int error;
struct fsl_mc_device_irq *irq;
-   cpumask_t mask;
 
irq = dpio_dev->irqs[0];
	error = devm_request_irq(&dpio_dev->dev,
@@ -112,9 +111,7 @@ static int register_dpio_irq_handlers(struct fsl_mc_device *dpio_dev, int cpu)
}
 
/* set the affinity hint */
-   cpumask_clear(&mask);
-   cpumask_set_cpu(cpu, &mask);
-   if (irq_set_affinity_hint(irq->msi_desc->irq, &mask))
+   if (irq_set_affinity_hint(irq->msi_desc->irq, cpumask_of(cpu)))
	dev_err(&dpio_dev->dev,
"irq_set_affinity failed irq %d cpu %d\n",
irq->msi_desc->irq, cpu);
-- 
2.15.2

Re: [PATCH 3/8] powerpc: Mark functions called inside uaccess blocks w/ 'notrace'

2020-10-19 Thread Christopher M. Riedl
On Fri Oct 16, 2020 at 4:02 AM CDT, Christophe Leroy wrote:
>
>
> On 15/10/2020 at 17:01, Christopher M. Riedl wrote:
> > Functions called between user_*_access_begin() and user_*_access_end()
> > should be either inlined or marked 'notrace' to prevent leaving
> > userspace access exposed. Mark any such functions relevant to signal
> > handling so that subsequent patches can call them inside uaccess blocks.
>
> Is it enough to mark it "notrace" ? I see that when I activate KASAN,
> there are still KASAN calls in
> those functions.
>

Maybe not enough after all :(

> In my series for 32 bits, I re-ordered stuff in order to do all those
> calls before doing the
> _access_begin(), can't you do the same on PPC64 ? (See
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/f6eac65781b4a57220477c8864bca2b57f29a5d5.1597770847.git.christophe.le...@csgroup.eu/)
>

Yes, I will give this another shot in the next spin.
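
Roughly, that reordering looks like this (sketch; the helper calls are
illustrative only):

	/* do anything traceable/instrumented *before* opening the window */
	flush_fp_to_thread(tsk);
	flush_altivec_to_thread(tsk);

	if (!user_write_access_begin(frame, sizeof(*frame)))
		return -EFAULT;
	/* only unsafe_*() accessors in here */
	user_write_access_end();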

> Christophe
>
> > 
> > Signed-off-by: Christopher M. Riedl 
> > ---
> >   arch/powerpc/kernel/process.c | 20 ++--
> >   arch/powerpc/mm/mem.c |  4 ++--
> >   2 files changed, 12 insertions(+), 12 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> > index ba2c987b8403..bf5d9654bd2c 100644
> > --- a/arch/powerpc/kernel/process.c
> > +++ b/arch/powerpc/kernel/process.c
> > @@ -84,7 +84,7 @@ extern unsigned long _get_SP(void);
> >*/
> >   bool tm_suspend_disabled __ro_after_init = false;
> >   
> > -static void check_if_tm_restore_required(struct task_struct *tsk)
> > +static void notrace check_if_tm_restore_required(struct task_struct *tsk)
> >   {
> > /*
> >  * If we are saving the current thread's registers, and the
> > @@ -151,7 +151,7 @@ void notrace __msr_check_and_clear(unsigned long bits)
> >   EXPORT_SYMBOL(__msr_check_and_clear);
> >   
> >   #ifdef CONFIG_PPC_FPU
> > -static void __giveup_fpu(struct task_struct *tsk)
> > +static void notrace __giveup_fpu(struct task_struct *tsk)
> >   {
> > unsigned long msr;
> >   
> > @@ -163,7 +163,7 @@ static void __giveup_fpu(struct task_struct *tsk)
> > tsk->thread.regs->msr = msr;
> >   }
> >   
> > -void giveup_fpu(struct task_struct *tsk)
> > +void notrace giveup_fpu(struct task_struct *tsk)
> >   {
> > check_if_tm_restore_required(tsk);
> >   
> > @@ -177,7 +177,7 @@ EXPORT_SYMBOL(giveup_fpu);
> >* Make sure the floating-point register state in the
> >* the thread_struct is up to date for task tsk.
> >*/
> > -void flush_fp_to_thread(struct task_struct *tsk)
> > +void notrace flush_fp_to_thread(struct task_struct *tsk)
> >   {
> > if (tsk->thread.regs) {
> > /*
> > @@ -234,7 +234,7 @@ static inline void __giveup_fpu(struct task_struct 
> > *tsk) { }
> >   #endif /* CONFIG_PPC_FPU */
> >   
> >   #ifdef CONFIG_ALTIVEC
> > -static void __giveup_altivec(struct task_struct *tsk)
> > +static void notrace __giveup_altivec(struct task_struct *tsk)
> >   {
> > unsigned long msr;
> >   
> > @@ -246,7 +246,7 @@ static void __giveup_altivec(struct task_struct *tsk)
> > tsk->thread.regs->msr = msr;
> >   }
> >   
> > -void giveup_altivec(struct task_struct *tsk)
> > +void notrace giveup_altivec(struct task_struct *tsk)
> >   {
> > check_if_tm_restore_required(tsk);
> >   
> > @@ -285,7 +285,7 @@ EXPORT_SYMBOL(enable_kernel_altivec);
> >* Make sure the VMX/Altivec register state in the
> >* the thread_struct is up to date for task tsk.
> >*/
> > -void flush_altivec_to_thread(struct task_struct *tsk)
> > +void notrace flush_altivec_to_thread(struct task_struct *tsk)
> >   {
> > if (tsk->thread.regs) {
> > preempt_disable();
> > @@ -300,7 +300,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
> >   #endif /* CONFIG_ALTIVEC */
> >   
> >   #ifdef CONFIG_VSX
> > -static void __giveup_vsx(struct task_struct *tsk)
> > +static void notrace __giveup_vsx(struct task_struct *tsk)
> >   {
> > unsigned long msr = tsk->thread.regs->msr;
> >   
> > @@ -317,7 +317,7 @@ static void __giveup_vsx(struct task_struct *tsk)
> > __giveup_altivec(tsk);
> >   }
> >   
> > -static void giveup_vsx(struct task_struct *tsk)
> > +static void notrace giveup_vsx(struct task_struct *tsk)
> >   {
> > check_if_tm_restore_required(tsk);
> >   
> > @@ -352,7 +352,7 @@ void enable_kernel_vsx(void)
> >   }
> >   EXPORT_SYMBOL(enable_kernel_vsx);
> >   
> > -void flush_vsx_to_thread(struct task_struct *tsk)
> > +void notrace flush_vsx_to_thread(struct task_struct *tsk)
> >   {
> > if (tsk->thread.regs) {
> > preempt_disable();
> > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> > index ddc32cc1b6cf..da2345a2abc6 100644
> > --- a/arch/powerpc/mm/mem.c
> > +++ b/arch/powerpc/mm/mem.c
> > @@ -378,7 +378,7 @@ static inline bool flush_coherent_icache(unsigned long 
> > addr)
> >* @start: the start address
> >* @stop: the stop address (exclusive)
> >*/
> > -static void 

Re: [PATCH 3/3] powerpc: Fix pre-update addressing in inline assembly

2020-10-19 Thread Segher Boessenkool
On Mon, Oct 19, 2020 at 12:12:48PM +, Christophe Leroy wrote:
> In several places, inline assembly uses the "%Un" modifier
> to enable the use of instruction with pre-update addressing,

Calling this "pre-update" is misleading: the register is not updated
before the address is generated (or the memory access done!), and the
addressing is exactly the same as the "non-u" insn would use.  It is
called an "update form" instruction, because (at the same time as doing
the memory access, logically anyway) it writes back the address used to
the base register.
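
A minimal illustration (plain assembly, not from the patch):

	stw  r10, 16(r9)   # store to r9+16; r9 is left unchanged
	stwu r10, 16(r9)   # store to the same address r9+16, and at the
	                   # same time write that address back into r9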

> but the associated "<>" constraint is missing.

But that is just fine.  Pointless, sure, but not a bug.

> Use UPD_CONSTR macro everywhere %Un modifier is used.

Eww.  My poor stomach.

Have you verified that update form is *correct* in all these, and that
we even *want* this there?


Segher


Re: [PATCH 2/3] powerpc: Fix incorrect stw{, ux, u, x} instructions in __set_pte_at

2020-10-19 Thread Segher Boessenkool
On Mon, Oct 19, 2020 at 12:12:47PM +, Christophe Leroy wrote:
> From: Mathieu Desnoyers 
> 
> The placeholder for instruction selection should use the second
> argument's operand, which is %1, not %0. This could generate incorrect
> assembly code if the instruction selection for argument %0 ever differs
> from argument %1.

"Instruction selection" isn't correct here...  "if the memory addressing
of operand 0 is a different form from that of operand 1", perhaps?
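
For concreteness, the asm in question is of roughly this shape
(reconstructed for illustration; see the patch itself for the exact hunk):

	/* the second store writes through operand 1, so its modifiers
	 * must be %U1%X1, not %U0%X0 */
	__asm__ __volatile__("\
		stw%U0%X0 %2,%0\n\
		eieio\n\
		stw%U1%X1 %L2,%1"
		: "=m" (*ptep), "=m" (*((unsigned char *)ptep + 4))
		: "r" (pte) : "memory");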

The patch looks fine of course :-)

Acked-by: Segher Boessenkool 


Segher


Re: [PATCH 1/3] powerpc/uaccess: Don't use "m<>" constraint with GCC 4.9

2020-10-19 Thread Segher Boessenkool
On Mon, Oct 19, 2020 at 12:12:46PM +, Christophe Leroy wrote:
> GCC 4.9 sometimes fails to build with "m<>" constraint in
> inline assembly.

> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -223,7 +223,7 @@ do {  
> \
>   "1: " op "%U1%X1 %0,%1  # put_user\n"   \
>   EX_TABLE(1b, %l2)   \
>   :   \
> - : "r" (x), "m<>" (*addr)\
> + : "r" (x), "m"UPD_CONSTR (*addr)\
>   :   \
>   : label)
>  
> @@ -294,7 +294,7 @@ extern long __get_user_bad(void);
>   ".previous\n"   \
>   EX_TABLE(1b, 3b)\
>   : "=r" (err), "=r" (x)  \
> - : "m<>" (*addr), "i" (-EFAULT), "0" (err))
> + : "m"UPD_CONSTR (*addr), "i" (-EFAULT), "0" (err))

Wow, ugly!  But these are the only two places that use this, so

Acked-by: Segher Boessenkool  

I just hope that we get rid of 4.9 before we use this a lot more ;-)


Segher


Re: [PATCH 3/3] powerpc: Fix pre-update addressing in inline assembly

2020-10-19 Thread Christophe Leroy




On 19/10/2020 at 17:35, kernel test robot wrote:

Hi Christophe,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on linus/master next-20201016]
[cannot apply to kvm-ppc/kvm-ppc-next mpe/next v5.9]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-uaccess-Don-t-use-m-constraint-with-GCC-4-9/20201019-201504
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc64-randconfig-r012-20201019 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
094e9f4779eb9b5c6a49014f2f80b8cbb833572f)
reproduce (this is a W=1 build):
 wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
 chmod +x ~/bin/make.cross
 # install powerpc64 cross compiling tool for clang build
 # apt-get install binutils-powerpc64-linux-gnu
 # 
https://github.com/0day-ci/linux/commit/d57fd8d270993414b8c0414d7be4b03cc3de1856
 git remote add linux-review https://github.com/0day-ci/linux
 git fetch --no-tags linux-review 
Christophe-Leroy/powerpc-uaccess-Don-t-use-m-constraint-with-GCC-4-9/20201019-201504
 git checkout d57fd8d270993414b8c0414d7be4b03cc3de1856
 # save the attached .config to linux build tree
 COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross 
ARCH=powerpc64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:14:
In file included from include/linux/sem.h:5:
In file included from include/uapi/linux/sem.h:5:
In file included from include/linux/ipc.h:5:
In file included from include/linux/spinlock.h:51:
In file included from include/linux/preempt.h:78:
In file included from ./arch/powerpc/include/generated/asm/preempt.h:1:
In file included from include/asm-generic/preempt.h:5:
In file included from include/linux/thread_info.h:21:
In file included from arch/powerpc/include/asm/current.h:13:
In file included from arch/powerpc/include/asm/paca.h:31:
In file included from arch/powerpc/include/asm/atomic.h:13:
In file included from arch/powerpc/include/asm/ppc_asm.h:9:
In file included from arch/powerpc/include/asm/processor.h:40:

arch/powerpc/include/asm/ptrace.h:288:20: error: use of undeclared identifier 
'THREAD_SIZE'

return ((addr & ~(THREAD_SIZE - 1))  ==
  ^
arch/powerpc/include/asm/ptrace.h:289:35: error: use of undeclared 
identifier 'THREAD_SIZE'
(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));


Most likely a circular inclusion problem.

I'll have to put it in a header that doesn't include a pile of other stuff. The least bad
candidate seems to be asm-const.h.


Christophe


^
In file included from arch/powerpc/kernel/asm-offsets.c:21:
include/linux/mman.h:137:9: warning: division by zero is undefined 
[-Wdivision-by-zero]
   _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED) |
   ^~~~
include/linux/mman.h:115:21: note: expanded from macro '_calc_vm_trans'
   : ((x) & (bit1)) / ((bit1) / (bit2
^ ~
include/linux/mman.h:138:9: warning: division by zero is undefined 
[-Wdivision-by-zero]
   _calc_vm_trans(flags, MAP_SYNC,   VM_SYNC  );
   ^~~~
include/linux/mman.h:115:21: note: expanded from macro '_calc_vm_trans'
   : ((x) & (bit1)) / ((bit1) / (bit2
^ ~
2 warnings and 2 errors generated.
make[2]: *** [scripts/Makefile.build:117: 
arch/powerpc/kernel/asm-offsets.s] Error 1
make[2]: Target '__build' not remade because of errors.
make[1]: *** [Makefile:1202: prepare0] Error 2
make[1]: Target 'prepare' not remade because of errors.
make: *** [Makefile:185: __sub-make] Error 2
make: Target 'prepare' not remade because of errors.

vim +/THREAD_SIZE +288 arch/powerpc/include/asm/ptrace.h

359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  275
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  276  /**
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  277   * 
regs_within_kernel_stack() - check the address in the stack
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  278   * @regs:  pt_regs which 
contains kernel stack pointer.
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  279  

Re: [PATCH] asm-generic: Force inlining of get_order() to work around gcc10 poor decision

2020-10-19 Thread Segher Boessenkool
On Mon, Oct 19, 2020 at 10:54:40AM +0200, Christophe Leroy wrote:
> On 19/10/2020 at 10:32, Segher Boessenkool wrote:
> >The kernel should just use __always_inline if that is what it *wants*;
> >that is true here most likely.  GCC could perhaps improve its heuristics
> >so that it no longer thinks these functions are often too big for
> >inlining (they *are* pretty big, but not after basic optimisations with
> >constant integer arguments).
> 
> Yes, I guess __always_inline should be added to functions like this that
> are defined in headers, for exactly that reason; that's the purpose of this patch.
> 
> However I find it odd that get_order() is outlined by GCC even in some 
> object files that don't use it at all, for instance in fs/pipe.o

It is (arguably) too big to always inline if you do not consider that
__builtin_constant_p will remove half of the function one way or
another.  Not sure if that is what happens here, but now we have a PR
(thanks!) and we will find out.
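
For reference, the workaround under discussion is simply this (sketch;
the real body lives in include/asm-generic/getorder.h and differs in
detail):

	/* force inlining so GCC 10 cannot emit an out-of-line copy */
	static __always_inline __attribute_const__ int get_order(unsigned long size)
	{
		if (__builtin_constant_p(size)) {
			if (!size)
				return BITS_PER_LONG - PAGE_SHIFT;
			if (size < (1UL << PAGE_SHIFT))
				return 0;
			return ilog2(size - 1) - PAGE_SHIFT + 1;
		}
		size--;
		size >>= PAGE_SHIFT;
		return fls64(size);
	}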


Segher


Re: [PATCH 11/20] dt-bindings: usb: dwc3: Add synopsys,dwc3 compatible string

2020-10-19 Thread Serge Semin
On Fri, Oct 16, 2020 at 01:53:40PM -0500, Rob Herring wrote:
> On Thu, Oct 15, 2020 at 12:35:54AM +0300, Serge Semin wrote:
> > On Wed, Oct 14, 2020 at 10:18:18PM +0200, Krzysztof Kozlowski wrote:
> > > On Wed, Oct 14, 2020 at 01:13:53PM +0300, Serge Semin wrote:
> > > > The DWC USB3 driver and some DTS files like Exynos 5250, Keystone k2e,
> > > > etc. expect the DWC USB3 DT node to have a compatible string with the
> > > > "synopsys" vendor prefix. Let's add the corresponding compatible string
> > > > to the controller DT schema, but mark it as deprecated, since Synopsys,
> > > > Inc. is represented by just the "snps" vendor prefix.
> > > 
> > 
> > > Instead of adding deprecated schema just correct the DTSes to use snps.
> > > The "synopsys" is not even in vendor prefixes.
> > 
> > Yeah, it's not, but the driver and some dts'es use it this way. I am not
> > sure that the solution suggested by you is much better than mine. So let's
> > hear Rob's opinion on this matter. @Rob, what do you think?
> 

> I think we should fix the dts files given there's only 5.

Ok. I'll do that.

-Sergey

> 
> Rob
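
For reference, the DTS-side fix amounts to a one-line compatible rename
per file; a sketch with an invented node and address:

	usb@fe200000 {                       /* hypothetical node */
		compatible = "snps,dwc3";    /* was: "synopsys,dwc3" */
		reg = <0xfe200000 0x10000>;
	};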


Re: [PATCH 1/4] KVM: PPC: Book3S HV: Make struct kernel_param_ops definition const

2020-10-19 Thread Paolo Bonzini
On 04/10/20 02:18, Joe Perches wrote:
> This should be const, so make it so.
> 
> Signed-off-by: Joe Perches 
> ---
>  arch/powerpc/kvm/book3s_hv.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 4ba06a2a306c..2b215852cdc9 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -111,7 +111,7 @@ module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
>  MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core 
> (requires indep_threads_mode=N)");
>  
>  #ifdef CONFIG_KVM_XICS
> -static struct kernel_param_ops module_param_ops = {
> +static const struct kernel_param_ops module_param_ops = {
>   .set = param_set_int,
>   .get = param_get_int,
>  };
> 

Queued, thanks.

Paolo



Re: [PATCH] drm/amd/display: Fix missing declaration of enable_kernel_vsx()

2020-10-19 Thread Christophe Leroy




On 19/10/2020 at 14:52, kernel test robot wrote:

Hi Christophe,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.9 next-20201016]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/drm-amd-display-Fix-missing-declaration-of-enable_kernel_vsx/20201019-174155
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
7cf726a59435301046250c42131554d9ccc566b8
config: arc-randconfig-r013-20201019 (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
 wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
 chmod +x ~/bin/make.cross
 # 
https://github.com/0day-ci/linux/commit/33f0ea8bebc4132d957107f55776d8f1e02df928
 git remote add linux-review https://github.com/0day-ci/linux
 git fetch --no-tags linux-review 
Christophe-Leroy/drm-amd-display-Fix-missing-declaration-of-enable_kernel_vsx/20201019-174155
 git checkout 33f0ea8bebc4132d957107f55776d8f1e02df928
 # save the attached .config to linux build tree
 COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

In file included from 
drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services_types.h:29,
 from 
drivers/gpu/drm/amd/amdgpu/../include/dm_pp_interface.h:26,
 from drivers/gpu/drm/amd/amdgpu/amdgpu.h:67,
 from drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:40:

drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h:36:10: fatal error: 
asm/switch-to.h: No such file or directory

   36 | #include 
  |  ^
compilation terminated.



Argh! Yes, that's a typo. And anyway it fixes nothing because <asm/switch_to.h>
is already included.

The issue is that enable_kernel_vsx() is only declared when CONFIG_VSX is set.
The simplest solution will probably be to declare it at all times.


Christophe



vim +36 drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h

 34 
 35 #include 
   > 36  #include <asm/switch-to.h>
 37 

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
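
[Editor's note: a minimal sketch of the direction suggested above,
declaring the helpers at all times in asm/switch_to.h. The BUILD_BUG()
stubs are an assumption about how the !CONFIG_VSX case could be
handled, not the posted fix.]

#ifdef CONFIG_VSX
extern void enable_kernel_vsx(void);
extern void disable_kernel_vsx(void);
#else
static inline void enable_kernel_vsx(void)
{
	BUILD_BUG();	/* compile error if a !CONFIG_VSX path calls this */
}
static inline void disable_kernel_vsx(void)
{
	BUILD_BUG();
}
#endif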



Re: [PATCH 3/3] powerpc: Fix pre-update addressing in inline assembly

2020-10-19 Thread kernel test robot
Hi Christophe,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on linus/master next-20201016]
[cannot apply to kvm-ppc/kvm-ppc-next mpe/next v5.9]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-uaccess-Don-t-use-m-constraint-with-GCC-4-9/20201019-201504
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc64-randconfig-r012-20201019 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
094e9f4779eb9b5c6a49014f2f80b8cbb833572f)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install powerpc64 cross compiling tool for clang build
# apt-get install binutils-powerpc64-linux-gnu
# 
https://github.com/0day-ci/linux/commit/d57fd8d270993414b8c0414d7be4b03cc3de1856
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Christophe-Leroy/powerpc-uaccess-Don-t-use-m-constraint-with-GCC-4-9/20201019-201504
git checkout d57fd8d270993414b8c0414d7be4b03cc3de1856
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross 
ARCH=powerpc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from arch/powerpc/kernel/asm-offsets.c:14:
   In file included from include/linux/compat.h:14:
   In file included from include/linux/sem.h:5:
   In file included from include/uapi/linux/sem.h:5:
   In file included from include/linux/ipc.h:5:
   In file included from include/linux/spinlock.h:51:
   In file included from include/linux/preempt.h:78:
   In file included from ./arch/powerpc/include/generated/asm/preempt.h:1:
   In file included from include/asm-generic/preempt.h:5:
   In file included from include/linux/thread_info.h:21:
   In file included from arch/powerpc/include/asm/current.h:13:
   In file included from arch/powerpc/include/asm/paca.h:31:
   In file included from arch/powerpc/include/asm/atomic.h:13:
   In file included from arch/powerpc/include/asm/ppc_asm.h:9:
   In file included from arch/powerpc/include/asm/processor.h:40:
>> arch/powerpc/include/asm/ptrace.h:288:20: error: use of undeclared 
>> identifier 'THREAD_SIZE'
   return ((addr & ~(THREAD_SIZE - 1))  ==
 ^
   arch/powerpc/include/asm/ptrace.h:289:35: error: use of undeclared 
identifier 'THREAD_SIZE'
   (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
   ^
   In file included from arch/powerpc/kernel/asm-offsets.c:21:
   include/linux/mman.h:137:9: warning: division by zero is undefined 
[-Wdivision-by-zero]
  _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED) |
  ^~~~
   include/linux/mman.h:115:21: note: expanded from macro '_calc_vm_trans'
  : ((x) & (bit1)) / ((bit1) / (bit2
   ^ ~
   include/linux/mman.h:138:9: warning: division by zero is undefined 
[-Wdivision-by-zero]
  _calc_vm_trans(flags, MAP_SYNC,   VM_SYNC  );
  ^~~~
   include/linux/mman.h:115:21: note: expanded from macro '_calc_vm_trans'
  : ((x) & (bit1)) / ((bit1) / (bit2
   ^ ~
   2 warnings and 2 errors generated.
   make[2]: *** [scripts/Makefile.build:117: arch/powerpc/kernel/asm-offsets.s] 
Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1202: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:185: __sub-make] Error 2
   make: Target 'prepare' not remade because of errors.

vim +/THREAD_SIZE +288 arch/powerpc/include/asm/ptrace.h

359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  275  
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  276  /**
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  277   * regs_within_kernel_stack() - check the address in the stack
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  278   * @regs:  pt_regs which contains kernel stack pointer.
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  279   * @addr:  address which is checked.
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  280   *
359e4284a3f37ab Mahesh Salgaonkar 2010-04-07  281   * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
359e4284a3f37ab Mahesh Salgaonkar 2010

Re: [PATCH] drm/amd/display: Fix missing declaration of enable_kernel_vsx()

2020-10-19 Thread kernel test robot
Hi Christophe,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.9 next-20201016]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/drm-amd-display-Fix-missing-declaration-of-enable_kernel_vsx/20201019-174155
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
7cf726a59435301046250c42131554d9ccc566b8
config: x86_64-randconfig-a015-20201019 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
094e9f4779eb9b5c6a49014f2f80b8cbb833572f)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# 
https://github.com/0day-ci/linux/commit/33f0ea8bebc4132d957107f55776d8f1e02df928
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Christophe-Leroy/drm-amd-display-Fix-missing-declaration-of-enable_kernel_vsx/20201019-174155
git checkout 33f0ea8bebc4132d957107f55776d8f1e02df928
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from 
drivers/gpu/drm/amd/amdgpu/../pm/powerplay/hwmgr/tonga_baco.c:23:
   In file included from drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgpu.h:67:
   In file included from 
drivers/gpu/drm/amd/amdgpu/../include/dm_pp_interface.h:26:
   In file included from 
drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services_types.h:29:
>> drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h:36:10: fatal error: 
>> 'asm/switch-to.h' file not found
   #include <asm/switch-to.h>
            ^
   1 error generated.

vim +36 drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h

34  
35  #include 
  > 36  #include <asm/switch-to.h>
37  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org




Re: [PATCH v4 2/2] lkdtm/powerpc: Add SLB multihit test

2020-10-19 Thread Michal Suchánek
On Mon, Oct 19, 2020 at 09:59:57PM +1100, Michael Ellerman wrote:
> Hi Ganesh,
> 
> Some comments below ...
> 
> Ganesh Goudar  writes:
> > To check machine check handling, add support to inject slb
> > multihit errors.
> >
> > Cc: Kees Cook 
> > Reviewed-by: Michal Suchánek 
> > Co-developed-by: Mahesh Salgaonkar 
> > Signed-off-by: Mahesh Salgaonkar 
> > Signed-off-by: Ganesh Goudar 
> > ---
> >  drivers/misc/lkdtm/Makefile |   1 +
> >  drivers/misc/lkdtm/core.c   |   3 +
> >  drivers/misc/lkdtm/lkdtm.h  |   3 +
> >  drivers/misc/lkdtm/powerpc.c| 156 
> >  tools/testing/selftests/lkdtm/tests.txt |   1 +
> >  5 files changed, 164 insertions(+)
> >  create mode 100644 drivers/misc/lkdtm/powerpc.c
> >
> ..
> > diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c
> > new file mode 100644
> > index ..f388b53dccba
> > --- /dev/null
> > +++ b/drivers/misc/lkdtm/powerpc.c
> > @@ -0,0 +1,156 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#include "lkdtm.h"
> > +#include 
> > +#include 
> 
> Usual style is to include the linux headers first and then the local header.
> 
> > +
> > +/* Gets index for new slb entry */
> > +static inline unsigned long get_slb_index(void)
> > +{
> > +   unsigned long index;
> > +
> > +   index = get_paca()->stab_rr;
> > +
> > +   /*
> > +* simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
> > +*/
> > +   if (index < (mmu_slb_size - 1))
> > +   index++;
> > +   else
> > +   index = SLB_NUM_BOLTED;
> > +   get_paca()->stab_rr = index;
> > +   return index;
> > +}
> 
> I'm not sure we need that really?
> 
> We can just always insert at SLB_NUM_BOLTED and SLB_NUM_BOLTED + 1.
> 
> Or we could allocate from the top down using mmu_slb_size - 1, and
> mmu_slb_size - 2.
> 
> 
> > +#define slb_esid_mask(ssize)   \
> > +   (((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
> > +
> > +/* Form the operand for slbmte */
> > +static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
> > +unsigned long slot)
> > +{
> > +   return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
> > +}
> > +
> > +#define slb_vsid_shift(ssize)  \
> > +   ((ssize) == MMU_SEGSIZE_256M ? SLB_VSID_SHIFT : SLB_VSID_SHIFT_1T)
> > +
> > +/* Form the operand for slbmte */
> > +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
> > +unsigned long flags)
> > +{
> > +   return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
> > +   ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT);
> > +}
> 
> I realise it's not much code, but I'd rather those were in a header,
> rather than copied from slb.c. That way they can never skew vs the
> versions in slb.c
> 
> Best place I think would be arch/powerpc/include/asm/book3s/64/mmu-hash.h
> 
> 
> > +
> > +/* Inserts new slb entry */
> 
> It inserts two.
> 
> > +static void insert_slb_entry(char *p, int ssize)
> > +{
> > +   unsigned long flags, entry;
> > +
> > +   flags = SLB_VSID_KERNEL | mmu_psize_defs[MMU_PAGE_64K].sllp;
> 
> That won't work if the kernel is built for 4K pages. Or at least it
> won't work the way we want it to.
> 
> You should use mmu_linear_psize.
> 
> But for vmalloc you should use mmu_vmalloc_psize, so it will need to be
> a parameter.
> 
> > +   preempt_disable();
> > +
> > +   entry = get_slb_index();
> > +   asm volatile("slbmte %0,%1" :
> > +   : "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
> > + "r" (mk_esid_data((unsigned long)p, ssize, entry))
> > +   : "memory");
> > +
> > +   entry = get_slb_index();
> > +   asm volatile("slbmte %0,%1" :
> > +   : "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
> > + "r" (mk_esid_data((unsigned long)p, ssize, entry))
> > +   : "memory");
> > +   preempt_enable();
> > +   /*
> > +* This triggers exception, If handled correctly we must recover
> > +* from this error.
> > +*/
> > +   p[0] = '!';
> 
> That doesn't belong in here, it should be done by the caller.
> 
> That would also mean p could be unsigned long in here, so you wouldn't
> have to cast it four times.
> 
> > +}
> > +
> > +/* Inject slb multihit on vmalloc-ed address i.e 0xD00... */
> > +static void inject_vmalloc_slb_multihit(void)
> > +{
> > +   char *p;
> > +
> > +   p = vmalloc(2048);
> 
> vmalloc() allocates whole pages, so it may as well be vmalloc(PAGE_SIZE).
> 
> > +   if (!p)
> > +   return;
> 
> That's unlikely, but it should be an error that's propagated up to the caller.
> 
> > +
> > +   insert_slb_entry(p, MMU_SEGSIZE_1T);
> > +   vfree(p);
> > +}
> > +
> > +/* Inject slb multihit on kmalloc-ed address i.e 0xC00... */
> > +static void inject_kmalloc_slb_multihit(void)
> > +{
> > +   char *p;
> > +
> > +   p = kmalloc(2048, GFP_KERNEL);

Re: [PATCH] drm/amd/display: Fix missing declaration of enable_kernel_vsx()

2020-10-19 Thread kernel test robot
Hi Christophe,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.9 next-20201016]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/drm-amd-display-Fix-missing-declaration-of-enable_kernel_vsx/20201019-174155
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
7cf726a59435301046250c42131554d9ccc566b8
config: arc-randconfig-r013-20201019 (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/33f0ea8bebc4132d957107f55776d8f1e02df928
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Christophe-Leroy/drm-amd-display-Fix-missing-declaration-of-enable_kernel_vsx/20201019-174155
git checkout 33f0ea8bebc4132d957107f55776d8f1e02df928
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from 
drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services_types.h:29,
from 
drivers/gpu/drm/amd/amdgpu/../include/dm_pp_interface.h:26,
from drivers/gpu/drm/amd/amdgpu/amdgpu.h:67,
from drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:40:
>> drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h:36:10: fatal error: 
>> asm/switch-to.h: No such file or directory
  36 | #include <asm/switch-to.h>
     |          ^~~~~~~~~~~~~~~~~
   compilation terminated.

vim +36 drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h

34  
35  #include 
  > 36  #include <asm/switch-to.h>
37  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org




[PATCH 1/3] powerpc/uaccess: Don't use "m<>" constraint with GCC 4.9

2020-10-19 Thread Christophe Leroy
GCC 4.9 sometimes fails to build with "m<>" constraint in
inline assembly.

  CC  lib/iov_iter.o
In file included from ./arch/powerpc/include/asm/cmpxchg.h:6:0,
 from ./arch/powerpc/include/asm/atomic.h:11,
 from ./include/linux/atomic.h:7,
 from ./include/linux/crypto.h:15,
 from ./include/crypto/hash.h:11,
 from lib/iov_iter.c:2:
lib/iov_iter.c: In function 'iovec_from_user.part.30':
./arch/powerpc/include/asm/uaccess.h:287:2: error: 'asm' operand has impossible 
constraints
  __asm__ __volatile__(\
  ^
./include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
 # define unlikely(x) __builtin_expect(!!(x), 0)
  ^
./arch/powerpc/include/asm/uaccess.h:583:34: note: in expansion of macro 
'unsafe_op_wrap'
 #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
  ^
./arch/powerpc/include/asm/uaccess.h:329:10: note: in expansion of macro 
'__get_user_asm'
  case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \
  ^
./arch/powerpc/include/asm/uaccess.h:363:3: note: in expansion of macro 
'__get_user_size_allowed'
   __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \
   ^
./arch/powerpc/include/asm/uaccess.h:100:2: note: in expansion of macro 
'__get_user_nocheck'
  __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
  ^
./arch/powerpc/include/asm/uaccess.h:583:49: note: in expansion of macro 
'__get_user_allowed'
 #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
 ^
lib/iov_iter.c:1663:3: note: in expansion of macro 'unsafe_get_user'
   unsafe_get_user(len, [i].iov_len, uaccess_end);
   ^
make[1]: *** [scripts/Makefile.build:283: lib/iov_iter.o] Error 1

Define a UPD_CONSTR macro that is "<>" by default and
empty ("") with GCC versions prior to GCC 5.

Fixes: fcf1f26895a4 ("powerpc/uaccess: Add pre-update addressing to 
__put_user_asm_goto()")
Fixes: 2f279eeb68b8 ("powerpc/uaccess: Add pre-update addressing to 
__get_user_asm() and __put_user_asm()")
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/ppc_asm.h | 14 ++
 arch/powerpc/include/asm/uaccess.h |  4 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index 511786f0e40d..471c7c57fc98 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -803,6 +803,20 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, 
CPU_FTR_CELL_TB_BUG, 96)
 
 #endif /* !CONFIG_PPC_BOOK3E */
 
+#else /* __ASSEMBLY */
+
+/*
+ * Inline assembly memory constraint
+ *
+ * GCC 4.9 doesn't properly handle pre update memory constraint "m<>"
+ *
+ */
+#if defined(GCC_VERSION) && GCC_VERSION < 5
+#define UPD_CONSTR ""
+#else
+#define UPD_CONSTR "<>"
+#endif
+
 #endif /*  __ASSEMBLY__ */
 
 /*
diff --git a/arch/powerpc/include/asm/uaccess.h 
b/arch/powerpc/include/asm/uaccess.h
index 604d705f1bb8..8f27ea48fadb 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -223,7 +223,7 @@ do {
\
"1: " op "%U1%X1 %0,%1  # put_user\n"   \
EX_TABLE(1b, %l2)   \
:   \
-   : "r" (x), "m<>" (*addr)\
+   : "r" (x), "m"UPD_CONSTR (*addr)\
:   \
: label)
 
@@ -294,7 +294,7 @@ extern long __get_user_bad(void);
".previous\n"   \
EX_TABLE(1b, 3b)\
: "=r" (err), "=r" (x)  \
-   : "m<>" (*addr), "i" (-EFAULT), "0" (err))
+   : "m"UPD_CONSTR (*addr), "i" (-EFAULT), "0" (err))
 
 #ifdef __powerpc64__
 #define __get_user_asm2(x, addr, err)  \
-- 
2.25.0
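
[Editor's note: a minimal sketch, not part of the patch, of how the
UPD_CONSTR constraint and the %Un modifier cooperate. It assumes a
powerpc target and the macro defined above; the helper name is made
up.]

/* With GCC >= 5 the constraint is "m<>", so GCC may choose a
 * pre-update address and %U0 expands to the "u" suffix, giving
 * stwu; with GCC 4.9 the constraint is plain "m" and %U0 stays
 * empty, giving stw.
 */
static inline void example_store(unsigned int *p, unsigned int val)
{
	__asm__ __volatile__("stw%U0%X0 %1,%0"
			     : "=m"UPD_CONSTR (*p)
			     : "r" (val));
}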



[PATCH 3/3] powerpc: Fix pre-update addressing in inline assembly

2020-10-19 Thread Christophe Leroy
In several places, inline assembly uses the "%Un" modifier
to enable the use of instructions with pre-update addressing,
but the associated "<>" constraint is missing.

As mentioned in the previous patch, this fails with GCC 4.9, so
"<>" can't be used directly.

Use the UPD_CONSTR macro everywhere the %Un modifier is used.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/atomic.h| 9 +
 arch/powerpc/include/asm/book3s/32/pgtable.h | 2 +-
 arch/powerpc/include/asm/io.h| 4 ++--
 arch/powerpc/include/asm/nohash/pgtable.h| 2 +-
 arch/powerpc/kvm/powerpc.c   | 4 ++--
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/atomic.h 
b/arch/powerpc/include/asm/atomic.h
index 8a55eb8cc97b..b82f9154e45a 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
@@ -26,14 +27,14 @@ static __inline__ int atomic_read(const atomic_t *v)
 {
int t;
 
-   __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
+   __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : 
"m"UPD_CONSTR(v->counter));
 
return t;
 }
 
 static __inline__ void atomic_set(atomic_t *v, int i)
 {
-   __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+   __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : 
"r"(i));
 }
 
 #define ATOMIC_OP(op, asm_op)  \
@@ -316,14 +317,14 @@ static __inline__ s64 atomic64_read(const atomic64_t *v)
 {
s64 t;
 
-   __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
+   __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : 
"m"UPD_CONSTR(v->counter));
 
return t;
 }
 
 static __inline__ void atomic64_set(atomic64_t *v, s64 i)
 {
-   __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+   __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : 
"r"(i));
 }
 
 #define ATOMIC64_OP(op, asm_op)
\
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 34f5ca391f0c..0e1b6e020cef 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -525,7 +525,7 @@ static inline void __set_pte_at(struct mm_struct *mm, 
unsigned long addr,
stw%U0%X0 %2,%0\n\
eieio\n\
stw%U1%X1 %L2,%1"
-   : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+   : "=m"UPD_CONSTR (*ptep), "=m"UPD_CONSTR (*((unsigned char *)ptep+4))
: "r" (pte) : "memory");
 
 #else
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 58635960403c..87964dfb838e 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -122,7 +122,7 @@ static inline u##size name(const volatile u##size __iomem 
*addr)\
 {  \
u##size ret;\
__asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\
-   : "=r" (ret) : "m" (*addr) : "memory"); \
+   : "=r" (ret) : "m"UPD_CONSTR (*addr) : "memory");   \
return ret; \
 }
 
@@ -130,7 +130,7 @@ static inline u##size name(const volatile u##size __iomem 
*addr)\
 static inline void name(volatile u##size __iomem *addr, u##size val)   \
 {  \
__asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \
-   : "=m" (*addr) : "r" (val) : "memory"); \
+   : "=m"UPD_CONSTR (*addr) : "r" (val) : "memory");   \
mmiowb_set_pending();   \
 }
 
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 
b/arch/powerpc/include/asm/nohash/pgtable.h
index a00e4c1746d6..55ef2112ed00 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -200,7 +200,7 @@ static inline void __set_pte_at(struct mm_struct *mm, 
unsigned long addr,
stw%U0%X0 %2,%0\n\
eieio\n\
stw%U1%X1 %L2,%1"
-   : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+   : "=m"UPD_CONSTR (*ptep), "=m"UPD_CONSTR (*((unsigned char 
*)ptep+4))
: "r" (pte) : "memory");
return;
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 13999123b735..cf52d26f49cd 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1087,7 +1087,7 @@ static inline u64 sp_to_dp(u32 

[PATCH 2/3] powerpc: Fix incorrect stw{, ux, u, x} instructions in __set_pte_at

2020-10-19 Thread Christophe Leroy
From: Mathieu Desnoyers 

The placeholder for instruction selection should use the second
argument's operand, which is %1, not %0. This could generate incorrect
assembly code if the instruction selection for argument %0 ever differs
from argument %1.

Fixes: 9bf2b5cdc5fe ("powerpc: Fixes for CONFIG_PTE_64BIT for SMP support")
Signed-off-by: Mathieu Desnoyers 
Cc: Christophe Leroy 
Cc: Kumar Gala 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
Cc:  # v2.6.28+
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 2 +-
 arch/powerpc/include/asm/nohash/pgtable.h| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 36443cda8dcf..34f5ca391f0c 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -524,7 +524,7 @@ static inline void __set_pte_at(struct mm_struct *mm, 
unsigned long addr,
__asm__ __volatile__("\
stw%U0%X0 %2,%0\n\
eieio\n\
-   stw%U0%X0 %L2,%1"
+   stw%U1%X1 %L2,%1"
: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
: "r" (pte) : "memory");
 
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 
b/arch/powerpc/include/asm/nohash/pgtable.h
index 4b7c3472eab1..a00e4c1746d6 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -199,7 +199,7 @@ static inline void __set_pte_at(struct mm_struct *mm, 
unsigned long addr,
__asm__ __volatile__("\
stw%U0%X0 %2,%0\n\
eieio\n\
-   stw%U0%X0 %L2,%1"
+   stw%U1%X1 %L2,%1"
: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
: "r" (pte) : "memory");
return;
-- 
2.25.0
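
[Editor's note: to spell out the failure mode this fix prevents, a
hypothetical expansion; register numbers and offsets are made up.]

/*
 * With two memory operands, %U0/%X0 describe operand 0's addressing
 * mode only.  If GCC picks a pre-update address for operand 0 but a
 * plain one for operand 1, the old template
 *         "stw%U0%X0 %L2,%1"
 * could emit
 *         stwu r10,4(r9)
 * for the second store, updating base register r9 even though
 * operand 1's address never asked for an update form.  Keying the
 * modifiers to the operand actually stored through,
 *         "stw%U1%X1 %L2,%1"
 * keeps the selected instruction consistent with the address it uses.
 */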



[PATCH v1 1/2] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE (nested case only)

2020-10-19 Thread Bharata B Rao
Implement the H_RPT_INVALIDATE hcall; currently only the nested
case is supported.

A KVM capability KVM_CAP_RPT_INVALIDATE is added to indicate the
support for this hcall.

Signed-off-by: Bharata B Rao 
---
 Documentation/virt/kvm/api.rst| 17 
 .../include/asm/book3s/64/tlbflush-radix.h| 18 
 arch/powerpc/include/asm/kvm_book3s.h |  3 +
 arch/powerpc/kvm/book3s_hv.c  | 32 +++
 arch/powerpc/kvm/book3s_hv_nested.c   | 94 +++
 arch/powerpc/kvm/powerpc.c|  3 +
 arch/powerpc/mm/book3s64/radix_tlb.c  |  4 -
 include/uapi/linux/kvm.h  |  1 +
 8 files changed, 168 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1f26d83e6b168..67e98a56271ae 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5852,6 +5852,23 @@ controlled by the kvm module parameter halt_poll_ns. 
This capability allows
 the maximum halt time to specified on a per-VM basis, effectively overriding
 the module parameter for the target VM.
 
+7.21 KVM_CAP_RPT_INVALIDATE
+--
+
+:Capability: KVM_CAP_RPT_INVALIDATE
+:Architectures: ppc
+:Type: vm
+
+This capability indicates that the kernel is capable of handling
+H_RPT_INVALIDATE hcall.
+
+In order to enable the use of H_RPT_INVALIDATE in the guest,
+user space might have to advertise it for the guest. For example,
+IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+This capability is always enabled.
+
 8. Other capabilities.
 ==
 
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h 
b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 94439e0cefc9c..aace7e9b2397d 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -4,6 +4,10 @@
 
 #include 
 
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
 struct vm_area_struct;
 struct mm_struct;
 struct mmu_gather;
@@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize)
return H_RPTI_PAGE_ALL;
 }
 
+static inline int rpti_pgsize_to_psize(unsigned long page_size)
+{
+   if (page_size == H_RPTI_PAGE_4K)
+   return MMU_PAGE_4K;
+   if (page_size == H_RPTI_PAGE_64K)
+   return MMU_PAGE_64K;
+   if (page_size == H_RPTI_PAGE_2M)
+   return MMU_PAGE_2M;
+   if (page_size == H_RPTI_PAGE_1G)
+   return MMU_PAGE_1G;
+   else
+   return MMU_PAGE_64K; /* Default */
+}
+
 static inline int mmu_get_ap(int psize)
 {
return mmu_psize_defs[psize].ap;
diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index d32ec9ae73bd4..0f1c5fa6e8ce3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 
dw1);
 void kvmhv_release_all_nested(struct kvm *kvm);
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
 long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
+unsigned long type, unsigned long pg_sizes,
+unsigned long start, unsigned long end);
 int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
  u64 time_limit, unsigned long lpcr);
 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3bd3118c76330..6cbd37af91ebf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -904,6 +904,28 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
return yield_count;
 }
 
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+   unsigned long pid, unsigned long target,
+   unsigned long type, unsigned long pg_sizes,
+   unsigned long start, unsigned long end)
+{
+   if (end < start)
+   return H_P5;
+
+   if (!(type & H_RPTI_TYPE_NESTED))
+   return H_P3;
+
+   if (!nesting_enabled(vcpu->kvm))
+   return H_FUNCTION;
+
+   /* Support only cores as target */
+   if (target != H_RPTI_TARGET_CMMU)
+   return H_P2;
+
+   return kvmhv_h_rpti_nested(vcpu, pid, (type & ~H_RPTI_TYPE_NESTED),
+  pg_sizes, start, end);
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -1112,6 +1134,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 */
ret = kvmppc_h_svm_init_abort(vcpu->kvm);
break;
+ 

[PATCH v1 2/2] KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM

2020-10-19 Thread Bharata B Rao
In the nested KVM case, replace H_TLB_INVALIDATE with the new hcall
H_RPT_INVALIDATE if available. The availability of this hcall is
determined from the "hcall-rpt-invalidate" string in the
ibm,hypertas-functions DT property.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 26 +-
 arch/powerpc/kvm/book3s_hv_nested.c| 13 +++--
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 22a677b18695e..9934a91adcc3b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Supported radix tree geometry.
@@ -318,9 +319,17 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned 
long addr,
}
 
psi = shift_to_mmu_psize(pshift);
-   rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
-   rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
-   lpid, rb);
+   if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+   rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+   rc = plpar_hcall_norets(H_TLB_INVALIDATE,
+   H_TLBIE_P1_ENC(0, 0, 1), lpid, rb);
+   } else {
+   rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+   H_RPTI_TYPE_NESTED |
+   H_RPTI_TYPE_TLB,
+   psize_to_rpti_pgsize(psi),
+   addr, addr + psize);
+   }
if (rc)
pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
 }
@@ -334,8 +343,15 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, 
unsigned int lpid)
return;
}
 
-   rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
-   lpid, TLBIEL_INVAL_SET_LPID);
+   if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+   rc = plpar_hcall_norets(H_TLB_INVALIDATE,
+   H_TLBIE_P1_ENC(1, 0, 1),
+   lpid, TLBIEL_INVAL_SET_LPID);
+   else
+   rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+   H_RPTI_TYPE_NESTED |
+   H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+   0, -1UL);
if (rc)
pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index 3ec0231628b42..2a187c782e89b 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static struct patb_entry *pseries_partition_tb;
 
@@ -402,8 +403,16 @@ static void kvmhv_flush_lpid(unsigned int lpid)
return;
}
 
-   rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
-   lpid, TLBIEL_INVAL_SET_LPID);
+   if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+   rc = plpar_hcall_norets(H_TLB_INVALIDATE,
+   H_TLBIE_P1_ENC(2, 0, 1),
+   lpid, TLBIEL_INVAL_SET_LPID);
+   else
+   rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+   H_RPTI_TYPE_NESTED |
+   H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+   H_RPTI_TYPE_PAT,
+   H_RPTI_PAGE_ALL, 0, -1UL);
if (rc)
pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
 }
-- 
2.26.2



[PATCH v1 0/2] Use H_RPT_INVALIDATE for nested guest

2020-10-19 Thread Bharata B Rao
This patchset adds support for the new hcall H_RPT_INVALIDATE
(currently handling the nested case only) and replaces the nested TLB
flush calls with this new hcall where support for it exists.

Changes in v1:
-
- Removed the bits that added the FW_FEATURE_RPT_INVALIDATE feature
  as they are already upstream.

v0: 
https://lore.kernel.org/linuxppc-dev/20200703104420.21349-1-bhar...@linux.ibm.com/T/#m1800c5f5b3d4f6a154ae58fc1c617c06f286358f

H_RPT_INVALIDATE

Syntax:
int64   /* H_Success: Return code on successful completion */
    /* H_Busy - repeat the call with the same */
    /* H_Parameter, H_P2, H_P3, H_P4, H_P5 : Invalid parameters */
    hcall(const uint64 H_RPT_INVALIDATE, /* Invalidate RPT translation 
lookaside information */
  uint64 pid,   /* PID/LPID to invalidate */
  uint64 target,    /* Invalidation target */
  uint64 type,  /* Type of lookaside information */
  uint64 pageSizes, /* Page sizes */
  uint64 start, /* Start of Effective Address (EA) range 
(inclusive) */
  uint64 end)   /* End of EA range (exclusive) */

Invalidation targets (target)
-
Core MMU    0x01 /* All virtual processors in the partition */
Core local MMU  0x02 /* Current virtual processor */
Nest MMU    0x04 /* All nest/accelerator agents in use by the partition */

A combination of the above can be specified, except core and core local.

Type of translation to invalidate (type)
---
NESTED   0x0001  /* Invalidate nested guest partition-scope */
TLB  0x0002  /* Invalidate TLB */
PWC  0x0004  /* Invalidate Page Walk Cache */
PRT  0x0008  /* Invalidate Process Table Entries if NESTED is clear */
PAT  0x0008  /* Invalidate Partition Table Entries if NESTED is set */

A combination of the above can be specified.

Page size mask (pageSizes)
--
4K  0x01
64K 0x02
2M  0x04
1G  0x08
All sizes   (-1UL)

A combination of the above can be specified.
All page sizes can be selected with -1.

Semantics: Invalidate radix tree lookaside information
   matching the parameters given.
* Return H_P2, H_P3 or H_P4 if target, type, or pageSizes parameters are
  different from the defined values.
* Return H_PARAMETER if NESTED is set and pid is not a valid nested
  LPID allocated to this partition
* Return H_P5 if (start, end) doesn't form a valid range. Start and end
  should be a valid Quadrant address and end > start.
* Return H_NotSupported if the partition is not running in radix
  translation mode.
* May invalidate more translation information than requested.
* If start = 0 and end = -1, set the range to cover all valid addresses.
  Else start and end should be aligned to 4kB (lower 12 bits clear).
* If NESTED is clear, then invalidate process scoped lookaside information.
  Else pid specifies a nested LPID, and the invalidation is performed
  on nested guest partition table and nested guest partition scope real
  addresses.
* If pid = 0 and NESTED is clear, then valid addresses are quadrant 3 and
  quadrant 0 spaces, else valid addresses are quadrant 0.
* Pages which are fully covered by the range are to be invalidated.
  Those which are partially covered are considered outside invalidation
  range, which allows a caller to optimally invalidate ranges that may
  contain mixed page sizes.
* Return H_SUCCESS on success.
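
[Editor's note: as an illustration of the semantics above, a sketch of
how a caller might encode a full nested-TLB flush. It mirrors the
pseries_rpt_invalidate() usage from patch 2/2; the LPID value 5 is
made up.]

/* Invalidate all TLB entries for nested LPID 5, all page sizes,
 * entire EA range, targeting the core MMU of all virtual processors.
 */
long rc = pseries_rpt_invalidate(5, H_RPTI_TARGET_CMMU,
				 H_RPTI_TYPE_NESTED | H_RPTI_TYPE_TLB,
				 H_RPTI_PAGE_ALL, 0, -1UL);
if (rc)
	pr_err("H_RPT_INVALIDATE failed, rc=%ld\n", rc);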

Bharata B Rao (2):
  KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE (nested case
only)
  KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM

 Documentation/virt/kvm/api.rst|  17 +++
 .../include/asm/book3s/64/tlbflush-radix.h|  18 +++
 arch/powerpc/include/asm/kvm_book3s.h |   3 +
 arch/powerpc/kvm/book3s_64_mmu_radix.c|  26 -
 arch/powerpc/kvm/book3s_hv.c  |  32 ++
 arch/powerpc/kvm/book3s_hv_nested.c   | 107 +-
 arch/powerpc/kvm/powerpc.c|   3 +
 arch/powerpc/mm/book3s64/radix_tlb.c  |   4 -
 include/uapi/linux/kvm.h  |   1 +
 9 files changed, 200 insertions(+), 11 deletions(-)

-- 
2.26.2



Re: [PATCH v4 2/2] lkdtm/powerpc: Add SLB multihit test

2020-10-19 Thread Michael Ellerman
Hi Ganesh,

Some comments below ...

Ganesh Goudar  writes:
> To check machine check handling, add support to inject slb
> multihit errors.
>
> Cc: Kees Cook 
> Reviewed-by: Michal Suchánek 
> Co-developed-by: Mahesh Salgaonkar 
> Signed-off-by: Mahesh Salgaonkar 
> Signed-off-by: Ganesh Goudar 
> ---
>  drivers/misc/lkdtm/Makefile |   1 +
>  drivers/misc/lkdtm/core.c   |   3 +
>  drivers/misc/lkdtm/lkdtm.h  |   3 +
>  drivers/misc/lkdtm/powerpc.c| 156 
>  tools/testing/selftests/lkdtm/tests.txt |   1 +
>  5 files changed, 164 insertions(+)
>  create mode 100644 drivers/misc/lkdtm/powerpc.c
>
..
> diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c
> new file mode 100644
> index ..f388b53dccba
> --- /dev/null
> +++ b/drivers/misc/lkdtm/powerpc.c
> @@ -0,0 +1,156 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "lkdtm.h"
> +#include 
> +#include 

Usual style is to include the linux headers first and then the local header.

> +
> +/* Gets index for new slb entry */
> +static inline unsigned long get_slb_index(void)
> +{
> + unsigned long index;
> +
> + index = get_paca()->stab_rr;
> +
> + /*
> +  * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
> +  */
> + if (index < (mmu_slb_size - 1))
> + index++;
> + else
> + index = SLB_NUM_BOLTED;
> + get_paca()->stab_rr = index;
> + return index;
> +}

I'm not sure we need that really?

We can just always insert at SLB_NUM_BOLTED and SLB_NUM_BOLTED + 1.

Or we could allocate from the top down using mmu_slb_size - 1, and
mmu_slb_size - 2.
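
[Editor's note: a sketch of this suggestion, untested and with a
made-up function name, reusing the mk_esid_data()/mk_vsid_data()
helpers from the patch under review.]

/* Insert the same translation twice at fixed slots just above the
 * bolted entries; a subsequent access through 'ea' raises an SLB
 * multihit machine check.
 */
static void insert_dup_slb_entries(unsigned long ea, int ssize,
				   unsigned long flags)
{
	asm volatile("slbmte %0,%1" : :
		     "r" (mk_vsid_data(ea, ssize, flags)),
		     "r" (mk_esid_data(ea, ssize, SLB_NUM_BOLTED))
		     : "memory");
	asm volatile("slbmte %0,%1" : :
		     "r" (mk_vsid_data(ea, ssize, flags)),
		     "r" (mk_esid_data(ea, ssize, SLB_NUM_BOLTED + 1))
		     : "memory");
}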


> +#define slb_esid_mask(ssize) \
> + (((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
> +
> +/* Form the operand for slbmte */
> +static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
> +  unsigned long slot)
> +{
> + return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
> +}
> +
> +#define slb_vsid_shift(ssize)\
> + ((ssize) == MMU_SEGSIZE_256M ? SLB_VSID_SHIFT : SLB_VSID_SHIFT_1T)
> +
> +/* Form the operand for slbmte */
> +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
> +  unsigned long flags)
> +{
> + return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
> + ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT);
> +}

I realise it's not much code, but I'd rather those were in a header,
rather than copied from slb.c. That way they can never skew vs the
versions in slb.c

Best place I think would be arch/powerpc/include/asm/book3s/64/mmu-hash.h


> +
> +/* Inserts new slb entry */

It inserts two.

> +static void insert_slb_entry(char *p, int ssize)
> +{
> + unsigned long flags, entry;
> +
> + flags = SLB_VSID_KERNEL | mmu_psize_defs[MMU_PAGE_64K].sllp;

That won't work if the kernel is built for 4K pages. Or at least it
won't work the way we want it to.

You should use mmu_linear_psize.

But for vmalloc you should use mmu_vmalloc_psize, so it will need to be
a parameter.

> + preempt_disable();
> +
> + entry = get_slb_index();
> + asm volatile("slbmte %0,%1" :
> + : "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
> +   "r" (mk_esid_data((unsigned long)p, ssize, entry))
> + : "memory");
> +
> + entry = get_slb_index();
> + asm volatile("slbmte %0,%1" :
> + : "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
> +   "r" (mk_esid_data((unsigned long)p, ssize, entry))
> + : "memory");
> + preempt_enable();
> + /*
> +  * This triggers exception, If handled correctly we must recover
> +  * from this error.
> +  */
> + p[0] = '!';

That doesn't belong in here, it should be done by the caller.

That would also mean p could be unsigned long in here, so you wouldn't
have to cast it four times.

> +}
> +
> +/* Inject slb multihit on vmalloc-ed address i.e 0xD00... */
> +static void inject_vmalloc_slb_multihit(void)
> +{
> + char *p;
> +
> + p = vmalloc(2048);

vmalloc() allocates whole pages, so it may as well be vmalloc(PAGE_SIZE).

> + if (!p)
> + return;

That's unlikely, but it should be an error that's propagated up to the caller.

> +
> + insert_slb_entry(p, MMU_SEGSIZE_1T);
> + vfree(p);
> +}
> +
> +/* Inject slb multihit on kmalloc-ed address i.e 0xC00... */
> +static void inject_kmalloc_slb_multihit(void)
> +{
> + char *p;
> +
> + p = kmalloc(2048, GFP_KERNEL);
> + if (!p)
> + return;
> +
> + insert_slb_entry(p, MMU_SEGSIZE_1T);
> + kfree(p);
> +}
> +
> +/*
> + * A few initial SLB entries are bolted. Add a test to inject
> + * multihit in bolted entry 0.
> + */
> +static void insert_dup_slb_entry_0(void)
> +{
> + unsigned long 

[PATCH] drm/amd/display: Fix missing declaration of enable_kernel_vsx()

2020-10-19 Thread Christophe Leroy
Include <asm/switch_to.h> in order to avoid the following build failure
because of the missing declaration of enable_kernel_vsx():

  CC [M]  drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.o
In file included from 
./drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services_types.h:29,
 from 
./drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services.h:37,
 from 
drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c:27:
drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c: In function 
'dcn_bw_apply_registry_override':
./drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h:64:3: error: implicit 
declaration of function 'enable_kernel_vsx'; did you mean 'enable_kernel_fp'? 
[-Werror=implicit-function-declaration]
   64 |   enable_kernel_vsx(); \
  |   ^
drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c:640:2: note: in 
expansion of macro 'DC_FP_START'
  640 |  DC_FP_START();
  |  ^~~
./drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h:75:3: error: implicit 
declaration of function 'disable_kernel_vsx'; did you mean 'disable_kernel_fp'? 
[-Werror=implicit-function-declaration]
   75 |   disable_kernel_vsx(); \
  |   ^~
drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c:676:2: note: in 
expansion of macro 'DC_FP_END'
  676 |  DC_FP_END();
  |  ^
cc1: some warnings being treated as errors
make[5]: *** [drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.o] Error 
1

Fixes: 16a9dea110a6 ("amdgpu: Enable initial DCN support on POWER")
Cc: sta...@vger.kernel.org
Signed-off-by: Christophe Leroy 
---
 drivers/gpu/drm/amd/display/dc/os_types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h 
b/drivers/gpu/drm/amd/display/dc/os_types.h
index c3bbfe397e8d..9000cf188544 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -33,6 +33,7 @@
 #include 
 
 #include 
+#include <asm/switch-to.h>
 
 #include 
 
-- 
2.25.0



Re: [PATCH] asm-generic: Force inlining of get_order() to work around gcc10 poor decision

2020-10-19 Thread Christophe Leroy




On 19/10/2020 at 10:32, Segher Boessenkool wrote:

On Mon, Oct 19, 2020 at 07:50:41AM +0200, Christophe Leroy wrote:

On 19/10/2020 at 06:55, Joel Stanley wrote:

In the old days, marking a function 'static inline' was forcing
GCC to inline, but since commit ac7c3e4ff401 ("compiler: enable
CONFIG_OPTIMIZE_INLINING forcibly") GCC may decide to not inline
a function.

It looks like GCC 10 is taking poor decisions on this.



1952 bytes smaller with your patch applied. Did you raise this with
anyone from GCC?


Yes I did, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97445

For the time being, it's at a standstill.


The kernel should just use __always_inline if that is what it *wants*;
that is true here most likely.  GCC could perhaps improve its heuristics
so that it no longer thinks these functions are often too big for
inlining (they *are* pretty big, but not after basic optimisations with
constant integer arguments).



Yes, I guess __always_inline should be added to functions like this that are
defined in headers, for exactly that reason, and that's the purpose of this patch.


However, I find it odd that get_order() is outlined by GCC even in some object
files that don't use it at all, for instance fs/pipe.o.


Christophe


Re: [PATCH] asm-generic: Force inlining of get_order() to work around gcc10 poor decision

2020-10-19 Thread Segher Boessenkool
On Mon, Oct 19, 2020 at 07:50:41AM +0200, Christophe Leroy wrote:
> On 19/10/2020 at 06:55, Joel Stanley wrote:
> >>In the old days, marking a function 'static inline' was forcing
> >>GCC to inline, but since commit ac7c3e4ff401 ("compiler: enable
> >>CONFIG_OPTIMIZE_INLINING forcibly") GCC may decide to not inline
> >>a function.
> >>
> >>It looks like GCC 10 is taking poor decisions on this.

> >1952 bytes smaller with your patch applied. Did you raise this with
> >anyone from GCC?
> 
> Yes I did, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97445
> 
> For the time being, it's at a standstill.

The kernel should just use __always_inline if that is what it *wants*;
that is true here most likely.  GCC could perhaps improve its heuristics
so that it no longer thinks these functions are often too big for
inlining (they *are* pretty big, but not after basic optimisations with
constant integer arguments).


Segher