Re: [PATCH] powerpc/pseries/vas: Don't print an error when VAS is unavailable

2021-11-29 Thread Haren Myneni
On Tue, 2021-11-30 at 10:25 +1100, Michael Ellerman wrote:
> Nicholas Piggin  writes:
> > Excerpts from Cédric Le Goater's message of November 26, 2021 5:13
> > pm:
> > > On 11/26/21 06:21, Nicholas Piggin wrote:
> > > > KVM does not support VAS so guests always print a useless error
> > > > on boot
> > > > 
> > > >  vas: HCALL(398) error -2, query_type 0, result buffer
> > > > 0x57f2000
> > > > 
> > > > Change this to only print the message if the error is not
> > > > H_FUNCTION.
> > > 
> > > Just being curious, why is it even called since "ibm,compression"
> > > should
> > > not be exposed in the DT ?
> > 
> > It looks like vas does not test for it. I guess in theory there can
> > be 
> > other functions than compression implemented as an accelerator.
> > Maybe
> > that's why?
> 
> Yeah I guess, or it's just not structured that well. The vas platform
> code is a bit awkward, it's there to support drivers, but it's not
> actually driver code.
> 
> I think we can probably rework it so the vas code does nothing until
> a
> driver calls in to it.
> 
> eg. something like below.

Correct. Even though NXGZIP is the only usage right now, VAS is an
accelerator switchboard which should support other coprocessor types
such as GZIP and 842, or SW type solutions such as fast thread wakeup
and fast memory copy.

So can we leave VAS initialization separate from drivers and use some
feature such as FW_FEATURE_LPAR to differentiate from KVM guests?

Thanks
Haren

> 
> cheers
> 
> 
> diff --git a/arch/powerpc/platforms/pseries/vas.c
> b/arch/powerpc/platforms/pseries/vas.c
> index b043e3936d21..dc3491fc919d 100644
> --- a/arch/powerpc/platforms/pseries/vas.c
> +++ b/arch/powerpc/platforms/pseries/vas.c
> @@ -454,6 +454,8 @@ static const struct vas_user_win_ops vops_pseries
> = {
>   .close_win  = vas_deallocate_window, /* Close window */
>  };
>  
> +static int pseries_vas_init(void);
> +
>  /*
>   * Supporting only nx-gzip coprocessor type now, but this API code
>   * extended to other coprocessor types later.
> @@ -463,7 +465,8 @@ int vas_register_api_pseries(struct module *mod,
> enum vas_cop_type cop_type,
>  {
>   int rc;
>  
> - if (!copypaste_feat)
> + rc = pseries_vas_init();
> + if (rc || !copypaste_feat)
>   return -ENOTSUPP;
>  
>   rc = vas_register_coproc_api(mod, cop_type, name,
> _pseries);
> @@ -531,7 +534,7 @@ static int get_vas_capabilities(u8 feat, enum
> vas_cop_feat_type type,
>   return 0;
>  }
>  
> -static int __init pseries_vas_init(void)
> +static int pseries_vas_init(void)
>  {
>   struct hv_vas_cop_feat_caps *hv_cop_caps;
>   struct hv_vas_all_caps *hv_caps;
> @@ -592,4 +595,3 @@ static int __init pseries_vas_init(void)
>   kfree(hv_caps);
>   return rc;
>  }
> -machine_device_initcall(pseries, pseries_vas_init);



[RFC PATCH] powerpc/signal: sanitise PT_NIP and sa_handler low bits

2021-11-29 Thread Nicholas Piggin
The bottom 2 bits of NIP are ignored when RFI returns with SRR0 = NIP,
so regs->nip does not correspond to the actual return address if either
of those bits is set. Further, these bits are reserved in SRR0 so they
should not be set. Sanitise PT_NIP from signal handlers to ensure they
can't be set by userspace; this also keeps the low 2 bits of TFHAR clear,
which are similarly reserved. 32-bit signal delivery returns directly to
the handler, so sa_handler is sanitised similarly there.

This can cause a bug when CONFIG_PPC_RFI_SRR_DEBUG=y on a processor that
does not implement the 2 low bits of SRR0 (always read back 0) because
SRR0 will not match regs->nip. This was caught by sigfuz, but a simple
reproducer follows.

  #include <stdlib.h>
  #include <signal.h>
  #include <ucontext.h>

  static void trap_signal_handler(int signo, siginfo_t *si, void *uc)
  {
  ucontext_t *ucp = uc;
  ucp->uc_mcontext.gp_regs[PT_NIP] |= 3;
  }

  int main(void)
  {
  struct sigaction trap_sa;
  trap_sa.sa_flags = SA_SIGINFO;
  trap_sa.sa_sigaction = trap_signal_handler;
  sigaction(SIGUSR1, &trap_sa, NULL);
  raise(SIGUSR1);
  exit(EXIT_SUCCESS);
  }

Reported-by: Sachin Sant 
Signed-off-by: Nicholas Piggin 
---
I'm not entirely sure about the 32-bit / compat part. Or the 64-bit for
that matter except that it does seem to fix the bug caused by the test
program.

Thanks,
Nick

 arch/powerpc/kernel/signal_32.c | 23 ---
 arch/powerpc/kernel/signal_64.c | 17 -
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 3e053e2fd6b6..5379bece8072 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -116,7 +116,7 @@ __unsafe_restore_general_regs(struct pt_regs *regs, struct 
mcontext __user *sr)
int i;
 
for (i = 0; i <= PT_RESULT; i++) {
-   if ((i == PT_MSR) || (i == PT_SOFTE))
+   if ((i == PT_NIP) || (i == PT_MSR) || (i == PT_SOFTE))
continue;
unsafe_get_user(gregs[i], >mc_gregs[i], failed);
}
@@ -156,7 +156,7 @@ static __always_inline
 int __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user 
*sr)
 {
/* copy up to but not including MSR */
-   unsafe_copy_from_user(regs, >mc_gregs, PT_MSR * sizeof(elf_greg_t), 
failed);
+   unsafe_copy_from_user(regs, >mc_gregs, PT_NIP * sizeof(elf_greg_t), 
failed);
 
/* copy from orig_r3 (the word after the MSR) up to the end */
unsafe_copy_from_user(>orig_gpr3, >mc_gregs[PT_ORIG_R3],
@@ -458,7 +458,7 @@ static long restore_user_regs(struct pt_regs *regs,
  struct mcontext __user *sr, int sig)
 {
unsigned int save_r2 = 0;
-   unsigned long msr;
+   unsigned long nip, msr;
 #ifdef CONFIG_VSX
int i;
 #endif
@@ -473,6 +473,9 @@ static long restore_user_regs(struct pt_regs *regs,
save_r2 = (unsigned int)regs->gpr[2];
unsafe_restore_general_regs(regs, sr, failed);
set_trap_norestart(regs);
+   unsafe_get_user(nip, >mc_gregs[PT_NIP], failed);
+   nip &= ~3UL;
+   regs_set_return_ip(regs, nip);
unsafe_get_user(msr, >mc_gregs[PT_MSR], failed);
if (!sig)
regs->gpr[2] = (unsigned long) save_r2;
@@ -560,7 +563,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 struct mcontext __user *sr,
 struct mcontext __user *tm_sr)
 {
-   unsigned long msr, msr_hi;
+   unsigned long nip, msr, msr_hi;
int i;
 
if (tm_suspend_disabled)
@@ -576,7 +579,9 @@ static long restore_tm_user_regs(struct pt_regs *regs,
return 1;
 
unsafe_restore_general_regs(>thread.ckpt_regs, sr, failed);
-   unsafe_get_user(current->thread.tm_tfhar, >mc_gregs[PT_NIP], 
failed);
+   unsafe_get_user(nip, >mc_gregs[PT_NIP], failed);
+   nip &= ~3UL;
+   current->thread.tm_tfhar = nip;
unsafe_get_user(msr, >mc_gregs[PT_MSR], failed);
 
/* Restore the previous little-endian mode */
@@ -646,6 +651,10 @@ static long restore_tm_user_regs(struct pt_regs *regs,
current->thread.used_vsr = true;
}
 
+   unsafe_get_user(nip, _sr->mc_gregs[PT_NIP], failed);
+   nip &= ~3UL;
+   regs_set_return_ip(regs, nip);
+
/* Get the top half of the MSR from the user context */
unsafe_get_user(msr_hi, _sr->mc_gregs[PT_MSR], failed);
msr_hi <<= 32;
@@ -801,7 +810,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t 
*oldset,
regs->gpr[4] = (unsigned long)>info;
regs->gpr[5] = (unsigned long)>uc;
regs->gpr[6] = (unsigned long)frame;
-   regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
+   regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler & ~3UL);
/* enter the signal handler in 

Re: [PATCH v5 5/5] powerpc/inst: Optimise copy_inst_from_kernel_nofault()

2021-11-29 Thread Christophe Leroy




Le 29/11/2021 à 23:55, kernel test robot a écrit :

Hi Christophe,

I love your patch! Perhaps something to improve:

[auto build test WARNING on powerpc/next]
[also build test WARNING on v5.16-rc3 next-20211129]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211130-015346
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-r023-20211129 
(https://download.01.org/0day-ci/archive/20211130/202111300652.0ydbnvyj-...@intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 
df08b2fe8b35cb63dfb3b49738a3494b9b4e6f8e)
reproduce (this is a W=1 build):
 wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
 chmod +x ~/bin/make.cross
 # install powerpc cross compiling tool for clang build
 # apt-get install binutils-powerpc-linux-gnu
 # 
https://github.com/0day-ci/linux/commit/fb7bff30cc0efc7e4df1b48bb69de1f325eee826
 git remote add linux-review https://github.com/0day-ci/linux
 git fetch --no-tags linux-review 
Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211130-015346
 git checkout fb7bff30cc0efc7e4df1b48bb69de1f325eee826
 # save the config file to linux build tree
 mkdir build_dir
 COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
O=build_dir ARCH=powerpc prepare

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

In file included from arch/powerpc/kernel/asm-offsets.c:71:
In file included from arch/powerpc/kernel/../xmon/xmon_bpts.h:7:

arch/powerpc/include/asm/inst.h:165:20: warning: variable 'val' is 
uninitialized when used here [-Wuninitialized]

*inst = ppc_inst(val);
 ^~~
arch/powerpc/include/asm/inst.h:53:22: note: expanded from macro 'ppc_inst'
#define ppc_inst(x) (x)
 ^
arch/powerpc/include/asm/inst.h:155:18: note: initialize the variable 'val' 
to silence this warning
unsigned int val, suffix;
^
 = 0


I can't understand what's wrong here.

We have

__get_kernel_nofault(&val, src, u32, Efault);
if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
__get_kernel_nofault(&suffix, src + 1, u32, Efault);
*inst = ppc_inst_prefix(val, suffix);
} else {
*inst = ppc_inst(val);
}

With

#define __get_kernel_nofault(dst, src, type, err_label) \
__get_user_size_goto(*((type *)(dst)),  \
(__force type __user *)(src), sizeof(type), err_label)


And

#define __get_user_size_goto(x, ptr, size, label)   
\
do {
\
BUILD_BUG_ON(size > sizeof(x)); 
 \
switch (size) { 
\
case 1: __get_user_asm_goto(x, (u8 __user *)ptr, label, "lbz"); break;  
  \
case 2: __get_user_asm_goto(x, (u16 __user *)ptr, label, "lhz"); break; 
  \
case 4: __get_user_asm_goto(x, (u32 __user *)ptr, label, "lwz"); break; 
  \
case 8: __get_user_asm2_goto(x, (u64 __user *)ptr, label);  break;  
\
default: x = 0; BUILD_BUG();
\
}   
\
} while (0)

And

#define __get_user_asm_goto(x, addr, label, op) \
asm_volatile_goto(  \
"1:"op"%U1%X1 %0, %1 # get_user\n"  \
EX_TABLE(1b, %l2)   \
: "=r" (x)\
: "m<>" (*addr) \
:   \
: label)


I see no possibility, no alternative path where val wouldn't be set. The 
asm clearly has x (i.e. val) as an output param so it is always set.



1 warning generated.
:1559:2: warning: syscall futex_waitv not implemented [-W#warnings]
#warning syscall futex_waitv not implemented
 ^
1 warning generated.
arch/powerpc/kernel/vdso32/gettimeofday.S:72:8: error: unsupported 
directive '.stabs'
.stabs "_restgpr_31_x:F-1",36,0,0,_restgpr_31_x; .glob

Re: [PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.

2021-11-29 Thread Nathan Lynch
Mahesh Salgaonkar  writes:
> When certain PHB HW failure causes phyp to recover PHB, it marks the PE
> state as temporarily unavailable until recovery is complete. This also
> triggers an EEH handler in Linux which needs to notify drivers, and perform
> recovery. But before notifying the driver about the pci error it uses
> get_adapter_state()->get-sensor-state() operation of the hotplug_slot to
> determine if the slot contains a device or not. If the slot is empty, the
> recovery is skipped entirely.
>
> However on certain PHB failures, the rtas call get-sensor-state() returns
> extended busy error (9902) until PHB is recovered by phyp. Once PHB is
> recovered, the get-sensor-state() returns success with correct presence
> status. The rtas call interface rtas_get_sensor() loops over the rtas call
> on extended delay return code (9902) until the return value is either
> success (0) or error (-1). This causes the EEH handler to get stuck for ~6
> seconds before it could notify that the pci error has been detected and
> stop any active operations.

I am curious whether you see any difference with "powerpc/rtas:
rtas_busy_delay() improvements" which was recently applied. It will
cause the calling task to sleep in response to a 990x status instead
of immediately retrying:

https://git.kernel.org/powerpc/c/38f7b7067dae0c101be573106018e8af22a90fdf

If that commit helps then maybe this change isn't needed.

Otherwise, see my comments below.


> -int rtas_get_sensor_fast(int sensor, int index, int *state)
> +static int
> +__rtas_get_sensor(int sensor, int index, int *state, bool warn_on)

Boolean flag parameters in this style are undesirable. As a reader you
can't infer the significance of a 'true' or 'false' in the argument list
at the call site.

>  {
>   int token = rtas_token("get-sensor-state");
>   int rc;
> @@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int 
> *state)
>   return -ENOENT;
>  
>   rc = rtas_call(token, 2, 2, state, sensor, index);
> - WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
> - rc <= RTAS_EXTENDED_DELAY_MAX));
> + WARN_ON(warn_on &&
> + (rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
> + rc <= RTAS_EXTENDED_DELAY_MAX)));
>  
>   if (rc < 0)
>   return rtas_error_rc(rc);
>   return rc;
>  }

Issues I see with this, in terms of correctness and convention:

* On non-negative status from rtas_call(), including 990x,
  __rtas_get_sensor() returns the RTAS status unchanged. On a negative
  status, it returns a Linux errno value. On a -2 (busy) status
  rtas_error_rc() prints an error message and returns -ERANGE. Seems
  difficult for a caller to handle. Generally we want rtas_* APIs to
  adhere to a Linux 0/-errno convention or to return the RTAS
  status unchanged, but not a mixture.

* __rtas_get_sensor() is called by rtas_get_sensor_fast() and
  rtas_get_sensor_nonblocking(), but is not called by rtas_get_sensor(),
  despite common practice with __-prefixed functions.

> +int rtas_get_sensor_fast(int sensor, int index, int *state)
> +{
> + return __rtas_get_sensor(sensor, index, state, true);
> +}
> +
> +int rtas_get_sensor_nonblocking(int sensor, int index, int *state)
> +{
> + return __rtas_get_sensor(sensor, index, state, false);
> +}
> +EXPORT_SYMBOL(rtas_get_sensor_nonblocking);
> +
>  bool rtas_indicator_present(int token, int *maxindex)
>  {
>   int proplen, count, i;
> diff --git a/drivers/pci/hotplug/rpaphp_pci.c 
> b/drivers/pci/hotplug/rpaphp_pci.c
> index c380bdacd1466..8a7d681254ce9 100644
> --- a/drivers/pci/hotplug/rpaphp_pci.c
> +++ b/drivers/pci/hotplug/rpaphp_pci.c
> @@ -23,7 +23,7 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state)
>   int rc;
>   int setlevel;
>  
> - rc = rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state);
> + rc = rtas_get_sensor_nonblocking(DR_ENTITY_SENSE, slot->index, state);
>  
>   if (rc < 0) {
>   if (rc == -EFAULT || rc == -EEXIST) {
> @@ -38,10 +38,10 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state)
>   if (rc < 0) {
>   dbg("%s: power on slot[%s] failed rc=%d.\n",
>   __func__, slot->name, rc);
> - } else {
> - rc = rtas_get_sensor(DR_ENTITY_SENSE,
> -  slot->index, state);
> + return rc;
>   }
> + rc = rtas_get_sensor_nonblocking(DR_ENTITY_SENSE,
> +  slot->index, state);
>   } else if (rc == -ENODEV)
>   info("%s: slot is unusable\n", __func__);
>   else

If I'm reading it right rpaphp_get_sensor_state() now returns 9902 in
the situation this change is trying to 

[powerpc:merge] BUILD SUCCESS 260ac081931897ee7f554740c6cfc01c475aa703

2021-11-29 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
merge
branch HEAD: 260ac081931897ee7f554740c6cfc01c475aa703  Automatic merge of 
'master' into merge (2021-11-29 16:53)

elapsed time: 730m

configs tested: 146
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
i386 randconfig-c001-20211128
mips   capcella_defconfig
powerpc tqm8555_defconfig
i386defconfig
arm mv78xx0_defconfig
armmulti_v5_defconfig
m68k   sun3_defconfig
powerpc tqm8540_defconfig
um  defconfig
armvt8500_v6_v7_defconfig
arm  ep93xx_defconfig
xtensa   common_defconfig
arm s3c2410_defconfig
powerpc kilauea_defconfig
mips  malta_defconfig
arm davinci_all_defconfig
mipsmalta_qemu_32r6_defconfig
powerpc  cm5200_defconfig
mipsqi_lb60_defconfig
mips tb0219_defconfig
armqcom_defconfig
mips   xway_defconfig
pariscgeneric-64bit_defconfig
powerpc  makalu_defconfig
mips tb0287_defconfig
arm s5pv210_defconfig
powerpc   currituck_defconfig
arm  imote2_defconfig
s390   zfcpdump_defconfig
powerpcsocrates_defconfig
riscv nommu_k210_sdcard_defconfig
sparc   sparc32_defconfig
mips cobalt_defconfig
mips   gcw0_defconfig
arm   imx_v6_v7_defconfig
mipsomega2p_defconfig
arm   h5000_defconfig
mips   rs90_defconfig
sh   se7722_defconfig
m68km5307c3_defconfig
powerpc   ebony_defconfig
arm   omap1_defconfig
powerpc mpc83xx_defconfig
sh  urquell_defconfig
powerpc  ep88xc_defconfig
powerpc redwood_defconfig
sh  sh7785lcr_32bit_defconfig
arm  simpad_defconfig
arm shannon_defconfig
sh  rsk7203_defconfig
arm   u8500_defconfig
armcerfcube_defconfig
m68km5272c3_defconfig
arm  randconfig-c002-20211128
arm  randconfig-c002-20211130
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
arc  allyesconfig
nds32 allnoconfig
nds32   defconfig
nios2allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
arc defconfig
sh   allmodconfig
h8300allyesconfig
xtensa   allyesconfig
parisc  defconfig
s390 allmodconfig
parisc   allyesconfig
s390defconfig
s390 allyesconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386   debian-10.3-kselftests
i386  debian-10.3
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a001-20211129
i386 randconfig-a002-20211129
i386 randconfig-a006-20211129
i386 randconfig-a005-20211129
i386 randconfig-a004-20211129
i386 randconfig-a003-20211129
x86_64   randconfig-a011-20211128
x86_64

[powerpc:fixes-test] BUILD SUCCESS 3dc709e518b47386e6af937eaec37bb36539edfd

2021-11-29 Thread kernel test robot
   imx_v4_v5_defconfig
arm  simpad_defconfig
m68k   m5249evb_defconfig
mips  bmips_stb_defconfig
arm  ep93xx_defconfig
armneponset_defconfig
arm   milbeaut_m10v_defconfig
shshmin_defconfig
m68km5272c3_defconfig
armtrizeps4_defconfig
powerpc   maple_defconfig
openrisc  or1klitex_defconfig
sparcallyesconfig
xtensa  defconfig
powerpc mpc837x_rdb_defconfig
powerpc mpc834x_itx_defconfig
m68k  atari_defconfig
sh   se7712_defconfig
arm  ixp4xx_defconfig
powerpc  ep88xc_defconfig
sh  sh7785lcr_32bit_defconfig
arm shannon_defconfig
sh  rsk7203_defconfig
arm   u8500_defconfig
armcerfcube_defconfig
arm  randconfig-c002-20211128
arm  randconfig-c002-20211130
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
nds32 allnoconfig
nios2allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
s390 allmodconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
sparc   defconfig
i386   debian-10.3-kselftests
i386  debian-10.3
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a001-20211129
i386 randconfig-a002-20211129
i386 randconfig-a006-20211129
i386 randconfig-a005-20211129
i386 randconfig-a004-20211129
i386 randconfig-a003-20211129
x86_64   randconfig-a011-20211128
x86_64   randconfig-a014-20211128
x86_64   randconfig-a012-20211128
x86_64   randconfig-a016-20211128
x86_64   randconfig-a013-20211128
x86_64   randconfig-a015-20211128
i386 randconfig-a015-20211128
i386 randconfig-a016-20211128
i386 randconfig-a013-20211128
i386 randconfig-a012-20211128
i386 randconfig-a014-20211128
i386 randconfig-a011-20211128
arc  randconfig-r043-20211128
s390 randconfig-r044-20211128
riscvrandconfig-r042-20211128
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64rhel-8.3-kselftests
um   x86_64_defconfig
um i386_defconfig
x86_64   allyesconfig
x86_64   rhel-8.3
x86_64  rhel-8.3-func
x86_64  kexec

clang tested configs:
s390 randconfig-c005-20211128
i386 randconfig-c001-20211128
riscvrandconfig-c006-20211128
arm  randconfig-c002-20211128
powerpc  randconfig-c003-20211128
x86_64   randconfig-c007-20211128
mips randconfig-c004-20211128
x86_64   randconfig-a001-20211128
x86_64   randconfig-a006-20211128
x86_64   randconfig-a003-20211128
x86_64   randconfig-a005-20211128
x86_64   randconfig-a004-20211128
x86_64   randconfig-a002-20211128
i386 randconfig-a001-20211128
i386 randconfig-a002-20211128
i386 randconfig-a006-20211128
i386 randconfig

[powerpc:next] BUILD SUCCESS af3fdce4ab0781ea183107c90de9cbf21d701c54

2021-11-29 Thread kernel test robot
config
arm  simpad_defconfig
m68k   m5249evb_defconfig
mips  bmips_stb_defconfig
arm  ep93xx_defconfig
armneponset_defconfig
arm   milbeaut_m10v_defconfig
shshmin_defconfig
m68km5272c3_defconfig
armtrizeps4_defconfig
powerpc   maple_defconfig
openrisc  or1klitex_defconfig
sparcallyesconfig
xtensa  defconfig
powerpc mpc837x_rdb_defconfig
powerpc mpc834x_itx_defconfig
m68k  atari_defconfig
sh   se7712_defconfig
arm  ixp4xx_defconfig
powerpc  ep88xc_defconfig
powerpc redwood_defconfig
arm shannon_defconfig
sh  rsk7203_defconfig
arm   u8500_defconfig
armcerfcube_defconfig
arm  randconfig-c002-20211128
arm  randconfig-c002-20211129
arm  randconfig-c002-20211130
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
nds32 allnoconfig
nios2allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
s390 allmodconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
sparc   defconfig
i386   debian-10.3-kselftests
i386  debian-10.3
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a001-20211129
i386 randconfig-a002-20211129
i386 randconfig-a006-20211129
i386 randconfig-a005-20211129
i386 randconfig-a004-20211129
i386 randconfig-a003-20211129
x86_64   randconfig-a011-20211128
x86_64   randconfig-a014-20211128
x86_64   randconfig-a012-20211128
x86_64   randconfig-a016-20211128
x86_64   randconfig-a013-20211128
x86_64   randconfig-a015-20211128
i386 randconfig-a015-20211128
i386 randconfig-a016-20211128
i386 randconfig-a013-20211128
i386 randconfig-a012-20211128
i386 randconfig-a014-20211128
i386 randconfig-a011-20211128
arc  randconfig-r043-20211128
s390 randconfig-r044-20211128
riscvrandconfig-r042-20211128
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64rhel-8.3-kselftests
um   x86_64_defconfig
um i386_defconfig
x86_64   allyesconfig
x86_64   rhel-8.3
x86_64  rhel-8.3-func
x86_64  kexec

clang tested configs:
s390 randconfig-c005-20211128
i386 randconfig-c001-20211128
riscvrandconfig-c006-20211128
arm  randconfig-c002-20211128
powerpc  randconfig-c003-20211128
x86_64   randconfig-c007-20211128
mips randconfig-c004-20211128
x86_64   randconfig-a001-20211128
x86_64   randconfig-a006-20211128
x86_64   randconfig-a003-20211128
x86_64   randconfig-a005-20211128
x86_64   randconfig-a004-20211128
x86_64   randconfig-a002-20211128
i386 randconfig-a001-20211128
i386 randconfig-a002-20211128
i386 randconfig-a006-20211128
i386 randconfig-a005-202

[powerpc:next-test] BUILD SUCCESS aebd1fb45c622e9a2b06fb70665d084d3a8d6c78

2021-11-29 Thread kernel test robot
   defconfig
nios2allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
s390 allmodconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386   debian-10.3-kselftests
i386  debian-10.3
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a001-20211129
i386 randconfig-a002-20211129
i386 randconfig-a006-20211129
i386 randconfig-a005-20211129
i386 randconfig-a004-20211129
i386 randconfig-a003-20211129
x86_64   randconfig-a011-20211128
x86_64   randconfig-a014-20211128
x86_64   randconfig-a012-20211128
x86_64   randconfig-a016-20211128
x86_64   randconfig-a013-20211128
x86_64   randconfig-a015-20211128
i386 randconfig-a015-20211128
i386 randconfig-a016-20211128
i386 randconfig-a013-20211128
i386 randconfig-a012-20211128
i386 randconfig-a014-20211128
i386 randconfig-a011-20211128
arc  randconfig-r043-20211128
s390 randconfig-r044-20211128
riscvrandconfig-r042-20211128
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64rhel-8.3-kselftests
um   x86_64_defconfig
um i386_defconfig
x86_64   allyesconfig
x86_64   rhel-8.3
x86_64  rhel-8.3-func
x86_64  kexec

clang tested configs:
s390 randconfig-c005-20211128
i386 randconfig-c001-20211128
riscvrandconfig-c006-20211128
arm  randconfig-c002-20211128
powerpc  randconfig-c003-20211128
x86_64   randconfig-c007-20211128
mips randconfig-c004-20211128
x86_64   randconfig-a001-20211128
x86_64   randconfig-a006-20211128
x86_64   randconfig-a003-20211128
x86_64   randconfig-a005-20211128
x86_64   randconfig-a004-20211128
x86_64   randconfig-a002-20211128
i386 randconfig-a001-20211128
i386 randconfig-a002-20211128
i386 randconfig-a006-20211128
i386 randconfig-a005-20211128
i386 randconfig-a004-20211128
i386 randconfig-a003-20211128
i386 randconfig-a015-20211129
i386 randconfig-a016-20211129
i386 randconfig-a013-20211129
i386 randconfig-a012-20211129
i386 randconfig-a014-20211129
i386 randconfig-a011-20211129
hexagon  randconfig-r045-20211129
hexagon  randconfig-r041-20211129
s390 randconfig-r044-20211129
riscvrandconfig-r042-20211129
hexagon  randconfig-r045-20211128
hexagon  randconfig-r041-20211128

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.

2021-11-29 Thread Tyrel Datwyler
On 11/29/21 5:06 PM, Nathan Lynch wrote:
> Tyrel Datwyler  writes:
>> On 11/29/21 12:58 AM, Mahesh Salgaonkar wrote:
>>> -int rtas_get_sensor_fast(int sensor, int index, int *state)
>>> +static int
>>> +__rtas_get_sensor(int sensor, int index, int *state, bool warn_on)
>>>  {
>>> int token = rtas_token("get-sensor-state");
>>> int rc;
>>> @@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int 
>>> *state)
>>> return -ENOENT;
>>>
>>> rc = rtas_call(token, 2, 2, state, sensor, index);
>>> -   WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
>>> -   rc <= RTAS_EXTENDED_DELAY_MAX));
>>> +   WARN_ON(warn_on &&
>>> +   (rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
>>> +   rc <= RTAS_EXTENDED_DELAY_MAX)));
>>
>> The whole point of rtas_get_sensor_fast() is that on busy we will just let it
>> error out because we don't want to wait. I'm not sure I see the point of the
>> spurious WARN_ONs anytime we hit a BUSY or DELAY return code. Maybe 
>> converting
>> that to a pr_debug() might be better and save expanding the API with a _fast 
>> and
>> _nonblocking variant that do the same thing minus one suppressing a
>> WARN_ON splat.
> 
> There is a subset of sensors that are specified to not ever return busy
> or delay statuses. rtas_get_sensor_fast() is meant to be used with
> those, and it would be an error to use it on a sensor not in that set.
> So the WARN_ON() is appropriate IMO; if it triggers it indicates either
> a misuse of the API or a firmware bug. See commit 1c2cb594441d
> "powerpc/rtas: Introduce rtas_get_sensor_fast() for IRQ handlers"
> 

Fair enough. Seems I misremembered the nature of the original problem and should
have looked back at the commit to completely jog my memory.


Re: [PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.

2021-11-29 Thread Nathan Lynch
Tyrel Datwyler  writes:
> On 11/29/21 12:58 AM, Mahesh Salgaonkar wrote:
>> -int rtas_get_sensor_fast(int sensor, int index, int *state)
>> +static int
>> +__rtas_get_sensor(int sensor, int index, int *state, bool warn_on)
>>  {
>>  int token = rtas_token("get-sensor-state");
>>  int rc;
>> @@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int 
>> *state)
>>  return -ENOENT;
>> 
>>  rc = rtas_call(token, 2, 2, state, sensor, index);
>> -WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
>> -rc <= RTAS_EXTENDED_DELAY_MAX));
>> +WARN_ON(warn_on &&
>> +(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
>> +rc <= RTAS_EXTENDED_DELAY_MAX)));
>
> The whole point of rtas_get_sensor_fast() is that on busy we will just let it
> error out because we don't want to wait. I'm not sure I see the point of the
> spurious WARN_ONs anytime we hit a BUSY or DELAY return code. Maybe converting
> that to a pr_debug() might be better and save expanding the API with a _fast 
> and
> _nonblocking variant that do the same thing minus one suppressing a
> WARN_ON splat.

There is a subset of sensors that are specified to not ever return busy
or delay statuses. rtas_get_sensor_fast() is meant to be used with
those, and it would be an error to use it on a sensor not in that set.
So the WARN_ON() is appropriate IMO; if it triggers it indicates either
a misuse of the API or a firmware bug. See commit 1c2cb594441d
"powerpc/rtas: Introduce rtas_get_sensor_fast() for IRQ handlers"



Re: [PATCH 0/2] of: remove reserved regions count restriction

2021-11-29 Thread Rob Herring
On Sun, Nov 21, 2021 at 08:43:47AM +0200, Mike Rapoport wrote:
> On Fri, Nov 19, 2021 at 03:58:17PM +0800, Calvin Zhang wrote:
> > The count of reserved regions in /reserved-memory was limited because
> > the struct reserved_mem array was defined statically. This series sorts
> > out reserved memory code and allocates that array from early allocator.
> > 
> > Note: reserved region with fixed location must be reserved before any
> > memory allocation. While struct reserved_mem array should be allocated
> > after allocator is activated. We make early_init_fdt_scan_reserved_mem()
> > do reservation only and add another call to initialize reserved memory.
> > So arch code has to change for it.
> 
> I think much simpler would be to use the same constant for sizing
> memblock.reserved and reserved_mem arrays.

Do those arrays get shrunk? Or do we waste the memory forever?

Maybe we can copy and shrink the initial array? Though I suspect struct 
reserved_mem pointers have already been given out.

> 
> If there are too many reserved regions in the device tree, reserving them in
> memblock will fail anyway because memblock also starts with static array
> for memblock.reserved, so doing one pass with memblock_reserve() and
> another to set up reserved_mem wouldn't help anyway.
> 
> > I'm only familiar with arm and arm64 architectures. Approvals from arch
> > maintainers are required. Thank you all.
> > 
> > Calvin Zhang (2):
> >   of: Sort reserved_mem related code
> >   of: reserved_mem: Remove reserved regions count restriction
> > 
> >  arch/arc/mm/init.c |   3 +
> >  arch/arm/kernel/setup.c|   2 +
> >  arch/arm64/kernel/setup.c  |   3 +
> >  arch/csky/kernel/setup.c   |   3 +
> >  arch/h8300/kernel/setup.c  |   2 +
> >  arch/mips/kernel/setup.c   |   3 +
> >  arch/nds32/kernel/setup.c  |   3 +
> >  arch/nios2/kernel/setup.c  |   2 +
> >  arch/openrisc/kernel/setup.c   |   3 +
> >  arch/powerpc/kernel/setup-common.c |   3 +
> >  arch/riscv/kernel/setup.c  |   2 +
> >  arch/sh/kernel/setup.c |   3 +
> >  arch/xtensa/kernel/setup.c |   2 +
> >  drivers/of/fdt.c   | 107 +---
> >  drivers/of/of_private.h|  12 +-
> >  drivers/of/of_reserved_mem.c   | 189 -
> >  include/linux/of_reserved_mem.h|   4 +
> >  17 files changed, 207 insertions(+), 139 deletions(-)
> > 
> > -- 
> > 2.30.2
> > 
> 
> -- 
> Sincerely yours,
> Mike.
> 


Re: [PATCH 1/2] of: Sort reserved_mem related code

2021-11-29 Thread Rob Herring
On Fri, Nov 19, 2021 at 03:58:18PM +0800, Calvin Zhang wrote:
> Move code about parsing /reserved-memory and initializing of
> reserved_mems array to of_reserved_mem.c for better modularity.
> 
> Rename array name from reserved_mem to reserved_mems to distinguish
> from type definition.
> 
> Signed-off-by: Calvin Zhang 
> ---
>  drivers/of/fdt.c| 108 +
>  drivers/of/of_private.h |  12 ++-
>  drivers/of/of_reserved_mem.c| 163 ++--
>  include/linux/of_reserved_mem.h |   4 +
>  4 files changed, 149 insertions(+), 138 deletions(-)
> 
> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
> index bdca35284ceb..445af4e69300 100644
> --- a/drivers/of/fdt.c
> +++ b/drivers/of/fdt.c
> @@ -80,7 +80,7 @@ void __init of_fdt_limit_memory(int limit)
>   }
>  }
>  
> -static bool of_fdt_device_is_available(const void *blob, unsigned long node)
> +bool of_fdt_device_is_available(const void *blob, unsigned long node)
>  {
>   const char *status = fdt_getprop(blob, node, "status", NULL);
>  
> @@ -476,7 +476,7 @@ void *initial_boot_params __ro_after_init;
>  
>  static u32 of_fdt_crc32;
>  
> -static int __init early_init_dt_reserve_memory_arch(phys_addr_t base,
> +int __init early_init_dt_reserve_memory_arch(phys_addr_t base,
>   phys_addr_t size, bool nomap)

I think you can move this function too if you change the nomap==false 
callers to just call memblock_reserve directly.


>  {
>   if (nomap) {
> @@ -492,108 +492,6 @@ static int __init 
> early_init_dt_reserve_memory_arch(phys_addr_t base,
>   return memblock_reserve(base, size);
>  }
>  
> -/*
> - * __reserved_mem_reserve_reg() - reserve all memory described in 'reg' 
> property
> - */
> -static int __init __reserved_mem_reserve_reg(unsigned long node,
> -  const char *uname)
> -{
> - int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
> - phys_addr_t base, size;
> - int len;
> - const __be32 *prop;
> - int first = 1;
> - bool nomap;
> -
> - prop = of_get_flat_dt_prop(node, "reg", &len);
> - if (!prop)
> - return -ENOENT;
> -
> - if (len && len % t_len != 0) {
> - pr_err("Reserved memory: invalid reg property in '%s', skipping 
> node.\n",
> -uname);
> - return -EINVAL;
> - }
> -
> - nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
> -
> - while (len >= t_len) {
> - base = dt_mem_next_cell(dt_root_addr_cells, &prop);
> - size = dt_mem_next_cell(dt_root_size_cells, &prop);
> -
> - if (size &&
> - early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
> - pr_debug("Reserved memory: reserved region for node 
> '%s': base %pa, size %lu MiB\n",
> - uname, &base, (unsigned long)(size / SZ_1M));
> - else
> - pr_info("Reserved memory: failed to reserve memory for 
> node '%s': base %pa, size %lu MiB\n",
> - uname, &base, (unsigned long)(size / SZ_1M));
> -
> - len -= t_len;
> - if (first) {
> - fdt_reserved_mem_save_node(node, uname, base, size);
> - first = 0;
> - }
> - }
> - return 0;
> -}
> -
> -/*
> - * __reserved_mem_check_root() - check if #size-cells, #address-cells 
> provided
> - * in /reserved-memory matches the values supported by the current 
> implementation,
> - * also check if ranges property has been provided
> - */
> -static int __init __reserved_mem_check_root(unsigned long node)
> -{
> - const __be32 *prop;
> -
> - prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
> - if (!prop || be32_to_cpup(prop) != dt_root_size_cells)
> - return -EINVAL;
> -
> - prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
> - if (!prop || be32_to_cpup(prop) != dt_root_addr_cells)
> - return -EINVAL;
> -
> - prop = of_get_flat_dt_prop(node, "ranges", NULL);
> - if (!prop)
> - return -EINVAL;
> - return 0;
> -}
> -
> -/*
> - * fdt_scan_reserved_mem() - scan a single FDT node for reserved memory
> - */
> -static int __init fdt_scan_reserved_mem(void)
> -{
> - int node, child;
> - const void *fdt = initial_boot_params;
> -
> - node = fdt_path_offset(fdt, "/reserved-memory");
> - if (node < 0)
> - return -ENODEV;
> -
> - if (__reserved_mem_check_root(node) != 0) {
> - pr_err("Reserved memory: unsupported node format, ignoring\n");
> - return -EINVAL;
> - }
> -
> - fdt_for_each_subnode(child, fdt, node) {
> - const char *uname;
> - int err;
> -
> - if (!of_fdt_device_is_available(fdt, child))
> - continue;
> -
> - uname = fdt_get_name(fdt, child, NULL);
> -
> -   

[PATCH] scsi: ibmvfc: replace snprintf with sysfs_emit

2021-11-29 Thread davidcomponentone
From: Yang Guang 

coccinelle report:
./drivers/scsi/ibmvscsi/ibmvfc.c:3453:8-16:
WARNING: use scnprintf or sprintf
./drivers/scsi/ibmvscsi/ibmvfc.c:3416:8-16:
WARNING: use scnprintf or sprintf
./drivers/scsi/ibmvscsi/ibmvfc.c:3436:8-16:
WARNING: use scnprintf or sprintf
./drivers/scsi/ibmvscsi/ibmvfc.c:3426:8-16:
WARNING: use scnprintf or sprintf
./drivers/scsi/ibmvscsi/ibmvfc.c:3445:8-16:
WARNING: use scnprintf or sprintf
./drivers/scsi/ibmvscsi/ibmvfc.c:3406:8-16:
WARNING: use scnprintf or sprintf

Using sysfs_emit instead of scnprintf or sprintf makes more sense.

Reported-by: Zeal Robot 
Signed-off-by: Yang Guang 
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index d0eab5700dc5..69bf55c037a5 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3403,7 +3403,7 @@ static ssize_t ibmvfc_show_host_partition_name(struct 
device *dev,
struct Scsi_Host *shost = class_to_shost(dev);
struct ibmvfc_host *vhost = shost_priv(shost);
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
+   return sysfs_emit(buf, "%s\n",
vhost->login_buf->resp.partition_name);
 }
 
@@ -3413,7 +3413,7 @@ static ssize_t ibmvfc_show_host_device_name(struct device 
*dev,
struct Scsi_Host *shost = class_to_shost(dev);
struct ibmvfc_host *vhost = shost_priv(shost);
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
+   return sysfs_emit(buf, "%s\n",
vhost->login_buf->resp.device_name);
 }
 
@@ -3423,7 +3423,7 @@ static ssize_t ibmvfc_show_host_loc_code(struct device 
*dev,
struct Scsi_Host *shost = class_to_shost(dev);
struct ibmvfc_host *vhost = shost_priv(shost);
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
+   return sysfs_emit(buf, "%s\n",
vhost->login_buf->resp.port_loc_code);
 }
 
@@ -3433,7 +3433,7 @@ static ssize_t ibmvfc_show_host_drc_name(struct device 
*dev,
struct Scsi_Host *shost = class_to_shost(dev);
struct ibmvfc_host *vhost = shost_priv(shost);
 
-   return snprintf(buf, PAGE_SIZE, "%s\n",
+   return sysfs_emit(buf, "%s\n",
vhost->login_buf->resp.drc_name);
 }
 
@@ -3442,7 +3442,7 @@ static ssize_t ibmvfc_show_host_npiv_version(struct 
device *dev,
 {
struct Scsi_Host *shost = class_to_shost(dev);
struct ibmvfc_host *vhost = shost_priv(shost);
-   return snprintf(buf, PAGE_SIZE, "%d\n", 
be32_to_cpu(vhost->login_buf->resp.version));
+   return sysfs_emit(buf, "%d\n", 
be32_to_cpu(vhost->login_buf->resp.version));
 }
 
 static ssize_t ibmvfc_show_host_capabilities(struct device *dev,
@@ -3450,7 +3450,7 @@ static ssize_t ibmvfc_show_host_capabilities(struct 
device *dev,
 {
struct Scsi_Host *shost = class_to_shost(dev);
struct ibmvfc_host *vhost = shost_priv(shost);
-   return snprintf(buf, PAGE_SIZE, "%llx\n", 
be64_to_cpu(vhost->login_buf->resp.capabilities));
+   return sysfs_emit(buf, "%llx\n", 
be64_to_cpu(vhost->login_buf->resp.capabilities));
 }
 
 /**
-- 
2.30.2



RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.

2021-11-29 Thread Eugene Bordenkircher
Agreed,

We are happy to pick up the torch on this, but I'd like to try and hear from 
Joakim first before we do.  The patch set is his, so I'd like to give him the 
opportunity.  I think he's the only one that can add a truly proper description 
as well because he mentioned that this includes a "few more fixes" than just 
the one we ran into.  I'd rather hear from him than try to reverse engineer 
what was being addressed.  

Joakim, if you are still watching the thread, would you like to take a stab at 
it?  If I don't hear from you in a couple days, we'll pick up the torch and do 
what we can.

Eugene T. Bordenkircher

-Original Message-
From: Leo Li  
Sent: Monday, November 29, 2021 3:37 PM
To: Eugene Bordenkircher ; Thorsten Leemhuis 
; jo...@infinera.com 
; linuxppc-dev@lists.ozlabs.org; 
linux-...@vger.kernel.org
Cc: gre...@linuxfoundation.org; ba...@kernel.org
Subject: RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to 
unrecoverable loop.

[Caution - External]

> -Original Message-
> From: Eugene Bordenkircher 
> Sent: Monday, November 29, 2021 11:25 AM
> To: Thorsten Leemhuis ; jo...@infinera.com 
> ; linuxppc-dev@lists.ozlabs.org; linux- 
> u...@vger.kernel.org
> Cc: Leo Li ; gre...@linuxfoundation.org; 
> ba...@kernel.org
> Subject: RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list 
> leads to unrecoverable loop.
>
> The final result of our testing is that the patch set posted seems to 
> address all known defects in the Linux kernel.  The mentioned 
> additional problems are entirely caused by the antivirus solution on 
> the windows box.  The antivirus solution blocks the disconnect 
> messages from reaching the RNDIS driver so it has no idea the USB 
> device went away.  There is nothing we can do to address this in the Linux 
> kernel.

Thanks for the confirmation.

>
> I propose we move forward with the patchset.

I think that we should proceed to merge the patchset but it seems to need some 
cleanup for coding style issues and better description before submitted 
formally.

>
> Eugene T. Bordenkircher
>
> -Original Message-
> From: Thorsten Leemhuis 
> Sent: Thursday, November 25, 2021 5:59 AM
> To: Eugene Bordenkircher ; Thorsten 
> Leemhuis ; Joakim Tjernlund 
> ; linuxppc-dev@lists.ozlabs.org; linux- 
> u...@vger.kernel.org
> Cc: leoyang...@nxp.com; gre...@linuxfoundation.org; ba...@kernel.org
> Subject: Re: bug: usb: gadget: FSL_UDC_CORE Corrupted request list 
> leads to unrecoverable loop.
>
> Hi, this is your Linux kernel regression tracker speaking.
>
> Top-posting for once, to make this easy to process for everyone:
>
> Li Yang and Felipe Balbi: how to move on with this? It's quite an old 
> regression, but nevertheless it is one and thus should be fixed. Part 
> of my position is to make that happen and thus remind developers and 
> maintainers about this until the regression is resolved.
>
> Ciao, Thorsten
>
> On 16.11.21 20:11, Eugene Bordenkircher wrote:
> > On 02.11.21 22:15, Joakim Tjernlund wrote:
> >> On Sat, 2021-10-30 at 14:20 +, Joakim Tjernlund wrote:
> >>> On Fri, 2021-10-29 at 17:14 +, Eugene Bordenkircher wrote:
> >>
>  We've discovered a situation where the FSL udc driver
> (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating 
> over the request queue, but the queue has been corrupted at some point 
> so it loops infinitely.  I believe we have narrowed into the offending 
> code, but we are in need of assistance trying to find an appropriate 
> fix for the problem.  The identified code appears to be in all 
> versions of the Linux kernel the driver exists in.
> 
>  The problem appears to be when handling a USB_REQ_GET_STATUS
> request.  The driver gets this request and then calls the 
> ch9getstatus() function.  In this function, it starts a request by 
> "borrowing" the per device status_req, filling it in, and then queuing 
> it with a call to list_add_tail() to add the request to the endpoint 
> queue.  Right before it exits the function however, it's calling 
> ep0_prime_status(), which is filling out that same status_req 
> structure and then queuing it with another call to list_add_tail() to 
> add the request to the endpoint queue.  This adds two instances of the 
> exact same LIST_HEAD to the endpoint queue, which breaks the list 
> since the prev and next pointers end up pointing to the wrong things.  
> This ends up causing a hard loop the next time nuke() gets called, which 
> happens on the next setup IRQ.
> 
>  I'm not sure what the appropriate fix to this problem is, mostly 
>  due to
> my lack of expertise in USB and this driver stack.  The code has been 
> this way in the kernel for a very long time, which suggests that it 
> has been working, unless USB_REQ_GET_STATUS requests are never made.  
> This further suggests that there is something else going on that I don't 
> understand.
> Deleting the call to ep0_prime_status() and the following ep0stall() 
> 

RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.

2021-11-29 Thread Leo Li



> -Original Message-
> From: Eugene Bordenkircher 
> Sent: Monday, November 29, 2021 11:25 AM
> To: Thorsten Leemhuis ; jo...@infinera.com
> ; linuxppc-dev@lists.ozlabs.org; linux-
> u...@vger.kernel.org
> Cc: Leo Li ; gre...@linuxfoundation.org;
> ba...@kernel.org
> Subject: RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to
> unrecoverable loop.
> 
> The final result of our testing is that the patch set posted seems to address 
> all
> known defects in the Linux kernel.  The mentioned additional problems are
> entirely caused by the antivirus solution on the windows box.  The antivirus
> solution blocks the disconnect messages from reaching the RNDIS driver so it
> has no idea the USB device went away.  There is nothing we can do to
> address this in the Linux kernel.

Thanks for the confirmation.

> 
> I propose we move forward with the patchset.

I think that we should proceed to merge the patchset but it seems to need some 
cleanup for coding style issues and better description before submitted 
formally.

> 
> Eugene T. Bordenkircher
> 
> -Original Message-
> From: Thorsten Leemhuis 
> Sent: Thursday, November 25, 2021 5:59 AM
> To: Eugene Bordenkircher ; Thorsten
> Leemhuis ; Joakim Tjernlund
> ; linuxppc-dev@lists.ozlabs.org; linux-
> u...@vger.kernel.org
> Cc: leoyang...@nxp.com; gre...@linuxfoundation.org; ba...@kernel.org
> Subject: Re: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to
> unrecoverable loop.
> 
> Hi, this is your Linux kernel regression tracker speaking.
> 
> Top-posting for once, to make this easy to process for everyone:
> 
> Li Yang and Felipe Balbi: how to move on with this? It's quite an old
> regression, but nevertheless it is one and thus should be fixed. Part of my
> position is to make that happen and thus remind developers and maintainers
> about this until the regression is resolved.
> 
> Ciao, Thorsten
> 
> On 16.11.21 20:11, Eugene Bordenkircher wrote:
> > On 02.11.21 22:15, Joakim Tjernlund wrote:
> >> On Sat, 2021-10-30 at 14:20 +, Joakim Tjernlund wrote:
> >>> On Fri, 2021-10-29 at 17:14 +, Eugene Bordenkircher wrote:
> >>
>  We've discovered a situation where the FSL udc driver
> (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating over the
> request queue, but the queue has been corrupted at some point so it loops
> infinitely.  I believe we have narrowed into the offending code, but we are in
> need of assistance trying to find an appropriate fix for the problem.  The
> identified code appears to be in all versions of the Linux kernel the driver
> exists in.
> 
>  The problem appears to be when handling a USB_REQ_GET_STATUS
> request.  The driver gets this request and then calls the ch9getstatus()
> function.  In this function, it starts a request by "borrowing" the per device
> status_req, filling it in, and then queuing it with a call to list_add_tail() 
> to add
> the request to the endpoint queue.  Right before it exits the function
> however, it's calling ep0_prime_status(), which is filling out that same
> status_req structure and then queuing it with another call to list_add_tail() 
> to
> add the request to the endpoint queue.  This adds two instances of the exact
> same LIST_HEAD to the endpoint queue, which breaks the list since the prev
> and next pointers end up pointing to the wrong things.  This ends up causing
> a hard loop the next time nuke() gets called, which happens on the next
> setup IRQ.
> 
>  I'm not sure what the appropriate fix to this problem is, mostly due to
> my lack of expertise in USB and this driver stack.  The code has been this way
> in the kernel for a very long time, which suggests that it has been working,
> unless USB_REQ_GET_STATUS requests are never made.  This further
> suggests that there is something else going on that I don't understand.
> Deleting the call to ep0_prime_status() and the following ep0stall() call
> appears, on the surface, to get the device working again, but may have side
> effects that I'm not seeing.
> 
>  I'm hopeful someone in the community can help provide some
> information on what I may be missing or help come up with a solution to the
> problem.  A big thank you to anyone who would like to help out.
> >>>
> >>> Ran into this too a while ago. Found the bug and a few more fixes.
> >>> This is against 4.19 so you may have to tweak them a bit.
> >>> Feel free to upstream them.
> >>
> >> Curious, did my patches help? Good to know once we upgrade as well.
> >
> > There's good news and bad news.
> >
> > The good news is that this appears to stop the driver from entering an
> > infinite loop, which prevents the Linux system from locking up and
> > never recovering.  So I'm willing to say we've made the behavior
> > better.
> >
> > The bad news is that once we get past this point, there is new bad
> > behavior.  What is on top of this driver in our system is the RNDIS
> > gadget driver communicating to 

Re: [PATCH] powerpc/pseries/vas: Don't print an error when VAS is unavailable

2021-11-29 Thread Michael Ellerman
Nicholas Piggin  writes:
> Excerpts from Cédric Le Goater's message of November 26, 2021 5:13 pm:
>> On 11/26/21 06:21, Nicholas Piggin wrote:
>>> KVM does not support VAS so guests always print a useless error on boot
>>> 
>>>  vas: HCALL(398) error -2, query_type 0, result buffer 0x57f2000
>>> 
>>> Change this to only print the message if the error is not H_FUNCTION.
>> 
>> 
>> Just being curious, why is it even called since "ibm,compression" should
>> not be exposed in the DT ?
>
> It looks like vas does not test for it. I guess in theory there can be 
> other functions than compression implemented as an accelerator. Maybe
> that's why?

Yeah I guess, or it's just not structured that well. The vas platform
code is a bit awkward, it's there to support drivers, but it's not
actually driver code.

I think we can probably rework it so the vas code does nothing until a
driver calls in to it.

eg. something like below.

cheers


diff --git a/arch/powerpc/platforms/pseries/vas.c 
b/arch/powerpc/platforms/pseries/vas.c
index b043e3936d21..dc3491fc919d 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -454,6 +454,8 @@ static const struct vas_user_win_ops vops_pseries = {
.close_win  = vas_deallocate_window, /* Close window */
 };
 
+static int pseries_vas_init(void);
+
 /*
  * Supporting only nx-gzip coprocessor type now, but this API code
  * extended to other coprocessor types later.
@@ -463,7 +465,8 @@ int vas_register_api_pseries(struct module *mod, enum 
vas_cop_type cop_type,
 {
int rc;
 
-   if (!copypaste_feat)
+   rc = pseries_vas_init();
+   if (rc || !copypaste_feat)
return -ENOTSUPP;
 
rc = vas_register_coproc_api(mod, cop_type, name, _pseries);
@@ -531,7 +534,7 @@ static int get_vas_capabilities(u8 feat, enum 
vas_cop_feat_type type,
return 0;
 }
 
-static int __init pseries_vas_init(void)
+static int pseries_vas_init(void)
 {
struct hv_vas_cop_feat_caps *hv_cop_caps;
struct hv_vas_all_caps *hv_caps;
@@ -592,4 +595,3 @@ static int __init pseries_vas_init(void)
kfree(hv_caps);
return rc;
 }
-machine_device_initcall(pseries, pseries_vas_init);


Re: [PATCH v5 5/5] powerpc/inst: Optimise copy_inst_from_kernel_nofault()

2021-11-29 Thread kernel test robot
Hi Christophe,

I love your patch! Perhaps something to improve:

[auto build test WARNING on powerpc/next]
[also build test WARNING on v5.16-rc3 next-20211129]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211130-015346
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-r023-20211129 
(https://download.01.org/0day-ci/archive/20211130/202111300652.0ydbnvyj-...@intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 
df08b2fe8b35cb63dfb3b49738a3494b9b4e6f8e)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install powerpc cross compiling tool for clang build
# apt-get install binutils-powerpc-linux-gnu
# 
https://github.com/0day-ci/linux/commit/fb7bff30cc0efc7e4df1b48bb69de1f325eee826
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211130-015346
git checkout fb7bff30cc0efc7e4df1b48bb69de1f325eee826
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
O=build_dir ARCH=powerpc prepare

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

   In file included from arch/powerpc/kernel/asm-offsets.c:71:
   In file included from arch/powerpc/kernel/../xmon/xmon_bpts.h:7:
>> arch/powerpc/include/asm/inst.h:165:20: warning: variable 'val' is 
>> uninitialized when used here [-Wuninitialized]
   *inst = ppc_inst(val);
^~~
   arch/powerpc/include/asm/inst.h:53:22: note: expanded from macro 'ppc_inst'
   #define ppc_inst(x) (x)
^
   arch/powerpc/include/asm/inst.h:155:18: note: initialize the variable 'val' 
to silence this warning
   unsigned int val, suffix;
   ^
= 0
   1 warning generated.
   :1559:2: warning: syscall futex_waitv not implemented [-W#warnings]
   #warning syscall futex_waitv not implemented
^
   1 warning generated.
   arch/powerpc/kernel/vdso32/gettimeofday.S:72:8: error: unsupported directive 
'.stabs'
   .stabs "_restgpr_31_x:F-1",36,0,0,_restgpr_31_x; .globl _restgpr_31_x; 
_restgpr_31_x:
  ^
   arch/powerpc/kernel/vdso32/gettimeofday.S:73:8: error: unsupported directive 
'.stabs'
   .stabs "_rest32gpr_31_x:F-1",36,0,0,_rest32gpr_31_x; .globl _rest32gpr_31_x; 
_rest32gpr_31_x:
  ^
   make[2]: *** [arch/powerpc/kernel/vdso32/Makefile:55: 
arch/powerpc/kernel/vdso32/gettimeofday.o] Error 1
   make[2]: Target 'include/generated/vdso32-offsets.h' not remade because of 
errors.
   make[1]: *** [arch/powerpc/Makefile:421: vdso_prepare] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:219: __sub-make] Error 2
   make: Target 'prepare' not remade because of errors.


vim +/val +165 arch/powerpc/include/asm/inst.h

   152  
   153  static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 
*src)
   154  {
   155  unsigned int val, suffix;
   156  
   157  if (unlikely(!is_kernel_addr((unsigned long)src)))
   158  return -ERANGE;
   159  
   160  __get_kernel_nofault(, src, u32, Efault);
   161  if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
   162  __get_kernel_nofault(, src + 1, u32, Efault);
   163  *inst = ppc_inst_prefix(val, suffix);
   164  } else {
 > 165  *inst = ppc_inst(val);
   166  }
   167  return 0;
   168  Efault:
   169  return -EFAULT;
   170  }
   171  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton

2021-11-29 Thread Dennis Zhou
Hello,

On Sun, Nov 21, 2021 at 05:35:53PM +0800, Kefeng Wang wrote:
> When adding support for the page-mapping percpu first chunk allocator on
> arm64, we found a lot of duplicated code in the percpu embed/page first
> chunk allocators. This patchset aims to clean it up and should introduce
> no functional change; it has only been tested on arm64.
> 
> Kefeng Wang (4):
>   mm: percpu: Generalize percpu related config
>   mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
>   mm: percpu: Add generic pcpu_fc_alloc/free funciton
>   mm: percpu: Add generic pcpu_populate_pte() function
> 
>  arch/arm64/Kconfig |  20 +
>  arch/ia64/Kconfig  |   9 +--
>  arch/mips/Kconfig  |  10 +--
>  arch/mips/mm/init.c|  14 +---
>  arch/powerpc/Kconfig   |  17 +---
>  arch/powerpc/kernel/setup_64.c |  92 +
>  arch/riscv/Kconfig |  10 +--
>  arch/sparc/Kconfig |  12 +--
>  arch/sparc/kernel/smp_64.c | 105 +---
>  arch/x86/Kconfig   |  17 +---
>  arch/x86/kernel/setup_percpu.c |  66 ++-
>  drivers/base/arch_numa.c   |  68 +---
>  include/linux/percpu.h |  13 +--
>  mm/Kconfig |  12 +++
>  mm/percpu.c| 143 +
>  15 files changed, 165 insertions(+), 443 deletions(-)
> 
> -- 
> 2.26.2
> 

I've made a few comments. I think this will be a little bit of a
challenge to get through due to it touching so many architectures. For
ease, it probably makes sense to run it through my tree, but we'll need
explicit acks as I mentioned.

I like getting rid of the pcpu_alloc_bootmem()/pcpu_free_bootmem()
functions. However, let's keep the implementation identical to x86.


I don't think we should get rid of the populate_pte_fn(). I'm not
comfortable changing x86's implementation. Simply offer a NULL, and if
NULL use the default.

Do you have a tree that intel pulls? I suggest cleaning up the patches
and pushing to a remote branch that they pick up. That would have caught
the mips typo. Send a PR creating a file in [1] for your branch, github
is fine. Basic validation needs to be done before I can pick this up
too on more than arm64.

[1] https://github.com/intel/lkp-tests/tree/master/repo/linux

Thanks,
Dennis


Re: [PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.

2021-11-29 Thread Tyrel Datwyler
On 11/29/21 12:58 AM, Mahesh Salgaonkar wrote:
> When certain PHB HW failure causes phyp to recover PHB, it marks the PE
> state as temporarily unavailable until recovery is complete. This also
> triggers an EEH handler in Linux which needs to notify drivers, and perform
> recovery. But before notifying the driver about the pci error it uses
> get_adapter_state()->get-sensor-state() operation of the hotplug_slot to
> determine if the slot contains a device or not. If the slot is empty, the
> recovery is skipped entirely.
> 
> However on certain PHB failures, the rtas call get-sensor-state() returns
> extended busy error (9902) until PHB is recovered by phyp. Once PHB is
> recovered, the get-sensor-state() returns success with correct presence
> status. The rtas call interface rtas_get_sensor() loops over the rtas call
> on extended delay return code (9902) until the return value is either
> success (0) or error (-1). This causes the EEH handler to get stuck for ~6
> seconds before it could notify that the pci error has been detected and
> stop any active operations. Hence with running I/O traffic, during this 6
> seconds, the network driver continues its operation and hits a timeout
> (netdev watchdog). On timeouts, network driver go into ffdc capture mode
> and reset path assuming the PCI device is in fatal condition. This
> sometimes causes EEH recovery to fail. This impacts the ssh connection and
> leads to the system being inaccessible.
> 
> 
> [52732.244731] DEBUG: ibm_read_slot_reset_state2()
> [52732.244762] DEBUG: ret = 0, rets[0]=5, rets[1]=1, rets[2]=4000, rets[3]=>
> [52732.244798] DEBUG: in eeh_slot_presence_check
> [52732.244804] DEBUG: error state check
> [52732.244807] DEBUG: Is slot hotpluggable
> [52732.244810] DEBUG: hotpluggable ops ?
> [52732.244953] DEBUG: Calling ops->get_adapter_status
> [52732.244958] DEBUG: calling rpaphp_get_sensor_state
> [52736.564262] [ cut here ]
> [52736.564299] NETDEV WATCHDOG: enP64p1s0f3 (tg3): transmit queue 0 timed o>
> [52736.564324] WARNING: CPU: 1442 PID: 0 at net/sched/sch_generic.c:478 dev>
> [...]
> [52736.564505] NIP [c0c32368] dev_watchdog+0x438/0x440
> [52736.564513] LR [c0c32364] dev_watchdog+0x434/0x440
> 
> 
> Fix this issue by introducing a new rtas_get_sensor_nonblocking() that does
> not get blocked on BUSY condition and returns immediately with error. Use
> this function in pseries pci hotplug driver which can return an error if
> slot presence state can not be detected immediately. Please note that only
> in certain PHB failures, the slot presence check returns BUSY condition. In
> normal cases it returns immediately with a correct presence state value.
> Hence this change has no impact on normal pci dlpar operations.
> 
> We could use rtas_get_sensor_fast() variant, but it throws WARN_ON on BUSY
> condition. The rtas_get_sensor_nonblocking() suppresses WARN_ON.
> 
> Signed-off-by: Mahesh Salgaonkar 
> ---
> 
> This is an alternate approach to fix the EEH issue instead of delaying slot
> presence check proposed at
> https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-November/236956.html
> 
> Also refer:
> https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-November/237027.html
> ---
>  arch/powerpc/include/asm/rtas.h  |1 +
>  arch/powerpc/kernel/rtas.c   |   19 ---
>  drivers/pci/hotplug/rpaphp_pci.c |8 
>  3 files changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> index 9dc97d2f9d27e..d8e8befb1c193 100644
> --- a/arch/powerpc/include/asm/rtas.h
> +++ b/arch/powerpc/include/asm/rtas.h
> @@ -250,6 +250,7 @@ extern void rtas_os_term(char *str);
>  void rtas_activate_firmware(void);
>  extern int rtas_get_sensor(int sensor, int index, int *state);
>  extern int rtas_get_sensor_fast(int sensor, int index, int *state);
> +int rtas_get_sensor_nonblocking(int sensor, int index, int *state);
>  extern int rtas_get_power_level(int powerdomain, int *level);
>  extern int rtas_set_power_level(int powerdomain, int level, int *setlevel);
>  extern bool rtas_indicator_present(int token, int *maxindex);
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index ac61e226c9af6..fd5aa3bbd46c5 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -609,7 +609,8 @@ int rtas_get_sensor(int sensor, int index, int *state)
>  }
>  EXPORT_SYMBOL(rtas_get_sensor);
> 
> -int rtas_get_sensor_fast(int sensor, int index, int *state)
> +static int
> +__rtas_get_sensor(int sensor, int index, int *state, bool warn_on)
>  {
>   int token = rtas_token("get-sensor-state");
>   int rc;
> @@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int 
> *state)
>   return -ENOENT;
> 
>   rc = rtas_call(token, 2, 2, state, sensor, index);
> - WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
> - 

Re: [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function

2021-11-29 Thread Dennis Zhou
On Sun, Nov 21, 2021 at 05:35:57PM +0800, Kefeng Wang wrote:
> When NEED_PER_CPU_PAGE_FIRST_CHUNK enabled, we need a function to
> populate pte, add a generic pcpu populate pte function and switch
> to use it.
> 
> Signed-off-by: Kefeng Wang 
> ---
>  arch/powerpc/kernel/setup_64.c | 47 +
>  arch/sparc/kernel/smp_64.c | 57 +
>  arch/x86/kernel/setup_percpu.c |  5 +--
>  drivers/base/arch_numa.c   | 51 +-
>  include/linux/percpu.h |  5 +--
>  mm/percpu.c| 77 +++---
>  6 files changed, 79 insertions(+), 163 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 364b1567f822..1a17828af77f 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -788,51 +788,6 @@ static int pcpu_cpu_distance(unsigned int from, unsigned 
> int to)
>  unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
>  EXPORT_SYMBOL(__per_cpu_offset);
>  
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> - pgd_t *pgd = pgd_offset_k(addr);
> - p4d_t *p4d;
> - pud_t *pud;
> - pmd_t *pmd;
> -
> - p4d = p4d_offset(pgd, addr);
> - if (p4d_none(*p4d)) {
> - pud_t *new;
> -
> - new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
> - if (!new)
> - goto err_alloc;
> - p4d_populate(&init_mm, p4d, new);
> - }
> -
> - pud = pud_offset(p4d, addr);
> - if (pud_none(*pud)) {
> - pmd_t *new;
> -
> - new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pud_populate(&init_mm, pud, new);
> - }
> -
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd)) {
> - pte_t *new;
> -
> - new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pmd_populate_kernel(&init_mm, pmd, new);
> - }
> -
> - return;
> -
> -err_alloc:
> - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> -   __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -
> -
>  void __init setup_per_cpu_areas(void)
>  {
>   const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
> @@ -861,7 +816,7 @@ void __init setup_per_cpu_areas(void)
>   }
>  
>   if (rc < 0)
> - rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, 
> pcpu_populate_pte);
> + rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
>   if (rc < 0)
>   panic("cannot initialize percpu area (err=%d)", rc);
>  
> diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
> index 198dadddb75d..00dffe2d834b 100644
> --- a/arch/sparc/kernel/smp_64.c
> +++ b/arch/sparc/kernel/smp_64.c
> @@ -1534,59 +1534,6 @@ static int __init pcpu_cpu_distance(unsigned int from, 
> unsigned int to)
>   return REMOTE_DISTANCE;
>  }
>  
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> - pgd_t *pgd = pgd_offset_k(addr);
> - p4d_t *p4d;
> - pud_t *pud;
> - pmd_t *pmd;
> -
> - if (pgd_none(*pgd)) {
> - pud_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pgd_populate(&init_mm, pgd, new);
> - }
> -
> - p4d = p4d_offset(pgd, addr);
> - if (p4d_none(*p4d)) {
> - pud_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - p4d_populate(&init_mm, p4d, new);
> - }
> -
> - pud = pud_offset(p4d, addr);
> - if (pud_none(*pud)) {
> - pmd_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pud_populate(&init_mm, pud, new);
> - }
> -
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd)) {
> - pte_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pmd_populate_kernel(&init_mm, pmd, new);
> - }
> -
> - return;
> -
> -err_alloc:
> - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> -   __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -
>  void __init setup_per_cpu_areas(void)
>  {
>   unsigned long delta;
> @@ -1604,9 +1551,7 @@ void __init setup_per_cpu_areas(void)
>   pcpu_fc_names[pcpu_chosen_fc], rc);
>   }
>   if (rc < 0)
> - rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> -cpu_to_node,
> -

Re: [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free function

2021-11-29 Thread Dennis Zhou
On Sun, Nov 21, 2021 at 05:35:56PM +0800, Kefeng Wang wrote:
> With previous patch, we could add a generic pcpu first chunk
> allocation and free function to clean up the duplicated definitions
> on each architecture.
> 
> Signed-off-by: Kefeng Wang 
> ---
>  arch/mips/mm/init.c| 16 +
>  arch/powerpc/kernel/setup_64.c | 51 ++
>  arch/sparc/kernel/smp_64.c | 50 +-
>  arch/x86/kernel/setup_percpu.c | 59 +-
>  drivers/base/arch_numa.c   | 19 +-
>  include/linux/percpu.h |  9 +
>  mm/percpu.c| 66 ++
>  7 files changed, 42 insertions(+), 228 deletions(-)
> 
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index ebbf6923532c..5a8002839550 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -524,19 +524,6 @@ static int __init pcpu_cpu_to_node(int cpu)
>   return cpu_to_node(cpu);
>  }
>  
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t 
> align,
> -pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> - return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
> -   MEMBLOCK_ALLOC_ACCESSIBLE,
> -   cpu_to_nd_fun(cpu));
> -}
> -
> -static void __init pcpu_fc_free(void *ptr, size_t size)
> -{
> - memblock_free(ptr, size);
> -}
> -
>  void __init setup_per_cpu_areas(void)
>  {
>   unsigned long delta;
> @@ -550,8 +537,7 @@ void __init setup_per_cpu_areas(void)
>   rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
>   PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
>   pcpu_cpu_distance,
> - pcpu_cpu_to_node,
> - pcpu_fc_alloc, pcpu_fc_free);
> + pcpu_cpu_to_node);
>   if (rc < 0)
>   panic("Failed to initialize percpu areas.");
>  
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 9a5609c821df..364b1567f822 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -777,50 +777,6 @@ static __init int pcpu_cpu_to_node(int cpu)
>   return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
>  }
>  
> -/**
> - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
> - * @cpu: cpu to allocate for
> - * @size: size allocation in bytes
> - * @align: alignment
> - *
> - * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
> - * does the right thing for NUMA regardless of the current
> - * configuration.
> - *
> - * RETURNS:
> - * Pointer to the allocated area on success, NULL on failure.
> - */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, 
> size_t align,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> - const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> -#ifdef CONFIG_NUMA
> - int node = cpu_to_nd_fun(cpu);
> - void *ptr;
> -
> - if (!node_online(node) || !NODE_DATA(node)) {
> - ptr = memblock_alloc_from(size, align, goal);
> - pr_info("cpu %d has no node %d or node-local memory\n",
> - cpu, node);
> - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
> -  cpu, size, __pa(ptr));
> - } else {
> - ptr = memblock_alloc_try_nid(size, align, goal,
> -  MEMBLOCK_ALLOC_ACCESSIBLE, node);
> - pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
> -  "%016lx\n", cpu, size, node, __pa(ptr));
> - }
> - return ptr;
> -#else
> - return memblock_alloc_from(size, align, goal);
> -#endif
> -}
> -
> -static void __init pcpu_free_bootmem(void *ptr, size_t size)
> -{
> - memblock_free(ptr, size);
> -}
> -
>  static int pcpu_cpu_distance(unsigned int from, unsigned int to)
>  {
>   if (early_cpu_to_node(from) == early_cpu_to_node(to))
> @@ -897,8 +853,7 @@ void __init setup_per_cpu_areas(void)
>  
>   if (pcpu_chosen_fc != PCPU_FC_PAGE) {
>   rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, 
> pcpu_cpu_distance,
> - pcpu_cpu_to_node,
> - pcpu_alloc_bootmem, 
> pcpu_free_bootmem);
> + pcpu_cpu_to_node);
>   if (rc)
>   pr_warn("PERCPU: %s allocator failed (%d), "
>   "falling back to page size\n",
> @@ -906,9 +861,7 @@ void __init setup_per_cpu_areas(void)
>   }
>  
>   if (rc < 0)
> - rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, 
> pcpu_free_bootmem,
> -pcpu_cpu_to_node,
> - 

Re: [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef

2021-11-29 Thread Dennis Zhou
On Sun, Nov 21, 2021 at 05:35:55PM +0800, Kefeng Wang wrote:
> Add pcpu_fc_cpu_to_node_fn_t and pass it into pcpu_fc_alloc_fn_t;
> pcpu first chunk allocation will call it to allocate memblock memory
> on the corresponding node.
> 
> Signed-off-by: Kefeng Wang 
> ---
>  arch/mips/mm/init.c| 12 +---
>  arch/powerpc/kernel/setup_64.c | 14 +++---
>  arch/sparc/kernel/smp_64.c |  8 +---
>  arch/x86/kernel/setup_percpu.c | 18 +-
>  drivers/base/arch_numa.c   |  8 +---
>  include/linux/percpu.h |  7 +--
>  mm/percpu.c| 14 +-
>  7 files changed, 57 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index 325e1552cbea..ebbf6923532c 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -519,12 +519,17 @@ static int __init pcpu_cpu_distance(unsigned int from, 
> unsigned int to)
>   return node_distance(cpu_to_node(from), cpu_to_node(to));
>  }
>  
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
> -size_t align)
> +static int __init pcpu_cpu_to_node(int cpu)
> +{
> + return cpu_to_node(cpu);
> +}
> +
> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t 
> align,
> +pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>   return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
> MEMBLOCK_ALLOC_ACCESSIBLE,
> -   cpu_to_node(cpu));
> +   cpu_to_nd_fun(cpu));
>  }
>  
>  static void __init pcpu_fc_free(void *ptr, size_t size)
> @@ -545,6 +550,7 @@ void __init setup_per_cpu_areas(void)
>   rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
>   PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
>   pcpu_cpu_distance,
> + pcpu_cpu_to_node,
>   pcpu_fc_alloc, pcpu_fc_free);
>   if (rc < 0)
>   panic("Failed to initialize percpu areas.");
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 6052f5d5ded3..9a5609c821df 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -771,6 +771,12 @@ void __init emergency_stack_init(void)
>  }
>  
>  #ifdef CONFIG_SMP
> +
> +static __init int pcpu_cpu_to_node(int cpu)
> +{
> + return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
> +}
> +
>  /**
>   * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
>   * @cpu: cpu to allocate for
> @@ -784,12 +790,12 @@ void __init emergency_stack_init(void)
>   * RETURNS:
>   * Pointer to the allocated area on success, NULL on failure.
>   */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
> - size_t align)
> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, 
> size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>   const unsigned long goal = __pa(MAX_DMA_ADDRESS);
>  #ifdef CONFIG_NUMA
> - int node = early_cpu_to_node(cpu);
> + int node = cpu_to_nd_fun(cpu);

^ typo - cpu_to_nd_fn().

>   void *ptr;
>  
>   if (!node_online(node) || !NODE_DATA(node)) {
> @@ -891,6 +897,7 @@ void __init setup_per_cpu_areas(void)
>  
>   if (pcpu_chosen_fc != PCPU_FC_PAGE) {
>   rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, 
> pcpu_cpu_distance,
> + pcpu_cpu_to_node,
>   pcpu_alloc_bootmem, 
> pcpu_free_bootmem);
>   if (rc)
>   pr_warn("PERCPU: %s allocator failed (%d), "
> @@ -900,6 +907,7 @@ void __init setup_per_cpu_areas(void)
>  
>   if (rc < 0)
>   rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, 
> pcpu_free_bootmem,
> +pcpu_cpu_to_node,
>  pcpu_populate_pte);
>   if (rc < 0)
>   panic("cannot initialize percpu area (err=%d)", rc);
> diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
> index b98a7bbe6728..026aa3ccbc30 100644
> --- a/arch/sparc/kernel/smp_64.c
> +++ b/arch/sparc/kernel/smp_64.c
> @@ -1539,12 +1539,12 @@ void smp_send_stop(void)
>   * RETURNS:
>   * Pointer to the allocated area on success, NULL on failure.
>   */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
> - size_t align)
> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, 
> size_t align,
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>   const unsigned long goal = __pa(MAX_DMA_ADDRESS);
>  #ifdef CONFIG_NUMA
> - int node = cpu_to_node(cpu);
> 

Re: [PATCH RFC 1/4] mm: percpu: Generalize percpu related config

2021-11-29 Thread Dennis Zhou
Hello,

On Sun, Nov 21, 2021 at 05:35:54PM +0800, Kefeng Wang wrote:
> The HAVE_SETUP_PER_CPU_AREA/NEED_PER_CPU_EMBED_FIRST_CHUNK/
> NEED_PER_CPU_PAGE_FIRST_CHUNK/USE_PERCPU_NUMA_NODE_ID configs
> have duplicate definitions on the platforms that use them.
> 
> Move them into mm, drop these redundant definitions and instead
> just select it on applicable platforms.
> 
> Signed-off-by: Kefeng Wang 
> ---
>  arch/arm64/Kconfig   | 20 
>  arch/ia64/Kconfig|  9 ++---
>  arch/mips/Kconfig| 10 ++
>  arch/powerpc/Kconfig | 17 -
>  arch/riscv/Kconfig   | 10 ++
>  arch/sparc/Kconfig   | 12 +++-
>  arch/x86/Kconfig | 17 -
>  mm/Kconfig   | 12 
>  8 files changed, 33 insertions(+), 74 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index c4207cf9bb17..4ff73299f8a9 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1135,6 +1135,10 @@ config NUMA
>   select GENERIC_ARCH_NUMA
>   select ACPI_NUMA if ACPI
>   select OF_NUMA
> + select HAVE_SETUP_PER_CPU_AREA
> + select NEED_PER_CPU_EMBED_FIRST_CHUNK
> + select NEED_PER_CPU_PAGE_FIRST_CHUNK
> + select USE_PERCPU_NUMA_NODE_ID
>   help
> Enable NUMA (Non-Uniform Memory Access) support.
>  
> @@ -1151,22 +1155,6 @@ config NODES_SHIFT
> Specify the maximum number of NUMA Nodes available on the target
> system.  Increases memory reserved to accommodate various tables.
>  
> -config USE_PERCPU_NUMA_NODE_ID
> - def_bool y
> - depends on NUMA
> -
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool y
> - depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y
> - depends on NUMA
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> - def_bool y
> - depends on NUMA
> -
>  source "kernel/Kconfig.hz"
>  
>  config ARCH_SPARSEMEM_ENABLE
> diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
> index 1e33666fa679..703952819e10 100644
> --- a/arch/ia64/Kconfig
> +++ b/arch/ia64/Kconfig
> @@ -32,6 +32,7 @@ config IA64
>   select HAVE_FTRACE_MCOUNT_RECORD
>   select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
>   select HAVE_FUNCTION_TRACER
> + select HAVE_SETUP_PER_CPU_AREA
>   select TTY
>   select HAVE_ARCH_TRACEHOOK
>   select HAVE_VIRT_CPU_ACCOUNTING
> @@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY
>   bool
>   default y
>  
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool y
> -
>  config DMI
>   bool
>   default y
> @@ -292,6 +290,7 @@ config NUMA
>   bool "NUMA support"
>   depends on !FLATMEM
>   select SMP
> + select USE_PERCPU_NUMA_NODE_ID
>   help
> Say Y to compile the kernel to support NUMA (Non-Uniform Memory
> Access).  This option is for configuring high-end multiprocessor
> @@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION
>   def_bool y
>   depends on NUMA
>  
> -config USE_PERCPU_NUMA_NODE_ID
> - def_bool y
> - depends on NUMA
> -
>  config HAVE_MEMORYLESS_NODES
>   def_bool NUMA
>  
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index de60ad190057..c106a2080877 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -2666,6 +2666,8 @@ config NUMA
>   bool "NUMA Support"
>   depends on SYS_SUPPORTS_NUMA
>   select SMP
> + select HAVE_SETUP_PER_CPU_AREA
> + select NEED_PER_CPU_EMBED_FIRST_CHUNK
>   help
> Say Y to compile the kernel to support NUMA (Non-Uniform Memory
> Access).  This option improves performance on systems with more
> @@ -2676,14 +2678,6 @@ config NUMA
>  config SYS_SUPPORTS_NUMA
>   bool
>  
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool y
> - depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y
> - depends on NUMA
> -
>  config RELOCATABLE
>   bool "Relocatable kernel"
>   depends on SYS_SUPPORTS_RELOCATABLE
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index dea74d7717c0..8badd39854a0 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
>   default 9 if PPC_16K_PAGES  #  9 = 23 (8MB) - 14 (16K)
>   default 11  # 11 = 23 (8MB) - 12 (4K)
>  
> -config HAVE_SETUP_PER_CPU_AREA
> - def_bool PPC64
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> - def_bool y if PPC64
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> - def_bool y if PPC64
> -
>  config NR_IRQS
>   int "Number of virtual interrupt numbers"
>   range 32 1048576
> @@ -240,6 +231,7 @@ config PPC
>   select HAVE_REGS_AND_STACK_ACCESS_API
>   select HAVE_RELIABLE_STACKTRACE
>   select HAVE_RSEQ
> + select HAVE_SETUP_PER_CPU_AREA  if PPC64
>   select HAVE_SOFTIRQ_ON_OWN_STACK
>   select HAVE_STACKPROTECTOR  if PPC32 && 
> $(cc-option,-mstack-protector-guard=tls 

Re: [RFC PATCH 0/3] Use pageblock_order for cma and alloc_contig_range alignment.

2021-11-29 Thread Zi Yan
On 23 Nov 2021, at 12:32, Vlastimil Babka wrote:

> On 11/23/21 17:35, Zi Yan wrote:
>> On 19 Nov 2021, at 10:15, Zi Yan wrote:
> From my understanding, cma required alignment of
> max(MAX_ORDER - 1, pageblock_order), because when MIGRATE_CMA was 
> introduced,
> __free_one_page() does not prevent merging two different pageblocks, when
> MAX_ORDER - 1 > pageblock_order. But current __free_one_page() 
> implementation
> does prevent that.

 But it does prevent that only for isolated pageblock, not CMA, and your
 patchset doesn't seem to expand that to CMA? Or am I missing something.
>>>
>>> Yeah, you are right. Originally, I thought preventing merging isolated 
>>> pageblock
>>> with other types of pageblocks is sufficient, since MIGRATE_CMA is always
>>> converted from MIGRATE_ISOLATE. But that is not true. I will rework the 
>>> code.
>>> Thanks for pointing this out.
>>>
>>
>> I find that two pageblocks with different migratetypes, like 
>> MIGRATE_RECLAIMABLE
>> and MIGRATE_MOVABLE can be merged into a single free page after I checked
>> __free_one_page() in detail and printed pageblock information during buddy 
>> page
>> merging.
>
> Yes, that can happen.
>
> I am not sure what consequence it will cause. Do you have any idea?
>
> For MIGRATE_RECLAIMABLE or MIGRATE_MOVABLE or even MIGRATE_UNMOVABLE it's
> absolutely fine. As long as these pageblocks are fully free (and they are if
> it's a single free page spanning 2 pageblocks), they can be of any of these
> type, as they can be reused as needed without causing fragmentation.
>
> But in case of MIGRATE_CMA and MIGRATE_ISOLATE, uncontrolled merging would
> break the specifics of those types. That's why the code is careful for
> MIGRATE_ISOLATE, and MIGRATE_CMA was until now done in MAX_ORDER granularity.

Thanks for the explanation. Basically migratetypes that can fall back to each
other can be merged into a single free page, right?

How about MIGRATE_HIGHATOMIC? It should not be merged with other migratetypes
from my understanding.


--
Best Regards,
Yan, Zi


signature.asc
Description: OpenPGP digital signature


Re: [patch 05/22] genirq/msi: Fixup includes

2021-11-29 Thread Thomas Gleixner
Cedric,

On Mon, Nov 29 2021 at 08:33, Cédric Le Goater wrote:
> On 11/27/21 02:18, Thomas Gleixner wrote:
>> Remove the kobject.h include from msi.h as it's not required and add a
>> sysfs.h include to the core code instead.
>> 
>> Signed-off-by: Thomas Gleixner 
>
>
> This patch breaks compile on powerpc :
>
>CC  arch/powerpc/kernel/msi.o
> In file included from ../arch/powerpc/kernel/msi.c:7:
> ../include/linux/msi.h:410:65: error: ‘struct cpumask’ declared inside 
> parameter list will not be visible outside of this definition or declaration 
> [-Werror]
>410 | int msi_domain_set_affinity(struct irq_data *data, const struct 
> cpumask *mask,
>| 
> ^~~
> cc1: all warnings being treated as errors
>
> Below is a fix you can merge into patch 5.

thanks for having a look. I fixed up this and other fallout and pushed out an
updated series (all 4 parts) to:

git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel msi

Thanks,

tglx


[Bug 215169] UBSAN: shift-out-of-bounds in arch/powerpc/mm/kasan/book3s_32.c:22:23

2021-11-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=215169

--- Comment #1 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 299779
  --> https://bugzilla.kernel.org/attachment.cgi?id=299779&action=edit
kernel .config (5.15.5, PowerMac G4 DP)

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 215169] New: UBSAN: shift-out-of-bounds in arch/powerpc/mm/kasan/book3s_32.c:22:23

2021-11-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=215169

Bug ID: 215169
   Summary: UBSAN: shift-out-of-bounds in
arch/powerpc/mm/kasan/book3s_32.c:22:23
   Product: Platform Specific/Hardware
   Version: 2.5
Kernel Version: 5.15.5
  Hardware: PPC-32
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: PPC-32
  Assignee: platform_ppc...@kernel-bugs.osdl.org
  Reporter: erhar...@mailbox.org
Regression: No

Created attachment 299777
  --> https://bugzilla.kernel.org/attachment.cgi?id=299777&action=edit
dmesg (5.15.5, INLINE KASAN, PowerMac G4 DP)

Noticed another small glitch during 5.15.x testing. Happens when inline KASAN
is selected:

Total memory = 2048MB; using 4096kB for hash table
Activating Kernel Userspace Access Protection
Linux version 5.15.5-gentoo-PowerMacG4 (root@T1000) (gcc (Gentoo 11.2.0 p1)
11.2.0, GNU ld (Gentoo 2.37_p1 p0) 2.37) #9 SMP Mon Nov 29 20:46:44 CET 2021

UBSAN: shift-out-of-bounds in arch/powerpc/mm/kasan/book3s_32.c:22:23
shift exponent -1 is negative
CPU: 0 PID: 0 Comm: swapper Not tainted 5.15.5-gentoo-PowerMacG4 #9
Call Trace:
[c214be60] [c0ba0048] dump_stack_lvl+0x80/0xb0 (unreliable)
[c214be80] [c0b99288] ubsan_epilogue+0x10/0x5c
[c214be90] [c0b98fe0] __ubsan_handle_shift_out_of_bounds+0x94/0x138
[c214bf00] [c1c0f010] kasan_init_region+0xd8/0x26c
[c214bf30] [c1c0ed84] kasan_init+0xc0/0x198
[c214bf70] [c1c08024] setup_arch+0x18/0x54c
[c214bfc0] [c1c037f0] start_kernel+0x90/0x33c
[c214bff0] [3610] 0x3610

setbat: no BAT available for mapping 0x1c00
KASAN init done
[...]

The other UBSAN complaint in the netconsole.log is bug #214867.

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 205099] KASAN hit at raid6_pq: BUG: Unable to handle kernel data access at 0x00f0fd0d

2021-11-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=205099

--- Comment #45 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 299773
  --> https://bugzilla.kernel.org/attachment.cgi?id=299773&action=edit
kernel_page_tables (5.15.5, OUTLINE KASAN, LOWMEM_SIZE=0x3000, PowerMac G4
DP)

Ah yes, I forgot about including the /sys/kernel/debug/kernel_page_tables..
Sorry! Here you are.

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 205099] KASAN hit at raid6_pq: BUG: Unable to handle kernel data access at 0x00f0fd0d

2021-11-29 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=205099

--- Comment #46 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 299775
  --> https://bugzilla.kernel.org/attachment.cgi?id=299775&action=edit
kernel_page_tables (5.15.5, INLINE KASAN, LOWMEM_SIZE=0x2000, PowerMac G4
DP)

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

Re: [PATCH] powerpc/pseries/vas: Don't print an error when VAS is unavailable

2021-11-29 Thread Tyrel Datwyler
On 11/26/21 2:31 AM, Nicholas Piggin wrote:
> Excerpts from Cédric Le Goater's message of November 26, 2021 5:13 pm:
>> On 11/26/21 06:21, Nicholas Piggin wrote:
>>> KVM does not support VAS so guests always print a useless error on boot
>>>
>>>  vas: HCALL(398) error -2, query_type 0, result buffer 0x57f2000
>>>
>>> Change this to only print the message if the error is not H_FUNCTION.
>>
>>
>> Just being curious, why is it even called since "ibm,compression" should
>> not be exposed in the DT ?
> 
> It looks like vas does not test for it. I guess in theory there can be 
> other functions than compression implemented as an accelerator. Maybe
> that's why?
> 
> Thanks,
> Nick
> 
Looks like pseries_vas_init() simply calls h_query_vas_capabilities() to test
for VAS coprocessor support. I would assume KVM doesn't expose hcall-vas or
hcall-nx in /rtas/ibm,hypertas-functions? Doesn't look like hcall-vas or
hcall-nx have been added to the hypertas_fw_feature matching, but maybe they
should and we can gate VAS initialization on those, or at the minimum
FW_FEATURE_VAS?

-Tyrel


[PATCH 11/11] locking: Allow to include asm/spinlock_types.h from linux/spinlock_types_raw.h

2021-11-29 Thread Sebastian Andrzej Siewior
The printk header file includes ratelimit_types.h for its __ratelimit()
based usage. It is required for the static initializer used in
printk_ratelimited(). It uses a raw_spinlock_t and includes the
spinlock_types.h.

PREEMPT_RT substitutes spinlock_t with a rtmutex based implementation and so
its spinlock_t implementation (provided by spinlock_rt.h) includes rtmutex.h and
atomic.h which leads to recursive includes where defines are missing.

By including only the raw_spinlock_t defines it avoids the atomic.h
related includes at this stage.

An example on powerpc:

|  CALLscripts/atomic/check-atomics.sh
|In file included from include/linux/bug.h:5,
| from include/linux/page-flags.h:10,
| from kernel/bounds.c:10:
|arch/powerpc/include/asm/page_32.h: In function ‘clear_page’:
|arch/powerpc/include/asm/bug.h:87:4: error: implicit declaration of function 
‘__WARN’ [-Werror=implicit-function-declaration]
|   87 |__WARN();\
|  |^~
|arch/powerpc/include/asm/page_32.h:48:2: note: in expansion of macro ‘WARN_ON’
|   48 |  WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1));
|  |  ^~~
|arch/powerpc/include/asm/bug.h:58:17: error: invalid application of ‘sizeof’ 
to incomplete type ‘struct bug_entry’
|   58 | "i" (sizeof(struct bug_entry)), \
|  | ^~
|arch/powerpc/include/asm/bug.h:89:3: note: in expansion of macro ‘BUG_ENTRY’
|   89 |   BUG_ENTRY(PPC_TLNEI " %4, 0",   \
|  |   ^
|arch/powerpc/include/asm/page_32.h:48:2: note: in expansion of macro ‘WARN_ON’
|   48 |  WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1));
|  |  ^~~
|In file included from arch/powerpc/include/asm/ptrace.h:298,
| from arch/powerpc/include/asm/hw_irq.h:12,
| from arch/powerpc/include/asm/irqflags.h:12,
| from include/linux/irqflags.h:16,
| from include/asm-generic/cmpxchg-local.h:6,
| from arch/powerpc/include/asm/cmpxchg.h:526,
| from arch/powerpc/include/asm/atomic.h:11,
| from include/linux/atomic.h:7,
| from include/linux/rwbase_rt.h:6,
| from include/linux/rwlock_types.h:55,
| from include/linux/spinlock_types.h:74,
| from include/linux/ratelimit_types.h:7,
| from include/linux/printk.h:10,
| from include/asm-generic/bug.h:22,
| from arch/powerpc/include/asm/bug.h:109,
| from include/linux/bug.h:5,
| from include/linux/page-flags.h:10,
| from kernel/bounds.c:10:
|include/linux/thread_info.h: In function ‘copy_overflow’:
|include/linux/thread_info.h:210:2: error: implicit declaration of function 
‘WARN’ [-Werror=implicit-function-declaration]
|  210 |  WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
|  |  ^~~~

The WARN / BUG include pulls in printk.h and then ptrace.h expects WARN
(from bug.h) which is not yet complete. Even hw_irq.h has WARN_ON()
statements.

On POWERPC64 there are missing atomic64 defines while building 32bit
VDSO:
|  VDSO32C arch/powerpc/kernel/vdso32/vgettimeofday.o
|In file included from include/linux/atomic.h:80,
| from include/linux/rwbase_rt.h:6,
| from include/linux/rwlock_types.h:55,
| from include/linux/spinlock_types.h:74,
| from include/linux/ratelimit_types.h:7,
| from include/linux/printk.h:10,
| from include/linux/kernel.h:19,
| from arch/powerpc/include/asm/page.h:11,
| from arch/powerpc/include/asm/vdso/gettimeofday.h:5,
| from include/vdso/datapage.h:137,
| from lib/vdso/gettimeofday.c:5,
| from <command-line>:
|include/linux/atomic-arch-fallback.h: In function ‘arch_atomic64_inc’:
|include/linux/atomic-arch-fallback.h:1447:2: error: implicit declaration of 
function ‘arch_atomic64_add’; did you mean ‘arch_atomic_add’? [-Werror=impl
|icit-function-declaration]
| 1447 |  arch_atomic64_add(1, v);
|  |  ^
|  |  arch_atomic_add

The generic fallback is not included, atomics itself are not used. If
kernel.h does not include printk.h then it comes later from the bug.h
include.

Allow asm/spinlock_types.h to be included from
linux/spinlock_types_raw.h.

Cc: Albert Ou 
Cc: Alexander Gordeev 
Cc: Benjamin Herrenschmidt 
Cc: Brian Cain 
Cc: Catalin Marinas 
Cc: Chris Zankel 
Cc: Christian Borntraeger 
Cc: Guo Ren 
Cc: Heiko Carstens 
Cc: Ivan Kokshaysky 
Cc: Matt Turner 
Cc: Max Filippov 
Cc: Michael Ellerman 
Cc: Palmer Dabbelt 
Cc: Paul Mackerras 
Cc: Paul Walmsley 
Cc: Rich Felker 
Cc: Richard Henderson 
Cc: Russell King 
Cc: Vasily Gorbik 
Cc: Yoshinori Sato 
Cc: linux-al...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-c...@vger.kernel.org
Cc: linux-hexa...@vger.kernel.org
Cc: linux-i...@vger.kernel.org

Re: [PATCH] recordmcount: Support empty section from recent binutils

2021-11-29 Thread Christophe Leroy




Le 29/11/2021 à 18:43, Steven Rostedt a écrit :

On Fri, 26 Nov 2021 08:43:23 +
LEROY Christophe  wrote:


Le 24/11/2021 à 15:43, Christophe Leroy a écrit :

Looks like recent binutils (2.36 and over ?) may empty some section,
leading to failure like:

Cannot find symbol for section 11: .text.unlikely.
kernel/kexec_file.o: failed
make[1]: *** [scripts/Makefile.build:287: kernel/kexec_file.o] Error 1

In order to avoid that, ensure that the section has a content before
returning its name in has_rel_mcount().


This patch doesn't work, on PPC32 I get the following message with this
patch applied:

[0.00] ftrace: No functions to be traced?

Without the patch I get:

[0.00] ftrace: allocating 22381 entries in 66 pages
[0.00] ftrace: allocated 66 pages with 2 groups


Because of this report, I have not applied this patch (even though I was
about to push it to Linus).

I'm pulling it from my queue until this gets resolved.



I have no idea on how to fix that for the moment.

With GCC 10 (binutils 2.36) an objdump -x on kernel/kexec_file.o gives:

 ld  .text.unlikely  .text.unlikely
  wF .text.unlikely	0038 
.arch_kexec_apply_relocations_add
0038  wF .text.unlikely	0038 
.arch_kexec_apply_relocations



With GCC 11 (binutils 2.37) the same gives:

  wF .text.unlikely	0038 
.arch_kexec_apply_relocations_add
0038  wF .text.unlikely	0038 
.arch_kexec_apply_relocations



The problem is that recordmcount drops weak symbols, and it doesn't find 
any non-weak symbol in .text.unlikely


Explanation given at 
https://elixir.bootlin.com/linux/v5.16-rc2/source/scripts/recordmcount.h#L506


I have no idea on what to do.

Thanks
Christophe


[PATCH 10/10] powerpc/pseries/vas: Write 'target_creds' for QoS credits change

2021-11-29 Thread Haren Myneni


PowerVM supports two types of credits - Default (uses normal priority
FIFO) and Quality of Service (QoS uses high priority FIFO). The user
decides the number of QoS credits and sets this value with HMC
interface. With the core add/removal, this value can be changed in HMC
which invokes drmgr to communicate to the kernel.

This patch adds an interface so that drmgr command can write the new
target QoS credits in sysfs. But the kernel gets the new QoS
capabilities from the hypervisor whenever target_creds is updated
to stay in sync with the values in the hypervisor.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/pseries/vas-sysfs.c | 34 +-
 arch/powerpc/platforms/pseries/vas.c   |  2 +-
 arch/powerpc/platforms/pseries/vas.h   |  1 +
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c 
b/arch/powerpc/platforms/pseries/vas-sysfs.c
index 625082bebcb2..5bae2269d194 100644
--- a/arch/powerpc/platforms/pseries/vas-sysfs.c
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -32,6 +32,34 @@ static ssize_t avail_creds_show(struct vas_cop_feat_caps 
*caps, char *buf)
return sprintf(buf, "%d\n", avail_creds);
 }
 
+/*
+ * This function is used to get the notification from the drmgr when
+ * QoS credits are changed as part of DLPAR core add/removal. Though
+ * receiving the total QoS credits here, get the official QoS
+ * capabilities from the hypervisor.
+ */
+static ssize_t target_creds_store(struct vas_cop_feat_caps *caps,
+  const char *buf, size_t count)
+{
+   int err;
+   u16 creds;
+
+   /*
+* Nothing to do for default credit type.
+*/
+   if (caps->win_type == VAS_GZIP_DEF_FEAT_TYPE)
+   return -EOPNOTSUPP;
+
+   err = kstrtou16(buf, 0, &creds);
+   if (!err)
+   err = vas_reconfig_capabilties(caps->win_type);
+
+   if (err)
+   return -EINVAL;
+
+   return count;
+}
+
 #define sysfs_capbs_entry_read(_name)  \
 static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) 
\
 {  \
@@ -48,8 +76,12 @@ struct vas_sysfs_entry {
sysfs_capbs_entry_read(_name);  \
static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \
0444, _name##_show, NULL);
+#define VAS_ATTR(_name)
\
+   sysfs_capbs_entry_read(_name);  \
+   static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \
+   0644, _name##_show, _name##_store)
 
-VAS_ATTR_RO(target_creds);
+VAS_ATTR(target_creds);
 VAS_ATTR_RO(used_creds);
 
 static struct vas_sysfs_entry avail_creds_attribute =
diff --git a/arch/powerpc/platforms/pseries/vas.c 
b/arch/powerpc/platforms/pseries/vas.c
index c769c8534b3a..d271fa71bded 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -707,7 +707,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, 
int excess_creds)
  * changes. Reconfig window configurations based on the credits
  * availability from this new capabilities.
  */
-static int vas_reconfig_capabilties(u8 type)
+int vas_reconfig_capabilties(u8 type)
 {
int lpar_creds, avail_creds, excess_creds;
struct hv_vas_cop_feat_caps *hv_caps;
diff --git a/arch/powerpc/platforms/pseries/vas.h 
b/arch/powerpc/platforms/pseries/vas.h
index 15bcadf8872a..e369db35f0fd 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -130,5 +130,6 @@ struct pseries_vas_window {
 };
 
 int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
+int vas_reconfig_capabilties(u8 type);
 int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
 #endif /* _VAS_H */
-- 
2.27.0




[PATCH 09/10] powerpc/pseries/vas: sysfs interface to export capabilities

2021-11-29 Thread Haren Myneni


The hypervisor provides the available VAS GZIP capabilities such
as default or QoS window type and the target available credits in
each type. This patch creates sysfs entries and exports the target,
used and the available credits for each feature.

This interface can be used by the user space to determine the credits
usage or to set the target credits in the case of QoS type (for DLPAR).

/sys/devices/vas/vas0/gzip/def_caps: (default GZIP capabilities)
avail_creds /* Available credits to use */
target_creds /* Total credits available. Can be
 /* changed with DLPAR operation */
used_creds  /* Used credits */

/sys/devices/vas/vas0/gzip/qos_caps (QoS GZIP capabilities)
avail_creds
target_creds
used_creds

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/pseries/Makefile|   2 +-
 arch/powerpc/platforms/pseries/vas-sysfs.c | 214 +
 arch/powerpc/platforms/pseries/vas.c   |   6 +
 arch/powerpc/platforms/pseries/vas.h   |   6 +
 4 files changed, 227 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/pseries/vas-sysfs.c

diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index 41d8aee98da4..349f42c31b65 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -30,6 +30,6 @@ obj-$(CONFIG_PPC_SVM) += svm.o
 obj-$(CONFIG_FA_DUMP)  += rtas-fadump.o
 
 obj-$(CONFIG_SUSPEND)  += suspend.o
-obj-$(CONFIG_PPC_VAS)  += vas.o
+obj-$(CONFIG_PPC_VAS)  += vas.o vas-sysfs.o
 
 obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o
diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c 
b/arch/powerpc/platforms/pseries/vas-sysfs.c
new file mode 100644
index ..625082bebcb2
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vas.h"
+
+#ifdef CONFIG_SYSFS
+static struct kobject *pseries_vas_kobj;
+static struct kobject *gzip_caps_kobj;
+
+struct vas_caps_entry {
+   struct kobject kobj;
+   struct vas_cop_feat_caps *caps;
+};
+
+#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj)
+
+static ssize_t avail_creds_show(struct vas_cop_feat_caps *caps, char *buf)
+{
+   int avail_creds = atomic_read(&caps->target_creds) -
+   atomic_read(&caps->used_creds);
+   return sprintf(buf, "%d\n", avail_creds);
+}
+
+#define sysfs_capbs_entry_read(_name)  \
+static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) 
\
+{  \
+   return sprintf(buf, "%d\n", atomic_read(&caps->_name)); \
+}
+
+struct vas_sysfs_entry {
+   struct attribute attr;
+   ssize_t (*show)(struct vas_cop_feat_caps *, char *);
+   ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t);
+};
+
+#define VAS_ATTR_RO(_name) \
+   sysfs_capbs_entry_read(_name);  \
+   static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \
+   0444, _name##_show, NULL);
+
+VAS_ATTR_RO(target_creds);
+VAS_ATTR_RO(used_creds);
+
+static struct vas_sysfs_entry avail_creds_attribute =
+   __ATTR(avail_creds, 0444, avail_creds_show, NULL);
+
+static struct attribute *vas_capab_attrs[] = {
+   _creds_attribute.attr,
+   _creds_attribute.attr,
+   _creds_attribute.attr,
+   NULL,
+};
+
+static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr,
+char *buf)
+{
+   struct vas_caps_entry *centry;
+   struct vas_cop_feat_caps *caps;
+   struct vas_sysfs_entry *entry;
+
+   centry = to_caps_entry(kobj);
+   caps = centry->caps;
+   entry = container_of(attr, struct vas_sysfs_entry, attr);
+
+   if (!entry->show)
+   return -EIO;
+
+   return entry->show(caps, buf);
+}
+
+static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+   struct vas_caps_entry *centry;
+   struct vas_cop_feat_caps *caps;
+   struct vas_sysfs_entry *entry;
+
+   centry = to_caps_entry(kobj);
+   caps = centry->caps;
+   entry = container_of(attr, struct vas_sysfs_entry, attr);
+   if (!entry->store)
+   return -EIO;
+
+   return entry->store(caps, buf, count);
+}
+
+static void vas_type_release(struct kobject *kobj)
+{
+   struct vas_caps_entry *centry = to_caps_entry(kobj);
+   kfree(centry);
+}
+
+static const struct sysfs_ops vas_sysfs_ops = {
+   .show   =   vas_type_show,
+   .store  =   vas_type_store,
+};
+
+static struct 

[PATCH 08/10] powerpc/vas: Return paste instruction failure if no active window

2021-11-29 Thread Haren Myneni


The VAS window may not be active if the system loses credits, and
the NX generates a page fault when it receives a request on an
unmapped paste address.

The kernel handles the fault by remapping the new paste address if
the window is active again. Otherwise it returns the paste instruction
failure if the executed instruction that caused the fault was
a paste.

Signed-off-by: Nicholas Piggin 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/ppc-opcode.h   |  2 ++
 arch/powerpc/platforms/book3s/vas-api.c | 47 -
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index baea657bc868..30bb3c0e07f9 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -261,6 +261,8 @@
 #define PPC_INST_MFSPR_PVR 0x7c1f42a6
 #define PPC_INST_MFSPR_PVR_MASK0xfc1e
 #define PPC_INST_MTMSRD0x7c000164
+#define PPC_INST_PASTE 0x7c20070d
+#define PPC_INST_PASTE_MASK0xfc2007ff
 #define PPC_INST_POPCNTB   0x7cf4
 #define PPC_INST_POPCNTB_MASK  0xfc0007fe
 #define PPC_INST_RFEBB 0x4c000124
diff --git a/arch/powerpc/platforms/book3s/vas-api.c 
b/arch/powerpc/platforms/book3s/vas-api.c
index 5ceba75c13eb..2ffd34bc4032 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -351,6 +351,41 @@ static int coproc_release(struct inode *inode, struct file 
*fp)
return 0;
 }
 
+/*
+ * If the executed instruction that caused the fault was a paste, then
+ * clear regs CR0[EQ], advance NIP, and return 0. Else return error code.
+ */
+static int do_fail_paste(void)
+{
+   struct pt_regs *regs = current->thread.regs;
+   u32 instword;
+
+   if (WARN_ON_ONCE(!regs))
+   return -EINVAL;
+
+   if (WARN_ON_ONCE(!user_mode(regs)))
+   return -EINVAL;
+
+   /*
+* If we couldn't translate the instruction, the driver should
+* return success without handling the fault, it will be retried
+* or the instruction fetch will fault.
+*/
+   if (get_user(instword, (u32 __user *)(regs->nip)))
+   return -EAGAIN;
+
+   /*
+* Not a paste instruction, driver may fail the fault.
+*/
+   if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE)
+   return -ENOENT;
+
+   regs->ccr &= ~0xe000;   /* Clear CR0[0-2] to fail paste */
+   regs_add_return_ip(regs, 4);/* Skip the paste */
+
+   return 0;
+}
+
 /*
  * This fault handler is invoked when the VAS/NX generates page fault on
  * the paste address. Happens if the kernel closes window in hypervisor
@@ -403,9 +438,19 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
}
mutex_unlock(>task_ref.mmap_mutex);
 
-   return VM_FAULT_SIGBUS;
+   /*
+* Received this fault due to closing the actual window.
+* It can happen during migration or lost credits.
+* Since no mapping, return the paste instruction failure
+* to the user space.
+*/
+   ret = do_fail_paste();
+   if (!ret)
+   return VM_FAULT_NOPAGE;
 
+   return VM_FAULT_SIGBUS;
 }
+
 static const struct vm_operations_struct vas_vm_ops = {
.fault = vas_mmap_fault,
 };
-- 
2.27.0




[PATCH 07/10] powerpc/vas: Add paste address mmap fault handler

2021-11-29 Thread Haren Myneni


The user space opens VAS windows and issues NX requests by pasting
CRB on the corresponding paste address mmap. When the system loses
credits due to core removal, the kernel has to close the window in
the hypervisor and make the window inactive by unmapping this paste
address. Also the OS has to handle NX request page faults if the user
space issues NX requests.

This handler remaps the new paste address with the same VMA when the
window is active again (due to core add with DLPAR). Otherwise it
returns paste failure.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/book3s/vas-api.c | 60 +
 1 file changed, 60 insertions(+)

diff --git a/arch/powerpc/platforms/book3s/vas-api.c 
b/arch/powerpc/platforms/book3s/vas-api.c
index 2d06bd1b1935..5ceba75c13eb 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -351,6 +351,65 @@ static int coproc_release(struct inode *inode, struct file 
*fp)
return 0;
 }
 
+/*
+ * This fault handler is invoked when the VAS/NX generates page fault on
+ * the paste address. Happens if the kernel closes window in hypervisor
+ * (on PowerVM) due to lost credit or the paste address is not mapped.
+ */
+static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
+{
+   struct vm_area_struct *vma = vmf->vma;
+   struct file *fp = vma->vm_file;
+   struct coproc_instance *cp_inst = fp->private_data;
+   struct vas_window *txwin;
+   u64 paste_addr;
+   int ret;
+
+   /*
+* window is not opened. Shouldn't expect this error.
+*/
+   if (!cp_inst || !cp_inst->txwin) {
+   pr_err("%s(): No send window open?\n", __func__);
+   return VM_FAULT_SIGBUS;
+   }
+
+   txwin = cp_inst->txwin;
+   /*
+* Fault is coming due to missing from the original mmap.
+* Can happen only when the window is closed due to lost
+* credit before mmap() or the user space issued NX request
+* without mapping.
+*/
+   if (txwin->task_ref.vma != vmf->vma) {
+   pr_err("%s(): No previous mapping with paste address\n",
+   __func__);
+   return VM_FAULT_SIGBUS;
+   }
+
+   mutex_lock(&txwin->task_ref.mmap_mutex);
+   /*
+* The window may be inactive due to lost credit (Ex: core
+* removal with DLPAR). When the window is active again when
+* the credit is available, remap with the new paste address.
+*/
+   if (txwin->status == VAS_WIN_ACTIVE) {
+   paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+   if (paste_addr) {
+   ret = vmf_insert_pfn(vma, vma->vm_start,
+   (paste_addr >> PAGE_SHIFT));
+   mutex_unlock(&txwin->task_ref.mmap_mutex);
+   return ret;
+   }
+   }
+   mutex_unlock(&txwin->task_ref.mmap_mutex);
+
+   return VM_FAULT_SIGBUS;
+
+}
+static const struct vm_operations_struct vas_vm_ops = {
+   .fault = vas_mmap_fault,
+};
+
 static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
 {
struct coproc_instance *cp_inst = fp->private_data;
@@ -417,6 +476,7 @@ static int coproc_mmap(struct file *fp, struct 
vm_area_struct *vma)
paste_addr, vma->vm_start, rc);
 
txwin->task_ref.vma = vma;
+   vma->vm_ops = &vas_vm_ops;
 
 out:
mutex_unlock(>task_ref.mmap_mutex);
-- 
2.27.0




Re: [PATCH] recordmcount: Support empty section from recent binutils

2021-11-29 Thread Steven Rostedt
On Fri, 26 Nov 2021 08:43:23 +
LEROY Christophe  wrote:

> Le 24/11/2021 à 15:43, Christophe Leroy a écrit :
> > Looks like recent binutils (2.36 and over ?) may empty some section,
> > leading to failure like:
> > 
> > Cannot find symbol for section 11: .text.unlikely.
> > kernel/kexec_file.o: failed
> > make[1]: *** [scripts/Makefile.build:287: kernel/kexec_file.o] Error 1
> > 
> > In order to avoid that, ensure that the section has a content before
> > returning it's name in has_rel_mcount().  
> 
> This patch doesn't work, on PPC32 I get the following message with this 
> patch applied:
> 
> [0.00] ftrace: No functions to be traced?
> 
> Without the patch I get:
> 
> [0.00] ftrace: allocating 22381 entries in 66 pages
> [0.00] ftrace: allocated 66 pages with 2 groups

Because of this report, I have not applied this patch (even though I was
about to push it to Linus).

I'm pulling it from my queue until this gets resolved.

Thanks,

-- Steve


[PATCH 06/10] powerpc/vas: Map paste address only if window is active

2021-11-29 Thread Haren Myneni


The paste address mapping is done with mmap() after the window is
opened with ioctl. But the window can be closed before mmap() because
of credits lost to core removal. So if the window is not active,
return mmap() failure with -EACCES and expect the user space to reissue
mmap() when the window is active again, or to open a new window when
the credit is available.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/book3s/vas-api.c | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/book3s/vas-api.c
b/arch/powerpc/platforms/book3s/vas-api.c
index a63fd48e34a7..2d06bd1b1935 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -379,10 +379,27 @@ static int coproc_mmap(struct file *fp, struct
vm_area_struct *vma)
return -EACCES;
}
 
+   /*
+* The initial mapping is done after the window is opened
+* with ioctl. But this window might have been closed
+* due to lost credit (core removal on PowerVM) before mmap().
+* So if the window is not active, return mmap() failure
+* with -EACCES and expects the user space reconfigure (mmap)
+* window when it is active again or open new window when
+* the credit is available.
+*/
+   mutex_lock(&txwin->task_ref.mmap_mutex);
+   if (txwin->status != VAS_WIN_ACTIVE) {
+   pr_err("%s(): Window is not active\n", __func__);
+   rc = -EACCES;
+   goto out;
+   }
+
paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
if (!paste_addr) {
pr_err("%s(): Window paste address failed\n",
__func__);
-   return -EINVAL;
+   rc = -EINVAL;
+   goto out;
}
 
pfn = paste_addr >> PAGE_SHIFT;
@@ -401,6 +418,8 @@ static int coproc_mmap(struct file *fp, struct
vm_area_struct *vma)
 
txwin->task_ref.vma = vma;
 
+out:
+   mutex_unlock(&txwin->task_ref.mmap_mutex);
return rc;
 }
 
-- 
2.27.0




[PATCH 05/10] powerpc/pseries/vas: Close windows with DLPAR core removal

2021-11-29 Thread Haren Myneni


The hypervisor reduces the available credits if the core is removed
from the LPAR. So there is possibility of using excessive credits
(windows) in the LPAR and the hypervisor expects the system to close
the excessive windows. Even though the user space can continue to use
these windows to send compression requests to NX, the hypervisor expects
the LPAR to reduce these windows usage so that NX load can be equally
distributed across all LPARs in the system.

When the DLPAR notifier is received, get the new VAS capabilities from
the hypervisor and close the excessive windows in the hypervisor. Also
the kernel unmaps the paste address so that the user space receives paste
failure until these windows are active with the later DLPAR (core add).

Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/vas.h  |  1 +
 arch/powerpc/platforms/book3s/vas-api.c |  2 +
 arch/powerpc/platforms/pseries/vas.c| 93 -
 3 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index 43cea69d1af1..72d1df038b4b 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -73,6 +73,7 @@ struct vas_user_win_ref {
struct mm_struct *mm;   /* Linux process mm_struct */
struct mutex mmap_mutex;/* protects paste address mmap() */
/* with DLPAR close/open windows */
+   struct vm_area_struct *vma; /* Save VMA and used in DLPAR ops */
 };
 
 /*
diff --git a/arch/powerpc/platforms/book3s/vas-api.c 
b/arch/powerpc/platforms/book3s/vas-api.c
index 2b0ced611f32..a63fd48e34a7 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -399,6 +399,8 @@ static int coproc_mmap(struct file *fp, struct 
vm_area_struct *vma)
pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__,
paste_addr, vma->vm_start, rc);
 
+   txwin->task_ref.vma = vma;
+
return rc;
 }
 
diff --git a/arch/powerpc/platforms/pseries/vas.c 
b/arch/powerpc/platforms/pseries/vas.c
index ace8ee7a99e6..ed458620f007 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -431,14 +431,27 @@ static int vas_deallocate_window(struct vas_window *vwin)
 
caps = [win->win_type].caps;
mutex_lock(_pseries_mutex);
+   /*
+* VAS window is already closed in the hypervisor when
+* lost the credit. So just remove the entry from
+* the list, remove task references and free vas_window
+* struct.
+*/
+   if (win->vas_win.status == VAS_WIN_NO_CRED_CLOSE) {
+   vascaps[win->win_type].close_wins--;
+   goto out;
+   }
+
rc = deallocate_free_window(win);
if (rc) {
mutex_unlock(_pseries_mutex);
return rc;
}
 
-   list_del(>win_list);
atomic_dec(>used_creds);
+
+out:
+   list_del(>win_list);
mutex_unlock(_pseries_mutex);
 
put_vas_user_win_ref(>task_ref);
@@ -617,6 +630,74 @@ static int reconfig_open_windows(struct vas_caps *vcaps, 
int creds)
return rc;
 }
 
+/*
+ * The hypervisor reduces the available credits if the LPAR lost core. It
+ * means the excessive windows should not be active and the user space
+ * should not be using these windows to send compression requests to NX.
+ * So the kernel closes the excessive windows and unmap the paste address
+ * such that the user space receives paste instruction failure. Then up to
+ * the user space to fall back to SW compression and manage with the
+ * existing windows.
+ */
+static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds)
+{
+   struct vas_cop_feat_caps *caps =  >caps;
+   struct vm_area_struct *vma;
+   struct pseries_vas_window *win;
+   struct vas_user_win_ref *task_ref;
+   int rc = 0;
+
+   list_for_each_entry(win, >list, win_list) {
+   /*
+* This window is already closed due to lost credit
+* before. Go for next window.
+*/
+   if (win->vas_win.status == VAS_WIN_NO_CRED_CLOSE)
+   continue;
+
+   task_ref = >vas_win.task_ref;
+   mutex_lock(_ref->mmap_mutex);
+   vma = task_ref->vma;
+   /*
+* Number of available credits are reduced, So select
+* and close windows.
+*/
+   win->vas_win.status = VAS_WIN_NO_CRED_CLOSE;
+
+   mmap_write_lock(task_ref->mm);
+   /*
+* vma is set in the original mapping. But this mapping
+* is done with mmap() after the window is opened with ioctl.
+* so we may not see the original mapping if the core remove
+* is done before the original mmap() and after the ioctl.
+

[PATCH v5 2/5] powerpc/inst: Define ppc_inst_t

2021-11-29 Thread Christophe Leroy
In order to stop using 'struct ppc_inst' on PPC32,
define a ppc_inst_t typedef.

Signed-off-by: Christophe Leroy 
---
v3: Rebased and resolved conflicts

v2: Anonymise the structure so that only the typedef can be used
---
 arch/powerpc/include/asm/code-patching.h  | 18 +++
 arch/powerpc/include/asm/hw_breakpoint.h  |  4 +-
 arch/powerpc/include/asm/inst.h   | 36 ++---
 arch/powerpc/include/asm/sstep.h  |  4 +-
 arch/powerpc/kernel/align.c   |  4 +-
 arch/powerpc/kernel/epapr_paravirt.c  |  2 +-
 arch/powerpc/kernel/hw_breakpoint.c   |  4 +-
 .../kernel/hw_breakpoint_constraints.c|  4 +-
 arch/powerpc/kernel/kprobes.c |  4 +-
 arch/powerpc/kernel/mce_power.c   |  2 +-
 arch/powerpc/kernel/optprobes.c   |  4 +-
 arch/powerpc/kernel/process.c |  2 +-
 arch/powerpc/kernel/setup_32.c|  2 +-
 arch/powerpc/kernel/trace/ftrace.c| 54 +--
 arch/powerpc/kernel/vecemu.c  |  2 +-
 arch/powerpc/lib/code-patching.c  | 38 ++---
 arch/powerpc/lib/feature-fixups.c |  4 +-
 arch/powerpc/lib/sstep.c  |  4 +-
 arch/powerpc/lib/test_emulate_step.c  | 10 ++--
 arch/powerpc/mm/maccess.c |  2 +-
 arch/powerpc/perf/8xx-pmu.c   |  2 +-
 arch/powerpc/xmon/xmon.c  | 14 ++---
 arch/powerpc/xmon/xmon_bpts.h |  4 +-
 23 files changed, 112 insertions(+), 112 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 4ba834599c4d..46e8c5a8ce51 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -24,20 +24,20 @@
 
 bool is_offset_in_branch_range(long offset);
 bool is_offset_in_cond_branch_range(long offset);
-int create_branch(struct ppc_inst *instr, const u32 *addr,
+int create_branch(ppc_inst_t *instr, const u32 *addr,
  unsigned long target, int flags);
-int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
   unsigned long target, int flags);
 int patch_branch(u32 *addr, unsigned long target, int flags);
-int patch_instruction(u32 *addr, struct ppc_inst instr);
-int raw_patch_instruction(u32 *addr, struct ppc_inst instr);
+int patch_instruction(u32 *addr, ppc_inst_t instr);
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
 
 static inline unsigned long patch_site_addr(s32 *site)
 {
return (unsigned long)site + *site;
 }
 
-static inline int patch_instruction_site(s32 *site, struct ppc_inst instr)
+static inline int patch_instruction_site(s32 *site, ppc_inst_t instr)
 {
return patch_instruction((u32 *)patch_site_addr(site), instr);
 }
@@ -58,11 +58,11 @@ static inline int modify_instruction_site(s32 *site, 
unsigned int clr, unsigned
return modify_instruction((unsigned int *)patch_site_addr(site), clr, 
set);
 }
 
-int instr_is_relative_branch(struct ppc_inst instr);
-int instr_is_relative_link_branch(struct ppc_inst instr);
+int instr_is_relative_branch(ppc_inst_t instr);
+int instr_is_relative_link_branch(ppc_inst_t instr);
 unsigned long branch_target(const u32 *instr);
-int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src);
-extern bool is_conditional_branch(struct ppc_inst instr);
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src);
+bool is_conditional_branch(ppc_inst_t instr);
 #ifdef CONFIG_PPC_BOOK3E_64
 void __patch_exception(int exc, unsigned long addr);
 #define patch_exception(exc, name) do { \
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index abebfbee5b1c..88053d3c68e6 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -56,11 +56,11 @@ static inline int nr_wp_slots(void)
return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1;
 }
 
-bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
  unsigned long ea, int type, int size,
  struct arch_hw_breakpoint *info);
 
-void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
 int *type, int *size, unsigned long *ea);
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 10a5c1b76ca0..b3502f21e0f4 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -8,7 +8,7 @@
 ({ \
long __gui_ret; \
u32 __user 

[PATCH v5 4/5] powerpc/inst: Move ppc_inst_t definition in asm/reg.h

2021-11-29 Thread Christophe Leroy
Because of circular inclusion of asm/hw_breakpoint.h, we
need to move the definition of ppc_inst_t out of asm/inst.h
and into asm/reg.h so that asm/hw_breakpoint.h gets it
without including asm/inst.h

Also remove asm/inst.h from asm/uprobes.h as it's not
needed anymore.

Signed-off-by: Christophe Leroy 
---
v4: New to support inlining of copy_inst_from_kernel_nofault() in following 
patch.
---
 arch/powerpc/include/asm/hw_breakpoint.h |  1 -
 arch/powerpc/include/asm/inst.h  | 10 +-
 arch/powerpc/include/asm/reg.h   | 12 
 arch/powerpc/include/asm/uprobes.h   |  1 -
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 88053d3c68e6..84d39fd42f71 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -10,7 +10,6 @@
 #define _PPC_BOOK3S_64_HW_BREAKPOINT_H
 
 #include 
-#include 
 
 #ifdef __KERNEL__
 struct arch_hw_breakpoint {
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 7ef5fd3bb167..53a40faf362a 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -3,6 +3,7 @@
 #define _ASM_POWERPC_INST_H
 
 #include 
+#include 
 
 #define ___get_user_instr(gu_op, dest, ptr)\
 ({ \
@@ -35,13 +36,6 @@
  */
 
 #if defined(CONFIG_PPC64) || defined(__CHECKER__)
-typedef struct {
-   u32 val;
-#ifdef CONFIG_PPC64
-   u32 suffix;
-#endif
-} __packed ppc_inst_t;
-
 static inline u32 ppc_inst_val(ppc_inst_t x)
 {
return x.val;
@@ -50,8 +44,6 @@ static inline u32 ppc_inst_val(ppc_inst_t x)
 #define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
 
 #else
-typedef u32 ppc_inst_t;
-
 static inline u32 ppc_inst_val(ppc_inst_t x)
 {
return x;
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e9d27265253b..85501181f929 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1366,6 +1366,18 @@
 
 /* Macros for setting and retrieving special purpose registers */
 #ifndef __ASSEMBLY__
+
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
+typedef struct {
+   u32 val;
+#ifdef CONFIG_PPC64
+   u32 suffix;
+#endif
+} __packed ppc_inst_t;
+#else
+typedef u32 ppc_inst_t;
+#endif
+
 #define mfmsr()({unsigned long rval; \
asm volatile("mfmsr %0" : "=r" (rval) : \
: "memory"); rval;})
diff --git a/arch/powerpc/include/asm/uprobes.h 
b/arch/powerpc/include/asm/uprobes.h
index fe683371336f..a7ae1860115a 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -11,7 +11,6 @@
 
 #include 
 #include 
-#include 
 
 typedef ppc_opcode_t uprobe_opcode_t;
 
-- 
2.33.1



[PATCH v5 3/5] powerpc/inst: Define ppc_inst_t as u32 on PPC32

2021-11-29 Thread Christophe Leroy
Unlike PPC64 ABI, PPC32 uses the stack to pass a parameter defined
as a struct, even when the struct has a single simple element.

To avoid that, define ppc_inst_t as u32 on PPC32.

Keep it as 'struct ppc_inst' when __CHECKER__ is defined so that
sparse can perform type checking.

Also revert commit 511eea5e2ccd ("powerpc/kprobes: Fix Oops by passing
ppc_inst as a pointer to emulate_step() on ppc32") as now the
instruction to be emulated is passed as a register to emulate_step().

Signed-off-by: Christophe Leroy 
---
v2: Make it work with kprobes
---
 arch/powerpc/include/asm/inst.h | 15 +--
 arch/powerpc/kernel/optprobes.c |  8 ++--
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index b3502f21e0f4..7ef5fd3bb167 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -34,6 +34,7 @@
  * Instruction data type for POWER
  */
 
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
 typedef struct {
u32 val;
 #ifdef CONFIG_PPC64
@@ -46,13 +47,23 @@ static inline u32 ppc_inst_val(ppc_inst_t x)
return x.val;
 }
 
+#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
+
+#else
+typedef u32 ppc_inst_t;
+
+static inline u32 ppc_inst_val(ppc_inst_t x)
+{
+   return x;
+}
+#define ppc_inst(x) (x)
+#endif
+
 static inline int ppc_inst_primary_opcode(ppc_inst_t x)
 {
return ppc_inst_val(x) >> 26;
 }
 
-#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
-
 #ifdef CONFIG_PPC64
 #define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) })
 
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 378db980ded3..3b1c2236cbee 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe 
*op, struct kprobe *p)
/*
 * 3. load instruction to be emulated into relevant register, and
 */
-   if (IS_ENABLED(CONFIG_PPC64)) {
-   temp = ppc_inst_read(p->ainsn.insn);
-   patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + 
TMPL_INSN_IDX);
-   } else {
-   patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + 
TMPL_INSN_IDX);
-   }
+   temp = ppc_inst_read(p->ainsn.insn);
+   patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
 
/*
 * 4. branch back from trampoline
-- 
2.33.1



[PATCH v5 5/5] powerpc/inst: Optimise copy_inst_from_kernel_nofault()

2021-11-29 Thread Christophe Leroy
copy_inst_from_kernel_nofault() uses copy_from_kernel_nofault() to
copy one or two 32bits words. This means calling an out-of-line
function which itself calls back copy_from_kernel_nofault_allowed()
then performs a generic copy with loops.

Rewrite copy_inst_from_kernel_nofault() to do everything at a
single place and use __get_kernel_nofault() directly to perform
single accesses without loops.

Although the generic function uses pagefault_disable(), it is not
required on powerpc because do_page_fault() bails earlier when a
kernel mode fault happens on a kernel address.

As the function has now become very small, inline it.

With this change, on an 8xx the time spent in the loop in
ftrace_replace_code() is reduced by 23% at function tracer activation
and 27% at nop tracer activation.
The overall time to activate function tracer (measured with shell
command 'time') is 570ms before the patch and 470ms after the patch.

Even vmlinux size is reduced (by 152 instructions).

Before the patch:

0018 :
  18:   94 21 ff e0 stwur1,-32(r1)
  1c:   7c 08 02 a6 mflrr0
  20:   38 a0 00 04 li  r5,4
  24:   93 e1 00 1c stw r31,28(r1)
  28:   7c 7f 1b 78 mr  r31,r3
  2c:   38 61 00 08 addir3,r1,8
  30:   90 01 00 24 stw r0,36(r1)
  34:   48 00 00 01 bl  34 
34: R_PPC_REL24 copy_from_kernel_nofault
  38:   2c 03 00 00 cmpwi   r3,0
  3c:   40 82 00 0c bne 48 
  40:   81 21 00 08 lwz r9,8(r1)
  44:   91 3f 00 00 stw r9,0(r31)
  48:   80 01 00 24 lwz r0,36(r1)
  4c:   83 e1 00 1c lwz r31,28(r1)
  50:   38 21 00 20 addir1,r1,32
  54:   7c 08 03 a6 mtlrr0
  58:   4e 80 00 20 blr

After the patch (before inlining):

0018 :
  18:   3d 20 b0 00 lis r9,-20480
  1c:   7c 04 48 40 cmplw   r4,r9
  20:   7c 69 1b 78 mr  r9,r3
  24:   41 80 00 14 blt 38 
  28:   81 44 00 00 lwz r10,0(r4)
  2c:   38 60 00 00 li  r3,0
  30:   91 49 00 00 stw r10,0(r9)
  34:   4e 80 00 20 blr

  38:   38 60 ff de li  r3,-34
  3c:   4e 80 00 20 blr
  40:   38 60 ff f2 li  r3,-14
  44:   4e 80 00 20 blr

Signed-off-by: Christophe Leroy 
---
v4: Inline and remove pagefault_disable()

v3: New
---
 arch/powerpc/include/asm/inst.h | 21 -
 arch/powerpc/mm/maccess.c   | 17 -
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 53a40faf362a..631436f3f5c3 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -4,6 +4,8 @@
 
 #include 
 #include 
+#include 
+#include 
 
 #define ___get_user_instr(gu_op, dest, ptr)\
 ({ \
@@ -148,6 +150,23 @@ static inline char *__ppc_inst_as_str(char 
str[PPC_INST_STR_LEN], ppc_inst_t x)
__str;  \
 })
 
-int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src);
+static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
+{
+   unsigned int val, suffix;
+
+   if (unlikely(!is_kernel_addr((unsigned long)src)))
+   return -ERANGE;
+
+   __get_kernel_nofault(, src, u32, Efault);
+   if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
+   __get_kernel_nofault(, src + 1, u32, Efault);
+   *inst = ppc_inst_prefix(val, suffix);
+   } else {
+   *inst = ppc_inst(val);
+   }
+   return 0;
+Efault:
+   return -EFAULT;
+}
 
 #endif /* _ASM_POWERPC_INST_H */
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
index 5abae96b2b46..ea821d0ffe16 100644
--- a/arch/powerpc/mm/maccess.c
+++ b/arch/powerpc/mm/maccess.c
@@ -11,20 +11,3 @@ bool copy_from_kernel_nofault_allowed(const void 
*unsafe_src, size_t size)
 {
return is_kernel_addr((unsigned long)unsafe_src);
 }
-
-int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
-{
-   unsigned int val, suffix;
-   int err;
-
-   err = copy_from_kernel_nofault(, src, sizeof(val));
-   if (err)
-   return err;
-   if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
-   err = copy_from_kernel_nofault(, src + 1, 
sizeof(suffix));
-   *inst = ppc_inst_prefix(val, suffix);
-   } else {
-   *inst = ppc_inst(val);
-   }
-   return err;
-}
-- 
2.33.1



[PATCH v5 1/5] powerpc/inst: Refactor ___get_user_instr()

2021-11-29 Thread Christophe Leroy
PPC64 version of ___get_user_instr() can be used for PPC32 as well,
by simply disabling the suffix part with IS_ENABLED(CONFIG_PPC64).

Signed-off-by: Christophe Leroy 
---
v5: Force use of 'y' in ppc_inst_prefix on PPC32 to avoid 'use variable' 
warning with W=1
---
 arch/powerpc/include/asm/inst.h | 13 ++---
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index b11c0e2f9639..10a5c1b76ca0 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -4,8 +4,6 @@
 
 #include 
 
-#ifdef CONFIG_PPC64
-
 #define ___get_user_instr(gu_op, dest, ptr)\
 ({ \
long __gui_ret; \
@@ -16,7 +14,7 @@
__chk_user_ptr(ptr);\
__gui_ret = gu_op(__prefix, __gui_ptr); \
if (__gui_ret == 0) {   \
-   if ((__prefix >> 26) == OP_PREFIX) {\
+   if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) 
{ \
__gui_ret = gu_op(__suffix, __gui_ptr + 1); \
__gui_inst = ppc_inst_prefix(__prefix, __suffix); \
} else {\
@@ -27,13 +25,6 @@
}   \
__gui_ret;  \
 })
-#else /* !CONFIG_PPC64 */
-#define ___get_user_instr(gu_op, dest, ptr)\
-({ \
-   __chk_user_ptr(ptr);\
-   gu_op((dest).val, (u32 __user *)(ptr)); \
-})
-#endif /* CONFIG_PPC64 */
 
 #define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr)
 
@@ -71,7 +62,7 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x)
 }
 
 #else
-#define ppc_inst_prefix(x, y) ppc_inst(x)
+#define ppc_inst_prefix(x, y) ((void)y, ppc_inst(x))
 
 static inline u32 ppc_inst_suffix(struct ppc_inst x)
 {
-- 
2.33.1



[PATCH 04/10] powerpc/pseries/vas: Reopen windows with DLPAR core add

2021-11-29 Thread Haren Myneni


VAS windows can be closed in the hypervisor due to lost credits
when the core is removed. If these credits are available later
for core add, reopen these windows and set them active. When the
kernel sees page fault on the paste address, it creates new mapping
on the new paste address. Then the user space can continue to use
these windows and send HW compression requests to NX successfully.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/vas.h  |  15 +++
 arch/powerpc/platforms/book3s/vas-api.c |   1 +
 arch/powerpc/platforms/pseries/vas.c| 148 
 arch/powerpc/platforms/pseries/vas.h|   2 +
 4 files changed, 166 insertions(+)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index 57573d9c1e09..43cea69d1af1 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -29,6 +29,18 @@
 #define VAS_THRESH_FIFO_GT_QTR_FULL2
 #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3
 
+/*
+ * VAS window status
+ */
+#define VAS_WIN_ACTIVE 0x0 /* Used in platform independent */
+   /* vas mmap() */
+#define VAS_WIN_CLOSED 0x1
+#define VAS_WIN_INACTIVE   0x2 /* Inactive due to HW failure */
+#define VAS_WIN_MOD_IN_PROCESS 0x3 /* Process of being modified, */
+   /* deallocated, or quiesced */
+#define VAS_WIN_NO_CRED_CLOSE  0x4 /* Linux specific status when */
+   /* window is closed due to lost */
+   /* credit */
 /*
  * Get/Set bit fields
  */
@@ -59,6 +71,8 @@ struct vas_user_win_ref {
struct pid *pid;/* PID of owner */
struct pid *tgid;   /* Thread group ID of owner */
struct mm_struct *mm;   /* Linux process mm_struct */
+   struct mutex mmap_mutex;/* protects paste address mmap() */
+   /* with DLPAR close/open windows */
 };
 
 /*
@@ -67,6 +81,7 @@ struct vas_user_win_ref {
 struct vas_window {
u32 winid;
u32 wcreds_max; /* Window credits */
+   u32 status;
enum vas_cop_type cop;
struct vas_user_win_ref task_ref;
char *dbgname;
diff --git a/arch/powerpc/platforms/book3s/vas-api.c 
b/arch/powerpc/platforms/book3s/vas-api.c
index 4d82c92ddd52..2b0ced611f32 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -316,6 +316,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned 
long arg)
return PTR_ERR(txwin);
}
 
+   mutex_init(>task_ref.mmap_mutex);
cp_inst->txwin = txwin;
 
return 0;
diff --git a/arch/powerpc/platforms/pseries/vas.c 
b/arch/powerpc/platforms/pseries/vas.c
index 6b35f67d5175..ace8ee7a99e6 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -493,6 +493,7 @@ static int get_vas_capabilities(u8 feat, enum 
vas_cop_feat_type type,
memset(vcaps, 0, sizeof(*vcaps));
INIT_LIST_HEAD(>list);
 
+   vcaps->feat = feat;
caps = >caps;
 
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
@@ -531,6 +532,149 @@ static int get_vas_capabilities(u8 feat, enum 
vas_cop_feat_type type,
return 0;
 }
 
+/*
+ * VAS windows can be closed due to lost credits when the core is
+ * removed. So reopen them if credits are available due to DLPAR
+ * core add and set the window active status. When NX sees the page
+ * fault on the unmapped paste address, the kernel handles the fault
+ * by setting the remapping to new paste address if the window is
+ * active.
+ */
+static int reconfig_open_windows(struct vas_caps *vcaps, int creds)
+{
+   long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+   struct vas_cop_feat_caps *caps = >caps;
+   struct pseries_vas_window *win = NULL;
+   int rc, mv_ents = 0;
+
+   /*
+* Nothing to do if there are no closed windows.
+*/
+   if (!vcaps->close_wins)
+   return 0;
+
+   /*
+* For the core removal, the hypervisor reduces the credits
+* assigned to the LPAR and the kernel closes VAS windows
+* in the hypervisor depends on reduced credits. The kernel
+* uses LIFO (the last windows that are opened will be closed
+* first) and expects to open in the same order when credits
+* are available.
+* For example, 40 windows are closed when the LPAR lost 2 cores
+* (dedicated). If 1 core is added, this LPAR can have 20 more
+* credits. It means the kernel can reopen 20 windows. So move
+* 20 entries in the VAS windows lost and reopen next 20 windows.
+*/
+   if (vcaps->close_wins > creds)
+   mv_ents = vcaps->close_wins - creds;
+
+   list_for_each_entry(win, >list, win_list) {
+   if (!mv_ents)
+   break;
+

[PATCH 03/10] powerpc/pseries/vas: Save LPID in pseries_vas_window struct

2021-11-29 Thread Haren Myneni


The kernel sets the VAS window with the partition PID when it is opened
in the hypervisor. During DLPAR operation, windows can be closed and
reopened in the hypervisor when the credit is available. So save
this PID in the pseries_vas_window struct when the window is opened
initially and reuse it later during DLPAR operation.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/pseries/vas.c | 6 +++---
 arch/powerpc/platforms/pseries/vas.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/vas.c 
b/arch/powerpc/platforms/pseries/vas.c
index 04a6eee2301e..6b35f67d5175 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -107,7 +107,6 @@ static int h_deallocate_vas_window(u64 winid)
 static int h_modify_vas_window(struct pseries_vas_window *win)
 {
long rc;
-   u32 lpid = mfspr(SPRN_PID);
 
/*
 * AMR value is not supported in Linux VAS implementation.
@@ -115,7 +114,7 @@ static int h_modify_vas_window(struct pseries_vas_window 
*win)
 */
do {
rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
-   win->vas_win.winid, lpid, 0,
+   win->vas_win.winid, win->lpid, 0,
VAS_MOD_WIN_FLAGS, 0);
 
rc = hcall_return_busy_check(rc);
@@ -125,7 +124,7 @@ static int h_modify_vas_window(struct pseries_vas_window 
*win)
return 0;
 
pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
-   rc, win->vas_win.winid, lpid);
+   rc, win->vas_win.winid, win->lpid);
return -EIO;
 }
 
@@ -353,6 +352,7 @@ static struct vas_window *vas_allocate_window(int vas_id, 
u64 flags,
if (rc)
goto out;
 
+   txwin->lpid = mfspr(SPRN_PID);
/*
 * Modify window and it is ready to use.
 */
diff --git a/arch/powerpc/platforms/pseries/vas.h 
b/arch/powerpc/platforms/pseries/vas.h
index fa7ce74f1e49..0538760d13be 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -115,6 +115,7 @@ struct pseries_vas_window {
u64 domain[6];  /* Associativity domain Ids */
/* this window is allocated */
u64 util;
+   u32 lpid;
 
/* List of windows opened which is used for LPM */
struct list_head win_list;
-- 
2.27.0




[PATCH 02/10] powerpc/pseries/vas: Add notifier for DLPAR core removal/add

2021-11-29 Thread Haren Myneni


The hypervisor assigns credits to each LPAR based on the number of
cores configured in that system, so it expects the OS to release
credits (i.e. close windows) when a core is removed. This patch adds
a notifier for core removal/add so that the OS closes windows if the
system loses credits due to core removal and reopens windows when the
credits become available later.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/pseries/vas.c | 34 
 1 file changed, 34 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/vas.c 
b/arch/powerpc/platforms/pseries/vas.c
index ecdd21f517c0..04a6eee2301e 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -531,6 +531,36 @@ static int get_vas_capabilities(u8 feat, enum 
vas_cop_feat_type type,
return 0;
 }
 
+/*
+ * Total number of default credits available (target_credits)
+ * in LPAR depends on number of cores configured. It varies based on
+ * whether processors are in shared mode or dedicated mode.
+ * Get the notifier when CPU configuration is changed with DLPAR
+ * operation so that get the new target_credits (vas default capabilities)
+ * and then update the existing windows usage if needed.
+ */
+static int pseries_vas_notifier(struct notifier_block *nb,
+   unsigned long action, void *data)
+{
+   struct of_reconfig_data *rd = data;
+   struct device_node *dn = rd->dn;
+   const __be32 *intserv;
+   int len, rc = 0;
+
+   intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", );
+   /*
+* Processor config is not changed
+*/
+   if (!intserv)
+   return NOTIFY_OK;
+
+   return rc;
+}
+
+static struct notifier_block pseries_vas_nb = {
+   .notifier_call = pseries_vas_notifier,
+};
+
 static int __init pseries_vas_init(void)
 {
struct hv_vas_cop_feat_caps *hv_cop_caps;
@@ -584,6 +614,10 @@ static int __init pseries_vas_init(void)
goto out_cop;
}
 
+   /* Processors can be added/removed only on LPAR */
+   if (copypaste_feat && firmware_has_feature(FW_FEATURE_LPAR))
+   of_reconfig_notifier_register(_vas_nb);
+
pr_info("GZIP feature is available\n");
 
 out_cop:
-- 
2.27.0




[PATCH 01/10] powerpc/pseries/vas: Use common names in VAS capability structure

2021-11-29 Thread Haren Myneni


target/used/avail_creds provide credit usage to user space via
sysfs, and the same interface can be used on PowerNV in the future.
Remove "lpar" from these names so that they are applicable to both
PowerVM and PowerNV.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/pseries/vas.c | 10 +-
 arch/powerpc/platforms/pseries/vas.h |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/vas.c 
b/arch/powerpc/platforms/pseries/vas.c
index b043e3936d21..ecdd21f517c0 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -303,8 +303,8 @@ static struct vas_window *vas_allocate_window(int vas_id, 
u64 flags,
 
cop_feat_caps = >caps;
 
-   if (atomic_inc_return(_feat_caps->used_lpar_creds) >
-   atomic_read(_feat_caps->target_lpar_creds)) {
+   if (atomic_inc_return(_feat_caps->used_creds) >
+   atomic_read(_feat_caps->target_creds)) {
pr_err("Credits are not available to allocate window\n");
rc = -EINVAL;
goto out;
@@ -378,7 +378,7 @@ static struct vas_window *vas_allocate_window(int vas_id, 
u64 flags,
free_irq_setup(txwin);
h_deallocate_vas_window(txwin->vas_win.winid);
 out:
-   atomic_dec(_feat_caps->used_lpar_creds);
+   atomic_dec(_feat_caps->used_creds);
kfree(txwin);
return ERR_PTR(rc);
 }
@@ -438,7 +438,7 @@ static int vas_deallocate_window(struct vas_window *vwin)
}
 
list_del(>win_list);
-   atomic_dec(>used_lpar_creds);
+   atomic_dec(>used_creds);
mutex_unlock(_pseries_mutex);
 
put_vas_user_win_ref(>task_ref);
@@ -514,7 +514,7 @@ static int get_vas_capabilities(u8 feat, enum 
vas_cop_feat_type type,
}
caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
-   atomic_set(>target_lpar_creds,
+   atomic_set(>target_creds,
   be16_to_cpu(hv_caps->target_lpar_creds));
if (feat == VAS_GZIP_DEF_FEAT) {
caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
diff --git a/arch/powerpc/platforms/pseries/vas.h 
b/arch/powerpc/platforms/pseries/vas.h
index 4ecb3fcabd10..fa7ce74f1e49 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -72,9 +72,9 @@ struct vas_cop_feat_caps {
};
/* Total LPAR available credits. Can be different from max LPAR */
/* credits due to DLPAR operation */
-   atomic_ttarget_lpar_creds;
-   atomic_tused_lpar_creds; /* Used credits so far */
-   u16 avail_lpar_creds; /* Remaining available credits */
+   atomic_ttarget_creds;
+   atomic_tused_creds; /* Used credits so far */
+   u16 avail_creds;/* Remaining available credits */
 };
 
 /*
-- 
2.27.0




[PATCH 00/10] powerpc/pseries/vas: NXGZIP support with DLPAR

2021-11-29 Thread Haren Myneni


PowerPC provides HW compression with NX coprocessor. This feature
is available on both PowerNV and PowerVM and included in Linux.
Since each powerpc chip has one NX coprocessor, the VAS introduces
the concept of windows / credits to manage access to this hardware
resource. On powerVM, these limited resources should be available
across all LPARs. So the hypervisor assigns the specific credits
to each LPAR based on processor entitlement so that one LPAR does
not overload NX. The hypervisor can reject the window open request
to a partition if exceeds its credit limit (1 credit per window).

So the total number of target credits in a partition can be changed
if the core configuration is modified. The hypervisor expects the
partition to modify its window usage depends on new target
credits. For example, if the partition uses more credits than the
new target credits, it should close the excessive windows so that
the NX resource will be available to other partitions.

This patch series enables OS to support this dynamic credit
management with DLPAR core removal/add.

Core removal operation:
- Get new VAS capabilities from the hypervisor when the DLPAR
  notifier is received. This capabilities provides the new target
  credits based on new processor entitlement. In the case of QoS
  credit changes, the notification will be issued by updating
  the target_creds via sysfs.
- If the partition is already using more than the new target credits,
  the kernel selects windows, unmaps the current paste address and
  closes them in the hypervisor. It uses LIFO to identify these
  windows - last windows that are opened are the first ones to be
  closed.
- When the user space issue requests on these windows, NX generates
  page fault on the unmap paste address. The kernel handles the
  fault by returning the paste instruction failure if the window is
  not active (means unmap paste). Then up to the library / user
  space to fall back to SW compression or manage with the current
  windows.

Core add operation:
- The kernel can see increased target credits from the new VAS
  capabilities.
- Scans the window list for the closed windows in the hypervisor
  due to lost credit before and selects windows based on same LIFO.
- Make these corresponding windows active and create remap with
  the same VMA on the new paste address in the fault handler.
- Then the user space should expect paste successful later.

Patch 1: Define common names for sysfs target/used/avail_creds so
 that same sysfs entries can be used even on PowerNV later.
Patch 2: Add VAS notifier for DLPAR core add / removal
Patch 3: Save LPID in the vas window struct  during initial window
 open and use it when reopen later.
Patch 4: When credits are available, reopen windows that are closed
 before with core removal.
Patch 5: Close windows in the hypervisor when the partition exceeds
 its usage than the new target credits.
Patch 6: If the window is closed in the hypervisor before the user
 space issue the initial mmap(), return -EACCES failure.
Patch 7: Add new mmap fault handler which handles the page fault
 from NX on paste address.
Patch 8: Return the paste instruction failure if the window is not
 active.
Patch 9 & 10: The user space determines the credit usage with sysfs
 target/avail/used_creds interfaces. drmgr uses target_creds
to notify OS for QoS credit changes.

Thanks to Nicholas Piggin and Aneesh Kumar for the valuable suggestions
on the NXGZIP design to support DLPAR operations.
  
Haren Myneni (10):
  powerpc/pseries/vas: Use common names in VAS capability structure
  powerpc/pseries/vas: Add notifier for DLPAR core removal/add
  powerpc/pseries/vas: Save partition PID in pseries_vas_window struct
  powerpc/pseries/vas: Reopen windows with DLPAR core add
  powerpc/pseries/vas: Close windows with DLPAR core removal
  powerpc/vas: Map paste address only if window is active
  powerpc/vas: Add paste address mmap fault handler
  powerpc/vas: Return paste instruction failure if window is not active
  powerpc/pseries/vas: sysfs interface to export capabilities
  powerpc/pseries/vas: Write 'target_creds' for QoS credits change

 arch/powerpc/include/asm/ppc-opcode.h  |   2 +
 arch/powerpc/include/asm/vas.h |  16 ++
 arch/powerpc/platforms/book3s/vas-api.c| 129 -
 arch/powerpc/platforms/pseries/Makefile|   2 +-
 arch/powerpc/platforms/pseries/vas-sysfs.c | 246 +
 arch/powerpc/platforms/pseries/vas.c   | 293 -
 arch/powerpc/platforms/pseries/vas.h   |  16 +-
 7 files changed, 691 insertions(+), 13 deletions(-)
 create mode 100644 arch/powerpc/platforms/pseries/vas-sysfs.c

-- 
2.27.0




RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.

2021-11-29 Thread Eugene Bordenkircher
The final result of our testing is that the patch set posted seems to address 
all known defects in the Linux kernel.  The mentioned additional problems are 
entirely caused by the antivirus solution on the windows box.  The antivirus 
solution blocks the disconnect messages from reaching the RNDIS driver so it 
has no idea the USB device went away.  There is nothing we can do to address 
this in the Linux kernel.

I propose we move forward with the patchset.

Eugene T. Bordenkircher

-Original Message-
From: Thorsten Leemhuis  
Sent: Thursday, November 25, 2021 5:59 AM
To: Eugene Bordenkircher ; Thorsten Leemhuis 
; Joakim Tjernlund ; 
linuxppc-dev@lists.ozlabs.org; linux-...@vger.kernel.org
Cc: leoyang...@nxp.com; gre...@linuxfoundation.org; ba...@kernel.org
Subject: Re: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to 
unrecoverable loop.

Hi, this is your Linux kernel regression tracker speaking.

Top-posting for once, to make this easy to process for everyone:

Li Yang and Felipe Balbi: how to move on with this? It's quite an old 
regression, but nevertheless it is one and thus should be fixed. Part of my 
position is to make that happen and thus remind developers and maintainers 
about this until the regression is resolved.

Ciao, Thorsten

On 16.11.21 20:11, Eugene Bordenkircher wrote:
> On 02.11.21 22:15, Joakim Tjernlund wrote:
>> On Sat, 2021-10-30 at 14:20 +, Joakim Tjernlund wrote:
>>> On Fri, 2021-10-29 at 17:14 +, Eugene Bordenkircher wrote:
>>
 We've discovered a situation where the FSL udc driver 
 (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating over 
 the request queue, but the queue has been corrupted at some point so it 
 loops infinitely.  I believe we have narrowed into the offending code, but 
 we are in need of assistance trying to find an appropriate fix for the 
 problem.  The identified code appears to be in all versions of the Linux 
 kernel the driver exists in.

 The problem appears to be when handling a USB_REQ_GET_STATUS request.  The 
 driver gets this request and then calls the ch9getstatus() function.  In 
 this function, it starts a request by "borrowing" the per device 
 status_req, filling it in, and then queuing it with a call to 
 list_add_tail() to add the request to the endpoint queue.  Right before it 
 exits the function however, it's calling ep0_prime_status(), which is 
 filling out that same status_req structure and then queuing it with 
 another call to list_add_tail() to add the request to the endpoint queue.  
 This adds two instances of the exact same LIST_HEAD to the endpoint queue, 
 which breaks the list since the prev and next pointers end up pointing to 
 the wrong things.  This ends up causing a hard loop the next time nuke() 
 gets called, which happens on the next setup IRQ.

 I'm not sure what the appropriate fix to this problem is, mostly due to my 
 lack of expertise in USB and this driver stack.  The code has been this 
 way in the kernel for a very long time, which suggests that it has been 
 working, unless USB_REQ_GET_STATUS requests are never made.  This further 
 suggests that there is something else going on that I don't understand.  
 Deleting the call to ep0_prime_status() and the following ep0stall() call 
 appears, on the surface, to get the device working again, but may have 
 side effects that I'm not seeing.

 I'm hopeful someone in the community can help provide some information on 
 what I may be missing or help come up with a solution to the problem.  A 
 big thank you to anyone who would like to help out.
>>>
>>> Ran into this too a while ago. Found the bug and a few more fixes.
>>> This is against 4.19 so you may have to tweak them a bit.
>>> Feel free to upstream them.
>>
>> Curious, did my patches help? Good to know once we upgrade as well.
>
> There's good news and bad news.
>
> The good news is that this appears to stop the driver from entering an 
> infinite loop, which prevents the Linux system from locking up and 
> never recovering.  So I'm willing to say we've made the behavior 
> better.
>
> The bad news is that once we get past this point, there is new bad 
> behavior.  What is on top of this driver in our system is the RNDIS 
> gadget driver communicating to a Laptop running Win10 -1809.
> Everything appears to work fine with the Linux system until there is a 
> USB disconnect.  After the disconnect, the Linux side appears to 
> continue on just fine, but the Windows side doesn't seem to recognize 
> the disconnect, which causes the USB driver on that side to hang 
> forever and eventually blue screen the box.  This doesn't happen on
> all machines, just a select few.   I think we can isolate the
> behavior to a specific antivirus/security software driver that is 
> inserting itself into the USB stack and filtering the disconnect 
> 

Re: [PATCH v4 1/5] powerpc/inst: Refactor ___get_user_instr()

2021-11-29 Thread kernel test robot
Hi Christophe,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v5.16-rc3 next-20211129]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211129-195613
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allnoconfig 
(https://download.01.org/0day-ci/archive/20211130/202111300028.pvdtx2vc-...@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/12f08114cece066b2640aef99e2bc74f49eebef5
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211129-195613
git checkout 12f08114cece066b2640aef99e2bc74f49eebef5
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross 
O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kernel/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from arch/powerpc/include/asm/hw_breakpoint.h:13,
from arch/powerpc/include/asm/processor.h:43,
from arch/powerpc/include/asm/thread_info.h:40,
from include/linux/thread_info.h:60,
from include/asm-generic/preempt.h:5,
from ./arch/powerpc/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/spinlock.h:55,
from include/linux/mmzone.h:8,
from include/linux/gfp.h:6,
from include/linux/mm.h:10,
from arch/powerpc/kernel/align.c:17:
   arch/powerpc/kernel/align.c: In function 'fix_alignment':
>> arch/powerpc/include/asm/inst.h:12:32: error: variable '__suffix' set but 
>> not used [-Werror=unused-but-set-variable]
  12 | unsigned int __prefix, __suffix; 
   \
 |^~~~
   arch/powerpc/include/asm/inst.h:31:34: note: in expansion of macro 
'___get_user_instr'
  31 | #define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, 
ptr)
 |  ^
   arch/powerpc/kernel/align.c:310:21: note: in expansion of macro 
'__get_user_instr'
 310 | r = __get_user_instr(instr, (void __user 
*)regs->nip);
 | ^~~~
   cc1: all warnings being treated as errors
--
   In file included from arch/powerpc/include/asm/hw_breakpoint.h:13,
from arch/powerpc/include/asm/processor.h:43,
from arch/powerpc/include/asm/thread_info.h:40,
from include/linux/thread_info.h:60,
from arch/powerpc/include/asm/ptrace.h:323,
from arch/powerpc/include/asm/hw_irq.h:12,
from arch/powerpc/include/asm/irqflags.h:12,
from include/linux/irqflags.h:16,
from include/asm-generic/cmpxchg-local.h:6,
from arch/powerpc/include/asm/cmpxchg.h:526,
from arch/powerpc/include/asm/atomic.h:11,
from include/linux/atomic.h:7,
from include/linux/rcupdate.h:25,
from include/linux/rculist.h:11,
from include/linux/pid.h:5,
from include/linux/sched.h:14,
from include/linux/uaccess.h:8,
from arch/powerpc/kernel/hw_breakpoint_constraints.c:3:
   arch/powerpc/kernel/hw_breakpoint_constraints.c: In function 
'wp_get_instr_detail':
>> arch/powerpc/include/asm/inst.h:12:32: error: variable '__suffix' set but 
>> not used [-Werror=unused-but-set-variable]
  12 | unsigned int __prefix, __suffix; 
   \
 |^~~~
   arch/powerpc/include/asm/inst.h:31:34: note: in expansion of macro 
'___get_user_instr'
  31 | #define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, 
ptr)
 |  ^
   arch/powerpc/kernel/hw_breakpoint_constraints.c:135:13: note: in expansion 
of macro '__get_user_instr'
 135 | if (__get_user_instr(*instr, (void __user *)regs->nip))
 | 

Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key

2021-11-29 Thread Jiri Olsa
On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
> Sort key p_stage_cyc is used to present the latency
> cycles spent in pipeline stages. perf tool has local
> p_stage_cyc sort key to display this info. There is no
> global variant available for this sort key. The local variant
> shows latency in a single sample, whereas the global value
> will be useful to present the total latency (sum of
> latencies) in the hist entry. It represents latency
> number multiplied by the number of samples.
> 
> Add global (p_stage_cyc) and local variant
> (local_p_stage_cyc) for this sort key. Use the
> local_p_stage_cyc as default option for "mem" sort mode.
> Also add this to list of dynamic sort keys.
> 
> Signed-off-by: Athira Rajeev 
> Reported-by: Namhyung Kim 
> ---
>  tools/perf/util/hist.c |  4 +++-
>  tools/perf/util/hist.h |  3 ++-
>  tools/perf/util/sort.c | 34 +-
>  tools/perf/util/sort.h |  3 ++-
>  4 files changed, 32 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index b776465e04ef..0a8033b09e28 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct 
> hist_entry *h)
>   hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>   hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>   hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> - hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> + hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> + hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> +
>   if (symbol_conf.nanosecs)
>   hists__new_col_len(hists, HISTC_TIME, 16);
>   else
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 5343b62476e6..2752ce681108 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -75,7 +75,8 @@ enum hist_column {
>   HISTC_MEM_BLOCKED,
>   HISTC_LOCAL_INS_LAT,
>   HISTC_GLOBAL_INS_LAT,
> - HISTC_P_STAGE_CYC,
> + HISTC_LOCAL_P_STAGE_CYC,
> + HISTC_GLOBAL_P_STAGE_CYC,
>   HISTC_NR_COLS, /* Last entry */
>  };
>  
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index e9216a292a04..e978f7883e07 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -37,7 +37,7 @@ const char  default_parent_pattern[] = 
> "^sys_|^do_page_fault";
>  const char   *parent_pattern = default_parent_pattern;
>  const char   *default_sort_order = "comm,dso,symbol";
>  const char   default_branch_sort_order[] = 
> "comm,dso_from,symbol_from,symbol_to,cycles";
> -const char   default_mem_sort_order[] = 
> "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> +const char   default_mem_sort_order[] = 
> "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>  const char   default_top_sort_order[] = "dso,symbol";
>  const char   default_diff_sort_order[] = "dso,symbol";
>  const char   default_tracepoint_sort_order[] = "trace";
> @@ -46,8 +46,8 @@ const char  *field_order;
>  regex_t  ignore_callees_regex;
>  int  have_ignore_callees = 0;
>  enum sort_mode   sort__mode = SORT_MODE__NORMAL;
> -const char   *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> -const char   *arch_specific_sort_keys[] = {"p_stage_cyc"};
> +const char   *dynamic_headers[] = {"local_ins_lat", "ins_lat", 
> "local_p_stage_cyc", "p_stage_cyc"};

so you also add global ins_lat, right? will this change
some default behaviour?

> +const char   *arch_specific_sort_keys[] = {"local_p_stage_cyc", 
> "p_stage_cyc"};

nit: both dynamic_headers and arch_specific_sort_keys could be static, right?

thanks,
jirka

>  
>  /*
>   * Replaces all occurrences of a char used with the:
> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>  };
>  
>  static int64_t
> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry 
> *right)
> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>  {
>   return left->p_stage_cyc - right->p_stage_cyc;
>  }
>  
> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, 
> char *bf,
> + size_t size, unsigned int width)
> +{
> + return repsep_snprintf(bf, size, "%-*u", width,
> + he->p_stage_cyc * he->stat.nr_events);
> +}
> +
> +
>  static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>   size_t size, unsigned int width)
>  {
>   return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>  }
>  
> -struct sort_entry sort_p_stage_cyc = {
> - .se_header  = "Pipeline Stage Cycle",
> - .se_cmp = sort__global_p_stage_cyc_cmp,
> +struct sort_entry sort_local_p_stage_cyc = {
> + .se_header  = "Local Pipeline Stage Cycle",
> + 

Re: [PATCH v5 05/12] KVM: RISC-V: Use Makefile.kvm for common files

2021-11-29 Thread David Woodhouse
On Tue, 2021-11-23 at 14:42 +0530, Anup Patel wrote:
> On Sun, Nov 21, 2021 at 6:25 PM David Woodhouse  wrote:
> 
> > From: David Woodhouse 
> > Signed-off-by: David Woodhouse 
> 
> Looks good to me.
> 
> For KVM RISC-V,
> 
> Acked-by: Anup Patel 
> Reviewed-by: Anup Patel 

Thanks. I've included those in the tree at
https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/xen-evtchn
which is based on kvm/master but rebases cleanly to kvm/queue.

I'm working on additional support (IPI, timers, etc.) to go on top but
will post that in a separate series rather than adding more to this
one.


smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key

2021-11-29 Thread Jiri Olsa
On Mon, Nov 29, 2021 at 02:43:48PM +0530, Athira Rajeev wrote:
> 
> 
> > On 28-Nov-2021, at 10:04 PM, Jiri Olsa  wrote:
> > 
> > On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
> >> Sort key p_stage_cyc is used to present the latency
> >> cycles spent in pipeline stages. perf tool has local
> >> p_stage_cyc sort key to display this info. There is no
> >> global variant available for this sort key. The local variant
> >> shows latency in a single sample, whereas the global value
> >> will be useful to present the total latency (sum of
> >> latencies) in the hist entry. It represents latency
> >> number multiplied by the number of samples.
> >> 
> >> Add global (p_stage_cyc) and local variant
> >> (local_p_stage_cyc) for this sort key. Use the
> >> local_p_stage_cyc as default option for "mem" sort mode.
> >> Also add this to list of dynamic sort keys.
> >> 
> >> Signed-off-by: Athira Rajeev 
> >> Reported-by: Namhyung Kim 
> > 
> > I can't apply this to Arnaldo's perf/core, could you please rebase?
> > 
> > patching file util/hist.c
> > patching file util/hist.h
> > patching file util/sort.c
> > Hunk #3 FAILED at 1392.
> > Hunk #4 succeeded at 1878 (offset 20 lines).
> > 1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej
> > patching file util/sort.h
> > 
> > thanks,
> > jirka
> 
> Hi Jiri,
> 
> Thanks for checking this patch. 
> 
> Actually these changes are on top of three other fixes from Namhyung which 
> are already part of upstream. Below are the commits.
> 
> 784e8adda4cd ("perf sort: Fix the 'weight' sort key behavior")
> 4d03c75363ee ("perf sort: Fix the 'ins_lat' sort key behavior")
> db4b28402909 ("perf sort: Fix the 'p_stage_cyc' sort key behavior")
> 
> I checked in Arnaldo’s perf/core, but these commits are not there. But I 
> could see them in 'tmp.perf/urgent'.
> I think perf/core is not yet updated.

ah ok, I got it applied on perf/urgent, thanks

jirka

> 
> Thanks
> Athira Rajeev
> 
> > 
> >> ---
> >> tools/perf/util/hist.c |  4 +++-
> >> tools/perf/util/hist.h |  3 ++-
> >> tools/perf/util/sort.c | 34 +-
> >> tools/perf/util/sort.h |  3 ++-
> >> 4 files changed, 32 insertions(+), 12 deletions(-)
> >> 
> >> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> >> index b776465e04ef..0a8033b09e28 100644
> >> --- a/tools/perf/util/hist.c
> >> +++ b/tools/perf/util/hist.c
> >> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct 
> >> hist_entry *h)
> >>hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
> >>hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
> >>hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> >> -  hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> >> +  hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> >> +  hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> >> +
> >>if (symbol_conf.nanosecs)
> >>hists__new_col_len(hists, HISTC_TIME, 16);
> >>else
> >> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> >> index 5343b62476e6..2752ce681108 100644
> >> --- a/tools/perf/util/hist.h
> >> +++ b/tools/perf/util/hist.h
> >> @@ -75,7 +75,8 @@ enum hist_column {
> >>HISTC_MEM_BLOCKED,
> >>HISTC_LOCAL_INS_LAT,
> >>HISTC_GLOBAL_INS_LAT,
> >> -  HISTC_P_STAGE_CYC,
> >> +  HISTC_LOCAL_P_STAGE_CYC,
> >> +  HISTC_GLOBAL_P_STAGE_CYC,
> >>HISTC_NR_COLS, /* Last entry */
> >> };
> >> 
> >> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> >> index e9216a292a04..e978f7883e07 100644
> >> --- a/tools/perf/util/sort.c
> >> +++ b/tools/perf/util/sort.c
> >> @@ -37,7 +37,7 @@ const char   default_parent_pattern[] = 
> >> "^sys_|^do_page_fault";
> >> const char *parent_pattern = default_parent_pattern;
> >> const char *default_sort_order = "comm,dso,symbol";
> >> const char default_branch_sort_order[] = 
> >> "comm,dso_from,symbol_from,symbol_to,cycles";
> >> -const chardefault_mem_sort_order[] = 
> >> "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> >> +const chardefault_mem_sort_order[] = 
> >> "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
> >> const char default_top_sort_order[] = "dso,symbol";
> >> const char default_diff_sort_order[] = "dso,symbol";
> >> const char default_tracepoint_sort_order[] = "trace";
> >> @@ -46,8 +46,8 @@ const char   *field_order;
> >> regex_tignore_callees_regex;
> >> inthave_ignore_callees = 0;
> >> enum sort_mode sort__mode = SORT_MODE__NORMAL;
> >> -const char*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> >> -const char*arch_specific_sort_keys[] = {"p_stage_cyc"};
> >> +const char*dynamic_headers[] = {"local_ins_lat", "ins_lat", 
> >> "local_p_stage_cyc", "p_stage_cyc"};
> >> +const char*arch_specific_sort_keys[] = {"local_p_stage_cyc", 
> >> "p_stage_cyc"};
> >> 
> >> /*
> >>  

Re: [PATCH v4 1/5] powerpc/inst: Refactor ___get_user_instr()

2021-11-29 Thread kernel test robot
Hi Christophe,

I love your patch! Perhaps something to improve:

[auto build test WARNING on powerpc/next]
[also build test WARNING on v5.16-rc3 next-20211129]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211129-195613
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allyesconfig 
(https://download.01.org/0day-ci/archive/20211129/202111292213.tqmvcy38-...@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/12f08114cece066b2640aef99e2bc74f49eebef5
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211129-195613
git checkout 12f08114cece066b2640aef99e2bc74f49eebef5
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross 
O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kernel/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

   In file included from arch/powerpc/include/asm/hw_breakpoint.h:13,
from arch/powerpc/include/asm/processor.h:43,
from arch/powerpc/include/asm/thread_info.h:40,
from include/linux/thread_info.h:60,
from include/asm-generic/preempt.h:5,
from ./arch/powerpc/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/spinlock.h:55,
from include/linux/mmzone.h:8,
from include/linux/gfp.h:6,
from include/linux/mm.h:10,
from arch/powerpc/kernel/align.c:17:
   arch/powerpc/kernel/align.c: In function 'fix_alignment':
>> arch/powerpc/include/asm/inst.h:12:32: warning: variable '__suffix' set but 
>> not used [-Wunused-but-set-variable]
  12 | unsigned int __prefix, __suffix; 
   \
 |^~~~
   arch/powerpc/include/asm/inst.h:31:34: note: in expansion of macro 
'___get_user_instr'
  31 | #define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, 
ptr)
 |  ^
   arch/powerpc/kernel/align.c:310:21: note: in expansion of macro 
'__get_user_instr'
 310 | r = __get_user_instr(instr, (void __user 
*)regs->nip);
 | ^~~~
--
   In file included from arch/powerpc/include/asm/hw_breakpoint.h:13,
from arch/powerpc/include/asm/processor.h:43,
from arch/powerpc/include/asm/thread_info.h:40,
from include/linux/thread_info.h:60,
from arch/powerpc/include/asm/ptrace.h:323,
from arch/powerpc/include/asm/hw_irq.h:12,
from arch/powerpc/include/asm/irqflags.h:12,
from include/linux/irqflags.h:16,
from include/asm-generic/cmpxchg-local.h:6,
from arch/powerpc/include/asm/cmpxchg.h:526,
from arch/powerpc/include/asm/atomic.h:11,
from include/linux/atomic.h:7,
from include/linux/rcupdate.h:25,
from include/linux/rculist.h:11,
from include/linux/pid.h:5,
from include/linux/sched.h:14,
from include/linux/uaccess.h:8,
from arch/powerpc/kernel/hw_breakpoint_constraints.c:3:
   arch/powerpc/kernel/hw_breakpoint_constraints.c: In function 
'wp_get_instr_detail':
>> arch/powerpc/include/asm/inst.h:12:32: warning: variable '__suffix' set but 
>> not used [-Wunused-but-set-variable]
  12 | unsigned int __prefix, __suffix; 
   \
 |^~~~
   arch/powerpc/include/asm/inst.h:31:34: note: in expansion of macro 
'___get_user_instr'
  31 | #define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, 
ptr)
 |  ^
   arch/powerpc/kernel/hw_breakpoint_constraints.c:135:13: note: in expansion 
of macro '__get_user_instr'
 135 | if (__get_user_instr(*instr, (void __user *)regs->nip))
 | ^~~~
--
   In file in

[PATCH v3 02/10] powerpc/mm: Move vma_mmu_pagesize() and hugetlb_get_unmapped_area() to slice.c

2021-11-29 Thread Christophe Leroy
vma_mmu_pagesize() is only required for slices,
otherwise there is a generic weak version.

hugetlb_get_unmapped_area() is dedicated to slices.
radix__hugetlb_get_unmapped_area() as well.

Move them to slice.c

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/hugetlb.h |  4 --
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 55 --
 arch/powerpc/mm/book3s64/slice.c | 76 
 arch/powerpc/mm/hugetlbpage.c| 28 
 4 files changed, 76 insertions(+), 87 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h 
b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 12e150e615b7..b37a28f62cf6 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -8,10 +8,6 @@
  */
 void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long 
vmaddr);
 void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long 
vmaddr);
-extern unsigned long
-radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff,
-   unsigned long flags);
 
 extern void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c 
b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index 23d3e08911d3..d2fb776febb4 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -41,61 +41,6 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct 
*vma, unsigned long st
radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
 }
 
-/*
- * A vairant of hugetlb_get_unmapped_area doing topdown search
- * FIXME!! should we do as x86 does or non hugetlb area does ?
- * ie, use topdown or not based on mmap_is_legacy check ?
- */
-unsigned long
-radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff,
-   unsigned long flags)
-{
-   struct mm_struct *mm = current->mm;
-   struct vm_area_struct *vma;
-   struct hstate *h = hstate_file(file);
-   int fixed = (flags & MAP_FIXED);
-   unsigned long high_limit;
-   struct vm_unmapped_area_info info;
-
-   high_limit = DEFAULT_MAP_WINDOW;
-   if (addr >= high_limit || (fixed && (addr + len > high_limit)))
-   high_limit = TASK_SIZE;
-
-   if (len & ~huge_page_mask(h))
-   return -EINVAL;
-   if (len > high_limit)
-   return -ENOMEM;
-
-   if (fixed) {
-   if (addr > high_limit - len)
-   return -ENOMEM;
-   if (prepare_hugepage_range(file, addr, len))
-   return -EINVAL;
-   return addr;
-   }
-
-   if (addr) {
-   addr = ALIGN(addr, huge_page_size(h));
-   vma = find_vma(mm, addr);
-   if (high_limit - len >= addr && addr >= mmap_min_addr &&
-   (!vma || addr + len <= vm_start_gap(vma)))
-   return addr;
-   }
-   /*
-* We are always doing an topdown search here. Slice code
-* does that too.
-*/
-   info.flags = VM_UNMAPPED_AREA_TOPDOWN;
-   info.length = len;
-   info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-   info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
-   info.align_mask = PAGE_MASK & ~huge_page_mask(h);
-   info.align_offset = 0;
-
-   return vm_unmapped_area();
-}
-
 void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep,
 pte_t old_pte, pte_t pte)
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index c83be371c6e7..4c3e9601fdf6 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -777,4 +777,80 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, 
unsigned long addr,
 
return !slice_check_range_fits(mm, maskp, addr, len);
 }
+
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+   /* With radix we don't use slice, so derive it from vma*/
+   if (radix_enabled())
+   return vma_kernel_pagesize(vma);
+
+   return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, 
vma->vm_start));
+}
+
+/*
+ * A variant of hugetlb_get_unmapped_area() doing topdown search
+ * FIXME!! should we do as x86 does or non hugetlb area does ?
+ * ie, use topdown or not based on mmap_is_legacy check ?
+ */
+static unsigned long
+radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 
unsigned long len,
+unsigned long 

[PATCH v3 00/10] Convert powerpc to default topdown mmap layout

2021-11-29 Thread Christophe Leroy
Rebased on top of Nic's v5 series "powerpc: Make hash MMU code build 
configurable"

This series converts powerpc to default topdown mmap layout.

powerpc provides its own arch_get_unmapped_area() only when
slices are needed, which is only for book3s/64. First part of
the series moves slices into book3s/64 specific directories
and cleans up other subarchitectures.

Then a small modification is done to core mm to allow
powerpc to still provide its own arch_randomize_brk()

Last part converts to default topdown mmap layout.

Changes in v3:
- Fixed missing <linux/elf-randomize.h> include in last patch
- Added a patch to move SZ_1T out of drivers/pci/controller/pci-xgene.c

Changes in v2:
- Moved patch 4 before patch 2
- Make generic arch_randomize_brk() __weak
- Added patch 9

Christophe Leroy (10):
  powerpc/mm: Make slice specific to book3s/64
  powerpc/mm: Move vma_mmu_pagesize() and hugetlb_get_unmapped_area() to
slice.c
  powerpc/mm: Remove CONFIG_PPC_MM_SLICES
  powerpc/mm: Remove asm/slice.h
  powerpc/mm: Call radix__arch_get_unmapped_area() from
arch_get_unmapped_area()
  mm: Allow arch specific arch_randomize_brk() with
CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
  powerpc/mm: Convert to default topdown mmap layout
  powerpc/mm: Properly randomise mmap with slices
  sizes.h: Add SZ_1T macro
  powerpc: Simplify and move arch_randomize_brk()

 arch/powerpc/Kconfig  |   2 +-
 arch/powerpc/include/asm/book3s/64/hash.h |   7 +-
 arch/powerpc/include/asm/book3s/64/hugetlb.h  |   4 -
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |   1 +
 arch/powerpc/include/asm/book3s/64/slice.h|  18 ++
 arch/powerpc/include/asm/hugetlb.h|   2 +-
 arch/powerpc/include/asm/paca.h   |   7 -
 arch/powerpc/include/asm/page.h   |   1 -
 arch/powerpc/include/asm/processor.h  |   2 -
 arch/powerpc/include/asm/slice.h  |  46 
 arch/powerpc/kernel/paca.c|   5 -
 arch/powerpc/kernel/process.c |  41 
 arch/powerpc/mm/Makefile  |   3 +-
 arch/powerpc/mm/book3s64/Makefile |   2 +-
 arch/powerpc/mm/book3s64/hash_utils.c |  33 +--
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c  |  55 -
 arch/powerpc/mm/{ => book3s64}/slice.c| 200 ++-
 arch/powerpc/mm/hugetlbpage.c |  28 ---
 arch/powerpc/mm/mmap.c| 228 --
 arch/powerpc/mm/nohash/mmu_context.c  |   9 -
 arch/powerpc/mm/nohash/tlb.c  |   4 -
 arch/powerpc/platforms/Kconfig.cputype|   4 -
 drivers/pci/controller/pci-xgene.c|   1 -
 include/linux/sizes.h |   2 +
 mm/util.c |   2 +-
 25 files changed, 237 insertions(+), 470 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/slice.h
 rename arch/powerpc/mm/{ => book3s64}/slice.c (80%)
 delete mode 100644 arch/powerpc/mm/mmap.c

-- 
2.33.1



[PATCH v3 10/10] powerpc: Simplify and move arch_randomize_brk()

2021-11-29 Thread Christophe Leroy
arch_randomize_brk() is only needed for hash on book3s/64, for other
platforms the one provided by the default mmap layout is good enough.

Move it to hash_utils.c and use randomize_page() like the generic one.

And properly opt out the radix case instead of making an assumption
on mmu_highuser_ssize.

Also change to a 32M range like most other architectures instead of 8M.

Signed-off-by: Christophe Leroy 
---
v3:
- Add missing include <linux/elf-randomize.h>
- Move SZ_1T in a previous patch that moves it out of 
drivers/pci/controller/pci-xgene.c

v2: New
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/process.c | 41 ---
 arch/powerpc/mm/book3s64/hash_utils.c | 19 +
 2 files changed, 19 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a64cfbb85ca2..44c4bce5211d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -34,10 +34,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -2310,42 +2308,3 @@ unsigned long arch_align_stack(unsigned long sp)
sp -= get_random_int() & ~PAGE_MASK;
return sp & ~0xf;
 }
-
-static inline unsigned long brk_rnd(void)
-{
-unsigned long rnd = 0;
-
-   /* 8MB for 32bit, 1GB for 64bit */
-   if (is_32bit_task())
-   rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
-   else
-   rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
-
-   return rnd << PAGE_SHIFT;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-   unsigned long base = mm->brk;
-   unsigned long ret;
-
-#ifdef CONFIG_PPC_BOOK3S_64
-   /*
-* If we are using 1TB segments and we are allowed to randomise
-* the heap, we can put it above 1TB so it is backed by a 1TB
-* segment. Otherwise the heap will be in the bottom 1TB
-* which always uses 256MB segments and this may result in a
-* performance penalty.
-*/
-   if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == 
MMU_SEGSIZE_1T))
-   base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
-#endif
-
-   ret = PAGE_ALIGN(base + brk_rnd());
-
-   if (ret < mm->brk)
-   return mm->brk;
-
-   return ret;
-}
-
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c 
b/arch/powerpc/mm/book3s64/hash_utils.c
index 7ecadf5e6bf9..68a5468b0f19 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -37,6 +37,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -2171,3 +2173,20 @@ void __init print_system_hash_info(void)
if (htab_hash_mask)
pr_info("htab_hash_mask= 0x%lx\n", htab_hash_mask);
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+   /*
+* If we are using 1TB segments and we are allowed to randomise
+* the heap, we can put it above 1TB so it is backed by a 1TB
+* segment. Otherwise the heap will be in the bottom 1TB
+* which always uses 256MB segments and this may result in a
+* performance penalty.
+*/
+   if (is_32bit_task())
+   return randomize_page(mm->brk, SZ_32M);
+   else if (!radix_enabled() && mmu_highuser_ssize == MMU_SEGSIZE_1T)
+   return randomize_page(max_t(unsigned long, mm->brk, SZ_1T), 
SZ_1G);
+   else
+   return randomize_page(mm->brk, SZ_1G);
+}
-- 
2.33.1



[PATCH v3 03/10] powerpc/mm: Remove CONFIG_PPC_MM_SLICES

2021-11-29 Thread Christophe Leroy
CONFIG_PPC_MM_SLICES is always selected by hash book3s/64.
CONFIG_PPC_MM_SLICES is never selected by other platforms.

Remove it.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/hash.h |  4 ++--
 arch/powerpc/include/asm/hugetlb.h|  2 +-
 arch/powerpc/include/asm/paca.h   |  7 ---
 arch/powerpc/include/asm/slice.h  | 13 ++---
 arch/powerpc/kernel/paca.c|  5 -
 arch/powerpc/mm/book3s64/Makefile |  3 +--
 arch/powerpc/mm/book3s64/hash_utils.c | 14 --
 arch/powerpc/platforms/Kconfig.cputype|  4 
 8 files changed, 6 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 674fe0e890dc..97f2fc217a49 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -99,10 +99,10 @@
  * Defines the address of the vmemap area, in its own region on
  * hash table CPUs.
  */
-#ifdef CONFIG_PPC_MM_SLICES
+#ifdef CONFIG_PPC_64S_HASH_MMU
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#endif /* CONFIG_PPC_MM_SLICES */
+#endif
 
 /* PTEIDX nibble */
 #define _PTEIDX_SECONDARY  0x8
diff --git a/arch/powerpc/include/asm/hugetlb.h 
b/arch/powerpc/include/asm/hugetlb.h
index f18c543bc01d..86a60ba6bd2a 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -24,7 +24,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 unsigned long addr,
 unsigned long len)
 {
-   if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled())
+   if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU) && !radix_enabled())
return slice_is_hugepage_only_range(mm, addr, len);
return 0;
 }
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 295573a82c66..bd4dd02e61c8 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,16 +152,9 @@ struct paca_struct {
struct tlb_core_data tcd;
 #endif /* CONFIG_PPC_BOOK3E */
 
-#ifdef CONFIG_PPC_BOOK3S
 #ifdef CONFIG_PPC_64S_HASH_MMU
-#ifdef CONFIG_PPC_MM_SLICES
unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
-#else
-   u16 mm_ctx_user_psize;
-   u16 mm_ctx_sllp;
-#endif
-#endif
 #endif
 
/*
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
index 0bdd9c62eca0..b15141f2bd76 100644
--- a/arch/powerpc/include/asm/slice.h
+++ b/arch/powerpc/include/asm/slice.h
@@ -10,7 +10,7 @@
 
 struct mm_struct;
 
-#ifdef CONFIG_PPC_MM_SLICES
+#ifdef CONFIG_PPC_64S_HASH_MMU
 
 #ifdef CONFIG_HUGETLB_PAGE
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
@@ -30,16 +30,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned 
long start,
 void slice_init_new_context_exec(struct mm_struct *mm);
 void slice_setup_new_exec(void);
 
-#else /* CONFIG_PPC_MM_SLICES */
-
-static inline void slice_init_new_context_exec(struct mm_struct *mm) {}
-
-static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long 
addr)
-{
-   return 0;
-}
-
-#endif /* CONFIG_PPC_MM_SLICES */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 39da688a9455..ba593fd60124 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -344,15 +344,10 @@ void copy_mm_to_paca(struct mm_struct *mm)
 {
mm_context_t *context = >context;
 
-#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
memcpy(_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
   LOW_SLICE_ARRAY_SZ);
memcpy(_paca()->mm_ctx_high_slices_psize, 
mm_ctx_high_slices(context),
   TASK_SLICE_ARRAY_SZ(context));
-#else /* CONFIG_PPC_MM_SLICES */
-   get_paca()->mm_ctx_user_psize = context->user_psize;
-   get_paca()->mm_ctx_sllp = context->sllp;
-#endif
 }
 #endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/mm/book3s64/Makefile 
b/arch/powerpc/mm/book3s64/Makefile
index af2f3e75d458..d527dc8e30a8 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -5,7 +5,7 @@ ccflags-y   := $(NO_MINIMAL_TOC)
 obj-y  += mmu_context.o pgtable.o trace.o
 ifdef CONFIG_PPC_64S_HASH_MMU
 CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
-obj-y  += hash_pgtable.o hash_utils.o hash_tlb.o slb.o
+obj-y  += hash_pgtable.o hash_utils.o hash_tlb.o slb.o 
slice.o
 obj-$(CONFIG_PPC_HASH_MMU_NATIVE)  += hash_native.o
 obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
 obj-$(CONFIG_PPC_64K_PAGES)+= hash_64k.o
@@ -21,7 +21,6 @@ obj-$(CONFIG_PPC_RADIX_MMU)   += radix_hugetlbpage.o
 endif
 

[PATCH v3 05/10] powerpc/mm: Call radix__arch_get_unmapped_area() from arch_get_unmapped_area()

2021-11-29 Thread Christophe Leroy
Instead of setting mm->get_unmapped_area() to either
arch_get_unmapped_area() or radix__arch_get_unmapped_area(),
always set it to arch_get_unmapped_area() and call
radix__arch_get_unmapped_area() from there when radix is enabled.

To keep radix__arch_get_unmapped_area() static, move it to slice.c

Do the same with radix__arch_get_unmapped_area_topdown()

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/book3s64/slice.c | 104 ++
 arch/powerpc/mm/mmap.c   | 123 ---
 2 files changed, 104 insertions(+), 123 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index 4c3e9601fdf6..99742dde811c 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -639,12 +639,113 @@ unsigned long slice_get_unmapped_area(unsigned long 
addr, unsigned long len,
 }
 EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
 
+/*
+ * Same function as generic code used only for radix, because we don't need to 
overload
+ * the generic one. But we will have to duplicate, because hash select
+ * HAVE_ARCH_UNMAPPED_AREA
+ */
+static unsigned long
+radix__arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned 
long len,
+ unsigned long pgoff, unsigned long flags)
+{
+   struct mm_struct *mm = current->mm;
+   struct vm_area_struct *vma;
+   int fixed = (flags & MAP_FIXED);
+   unsigned long high_limit;
+   struct vm_unmapped_area_info info;
+
+   high_limit = DEFAULT_MAP_WINDOW;
+   if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+   high_limit = TASK_SIZE;
+
+   if (len > high_limit)
+   return -ENOMEM;
+
+   if (fixed) {
+   if (addr > high_limit - len)
+   return -ENOMEM;
+   return addr;
+   }
+
+   if (addr) {
+   addr = PAGE_ALIGN(addr);
+   vma = find_vma(mm, addr);
+   if (high_limit - len >= addr && addr >= mmap_min_addr &&
+   (!vma || addr + len <= vm_start_gap(vma)))
+   return addr;
+   }
+
+   info.flags = 0;
+   info.length = len;
+   info.low_limit = mm->mmap_base;
+   info.high_limit = high_limit;
+   info.align_mask = 0;
+
+   return vm_unmapped_area(&info);
+}
+
+static unsigned long
+radix__arch_get_unmapped_area_topdown(struct file *filp, const unsigned long 
addr0,
+ const unsigned long len, const unsigned 
long pgoff,
+ const unsigned long flags)
+{
+   struct vm_area_struct *vma;
+   struct mm_struct *mm = current->mm;
+   unsigned long addr = addr0;
+   int fixed = (flags & MAP_FIXED);
+   unsigned long high_limit;
+   struct vm_unmapped_area_info info;
+
+   high_limit = DEFAULT_MAP_WINDOW;
+   if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+   high_limit = TASK_SIZE;
+
+   if (len > high_limit)
+   return -ENOMEM;
+
+   if (fixed) {
+   if (addr > high_limit - len)
+   return -ENOMEM;
+   return addr;
+   }
+
+   if (addr) {
+   addr = PAGE_ALIGN(addr);
+   vma = find_vma(mm, addr);
+   if (high_limit - len >= addr && addr >= mmap_min_addr &&
+   (!vma || addr + len <= vm_start_gap(vma)))
+   return addr;
+   }
+
+   info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+   info.length = len;
+   info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+   info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
+   info.align_mask = 0;
+
+   addr = vm_unmapped_area(&info);
+   if (!(addr & ~PAGE_MASK))
+   return addr;
+   VM_BUG_ON(addr != -ENOMEM);
+
+   /*
+* A failed mmap() very likely causes application failure,
+* so fall back to the bottom-up function here. This scenario
+* can happen with large stack limits and large mmap()
+* allocations.
+*/
+   return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+}
+
 unsigned long arch_get_unmapped_area(struct file *filp,
 unsigned long addr,
 unsigned long len,
 unsigned long pgoff,
 unsigned long flags)
 {
+   if (radix_enabled())
+   return radix__arch_get_unmapped_area(filp, addr, len, pgoff, 
flags);
+
return slice_get_unmapped_area(addr, len, flags,
   
mm_ctx_user_psize(&current->mm->context), 0);
 }
@@ -655,6 +756,9 @@ unsigned long arch_get_unmapped_area_topdown(struct file 
*filp,
 const unsigned long pgoff,
 const 

[PATCH v3 08/10] powerpc/mm: Properly randomise mmap with slices

2021-11-29 Thread Christophe Leroy
Now that powerpc switched to default topdown mmap layout,
mm->mmap_base is properly randomised.  However
slice_find_area_bottomup() doesn't use mm->mmap_base but
uses the fixed TASK_UNMAPPED_BASE instead.

slice_find_area_bottomup() being used as a fallback to
slice_find_area_topdown(), it can't use mm->mmap_base
directly.

Instead of always using TASK_UNMAPPED_BASE as base address, leave
it to the caller. When called from slice_find_area_topdown()
TASK_UNMAPPED_BASE is used. Otherwise mm->mmap_base is used.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/book3s64/slice.c | 18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index 99742dde811c..997f40184e97 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -276,20 +276,18 @@ static bool slice_scan_available(unsigned long addr,
 }
 
 static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
- unsigned long len,
+ unsigned long addr, unsigned long 
len,
  const struct slice_mask 
*available,
  int psize, unsigned long 
high_limit)
 {
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
-   unsigned long addr, found, next_end;
+   unsigned long found, next_end;
struct vm_unmapped_area_info info;
 
info.flags = 0;
info.length = len;
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
info.align_offset = 0;
-
-   addr = TASK_UNMAPPED_BASE;
/*
 * Check till the allow max value for this mmap request
 */
@@ -322,12 +320,12 @@ static unsigned long slice_find_area_bottomup(struct 
mm_struct *mm,
 }
 
 static unsigned long slice_find_area_topdown(struct mm_struct *mm,
-unsigned long len,
+unsigned long addr, unsigned long 
len,
 const struct slice_mask *available,
 int psize, unsigned long 
high_limit)
 {
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
-   unsigned long addr, found, prev;
+   unsigned long found, prev;
struct vm_unmapped_area_info info;
unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);
 
@@ -335,8 +333,6 @@ static unsigned long slice_find_area_topdown(struct 
mm_struct *mm,
info.length = len;
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
info.align_offset = 0;
-
-   addr = mm->mmap_base;
/*
 * If we are trying to allocate above DEFAULT_MAP_WINDOW
 * Add the different to the mmap_base.
@@ -377,7 +373,7 @@ static unsigned long slice_find_area_topdown(struct 
mm_struct *mm,
 * can happen with large stack limits and large mmap()
 * allocations.
 */
-   return slice_find_area_bottomup(mm, len, available, psize, high_limit);
+   return slice_find_area_bottomup(mm, TASK_UNMAPPED_BASE, len, available, 
psize, high_limit);
 }
 
 
@@ -386,9 +382,9 @@ static unsigned long slice_find_area(struct mm_struct *mm, 
unsigned long len,
 int topdown, unsigned long high_limit)
 {
if (topdown)
-   return slice_find_area_topdown(mm, len, mask, psize, 
high_limit);
+   return slice_find_area_topdown(mm, mm->mmap_base, len, mask, 
psize, high_limit);
else
-   return slice_find_area_bottomup(mm, len, mask, psize, 
high_limit);
+   return slice_find_area_bottomup(mm, mm->mmap_base, len, mask, 
psize, high_limit);
 }
 
 static inline void slice_copy_mask(struct slice_mask *dst,
-- 
2.33.1



[PATCH v3 04/10] powerpc/mm: Remove asm/slice.h

2021-11-29 Thread Christophe Leroy
Move necessary stuff in asm/book3s/64/slice.h and
remove asm/slice.h

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/hash.h |  3 ++
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  1 +
 arch/powerpc/include/asm/book3s/64/slice.h| 18 +
 arch/powerpc/include/asm/page.h   |  1 -
 arch/powerpc/include/asm/slice.h  | 37 ---
 5 files changed, 22 insertions(+), 38 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/slice.h

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 97f2fc217a49..fab032f552f3 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -100,6 +100,9 @@
  * hash table CPUs.
  */
 #ifdef CONFIG_PPC_64S_HASH_MMU
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 3004f3323144..b4b2ca111f75 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -18,6 +18,7 @@
  * complete pgtable.h but only a portion of it.
  */
 #include 
+#include 
 #include 
 #include 
 
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h 
b/arch/powerpc/include/asm/book3s/64/slice.h
index f0d3194ba41b..5b0f7105bc8b 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
 #define _ASM_POWERPC_BOOK3S_64_SLICE_H
 
+#ifndef __ASSEMBLY__
+
 #define SLICE_LOW_SHIFT28
 #define SLICE_LOW_TOP  (0x100000000ul)
 #define SLICE_NUM_LOW  (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
@@ -13,4 +15,20 @@
 
 #define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64
 
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+  unsigned long len, unsigned int psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm);
+void slice_setup_new_exec(void);
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 254687258f42..62e0c6f12869 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -329,6 +329,5 @@ static inline unsigned long kaslr_offset(void)
 
 #include 
 #endif /* __ASSEMBLY__ */
-#include 
 
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
deleted file mode 100644
index b15141f2bd76..000000000000
--- a/arch/powerpc/include/asm/slice.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_SLICE_H
-#define _ASM_POWERPC_SLICE_H
-
-#ifdef CONFIG_PPC_BOOK3S_64
-#include 
-#endif
-
-#ifndef __ASSEMBLY__
-
-struct mm_struct;
-
-#ifdef CONFIG_PPC_64S_HASH_MMU
-
-#ifdef CONFIG_HUGETLB_PAGE
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-
-unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
- unsigned long flags, unsigned int psize,
- int topdown);
-
-unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
-
-void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
-  unsigned long len, unsigned int psize);
-
-void slice_init_new_context_exec(struct mm_struct *mm);
-void slice_setup_new_exec(void);
-
-#endif /* CONFIG_PPC_64S_HASH_MMU */
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_POWERPC_SLICE_H */
-- 
2.33.1



[PATCH v3 06/10] mm: Allow arch specific arch_randomize_brk() with CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT

2021-11-29 Thread Christophe Leroy
Commit e7142bf5d231 ("arm64, mm: make randomization selected by
generic topdown mmap layout") introduced a default version of
arch_randomize_brk() provided when
CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT is selected.

powerpc could select CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
but needs to provide its own arch_randomize_brk().

In order to allow that, define generic version of arch_randomize_brk()
as a __weak symbol.

Cc: Alexandre Ghiti 
Signed-off-by: Christophe Leroy 
---
v2: Make the generic version of it a __weak symbol instead of a messy play with 
CONFIG_ items.
---
 mm/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/util.c b/mm/util.c
index 741ba32a43ac..46d1a2dd7a32 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -344,7 +344,7 @@ unsigned long randomize_stack_top(unsigned long stack_top)
 }
 
 #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
-unsigned long arch_randomize_brk(struct mm_struct *mm)
+unsigned long __weak arch_randomize_brk(struct mm_struct *mm)
 {
/* Is the current task 32bit ? */
if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
-- 
2.33.1



[PATCH v3 09/10] sizes.h: Add SZ_1T macro

2021-11-29 Thread Christophe Leroy
Today drivers/pci/controller/pci-xgene.c defines SZ_1T

Move it into linux/sizes.h so that it can be re-used elsewhere.

Cc: Toan Le 
Cc: linux-...@vger.kernel.org
Signed-off-by: Christophe Leroy 
---
v3: new
---
 drivers/pci/controller/pci-xgene.c | 1 -
 include/linux/sizes.h  | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/controller/pci-xgene.c 
b/drivers/pci/controller/pci-xgene.c
index 56d0d50338c8..716dcab5ca47 100644
--- a/drivers/pci/controller/pci-xgene.c
+++ b/drivers/pci/controller/pci-xgene.c
@@ -49,7 +49,6 @@
 #define EN_REG 0x0001
 #define OB_LO_IO   0x0002
 #define XGENE_PCIE_DEVICEID0xE004
-#define SZ_1T  (SZ_1G*1024ULL)
 #define PIPE_PHY_RATE_RD(src)  ((0xc000 & (u32)(src)) >> 0xe)
 
 #define XGENE_V1_PCI_EXP_CAP   0x40
diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index 1ac79bcee2bb..84aa448d8bb3 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -47,6 +47,8 @@
 #define SZ_8G  _AC(0x200000000, ULL)
 #define SZ_16G _AC(0x400000000, ULL)
 #define SZ_32G _AC(0x800000000, ULL)
+
+#define SZ_1T  _AC(0x10000000000, ULL)
 #define SZ_64T _AC(0x400000000000, ULL)
 
 #endif /* __LINUX_SIZES_H__ */
-- 
2.33.1



[PATCH v3 01/10] powerpc/mm: Make slice specific to book3s/64

2021-11-29 Thread Christophe Leroy
Since commit 555904d07eef ("powerpc/8xx: MM_SLICE is not needed
anymore") only book3s/64 selects CONFIG_PPC_MM_SLICES.

Move slice.c into mm/book3s64/

Signed-off-by: Christophe Leroy 
---
v2: Remove now unnecessary #ifdef CONFIG_PPC_BOOK3S_64 in slice.c
---
 arch/powerpc/mm/Makefile   | 1 -
 arch/powerpc/mm/book3s64/Makefile  | 1 +
 arch/powerpc/mm/{ => book3s64}/slice.c | 2 --
 arch/powerpc/mm/nohash/mmu_context.c   | 9 -
 arch/powerpc/mm/nohash/tlb.c   | 4 
 5 files changed, 1 insertion(+), 16 deletions(-)
 rename arch/powerpc/mm/{ => book3s64}/slice.c (99%)

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index df8172da2301..d4c20484dad9 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_PPC_MMU_NOHASH)  += nohash/
 obj-$(CONFIG_PPC_BOOK3S_32)+= book3s32/
 obj-$(CONFIG_PPC_BOOK3S_64)+= book3s64/
 obj-$(CONFIG_NUMA) += numa.o
-obj-$(CONFIG_PPC_MM_SLICES)+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
diff --git a/arch/powerpc/mm/book3s64/Makefile 
b/arch/powerpc/mm/book3s64/Makefile
index 2d50cac499c5..af2f3e75d458 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_PPC_RADIX_MMU)   += radix_hugetlbpage.o
 endif
 obj-$(CONFIG_SPAPR_TCE_IOMMU)  += iommu_api.o
 obj-$(CONFIG_PPC_PKEY) += pkeys.o
+obj-$(CONFIG_PPC_MM_SLICES)+= slice.o
 
 # Instrumenting the SLB fault path can lead to duplicate SLB entries
 KCOV_INSTRUMENT_slb.o := n
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/book3s64/slice.c
similarity index 99%
rename from arch/powerpc/mm/slice.c
rename to arch/powerpc/mm/book3s64/slice.c
index 82b45b1cb973..c83be371c6e7 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -712,7 +712,6 @@ void slice_init_new_context_exec(struct mm_struct *mm)
bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
 }
 
-#ifdef CONFIG_PPC_BOOK3S_64
 void slice_setup_new_exec(void)
 {
struct mm_struct *mm = current->mm;
@@ -724,7 +723,6 @@ void slice_setup_new_exec(void)
 
mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
 }
-#endif
 
 void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
   unsigned long len, unsigned int psize)
diff --git a/arch/powerpc/mm/nohash/mmu_context.c 
b/arch/powerpc/mm/nohash/mmu_context.c
index 44b2b5e7cabe..dc3528e815b9 100644
--- a/arch/powerpc/mm/nohash/mmu_context.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -313,15 +313,6 @@ void switch_mmu_context(struct mm_struct *prev, struct 
mm_struct *next,
  */
 int init_new_context(struct task_struct *t, struct mm_struct *mm)
 {
-   /*
-* We have MMU_NO_CONTEXT set to be ~0. Hence check
-* explicitly against context.id == 0. This ensures that we properly
-* initialize context slice details for newly allocated mm's (which will
-* have id == 0) and don't alter context slice inherited via fork (which
-* will have id != 0).
-*/
-   if (mm->context.id == 0)
-   slice_init_new_context_exec(mm);
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
pte_frag_set(&mm->context, NULL);
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 311281063d48..3359cf7c2a61 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -773,9 +773,5 @@ void __init early_init_mmu(void)
 #ifdef CONFIG_PPC_47x
early_init_mmu_47x();
 #endif
-
-#ifdef CONFIG_PPC_MM_SLICES
-   mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
-#endif
 }
 #endif /* CONFIG_PPC64 */
-- 
2.33.1



[PATCH v3 07/10] powerpc/mm: Convert to default topdown mmap layout

2021-11-29 Thread Christophe Leroy
Select CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT and
remove arch/powerpc/mm/mmap.c

This change provides standard randomisation of mmaps.

See commit 8b8addf891de ("x86/mm/32: Enable full randomization on i386
and X86_32") for all the benefits of mmap randomisation.

Signed-off-by: Christophe Leroy 
---
v2: Also remove selection of ARCH_HAS_ELF_RANDOMIZE as it is already selected 
by CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
---
 arch/powerpc/Kconfig |   2 +-
 arch/powerpc/include/asm/processor.h |   2 -
 arch/powerpc/mm/Makefile |   2 +-
 arch/powerpc/mm/mmap.c   | 105 ---
 4 files changed, 2 insertions(+), 109 deletions(-)
 delete mode 100644 arch/powerpc/mm/mmap.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index fb48823ccd62..20504a9901f2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -122,7 +122,6 @@ config PPC
select ARCH_HAS_DEBUG_WXif STRICT_KERNEL_RWX
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_MAP_DIRECT  if PPC_PSERIES
-   select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_HUGEPD  if HUGETLB_PAGE
@@ -158,6 +157,7 @@ config PPC
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS  if PPC_QUEUED_SPINLOCKS
select ARCH_USE_QUEUED_SPINLOCKSif PPC_QUEUED_SPINLOCKS
+   select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_LD_ORPHAN_WARN
diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index e39bd0ff69f3..d906b14dd599 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -378,8 +378,6 @@ static inline void prefetchw(const void *x)
 
 #define spin_lock_prefetch(x)  prefetchw(x)
 
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
 /* asm stubs */
 extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
 extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index d4c20484dad9..503a6e249940 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
 
 ccflags-$(CONFIG_PPC64):= $(NO_MINIMAL_TOC)
 
-obj-y  := fault.o mem.o pgtable.o mmap.o maccess.o 
pageattr.o \
+obj-y  := fault.o mem.o pgtable.o maccess.o pageattr.o 
\
   init_$(BITS).o pgtable_$(BITS).o \
   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
   init-common.o mmu_context.o drmem.o \
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
deleted file mode 100644
index 5972d619d274..000000000000
--- a/arch/powerpc/mm/mmap.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  flexible mmap layout support
- *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
- * All Rights Reserved.
- *
- * Started by Ingo Molnar 
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave at least a ~128 MB hole.
- */
-#define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
-
-static inline int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-   if (current->personality & ADDR_COMPAT_LAYOUT)
-   return 1;
-
-   if (rlim_stack->rlim_cur == RLIM_INFINITY)
-   return 1;
-
-   return sysctl_legacy_va_layout;
-}
-
-unsigned long arch_mmap_rnd(void)
-{
-   unsigned long shift, rnd;
-
-   shift = mmap_rnd_bits;
-#ifdef CONFIG_COMPAT
-   if (is_32bit_task())
-   shift = mmap_rnd_compat_bits;
-#endif
-   rnd = get_random_long() % (1ul << shift);
-
-   return rnd << PAGE_SHIFT;
-}
-
-static inline unsigned long stack_maxrandom_size(void)
-{
-   if (!(current->flags & PF_RANDOMIZE))
-   return 0;
-
-   /* 8MB for 32bit, 1GB for 64bit */
-   if (is_32bit_task())
-   return (1<<23);
-   else
-   return (1<<30);
-}
-
-static inline unsigned long mmap_base(unsigned long rnd,
- struct rlimit *rlim_stack)
-{
-   unsigned long gap = rlim_stack->rlim_cur;
-   unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
-
-   /* Values close to RLIM_INFINITY can overflow. */
-   if (gap + pad > gap)
-   gap += pad;
-
-   if (gap < MIN_GAP)
-   gap = MIN_GAP;
-   else if (gap > MAX_GAP)
-   gap = MAX_GAP;
-
-   return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd);
-}
-
-/*
- * This function, called very early during the creation of a new
- * process VM image, 

Re: [PATCH v5 15/17] powerpc/64s: Make hash MMU support configurable

2021-11-29 Thread Fabiano Rosas
Nicholas Piggin  writes:

> This adds Kconfig selection which allows 64s hash MMU support to be
> disabled. It can be disabled if radix support is enabled, the minimum
> supported CPU type is POWER9 (or higher), and KVM is not selected.
>
> Signed-off-by: Nicholas Piggin 
> ---
>  arch/powerpc/Kconfig |  3 ++-
>  arch/powerpc/include/asm/mmu.h   | 16 +---
>  arch/powerpc/kernel/dt_cpu_ftrs.c| 14 ++
>  arch/powerpc/kvm/Kconfig |  1 +
>  arch/powerpc/mm/init_64.c| 13 +++--
>  arch/powerpc/platforms/Kconfig.cputype   | 23 +--
>  arch/powerpc/platforms/cell/Kconfig  |  1 +
>  arch/powerpc/platforms/maple/Kconfig |  1 +
>  arch/powerpc/platforms/microwatt/Kconfig |  2 +-
>  arch/powerpc/platforms/pasemi/Kconfig|  1 +
>  arch/powerpc/platforms/powermac/Kconfig  |  1 +
>  arch/powerpc/platforms/powernv/Kconfig   |  2 +-

powernv_defconfig brings CONFIG_CXL=m

../drivers/misc/cxl/main.c: In function ‘cxl_alloc_sst’:
  
../drivers/misc/cxl/main.c:127:45: error: ‘mmu_linear_psize’ undeclared (first 
use in this function); did you mean ‘mmu_virtual_psize’?
  
  127 |  sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 
50;
  | ^~~~
  
  | mmu_virtual_psize   
  
../drivers/misc/cxl/main.c:127:45: note: each undeclared identifier is reported 
only once for each function it appears in   
 
make[4]: *** [../scripts/Makefile.build:287: drivers/misc/cxl/main.o] Error 1 



Re: [PATCH] powerpc/ftrace: Handle large kernel configs

2021-11-29 Thread Christophe Leroy

Hi Naveen,

Le 16/10/2018 à 22:25, Naveen N. Rao a écrit :

Currently, we expect to be able to reach ftrace_caller() from all
ftrace-enabled functions through a single relative branch. With large
kernel configs, we see functions outside of 32MB of ftrace_caller()
causing ftrace_init() to bail.

In such configurations, gcc/ld emits two types of trampolines for mcount():
1. A long_branch, which has a single branch to mcount() for functions that
are one hop away from mcount():
c19e8544 <00031b56.long_branch._mcount>:
c19e8544:   4a 69 3f ac b   c007c4f0 
<._mcount>

2. A plt_branch, for functions that are farther away from mcount():
c51f33f8 <0008ba04.plt_branch._mcount>:
c51f33f8:   3d 82 ff a4 addis   r12,r2,-92
c51f33fc:   e9 8c 04 20 ld  r12,1056(r12)
c51f3400:   7d 89 03 a6 mtctr   r12
c51f3404:   4e 80 04 20 bctr

We can reuse those trampolines for ftrace if we can have those
trampolines go to ftrace_caller() instead. However, with ABIv2, we
cannot depend on r2 being valid. As such, we use only the long_branch
trampolines by patching those to instead branch to ftrace_caller or
ftrace_regs_caller.

In addition, we add additional trampolines around .text and .init.text
to catch locations that are covered by the plt branches. This allows
ftrace to work with most large kernel configurations.

For now, we always patch the trampolines to go to ftrace_regs_caller,
which is slightly inefficient. This can be optimized further at a later
point.

Signed-off-by: Naveen N. Rao 
---
Since RFC:
- Change to patch long_branch to go to ftrace_caller, rather than
   patching mcount()
- Stop using plt_branch since it can't be relied on for ABIv2
- Add trampolines around .text and .init.text to catch remaining
   locations

- Naveen

  arch/powerpc/kernel/trace/ftrace.c| 261 +-
  arch/powerpc/kernel/trace/ftrace_64.S |  12 ++
  arch/powerpc/kernel/vmlinux.lds.S |  13 +-
  3 files changed, 281 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 4bfbb54dee51..4bf051d3e21e 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c


...


+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+   int i, op;
+   unsigned long ptr;
+   static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
+
+   /* Is this a known long jump tramp? */
+   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+   if (!ftrace_tramps[i])
+   break;
+   else if (ftrace_tramps[i] == tramp)
+   return 0;
+
+   /* Is this a known plt tramp? */
+   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+   if (!ftrace_plt_tramps[i])
+   break;
+   else if (ftrace_plt_tramps[i] == tramp)
+   return -1;


I don't understand how this is supposed to work.
ftrace_plt_tramps[] being a static table, it is set to 0s at startup.
So the above loop breaks at first round.

Then ftrace_plt_tramps[i] is never/nowhere set.

So I just see it as useless.

Am I missing something ?

Thanks
Christophe


+
+   /* New trampoline -- read where this goes */
+   if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) {
+   pr_debug("Fetching opcode failed.\n");
+   return -1;
+   }
+
+   /* Is this a 24 bit branch? */
+   if (!is_b_op(op)) {
+   pr_debug("Trampoline is not a long branch tramp.\n");
+   return -1;
+   }
+
+   /* lets find where the pointer goes */
+   ptr = find_bl_target(tramp, op);
+
+   if (ptr != ppc_global_function_entry((void *)_mcount)) {
+   pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+   return -1;
+   }
+
+   /* Let's re-write the tramp to go to ftrace_[regs_]caller */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+   ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+   ptr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+   if (!create_branch((void *)tramp, ptr, 0)) {
+   pr_debug("%ps is not reachable from existing mcount tramp\n",
+   (void *)ptr);
+   return -1;
+   }
+
+   if (patch_branch((unsigned int *)tramp, ptr, 0)) {
+   pr_debug("REL24 out of range!\n");
+   return -1;
+   }
+
+   if (add_ftrace_tramp(tramp)) {
+   pr_debug("No tramp locations left\n");
+   return -1;
+   }

Re: [PATCH v2 28/45] mfd: rn5t618: Use devm_register_power_handler()

2021-11-29 Thread Dmitry Osipenko
29.11.2021 14:55, Lee Jones пишет:
> On Thu, 28 Oct 2021, Dmitry Osipenko wrote:
> 
>> Use devm_register_power_handler() that replaces global pm_power_off
>> variable and allows to register multiple power-off handlers. It also
>> provides restart-handler support, i.e. all in one API.
>>
>> Signed-off-by: Dmitry Osipenko 
>> ---
>>  drivers/mfd/rn5t618.c | 56 ---
>>  1 file changed, 21 insertions(+), 35 deletions(-)
> 
> For my own reference (apply this as-is to your sign-off block):
> 
>   Acked-for-MFD-by: Lee Jones 
> 

Thank you. This and other driver patches will be slightly changed
because the power-handler was renamed to sys-off handler starting with
the v3 of this series, but your ack will still be valid here.


Re: [PATCH v2 28/45] mfd: rn5t618: Use devm_register_power_handler()

2021-11-29 Thread Lee Jones
On Thu, 28 Oct 2021, Dmitry Osipenko wrote:

> Use devm_register_power_handler() that replaces global pm_power_off
> variable and allows to register multiple power-off handlers. It also
> provides restart-handler support, i.e. all in one API.
> 
> Signed-off-by: Dmitry Osipenko 
> ---
>  drivers/mfd/rn5t618.c | 56 ---
>  1 file changed, 21 insertions(+), 35 deletions(-)

For my own reference (apply this as-is to your sign-off block):

  Acked-for-MFD-by: Lee Jones 

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog


Re: [PATCH v4 08/25] kernel: Add combined power-off+restart handler call chain API

2021-11-29 Thread Dmitry Osipenko
29.11.2021 03:36, Michał Mirosław пишет:
> On Mon, Nov 29, 2021 at 12:53:51AM +0300, Dmitry Osipenko wrote:
>> 29.11.2021 00:17, Michał Mirosław пишет:
>>>> I'm having trouble with parsing this comment. Could you please try to
>>>> rephrase it? I don't see how you could check whether power-off handler
>>>> is available if you'll mix all handlers together.
>>> If notify_call_chain() would be fixed to return NOTIFY_OK if any call
>>> returned NOTIFY_OK, then this would be a clear way to gather the
>>> answer if any of the handlers will attempt the final action (reboot or
>>> power off).
>> Could you please show a code snippet that implements your suggestion?
> 
> A rough idea is this:
> 
>  static int notifier_call_chain(struct notifier_block **nl,
>  unsigned long val, void *v,
>  int nr_to_call, int *nr_calls)
>  {
> - int ret = NOTIFY_DONE;
> + int ret, result = NOTIFY_DONE;
>   struct notifier_block *nb, *next_nb;
>  
>   nb = rcu_dereference_raw(*nl);
>  
>   while (nb && nr_to_call) {
> ...
>   ret = nb->notifier_call(nb, val, v);
> +
> + /* Assuming NOTIFY_STOP-carrying return is always greater than 
> non-stopping one. */
> + if (result < ret)
> + result = ret;
> ... 
>   }
> - return ret;
> + return result;
>  }
> 
> Then:
> 
> bool prepare_reboot()
> {
>   int ret = xx_notifier_call_chain(_notifier, PREPARE_REBOOT, 
> ...);
>   return ret == NOTIFY_OK;
> }
> 
> And the return value would signify whether the reboot will be attempted
> when calling the chain for the REBOOT action. (Analogously for powering off.)

If you started to execute call chain, then you began the power-off /
restart sequence, this is a point of no return. Sorry, I still don't
understand what you're trying to achieve.

The approach of having separate call chains is simple and intuitive, I
don't see reasons to change it.


[PATCH v4 2/5] powerpc/inst: Define ppc_inst_t

2021-11-29 Thread Christophe Leroy
In order to stop using 'struct ppc_inst' on PPC32,
define a ppc_inst_t typedef.

Signed-off-by: Christophe Leroy 
---
v3: Rebased and resolved conflicts

v2: Anonymise the structure so that only the typedef can be used
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/code-patching.h  | 18 +++
 arch/powerpc/include/asm/hw_breakpoint.h  |  4 +-
 arch/powerpc/include/asm/inst.h   | 36 ++---
 arch/powerpc/include/asm/sstep.h  |  4 +-
 arch/powerpc/kernel/align.c   |  4 +-
 arch/powerpc/kernel/epapr_paravirt.c  |  2 +-
 arch/powerpc/kernel/hw_breakpoint.c   |  4 +-
 .../kernel/hw_breakpoint_constraints.c|  4 +-
 arch/powerpc/kernel/kprobes.c |  4 +-
 arch/powerpc/kernel/mce_power.c   |  2 +-
 arch/powerpc/kernel/optprobes.c   |  4 +-
 arch/powerpc/kernel/process.c |  2 +-
 arch/powerpc/kernel/setup_32.c|  2 +-
 arch/powerpc/kernel/trace/ftrace.c| 54 +--
 arch/powerpc/kernel/vecemu.c  |  2 +-
 arch/powerpc/lib/code-patching.c  | 38 ++---
 arch/powerpc/lib/feature-fixups.c |  4 +-
 arch/powerpc/lib/sstep.c  |  4 +-
 arch/powerpc/lib/test_emulate_step.c  | 10 ++--
 arch/powerpc/mm/maccess.c |  2 +-
 arch/powerpc/perf/8xx-pmu.c   |  2 +-
 arch/powerpc/xmon/xmon.c  | 14 ++---
 arch/powerpc/xmon/xmon_bpts.h |  4 +-
 23 files changed, 112 insertions(+), 112 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 4ba834599c4d..46e8c5a8ce51 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -24,20 +24,20 @@
 
 bool is_offset_in_branch_range(long offset);
 bool is_offset_in_cond_branch_range(long offset);
-int create_branch(struct ppc_inst *instr, const u32 *addr,
+int create_branch(ppc_inst_t *instr, const u32 *addr,
  unsigned long target, int flags);
-int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
   unsigned long target, int flags);
 int patch_branch(u32 *addr, unsigned long target, int flags);
-int patch_instruction(u32 *addr, struct ppc_inst instr);
-int raw_patch_instruction(u32 *addr, struct ppc_inst instr);
+int patch_instruction(u32 *addr, ppc_inst_t instr);
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
 
 static inline unsigned long patch_site_addr(s32 *site)
 {
return (unsigned long)site + *site;
 }
 
-static inline int patch_instruction_site(s32 *site, struct ppc_inst instr)
+static inline int patch_instruction_site(s32 *site, ppc_inst_t instr)
 {
return patch_instruction((u32 *)patch_site_addr(site), instr);
 }
@@ -58,11 +58,11 @@ static inline int modify_instruction_site(s32 *site, 
unsigned int clr, unsigned
return modify_instruction((unsigned int *)patch_site_addr(site), clr, 
set);
 }
 
-int instr_is_relative_branch(struct ppc_inst instr);
-int instr_is_relative_link_branch(struct ppc_inst instr);
+int instr_is_relative_branch(ppc_inst_t instr);
+int instr_is_relative_link_branch(ppc_inst_t instr);
 unsigned long branch_target(const u32 *instr);
-int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src);
-extern bool is_conditional_branch(struct ppc_inst instr);
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src);
+bool is_conditional_branch(ppc_inst_t instr);
 #ifdef CONFIG_PPC_BOOK3E_64
 void __patch_exception(int exc, unsigned long addr);
 #define patch_exception(exc, name) do { \
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index abebfbee5b1c..88053d3c68e6 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -56,11 +56,11 @@ static inline int nr_wp_slots(void)
return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1;
 }
 
-bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
  unsigned long ea, int type, int size,
  struct arch_hw_breakpoint *info);
 
-void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
 int *type, int *size, unsigned long *ea);
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index fea4d46155a9..055de1fa5d46 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -8,7 +8,7 @@
 ({ \
long __gui_ret; 

[PATCH v4 4/5] powerpc/inst: Move ppc_inst_t definition in asm/reg.h

2021-11-29 Thread Christophe Leroy
Because of circular inclusion of asm/hw_breakpoint.h, we
need to move the definition of ppc_inst_t out of asm/inst.h
and into asm/reg.h so that asm/hw_breakpoint.h gets it
without including asm/inst.h

Also remove asm/inst.h from asm/uprobes.h as it's not
needed anymore.

Signed-off-by: Christophe Leroy 
---
v4: New to support inlining of copy_inst_from_kernel_nofault() in following 
patch.
---
 arch/powerpc/include/asm/hw_breakpoint.h |  1 -
 arch/powerpc/include/asm/inst.h  | 10 +-
 arch/powerpc/include/asm/reg.h   | 12 
 arch/powerpc/include/asm/uprobes.h   |  1 -
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 88053d3c68e6..84d39fd42f71 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -10,7 +10,6 @@
 #define _PPC_BOOK3S_64_HW_BREAKPOINT_H
 
 #include 
-#include 
 
 #ifdef __KERNEL__
 struct arch_hw_breakpoint {
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 5c503816ebc0..86074e83d2a5 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -3,6 +3,7 @@
 #define _ASM_POWERPC_INST_H
 
 #include 
+#include 
 
 #define ___get_user_instr(gu_op, dest, ptr)\
 ({ \
@@ -35,13 +36,6 @@
  */
 
 #if defined(CONFIG_PPC64) || defined(__CHECKER__)
-typedef struct {
-   u32 val;
-#ifdef CONFIG_PPC64
-   u32 suffix;
-#endif
-} __packed ppc_inst_t;
-
 static inline u32 ppc_inst_val(ppc_inst_t x)
 {
return x.val;
@@ -50,8 +44,6 @@ static inline u32 ppc_inst_val(ppc_inst_t x)
 #define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
 
 #else
-typedef u32 ppc_inst_t;
-
 static inline u32 ppc_inst_val(ppc_inst_t x)
 {
return x;
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e9d27265253b..85501181f929 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1366,6 +1366,18 @@
 
 /* Macros for setting and retrieving special purpose registers */
 #ifndef __ASSEMBLY__
+
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
+typedef struct {
+   u32 val;
+#ifdef CONFIG_PPC64
+   u32 suffix;
+#endif
+} __packed ppc_inst_t;
+#else
+typedef u32 ppc_inst_t;
+#endif
+
 #define mfmsr()({unsigned long rval; \
asm volatile("mfmsr %0" : "=r" (rval) : \
: "memory"); rval;})
diff --git a/arch/powerpc/include/asm/uprobes.h 
b/arch/powerpc/include/asm/uprobes.h
index fe683371336f..a7ae1860115a 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -11,7 +11,6 @@
 
 #include 
 #include 
-#include 
 
 typedef ppc_opcode_t uprobe_opcode_t;
 
-- 
2.33.1



[PATCH v4 5/5] powerpc/inst: Optimise copy_inst_from_kernel_nofault()

2021-11-29 Thread Christophe Leroy
copy_inst_from_kernel_nofault() uses copy_from_kernel_nofault() to
copy one or two 32bits words. This means calling an out-of-line
function which itself calls back copy_from_kernel_nofault_allowed()
then performs a generic copy with loops.

Rewrite copy_inst_from_kernel_nofault() to do everything at a
single place and use __get_kernel_nofault() directly to perform
single accesses without loops.

Although the generic function uses pagefault_disable(), it is not
required on powerpc because do_page_fault() bails earlier when a
kernel mode fault happens on a kernel address.

As the function has now become very small, inline it.

With this change, on an 8xx the time spent in the loop in
ftrace_replace_code() is reduced by 23% at function tracer activation
and 27% at nop tracer activation.
The overall time to activate function tracer (measured with shell
command 'time') is 570ms before the patch and 470ms after the patch.

Even vmlinux size is reduced (by 152 instructions).

Before the patch:

0018 :
  18:   94 21 ff e0 stwur1,-32(r1)
  1c:   7c 08 02 a6 mflrr0
  20:   38 a0 00 04 li  r5,4
  24:   93 e1 00 1c stw r31,28(r1)
  28:   7c 7f 1b 78 mr  r31,r3
  2c:   38 61 00 08 addir3,r1,8
  30:   90 01 00 24 stw r0,36(r1)
  34:   48 00 00 01 bl  34 
34: R_PPC_REL24 copy_from_kernel_nofault
  38:   2c 03 00 00 cmpwi   r3,0
  3c:   40 82 00 0c bne 48 
  40:   81 21 00 08 lwz r9,8(r1)
  44:   91 3f 00 00 stw r9,0(r31)
  48:   80 01 00 24 lwz r0,36(r1)
  4c:   83 e1 00 1c lwz r31,28(r1)
  50:   38 21 00 20 addir1,r1,32
  54:   7c 08 03 a6 mtlrr0
  58:   4e 80 00 20 blr

After the patch (before inlining):

0018 :
  18:   3d 20 b0 00 lis r9,-20480
  1c:   7c 04 48 40 cmplw   r4,r9
  20:   7c 69 1b 78 mr  r9,r3
  24:   41 80 00 14 blt 38 
  28:   81 44 00 00 lwz r10,0(r4)
  2c:   38 60 00 00 li  r3,0
  30:   91 49 00 00 stw r10,0(r9)
  34:   4e 80 00 20 blr

  38:   38 60 ff de li  r3,-34
  3c:   4e 80 00 20 blr
  40:   38 60 ff f2 li  r3,-14
  44:   4e 80 00 20 blr

Signed-off-by: Christophe Leroy 
---
v4: Inline and remove pagefault_disable()

v3: New
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/inst.h | 21 -
 arch/powerpc/mm/maccess.c   | 17 -
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 86074e83d2a5..0aa811ff44d5 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -4,6 +4,8 @@
 
 #include 
 #include 
+#include 
+#include 
 
 #define ___get_user_instr(gu_op, dest, ptr)\
 ({ \
@@ -148,6 +150,23 @@ static inline char *__ppc_inst_as_str(char 
str[PPC_INST_STR_LEN], ppc_inst_t x)
__str;  \
 })
 
-int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src);
+static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
+{
+   unsigned int val, suffix;
+
+   if (unlikely(!is_kernel_addr((unsigned long)src)))
+   return -ERANGE;
+
+   __get_kernel_nofault(, src, u32, Efault);
+   if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
+   __get_kernel_nofault(, src + 1, u32, Efault);
+   *inst = ppc_inst_prefix(val, suffix);
+   } else {
+   *inst = ppc_inst(val);
+   }
+   return 0;
+Efault:
+   return -EFAULT;
+}
 
 #endif /* _ASM_POWERPC_INST_H */
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
index 5abae96b2b46..ea821d0ffe16 100644
--- a/arch/powerpc/mm/maccess.c
+++ b/arch/powerpc/mm/maccess.c
@@ -11,20 +11,3 @@ bool copy_from_kernel_nofault_allowed(const void 
*unsafe_src, size_t size)
 {
return is_kernel_addr((unsigned long)unsafe_src);
 }
-
-int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
-{
-   unsigned int val, suffix;
-   int err;
-
-   err = copy_from_kernel_nofault(, src, sizeof(val));
-   if (err)
-   return err;
-   if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
-   err = copy_from_kernel_nofault(, src + 1, 
sizeof(suffix));
-   *inst = ppc_inst_prefix(val, suffix);
-   } else {
-   *inst = ppc_inst(val);
-   }
-   return err;
-}
-- 
2.33.1



[PATCH v4 3/5] powerpc/inst: Define ppc_inst_t as u32 on PPC32

2021-11-29 Thread Christophe Leroy
Unlike PPC64 ABI, PPC32 uses the stack to pass a parameter defined
as a struct, even when the struct has a single simple element.

To avoid that, define ppc_inst_t as u32 on PPC32.

Keep it as 'struct ppc_inst' when __CHECKER__ is defined so that
sparse can perform type checking.

Also revert commit 511eea5e2ccd ("powerpc/kprobes: Fix Oops by passing
ppc_inst as a pointer to emulate_step() on ppc32") as now the
instruction to be emulated is passed as a register to emulate_step().

Signed-off-by: Christophe Leroy 
---
v2: Make it work with kprobes
---
 arch/powerpc/include/asm/inst.h | 15 +--
 arch/powerpc/kernel/optprobes.c |  8 ++--
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 055de1fa5d46..5c503816ebc0 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -34,6 +34,7 @@
  * Instruction data type for POWER
  */
 
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
 typedef struct {
u32 val;
 #ifdef CONFIG_PPC64
@@ -46,13 +47,23 @@ static inline u32 ppc_inst_val(ppc_inst_t x)
return x.val;
 }
 
+#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
+
+#else
+typedef u32 ppc_inst_t;
+
+static inline u32 ppc_inst_val(ppc_inst_t x)
+{
+   return x;
+}
+#define ppc_inst(x) (x)
+#endif
+
 static inline int ppc_inst_primary_opcode(ppc_inst_t x)
 {
return ppc_inst_val(x) >> 26;
 }
 
-#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
-
 #ifdef CONFIG_PPC64
 #define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) })
 
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 378db980ded3..3b1c2236cbee 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe 
*op, struct kprobe *p)
/*
 * 3. load instruction to be emulated into relevant register, and
 */
-   if (IS_ENABLED(CONFIG_PPC64)) {
-   temp = ppc_inst_read(p->ainsn.insn);
-   patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + 
TMPL_INSN_IDX);
-   } else {
-   patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + 
TMPL_INSN_IDX);
-   }
+   temp = ppc_inst_read(p->ainsn.insn);
+   patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
 
/*
 * 4. branch back from trampoline
-- 
2.33.1



[PATCH v4 1/5] powerpc/inst: Refactor ___get_user_instr()

2021-11-29 Thread Christophe Leroy
PPC64 version of ___get_user_instr() can be used for PPC32 as well,
by simply disabling the suffix part with IS_ENABLED(CONFIG_PPC64).

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/inst.h | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index b11c0e2f9639..fea4d46155a9 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -4,8 +4,6 @@
 
 #include 
 
-#ifdef CONFIG_PPC64
-
 #define ___get_user_instr(gu_op, dest, ptr)\
 ({ \
long __gui_ret; \
@@ -16,7 +14,7 @@
__chk_user_ptr(ptr);\
__gui_ret = gu_op(__prefix, __gui_ptr); \
if (__gui_ret == 0) {   \
-   if ((__prefix >> 26) == OP_PREFIX) {\
+   if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) 
{ \
__gui_ret = gu_op(__suffix, __gui_ptr + 1); \
__gui_inst = ppc_inst_prefix(__prefix, __suffix); \
} else {\
@@ -27,13 +25,6 @@
}   \
__gui_ret;  \
 })
-#else /* !CONFIG_PPC64 */
-#define ___get_user_instr(gu_op, dest, ptr)\
-({ \
-   __chk_user_ptr(ptr);\
-   gu_op((dest).val, (u32 __user *)(ptr)); \
-})
-#endif /* CONFIG_PPC64 */
 
 #define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr)
 
-- 
2.33.1



Re: [PATCH v4 05/25] reboot: Warn if restart handler has duplicated priority

2021-11-29 Thread Dmitry Osipenko
29.11.2021 03:26, Michał Mirosław пишет:
> On Mon, Nov 29, 2021 at 12:06:19AM +0300, Dmitry Osipenko wrote:
>> 28.11.2021 03:28, Michał Mirosław пишет:
>>> On Fri, Nov 26, 2021 at 09:00:41PM +0300, Dmitry Osipenko wrote:
 Add sanity check which ensures that there are no two restart handlers
 registered with the same priority. Normally it's a direct sign of a
 problem if two handlers use the same priority.
>>>
>>> The patch doesn't ensure the property that there are no duplicated-priority
>>> entries on the chain.
>>
>> It's not the exact point of this patch.
>>
>>> I'd rather see a atomic_notifier_chain_register_unique() that returns
>>> -EBUSY or something instead of adding an entry with duplicate priority.
>>> That way it would need only one list traversal unless you want to
>>> register the duplicate anyway (then you would call the older
>>> atomic_notifier_chain_register() after reporting the error).
>>
>> The point of this patch is to warn developers about the problem that
>> needs to be fixed. We already have such troubling drivers in mainline.
>>
>> It's not critical to register different handlers with duplicated
>> priorities, but such cases really need to be corrected. We shouldn't
>> break users' machines during transition to the new API, meanwhile
>> developers should take action of fixing their drivers.
>>
>>> (Or you could return > 0 when a duplicate is registered in
>>> atomic_notifier_chain_register() if the callers are prepared
>>> for that. I don't really like this way, though.)
>>
>> I had a similar thought at some point before and decided that I'm not in
>> favor of this approach. It's nicer to have a dedicated function that
>> verifies the uniqueness, IMO.
> 
> I don't like the part that it traverses the list second time to check
> the uniqueness. But actually you could avoid that if
> notifier_chain_register() would always add equal-priority entries in
> reverse order:
> 
>  static int notifier_chain_register(struct notifier_block **nl,
>   struct notifier_block *n)
>  {
>   while ((*nl) != NULL) {
>   if (unlikely((*nl) == n)) {
>   WARN(1, "double register detected");
>   return 0;
>   }
> - if (n->priority > (*nl)->priority)
> + if (n->priority >= (*nl)->priority)
>   break;
>   nl = &((*nl)->next);
>   }
>   n->next = *nl;
>   rcu_assign_pointer(*nl, n);
>   return 0;
>  }
> 
> Then the check for uniqueness after adding would be:
> 
>  WARN(nb->next && nb->priority == nb->next->priority);

We can't just change the registration order because invocation order of
the call chain depends on the registration order and some of current
users may rely on that order. I'm pretty sure that changing the order
will have unfortunate consequences.


Re: [linux-next] Read-only file system after boot (powerpc)

2021-11-29 Thread Sachin Sant


> On 29-Nov-2021, at 3:29 PM, Christoph Hellwig  wrote:
> 
> Can you check if your tree already includes this commit:
> 
> https://git.kernel.dk/cgit/linux-block/commit/?h=for-5.17/block&id=3f39d47d7ad858c024bd777f5f2a86fa7f6a9f14
> 
> and if not see if that fixes the problem?

Thanks. Yes, this patch fixes the problem for me.

-Sachin



Re: [patch 09/22] MIPS: Octeon: Use arch_setup_msi_irq()

2021-11-29 Thread Thomas Bogendoerfer
On Sat, Nov 27, 2021 at 02:18:48AM +0100, Thomas Gleixner wrote:
> The core code provides the same loop code except for the MSI-X reject. Move
> that to arch_setup_msi_irq() and remove the duplicated code.
> 
> No functional change.
> 
> Signed-off-by: Thomas Gleixner 
> Cc: Thomas Bogendoerfer 
> Cc: linux-m...@vger.kernel.org
> ---
>  arch/mips/pci/msi-octeon.c |   32 +++-
>  1 file changed, 3 insertions(+), 29 deletions(-)
> 
> --- a/arch/mips/pci/msi-octeon.c
> +++ b/arch/mips/pci/msi-octeon.c
> @@ -68,6 +68,9 @@ int arch_setup_msi_irq(struct pci_dev *d
>   u64 search_mask;
>   int index;
>  
> + if (desc->pci.msi_attrib.is_msix)
> + return -EINVAL;
> +
>   /*
>* Read the MSI config to figure out how many IRQs this device
>* wants.  Most devices only want 1, which will give
> @@ -182,35 +185,6 @@ int arch_setup_msi_irq(struct pci_dev *d
>   return 0;
>  }
>  
> -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
> -{
> - struct msi_desc *entry;
> - int ret;
> -
> - /*
> -  * MSI-X is not supported.
> -  */
> - if (type == PCI_CAP_ID_MSIX)
> - return -EINVAL;
> -
> - /*
> -  * If an architecture wants to support multiple MSI, it needs to
> -  * override arch_setup_msi_irqs()
> -  */
> - if (type == PCI_CAP_ID_MSI && nvec > 1)
> - return 1;
> -
> - for_each_pci_msi_entry(entry, dev) {
> - ret = arch_setup_msi_irq(dev, entry);
> - if (ret < 0)
> - return ret;
> - if (ret > 0)
> - return -ENOSPC;
> - }
> -
> - return 0;
> -}
> -
>  /**
>   * Called when a device no longer needs its MSI interrupts. All
>   * MSI interrupts for the device are freed.

Acked-by: Thomas Bogendoerfer 

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.[ RFC1925, 2.3 ]


Re: [linux-next] Read-only file system after boot (powerpc)

2021-11-29 Thread Christoph Hellwig
Can you check if your tree already includes this commit:

https://git.kernel.dk/cgit/linux-block/commit/?h=for-5.17/block&id=3f39d47d7ad858c024bd777f5f2a86fa7f6a9f14

and if not see if that fixes the problem?


Re: [patch 00/22] genirq/msi, PCI/MSI: Spring cleaning - Part 1

2021-11-29 Thread Cédric Le Goater

On 11/27/21 02:18, Thomas Gleixner wrote:

The [PCI] MSI code has gained quite some warts over time. A recent
discussion unearthed a shortcoming: the lack of support for expanding
PCI/MSI-X vectors after initialization of MSI-X.

PCI/MSI-X has no requirement to setup all vectors when MSI-X is enabled in
the device. The non-used vectors have just to be masked in the vector
table. For PCI/MSI this is not possible because the number of vectors
cannot be changed after initialization.

The PCI/MSI code, but also the core MSI irq domain code are built around
the assumption that all required vectors are installed at initialization
time and freed when the device is shut down by the driver.

Supporting dynamic expansion at least for MSI-X is important for VFIO so
that the host side interrupts for passthrough devices can be installed on
demand.

This is the first part of a large (total 101 patches) series which
refactors the [PCI]MSI infrastructure to make runtime expansion of MSI-X
vectors possible. The last part (10 patches) provide this functionality.

The first part is mostly a cleanup which consolidates code, moves the PCI
MSI code into a separate directory and splits it up into several parts.

No functional change intended except for patch 2/N which changes the
behaviour of pci_get_vector()/affinity() to get rid of the assumption that
the provided index is the "index" into the descriptor list instead of using
it as the actual MSI[X] index as seen by the hardware. This would break
users of sparse allocated MSI-X entries, but none of them use these
functions.

This series is based on 5.16-rc2 and also available via git:

  git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git msi-v1-part-1

For the curious who can't wait for the next part to arrive the full series
is available via:

  git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git msi-v1-part-4


After fixing the compile failures, I didn't see any regressions on
these platforms :

  PowerNV, pSeries under KVM and PowerVM, using POWER8/9 processors.

Thanks,

C.


Thanks,

tglx
---
  arch/powerpc/platforms/4xx/msi.c|  281 
  b/Documentation/driver-api/pci/pci.rst  |2
  b/arch/mips/pci/msi-octeon.c|   32 -
  b/arch/powerpc/platforms/4xx/Makefile   |1
  b/arch/powerpc/platforms/cell/axon_msi.c|2
  b/arch/powerpc/platforms/powernv/pci-ioda.c |4
  b/arch/powerpc/platforms/pseries/msi.c  |6
  b/arch/powerpc/sysdev/Kconfig   |6
  b/arch/s390/pci/pci_irq.c   |4
  b/arch/sparc/kernel/pci_msi.c   |4
  b/arch/x86/hyperv/irqdomain.c   |   55 --
  b/arch/x86/include/asm/x86_init.h   |6
  b/arch/x86/include/asm/xen/hypervisor.h |8
  b/arch/x86/kernel/apic/msi.c|8
  b/arch/x86/kernel/x86_init.c|   12
  b/arch/x86/pci/xen.c|   19
  b/drivers/irqchip/irq-gic-v2m.c |1
  b/drivers/irqchip/irq-gic-v3-its-pci-msi.c  |1
  b/drivers/irqchip/irq-gic-v3-mbi.c  |1
  b/drivers/net/wireless/ath/ath11k/pci.c |2
  b/drivers/pci/Makefile  |3
  b/drivers/pci/msi/Makefile  |7
  b/drivers/pci/msi/irqdomain.c   |  267 +++
  b/drivers/pci/msi/legacy.c  |   79 +++
  b/drivers/pci/msi/msi.c |  645 

  b/drivers/pci/msi/msi.h |   39 +
  b/drivers/pci/msi/pcidev_msi.c  |   43 +
  b/drivers/pci/pci-sysfs.c   |7
  b/drivers/pci/xen-pcifront.c|2
  b/include/linux/msi.h   |  135 ++---
  b/include/linux/pci.h   |1
  b/kernel/irq/msi.c  |   41 +
  32 files changed, 696 insertions(+), 1028 deletions(-)





Re: [patch 10/22] genirq/msi, treewide: Use a named struct for PCI/MSI attributes

2021-11-29 Thread Kalle Valo
Thomas Gleixner  writes:

> The unnamed struct sucks and is in the way of further cleanups. Stick the
> PCI related MSI data into a real data structure and cleanup all users.
>
> No functional change.
>
> Signed-off-by: Thomas Gleixner 
> Cc: Greg Kroah-Hartman 
> Cc: sparcli...@vger.kernel.org
> Cc: x...@kernel.org
> Cc: xen-de...@lists.xenproject.org
> Cc: ath...@lists.infradead.org
> ---
>  arch/powerpc/platforms/cell/axon_msi.c|2 
>  arch/powerpc/platforms/powernv/pci-ioda.c |4 -
>  arch/powerpc/platforms/pseries/msi.c  |6 -
>  arch/sparc/kernel/pci_msi.c   |4 -
>  arch/x86/kernel/apic/msi.c|2 
>  arch/x86/pci/xen.c|6 -
>  drivers/net/wireless/ath/ath11k/pci.c |2 

For ath11k:

Acked-by: Kalle Valo 

-- 
https://patchwork.kernel.org/project/linux-wireless/list/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches


Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key

2021-11-29 Thread Athira Rajeev



> On 28-Nov-2021, at 10:04 PM, Jiri Olsa  wrote:
> 
> On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
>> Sort key p_stage_cyc is used to present the latency
>> cycles spent in pipeline stages. perf tool has local
>> p_stage_cyc sort key to display this info. There is no
>> global variant available for this sort key. local variant
>> shows latency in a single sample, whereas, global value
>> will be useful to present the total latency (sum of
>> latencies) in the hist entry. It represents latency
>> number multiplied by the number of samples.
>> 
>> Add global (p_stage_cyc) and local variant
>> (local_p_stage_cyc) for this sort key. Use the
>> local_p_stage_cyc as default option for "mem" sort mode.
>> Also add this to list of dynamic sort keys.
>> 
>> Signed-off-by: Athira Rajeev 
>> Reported-by: Namhyung Kim 
> 
> I can't apply this to Arnaldo's perf/core, could you please rebase?
> 
> patching file util/hist.c
> patching file util/hist.h
> patching file util/sort.c
> Hunk #3 FAILED at 1392.
> Hunk #4 succeeded at 1878 (offset 20 lines).
> 1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej
> patching file util/sort.h
> 
> thanks,
> jirka

Hi Jiri,

Thanks for checking this patch. 

Actually these changes are on top of three other fixes from Namhyung which are 
already part of upstream. Below are the commits.

784e8adda4cd ("perf sort: Fix the 'weight' sort key behavior")
4d03c75363ee ("perf sort: Fix the 'ins_lat' sort key behavior")
db4b28402909 ("perf sort: Fix the 'p_stage_cyc' sort key behavior")

I checked in Arnaldo’s perf/core, but these commits are not there. But I could 
see them in 'tmp.perf/urgent'
I think perf/core is not yet updated.

Thanks
Athira Rajeev

> 
>> ---
>> tools/perf/util/hist.c |  4 +++-
>> tools/perf/util/hist.h |  3 ++-
>> tools/perf/util/sort.c | 34 +-
>> tools/perf/util/sort.h |  3 ++-
>> 4 files changed, 32 insertions(+), 12 deletions(-)
>> 
>> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
>> index b776465e04ef..0a8033b09e28 100644
>> --- a/tools/perf/util/hist.c
>> +++ b/tools/perf/util/hist.c
>> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct 
>> hist_entry *h)
>>  hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>>  hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>>  hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
>> -hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
>> +hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
>> +hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
>> +
>>  if (symbol_conf.nanosecs)
>>  hists__new_col_len(hists, HISTC_TIME, 16);
>>  else
>> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
>> index 5343b62476e6..2752ce681108 100644
>> --- a/tools/perf/util/hist.h
>> +++ b/tools/perf/util/hist.h
>> @@ -75,7 +75,8 @@ enum hist_column {
>>  HISTC_MEM_BLOCKED,
>>  HISTC_LOCAL_INS_LAT,
>>  HISTC_GLOBAL_INS_LAT,
>> -HISTC_P_STAGE_CYC,
>> +HISTC_LOCAL_P_STAGE_CYC,
>> +HISTC_GLOBAL_P_STAGE_CYC,
>>  HISTC_NR_COLS, /* Last entry */
>> };
>> 
>> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
>> index e9216a292a04..e978f7883e07 100644
>> --- a/tools/perf/util/sort.c
>> +++ b/tools/perf/util/sort.c
>> @@ -37,7 +37,7 @@ const char default_parent_pattern[] = 
>> "^sys_|^do_page_fault";
>> const char   *parent_pattern = default_parent_pattern;
>> const char   *default_sort_order = "comm,dso,symbol";
>> const char   default_branch_sort_order[] = 
>> "comm,dso_from,symbol_from,symbol_to,cycles";
>> -const char  default_mem_sort_order[] = 
>> "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
>> +const char  default_mem_sort_order[] = 
>> "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>> const char   default_top_sort_order[] = "dso,symbol";
>> const char   default_diff_sort_order[] = "dso,symbol";
>> const char   default_tracepoint_sort_order[] = "trace";
>> @@ -46,8 +46,8 @@ const char *field_order;
>> regex_t  ignore_callees_regex;
>> int  have_ignore_callees = 0;
>> enum sort_mode   sort__mode = SORT_MODE__NORMAL;
>> -const char  *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
>> -const char  *arch_specific_sort_keys[] = {"p_stage_cyc"};
>> +const char  *dynamic_headers[] = {"local_ins_lat", "ins_lat", 
>> "local_p_stage_cyc", "p_stage_cyc"};
>> +const char  *arch_specific_sort_keys[] = {"local_p_stage_cyc", 
>> "p_stage_cyc"};
>> 
>> /*
>>  * Replaces all occurrences of a char used with the:
>> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>> };
>> 
>> static int64_t
>> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry 
>> *right)
>> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>> {
>>  return left->p_stage_cyc - 

[PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.

2021-11-29 Thread Mahesh Salgaonkar
When certain PHB HW failure causes phyp to recover PHB, it marks the PE
state as temporarily unavailable until recovery is complete. This also
triggers an EEH handler in Linux which needs to notify drivers, and perform
recovery. But before notifying the driver about the pci error it uses
get_adapter_state()->get-sensor-state() operation of the hotplug_slot to
determine if the slot contains a device or not. If the slot is empty, the
recovery is skipped entirely.

However on certain PHB failures, the rtas call get-sensor-state() returns
extended busy error (9902) until PHB is recovered by phyp. Once PHB is
recovered, the get-sensor-state() returns success with correct presence
status. The rtas call interface rtas_get_sensor() loops over the rtas call
on extended delay return code (9902) until the return value is either
success (0) or error (-1). This causes the EEH handler to get stuck for ~6
seconds before it could notify that the pci error has been detected and
stop any active operations. Hence with running I/O traffic, during this 6
seconds, the network driver continues its operation and hits a timeout
(netdev watchdog). On timeouts, the network driver goes into ffdc capture
mode and reset path, assuming the PCI device is in fatal condition. This
sometimes causes EEH recovery to fail. This impacts the ssh connection and
leads to the system being inaccessible.


[52732.244731] DEBUG: ibm_read_slot_reset_state2()
[52732.244762] DEBUG: ret = 0, rets[0]=5, rets[1]=1, rets[2]=4000, rets[3]=>
[52732.244798] DEBUG: in eeh_slot_presence_check
[52732.244804] DEBUG: error state check
[52732.244807] DEBUG: Is slot hotpluggable
[52732.244810] DEBUG: hotpluggable ops ?
[52732.244953] DEBUG: Calling ops->get_adapter_status
[52732.244958] DEBUG: calling rpaphp_get_sensor_state
[52736.564262] [ cut here ]
[52736.564299] NETDEV WATCHDOG: enP64p1s0f3 (tg3): transmit queue 0 timed o>
[52736.564324] WARNING: CPU: 1442 PID: 0 at net/sched/sch_generic.c:478 dev>
[...]
[52736.564505] NIP [c0c32368] dev_watchdog+0x438/0x440
[52736.564513] LR [c0c32364] dev_watchdog+0x434/0x440


Fix this issue by introducing a new rtas_get_sensor_nonblocking() that does
not get blocked on BUSY condition and returns immediately with error. Use
this function in pseries pci hotplug driver which can return an error if
slot presence state can not be detected immediately. Please note that only
in certain PHB failures, the slot presence check returns BUSY condition. In
normal cases it returns immediately with a correct presence state value.
Hence this change has no impact on normal pci dlpar operations.

We could use rtas_get_sensor_fast() variant, but it throws WARN_ON on BUSY
condition. The rtas_get_sensor_nonblocking() suppresses WARN_ON.

Signed-off-by: Mahesh Salgaonkar 
---

This is an alternate approach to fix the EEH issue instead of delaying slot
presence check proposed at
https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-November/236956.html

Also refer:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-November/237027.html
---
 arch/powerpc/include/asm/rtas.h  |1 +
 arch/powerpc/kernel/rtas.c   |   19 ---
 drivers/pci/hotplug/rpaphp_pci.c |8 
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9dc97d2f9d27e..d8e8befb1c193 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -250,6 +250,7 @@ extern void rtas_os_term(char *str);
 void rtas_activate_firmware(void);
 extern int rtas_get_sensor(int sensor, int index, int *state);
 extern int rtas_get_sensor_fast(int sensor, int index, int *state);
+int rtas_get_sensor_nonblocking(int sensor, int index, int *state);
 extern int rtas_get_power_level(int powerdomain, int *level);
 extern int rtas_set_power_level(int powerdomain, int level, int *setlevel);
 extern bool rtas_indicator_present(int token, int *maxindex);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ac61e226c9af6..fd5aa3bbd46c5 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -609,7 +609,8 @@ int rtas_get_sensor(int sensor, int index, int *state)
 }
 EXPORT_SYMBOL(rtas_get_sensor);
 
-int rtas_get_sensor_fast(int sensor, int index, int *state)
+static int
+__rtas_get_sensor(int sensor, int index, int *state, bool warn_on)
 {
int token = rtas_token("get-sensor-state");
int rc;
@@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int 
*state)
return -ENOENT;
 
rc = rtas_call(token, 2, 2, state, sensor, index);
-   WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
-   rc <= RTAS_EXTENDED_DELAY_MAX));
+   WARN_ON(warn_on &&
+   (rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
+   rc <= RTAS_EXTENDED_DELAY_MAX)));
 
   

Re: [PATCH] powerpc/eeh: Delay slot presence check once driver is notified about the pci error.

2021-11-29 Thread Mahesh J Salgaonkar
On 2021-11-24 23:01:45 Wed, Oliver O'Halloran wrote:
> On Wed, Nov 24, 2021 at 12:05 AM Mahesh Salgaonkar  
> wrote:
> >
> > *snip*
> >
> > This causes the EEH handler to get stuck for ~6
> > seconds before it could notify that the pci error has been detected and
> > stop any active operations. Hence with running I/O traffic, during this 6
> > seconds, the network driver continues its operation and hits a timeout
> > (netdev watchdog). On timeouts, the network driver goes into ffdc capture mode
> > and reset path assuming the PCI device is in fatal condition. This causes
> > EEH recovery to fail and sometimes it leads to system hang or crash.
> 
> Whatever is causing that crash is the real issue IMO. PCI error

I have seen a crash only once, but that was triggered by the HTX tool and may
not be related. However, the major concern here is EEH failure. I will
correct the above statement in my next patch.

> reporting is fundamentally asynchronous and the driver always has to
> tolerate some amount of latency between the error occurring and being
> reported. Six seconds is admittedly an eternity, but it should not
> cause a system crash under any circumstances. Printing a warning due
> to a timeout is annoying, but it's not the end of the world.

Yeah, but due to the timeout the driver sometimes gets into a state
where, when EEH recovery kicks in, it is unable to recover the
device. Thus EEH recovery fails and disconnects the pci device even when
it could have recovered. To recover, we need to either reboot the lpar
or re-assign the I/O adapter from HMC to get it back in working
condition.

[16532.212197] EEH: PCI-E AER 30:  
[16532.213207] EEH: Reset without hotplug activity
[16534.229469] bnx2x: [bnx2x_clean_tx_queue:1203(enP22p1s0f1)]timeout waiting 
for queue[2]: txdata->tx_pkt_prod(37003) != txdata->tx_pkt_cons(36996)
[16534.385484] EEH: Beginning: 'slot_reset'
[16534.385489] PCI 0016:01:00.0#1: EEH: Invoking bnx2x->slot_reset()
[16536.229469] bnx2x: [bnx2x_clean_tx_queue:1203(enP22p1s0f1)]timeout waiting 
for queue[4]: txdata->tx_pkt_prod(64894) != txdata->tx_pkt_cons(64891)
o[...]
[16623.571502] bnx2x: [bnx2x_nic_load_request:2342(enP22p1s0f1)]MCP response 
failure, aborting
[16623.571507] bnx2x: [bnx2x_acquire_hw_lock:2019(enP22p1s0f1)]lock_status 
0x  resource_bit 0x800
[16623.571881] bnx2x: [bnx2x_io_slot_reset:14359(enP22p1s0f0)]IO slot reset 
initializing...
[16623.571976] bnx2x 0016:01:00.0: enabling device (0140 -> 0142)
[16623.576169] bnx2x: [bnx2x_io_slot_reset:14375(enP22p1s0f0)]IO slot reset --> 
driver unload
[16623.576174] PCI 0016:01:00.0#1: EEH: bnx2x driver reports: 'disconnect'
[16623.576177] PCI 0016:01:00.1#1: EEH: Invoking bnx2x->slot_reset()
[16623.576179] bnx2x: [bnx2x_io_slot_reset:14359(enP22p1s0f1)]IO slot reset 
initializing...
[16623.576239] bnx2x 0016:01:00.1: enabling device (0140 -> 0142)
[16623.580241] bnx2x: [bnx2x_io_slot_reset:14375(enP22p1s0f1)]IO slot reset --> 
driver unload
[16623.580245] PCI 0016:01:00.1#1: EEH: bnx2x driver reports: 'disconnect'
[16623.580246] EEH: Finished:'slot_reset' with aggregate recovery 
state:'disconnect'
[16623.580250] EEH: Unable to recover from failure from PHB#16-PE#1.

Thanks,
-Mahesh.

-- 
Mahesh J Salgaonkar


Re: [patch 17/22] PCI/MSI: Split out !IRQDOMAIN code

2021-11-29 Thread Cédric Le Goater

On 11/27/21 02:19, Thomas Gleixner wrote:

Split out the non irqdomain code into its own file.

Signed-off-by: Thomas Gleixner 
---
  drivers/pci/msi/Makefile |5 ++--
  drivers/pci/msi/legacy.c |   51 
+++
  drivers/pci/msi/msi.c|   46 --
  3 files changed, 54 insertions(+), 48 deletions(-)

--- a/drivers/pci/msi/Makefile
+++ b/drivers/pci/msi/Makefile
@@ -1,5 +1,6 @@
  # SPDX-License-Identifier: GPL-2.0
  #
  # Makefile for the PCI/MSI
-obj-$(CONFIG_PCI)  += pcidev_msi.o
-obj-$(CONFIG_PCI_MSI)  += msi.o
+obj-$(CONFIG_PCI)  += pcidev_msi.o
+obj-$(CONFIG_PCI_MSI)  += msi.o
+obj-$(CONFIG_PCI_MSI_ARCH_FALLBACKS)   += legacy.o
--- /dev/null
+++ b/drivers/pci/msi/legacy.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Message Signaled Interrupt (MSI).
+ *
+ * Legacy architecture specific setup and teardown mechanism.
+ */
+#include "msi.h"



I am getting a :

../drivers/pci/msi/legacy.c:7:10: fatal error: msi.h: No such file or directory
7 | #include "msi.h"

which seems to be fixed later.

C.


+
+/* Arch hooks */
+int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
+   return -EINVAL;
+}
+
+void __weak arch_teardown_msi_irq(unsigned int irq)
+{
+}
+
+int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+   struct msi_desc *desc;
+   int ret;
+
+   /*
+* If an architecture wants to support multiple MSI, it needs to
+* override arch_setup_msi_irqs()
+*/
+   if (type == PCI_CAP_ID_MSI && nvec > 1)
+   return 1;
+
+   for_each_pci_msi_entry(desc, dev) {
+   ret = arch_setup_msi_irq(dev, desc);
+   if (ret)
+   return ret < 0 ? ret : -ENOSPC;
+   }
+
+   return 0;
+}
+
+void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
+{
+   struct msi_desc *desc;
+   int i;
+
+   for_each_pci_msi_entry(desc, dev) {
+   if (desc->irq) {
+   for (i = 0; i < entry->nvec_used; i++)
+   arch_teardown_msi_irq(desc->irq + i);
+   }
+   }
+}
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -50,52 +50,6 @@ static void pci_msi_teardown_msi_irqs(st
  #define pci_msi_teardown_msi_irqs arch_teardown_msi_irqs
  #endif
  
-#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS

-/* Arch hooks */
-int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
-   return -EINVAL;
-}
-
-void __weak arch_teardown_msi_irq(unsigned int irq)
-{
-}
-
-int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-   struct msi_desc *entry;
-   int ret;
-
-   /*
-* If an architecture wants to support multiple MSI, it needs to
-* override arch_setup_msi_irqs()
-*/
-   if (type == PCI_CAP_ID_MSI && nvec > 1)
-   return 1;
-
-   for_each_pci_msi_entry(entry, dev) {
-   ret = arch_setup_msi_irq(dev, entry);
-   if (ret < 0)
-   return ret;
-   if (ret > 0)
-   return -ENOSPC;
-   }
-
-   return 0;
-}
-
-void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
-{
-   int i;
-   struct msi_desc *entry;
-
-   for_each_pci_msi_entry(entry, dev)
-   if (entry->irq)
-   for (i = 0; i < entry->nvec_used; i++)
-   arch_teardown_msi_irq(entry->irq + i);
-}
-#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
-
  /*
   * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
   * mask all MSI interrupts by clearing the MSI enable bit does not work