Re: [PATCH] powerpc/pseries/vas: Don't print an error when VAS is unavailable
On Tue, 2021-11-30 at 10:25 +1100, Michael Ellerman wrote: > Nicholas Piggin writes: > > Excerpts from Cédric Le Goater's message of November 26, 2021 5:13 > > pm: > > > On 11/26/21 06:21, Nicholas Piggin wrote: > > > > KVM does not support VAS so guests always print a useless error > > > > on boot > > > > > > > > vas: HCALL(398) error -2, query_type 0, result buffer > > > > 0x57f2000 > > > > > > > > Change this to only print the message if the error is not > > > > H_FUNCTION. > > > > > > Just being curious, why is it even called since "ibm,compression" > > > should > > > not be exposed in the DT ? > > > > It looks like vas does not test for it. I guess in theory there can > > be > > other functions than compression implemented as an accelerator. > > Maybe > > that's why? > > Yeah I guess, or it's just not structured that well. The vas platform > code is a bit awkward, it's there to support drivers, but it's not > actually driver code. > > I think we can probably rework it so the vas code does nothing until > a > driver calls in to it. > > eg. something like below. Correct, Even though NXGZIP is the only usage right now, VAS is accelerator switchboard which should support other coprocessor types such as GZIP and 842 or SW type solutions such as fast thread wakeup and fast memory copy. So can we leave VAS initialization separate from drivers and use some feature such as FW_FEATURE_LPAR to differentiate from KVM guests? Thanks Haren > > cheers > > > diff --git a/arch/powerpc/platforms/pseries/vas.c > b/arch/powerpc/platforms/pseries/vas.c > index b043e3936d21..dc3491fc919d 100644 > --- a/arch/powerpc/platforms/pseries/vas.c > +++ b/arch/powerpc/platforms/pseries/vas.c > @@ -454,6 +454,8 @@ static const struct vas_user_win_ops vops_pseries > = { > .close_win = vas_deallocate_window, /* Close window */ > }; > > +static int pseries_vas_init(void); > + > /* > * Supporting only nx-gzip coprocessor type now, but this API code > * extended to other coprocessor types later. 
> @@ -463,7 +465,8 @@ int vas_register_api_pseries(struct module *mod, > enum vas_cop_type cop_type, > { > int rc; > > - if (!copypaste_feat) > + rc = pseries_vas_init(); > + if (rc || !copypaste_feat) > return -ENOTSUPP; > > rc = vas_register_coproc_api(mod, cop_type, name, > _pseries); > @@ -531,7 +534,7 @@ static int get_vas_capabilities(u8 feat, enum > vas_cop_feat_type type, > return 0; > } > > -static int __init pseries_vas_init(void) > +static int pseries_vas_init(void) > { > struct hv_vas_cop_feat_caps *hv_cop_caps; > struct hv_vas_all_caps *hv_caps; > @@ -592,4 +595,3 @@ static int __init pseries_vas_init(void) > kfree(hv_caps); > return rc; > } > -machine_device_initcall(pseries, pseries_vas_init);
[RFC PATCH] powerpc/signal: sanitise PT_NIP and sa_handler low bits
The bottom 2 bits of NIP are ignored when RFI returns with SRR0 = NIP, so regs->nip does not correspond to the actual return address if either of those bits are set. Further, these bits are reserved in SRR0 so they should not be set. Sanitize PT_NIP from signal handlers to ensure they can't be set by userspace, this also keeps the low 2 bits of TFHAR clear, which are similarly reserved. 32-bit signal delivery returns directly to the handler, so sa_handler is sanitised similarly there. This can cause a bug when CONFIG_PPC_RFI_SRR_DEBUG=y on a processor that does not implement the 2 low bits of SRR0 (always read back 0) because SRR0 will not match regs->nip. This was caught by sigfuz, but a simple reproducer follows. #include #include #include static void trap_signal_handler(int signo, siginfo_t *si, void *uc) { ucontext_t *ucp = uc; ucp->uc_mcontext.gp_regs[PT_NIP] |= 3; } int main(void) { struct sigaction trap_sa; trap_sa.sa_flags = SA_SIGINFO; trap_sa.sa_sigaction = trap_signal_handler; sigaction(SIGUSR1, _sa, NULL); raise(SIGUSR1); exit(EXIT_SUCCESS); } Reported-by: Sachin Sant Signed-off-by: Nicholas Piggin --- I'm not entirely sure about the 32-bit / compat part. Or the 64-bit for that matter except that it does seem to fix the bug caused by the test program. 
Thanks, Nick arch/powerpc/kernel/signal_32.c | 23 --- arch/powerpc/kernel/signal_64.c | 17 - 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3e053e2fd6b6..5379bece8072 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -116,7 +116,7 @@ __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) int i; for (i = 0; i <= PT_RESULT; i++) { - if ((i == PT_MSR) || (i == PT_SOFTE)) + if ((i == PT_NIP) || (i == PT_MSR) || (i == PT_SOFTE)) continue; unsafe_get_user(gregs[i], >mc_gregs[i], failed); } @@ -156,7 +156,7 @@ static __always_inline int __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) { /* copy up to but not including MSR */ - unsafe_copy_from_user(regs, >mc_gregs, PT_MSR * sizeof(elf_greg_t), failed); + unsafe_copy_from_user(regs, >mc_gregs, PT_NIP * sizeof(elf_greg_t), failed); /* copy from orig_r3 (the word after the MSR) up to the end */ unsafe_copy_from_user(>orig_gpr3, >mc_gregs[PT_ORIG_R3], @@ -458,7 +458,7 @@ static long restore_user_regs(struct pt_regs *regs, struct mcontext __user *sr, int sig) { unsigned int save_r2 = 0; - unsigned long msr; + unsigned long nip, msr; #ifdef CONFIG_VSX int i; #endif @@ -473,6 +473,9 @@ static long restore_user_regs(struct pt_regs *regs, save_r2 = (unsigned int)regs->gpr[2]; unsafe_restore_general_regs(regs, sr, failed); set_trap_norestart(regs); + unsafe_get_user(nip, >mc_gregs[PT_NIP], failed); + nip &= ~3UL; + regs_set_return_ip(regs, nip); unsafe_get_user(msr, >mc_gregs[PT_MSR], failed); if (!sig) regs->gpr[2] = (unsigned long) save_r2; @@ -560,7 +563,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr, struct mcontext __user *tm_sr) { - unsigned long msr, msr_hi; + unsigned long nip, msr, msr_hi; int i; if (tm_suspend_disabled) @@ -576,7 +579,9 @@ static long restore_tm_user_regs(struct pt_regs *regs, return 1; 
unsafe_restore_general_regs(>thread.ckpt_regs, sr, failed); - unsafe_get_user(current->thread.tm_tfhar, >mc_gregs[PT_NIP], failed); + unsafe_get_user(nip, >mc_gregs[PT_NIP], failed); + nip &= ~3UL; + current->thread.tm_tfhar = nip; unsafe_get_user(msr, >mc_gregs[PT_MSR], failed); /* Restore the previous little-endian mode */ @@ -646,6 +651,10 @@ static long restore_tm_user_regs(struct pt_regs *regs, current->thread.used_vsr = true; } + unsafe_get_user(nip, _sr->mc_gregs[PT_NIP], failed); + nip &= ~3UL; + regs_set_return_ip(regs, nip); + /* Get the top half of the MSR from the user context */ unsafe_get_user(msr_hi, _sr->mc_gregs[PT_MSR], failed); msr_hi <<= 32; @@ -801,7 +810,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[4] = (unsigned long)>info; regs->gpr[5] = (unsigned long)>uc; regs->gpr[6] = (unsigned long)frame; - regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler); + regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler & ~3UL); /* enter the signal handler in
Re: [PATCH v5 5/5] powerpc/inst: Optimise copy_inst_from_kernel_nofault()
Le 29/11/2021 à 23:55, kernel test robot a écrit : Hi Christophe, I love your patch! Perhaps something to improve: [auto build test WARNING on powerpc/next] [also build test WARNING on v5.16-rc3 next-20211129] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211130-015346 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-randconfig-r023-20211129 (https://download.01.org/0day-ci/archive/20211130/202111300652.0ydbnvyj-...@intel.com/config) compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project df08b2fe8b35cb63dfb3b49738a3494b9b4e6f8e) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install powerpc cross compiling tool for clang build # apt-get install binutils-powerpc-linux-gnu # https://github.com/0day-ci/linux/commit/fb7bff30cc0efc7e4df1b48bb69de1f325eee826 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211130-015346 git checkout fb7bff30cc0efc7e4df1b48bb69de1f325eee826 # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=powerpc prepare If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): In file included from arch/powerpc/kernel/asm-offsets.c:71: In file included from arch/powerpc/kernel/../xmon/xmon_bpts.h:7: arch/powerpc/include/asm/inst.h:165:20: warning: variable 'val' is uninitialized when used here [-Wuninitialized] *inst = ppc_inst(val); ^~~ arch/powerpc/include/asm/inst.h:53:22: note: 
expanded from macro 'ppc_inst' #define ppc_inst(x) (x) ^ arch/powerpc/include/asm/inst.h:155:18: note: initialize the variable 'val' to silence this warning unsigned int val, suffix; ^ = 0 I can't understand what's wrong here. We have __get_kernel_nofault(, src, u32, Efault); if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { __get_kernel_nofault(, src + 1, u32, Efault); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); } With #define __get_kernel_nofault(dst, src, type, err_label) \ __get_user_size_goto(*((type *)(dst)), \ (__force type __user *)(src), sizeof(type), err_label) And #define __get_user_size_goto(x, ptr, size, label) \ do { \ BUILD_BUG_ON(size > sizeof(x)); \ switch (size) { \ case 1: __get_user_asm_goto(x, (u8 __user *)ptr, label, "lbz"); break; \ case 2: __get_user_asm_goto(x, (u16 __user *)ptr, label, "lhz"); break; \ case 4: __get_user_asm_goto(x, (u32 __user *)ptr, label, "lwz"); break; \ case 8: __get_user_asm2_goto(x, (u64 __user *)ptr, label); break; \ default: x = 0; BUILD_BUG(); \ } \ } while (0) And #define __get_user_asm_goto(x, addr, label, op) \ asm_volatile_goto( \ "1:"op"%U1%X1 %0, %1 # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x)\ : "m<>" (*addr) \ : \ : label) I see no possibility, no alternative path where val wouldn't be set. The asm clearly has *addr as an output param so it is always set. 1 warning generated. :1559:2: warning: syscall futex_waitv not implemented [-W#warnings] #warning syscall futex_waitv not implemented ^ 1 warning generated. arch/powerpc/kernel/vdso32/gettimeofday.S:72:8: error: unsupported directive '.stabs' .stabs "_restgpr_31_x:F-1",36,0,0,_restgpr_31_x; .glob
Re: [PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.
Mahesh Salgaonkar writes: > When certain PHB HW failure causes phyp to recover PHB, it marks the PE > state as temporarily unavailable until recovery is complete. This also > triggers an EEH handler in Linux which needs to notify drivers, and perform > recovery. But before notifying the driver about the pci error it uses > get_adapter_state()->get-sensor-state() operation of the hotplug_slot to > determine if the slot contains a device or not. If the slot is empty, the > recovery is skipped entirely. > > However on certain PHB failures, the rtas call get-sensor-state() returns > extended busy error (9902) until PHB is recovered by phyp. Once PHB is > recovered, the get-sensor-state() returns success with correct presence > status. The rtas call interface rtas_get_sensor() loops over the rtas call > on extended delay return code (9902) until the return value is either > success (0) or error (-1). This causes the EEH handler to get stuck for ~6 > seconds before it could notify that the pci error has been detected and > stop any active operations. I am curious whether you see any difference with "powerpc/rtas: rtas_busy_delay() improvements" which was recently applied. It will cause the calling task to sleep in response to a 990x status instead of immediately retrying: https://git.kernel.org/powerpc/c/38f7b7067dae0c101be573106018e8af22a90fdf If that commit helps then maybe this change isn't needed. Otherwise, see my comments below. > -int rtas_get_sensor_fast(int sensor, int index, int *state) > +static int > +__rtas_get_sensor(int sensor, int index, int *state, bool warn_on) Boolean flag parameters in this style are undesirable. As a reader you can't infer the significance of a 'true' or 'false' in the argument list at the call site. 
> { > int token = rtas_token("get-sensor-state"); > int rc; > @@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int > *state) > return -ENOENT; > > rc = rtas_call(token, 2, 2, state, sensor, index); > - WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && > - rc <= RTAS_EXTENDED_DELAY_MAX)); > + WARN_ON(warn_on && > + (rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && > + rc <= RTAS_EXTENDED_DELAY_MAX))); > > if (rc < 0) > return rtas_error_rc(rc); > return rc; > } Issues I see with this, in terms of correctness and convention: * On non-negative status from rtas_call(), including 990x, __rtas_get_sensor() returns the RTAS status unchanged. On a negative status, it returns a Linux errno value. On a -2 (busy) status rtas_error_rc() prints an error message and returns -ERANGE. Seems difficult for a caller to handle. Generally we want rtas_* APIs to adhere to a Linux 0/-errno convention or to return the RTAS status unchanged, but not a mixture. * __rtas_get_sensor() is called by rtas_get_sensor_fast() and rtas_get_sensor_nonblocking(), but is not called by rtas_get_sensor(), despite common practice with __-prefixed functions. 
> +int rtas_get_sensor_fast(int sensor, int index, int *state) > +{ > + return __rtas_get_sensor(sensor, index, state, true); > +} > + > +int rtas_get_sensor_nonblocking(int sensor, int index, int *state) > +{ > + return __rtas_get_sensor(sensor, index, state, false); > +} > +EXPORT_SYMBOL(rtas_get_sensor_nonblocking); > + > bool rtas_indicator_present(int token, int *maxindex) > { > int proplen, count, i; > diff --git a/drivers/pci/hotplug/rpaphp_pci.c > b/drivers/pci/hotplug/rpaphp_pci.c > index c380bdacd1466..8a7d681254ce9 100644 > --- a/drivers/pci/hotplug/rpaphp_pci.c > +++ b/drivers/pci/hotplug/rpaphp_pci.c > @@ -23,7 +23,7 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state) > int rc; > int setlevel; > > - rc = rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state); > + rc = rtas_get_sensor_nonblocking(DR_ENTITY_SENSE, slot->index, state); > > if (rc < 0) { > if (rc == -EFAULT || rc == -EEXIST) { > @@ -38,10 +38,10 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state) > if (rc < 0) { > dbg("%s: power on slot[%s] failed rc=%d.\n", > __func__, slot->name, rc); > - } else { > - rc = rtas_get_sensor(DR_ENTITY_SENSE, > - slot->index, state); > + return rc; > } > + rc = rtas_get_sensor_nonblocking(DR_ENTITY_SENSE, > + slot->index, state); > } else if (rc == -ENODEV) > info("%s: slot is unusable\n", __func__); > else If I'm reading it right rpaphp_get_sensor_state() now returns 9902 in the situation this change is trying to
[powerpc:merge] BUILD SUCCESS 260ac081931897ee7f554740c6cfc01c475aa703
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge branch HEAD: 260ac081931897ee7f554740c6cfc01c475aa703 Automatic merge of 'master' into merge (2021-11-29 16:53) elapsed time: 730m configs tested: 146 configs skipped: 3 The following configs have been built successfully. More configs may be tested in the coming days. gcc tested configs: arm defconfig arm64allyesconfig arm64 defconfig arm allyesconfig arm allmodconfig i386 randconfig-c001-20211128 mips capcella_defconfig powerpc tqm8555_defconfig i386defconfig arm mv78xx0_defconfig armmulti_v5_defconfig m68k sun3_defconfig powerpc tqm8540_defconfig um defconfig armvt8500_v6_v7_defconfig arm ep93xx_defconfig xtensa common_defconfig arm s3c2410_defconfig powerpc kilauea_defconfig mips malta_defconfig arm davinci_all_defconfig mipsmalta_qemu_32r6_defconfig powerpc cm5200_defconfig mipsqi_lb60_defconfig mips tb0219_defconfig armqcom_defconfig mips xway_defconfig pariscgeneric-64bit_defconfig powerpc makalu_defconfig mips tb0287_defconfig arm s5pv210_defconfig powerpc currituck_defconfig arm imote2_defconfig s390 zfcpdump_defconfig powerpcsocrates_defconfig riscv nommu_k210_sdcard_defconfig sparc sparc32_defconfig mips cobalt_defconfig mips gcw0_defconfig arm imx_v6_v7_defconfig mipsomega2p_defconfig arm h5000_defconfig mips rs90_defconfig sh se7722_defconfig m68km5307c3_defconfig powerpc ebony_defconfig arm omap1_defconfig powerpc mpc83xx_defconfig sh urquell_defconfig powerpc ep88xc_defconfig powerpc redwood_defconfig sh sh7785lcr_32bit_defconfig arm simpad_defconfig arm shannon_defconfig sh rsk7203_defconfig arm u8500_defconfig armcerfcube_defconfig m68km5272c3_defconfig arm randconfig-c002-20211128 arm randconfig-c002-20211130 ia64 allmodconfig ia64defconfig ia64 allyesconfig m68k allmodconfig m68kdefconfig m68k allyesconfig nios2 defconfig arc allyesconfig nds32 allnoconfig nds32 defconfig nios2allyesconfig cskydefconfig alpha defconfig alphaallyesconfig arc defconfig sh 
allmodconfig h8300allyesconfig xtensa allyesconfig parisc defconfig s390 allmodconfig parisc allyesconfig s390defconfig s390 allyesconfig i386 allyesconfig sparcallyesconfig sparc defconfig i386 debian-10.3-kselftests i386 debian-10.3 mips allyesconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig powerpc allnoconfig i386 randconfig-a001-20211129 i386 randconfig-a002-20211129 i386 randconfig-a006-20211129 i386 randconfig-a005-20211129 i386 randconfig-a004-20211129 i386 randconfig-a003-20211129 x86_64 randconfig-a011-20211128 x86_64
[powerpc:fixes-test] BUILD SUCCESS 3dc709e518b47386e6af937eaec37bb36539edfd
imx_v4_v5_defconfig arm simpad_defconfig m68k m5249evb_defconfig mips bmips_stb_defconfig arm ep93xx_defconfig armneponset_defconfig arm milbeaut_m10v_defconfig shshmin_defconfig m68km5272c3_defconfig armtrizeps4_defconfig powerpc maple_defconfig openrisc or1klitex_defconfig sparcallyesconfig xtensa defconfig powerpc mpc837x_rdb_defconfig powerpc mpc834x_itx_defconfig m68k atari_defconfig sh se7712_defconfig arm ixp4xx_defconfig powerpc ep88xc_defconfig sh sh7785lcr_32bit_defconfig arm shannon_defconfig sh rsk7203_defconfig arm u8500_defconfig armcerfcube_defconfig arm randconfig-c002-20211128 arm randconfig-c002-20211130 ia64 allmodconfig ia64defconfig ia64 allyesconfig m68k allmodconfig m68kdefconfig m68k allyesconfig nios2 defconfig nds32 allnoconfig nios2allyesconfig cskydefconfig alpha defconfig alphaallyesconfig xtensa allyesconfig h8300allyesconfig arc defconfig sh allmodconfig parisc defconfig s390 allyesconfig s390 allmodconfig parisc allyesconfig s390defconfig i386 allyesconfig sparc defconfig i386 debian-10.3-kselftests i386 debian-10.3 mips allyesconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig powerpc allnoconfig i386 randconfig-a001-20211129 i386 randconfig-a002-20211129 i386 randconfig-a006-20211129 i386 randconfig-a005-20211129 i386 randconfig-a004-20211129 i386 randconfig-a003-20211129 x86_64 randconfig-a011-20211128 x86_64 randconfig-a014-20211128 x86_64 randconfig-a012-20211128 x86_64 randconfig-a016-20211128 x86_64 randconfig-a013-20211128 x86_64 randconfig-a015-20211128 i386 randconfig-a015-20211128 i386 randconfig-a016-20211128 i386 randconfig-a013-20211128 i386 randconfig-a012-20211128 i386 randconfig-a014-20211128 i386 randconfig-a011-20211128 arc randconfig-r043-20211128 s390 randconfig-r044-20211128 riscvrandconfig-r042-20211128 riscvnommu_k210_defconfig riscvallyesconfig riscvnommu_virt_defconfig riscv allnoconfig riscv defconfig riscv rv32_defconfig riscvallmodconfig x86_64rhel-8.3-kselftests um x86_64_defconfig um 
i386_defconfig x86_64 allyesconfig x86_64 rhel-8.3 x86_64 rhel-8.3-func x86_64 kexec clang tested configs: s390 randconfig-c005-20211128 i386 randconfig-c001-20211128 riscvrandconfig-c006-20211128 arm randconfig-c002-20211128 powerpc randconfig-c003-20211128 x86_64 randconfig-c007-20211128 mips randconfig-c004-20211128 x86_64 randconfig-a001-20211128 x86_64 randconfig-a006-20211128 x86_64 randconfig-a003-20211128 x86_64 randconfig-a005-20211128 x86_64 randconfig-a004-20211128 x86_64 randconfig-a002-20211128 i386 randconfig-a001-20211128 i386 randconfig-a002-20211128 i386 randconfig-a006-20211128 i386 randconfig
[powerpc:next] BUILD SUCCESS af3fdce4ab0781ea183107c90de9cbf21d701c54
config arm simpad_defconfig m68k m5249evb_defconfig mips bmips_stb_defconfig arm ep93xx_defconfig armneponset_defconfig arm milbeaut_m10v_defconfig shshmin_defconfig m68km5272c3_defconfig armtrizeps4_defconfig powerpc maple_defconfig openrisc or1klitex_defconfig sparcallyesconfig xtensa defconfig powerpc mpc837x_rdb_defconfig powerpc mpc834x_itx_defconfig m68k atari_defconfig sh se7712_defconfig arm ixp4xx_defconfig powerpc ep88xc_defconfig powerpc redwood_defconfig arm shannon_defconfig sh rsk7203_defconfig arm u8500_defconfig armcerfcube_defconfig arm randconfig-c002-20211128 arm randconfig-c002-20211129 arm randconfig-c002-20211130 ia64 allmodconfig ia64defconfig ia64 allyesconfig m68k allmodconfig m68kdefconfig m68k allyesconfig nios2 defconfig nds32 allnoconfig nios2allyesconfig cskydefconfig alpha defconfig alphaallyesconfig xtensa allyesconfig h8300allyesconfig sh allmodconfig parisc defconfig s390 allyesconfig s390 allmodconfig parisc allyesconfig s390defconfig i386 allyesconfig sparc defconfig i386 debian-10.3-kselftests i386 debian-10.3 mips allyesconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig powerpc allnoconfig i386 randconfig-a001-20211129 i386 randconfig-a002-20211129 i386 randconfig-a006-20211129 i386 randconfig-a005-20211129 i386 randconfig-a004-20211129 i386 randconfig-a003-20211129 x86_64 randconfig-a011-20211128 x86_64 randconfig-a014-20211128 x86_64 randconfig-a012-20211128 x86_64 randconfig-a016-20211128 x86_64 randconfig-a013-20211128 x86_64 randconfig-a015-20211128 i386 randconfig-a015-20211128 i386 randconfig-a016-20211128 i386 randconfig-a013-20211128 i386 randconfig-a012-20211128 i386 randconfig-a014-20211128 i386 randconfig-a011-20211128 arc randconfig-r043-20211128 s390 randconfig-r044-20211128 riscvrandconfig-r042-20211128 riscvnommu_k210_defconfig riscvallyesconfig riscvnommu_virt_defconfig riscv allnoconfig riscv defconfig riscv rv32_defconfig riscvallmodconfig x86_64rhel-8.3-kselftests um x86_64_defconfig um 
i386_defconfig x86_64 allyesconfig x86_64 rhel-8.3 x86_64 rhel-8.3-func x86_64 kexec clang tested configs: s390 randconfig-c005-20211128 i386 randconfig-c001-20211128 riscvrandconfig-c006-20211128 arm randconfig-c002-20211128 powerpc randconfig-c003-20211128 x86_64 randconfig-c007-20211128 mips randconfig-c004-20211128 x86_64 randconfig-a001-20211128 x86_64 randconfig-a006-20211128 x86_64 randconfig-a003-20211128 x86_64 randconfig-a005-20211128 x86_64 randconfig-a004-20211128 x86_64 randconfig-a002-20211128 i386 randconfig-a001-20211128 i386 randconfig-a002-20211128 i386 randconfig-a006-20211128 i386 randconfig-a005-202
[powerpc:next-test] BUILD SUCCESS aebd1fb45c622e9a2b06fb70665d084d3a8d6c78
defconfig nios2allyesconfig cskydefconfig alpha defconfig alphaallyesconfig xtensa allyesconfig h8300allyesconfig sh allmodconfig parisc defconfig s390 allyesconfig s390 allmodconfig parisc allyesconfig s390defconfig i386 allyesconfig sparcallyesconfig sparc defconfig i386 debian-10.3-kselftests i386 debian-10.3 mips allyesconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig powerpc allnoconfig i386 randconfig-a001-20211129 i386 randconfig-a002-20211129 i386 randconfig-a006-20211129 i386 randconfig-a005-20211129 i386 randconfig-a004-20211129 i386 randconfig-a003-20211129 x86_64 randconfig-a011-20211128 x86_64 randconfig-a014-20211128 x86_64 randconfig-a012-20211128 x86_64 randconfig-a016-20211128 x86_64 randconfig-a013-20211128 x86_64 randconfig-a015-20211128 i386 randconfig-a015-20211128 i386 randconfig-a016-20211128 i386 randconfig-a013-20211128 i386 randconfig-a012-20211128 i386 randconfig-a014-20211128 i386 randconfig-a011-20211128 arc randconfig-r043-20211128 s390 randconfig-r044-20211128 riscvrandconfig-r042-20211128 riscvnommu_k210_defconfig riscvallyesconfig riscvnommu_virt_defconfig riscv allnoconfig riscv defconfig riscv rv32_defconfig riscvallmodconfig x86_64rhel-8.3-kselftests um x86_64_defconfig um i386_defconfig x86_64 allyesconfig x86_64 rhel-8.3 x86_64 rhel-8.3-func x86_64 kexec clang tested configs: s390 randconfig-c005-20211128 i386 randconfig-c001-20211128 riscvrandconfig-c006-20211128 arm randconfig-c002-20211128 powerpc randconfig-c003-20211128 x86_64 randconfig-c007-20211128 mips randconfig-c004-20211128 x86_64 randconfig-a001-20211128 x86_64 randconfig-a006-20211128 x86_64 randconfig-a003-20211128 x86_64 randconfig-a005-20211128 x86_64 randconfig-a004-20211128 x86_64 randconfig-a002-20211128 i386 randconfig-a001-20211128 i386 randconfig-a002-20211128 i386 randconfig-a006-20211128 i386 randconfig-a005-20211128 i386 randconfig-a004-20211128 i386 randconfig-a003-20211128 i386 randconfig-a015-20211129 i386 randconfig-a016-20211129 
i386 randconfig-a013-20211129 i386 randconfig-a012-20211129 i386 randconfig-a014-20211129 i386 randconfig-a011-20211129 hexagon randconfig-r045-20211129 hexagon randconfig-r041-20211129 s390 randconfig-r044-20211129 riscvrandconfig-r042-20211129 hexagon randconfig-r045-20211128 hexagon randconfig-r041-20211128 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
Re: [PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.
On 11/29/21 5:06 PM, Nathan Lynch wrote: > Tyrel Datwyler writes: >> On 11/29/21 12:58 AM, Mahesh Salgaonkar wrote: >>> -int rtas_get_sensor_fast(int sensor, int index, int *state) >>> +static int >>> +__rtas_get_sensor(int sensor, int index, int *state, bool warn_on) >>> { >>> int token = rtas_token("get-sensor-state"); >>> int rc; >>> @@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int >>> *state) >>> return -ENOENT; >>> >>> rc = rtas_call(token, 2, 2, state, sensor, index); >>> - WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && >>> - rc <= RTAS_EXTENDED_DELAY_MAX)); >>> + WARN_ON(warn_on && >>> + (rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && >>> + rc <= RTAS_EXTENDED_DELAY_MAX))); >> >> The whole point of rtas_get_sensor_fast() is that on busy we will just let it >> error out because we don't want to wait. I'm not sure I see the point of the >> spurious WARN_ONs anytime we hit a BUSY or DELAY return code. Maybe >> converting >> that to a pr_debug() might be better and save expanding the API with a _fast >> and >> _nonblocking variant that do the same thing minus one suppressing a >> WARN_ON splat. > There is a subset of sensors that are specified to not ever return busy > or delay statuses. rtas_get_sensor_fast() is meant to be used with > those, and it would be an error to use it on a sensor not in that set. > So the WARN_ON() is appropriate IMO; if it triggers it indicates either > a misuse of the API or a firmware bug. See commit 1c2cb594441d > "powerpc/rtas: Introduce rtas_get_sensor_fast() for IRQ handlers" > Fair enough. Seems I misremembered the nature of the original problem and should have looked back at the commit to completely jog my memory.
Re: [PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.
Tyrel Datwyler writes: > On 11/29/21 12:58 AM, Mahesh Salgaonkar wrote: >> -int rtas_get_sensor_fast(int sensor, int index, int *state) >> +static int >> +__rtas_get_sensor(int sensor, int index, int *state, bool warn_on) >> { >> int token = rtas_token("get-sensor-state"); >> int rc; >> @@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int >> *state) >> return -ENOENT; >> >> rc = rtas_call(token, 2, 2, state, sensor, index); >> -WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && >> -rc <= RTAS_EXTENDED_DELAY_MAX)); >> +WARN_ON(warn_on && >> +(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && >> +rc <= RTAS_EXTENDED_DELAY_MAX))); > The whole point of rtas_get_sensor_fast() is that on busy we will just let it > error out because we don't want to wait. I'm not sure I see the point of the > spurious WARN_ONs anytime we hit a BUSY or DELAY return code. Maybe converting > that to a pr_debug() might be better and save expanding the API with a _fast > and > _nonblocking variant that do the same thing minus one suppressing a > WARN_ON splat. There is a subset of sensors that are specified to not ever return busy or delay statuses. rtas_get_sensor_fast() is meant to be used with those, and it would be an error to use it on a sensor not in that set. So the WARN_ON() is appropriate IMO; if it triggers it indicates either a misuse of the API or a firmware bug. See commit 1c2cb594441d "powerpc/rtas: Introduce rtas_get_sensor_fast() for IRQ handlers"
Re: [PATCH 0/2] of: remove reserved regions count restriction
On Sun, Nov 21, 2021 at 08:43:47AM +0200, Mike Rapoport wrote: > On Fri, Nov 19, 2021 at 03:58:17PM +0800, Calvin Zhang wrote: > > The count of reserved regions in /reserved-memory was limited because > > the struct reserved_mem array was defined statically. This series sorts > > out reserved memory code and allocates that array from early allocator. > > > > Note: reserved region with fixed location must be reserved before any > > memory allocation. While struct reserved_mem array should be allocated > > after allocator is activated. We make early_init_fdt_scan_reserved_mem() > > do reservation only and add another call to initialize reserved memory. > > So arch code have to change for it. > > I think much simpler would be to use the same constant for sizing > memblock.reserved and reserved_mem arrays. Do those arrays get shrunk? Or do we waste the memory forever? Maybe we can copy and shrink the initial array? Though I suspect struct reserved_mem pointers have already been given out. > > If there is too much reserved regions in the device tree, reserving them in > memblock will fail anyway because memblock also starts with static array > for memblock.reserved, so doing one pass with memblock_reserve() and > another to set up reserved_mem wouldn't help anyway. > > > I'm only familiar with arm and arm64 architectures. Approvals from arch > > maintainers are required. Thank you all. 
> > > > Calvin Zhang (2): > > of: Sort reserved_mem related code > > of: reserved_mem: Remove reserved regions count restriction > > > > arch/arc/mm/init.c | 3 + > > arch/arm/kernel/setup.c| 2 + > > arch/arm64/kernel/setup.c | 3 + > > arch/csky/kernel/setup.c | 3 + > > arch/h8300/kernel/setup.c | 2 + > > arch/mips/kernel/setup.c | 3 + > > arch/nds32/kernel/setup.c | 3 + > > arch/nios2/kernel/setup.c | 2 + > > arch/openrisc/kernel/setup.c | 3 + > > arch/powerpc/kernel/setup-common.c | 3 + > > arch/riscv/kernel/setup.c | 2 + > > arch/sh/kernel/setup.c | 3 + > > arch/xtensa/kernel/setup.c | 2 + > > drivers/of/fdt.c | 107 +--- > > drivers/of/of_private.h| 12 +- > > drivers/of/of_reserved_mem.c | 189 - > > include/linux/of_reserved_mem.h| 4 + > > 17 files changed, 207 insertions(+), 139 deletions(-) > > > > -- > > 2.30.2 > > > > -- > Sincerely yours, > Mike. >
Re: [PATCH 1/2] of: Sort reserved_mem related code
On Fri, Nov 19, 2021 at 03:58:18PM +0800, Calvin Zhang wrote: > Move code about parsing /reserved-memory and initializing of > reserved_mems array to of_reserved_mem.c for better modularity. > > Rename array name from reserved_mem to reserved_mems to distinguish > from type definition. > > Signed-off-by: Calvin Zhang > --- > drivers/of/fdt.c| 108 + > drivers/of/of_private.h | 12 ++- > drivers/of/of_reserved_mem.c| 163 ++-- > include/linux/of_reserved_mem.h | 4 + > 4 files changed, 149 insertions(+), 138 deletions(-) > > diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c > index bdca35284ceb..445af4e69300 100644 > --- a/drivers/of/fdt.c > +++ b/drivers/of/fdt.c > @@ -80,7 +80,7 @@ void __init of_fdt_limit_memory(int limit) > } > } > > -static bool of_fdt_device_is_available(const void *blob, unsigned long node) > +bool of_fdt_device_is_available(const void *blob, unsigned long node) > { > const char *status = fdt_getprop(blob, node, "status", NULL); > > @@ -476,7 +476,7 @@ void *initial_boot_params __ro_after_init; > > static u32 of_fdt_crc32; > > -static int __init early_init_dt_reserve_memory_arch(phys_addr_t base, > +int __init early_init_dt_reserve_memory_arch(phys_addr_t base, > phys_addr_t size, bool nomap) I think you can move this function too if you change the nomap==false callers to just call memblock_reserve directly. 
> { > if (nomap) { > @@ -492,108 +492,6 @@ static int __init > early_init_dt_reserve_memory_arch(phys_addr_t base, > return memblock_reserve(base, size); > } > > -/* > - * __reserved_mem_reserve_reg() - reserve all memory described in 'reg' > property > - */ > -static int __init __reserved_mem_reserve_reg(unsigned long node, > - const char *uname) > -{ > - int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32); > - phys_addr_t base, size; > - int len; > - const __be32 *prop; > - int first = 1; > - bool nomap; > - > - prop = of_get_flat_dt_prop(node, "reg", ); > - if (!prop) > - return -ENOENT; > - > - if (len && len % t_len != 0) { > - pr_err("Reserved memory: invalid reg property in '%s', skipping > node.\n", > -uname); > - return -EINVAL; > - } > - > - nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; > - > - while (len >= t_len) { > - base = dt_mem_next_cell(dt_root_addr_cells, ); > - size = dt_mem_next_cell(dt_root_size_cells, ); > - > - if (size && > - early_init_dt_reserve_memory_arch(base, size, nomap) == 0) > - pr_debug("Reserved memory: reserved region for node > '%s': base %pa, size %lu MiB\n", > - uname, , (unsigned long)(size / SZ_1M)); > - else > - pr_info("Reserved memory: failed to reserve memory for > node '%s': base %pa, size %lu MiB\n", > - uname, , (unsigned long)(size / SZ_1M)); > - > - len -= t_len; > - if (first) { > - fdt_reserved_mem_save_node(node, uname, base, size); > - first = 0; > - } > - } > - return 0; > -} > - > -/* > - * __reserved_mem_check_root() - check if #size-cells, #address-cells > provided > - * in /reserved-memory matches the values supported by the current > implementation, > - * also check if ranges property has been provided > - */ > -static int __init __reserved_mem_check_root(unsigned long node) > -{ > - const __be32 *prop; > - > - prop = of_get_flat_dt_prop(node, "#size-cells", NULL); > - if (!prop || be32_to_cpup(prop) != dt_root_size_cells) > - return -EINVAL; > - > - prop = 
of_get_flat_dt_prop(node, "#address-cells", NULL); > - if (!prop || be32_to_cpup(prop) != dt_root_addr_cells) > - return -EINVAL; > - > - prop = of_get_flat_dt_prop(node, "ranges", NULL); > - if (!prop) > - return -EINVAL; > - return 0; > -} > - > -/* > - * fdt_scan_reserved_mem() - scan a single FDT node for reserved memory > - */ > -static int __init fdt_scan_reserved_mem(void) > -{ > - int node, child; > - const void *fdt = initial_boot_params; > - > - node = fdt_path_offset(fdt, "/reserved-memory"); > - if (node < 0) > - return -ENODEV; > - > - if (__reserved_mem_check_root(node) != 0) { > - pr_err("Reserved memory: unsupported node format, ignoring\n"); > - return -EINVAL; > - } > - > - fdt_for_each_subnode(child, fdt, node) { > - const char *uname; > - int err; > - > - if (!of_fdt_device_is_available(fdt, child)) > - continue; > - > - uname = fdt_get_name(fdt, child, NULL); > - > -
[PATCH] scsi: ibmvfc: replace snprintf with sysfs_emit
From: Yang Guang coccinelle report: ./drivers/scsi/ibmvscsi/ibmvfc.c:3453:8-16: WARNING: use scnprintf or sprintf ./drivers/scsi/ibmvscsi/ibmvfc.c:3416:8-16: WARNING: use scnprintf or sprintf ./drivers/scsi/ibmvscsi/ibmvfc.c:3436:8-16: WARNING: use scnprintf or sprintf ./drivers/scsi/ibmvscsi/ibmvfc.c:3426:8-16: WARNING: use scnprintf or sprintf ./drivers/scsi/ibmvscsi/ibmvfc.c:3445:8-16: WARNING: use scnprintf or sprintf ./drivers/scsi/ibmvscsi/ibmvfc.c:3406:8-16: WARNING: use scnprintf or sprintf Use sysfs_emit instead of scnprintf or sprintf makes more sense. Reported-by: Zeal Robot Signed-off-by: Yang Guang --- drivers/scsi/ibmvscsi/ibmvfc.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index d0eab5700dc5..69bf55c037a5 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3403,7 +3403,7 @@ static ssize_t ibmvfc_show_host_partition_name(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct ibmvfc_host *vhost = shost_priv(shost); - return snprintf(buf, PAGE_SIZE, "%s\n", + return sysfs_emit(buf, "%s\n", vhost->login_buf->resp.partition_name); } @@ -3413,7 +3413,7 @@ static ssize_t ibmvfc_show_host_device_name(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct ibmvfc_host *vhost = shost_priv(shost); - return snprintf(buf, PAGE_SIZE, "%s\n", + return sysfs_emit(buf, "%s\n", vhost->login_buf->resp.device_name); } @@ -3423,7 +3423,7 @@ static ssize_t ibmvfc_show_host_loc_code(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct ibmvfc_host *vhost = shost_priv(shost); - return snprintf(buf, PAGE_SIZE, "%s\n", + return sysfs_emit(buf, "%s\n", vhost->login_buf->resp.port_loc_code); } @@ -3433,7 +3433,7 @@ static ssize_t ibmvfc_show_host_drc_name(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct ibmvfc_host *vhost = shost_priv(shost); - return snprintf(buf, 
PAGE_SIZE, "%s\n", + return sysfs_emit(buf, "%s\n", vhost->login_buf->resp.drc_name); } @@ -3442,7 +3442,7 @@ static ssize_t ibmvfc_show_host_npiv_version(struct device *dev, { struct Scsi_Host *shost = class_to_shost(dev); struct ibmvfc_host *vhost = shost_priv(shost); - return snprintf(buf, PAGE_SIZE, "%d\n", be32_to_cpu(vhost->login_buf->resp.version)); + return sysfs_emit(buf, "%d\n", be32_to_cpu(vhost->login_buf->resp.version)); } static ssize_t ibmvfc_show_host_capabilities(struct device *dev, @@ -3450,7 +3450,7 @@ static ssize_t ibmvfc_show_host_capabilities(struct device *dev, { struct Scsi_Host *shost = class_to_shost(dev); struct ibmvfc_host *vhost = shost_priv(shost); - return snprintf(buf, PAGE_SIZE, "%llx\n", be64_to_cpu(vhost->login_buf->resp.capabilities)); + return sysfs_emit(buf, "%llx\n", be64_to_cpu(vhost->login_buf->resp.capabilities)); } /** -- 2.30.2
RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.
Agreed, We are happy to pick up the torch on this, but I'd like to try and hear from Joakim first before we do. The patch set is his, so I'd like to give him the opportunity. I think he's the only one that can add a truly proper description as well because he mentioned that this includes a "few more fixes" than just the one we ran into. I'd rather hear from him than try to reverse engineer what was being addressed. Joakim, if you are still watching the thread, would you like to take a stab at it? If I don't hear from you in a couple days, we'll pick up the torch and do what we can. Eugene T. Bordenkircher -Original Message- From: Leo Li Sent: Monday, November 29, 2021 3:37 PM To: Eugene Bordenkircher ; Thorsten Leemhuis ; jo...@infinera.com ; linuxppc-dev@lists.ozlabs.org; linux-...@vger.kernel.org Cc: gre...@linuxfoundation.org; ba...@kernel.org Subject: RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop. [Caution - External] > -Original Message- > From: Eugene Bordenkircher > Sent: Monday, November 29, 2021 11:25 AM > To: Thorsten Leemhuis ; jo...@infinera.com > ; linuxppc-dev@lists.ozlabs.org; linux- > u...@vger.kernel.org > Cc: Leo Li ; gre...@linuxfoundation.org; > ba...@kernel.org > Subject: RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list > leads to unrecoverable loop. > > The final result of our testing is that the patch set posted seems to > address all known defects in the Linux kernel. The mentioned > additional problems are entirely caused by the antivirus solution on > the windows box. The antivirus solution blocks the disconnect > messages from reaching the RNDIS driver so it has no idea the USB > device went away. There is nothing we can do to address this in the Linux > kernel. Thanks for the confirmation. > > I propose we move forward with the patchset. 
I think that we should proceed to merge the patchset but it seems to need some cleanup for coding style issues and better description before submitted formally. > > Eugene T. Bordenkircher > > -Original Message- > From: Thorsten Leemhuis > Sent: Thursday, November 25, 2021 5:59 AM > To: Eugene Bordenkircher ; Thorsten > Leemhuis ; Joakim Tjernlund > ; linuxppc-dev@lists.ozlabs.org; linux- > u...@vger.kernel.org > Cc: leoyang...@nxp.com; gre...@linuxfoundation.org; ba...@kernel.org > Subject: Re: bug: usb: gadget: FSL_UDC_CORE Corrupted request list > leads to unrecoverable loop. > > Hi, this is your Linux kernel regression tracker speaking. > > Top-posting for once, to make this easy to process for everyone: > > Li Yang and Felipe Balbi: how to move on with this? It's quite an old > regression, but nevertheless it is one and thus should be fixed. Part > of my position is to make that happen and thus remind developers and > maintainers about this until the regression is resolved. > > Ciao, Thorsten > > On 16.11.21 20:11, Eugene Bordenkircher wrote: > > On 02.11.21 22:15, Joakim Tjernlund wrote: > >> On Sat, 2021-10-30 at 14:20 +, Joakim Tjernlund wrote: > >>> On Fri, 2021-10-29 at 17:14 +, Eugene Bordenkircher wrote: > >> > We've discovered a situation where the FSL udc driver > (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating > over the request queue, but the queue has been corrupted at some point > so it loops infinitely. I believe we have narrowed into the offending > code, but we are in need of assistance trying to find an appropriate > fix for the problem. The identified code appears to be in all > versions of the Linux kernel the driver exists in. > > The problem appears to be when handling a USB_REQ_GET_STATUS > request. The driver gets this request and then calls the > ch9getstatus() function. 
In this function, it starts a request by > "borrowing" the per device status_req, filling it in, and then queuing > it with a call to list_add_tail() to add the request to the endpoint > queue. Right before it exits the function however, it's calling > ep0_prime_status(), which is filling out that same status_req > structure and then queuing it with another call to list_add_tail() to > add the request to the endpoint queue. This adds two instances of the > exact same LIST_HEAD to the endpoint queue, which breaks the list > since the prev and next pointers end up pointing to the wrong things. > This ends up causing a hard loop the next time nuke() gets called, which > happens on the next setup IRQ. > > I'm not sure what the appropriate fix to this problem is, mostly > due to > my lack of expertise in USB and this driver stack. The code has been > this way in the kernel for a very long time, which suggests that it > has been working, unless USB_REQ_GET_STATUS requests are never made. > This further suggests that there is something else going on that I don't > understand. > Deleting the call to ep0_prime_status() and the following ep0stall() >
RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.
> -Original Message- > From: Eugene Bordenkircher > Sent: Monday, November 29, 2021 11:25 AM > To: Thorsten Leemhuis ; jo...@infinera.com > ; linuxppc-dev@lists.ozlabs.org; linux- > u...@vger.kernel.org > Cc: Leo Li ; gre...@linuxfoundation.org; > ba...@kernel.org > Subject: RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to > unrecoverable loop. > > The final result of our testing is that the patch set posted seems to address > all > known defects in the Linux kernel. The mentioned additional problems are > entirely caused by the antivirus solution on the windows box. The antivirus > solution blocks the disconnect messages from reaching the RNDIS driver so it > has no idea the USB device went away. There is nothing we can do to > address this in the Linux kernel. Thanks for the confirmation. > > I propose we move forward with the patchset. I think that we should proceed to merge the patchset but it seems to need some cleanup for coding style issues and better description before submitted formally. > > Eugene T. Bordenkircher > > -Original Message- > From: Thorsten Leemhuis > Sent: Thursday, November 25, 2021 5:59 AM > To: Eugene Bordenkircher ; Thorsten > Leemhuis ; Joakim Tjernlund > ; linuxppc-dev@lists.ozlabs.org; linux- > u...@vger.kernel.org > Cc: leoyang...@nxp.com; gre...@linuxfoundation.org; ba...@kernel.org > Subject: Re: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to > unrecoverable loop. > > Hi, this is your Linux kernel regression tracker speaking. > > Top-posting for once, to make this easy to process for everyone: > > Li Yang and Felipe Balbi: how to move on with this? It's quite an old > regression, but nevertheless it is one and thus should be fixed. Part of my > position is to make that happen and thus remind developers and maintainers > about this until the regression is resolved. 
> > Ciao, Thorsten > > On 16.11.21 20:11, Eugene Bordenkircher wrote: > > On 02.11.21 22:15, Joakim Tjernlund wrote: > >> On Sat, 2021-10-30 at 14:20 +, Joakim Tjernlund wrote: > >>> On Fri, 2021-10-29 at 17:14 +, Eugene Bordenkircher wrote: > >> > We've discovered a situation where the FSL udc driver > (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating over the > request queue, but the queue has been corrupted at some point so it loops > infinitely. I believe we have narrowed into the offending code, but we are in > need of assistance trying to find an appropriate fix for the problem. The > identified code appears to be in all versions of the Linux kernel the driver > exists in. > > The problem appears to be when handling a USB_REQ_GET_STATUS > request. The driver gets this request and then calls the ch9getstatus() > function. In this function, it starts a request by "borrowing" the per device > status_req, filling it in, and then queuing it with a call to list_add_tail() > to add > the request to the endpoint queue. Right before it exits the function > however, it's calling ep0_prime_status(), which is filling out that same > status_req structure and then queuing it with another call to list_add_tail() > to > add the request to the endpoint queue. This adds two instances of the exact > same LIST_HEAD to the endpoint queue, which breaks the list since the prev > and next pointers end up pointing to the wrong things. This ends up causing > a hard loop the next time nuke() gets called, which happens on the next > setup IRQ. > > I'm not sure what the appropriate fix to this problem is, mostly due to > my lack of expertise in USB and this driver stack. The code has been this way > in the kernel for a very long time, which suggests that it has been working, > unless USB_REQ_GET_STATUS requests are never made. This further > suggests that there is something else going on that I don't understand. 
> Deleting the call to ep0_prime_status() and the following ep0stall() call > appears, on the surface, to get the device working again, but may have side > effects that I'm not seeing. > > I'm hopeful someone in the community can help provide some > information on what I may be missing or help come up with a solution to the > problem. A big thank you to anyone who would like to help out. > >>> > >>> Run into this to a while ago. Found the bug and a few more fixes. > >>> This is against 4.19 so you may have to tweak them a bit. > >>> Feel free to upstream them. > >> > >> Curious, did my patches help? Good to known once we upgrade as well. > > > > There's good news and bad news. > > > > The good news is that this appears to stop the driver from entering an > > infinite loop, which prevents the Linux system from locking up and > > never recovering. So I'm willing to say we've made the behavior > > better. > > > > The bad news is that once we get past this point, there is new bad > > behavior. What is on top of this driver in our system is the RNDIS > > gadget driver communicating to
Re: [PATCH] powerpc/pseries/vas: Don't print an error when VAS is unavailable
Nicholas Piggin writes: > Excerpts from Cédric Le Goater's message of November 26, 2021 5:13 pm: >> On 11/26/21 06:21, Nicholas Piggin wrote: >>> KVM does not support VAS so guests always print a useless error on boot >>> >>> vas: HCALL(398) error -2, query_type 0, result buffer 0x57f2000 >>> >>> Change this to only print the message if the error is not H_FUNCTION. >> >> >> Just being curious, why is it even called since "ibm,compression" should >> not be exposed in the DT ? > > It looks like vas does not test for it. I guess in theory there can be > other functions than compression implemented as an accelerator. Maybe > that's why? Yeah I guess, or it's just not structured that well. The vas platform code is a bit awkward, it's there to support drivers, but it's not actually driver code. I think we can probably rework it so the vas code does nothing until a driver calls in to it. eg. something like below. cheers diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b043e3936d21..dc3491fc919d 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -454,6 +454,8 @@ static const struct vas_user_win_ops vops_pseries = { .close_win = vas_deallocate_window, /* Close window */ }; +static int pseries_vas_init(void); + /* * Supporting only nx-gzip coprocessor type now, but this API code * extended to other coprocessor types later. 
@@ -463,7 +465,8 @@ int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, { int rc; - if (!copypaste_feat) + rc = pseries_vas_init(); + if (rc || !copypaste_feat) return -ENOTSUPP; rc = vas_register_coproc_api(mod, cop_type, name, _pseries); @@ -531,7 +534,7 @@ static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, return 0; } -static int __init pseries_vas_init(void) +static int pseries_vas_init(void) { struct hv_vas_cop_feat_caps *hv_cop_caps; struct hv_vas_all_caps *hv_caps; @@ -592,4 +595,3 @@ static int __init pseries_vas_init(void) kfree(hv_caps); return rc; } -machine_device_initcall(pseries, pseries_vas_init);
Re: [PATCH v5 5/5] powerpc/inst: Optimise copy_inst_from_kernel_nofault()
Hi Christophe, I love your patch! Perhaps something to improve: [auto build test WARNING on powerpc/next] [also build test WARNING on v5.16-rc3 next-20211129] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211130-015346 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-randconfig-r023-20211129 (https://download.01.org/0day-ci/archive/20211130/202111300652.0ydbnvyj-...@intel.com/config) compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project df08b2fe8b35cb63dfb3b49738a3494b9b4e6f8e) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install powerpc cross compiling tool for clang build # apt-get install binutils-powerpc-linux-gnu # https://github.com/0day-ci/linux/commit/fb7bff30cc0efc7e4df1b48bb69de1f325eee826 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211130-015346 git checkout fb7bff30cc0efc7e4df1b48bb69de1f325eee826 # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=powerpc prepare If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): In file included from arch/powerpc/kernel/asm-offsets.c:71: In file included from arch/powerpc/kernel/../xmon/xmon_bpts.h:7: >> arch/powerpc/include/asm/inst.h:165:20: warning: variable 'val' is >> uninitialized when used here [-Wuninitialized] *inst = ppc_inst(val); ^~~ arch/powerpc/include/asm/inst.h:53:22: note: expanded from macro 'ppc_inst' #define 
ppc_inst(x) (x) ^ arch/powerpc/include/asm/inst.h:155:18: note: initialize the variable 'val' to silence this warning unsigned int val, suffix; ^ = 0 1 warning generated. :1559:2: warning: syscall futex_waitv not implemented [-W#warnings] #warning syscall futex_waitv not implemented ^ 1 warning generated. arch/powerpc/kernel/vdso32/gettimeofday.S:72:8: error: unsupported directive '.stabs' .stabs "_restgpr_31_x:F-1",36,0,0,_restgpr_31_x; .globl _restgpr_31_x; _restgpr_31_x: ^ arch/powerpc/kernel/vdso32/gettimeofday.S:73:8: error: unsupported directive '.stabs' .stabs "_rest32gpr_31_x:F-1",36,0,0,_rest32gpr_31_x; .globl _rest32gpr_31_x; _rest32gpr_31_x: ^ make[2]: *** [arch/powerpc/kernel/vdso32/Makefile:55: arch/powerpc/kernel/vdso32/gettimeofday.o] Error 1 make[2]: Target 'include/generated/vdso32-offsets.h' not remade because of errors. make[1]: *** [arch/powerpc/Makefile:421: vdso_prepare] Error 2 make[1]: Target 'prepare' not remade because of errors. make: *** [Makefile:219: __sub-make] Error 2 make: Target 'prepare' not remade because of errors. vim +/val +165 arch/powerpc/include/asm/inst.h 152 153 static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) 154 { 155 unsigned int val, suffix; 156 157 if (unlikely(!is_kernel_addr((unsigned long)src))) 158 return -ERANGE; 159 160 __get_kernel_nofault(, src, u32, Efault); 161 if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { 162 __get_kernel_nofault(, src + 1, u32, Efault); 163 *inst = ppc_inst_prefix(val, suffix); 164 } else { > 165 *inst = ppc_inst(val); 166 } 167 return 0; 168 Efault: 169 return -EFAULT; 170 } 171 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton
Hello, On Sun, Nov 21, 2021 at 05:35:53PM +0800, Kefeng Wang wrote: > When support page mapping percpu first chunk allocator on arm64, we > found there are lots of duplicated codes in percpu embed/page first > chunk allocator. This patchset is aimed to cleanup them and should > no function change, only test on arm64. > > Kefeng Wang (4): > mm: percpu: Generalize percpu related config > mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef > mm: percpu: Add generic pcpu_fc_alloc/free funciton > mm: percpu: Add generic pcpu_populate_pte() function > > arch/arm64/Kconfig | 20 + > arch/ia64/Kconfig | 9 +-- > arch/mips/Kconfig | 10 +-- > arch/mips/mm/init.c| 14 +--- > arch/powerpc/Kconfig | 17 +--- > arch/powerpc/kernel/setup_64.c | 92 + > arch/riscv/Kconfig | 10 +-- > arch/sparc/Kconfig | 12 +-- > arch/sparc/kernel/smp_64.c | 105 +--- > arch/x86/Kconfig | 17 +--- > arch/x86/kernel/setup_percpu.c | 66 ++- > drivers/base/arch_numa.c | 68 +--- > include/linux/percpu.h | 13 +-- > mm/Kconfig | 12 +++ > mm/percpu.c| 143 + > 15 files changed, 165 insertions(+), 443 deletions(-) > > -- > 2.26.2 > I've made a few comments. I think this will be a little bit of a challenge to get through due to it touching so many architectures. For ease, it probably makes sense to run it through my tree, but we'll need explicit acks as I mentioned. I like getting rid of the pcpu_alloc_bootmem()/pcpu_free_bootmem() functions. However, let's keep the implementation identical to x86. I don't think we should get rid of the populate_pte_fn(). I'm not comfortable changing x86's implementation. Simply offer a NULL, and if NULL use the default. Do you have a tree that intel pulls? I suggest cleaning up the patches and pushing to a remote branch that they pick up. That would have caught the mips typo. Send a PR creating a file in [1] for your branch, github is fine. Basic validation needs to be done before I can pick this up too on more than arm64. 
[1] https://github.com/intel/lkp-tests/tree/master/repo/linux Thanks, Dennis
Re: [PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.
On 11/29/21 12:58 AM, Mahesh Salgaonkar wrote: > When certain PHB HW failure causes phyp to recover PHB, it marks the PE > state as temporarily unavailable until recovery is complete. This also > triggers an EEH handler in Linux which needs to notify drivers, and perform > recovery. But before notifying the driver about the pci error it uses > get_adapter_state()->get-sensor-state() operation of the hotplug_slot to > determine if the slot contains a device or not. If the slot is empty, the > recovery is skipped entirely. > > However on certain PHB failures, the rtas call get-sensor-state() returns > extended busy error (9902) until PHB is recovered by phyp. Once PHB is > recovered, the get-sensor-state() returns success with correct presence > status. The rtas call interface rtas_get_sensor() loops over the rtas call > on extended delay return code (9902) until the return value is either > success (0) or error (-1). This causes the EEH handler to get stuck for ~6 > seconds before it could notify that the pci error has been detected and > stop any active operations. Hence with running I/O traffic, during this 6 > seconds, the network driver continues its operation and hits a timeout > (netdev watchdog). On timeouts, network driver go into ffdc capture mode > and reset path assuming the PCI device is in fatal condition. This > sometimes causes EEH recovery to fail. This impacts the ssh connection and > leads to the system being inaccessible. > > > [52732.244731] DEBUG: ibm_read_slot_reset_state2() > [52732.244762] DEBUG: ret = 0, rets[0]=5, rets[1]=1, rets[2]=4000, rets[3]=> > [52732.244798] DEBUG: in eeh_slot_presence_check > [52732.244804] DEBUG: error state check > [52732.244807] DEBUG: Is slot hotpluggable > [52732.244810] DEBUG: hotpluggable ops ? 
> [52732.244953] DEBUG: Calling ops->get_adapter_status > [52732.244958] DEBUG: calling rpaphp_get_sensor_state > [52736.564262] [ cut here ] > [52736.564299] NETDEV WATCHDOG: enP64p1s0f3 (tg3): transmit queue 0 timed o> > [52736.564324] WARNING: CPU: 1442 PID: 0 at net/sched/sch_generic.c:478 dev> > [...] > [52736.564505] NIP [c0c32368] dev_watchdog+0x438/0x440 > [52736.564513] LR [c0c32364] dev_watchdog+0x434/0x440 > > > Fix this issue by introducing a new rtas_get_sensor_nonblocking() that does > not get blocked on BUSY condition and returns immediately with error. Use > this function in pseries pci hotplug driver which can return an error if > slot presence state can not be detected immediately. Please note that only > in certain PHB failures, the slot presence check returns BUSY condition. In > normal cases it returns immediately with a correct presence state value. > Hence this change has no impact on normal pci dlpar operations. > > We could use rtas_get_sensor_fast() variant, but it throws WARN_ON on BUSY > condition. The rtas_get_sensor_nonblocking() suppresses WARN_ON. 
> > Signed-off-by: Mahesh Salgaonkar > --- > > This is an alternate approach to fix the EEH issue instead of delaying slot > presence check proposed at > https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-November/236956.html > > Also refer: > https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-November/237027.html > --- > arch/powerpc/include/asm/rtas.h |1 + > arch/powerpc/kernel/rtas.c | 19 --- > drivers/pci/hotplug/rpaphp_pci.c |8 > 3 files changed, 21 insertions(+), 7 deletions(-) > > diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h > index 9dc97d2f9d27e..d8e8befb1c193 100644 > --- a/arch/powerpc/include/asm/rtas.h > +++ b/arch/powerpc/include/asm/rtas.h > @@ -250,6 +250,7 @@ extern void rtas_os_term(char *str); > void rtas_activate_firmware(void); > extern int rtas_get_sensor(int sensor, int index, int *state); > extern int rtas_get_sensor_fast(int sensor, int index, int *state); > +int rtas_get_sensor_nonblocking(int sensor, int index, int *state); > extern int rtas_get_power_level(int powerdomain, int *level); > extern int rtas_set_power_level(int powerdomain, int level, int *setlevel); > extern bool rtas_indicator_present(int token, int *maxindex); > diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c > index ac61e226c9af6..fd5aa3bbd46c5 100644 > --- a/arch/powerpc/kernel/rtas.c > +++ b/arch/powerpc/kernel/rtas.c > @@ -609,7 +609,8 @@ int rtas_get_sensor(int sensor, int index, int *state) > } > EXPORT_SYMBOL(rtas_get_sensor); > > -int rtas_get_sensor_fast(int sensor, int index, int *state) > +static int > +__rtas_get_sensor(int sensor, int index, int *state, bool warn_on) > { > int token = rtas_token("get-sensor-state"); > int rc; > @@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int > *state) > return -ENOENT; > > rc = rtas_call(token, 2, 2, state, sensor, index); > - WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && > -
Re: [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function
On Sun, Nov 21, 2021 at 05:35:57PM +0800, Kefeng Wang wrote: > When NEED_PER_CPU_PAGE_FIRST_CHUNK enabled, we need a function to > populate pte, add a generic pcpu populate pte function and switch > to use it. > > Signed-off-by: Kefeng Wang > --- > arch/powerpc/kernel/setup_64.c | 47 + > arch/sparc/kernel/smp_64.c | 57 + > arch/x86/kernel/setup_percpu.c | 5 +-- > drivers/base/arch_numa.c | 51 +- > include/linux/percpu.h | 5 +-- > mm/percpu.c| 77 +++--- > 6 files changed, 79 insertions(+), 163 deletions(-) > > diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c > index 364b1567f822..1a17828af77f 100644 > --- a/arch/powerpc/kernel/setup_64.c > +++ b/arch/powerpc/kernel/setup_64.c > @@ -788,51 +788,6 @@ static int pcpu_cpu_distance(unsigned int from, unsigned > int to) > unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; > EXPORT_SYMBOL(__per_cpu_offset); > > -static void __init pcpu_populate_pte(unsigned long addr) > -{ > - pgd_t *pgd = pgd_offset_k(addr); > - p4d_t *p4d; > - pud_t *pud; > - pmd_t *pmd; > - > - p4d = p4d_offset(pgd, addr); > - if (p4d_none(*p4d)) { > - pud_t *new; > - > - new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); > - if (!new) > - goto err_alloc; > - p4d_populate(_mm, p4d, new); > - } > - > - pud = pud_offset(p4d, addr); > - if (pud_none(*pud)) { > - pmd_t *new; > - > - new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); > - if (!new) > - goto err_alloc; > - pud_populate(_mm, pud, new); > - } > - > - pmd = pmd_offset(pud, addr); > - if (!pmd_present(*pmd)) { > - pte_t *new; > - > - new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); > - if (!new) > - goto err_alloc; > - pmd_populate_kernel(_mm, pmd, new); > - } > - > - return; > - > -err_alloc: > - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", > - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); > -} > - > - > void __init setup_per_cpu_areas(void) > { > const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; > @@ -861,7 +816,7 
@@ void __init setup_per_cpu_areas(void) > } > > if (rc < 0) > - rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, > pcpu_populate_pte); > + rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node); > if (rc < 0) > panic("cannot initialize percpu area (err=%d)", rc); > > diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c > index 198dadddb75d..00dffe2d834b 100644 > --- a/arch/sparc/kernel/smp_64.c > +++ b/arch/sparc/kernel/smp_64.c > @@ -1534,59 +1534,6 @@ static int __init pcpu_cpu_distance(unsigned int from, > unsigned int to) > return REMOTE_DISTANCE; > } > > -static void __init pcpu_populate_pte(unsigned long addr) > -{ > - pgd_t *pgd = pgd_offset_k(addr); > - p4d_t *p4d; > - pud_t *pud; > - pmd_t *pmd; > - > - if (pgd_none(*pgd)) { > - pud_t *new; > - > - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); > - if (!new) > - goto err_alloc; > - pgd_populate(_mm, pgd, new); > - } > - > - p4d = p4d_offset(pgd, addr); > - if (p4d_none(*p4d)) { > - pud_t *new; > - > - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); > - if (!new) > - goto err_alloc; > - p4d_populate(_mm, p4d, new); > - } > - > - pud = pud_offset(p4d, addr); > - if (pud_none(*pud)) { > - pmd_t *new; > - > - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); > - if (!new) > - goto err_alloc; > - pud_populate(_mm, pud, new); > - } > - > - pmd = pmd_offset(pud, addr); > - if (!pmd_present(*pmd)) { > - pte_t *new; > - > - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); > - if (!new) > - goto err_alloc; > - pmd_populate_kernel(_mm, pmd, new); > - } > - > - return; > - > -err_alloc: > - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", > - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); > -} > - > void __init setup_per_cpu_areas(void) > { > unsigned long delta; > @@ -1604,9 +1551,7 @@ void __init setup_per_cpu_areas(void) > pcpu_fc_names[pcpu_chosen_fc], rc); > } > if (rc < 0) > - rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, > -cpu_to_node, > -
Re: [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free function
On Sun, Nov 21, 2021 at 05:35:56PM +0800, Kefeng Wang wrote: > With previous patch, we could add a generic pcpu first chunk > allocation and free function to cleanup the duplicated definations > on each architecture. > > Signed-off-by: Kefeng Wang > --- > arch/mips/mm/init.c| 16 + > arch/powerpc/kernel/setup_64.c | 51 ++ > arch/sparc/kernel/smp_64.c | 50 +- > arch/x86/kernel/setup_percpu.c | 59 +- > drivers/base/arch_numa.c | 19 +- > include/linux/percpu.h | 9 + > mm/percpu.c| 66 ++ > 7 files changed, 42 insertions(+), 228 deletions(-) > > diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c > index ebbf6923532c..5a8002839550 100644 > --- a/arch/mips/mm/init.c > +++ b/arch/mips/mm/init.c > @@ -524,19 +524,6 @@ static int __init pcpu_cpu_to_node(int cpu) > return cpu_to_node(cpu); > } > > -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t > align, > -pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) > -{ > - return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), > - MEMBLOCK_ALLOC_ACCESSIBLE, > - cpu_to_nd_fun(cpu)); > -} > - > -static void __init pcpu_fc_free(void *ptr, size_t size) > -{ > - memblock_free(ptr, size); > -} > - > void __init setup_per_cpu_areas(void) > { > unsigned long delta; > @@ -550,8 +537,7 @@ void __init setup_per_cpu_areas(void) > rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, > PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, > pcpu_cpu_distance, > - pcpu_cpu_to_node, > - pcpu_fc_alloc, pcpu_fc_free); > + pcpu_cpu_to_node); > if (rc < 0) > panic("Failed to initialize percpu areas."); > > diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c > index 9a5609c821df..364b1567f822 100644 > --- a/arch/powerpc/kernel/setup_64.c > +++ b/arch/powerpc/kernel/setup_64.c > @@ -777,50 +777,6 @@ static __init int pcpu_cpu_to_node(int cpu) > return IS_ENABLED(CONFIG_NUMA) ? 
early_cpu_to_node(cpu) : NUMA_NO_NODE; > } > > -/** > - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu > - * @cpu: cpu to allocate for > - * @size: size allocation in bytes > - * @align: alignment > - * > - * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper > - * does the right thing for NUMA regardless of the current > - * configuration. > - * > - * RETURNS: > - * Pointer to the allocated area on success, NULL on failure. > - */ > -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, > size_t align, > - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) > -{ > - const unsigned long goal = __pa(MAX_DMA_ADDRESS); > -#ifdef CONFIG_NUMA > - int node = cpu_to_nd_fun(cpu); > - void *ptr; > - > - if (!node_online(node) || !NODE_DATA(node)) { > - ptr = memblock_alloc_from(size, align, goal); > - pr_info("cpu %d has no node %d or node-local memory\n", > - cpu, node); > - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", > - cpu, size, __pa(ptr)); > - } else { > - ptr = memblock_alloc_try_nid(size, align, goal, > - MEMBLOCK_ALLOC_ACCESSIBLE, node); > - pr_debug("per cpu data for cpu%d %lu bytes on node%d at " > - "%016lx\n", cpu, size, node, __pa(ptr)); > - } > - return ptr; > -#else > - return memblock_alloc_from(size, align, goal); > -#endif > -} > - > -static void __init pcpu_free_bootmem(void *ptr, size_t size) > -{ > - memblock_free(ptr, size); > -} > - > static int pcpu_cpu_distance(unsigned int from, unsigned int to) > { > if (early_cpu_to_node(from) == early_cpu_to_node(to)) > @@ -897,8 +853,7 @@ void __init setup_per_cpu_areas(void) > > if (pcpu_chosen_fc != PCPU_FC_PAGE) { > rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, > pcpu_cpu_distance, > - pcpu_cpu_to_node, > - pcpu_alloc_bootmem, > pcpu_free_bootmem); > + pcpu_cpu_to_node); > if (rc) > pr_warn("PERCPU: %s allocator failed (%d), " > "falling back to page size\n", > @@ -906,9 +861,7 @@ void __init setup_per_cpu_areas(void) > } > > if (rc < 0) > - rc = 
pcpu_page_first_chunk(0, pcpu_alloc_bootmem, > pcpu_free_bootmem, > -pcpu_cpu_to_node, > -
Re: [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
On Sun, Nov 21, 2021 at 05:35:55PM +0800, Kefeng Wang wrote: > Add pcpu_fc_cpu_to_node_fn_t and pass it into pcpu_fc_alloc_fn_t, > pcpu first chunk allocation will call it to alloc memblock on the > corresponding node by it. > > Signed-off-by: Kefeng Wang > --- > arch/mips/mm/init.c| 12 +--- > arch/powerpc/kernel/setup_64.c | 14 +++--- > arch/sparc/kernel/smp_64.c | 8 +--- > arch/x86/kernel/setup_percpu.c | 18 +- > drivers/base/arch_numa.c | 8 +--- > include/linux/percpu.h | 7 +-- > mm/percpu.c| 14 +- > 7 files changed, 57 insertions(+), 24 deletions(-) > > diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c > index 325e1552cbea..ebbf6923532c 100644 > --- a/arch/mips/mm/init.c > +++ b/arch/mips/mm/init.c > @@ -519,12 +519,17 @@ static int __init pcpu_cpu_distance(unsigned int from, > unsigned int to) > return node_distance(cpu_to_node(from), cpu_to_node(to)); > } > > -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, > -size_t align) > +static int __init pcpu_cpu_to_node(int cpu) > +{ > + return cpu_to_node(cpu); > +} > + > +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t > align, > +pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) > { > return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), > MEMBLOCK_ALLOC_ACCESSIBLE, > - cpu_to_node(cpu)); > + cpu_to_nd_fun(cpu)); > } > > static void __init pcpu_fc_free(void *ptr, size_t size) > @@ -545,6 +550,7 @@ void __init setup_per_cpu_areas(void) > rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, > PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, > pcpu_cpu_distance, > + pcpu_cpu_to_node, > pcpu_fc_alloc, pcpu_fc_free); > if (rc < 0) > panic("Failed to initialize percpu areas."); > diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c > index 6052f5d5ded3..9a5609c821df 100644 > --- a/arch/powerpc/kernel/setup_64.c > +++ b/arch/powerpc/kernel/setup_64.c > @@ -771,6 +771,12 @@ void __init emergency_stack_init(void) > } > > #ifdef CONFIG_SMP > + > +static __init int 
pcpu_cpu_to_node(int cpu) > +{ > + return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE; > +} > + > /** > * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu > * @cpu: cpu to allocate for > @@ -784,12 +790,12 @@ void __init emergency_stack_init(void) > * RETURNS: > * Pointer to the allocated area on success, NULL on failure. > */ > -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, > - size_t align) > +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, > size_t align, > + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) > { > const unsigned long goal = __pa(MAX_DMA_ADDRESS); > #ifdef CONFIG_NUMA > - int node = early_cpu_to_node(cpu); > + int node = cpu_to_nd_fun(cpu); ^ typo - cpu_to_nd_fn(). > void *ptr; > > if (!node_online(node) || !NODE_DATA(node)) { > @@ -891,6 +897,7 @@ void __init setup_per_cpu_areas(void) > > if (pcpu_chosen_fc != PCPU_FC_PAGE) { > rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, > pcpu_cpu_distance, > + pcpu_cpu_to_node, > pcpu_alloc_bootmem, > pcpu_free_bootmem); > if (rc) > pr_warn("PERCPU: %s allocator failed (%d), " > @@ -900,6 +907,7 @@ void __init setup_per_cpu_areas(void) > > if (rc < 0) > rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, > pcpu_free_bootmem, > +pcpu_cpu_to_node, > pcpu_populate_pte); > if (rc < 0) > panic("cannot initialize percpu area (err=%d)", rc); > diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c > index b98a7bbe6728..026aa3ccbc30 100644 > --- a/arch/sparc/kernel/smp_64.c > +++ b/arch/sparc/kernel/smp_64.c > @@ -1539,12 +1539,12 @@ void smp_send_stop(void) > * RETURNS: > * Pointer to the allocated area on success, NULL on failure. 
> */ > -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, > - size_t align) > +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, > size_t align, > + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) > { > const unsigned long goal = __pa(MAX_DMA_ADDRESS); > #ifdef CONFIG_NUMA > - int node = cpu_to_node(cpu); >
Re: [PATCH RFC 1/4] mm: percpu: Generalize percpu related config
Hello, On Sun, Nov 21, 2021 at 05:35:54PM +0800, Kefeng Wang wrote: > The HAVE_SETUP_PER_CPU_AREA/NEED_PER_CPU_EMBED_FIRST_CHUNK/ > NEED_PER_CPU_PAGE_FIRST_CHUNK/USE_PERCPU_NUMA_NODE_ID configs, > which has duplicate definitions on platforms that subscribe it. > > Move them into mm, drop these redundant definitions and instead > just select it on applicable platforms. > > Signed-off-by: Kefeng Wang > --- > arch/arm64/Kconfig | 20 > arch/ia64/Kconfig| 9 ++--- > arch/mips/Kconfig| 10 ++ > arch/powerpc/Kconfig | 17 - > arch/riscv/Kconfig | 10 ++ > arch/sparc/Kconfig | 12 +++- > arch/x86/Kconfig | 17 - > mm/Kconfig | 12 > 8 files changed, 33 insertions(+), 74 deletions(-) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index c4207cf9bb17..4ff73299f8a9 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -1135,6 +1135,10 @@ config NUMA > select GENERIC_ARCH_NUMA > select ACPI_NUMA if ACPI > select OF_NUMA > + select HAVE_SETUP_PER_CPU_AREA > + select NEED_PER_CPU_EMBED_FIRST_CHUNK > + select NEED_PER_CPU_PAGE_FIRST_CHUNK > + select USE_PERCPU_NUMA_NODE_ID > help > Enable NUMA (Non-Uniform Memory Access) support. > > @@ -1151,22 +1155,6 @@ config NODES_SHIFT > Specify the maximum number of NUMA Nodes available on the target > system. Increases memory reserved to accommodate various tables. 
> > -config USE_PERCPU_NUMA_NODE_ID > - def_bool y > - depends on NUMA > - > -config HAVE_SETUP_PER_CPU_AREA > - def_bool y > - depends on NUMA > - > -config NEED_PER_CPU_EMBED_FIRST_CHUNK > - def_bool y > - depends on NUMA > - > -config NEED_PER_CPU_PAGE_FIRST_CHUNK > - def_bool y > - depends on NUMA > - > source "kernel/Kconfig.hz" > > config ARCH_SPARSEMEM_ENABLE > diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig > index 1e33666fa679..703952819e10 100644 > --- a/arch/ia64/Kconfig > +++ b/arch/ia64/Kconfig > @@ -32,6 +32,7 @@ config IA64 > select HAVE_FTRACE_MCOUNT_RECORD > select HAVE_DYNAMIC_FTRACE if (!ITANIUM) > select HAVE_FUNCTION_TRACER > + select HAVE_SETUP_PER_CPU_AREA > select TTY > select HAVE_ARCH_TRACEHOOK > select HAVE_VIRT_CPU_ACCOUNTING > @@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY > bool > default y > > -config HAVE_SETUP_PER_CPU_AREA > - def_bool y > - > config DMI > bool > default y > @@ -292,6 +290,7 @@ config NUMA > bool "NUMA support" > depends on !FLATMEM > select SMP > + select USE_PERCPU_NUMA_NODE_ID > help > Say Y to compile the kernel to support NUMA (Non-Uniform Memory > Access). This option is for configuring high-end multiprocessor > @@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION > def_bool y > depends on NUMA > > -config USE_PERCPU_NUMA_NODE_ID > - def_bool y > - depends on NUMA > - > config HAVE_MEMORYLESS_NODES > def_bool NUMA > > diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig > index de60ad190057..c106a2080877 100644 > --- a/arch/mips/Kconfig > +++ b/arch/mips/Kconfig > @@ -2666,6 +2666,8 @@ config NUMA > bool "NUMA Support" > depends on SYS_SUPPORTS_NUMA > select SMP > + select HAVE_SETUP_PER_CPU_AREA > + select NEED_PER_CPU_EMBED_FIRST_CHUNK > help > Say Y to compile the kernel to support NUMA (Non-Uniform Memory > Access). 
This option improves performance on systems with more > @@ -2676,14 +2678,6 @@ config NUMA > config SYS_SUPPORTS_NUMA > bool > > -config HAVE_SETUP_PER_CPU_AREA > - def_bool y > - depends on NUMA > - > -config NEED_PER_CPU_EMBED_FIRST_CHUNK > - def_bool y > - depends on NUMA > - > config RELOCATABLE > bool "Relocatable kernel" > depends on SYS_SUPPORTS_RELOCATABLE > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index dea74d7717c0..8badd39854a0 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN > default 9 if PPC_16K_PAGES # 9 = 23 (8MB) - 14 (16K) > default 11 # 11 = 23 (8MB) - 12 (4K) > > -config HAVE_SETUP_PER_CPU_AREA > - def_bool PPC64 > - > -config NEED_PER_CPU_EMBED_FIRST_CHUNK > - def_bool y if PPC64 > - > -config NEED_PER_CPU_PAGE_FIRST_CHUNK > - def_bool y if PPC64 > - > config NR_IRQS > int "Number of virtual interrupt numbers" > range 32 1048576 > @@ -240,6 +231,7 @@ config PPC > select HAVE_REGS_AND_STACK_ACCESS_API > select HAVE_RELIABLE_STACKTRACE > select HAVE_RSEQ > + select HAVE_SETUP_PER_CPU_AREA if PPC64 > select HAVE_SOFTIRQ_ON_OWN_STACK > select HAVE_STACKPROTECTOR if PPC32 && > $(cc-option,-mstack-protector-guard=tls
Re: [RFC PATCH 0/3] Use pageblock_order for cma and alloc_contig_range alignment.
On 23 Nov 2021, at 12:32, Vlastimil Babka wrote: > On 11/23/21 17:35, Zi Yan wrote: >> On 19 Nov 2021, at 10:15, Zi Yan wrote: > From what my understanding, cma required alignment of > max(MAX_ORDER - 1, pageblock_order), because when MIGRATE_CMA was > introduced, > __free_one_page() does not prevent merging two different pageblocks, when > MAX_ORDER - 1 > pageblock_order. But current __free_one_page() > implementation > does prevent that. But it does prevent that only for isolated pageblock, not CMA, and yout patchset doesn't seem to expand that to CMA? Or am I missing something. >>> >>> Yeah, you are right. Originally, I thought preventing merging isolated >>> pageblock >>> with other types of pageblocks is sufficient, since MIGRATE_CMA is always >>> converted from MIGRATE_ISOLATE. But that is not true. I will rework the >>> code. >>> Thanks for pointing this out. >>> >> >> I find that two pageblocks with different migratetypes, like >> MIGRATE_RECLAIMABLE >> and MIGRATE_MOVABLE can be merged into a single free page after I checked >> __free_one_page() in detail and printed pageblock information during buddy >> page >> merging. > > Yes, that can happen. > > I am not sure what consequence it will cause. Do you have any idea? > > For MIGRATE_RECLAIMABLE or MIGRATE_MOVABLE or even MIGRATE_UNMOVABLE it's > absolutely fine. As long as these pageblocks are fully free (and they are if > it's a single free page spanning 2 pageblocks), they can be of any of these > type, as they can be reused as needed without causing fragmentation. > > But in case of MIGRATE_CMA and MIGRATE_ISOLATE, uncontrolled merging would > break the specifics of those types. That's why the code is careful for > MIGRATE_ISOLATE, and MIGRATE_CMA was until now done in MAX_ORDER granularity. Thanks for the explanation. Basically migratetypes that can fall back to each other can be merged into a single free page, right? How about MIGRATE_HIGHATOMIC? 
It should not be merged with other migratetypes from my understanding. -- Best Regards, Yan, Zi signature.asc Description: OpenPGP digital signature
Re: [patch 05/22] genirq/msi: Fixup includes
Cedric, On Mon, Nov 29 2021 at 08:33, Cédric Le Goater wrote: > On 11/27/21 02:18, Thomas Gleixner wrote: >> Remove the kobject.h include from msi.h as it's not required and add a >> sysfs.h include to the core code instead. >> >> Signed-off-by: Thomas Gleixner > > > This patch breaks compile on powerpc : > >CC arch/powerpc/kernel/msi.o > In file included from ../arch/powerpc/kernel/msi.c:7: > ../include/linux/msi.h:410:65: error: ‘struct cpumask’ declared inside > parameter list will not be visible outside of this definition or declaration > [-Werror] >410 | int msi_domain_set_affinity(struct irq_data *data, const struct > cpumask *mask, >| > ^~~ > cc1: all warnings being treated as errors > > Below is fix you can merge in patch 5. thanks for having a look. I fixed up this and other fallout and pushed out an updated series (all 4 parts) to: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel msi Thanks, tglx
[Bug 215169] UBSAN: shift-out-of-bounds in arch/powerpc/mm/kasan/book3s_32.c:22:23
https://bugzilla.kernel.org/show_bug.cgi?id=215169 --- Comment #1 from Erhard F. (erhar...@mailbox.org) --- Created attachment 299779 --> https://bugzilla.kernel.org/attachment.cgi?id=299779=edit kernel .config (5.15.5, PowerMac G4 DP) -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
[Bug 215169] New: UBSAN: shift-out-of-bounds in arch/powerpc/mm/kasan/book3s_32.c:22:23
https://bugzilla.kernel.org/show_bug.cgi?id=215169 Bug ID: 215169 Summary: UBSAN: shift-out-of-bounds in arch/powerpc/mm/kasan/book3s_32.c:22:23 Product: Platform Specific/Hardware Version: 2.5 Kernel Version: 5.15.5 Hardware: PPC-32 OS: Linux Tree: Mainline Status: NEW Severity: normal Priority: P1 Component: PPC-32 Assignee: platform_ppc...@kernel-bugs.osdl.org Reporter: erhar...@mailbox.org Regression: No Created attachment 299777 --> https://bugzilla.kernel.org/attachment.cgi?id=299777=edit dmesg (5.15.5, INLINE KASAN, PowerMac G4 DP) Noticed another small glitch during 5.15.x testing. Happens when inline KASAN is selected: Total memory = 2048MB; using 4096kB for hash table Activating Kernel Userspace Access Protection Linux version 5.15.5-gentoo-PowerMacG4 (root@T1000) (gcc (Gentoo 11.2.0 p1) 11.2.0, GNU ld (Gentoo 2.37_p1 p0) 2.37) #9 SMP Mon Nov 29 20:46:44 CET 2021 UBSAN: shift-out-of-bounds in arch/powerpc/mm/kasan/book3s_32.c:22:23 shift exponent -1 is negative CPU: 0 PID: 0 Comm: swapper Not tainted 5.15.5-gentoo-PowerMacG4 #9 Call Trace: [c214be60] [c0ba0048] dump_stack_lvl+0x80/0xb0 (unreliable) [c214be80] [c0b99288] ubsan_epilogue+0x10/0x5c [c214be90] [c0b98fe0] __ubsan_handle_shift_out_of_bounds+0x94/0x138 [c214bf00] [c1c0f010] kasan_init_region+0xd8/0x26c [c214bf30] [c1c0ed84] kasan_init+0xc0/0x198 [c214bf70] [c1c08024] setup_arch+0x18/0x54c [c214bfc0] [c1c037f0] start_kernel+0x90/0x33c [c214bff0] [3610] 0x3610 setbat: no BAT available for mapping 0x1c00 KASAN init done [...] The other UBSAN complaint in the netconsole.log is bug #214867. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
[Bug 205099] KASAN hit at raid6_pq: BUG: Unable to handle kernel data access at 0x00f0fd0d
https://bugzilla.kernel.org/show_bug.cgi?id=205099 --- Comment #45 from Erhard F. (erhar...@mailbox.org) --- Created attachment 299773 --> https://bugzilla.kernel.org/attachment.cgi?id=299773=edit kernel_page_tables (5.15.5, OUTLINE KASAN, LOWMEM_SIZE=0x3000, PowerMac G4 DP) Ah yes, I forgot about including the /sys/kernel/debug/kernel_page_tables.. Sorry! Here you are. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
[Bug 205099] KASAN hit at raid6_pq: BUG: Unable to handle kernel data access at 0x00f0fd0d
https://bugzilla.kernel.org/show_bug.cgi?id=205099 --- Comment #46 from Erhard F. (erhar...@mailbox.org) --- Created attachment 299775 --> https://bugzilla.kernel.org/attachment.cgi?id=299775=edit kernel_page_tables (5.15.5, INLINE KASAN, LOWMEM_SIZE=0x2000, PowerMac G4 DP) -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH] powerpc/pseries/vas: Don't print an error when VAS is unavailable
On 11/26/21 2:31 AM, Nicholas Piggin wrote: > Excerpts from Cédric Le Goater's message of November 26, 2021 5:13 pm: >> On 11/26/21 06:21, Nicholas Piggin wrote: >>> KVM does not support VAS so guests always print a useless error on boot >>> >>> vas: HCALL(398) error -2, query_type 0, result buffer 0x57f2000 >>> >>> Change this to only print the message if the error is not H_FUNCTION. >> >> >> Just being curious, why is it even called since "ibm,compression" should >> not be exposed in the DT ? > > It looks like vas does not test for it. I guess in theory there can be > other functions than compression implemented as an accelerator. Maybe > that's why? > > Thanks, > Nick > Looks like pseries_vas_init() simply calls h_query_vas_capabilities() to test for VAS coprocessor support. I would assume KVM doesn't expose hcall-vas or hcall-nx in /rtas/ibm,hypertas-functions? Doesn't look like hcall-vas or hcall-nx have been added to the hypertas_fw_feature matching, but maybe they should and we can gate VAS initialization on those, or at the minimum FW_FEATURE_VAS? -Tyrel
[PATCH 11/11] locking: Allow to include asm/spinlock_types.h from linux/spinlock_types_raw.h
The printk header file includes ratelimit_types.h for its __ratelimit() based usage. It is required for the static initializer used in printk_ratelimited(). It uses a raw_spinlock_t and includes the spinlock_types.h. PREEMPT_RT substitutes spinlock_t with a rtmutex based implementation and so its spinlock_t implmentation (provided by spinlock_rt.h) includes rtmutex.h and atomic.h which leads to recursive includes where defines are missing. By including only the raw_spinlock_t defines it avoids the atomic.h related includes at this stage. An example on powerpc: | CALLscripts/atomic/check-atomics.sh |In file included from include/linux/bug.h:5, | from include/linux/page-flags.h:10, | from kernel/bounds.c:10: |arch/powerpc/include/asm/page_32.h: In function ‘clear_page’: |arch/powerpc/include/asm/bug.h:87:4: error: implicit declaration of function ‘__WARN’ [-Werror=implicit-function-declaration] | 87 |__WARN();\ | |^~ |arch/powerpc/include/asm/page_32.h:48:2: note: in expansion of macro ‘WARN_ON’ | 48 | WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1)); | | ^~~ |arch/powerpc/include/asm/bug.h:58:17: error: invalid application of ‘sizeof’ to incomplete type ‘struct bug_entry’ | 58 | "i" (sizeof(struct bug_entry)), \ | | ^~ |arch/powerpc/include/asm/bug.h:89:3: note: in expansion of macro ‘BUG_ENTRY’ | 89 | BUG_ENTRY(PPC_TLNEI " %4, 0", \ | | ^ |arch/powerpc/include/asm/page_32.h:48:2: note: in expansion of macro ‘WARN_ON’ | 48 | WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1)); | | ^~~ |In file included from arch/powerpc/include/asm/ptrace.h:298, | from arch/powerpc/include/asm/hw_irq.h:12, | from arch/powerpc/include/asm/irqflags.h:12, | from include/linux/irqflags.h:16, | from include/asm-generic/cmpxchg-local.h:6, | from arch/powerpc/include/asm/cmpxchg.h:526, | from arch/powerpc/include/asm/atomic.h:11, | from include/linux/atomic.h:7, | from include/linux/rwbase_rt.h:6, | from include/linux/rwlock_types.h:55, | from include/linux/spinlock_types.h:74, | from 
include/linux/ratelimit_types.h:7, | from include/linux/printk.h:10, | from include/asm-generic/bug.h:22, | from arch/powerpc/include/asm/bug.h:109, | from include/linux/bug.h:5, | from include/linux/page-flags.h:10, | from kernel/bounds.c:10: |include/linux/thread_info.h: In function ‘copy_overflow’: |include/linux/thread_info.h:210:2: error: implicit declaration of function ‘WARN’ [-Werror=implicit-function-declaration] | 210 | WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); | | ^~~~ The WARN / BUG include pulls in printk.h and then ptrace.h expects WARN (from bug.h) which is not yet complete. Even hw_irq.h has WARN_ON() statements. On POWERPC64 there are missing atomic64 defines while building 32bit VDSO: | VDSO32C arch/powerpc/kernel/vdso32/vgettimeofday.o |In file included from include/linux/atomic.h:80, | from include/linux/rwbase_rt.h:6, | from include/linux/rwlock_types.h:55, | from include/linux/spinlock_types.h:74, | from include/linux/ratelimit_types.h:7, | from include/linux/printk.h:10, | from include/linux/kernel.h:19, | from arch/powerpc/include/asm/page.h:11, | from arch/powerpc/include/asm/vdso/gettimeofday.h:5, | from include/vdso/datapage.h:137, | from lib/vdso/gettimeofday.c:5, | from : |include/linux/atomic-arch-fallback.h: In function ‘arch_atomic64_inc’: |include/linux/atomic-arch-fallback.h:1447:2: error: implicit declaration of function ‘arch_atomic64_add’; did you mean ‘arch_atomic_add’? [-Werror=impl |icit-function-declaration] | 1447 | arch_atomic64_add(1, v); | | ^ | | arch_atomic_add The generic fallback is not included, atomics itself are not used. If kernel.h does not include printk.h then it comes later from the bug.h include. Allow asm/spinlock_types.h to be included from linux/spinlock_types_raw.h. 
Cc: Albert Ou Cc: Alexander Gordeev Cc: Benjamin Herrenschmidt Cc: Brian Cain Cc: Catalin Marinas Cc: Chris Zankel Cc: Christian Borntraeger Cc: Guo Ren Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Rich Felker Cc: Richard Henderson Cc: Russell King Cc: Vasily Gorbik Cc: Yoshinori Sato Cc: linux-al...@vger.kernel.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-c...@vger.kernel.org Cc: linux-hexa...@vger.kernel.org Cc: linux-i...@vger.kernel.org
Re: [PATCH] recordmcount: Support empty section from recent binutils
Le 29/11/2021 à 18:43, Steven Rostedt a écrit : On Fri, 26 Nov 2021 08:43:23 + LEROY Christophe wrote: Le 24/11/2021 à 15:43, Christophe Leroy a écrit : Looks like recent binutils (2.36 and over ?) may empty some section, leading to failure like: Cannot find symbol for section 11: .text.unlikely. kernel/kexec_file.o: failed make[1]: *** [scripts/Makefile.build:287: kernel/kexec_file.o] Error 1 In order to avoid that, ensure that the section has a content before returning it's name in has_rel_mcount(). This patch doesn't work, on PPC32 I get the following message with this patch applied: [0.00] ftrace: No functions to be traced? Without the patch I get: [0.00] ftrace: allocating 22381 entries in 66 pages [0.00] ftrace: allocated 66 pages with 2 groups Because of this report, I have not applied this patch (even though I was about to push it to Linus). I'm pulling it from my queue until this gets resolved. I have no idea on how to fix that for the moment. With GCC 10 (binutils 2.36) an objdump -x on kernel/kexec_file.o gives: ld .text.unlikely .text.unlikely wF .text.unlikely 0038 .arch_kexec_apply_relocations_add 0038 wF .text.unlikely 0038 .arch_kexec_apply_relocations With GCC 11 (binutils 2.37) the same gives: wF .text.unlikely 0038 .arch_kexec_apply_relocations_add 0038 wF .text.unlikely 0038 .arch_kexec_apply_relocations The problem is that recordmcount drops weak symbols, and it doesn't find any non-weak symbol in .text.unlikely Explication given at https://elixir.bootlin.com/linux/v5.16-rc2/source/scripts/recordmcount.h#L506 I have no idea on what to do. Thanks Christophe
[PATCH 10/10] powerpc/pseries/vas: Write 'target_creds' for QoS credits change
PowerVM support two types of credits - Default (uses normal priority FIFO) and Qality of service (QoS uses high priproty FIFO). The user decides the number of QoS credits and sets this value with HMC interface. With the core add/removal, this value can be changed in HMC which invokes drmgr to communicate to the kernel. This patch adds an interface so that drmgr command can write the new target QoS credits in sysfs. But the kernel gets the new QoS capabilities from the hypervisor whenever target_creds is updated to make sure sync with the values in the hypervisor. Signed-off-by: Haren Myneni --- arch/powerpc/platforms/pseries/vas-sysfs.c | 34 +- arch/powerpc/platforms/pseries/vas.c | 2 +- arch/powerpc/platforms/pseries/vas.h | 1 + 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c index 625082bebcb2..5bae2269d194 100644 --- a/arch/powerpc/platforms/pseries/vas-sysfs.c +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -32,6 +32,34 @@ static ssize_t avail_creds_show(struct vas_cop_feat_caps *caps, char *buf) return sprintf(buf, "%d\n", avail_creds); } +/* + * This function is used to get the notification from the drmgr when + * QoS credits are changed as part of DLPAR core add/removal. Though + * receiving the total QoS credits here, get the official QoS + * capabilities from the hypervisor. + */ +static ssize_t target_creds_store(struct vas_cop_feat_caps *caps, + const char *buf, size_t count) +{ + int err; + u16 creds; + + /* +* Nothing to do for default credit type. 
+*/ + if (caps->win_type == VAS_GZIP_DEF_FEAT_TYPE) + return -EOPNOTSUPP; + + err = kstrtou16(buf, 0, ); + if (!err) + err = vas_reconfig_capabilties(caps->win_type); + + if (err) + return -EINVAL; + + return count; +} + #define sysfs_capbs_entry_read(_name) \ static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) \ { \ @@ -48,8 +76,12 @@ struct vas_sysfs_entry { sysfs_capbs_entry_read(_name); \ static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \ 0444, _name##_show, NULL); +#define VAS_ATTR(_name) \ + sysfs_capbs_entry_read(_name); \ + static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \ + 0644, _name##_show, _name##_store) -VAS_ATTR_RO(target_creds); +VAS_ATTR(target_creds); VAS_ATTR_RO(used_creds); static struct vas_sysfs_entry avail_creds_attribute = diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index c769c8534b3a..d271fa71bded 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -707,7 +707,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) * changes. Reconfig window configurations based on the credits * availability from this new capabilities. */ -static int vas_reconfig_capabilties(u8 type) +int vas_reconfig_capabilties(u8 type) { int lpar_creds, avail_creds, excess_creds; struct hv_vas_cop_feat_caps *hv_caps; diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 15bcadf8872a..e369db35f0fd 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -130,5 +130,6 @@ struct pseries_vas_window { }; int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); +int vas_reconfig_capabilties(u8 type); int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); #endif /* _VAS_H */ -- 2.27.0
[PATCH 09/10] powerpc/pseries/vas: sysfs interface to export capabilities
The hypervisor provides the available VAS GZIP capabilities such as default or QoS window type and the target available credits in each type. This patch creates sysfs entries and exports the target, used and the available credits for each feature. This interface can be used by the user space to determine the credits usage or to set the target credits in the case of QoS type (for DLPAR). /sys/devices/vas/vas0/gzip/def_caps: (default GZIP capabilities) avail_creds /* Available credits to use */ target_creds /* Total credits available. Can be /* changed with DLPAR operation */ used_creds /* Used credits */ /sys/devices/vas/vas0/gzip/qos_caps (QoS GZIP capabilities) avail_creds target_creds used_creds Signed-off-by: Haren Myneni --- arch/powerpc/platforms/pseries/Makefile| 2 +- arch/powerpc/platforms/pseries/vas-sysfs.c | 214 + arch/powerpc/platforms/pseries/vas.c | 6 + arch/powerpc/platforms/pseries/vas.h | 6 + 4 files changed, 227 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/platforms/pseries/vas-sysfs.c diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 41d8aee98da4..349f42c31b65 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -30,6 +30,6 @@ obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o obj-$(CONFIG_SUSPEND) += suspend.o -obj-$(CONFIG_PPC_VAS) += vas.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c new file mode 100644 index ..625082bebcb2 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2016-17 IBM Corp. 
+ */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include + +#include "vas.h" + +#ifdef CONFIG_SYSFS +static struct kobject *pseries_vas_kobj; +static struct kobject *gzip_caps_kobj; + +struct vas_caps_entry { + struct kobject kobj; + struct vas_cop_feat_caps *caps; +}; + +#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj) + +static ssize_t avail_creds_show(struct vas_cop_feat_caps *caps, char *buf) +{ + int avail_creds = atomic_read(>target_creds) - + atomic_read(>used_creds); + return sprintf(buf, "%d\n", avail_creds); +} + +#define sysfs_capbs_entry_read(_name) \ +static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) \ +{ \ + return sprintf(buf, "%d\n", atomic_read(>_name)); \ +} + +struct vas_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct vas_cop_feat_caps *, char *); + ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t); +}; + +#define VAS_ATTR_RO(_name) \ + sysfs_capbs_entry_read(_name); \ + static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \ + 0444, _name##_show, NULL); + +VAS_ATTR_RO(target_creds); +VAS_ATTR_RO(used_creds); + +static struct vas_sysfs_entry avail_creds_attribute = + __ATTR(avail_creds, 0444, avail_creds_show, NULL); + +static struct attribute *vas_capab_attrs[] = { + _creds_attribute.attr, + _creds_attribute.attr, + _creds_attribute.attr, + NULL, +}; + +static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr, +char *buf) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + + if (!entry->show) + return -EIO; + + return entry->show(caps, buf); +} + +static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + 
struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + if (!entry->store) + return -EIO; + + return entry->store(caps, buf, count); +} + +static void vas_type_release(struct kobject *kobj) +{ + struct vas_caps_entry *centry = to_caps_entry(kobj); + kfree(centry); +} + +static const struct sysfs_ops vas_sysfs_ops = { + .show = vas_type_show, + .store = vas_type_store, +}; + +static struct
[PATCH 08/10] powerpc/vas: Return paste instruction failure if no active window
The VAS window may not be active if the system looses credits and the NX generates page fault when it receives request on unmap paste address. The kernel handles the fault by remap new paste address if the window is active again, Otherwise return the paste instruction failure if the executed instruction that caused the fault was a paste. Signed-off-by: Nicholas Piggin Signed-off-by: Haren Myneni --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/platforms/book3s/vas-api.c | 47 - 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index baea657bc868..30bb3c0e07f9 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -261,6 +261,8 @@ #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK0xfc1e #define PPC_INST_MTMSRD0x7c000164 +#define PPC_INST_PASTE 0x7c20070d +#define PPC_INST_PASTE_MASK0xfc2007ff #define PPC_INST_POPCNTB 0x7cf4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 5ceba75c13eb..2ffd34bc4032 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -351,6 +351,41 @@ static int coproc_release(struct inode *inode, struct file *fp) return 0; } +/* + * If the executed instruction that caused the fault was a paste, then + * clear regs CR0[EQ], advance NIP, and return 0. Else return error code. + */ +static int do_fail_paste(void) +{ + struct pt_regs *regs = current->thread.regs; + u32 instword; + + if (WARN_ON_ONCE(!regs)) + return -EINVAL; + + if (WARN_ON_ONCE(!user_mode(regs))) + return -EINVAL; + + /* +* If we couldn't translate the instruction, the driver should +* return success without handling the fault, it will be retried +* or the instruction fetch will fault. 
+*/ + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EAGAIN; + + /* +* Not a paste instruction, driver may fail the fault. +*/ + if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE) + return -ENOENT; + + regs->ccr &= ~0xe000; /* Clear CR0[0-2] to fail paste */ + regs_add_return_ip(regs, 4);/* Skip the paste */ + + return 0; +} + /* * This fault handler is invoked when the VAS/NX generates page fault on * the paste address. Happens if the kernel closes window in hypervisor @@ -403,9 +438,19 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) } mutex_unlock(>task_ref.mmap_mutex); - return VM_FAULT_SIGBUS; + /* +* Received this fault due to closing the actual window. +* It can happen during migration or lost credits. +* Since no mapping, return the paste instruction failure +* to the user space. +*/ + ret = do_fail_paste(); + if (!ret) + return VM_FAULT_NOPAGE; + return VM_FAULT_SIGBUS; } + static const struct vm_operations_struct vas_vm_ops = { .fault = vas_mmap_fault, }; -- 2.27.0
[PATCH 07/10] powerpc/vas: Add paste address mmap fault handler
The user space opens VAS windows and issues NX requests by pasting CRB on the corresponding paste address mmap. When the system looses credits due to core removal, the kernel has to close the window in the hypervisor and make the window inactive by unmapping this paste address. Also the OS has to handle NX request page faults if the user space issue NX requests. This handler remap the new paste address with the same VMA when the window is active again (due to core add with DLPAR). Otherwise returns paste failure. Signed-off-by: Haren Myneni --- arch/powerpc/platforms/book3s/vas-api.c | 60 + 1 file changed, 60 insertions(+) diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 2d06bd1b1935..5ceba75c13eb 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -351,6 +351,65 @@ static int coproc_release(struct inode *inode, struct file *fp) return 0; } +/* + * This fault handler is invoked when the VAS/NX generates page fault on + * the paste address. Happens if the kernel closes window in hypervisor + * (on PowerVM) due to lost credit or the paste address is not mapped. + */ +static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct file *fp = vma->vm_file; + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + u64 paste_addr; + int ret; + + /* +* window is not opened. Shouldn't expect this error. +*/ + if (!cp_inst || !cp_inst->txwin) { + pr_err("%s(): No send window open?\n", __func__); + return VM_FAULT_SIGBUS; + } + + txwin = cp_inst->txwin; + /* +* Fault is coming due to missing from the original mmap. +* Can happen only when the window is closed due to lost +* credit before mmap() or the user space issued NX request +* without mapping. 
+*/ + if (txwin->task_ref.vma != vmf->vma) { + pr_err("%s(): No previous mapping with paste address\n", + __func__); + return VM_FAULT_SIGBUS; + } + + mutex_lock(>task_ref.mmap_mutex); + /* +* The window may be inactive due to lost credit (Ex: core +* removal with DLPAR). When the window is active again when +* the credit is available, remap with the new paste address. +*/ + if (txwin->status == VAS_WIN_ACTIVE) { + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (paste_addr) { + ret = vmf_insert_pfn(vma, vma->vm_start, + (paste_addr >> PAGE_SHIFT)); + mutex_unlock(>task_ref.mmap_mutex); + return ret; + } + } + mutex_unlock(>task_ref.mmap_mutex); + + return VM_FAULT_SIGBUS; + +} +static const struct vm_operations_struct vas_vm_ops = { + .fault = vas_mmap_fault, +}; + static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) { struct coproc_instance *cp_inst = fp->private_data; @@ -417,6 +476,7 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) paste_addr, vma->vm_start, rc); txwin->task_ref.vma = vma; + vma->vm_ops = _vm_ops; out: mutex_unlock(>task_ref.mmap_mutex); -- 2.27.0
Re: [PATCH] recordmcount: Support empty section from recent binutils
On Fri, 26 Nov 2021 08:43:23 + LEROY Christophe wrote: > Le 24/11/2021 à 15:43, Christophe Leroy a écrit : > > Looks like recent binutils (2.36 and over ?) may empty some section, > > leading to failure like: > > > > Cannot find symbol for section 11: .text.unlikely. > > kernel/kexec_file.o: failed > > make[1]: *** [scripts/Makefile.build:287: kernel/kexec_file.o] Error 1 > > > > In order to avoid that, ensure that the section has content before > > returning its name in has_rel_mcount(). > > This patch doesn't work, on PPC32 I get the following message with this > patch applied: > > [0.00] ftrace: No functions to be traced? > > Without the patch I get: > > [0.00] ftrace: allocating 22381 entries in 66 pages > [0.00] ftrace: allocated 66 pages with 2 groups Because of this report, I have not applied this patch (even though I was about to push it to Linus). I'm pulling it from my queue until this gets resolved. Thanks, -- Steve
[PATCH 06/10] powerpc/vas: Map paste address only if window is active
The paste address mapping is done with mmap() after the window is opened with ioctl. But the window can be closed due to lost credit caused by core removal before mmap(). So if the window is not active, return mmap() failure with -EACCES and expect the user space to reissue mmap() when the window is active or open a new window when the credit is available. Signed-off-by: Haren Myneni --- arch/powerpc/platforms/book3s/vas-api.c | 21 - 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index a63fd48e34a7..2d06bd1b1935 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -379,10 +379,27 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EACCES; } + /* +* The initial mapping is done after the window is opened +* with ioctl. But this window might have been closed +* due to lost credit (core removal on PowerVM) before mmap(). +* So if the window is not active, return mmap() failure +* with -EACCES and expects the user space reconfigure (mmap) +* window when it is active again or open new window when +* the credit is available. +*/ + mutex_lock(&txwin->task_ref.mmap_mutex); + if (txwin->status != VAS_WIN_ACTIVE) { + pr_err("%s(): Window is not active\n", __func__); + rc = -EACCES; + goto out; + } + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); if (!paste_addr) { pr_err("%s(): Window paste address failed\n", __func__); - return -EINVAL; + rc = -EINVAL; + goto out; } pfn = paste_addr >> PAGE_SHIFT; @@ -401,6 +418,8 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) txwin->task_ref.vma = vma; +out: + mutex_unlock(&txwin->task_ref.mmap_mutex); return rc; } -- 2.27.0
[PATCH 05/10] powerpc/pseries/vas: Close windows with DLPAR core removal
The hypervisor reduces the available credits if the core is removed from the LPAR. So there is possibility of using excessive credits (windows) in the LPAR and the hypervisor expects the system to close the excessive windows. Even though the user space can continue to use these windows to send compression requests to NX, the hypervisor expects the LPAR to reduce these windows usage so that NX load can be equally distributed across all LPARs in the system. When the DLPAR notifier is received, get the new VAS capabilities from the hypervisor and close the excessive windows in the hypervisor. Also the kernel unmaps the paste address so that the user space receives paste failure until these windows are active with the later DLPAR (core add). Signed-off-by: Haren Myneni --- arch/powerpc/include/asm/vas.h | 1 + arch/powerpc/platforms/book3s/vas-api.c | 2 + arch/powerpc/platforms/pseries/vas.c| 93 - 3 files changed, 94 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 43cea69d1af1..72d1df038b4b 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -73,6 +73,7 @@ struct vas_user_win_ref { struct mm_struct *mm; /* Linux process mm_struct */ struct mutex mmap_mutex;/* protects paste address mmap() */ /* with DLPAR close/open windows */ + struct vm_area_struct *vma; /* Save VMA and used in DLPAR ops */ }; /* diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 2b0ced611f32..a63fd48e34a7 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -399,6 +399,8 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, paste_addr, vma->vm_start, rc); + txwin->task_ref.vma = vma; + return rc; } diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index ace8ee7a99e6..ed458620f007 100644 --- 
a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -431,14 +431,27 @@ static int vas_deallocate_window(struct vas_window *vwin) caps = [win->win_type].caps; mutex_lock(_pseries_mutex); + /* +* VAS window is already closed in the hypervisor when +* lost the credit. So just remove the entry from +* the list, remove task references and free vas_window +* struct. +*/ + if (win->vas_win.status == VAS_WIN_NO_CRED_CLOSE) { + vascaps[win->win_type].close_wins--; + goto out; + } + rc = deallocate_free_window(win); if (rc) { mutex_unlock(_pseries_mutex); return rc; } - list_del(>win_list); atomic_dec(>used_creds); + +out: + list_del(>win_list); mutex_unlock(_pseries_mutex); put_vas_user_win_ref(>task_ref); @@ -617,6 +630,74 @@ static int reconfig_open_windows(struct vas_caps *vcaps, int creds) return rc; } +/* + * The hypervisor reduces the available credits if the LPAR lost core. It + * means the excessive windows should not be active and the user space + * should not be using these windows to send compression requests to NX. + * So the kernel closes the excessive windows and unmap the paste address + * such that the user space receives paste instruction failure. Then up to + * the user space to fall back to SW compression and manage with the + * existing windows. + */ +static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) +{ + struct vas_cop_feat_caps *caps = >caps; + struct vm_area_struct *vma; + struct pseries_vas_window *win; + struct vas_user_win_ref *task_ref; + int rc = 0; + + list_for_each_entry(win, >list, win_list) { + /* +* This window is already closed due to lost credit +* before. Go for next window. +*/ + if (win->vas_win.status == VAS_WIN_NO_CRED_CLOSE) + continue; + + task_ref = >vas_win.task_ref; + mutex_lock(_ref->mmap_mutex); + vma = task_ref->vma; + /* +* Number of available credits are reduced, So select +* and close windows. 
+*/ + win->vas_win.status = VAS_WIN_NO_CRED_CLOSE; + + mmap_write_lock(task_ref->mm); + /* +* vma is set in the original mapping. But this mapping +* is done with mmap() after the window is opened with ioctl. +* so we may not see the original mapping if the core remove +* is done before the original mmap() and after the ioctl. +
[PATCH v5 2/5] powerpc/inst: Define ppc_inst_t
In order to stop using 'struct ppc_inst' on PPC32, define a ppc_inst_t typedef. Signed-off-by: Christophe Leroy --- v3: Rebased and resolved conflicts v2: Anonymise the structure so that only the typedef can be used --- arch/powerpc/include/asm/code-patching.h | 18 +++ arch/powerpc/include/asm/hw_breakpoint.h | 4 +- arch/powerpc/include/asm/inst.h | 36 ++--- arch/powerpc/include/asm/sstep.h | 4 +- arch/powerpc/kernel/align.c | 4 +- arch/powerpc/kernel/epapr_paravirt.c | 2 +- arch/powerpc/kernel/hw_breakpoint.c | 4 +- .../kernel/hw_breakpoint_constraints.c| 4 +- arch/powerpc/kernel/kprobes.c | 4 +- arch/powerpc/kernel/mce_power.c | 2 +- arch/powerpc/kernel/optprobes.c | 4 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/setup_32.c| 2 +- arch/powerpc/kernel/trace/ftrace.c| 54 +-- arch/powerpc/kernel/vecemu.c | 2 +- arch/powerpc/lib/code-patching.c | 38 ++--- arch/powerpc/lib/feature-fixups.c | 4 +- arch/powerpc/lib/sstep.c | 4 +- arch/powerpc/lib/test_emulate_step.c | 10 ++-- arch/powerpc/mm/maccess.c | 2 +- arch/powerpc/perf/8xx-pmu.c | 2 +- arch/powerpc/xmon/xmon.c | 14 ++--- arch/powerpc/xmon/xmon_bpts.h | 4 +- 23 files changed, 112 insertions(+), 112 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 4ba834599c4d..46e8c5a8ce51 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -24,20 +24,20 @@ bool is_offset_in_branch_range(long offset); bool is_offset_in_cond_branch_range(long offset); -int create_branch(struct ppc_inst *instr, const u32 *addr, +int create_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); -int create_cond_branch(struct ppc_inst *instr, const u32 *addr, +int create_cond_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); int patch_branch(u32 *addr, unsigned long target, int flags); -int patch_instruction(u32 *addr, struct ppc_inst instr); -int raw_patch_instruction(u32 
*addr, struct ppc_inst instr); +int patch_instruction(u32 *addr, ppc_inst_t instr); +int raw_patch_instruction(u32 *addr, ppc_inst_t instr); static inline unsigned long patch_site_addr(s32 *site) { return (unsigned long)site + *site; } -static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) +static inline int patch_instruction_site(s32 *site, ppc_inst_t instr) { return patch_instruction((u32 *)patch_site_addr(site), instr); } @@ -58,11 +58,11 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned return modify_instruction((unsigned int *)patch_site_addr(site), clr, set); } -int instr_is_relative_branch(struct ppc_inst instr); -int instr_is_relative_link_branch(struct ppc_inst instr); +int instr_is_relative_branch(ppc_inst_t instr); +int instr_is_relative_link_branch(ppc_inst_t instr); unsigned long branch_target(const u32 *instr); -int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src); -extern bool is_conditional_branch(struct ppc_inst instr); +int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src); +bool is_conditional_branch(ppc_inst_t instr); #ifdef CONFIG_PPC_BOOK3E_64 void __patch_exception(int exc, unsigned long addr); #define patch_exception(exc, name) do { \ diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index abebfbee5b1c..88053d3c68e6 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -56,11 +56,11 @@ static inline int nr_wp_slots(void) return cpu_has_feature(CPU_FTR_DAWR1) ? 
2 : 1; } -bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, +bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr, unsigned long ea, int type, int size, struct arch_hw_breakpoint *info); -void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, +void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea); #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 10a5c1b76ca0..b3502f21e0f4 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -8,7 +8,7 @@ ({ \ long __gui_ret; \ u32 __user
[PATCH v5 4/5] powerpc/inst: Move ppc_inst_t definition in asm/reg.h
Because of circular inclusion of asm/hw_breakpoint.h, we need to move definition of asm/reg.h outside of inst.h so that asm/hw_breakpoint.h gets it without including asm/inst.h Also remove asm/inst.h from asm/uprobes.h as it's not needed anymore. Signed-off-by: Christophe Leroy --- v4: New to support inlining of copy_inst_from_kernel_nofault() in following patch. --- arch/powerpc/include/asm/hw_breakpoint.h | 1 - arch/powerpc/include/asm/inst.h | 10 +- arch/powerpc/include/asm/reg.h | 12 arch/powerpc/include/asm/uprobes.h | 1 - 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 88053d3c68e6..84d39fd42f71 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -10,7 +10,6 @@ #define _PPC_BOOK3S_64_HW_BREAKPOINT_H #include -#include #ifdef __KERNEL__ struct arch_hw_breakpoint { diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 7ef5fd3bb167..53a40faf362a 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_INST_H #include +#include #define ___get_user_instr(gu_op, dest, ptr)\ ({ \ @@ -35,13 +36,6 @@ */ #if defined(CONFIG_PPC64) || defined(__CHECKER__) -typedef struct { - u32 val; -#ifdef CONFIG_PPC64 - u32 suffix; -#endif -} __packed ppc_inst_t; - static inline u32 ppc_inst_val(ppc_inst_t x) { return x.val; @@ -50,8 +44,6 @@ static inline u32 ppc_inst_val(ppc_inst_t x) #define ppc_inst(x) ((ppc_inst_t){ .val = (x) }) #else -typedef u32 ppc_inst_t; - static inline u32 ppc_inst_val(ppc_inst_t x) { return x; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e9d27265253b..85501181f929 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1366,6 +1366,18 @@ /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ + +#if 
defined(CONFIG_PPC64) || defined(__CHECKER__) +typedef struct { + u32 val; +#ifdef CONFIG_PPC64 + u32 suffix; +#endif +} __packed ppc_inst_t; +#else +typedef u32 ppc_inst_t; +#endif + #define mfmsr()({unsigned long rval; \ asm volatile("mfmsr %0" : "=r" (rval) : \ : "memory"); rval;}) diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index fe683371336f..a7ae1860115a 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -11,7 +11,6 @@ #include #include -#include typedef ppc_opcode_t uprobe_opcode_t; -- 2.33.1
[PATCH v5 3/5] powerpc/inst: Define ppc_inst_t as u32 on PPC32
Unlike PPC64 ABI, PPC32 uses the stack to pass a parameter defined as a struct, even when the struct has a single simple element. To avoid that, define ppc_inst_t as u32 on PPC32. Keep it as 'struct ppc_inst' when __CHECKER__ is defined so that sparse can perform type checking. Also revert commit 511eea5e2ccd ("powerpc/kprobes: Fix Oops by passing ppc_inst as a pointer to emulate_step() on ppc32") as now the instruction to be emulated is passed as a register to emulate_step(). Signed-off-by: Christophe Leroy --- v2: Make it work with kprobes --- arch/powerpc/include/asm/inst.h | 15 +-- arch/powerpc/kernel/optprobes.c | 8 ++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index b3502f21e0f4..7ef5fd3bb167 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -34,6 +34,7 @@ * Instruction data type for POWER */ +#if defined(CONFIG_PPC64) || defined(__CHECKER__) typedef struct { u32 val; #ifdef CONFIG_PPC64 @@ -46,13 +47,23 @@ static inline u32 ppc_inst_val(ppc_inst_t x) return x.val; } +#define ppc_inst(x) ((ppc_inst_t){ .val = (x) }) + +#else +typedef u32 ppc_inst_t; + +static inline u32 ppc_inst_val(ppc_inst_t x) +{ + return x; +} +#define ppc_inst(x) (x) +#endif + static inline int ppc_inst_primary_opcode(ppc_inst_t x) { return ppc_inst_val(x) >> 26; } -#define ppc_inst(x) ((ppc_inst_t){ .val = (x) }) - #ifdef CONFIG_PPC64 #define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) }) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 378db980ded3..3b1c2236cbee 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 3. 
load instruction to be emulated into relevant register, and */ - if (IS_ENABLED(CONFIG_PPC64)) { - temp = ppc_inst_read(p->ainsn.insn); - patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); - } else { - patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); - } + temp = ppc_inst_read(p->ainsn.insn); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline -- 2.33.1
[PATCH v5 5/5] powerpc/inst: Optimise copy_inst_from_kernel_nofault()
copy_inst_from_kernel_nofault() uses copy_from_kernel_nofault() to copy one or two 32bits words. This means calling an out-of-line function which itself calls back copy_from_kernel_nofault_allowed() then performs a generic copy with loops. Rewrite copy_inst_from_kernel_nofault() to do everything at a single place and use __get_kernel_nofault() directly to perform single accesses without loops. Allthough the generic function uses pagefault_disable(), it is not required on powerpc because do_page_fault() bails earlier when a kernel mode fault happens on a kernel address. As the function has now become very small, inline it. With this change, on an 8xx the time spent in the loop in ftrace_replace_code() is reduced by 23% at function tracer activation and 27% at nop tracer activation. The overall time to activate function tracer (measured with shell command 'time') is 570ms before the patch and 470ms after the patch. Even vmlinux size is reduced (by 152 instruction). Before the patch: 0018 : 18: 94 21 ff e0 stwur1,-32(r1) 1c: 7c 08 02 a6 mflrr0 20: 38 a0 00 04 li r5,4 24: 93 e1 00 1c stw r31,28(r1) 28: 7c 7f 1b 78 mr r31,r3 2c: 38 61 00 08 addir3,r1,8 30: 90 01 00 24 stw r0,36(r1) 34: 48 00 00 01 bl 34 34: R_PPC_REL24 copy_from_kernel_nofault 38: 2c 03 00 00 cmpwi r3,0 3c: 40 82 00 0c bne 48 40: 81 21 00 08 lwz r9,8(r1) 44: 91 3f 00 00 stw r9,0(r31) 48: 80 01 00 24 lwz r0,36(r1) 4c: 83 e1 00 1c lwz r31,28(r1) 50: 38 21 00 20 addir1,r1,32 54: 7c 08 03 a6 mtlrr0 58: 4e 80 00 20 blr After the patch (before inlining): 0018 : 18: 3d 20 b0 00 lis r9,-20480 1c: 7c 04 48 40 cmplw r4,r9 20: 7c 69 1b 78 mr r9,r3 24: 41 80 00 14 blt 38 28: 81 44 00 00 lwz r10,0(r4) 2c: 38 60 00 00 li r3,0 30: 91 49 00 00 stw r10,0(r9) 34: 4e 80 00 20 blr 38: 38 60 ff de li r3,-34 3c: 4e 80 00 20 blr 40: 38 60 ff f2 li r3,-14 44: 4e 80 00 20 blr Signed-off-by: Christophe Leroy --- v4: Inline and remove pagefault_disable() v3: New --- arch/powerpc/include/asm/inst.h | 21 - arch/powerpc/mm/maccess.c 
| 17 - 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 53a40faf362a..631436f3f5c3 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -4,6 +4,8 @@ #include #include +#include +#include #define ___get_user_instr(gu_op, dest, ptr)\ ({ \ @@ -148,6 +150,23 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x) __str; \ }) -int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src); +static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) +{ + unsigned int val, suffix; + + if (unlikely(!is_kernel_addr((unsigned long)src))) + return -ERANGE; + + __get_kernel_nofault(, src, u32, Efault); + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { + __get_kernel_nofault(, src + 1, u32, Efault); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return 0; +Efault: + return -EFAULT; +} #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index 5abae96b2b46..ea821d0ffe16 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -11,20 +11,3 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return is_kernel_addr((unsigned long)unsafe_src); } - -int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) -{ - unsigned int val, suffix; - int err; - - err = copy_from_kernel_nofault(, src, sizeof(val)); - if (err) - return err; - if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(, src + 1, sizeof(suffix)); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} -- 2.33.1
[PATCH v5 1/5] powerpc/inst: Refactor ___get_user_instr()
PPC64 version of ___get_user_instr() can be used for PPC32 as well, by simply disabling the suffix part with IS_ENABLED(CONFIG_PPC64). Signed-off-by: Christophe Leroy --- v5: Force use of 'y' in ppc_inst_prefix on PPC32 to avoid 'use variable' warning with W=1 --- arch/powerpc/include/asm/inst.h | 13 ++--- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index b11c0e2f9639..10a5c1b76ca0 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -4,8 +4,6 @@ #include -#ifdef CONFIG_PPC64 - #define ___get_user_instr(gu_op, dest, ptr)\ ({ \ long __gui_ret; \ @@ -16,7 +14,7 @@ __chk_user_ptr(ptr);\ __gui_ret = gu_op(__prefix, __gui_ptr); \ if (__gui_ret == 0) { \ - if ((__prefix >> 26) == OP_PREFIX) {\ + if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \ __gui_ret = gu_op(__suffix, __gui_ptr + 1); \ __gui_inst = ppc_inst_prefix(__prefix, __suffix); \ } else {\ @@ -27,13 +25,6 @@ } \ __gui_ret; \ }) -#else /* !CONFIG_PPC64 */ -#define ___get_user_instr(gu_op, dest, ptr)\ -({ \ - __chk_user_ptr(ptr);\ - gu_op((dest).val, (u32 __user *)(ptr)); \ -}) -#endif /* CONFIG_PPC64 */ #define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr) @@ -71,7 +62,7 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x) } #else -#define ppc_inst_prefix(x, y) ppc_inst(x) +#define ppc_inst_prefix(x, y) ((void)y, ppc_inst(x)) static inline u32 ppc_inst_suffix(struct ppc_inst x) { -- 2.33.1
[PATCH 04/10] powerpc/pseries/vas: Reopen windows with DLPAR core add
VAS windows can be closed in the hypervisor due to lost credits when the core is removed. If these credits are available later for core add, reopen these windows and set them active. When the kernel sees page fault on the paste address, it creates new mapping on the new paste address. Then the user space can continue to use these windows and send HW compression requests to NX successfully. Signed-off-by: Haren Myneni --- arch/powerpc/include/asm/vas.h | 15 +++ arch/powerpc/platforms/book3s/vas-api.c | 1 + arch/powerpc/platforms/pseries/vas.c| 148 arch/powerpc/platforms/pseries/vas.h| 2 + 4 files changed, 166 insertions(+) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 57573d9c1e09..43cea69d1af1 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -29,6 +29,18 @@ #define VAS_THRESH_FIFO_GT_QTR_FULL2 #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3 +/* + * VAS window status + */ +#define VAS_WIN_ACTIVE 0x0 /* Used in platform independent */ + /* vas mmap() */ +#define VAS_WIN_CLOSED 0x1 +#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */ +#define VAS_WIN_MOD_IN_PROCESS 0x3 /* Process of being modified, */ + /* deallocated, or quiesced */ +#define VAS_WIN_NO_CRED_CLOSE 0x4 /* Linux specific status when */ + /* window is closed due to lost */ + /* credit */ /* * Get/Set bit fields */ @@ -59,6 +71,8 @@ struct vas_user_win_ref { struct pid *pid;/* PID of owner */ struct pid *tgid; /* Thread group ID of owner */ struct mm_struct *mm; /* Linux process mm_struct */ + struct mutex mmap_mutex;/* protects paste address mmap() */ + /* with DLPAR close/open windows */ }; /* @@ -67,6 +81,7 @@ struct vas_user_win_ref { struct vas_window { u32 winid; u32 wcreds_max; /* Window credits */ + u32 status; enum vas_cop_type cop; struct vas_user_win_ref task_ref; char *dbgname; diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 4d82c92ddd52..2b0ced611f32 100644 --- 
a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -316,6 +316,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) return PTR_ERR(txwin); } + mutex_init(>task_ref.mmap_mutex); cp_inst->txwin = txwin; return 0; diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 6b35f67d5175..ace8ee7a99e6 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -493,6 +493,7 @@ static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, memset(vcaps, 0, sizeof(*vcaps)); INIT_LIST_HEAD(>list); + vcaps->feat = feat; caps = >caps; rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, @@ -531,6 +532,149 @@ static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, return 0; } +/* + * VAS windows can be closed due to lost credits when the core is + * removed. So reopen them if credits are available due to DLPAR + * core add and set the window active status. When NX sees the page + * fault on the unmapped paste address, the kernel handles the fault + * by setting the remapping to new paste address if the window is + * active. + */ +static int reconfig_open_windows(struct vas_caps *vcaps, int creds) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *caps = >caps; + struct pseries_vas_window *win = NULL; + int rc, mv_ents = 0; + + /* +* Nothing to do if there are no closed windows. +*/ + if (!vcaps->close_wins) + return 0; + + /* +* For the core removal, the hypervisor reduces the credits +* assigned to the LPAR and the kernel closes VAS windows +* in the hypervisor depends on reduced credits. The kernel +* uses LIFO (the last windows that are opened will be closed +* first) and expects to open in the same order when credits +* are available. +* For example, 40 windows are closed when the LPAR lost 2 cores +* (dedicated). 
If 1 core is added, this LPAR can have 20 more +* credits. It means the kernel can reopen 20 windows. So move +* 20 entries in the VAS windows lost and reopen next 20 windows. +*/ + if (vcaps->close_wins > creds) + mv_ents = vcaps->close_wins - creds; + + list_for_each_entry(win, &vcaps->list, win_list) { + if (!mv_ents) + break; +
[PATCH 03/10] powerpc/pseries/vas: Save LPID in pseries_vas_window struct
The kernel sets the VAS window with partition PID when is opened in the hypervisor. During DLPAR operation, windows can be closed and reopened in the hypervisor when the credit is available. So saves this PID in pseries_vas_window struct when the window is opened initially and reuse it later during DLPAR operation. Signed-off-by: Haren Myneni --- arch/powerpc/platforms/pseries/vas.c | 6 +++--- arch/powerpc/platforms/pseries/vas.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 04a6eee2301e..6b35f67d5175 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -107,7 +107,6 @@ static int h_deallocate_vas_window(u64 winid) static int h_modify_vas_window(struct pseries_vas_window *win) { long rc; - u32 lpid = mfspr(SPRN_PID); /* * AMR value is not supported in Linux VAS implementation. @@ -115,7 +114,7 @@ static int h_modify_vas_window(struct pseries_vas_window *win) */ do { rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, - win->vas_win.winid, lpid, 0, + win->vas_win.winid, win->lpid, 0, VAS_MOD_WIN_FLAGS, 0); rc = hcall_return_busy_check(rc); @@ -125,7 +124,7 @@ static int h_modify_vas_window(struct pseries_vas_window *win) return 0; pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", - rc, win->vas_win.winid, lpid); + rc, win->vas_win.winid, win->lpid); return -EIO; } @@ -353,6 +352,7 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, if (rc) goto out; + txwin->lpid = mfspr(SPRN_PID); /* * Modify window and it is ready to use. 
*/ diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index fa7ce74f1e49..0538760d13be 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -115,6 +115,7 @@ struct pseries_vas_window { u64 domain[6]; /* Associativity domain Ids */ /* this window is allocated */ u64 util; + u32 lpid; /* List of windows opened which is used for LPM */ struct list_head win_list; -- 2.27.0
[PATCH 02/10] powerpc/pseries/vas: Add notifier for DLPAR core removal/add
The hypervisor assigns credits for each LPAR based on number of cores configured in that system. So expects to release credits (means windows) when the core is removed. This patch adds notifier for core removal/add so that the OS closes windows if the system loses credits due to core removal and reopen windows when the credits available later. Signed-off-by: Haren Myneni --- arch/powerpc/platforms/pseries/vas.c | 34 1 file changed, 34 insertions(+) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index ecdd21f517c0..04a6eee2301e 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -531,6 +531,36 @@ static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, return 0; } +/* + * Total number of default credits available (target_credits) + * in LPAR depends on number of cores configured. It varies based on + * whether processors are in shared mode or dedicated mode. + * Get the notifier when CPU configuration is changed with DLPAR + * operation so that get the new target_credits (vas default capabilities) + * and then update the existing windows usage if needed. 
+ */ +static int pseries_vas_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + struct device_node *dn = rd->dn; + const __be32 *intserv; + int len, rc = 0; + + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); + /* +* Processor config is not changed +*/ + if (!intserv) + return NOTIFY_OK; + + return rc; +} + +static struct notifier_block pseries_vas_nb = { + .notifier_call = pseries_vas_notifier, +}; + static int __init pseries_vas_init(void) { struct hv_vas_cop_feat_caps *hv_cop_caps; @@ -584,6 +614,10 @@ static int __init pseries_vas_init(void) goto out_cop; } + /* Processors can be added/removed only on LPAR */ + if (copypaste_feat && firmware_has_feature(FW_FEATURE_LPAR)) + of_reconfig_notifier_register(&pseries_vas_nb); + pr_info("GZIP feature is available\n"); out_cop: -- 2.27.0
[PATCH 01/10] powerpc/pseries/vas: Use common names in VAS capability structure
target/used/avail_creds provides credits usage to user space via sysfs and the same interface can be used on PowerNV in future. Remove "lpar" from these names so that applicable on both PowerVM and PowerNV. Signed-off-by: Haren Myneni --- arch/powerpc/platforms/pseries/vas.c | 10 +- arch/powerpc/platforms/pseries/vas.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b043e3936d21..ecdd21f517c0 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -303,8 +303,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, cop_feat_caps = >caps; - if (atomic_inc_return(_feat_caps->used_lpar_creds) > - atomic_read(_feat_caps->target_lpar_creds)) { + if (atomic_inc_return(_feat_caps->used_creds) > + atomic_read(_feat_caps->target_creds)) { pr_err("Credits are not available to allocate window\n"); rc = -EINVAL; goto out; @@ -378,7 +378,7 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); out: - atomic_dec(_feat_caps->used_lpar_creds); + atomic_dec(_feat_caps->used_creds); kfree(txwin); return ERR_PTR(rc); } @@ -438,7 +438,7 @@ static int vas_deallocate_window(struct vas_window *vwin) } list_del(>win_list); - atomic_dec(>used_lpar_creds); + atomic_dec(>used_creds); mutex_unlock(_pseries_mutex); put_vas_user_win_ref(>task_ref); @@ -514,7 +514,7 @@ static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, } caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); - atomic_set(>target_lpar_creds, + atomic_set(>target_creds, be16_to_cpu(hv_caps->target_lpar_creds)); if (feat == VAS_GZIP_DEF_FEAT) { caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 
4ecb3fcabd10..fa7ce74f1e49 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -72,9 +72,9 @@ struct vas_cop_feat_caps { }; /* Total LPAR available credits. Can be different from max LPAR */ /* credits due to DLPAR operation */ - atomic_ttarget_lpar_creds; - atomic_tused_lpar_creds; /* Used credits so far */ - u16 avail_lpar_creds; /* Remaining available credits */ + atomic_ttarget_creds; + atomic_tused_creds; /* Used credits so far */ + u16 avail_creds;/* Remaining available credits */ }; /* -- 2.27.0
[PATCH 00/10] powerpc/pseries/vas: NXGZIP support with DLPAR
PowerPC provides HW compression with NX coprocessor. This feature is available on both PowerNV and PowerVM and included in Linux. Since each powerpc chip has one NX coprocessor, the VAS introduces the concept of windows / credits to manage access to this hardware resource. On powerVM, these limited resources should be available across all LPARs. So the hypervisor assigns the specific credits to each LPAR based on processor entitlement so that one LPAR does not overload NX. The hypervisor can reject the window open request to a partition if exceeds its credit limit (1 credit per window). So the total number of target credits in a partition can be changed if the core configuration is modified. The hypervisor expects the partition to modify its window usage depends on new target credits. For example, if the partition uses more credits than the new target credits, it should close the excessive windows so that the NX resource will be available to other partitions. This patch series enables OS to support this dynamic credit management with DLPAR core removal/add. Core removal operation: - Get new VAS capabilities from the hypervisor when the DLPAR notifier is received. This capabilities provides the new target credits based on new processor entitlement. In the case of QoS credit changes, the notification will be issued by updating the target_creds via sysfs. - If the partition is already used more than the new target credits, the kernel selects windows, unmap the current paste address and close them in the hypervisor, It uses LIFO to identify these windows - last windows that are opened are the first ones to be closed. - When the user space issue requests on these windows, NX generates page fault on the unmap paste address. The kernel handles the fault by returning the paste instruction failure if the window is not active (means unmap paste). Then up to the library / user space to fall back to SW compression or manage with the current windows. 
Core add operation: - The kernel can see increased target credits from the new VAS capabilities. - Scans the window list for the closed windows in the hypervisor due to lost credit before and selects windows based on same LIFO. - Make these corresponding windows active and create remap with the same VMA on the new paste address in the fault handler. - Then the user space should expect paste successful later. Patch 1: Define common names for sysfs target/used/avail_creds so that same sysfs entries can be used even on PowerNV later. Patch 2: Add VAS notifier for DLPAR core add / removal Patch 3: Save LPID in the vas window struct during initial window open and use it when reopen later. Patch 4: When credits are available, reopen windows that are closed before with core removal. Patch 5: Close windows in the hypervisor when the partition exceeds its usage than the new target credits. Patch 6: If the window is closed in the hypervisor before the user space issue the initial mmap(), return -EACCES failure. Patch 7: Add new mmap fault handler which handles the page fault from NX on paste address. Patch 8: Return the paste instruction failure if the window is not active. Patch 9 & 10: The user space determines the credit usage with sysfs target/avail/used_creds interfaces. drmgr uses target_creds to notify OS for QoS credit changes. Thanks to Nicholas Piggin and Aneesh Kumar for the valuable suggestions on the NXGZIP design to support DLPAR operations. 
Haren Myneni (10): powerpc/pseries/vas: Use common names in VAS capability structure powerpc/pseries/vas: Add notifier for DLPAR core removal/add powerpc/pseries/vas: Save partition PID in pseries_vas_window struct powerpc/pseries/vas: Reopen windows with DLPAR core add powerpc/pseries/vas: Close windows with DLPAR core removal powerpc/vas: Map paste address only if window is active powerpc/vas: Add paste address mmap fault handler powerpc/vas: Return paste instruction failure if window is not active powerpc/pseries/vas: sysfs interface to export capabilities powerpc/pseries/vas: Write 'target_creds' for QoS credits change arch/powerpc/include/asm/ppc-opcode.h | 2 + arch/powerpc/include/asm/vas.h | 16 ++ arch/powerpc/platforms/book3s/vas-api.c| 129 - arch/powerpc/platforms/pseries/Makefile| 2 +- arch/powerpc/platforms/pseries/vas-sysfs.c | 246 + arch/powerpc/platforms/pseries/vas.c | 293 - arch/powerpc/platforms/pseries/vas.h | 16 +- 7 files changed, 691 insertions(+), 13 deletions(-) create mode 100644 arch/powerpc/platforms/pseries/vas-sysfs.c -- 2.27.0
RE: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop.
The final result of our testing is that the patch set posted seems to address all known defects in the Linux kernel. The mentioned additional problems are entirely caused by the antivirus solution on the windows box. The antivirus solution blocks the disconnect messages from reaching the RNDIS driver so it has no idea the USB device went away. There is nothing we can do to address this in the Linux kernel. I propose we move forward with the patchset. Eugene T. Bordenkircher -Original Message- From: Thorsten Leemhuis Sent: Thursday, November 25, 2021 5:59 AM To: Eugene Bordenkircher ; Thorsten Leemhuis ; Joakim Tjernlund ; linuxppc-dev@lists.ozlabs.org; linux-...@vger.kernel.org Cc: leoyang...@nxp.com; gre...@linuxfoundation.org; ba...@kernel.org Subject: Re: bug: usb: gadget: FSL_UDC_CORE Corrupted request list leads to unrecoverable loop. Hi, this is your Linux kernel regression tracker speaking. Top-posting for once, to make this easy to process for everyone: Li Yang and Felipe Balbi: how to move on with this? It's quite an old regression, but nevertheless it is one and thus should be fixed. Part of my position is to make that happen and thus remind developers and maintainers about this until the regression is resolved. Ciao, Thorsten On 16.11.21 20:11, Eugene Bordenkircher wrote: > On 02.11.21 22:15, Joakim Tjernlund wrote: >> On Sat, 2021-10-30 at 14:20 +, Joakim Tjernlund wrote: >>> On Fri, 2021-10-29 at 17:14 +, Eugene Bordenkircher wrote: >> We've discovered a situation where the FSL udc driver (drivers/usb/gadget/udc/fsl_udc_core.c) will enter a loop iterating over the request queue, but the queue has been corrupted at some point so it loops infinitely. I believe we have narrowed into the offending code, but we are in need of assistance trying to find an appropriate fix for the problem. The identified code appears to be in all versions of the Linux kernel the driver exists in. The problem appears to be when handling a USB_REQ_GET_STATUS request. 
The driver gets this request and then calls the ch9getstatus() function. In this function, it starts a request by "borrowing" the per device status_req, filling it in, and then queuing it with a call to list_add_tail() to add the request to the endpoint queue. Right before it exits the function however, it's calling ep0_prime_status(), which is filling out that same status_req structure and then queuing it with another call to list_add_tail() to add the request to the endpoint queue. This adds two instances of the exact same LIST_HEAD to the endpoint queue, which breaks the list since the prev and next pointers end up pointing to the wrong things. This ends up causing a hard loop the next time nuke() gets called, which happens on the next setup IRQ. I'm not sure what the appropriate fix to this problem is, mostly due to my lack of expertise in USB and this driver stack. The code has been this way in the kernel for a very long time, which suggests that it has been working, unless USB_REQ_GET_STATUS requests are never made. This further suggests that there is something else going on that I don't understand. Deleting the call to ep0_prime_status() and the following ep0stall() call appears, on the surface, to get the device working again, but may have side effects that I'm not seeing. I'm hopeful someone in the community can help provide some information on what I may be missing or help come up with a solution to the problem. A big thank you to anyone who would like to help out. >>> >>> Run into this to a while ago. Found the bug and a few more fixes. >>> This is against 4.19 so you may have to tweak them a bit. >>> Feel free to upstream them. >> >> Curious, did my patches help? Good to known once we upgrade as well. > > There's good news and bad news. > > The good news is that this appears to stop the driver from entering an > infinite loop, which prevents the Linux system from locking up and > never recovering. So I'm willing to say we've made the behavior > better. 
> > The bad news is that once we get past this point, there is new bad > behavior. What is on top of this driver in our system is the RNDIS > gadget driver communicating to a Laptop running Win10 -1809. > Everything appears to work fine with the Linux system until there is a > USB disconnect. After the disconnect, the Linux side appears to > continue on just fine, but the Windows side doesn't seem to recognize > the disconnect, which causes the USB driver on that side to hang > forever and eventually blue screen the box. This doesn't happen on > all machines, just a select few. I think we can isolate the > behavior to a specific antivirus/security software driver that is > inserting itself into the USB stack and filtering the disconnect >
Re: [PATCH v4 1/5] powerpc/inst: Refactor ___get_user_instr()
Hi Christophe, I love your patch! Yet something to improve: [auto build test ERROR on powerpc/next] [also build test ERROR on v5.16-rc3 next-20211129] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211129-195613 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-allnoconfig (https://download.01.org/0day-ci/archive/20211130/202111300028.pvdtx2vc-...@intel.com/config) compiler: powerpc-linux-gcc (GCC) 11.2.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/12f08114cece066b2640aef99e2bc74f49eebef5 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211129-195613 git checkout 12f08114cece066b2640aef99e2bc74f49eebef5 # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kernel/ If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All errors (new ones prefixed by >>): In file included from arch/powerpc/include/asm/hw_breakpoint.h:13, from arch/powerpc/include/asm/processor.h:43, from arch/powerpc/include/asm/thread_info.h:40, from include/linux/thread_info.h:60, from include/asm-generic/preempt.h:5, from ./arch/powerpc/include/generated/asm/preempt.h:1, from include/linux/preempt.h:78, from include/linux/spinlock.h:55, from include/linux/mmzone.h:8, from include/linux/gfp.h:6, from include/linux/mm.h:10, from arch/powerpc/kernel/align.c:17: arch/powerpc/kernel/align.c: In function 
'fix_alignment': >> arch/powerpc/include/asm/inst.h:12:32: error: variable '__suffix' set but >> not used [-Werror=unused-but-set-variable] 12 | unsigned int __prefix, __suffix; \ |^~~~ arch/powerpc/include/asm/inst.h:31:34: note: in expansion of macro '___get_user_instr' 31 | #define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr) | ^ arch/powerpc/kernel/align.c:310:21: note: in expansion of macro '__get_user_instr' 310 | r = __get_user_instr(instr, (void __user *)regs->nip); | ^~~~ cc1: all warnings being treated as errors -- In file included from arch/powerpc/include/asm/hw_breakpoint.h:13, from arch/powerpc/include/asm/processor.h:43, from arch/powerpc/include/asm/thread_info.h:40, from include/linux/thread_info.h:60, from arch/powerpc/include/asm/ptrace.h:323, from arch/powerpc/include/asm/hw_irq.h:12, from arch/powerpc/include/asm/irqflags.h:12, from include/linux/irqflags.h:16, from include/asm-generic/cmpxchg-local.h:6, from arch/powerpc/include/asm/cmpxchg.h:526, from arch/powerpc/include/asm/atomic.h:11, from include/linux/atomic.h:7, from include/linux/rcupdate.h:25, from include/linux/rculist.h:11, from include/linux/pid.h:5, from include/linux/sched.h:14, from include/linux/uaccess.h:8, from arch/powerpc/kernel/hw_breakpoint_constraints.c:3: arch/powerpc/kernel/hw_breakpoint_constraints.c: In function 'wp_get_instr_detail': >> arch/powerpc/include/asm/inst.h:12:32: error: variable '__suffix' set but >> not used [-Werror=unused-but-set-variable] 12 | unsigned int __prefix, __suffix; \ |^~~~ arch/powerpc/include/asm/inst.h:31:34: note: in expansion of macro '___get_user_instr' 31 | #define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr) | ^ arch/powerpc/kernel/hw_breakpoint_constraints.c:135:13: note: in expansion of macro '__get_user_instr' 135 | if (__get_user_instr(*instr, (void __user *)regs->nip)) |
Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote: > Sort key p_stage_cyc is used to present the latency > cycles spend in pipeline stages. perf tool has local > p_stage_cyc sort key to display this info. There is no > global variant available for this sort key. local variant > shows latency in a sinlge sample, whereas, global value > will be useful to present the total latency (sum of > latencies) in the hist entry. It represents latency > number multiplied by the number of samples. > > Add global (p_stage_cyc) and local variant > (local_p_stage_cyc) for this sort key. Use the > local_p_stage_cyc as default option for "mem" sort mode. > Also add this to list of dynamic sort keys. > > Signed-off-by: Athira Rajeev > Reported-by: Namhyung Kim > --- > tools/perf/util/hist.c | 4 +++- > tools/perf/util/hist.h | 3 ++- > tools/perf/util/sort.c | 34 +- > tools/perf/util/sort.h | 3 ++- > 4 files changed, 32 insertions(+), 12 deletions(-) > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > index b776465e04ef..0a8033b09e28 100644 > --- a/tools/perf/util/hist.c > +++ b/tools/perf/util/hist.c > @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct > hist_entry *h) > hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); > hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); > hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); > - hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); > + hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); > + hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); > + > if (symbol_conf.nanosecs) > hists__new_col_len(hists, HISTC_TIME, 16); > else > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h > index 5343b62476e6..2752ce681108 100644 > --- a/tools/perf/util/hist.h > +++ b/tools/perf/util/hist.h > @@ -75,7 +75,8 @@ enum hist_column { > HISTC_MEM_BLOCKED, > HISTC_LOCAL_INS_LAT, > HISTC_GLOBAL_INS_LAT, > - HISTC_P_STAGE_CYC, > + HISTC_LOCAL_P_STAGE_CYC, > + HISTC_GLOBAL_P_STAGE_CYC, > 
HISTC_NR_COLS, /* Last entry */ > }; > > diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c > index e9216a292a04..e978f7883e07 100644 > --- a/tools/perf/util/sort.c > +++ b/tools/perf/util/sort.c > @@ -37,7 +37,7 @@ const char default_parent_pattern[] = > "^sys_|^do_page_fault"; > const char *parent_pattern = default_parent_pattern; > const char *default_sort_order = "comm,dso,symbol"; > const char default_branch_sort_order[] = > "comm,dso_from,symbol_from,symbol_to,cycles"; > -const char default_mem_sort_order[] = > "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; > +const char default_mem_sort_order[] = > "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc"; > const char default_top_sort_order[] = "dso,symbol"; > const char default_diff_sort_order[] = "dso,symbol"; > const char default_tracepoint_sort_order[] = "trace"; > @@ -46,8 +46,8 @@ const char *field_order; > regex_t ignore_callees_regex; > int have_ignore_callees = 0; > enum sort_mode sort__mode = SORT_MODE__NORMAL; > -const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; > -const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; > +const char *dynamic_headers[] = {"local_ins_lat", "ins_lat", > "local_p_stage_cyc", "p_stage_cyc"}; so you also add global ins_lat, right? will this change some default behaviour? > +const char *arch_specific_sort_keys[] = {"local_p_stage_cyc", > "p_stage_cyc"}; nit.. both dynamic_headers and arch_specific_sort_keys could be static right? 
thanks, jirka > > /* > * Replaces all occurrences of a char used with the: > @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = { > }; > > static int64_t > -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry > *right) > +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) > { > return left->p_stage_cyc - right->p_stage_cyc; > } > > +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, > char *bf, > + size_t size, unsigned int width) > +{ > + return repsep_snprintf(bf, size, "%-*u", width, > + he->p_stage_cyc * he->stat.nr_events); > +} > + > + > static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, > size_t size, unsigned int width) > { > return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); > } > > -struct sort_entry sort_p_stage_cyc = { > - .se_header = "Pipeline Stage Cycle", > - .se_cmp = sort__global_p_stage_cyc_cmp, > +struct sort_entry sort_local_p_stage_cyc = { > + .se_header = "Local Pipeline Stage Cycle", > +
Re: [PATCH v5 05/12] KVM: RISC-V: Use Makefile.kvm for common files
On Tue, 2021-11-23 at 14:42 +0530, Anup Patel wrote: > On Sun, Nov 21, 2021 at 6:25 PM David Woodhouse wrote: > > > From: David Woodhouse > > Signed-off-by: David Woodhouse > > Looks good to me. > > For KVM RISC-V, > > Acked-by: Anup Patel > Reviewed-by: Anup Patel Thanks. I've included those in the tree at https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/xen-evtchn which is based on kvm/master but rebases cleanly to kvm/queue. I'm working on additional support (IPI, timers, etc.) to go on top but will post that in a separate series rather than adding more to this one. smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
On Mon, Nov 29, 2021 at 02:43:48PM +0530, Athira Rajeev wrote: > > > > On 28-Nov-2021, at 10:04 PM, Jiri Olsa wrote: > > > > On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote: > >> Sort key p_stage_cyc is used to present the latency > >> cycles spend in pipeline stages. perf tool has local > >> p_stage_cyc sort key to display this info. There is no > >> global variant available for this sort key. local variant > >> shows latency in a sinlge sample, whereas, global value > >> will be useful to present the total latency (sum of > >> latencies) in the hist entry. It represents latency > >> number multiplied by the number of samples. > >> > >> Add global (p_stage_cyc) and local variant > >> (local_p_stage_cyc) for this sort key. Use the > >> local_p_stage_cyc as default option for "mem" sort mode. > >> Also add this to list of dynamic sort keys. > >> > >> Signed-off-by: Athira Rajeev > >> Reported-by: Namhyung Kim > > > > I can't apply this to Arnaldo's perf/core, could you please rebase? > > > > patching file util/hist.c > > patching file util/hist.h > > patching file util/sort.c > > Hunk #3 FAILED at 1392. > > Hunk #4 succeeded at 1878 (offset 20 lines). > > 1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej > > patching file util/sort.h > > > > thanks, > > jirka > > Hi Jiri, > > Thanks for checking this patch. > > Actually these changes are on top of three other fixes from Namhyung which > are already part of upstream. Below are the commits. > > 784e8adda4cd ("perf sort: Fix the 'weight' sort key behavior”) > 4d03c75363ee ("perf sort: Fix the 'ins_lat' sort key behavior”) > db4b28402909 ("perf sort: Fix the 'p_stage_cyc' sort key behavior”) > > I checked in Arnaldo’s perf/core, but these commits are not there. But I > could see them in 'tmp.perf/urgent' > I think perf/core is not yet updated. 
ah ok, I got it applied on perf/urgent, thanks jirka > > Thanks > Athira Rajeev > > > > >> --- > >> tools/perf/util/hist.c | 4 +++- > >> tools/perf/util/hist.h | 3 ++- > >> tools/perf/util/sort.c | 34 +- > >> tools/perf/util/sort.h | 3 ++- > >> 4 files changed, 32 insertions(+), 12 deletions(-) > >> > >> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > >> index b776465e04ef..0a8033b09e28 100644 > >> --- a/tools/perf/util/hist.c > >> +++ b/tools/perf/util/hist.c > >> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct > >> hist_entry *h) > >>hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); > >>hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); > >>hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); > >> - hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); > >> + hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); > >> + hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); > >> + > >>if (symbol_conf.nanosecs) > >>hists__new_col_len(hists, HISTC_TIME, 16); > >>else > >> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h > >> index 5343b62476e6..2752ce681108 100644 > >> --- a/tools/perf/util/hist.h > >> +++ b/tools/perf/util/hist.h > >> @@ -75,7 +75,8 @@ enum hist_column { > >>HISTC_MEM_BLOCKED, > >>HISTC_LOCAL_INS_LAT, > >>HISTC_GLOBAL_INS_LAT, > >> - HISTC_P_STAGE_CYC, > >> + HISTC_LOCAL_P_STAGE_CYC, > >> + HISTC_GLOBAL_P_STAGE_CYC, > >>HISTC_NR_COLS, /* Last entry */ > >> }; > >> > >> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c > >> index e9216a292a04..e978f7883e07 100644 > >> --- a/tools/perf/util/sort.c > >> +++ b/tools/perf/util/sort.c > >> @@ -37,7 +37,7 @@ const char default_parent_pattern[] = > >> "^sys_|^do_page_fault"; > >> const char *parent_pattern = default_parent_pattern; > >> const char *default_sort_order = "comm,dso,symbol"; > >> const char default_branch_sort_order[] = > >> "comm,dso_from,symbol_from,symbol_to,cycles"; > >> -const chardefault_mem_sort_order[] = > >> 
"local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; > >> +const chardefault_mem_sort_order[] = > >> "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc"; > >> const char default_top_sort_order[] = "dso,symbol"; > >> const char default_diff_sort_order[] = "dso,symbol"; > >> const char default_tracepoint_sort_order[] = "trace"; > >> @@ -46,8 +46,8 @@ const char *field_order; > >> regex_tignore_callees_regex; > >> inthave_ignore_callees = 0; > >> enum sort_mode sort__mode = SORT_MODE__NORMAL; > >> -const char*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; > >> -const char*arch_specific_sort_keys[] = {"p_stage_cyc"}; > >> +const char*dynamic_headers[] = {"local_ins_lat", "ins_lat", > >> "local_p_stage_cyc", "p_stage_cyc"}; > >> +const char*arch_specific_sort_keys[] = {"local_p_stage_cyc", > >> "p_stage_cyc"}; > >> > >> /* > >>
Re: [PATCH v4 1/5] powerpc/inst: Refactor ___get_user_instr()
Hi Christophe, I love your patch! Perhaps something to improve: [auto build test WARNING on powerpc/next] [also build test WARNING on v5.16-rc3 next-20211129] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211129-195613 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-allyesconfig (https://download.01.org/0day-ci/archive/20211129/202111292213.tqmvcy38-...@intel.com/config) compiler: powerpc-linux-gcc (GCC) 11.2.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/12f08114cece066b2640aef99e2bc74f49eebef5 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christophe-Leroy/powerpc-inst-Refactor-___get_user_instr/20211129-195613 git checkout 12f08114cece066b2640aef99e2bc74f49eebef5 # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kernel/ If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): In file included from arch/powerpc/include/asm/hw_breakpoint.h:13, from arch/powerpc/include/asm/processor.h:43, from arch/powerpc/include/asm/thread_info.h:40, from include/linux/thread_info.h:60, from include/asm-generic/preempt.h:5, from ./arch/powerpc/include/generated/asm/preempt.h:1, from include/linux/preempt.h:78, from include/linux/spinlock.h:55, from include/linux/mmzone.h:8, from include/linux/gfp.h:6, from include/linux/mm.h:10, from arch/powerpc/kernel/align.c:17: arch/powerpc/kernel/align.c: In 
function 'fix_alignment': >> arch/powerpc/include/asm/inst.h:12:32: warning: variable '__suffix' set but >> not used [-Wunused-but-set-variable] 12 | unsigned int __prefix, __suffix; \ |^~~~ arch/powerpc/include/asm/inst.h:31:34: note: in expansion of macro '___get_user_instr' 31 | #define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr) | ^ arch/powerpc/kernel/align.c:310:21: note: in expansion of macro '__get_user_instr' 310 | r = __get_user_instr(instr, (void __user *)regs->nip); | ^~~~ -- In file included from arch/powerpc/include/asm/hw_breakpoint.h:13, from arch/powerpc/include/asm/processor.h:43, from arch/powerpc/include/asm/thread_info.h:40, from include/linux/thread_info.h:60, from arch/powerpc/include/asm/ptrace.h:323, from arch/powerpc/include/asm/hw_irq.h:12, from arch/powerpc/include/asm/irqflags.h:12, from include/linux/irqflags.h:16, from include/asm-generic/cmpxchg-local.h:6, from arch/powerpc/include/asm/cmpxchg.h:526, from arch/powerpc/include/asm/atomic.h:11, from include/linux/atomic.h:7, from include/linux/rcupdate.h:25, from include/linux/rculist.h:11, from include/linux/pid.h:5, from include/linux/sched.h:14, from include/linux/uaccess.h:8, from arch/powerpc/kernel/hw_breakpoint_constraints.c:3: arch/powerpc/kernel/hw_breakpoint_constraints.c: In function 'wp_get_instr_detail': >> arch/powerpc/include/asm/inst.h:12:32: warning: variable '__suffix' set but >> not used [-Wunused-but-set-variable] 12 | unsigned int __prefix, __suffix; \ |^~~~ arch/powerpc/include/asm/inst.h:31:34: note: in expansion of macro '___get_user_instr' 31 | #define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr) | ^ arch/powerpc/kernel/hw_breakpoint_constraints.c:135:13: note: in expansion of macro '__get_user_instr' 135 | if (__get_user_instr(*instr, (void __user *)regs->nip)) | ^~~~ -- In file in
[PATCH v3 02/10] powerpc/mm: Move vma_mmu_pagesize() and hugetlb_get_unmapped_area() to slice.c
vma_mmu_pagesize() is only required for slices, otherwise there is a generic weak version. hugetlb_get_unmapped_area() is dedicated to slices. radix__hugetlb_get_unmapped_area() as well. Move them to slice.c Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 4 -- arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 55 -- arch/powerpc/mm/book3s64/slice.c | 76 arch/powerpc/mm/hugetlbpage.c| 28 4 files changed, 76 insertions(+), 87 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index 12e150e615b7..b37a28f62cf6 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -8,10 +8,6 @@ */ void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern unsigned long -radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); extern void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index 23d3e08911d3..d2fb776febb4 100644 --- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c @@ -41,61 +41,6 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize); } -/* - * A vairant of hugetlb_get_unmapped_area doing topdown search - * FIXME!! should we do as x86 does or non hugetlb area does ? - * ie, use topdown or not based on mmap_is_legacy check ? 
- */ -unsigned long -radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - struct hstate *h = hstate_file(file); - int fixed = (flags & MAP_FIXED); - unsigned long high_limit; - struct vm_unmapped_area_info info; - - high_limit = DEFAULT_MAP_WINDOW; - if (addr >= high_limit || (fixed && (addr + len > high_limit))) - high_limit = TASK_SIZE; - - if (len & ~huge_page_mask(h)) - return -EINVAL; - if (len > high_limit) - return -ENOMEM; - - if (fixed) { - if (addr > high_limit - len) - return -ENOMEM; - if (prepare_hugepage_range(file, addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr = ALIGN(addr, huge_page_size(h)); - vma = find_vma(mm, addr); - if (high_limit - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vm_start_gap(vma))) - return addr; - } - /* -* We are always doing an topdown search here. Slice code -* does that too. 
-*/ - info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; - info.low_limit = max(PAGE_SIZE, mmap_min_addr); - info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW); - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - info.align_offset = 0; - - return vm_unmapped_area(); -} - void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c index c83be371c6e7..4c3e9601fdf6 100644 --- a/arch/powerpc/mm/book3s64/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -777,4 +777,80 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, return !slice_check_range_fits(mm, maskp, addr, len); } + +unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) +{ + /* With radix we don't use slice, so derive it from vma*/ + if (radix_enabled()) + return vma_kernel_pagesize(vma); + + return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start)); +} + +/* + * A variant of hugetlb_get_unmapped_area() doing topdown search + * FIXME!! should we do as x86 does or non hugetlb area does ? + * ie, use topdown or not based on mmap_is_legacy check ? + */ +static unsigned long +radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, +unsigned long
[PATCH v3 00/10] Convert powerpc to default topdown mmap layout
Rebased on top of Nic's v5 series "powerpc: Make hash MMU code build configurable" This series converts powerpc to default topdown mmap layout. powerpc provides its own arch_get_unmapped_area() only when slices are needed, which is only for book3s/64. First part of the series moves slices into book3s/64 specific directories and cleans up other subarchitectures. Then a small modification is done to core mm to allow powerpc to still provide its own arch_randomize_brk() Last part converts to default topdown mmap layout. Changes in v3: - Fixed missing in last patch - Added a patch to move SZ_1T out of drivers/pci/controller/pci-xgene.c Changes in v2: - Moved patch 4 before patch 2 - Make generic arch_randomize_brk() __weak - Added patch 9 Christophe Leroy (10): powerpc/mm: Make slice specific to book3s/64 powerpc/mm: Move vma_mmu_pagesize() and hugetlb_get_unmapped_area() to slice.c powerpc/mm: Remove CONFIG_PPC_MM_SLICES powerpc/mm: Remove asm/slice.h powerpc/mm: Call radix__arch_get_unmapped_area() from arch_get_unmapped_area() mm: Allow arch specific arch_randomize_brk() with CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT powerpc/mm: Convert to default topdown mmap layout powerpc/mm: Properly randomise mmap with slices sizes.h: Add SZ_1T macro powerpc: Simplify and move arch_randomize_brk() arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/book3s/64/hash.h | 7 +- arch/powerpc/include/asm/book3s/64/hugetlb.h | 4 - arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 + arch/powerpc/include/asm/book3s/64/slice.h| 18 ++ arch/powerpc/include/asm/hugetlb.h| 2 +- arch/powerpc/include/asm/paca.h | 7 - arch/powerpc/include/asm/page.h | 1 - arch/powerpc/include/asm/processor.h | 2 - arch/powerpc/include/asm/slice.h | 46 arch/powerpc/kernel/paca.c| 5 - arch/powerpc/kernel/process.c | 41 arch/powerpc/mm/Makefile | 3 +- arch/powerpc/mm/book3s64/Makefile | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 33 +-- arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 55 - arch/powerpc/mm/{ => 
book3s64}/slice.c| 200 ++- arch/powerpc/mm/hugetlbpage.c | 28 --- arch/powerpc/mm/mmap.c| 228 -- arch/powerpc/mm/nohash/mmu_context.c | 9 - arch/powerpc/mm/nohash/tlb.c | 4 - arch/powerpc/platforms/Kconfig.cputype| 4 - drivers/pci/controller/pci-xgene.c| 1 - include/linux/sizes.h | 2 + mm/util.c | 2 +- 25 files changed, 237 insertions(+), 470 deletions(-) delete mode 100644 arch/powerpc/include/asm/slice.h rename arch/powerpc/mm/{ => book3s64}/slice.c (80%) delete mode 100644 arch/powerpc/mm/mmap.c -- 2.33.1
[PATCH v3 10/10] powerpc: Simplify and move arch_randomize_brk()
arch_randomize_brk() is only needed for hash on book3s/64, for other platforms the one provided by the default mmap layout is good enough. Move it to hash_utils.c and use randomize_page() like the generic one. And properly opt out the radix case instead of making an assumption on mmu_highuser_ssize. Also change to a 32M range like most other architectures instead of 8M. Signed-off-by: Christophe Leroy --- v3: - Add missing include - Move SZ_1T in a previous patch that moves it out of drivers/pci/controller/pci-xgene.c v2: New Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/process.c | 41 --- arch/powerpc/mm/book3s64/hash_utils.c | 19 + 2 files changed, 19 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a64cfbb85ca2..44c4bce5211d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -34,10 +34,8 @@ #include #include #include -#include #include #include -#include #include #include @@ -2310,42 +2308,3 @@ unsigned long arch_align_stack(unsigned long sp) sp -= get_random_int() & ~PAGE_MASK; return sp & ~0xf; } - -static inline unsigned long brk_rnd(void) -{ -unsigned long rnd = 0; - - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT))); - else - rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT))); - - return rnd << PAGE_SHIFT; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long base = mm->brk; - unsigned long ret; - -#ifdef CONFIG_PPC_BOOK3S_64 - /* -* If we are using 1TB segments and we are allowed to randomise -* the heap, we can put it above 1TB so it is backed by a 1TB -* segment. Otherwise the heap will be in the bottom 1TB -* which always uses 256MB segments and this may result in a -* performance penalty. 
-*/ - if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) - base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); -#endif - - ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < mm->brk) - return mm->brk; - - return ret; -} - diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7ecadf5e6bf9..68a5468b0f19 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -2171,3 +2173,20 @@ void __init print_system_hash_info(void) if (htab_hash_mask) pr_info("htab_hash_mask= 0x%lx\n", htab_hash_mask); } + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + /* +* If we are using 1TB segments and we are allowed to randomise +* the heap, we can put it above 1TB so it is backed by a 1TB +* segment. Otherwise the heap will be in the bottom 1TB +* which always uses 256MB segments and this may result in a +* performance penalty. +*/ + if (is_32bit_task()) + return randomize_page(mm->brk, SZ_32M); + else if (!radix_enabled() && mmu_highuser_ssize == MMU_SEGSIZE_1T) + return randomize_page(max_t(unsigned long, mm->brk, SZ_1T), SZ_1G); + else + return randomize_page(mm->brk, SZ_1G); +} -- 2.33.1
[PATCH v3 03/10] powerpc/mm: Remove CONFIG_PPC_MM_SLICES
CONFIG_PPC_MM_SLICES is always selected by hash book3s/64. CONFIG_PPC_MM_SLICES is never selected by other platforms. Remove it. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/book3s/64/hash.h | 4 ++-- arch/powerpc/include/asm/hugetlb.h| 2 +- arch/powerpc/include/asm/paca.h | 7 --- arch/powerpc/include/asm/slice.h | 13 ++--- arch/powerpc/kernel/paca.c| 5 - arch/powerpc/mm/book3s64/Makefile | 3 +-- arch/powerpc/mm/book3s64/hash_utils.c | 14 -- arch/powerpc/platforms/Kconfig.cputype| 4 8 files changed, 6 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 674fe0e890dc..97f2fc217a49 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -99,10 +99,10 @@ * Defines the address of the vmemap area, in its own region on * hash table CPUs. */ -#ifdef CONFIG_PPC_MM_SLICES +#ifdef CONFIG_PPC_64S_HASH_MMU #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#endif /* CONFIG_PPC_MM_SLICES */ +#endif /* PTEIDX nibble */ #define _PTEIDX_SECONDARY 0x8 diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index f18c543bc01d..86a60ba6bd2a 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -24,7 +24,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { - if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) + if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU) && !radix_enabled()) return slice_is_hugepage_only_range(mm, addr, len); return 0; } diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 295573a82c66..bd4dd02e61c8 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -152,16 +152,9 @@ struct paca_struct { struct tlb_core_data tcd; #endif /* CONFIG_PPC_BOOK3E */ -#ifdef CONFIG_PPC_BOOK3S #ifdef CONFIG_PPC_64S_HASH_MMU -#ifdef 
CONFIG_PPC_MM_SLICES unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; -#else - u16 mm_ctx_user_psize; - u16 mm_ctx_sllp; -#endif -#endif #endif /* diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index 0bdd9c62eca0..b15141f2bd76 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -10,7 +10,7 @@ struct mm_struct; -#ifdef CONFIG_PPC_MM_SLICES +#ifdef CONFIG_PPC_64S_HASH_MMU #ifdef CONFIG_HUGETLB_PAGE #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA @@ -30,16 +30,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, void slice_init_new_context_exec(struct mm_struct *mm); void slice_setup_new_exec(void); -#else /* CONFIG_PPC_MM_SLICES */ - -static inline void slice_init_new_context_exec(struct mm_struct *mm) {} - -static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -#endif /* CONFIG_PPC_MM_SLICES */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 39da688a9455..ba593fd60124 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -344,15 +344,10 @@ void copy_mm_to_paca(struct mm_struct *mm) { mm_context_t *context = >context; -#ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm_ctx_slb_addr_limit(context)); memcpy(_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context), LOW_SLICE_ARRAY_SZ); memcpy(_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context), TASK_SLICE_ARRAY_SZ(context)); -#else /* CONFIG_PPC_MM_SLICES */ - get_paca()->mm_ctx_user_psize = context->user_psize; - get_paca()->mm_ctx_sllp = context->sllp; -#endif } #endif /* CONFIG_PPC_64S_HASH_MMU */ diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index af2f3e75d458..d527dc8e30a8 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ 
b/arch/powerpc/mm/book3s64/Makefile @@ -5,7 +5,7 @@ ccflags-y := $(NO_MINIMAL_TOC) obj-y += mmu_context.o pgtable.o trace.o ifdef CONFIG_PPC_64S_HASH_MMU CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) -obj-y += hash_pgtable.o hash_utils.o hash_tlb.o slb.o +obj-y += hash_pgtable.o hash_utils.o hash_tlb.o slb.o slice.o obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o obj-$(CONFIG_PPC_64K_PAGES)+= hash_64k.o @@ -21,7 +21,6 @@ obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o endif
[PATCH v3 05/10] powerpc/mm: Call radix__arch_get_unmapped_area() from arch_get_unmapped_area()
Instead of setting mm->get_unmapped_area() to either arch_get_unmapped_area() or radix__arch_get_unmapped_area(), always set it to arch_get_unmapped_area() and call radix__arch_get_unmapped_area() from there when radix is enabled. To keep radix__arch_get_unmapped_area() static, move it to slice.c Do the same with radix__arch_get_unmapped_area_topdown() Signed-off-by: Christophe Leroy --- arch/powerpc/mm/book3s64/slice.c | 104 ++ arch/powerpc/mm/mmap.c | 123 --- 2 files changed, 104 insertions(+), 123 deletions(-) diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c index 4c3e9601fdf6..99742dde811c 100644 --- a/arch/powerpc/mm/book3s64/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -639,12 +639,113 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, } EXPORT_SYMBOL_GPL(slice_get_unmapped_area); +/* + * Same function as generic code used only for radix, because we don't need to overload + * the generic one. But we will have to duplicate, because hash select + * HAVE_ARCH_UNMAPPED_AREA + */ +static unsigned long +radix__arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int fixed = (flags & MAP_FIXED); + unsigned long high_limit; + struct vm_unmapped_area_info info; + + high_limit = DEFAULT_MAP_WINDOW; + if (addr >= high_limit || (fixed && (addr + len > high_limit))) + high_limit = TASK_SIZE; + + if (len > high_limit) + return -ENOMEM; + + if (fixed) { + if (addr > high_limit - len) + return -ENOMEM; + return addr; + } + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (high_limit - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = high_limit; + info.align_mask = 0; + + return 
vm_unmapped_area(); +} + +static unsigned long +radix__arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + int fixed = (flags & MAP_FIXED); + unsigned long high_limit; + struct vm_unmapped_area_info info; + + high_limit = DEFAULT_MAP_WINDOW; + if (addr >= high_limit || (fixed && (addr + len > high_limit))) + high_limit = TASK_SIZE; + + if (len > high_limit) + return -ENOMEM; + + if (fixed) { + if (addr > high_limit - len) + return -ENOMEM; + return addr; + } + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (high_limit - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); + info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW); + info.align_mask = 0; + + addr = vm_unmapped_area(); + if (!(addr & ~PAGE_MASK)) + return addr; + VM_BUG_ON(addr != -ENOMEM); + + /* +* A failed mmap() very likely causes application failure, +* so fall back to the bottom-up function here. This scenario +* can happen with large stack limits and large mmap() +* allocations. +*/ + return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags); +} + unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + if (radix_enabled()) + return radix__arch_get_unmapped_area(filp, addr, len, pgoff, flags); + return slice_get_unmapped_area(addr, len, flags, mm_ctx_user_psize(>mm->context), 0); } @@ -655,6 +756,9 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long pgoff, const
[PATCH v3 08/10] powerpc/mm: Properly randomise mmap with slices
Now that powerpc switched to default topdown mmap layout, mm->mmap_base is properly randomised. However slice_find_area_bottomup() doesn't use mm->mmap_base but uses the fixed TASK_UNMAPPED_BASE instead. slice_find_area_bottomup() being used as a fallback to slice_find_area_topdown(), it can't use mm->mmap_base directly. Instead of always using TASK_UNMAPPED_BASE as base address, leave it to the caller. When called from slice_find_area_topdown() TASK_UNMAPPED_BASE is used. Otherwise mm->mmap_base is used. Signed-off-by: Christophe Leroy --- arch/powerpc/mm/book3s64/slice.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c index 99742dde811c..997f40184e97 100644 --- a/arch/powerpc/mm/book3s64/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -276,20 +276,18 @@ static bool slice_scan_available(unsigned long addr, } static unsigned long slice_find_area_bottomup(struct mm_struct *mm, - unsigned long len, + unsigned long addr, unsigned long len, const struct slice_mask *available, int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); - unsigned long addr, found, next_end; + unsigned long found, next_end; struct vm_unmapped_area_info info; info.flags = 0; info.length = len; info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); info.align_offset = 0; - - addr = TASK_UNMAPPED_BASE; /* * Check till the allow max value for this mmap request */ @@ -322,12 +320,12 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm, } static unsigned long slice_find_area_topdown(struct mm_struct *mm, -unsigned long len, +unsigned long addr, unsigned long len, const struct slice_mask *available, int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); - unsigned long addr, found, prev; + unsigned long found, prev; struct vm_unmapped_area_info info; unsigned long min_addr = max(PAGE_SIZE, 
mmap_min_addr); @@ -335,8 +333,6 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, info.length = len; info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); info.align_offset = 0; - - addr = mm->mmap_base; /* * If we are trying to allocate above DEFAULT_MAP_WINDOW * Add the different to the mmap_base. @@ -377,7 +373,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * can happen with large stack limits and large mmap() * allocations. */ - return slice_find_area_bottomup(mm, len, available, psize, high_limit); + return slice_find_area_bottomup(mm, TASK_UNMAPPED_BASE, len, available, psize, high_limit); } @@ -386,9 +382,9 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, int topdown, unsigned long high_limit) { if (topdown) - return slice_find_area_topdown(mm, len, mask, psize, high_limit); + return slice_find_area_topdown(mm, mm->mmap_base, len, mask, psize, high_limit); else - return slice_find_area_bottomup(mm, len, mask, psize, high_limit); + return slice_find_area_bottomup(mm, mm->mmap_base, len, mask, psize, high_limit); } static inline void slice_copy_mask(struct slice_mask *dst, -- 2.33.1
[PATCH v3 04/10] powerpc/mm: Remove asm/slice.h
Move necessary stuff in asm/book3s/64/slice.h and remove asm/slice.h Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/book3s/64/hash.h | 3 ++ arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 + arch/powerpc/include/asm/book3s/64/slice.h| 18 + arch/powerpc/include/asm/page.h | 1 - arch/powerpc/include/asm/slice.h | 37 --- 5 files changed, 22 insertions(+), 38 deletions(-) delete mode 100644 arch/powerpc/include/asm/slice.h diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 97f2fc217a49..fab032f552f3 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -100,6 +100,9 @@ * hash table CPUs. */ #ifdef CONFIG_PPC_64S_HASH_MMU +#ifdef CONFIG_HUGETLB_PAGE +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#endif #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 3004f3323144..b4b2ca111f75 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -18,6 +18,7 @@ * complete pgtable.h but only a portion of it. 
*/ #include +#include #include #include diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h index f0d3194ba41b..5b0f7105bc8b 100644 --- a/arch/powerpc/include/asm/book3s/64/slice.h +++ b/arch/powerpc/include/asm/book3s/64/slice.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H #define _ASM_POWERPC_BOOK3S_64_SLICE_H +#ifndef __ASSEMBLY__ + #define SLICE_LOW_SHIFT28 #define SLICE_LOW_TOP (0x1ul) #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) @@ -13,4 +15,20 @@ #define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64 +struct mm_struct; + +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, + unsigned long flags, unsigned int psize, + int topdown); + +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); + +void slice_set_range_psize(struct mm_struct *mm, unsigned long start, + unsigned long len, unsigned int psize); + +void slice_init_new_context_exec(struct mm_struct *mm); +void slice_setup_new_exec(void); + +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 254687258f42..62e0c6f12869 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -329,6 +329,5 @@ static inline unsigned long kaslr_offset(void) #include #endif /* __ASSEMBLY__ */ -#include #endif /* _ASM_POWERPC_PAGE_H */ diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h deleted file mode 100644 index b15141f2bd76.. 
--- a/arch/powerpc/include/asm/slice.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_SLICE_H -#define _ASM_POWERPC_SLICE_H - -#ifdef CONFIG_PPC_BOOK3S_64 -#include -#endif - -#ifndef __ASSEMBLY__ - -struct mm_struct; - -#ifdef CONFIG_PPC_64S_HASH_MMU - -#ifdef CONFIG_HUGETLB_PAGE -#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA -#endif -#define HAVE_ARCH_UNMAPPED_AREA -#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN - -unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, - unsigned long flags, unsigned int psize, - int topdown); - -unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); - -void slice_set_range_psize(struct mm_struct *mm, unsigned long start, - unsigned long len, unsigned int psize); - -void slice_init_new_context_exec(struct mm_struct *mm); -void slice_setup_new_exec(void); - -#endif /* CONFIG_PPC_64S_HASH_MMU */ - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_POWERPC_SLICE_H */ -- 2.33.1
[PATCH v3 06/10] mm: Allow arch specific arch_randomize_brk() with CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
Commit e7142bf5d231 ("arm64, mm: make randomization selected by generic topdown mmap layout") introduced a default version of arch_randomize_brk() provided when CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT is selected. powerpc could select CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT but needs to provide its own arch_randomize_brk(). In order to allow that, define generic version of arch_randomize_brk() as a __weak symbol. Cc: Alexandre Ghiti Signed-off-by: Christophe Leroy --- v2: Make the generic version of it a __weak symbol instead of a messy play with CONFIG_ items. --- mm/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/util.c b/mm/util.c index 741ba32a43ac..46d1a2dd7a32 100644 --- a/mm/util.c +++ b/mm/util.c @@ -344,7 +344,7 @@ unsigned long randomize_stack_top(unsigned long stack_top) } #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT -unsigned long arch_randomize_brk(struct mm_struct *mm) +unsigned long __weak arch_randomize_brk(struct mm_struct *mm) { /* Is the current task 32bit ? */ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) -- 2.33.1
[PATCH v3 09/10] sizes.h: Add SZ_1T macro
Today drivers/pci/controller/pci-xgene.c defines SZ_1T Move it into linux/sizes.h so that it can be re-used elsewhere. Cc: Toan Le Cc: linux-...@vger.kernel.org Signed-off-by: Christophe Leroy --- v3: new --- drivers/pci/controller/pci-xgene.c | 1 - include/linux/sizes.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index 56d0d50338c8..716dcab5ca47 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -49,7 +49,6 @@ #define EN_REG 0x0001 #define OB_LO_IO 0x0002 #define XGENE_PCIE_DEVICEID0xE004 -#define SZ_1T (SZ_1G*1024ULL) #define PIPE_PHY_RATE_RD(src) ((0xc000 & (u32)(src)) >> 0xe) #define XGENE_V1_PCI_EXP_CAP 0x40 diff --git a/include/linux/sizes.h b/include/linux/sizes.h index 1ac79bcee2bb..84aa448d8bb3 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -47,6 +47,8 @@ #define SZ_8G _AC(0x2, ULL) #define SZ_16G _AC(0x4, ULL) #define SZ_32G _AC(0x8, ULL) + +#define SZ_1T _AC(0x100, ULL) #define SZ_64T _AC(0x4000, ULL) #endif /* __LINUX_SIZES_H__ */ -- 2.33.1
[PATCH v3 01/10] powerpc/mm: Make slice specific to book3s/64
Since commit 555904d07eef ("powerpc/8xx: MM_SLICE is not needed anymore") only book3s/64 selects CONFIG_PPC_MM_SLICES. Move slice.c into mm/book3s64/ Signed-off-by: Christophe Leroy --- v2: Remove now unnecessary #ifdef CONFIG_PPC_BOOK3S_64 in slice.c --- arch/powerpc/mm/Makefile | 1 - arch/powerpc/mm/book3s64/Makefile | 1 + arch/powerpc/mm/{ => book3s64}/slice.c | 2 -- arch/powerpc/mm/nohash/mmu_context.c | 9 - arch/powerpc/mm/nohash/tlb.c | 4 5 files changed, 1 insertion(+), 16 deletions(-) rename arch/powerpc/mm/{ => book3s64}/slice.c (99%) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index df8172da2301..d4c20484dad9 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -14,7 +14,6 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32)+= book3s32/ obj-$(CONFIG_PPC_BOOK3S_64)+= book3s64/ obj-$(CONFIG_NUMA) += numa.o -obj-$(CONFIG_PPC_MM_SLICES)+= slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index 2d50cac499c5..af2f3e75d458 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o endif obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o obj-$(CONFIG_PPC_PKEY) += pkeys.o +obj-$(CONFIG_PPC_MM_SLICES)+= slice.o # Instrumenting the SLB fault path can lead to duplicate SLB entries KCOV_INSTRUMENT_slb.o := n diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/book3s64/slice.c similarity index 99% rename from arch/powerpc/mm/slice.c rename to arch/powerpc/mm/book3s64/slice.c index 82b45b1cb973..c83be371c6e7 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -712,7 +712,6 @@ void slice_init_new_context_exec(struct mm_struct *mm) bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); } -#ifdef CONFIG_PPC_BOOK3S_64 void 
slice_setup_new_exec(void) { struct mm_struct *mm = current->mm; @@ -724,7 +723,6 @@ void slice_setup_new_exec(void) mm_ctx_set_slb_addr_limit(>context, DEFAULT_MAP_WINDOW); } -#endif void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize) diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index 44b2b5e7cabe..dc3528e815b9 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -313,15 +313,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, */ int init_new_context(struct task_struct *t, struct mm_struct *mm) { - /* -* We have MMU_NO_CONTEXT set to be ~0. Hence check -* explicitly against context.id == 0. This ensures that we properly -* initialize context slice details for newly allocated mm's (which will -* have id == 0) and don't alter context slice inherited via fork (which -* will have id != 0). -*/ - if (mm->context.id == 0) - slice_init_new_context_exec(mm); mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; pte_frag_set(>context, NULL); diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 311281063d48..3359cf7c2a61 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -773,9 +773,5 @@ void __init early_init_mmu(void) #ifdef CONFIG_PPC_47x early_init_mmu_47x(); #endif - -#ifdef CONFIG_PPC_MM_SLICES - mm_ctx_set_slb_addr_limit(_mm.context, SLB_ADDR_LIMIT_DEFAULT); -#endif } #endif /* CONFIG_PPC64 */ -- 2.33.1
[PATCH v3 07/10] powerpc/mm: Convert to default topdown mmap layout
Select CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT and remove arch/powerpc/mm/mmap.c This change provides standard randomisation of mmaps. See commit 8b8addf891de ("x86/mm/32: Enable full randomization on i386 and X86_32") for all the benefits of mmap randomisation. Signed-off-by: Christophe Leroy --- v2: Also remove selection of ARCH_HAS_ELF_RANDOMIZE as it is already selected by CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/processor.h | 2 - arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/mmap.c | 105 --- 4 files changed, 2 insertions(+), 109 deletions(-) delete mode 100644 arch/powerpc/mm/mmap.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fb48823ccd62..20504a9901f2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -122,7 +122,6 @@ config PPC select ARCH_HAS_DEBUG_WXif STRICT_KERNEL_RWX select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES - select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_HUGEPD if HUGETLB_PAGE @@ -158,6 +157,7 @@ config PPC select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_USE_QUEUED_SPINLOCKSif PPC_QUEUED_SPINLOCKS + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_LD_ORPHAN_WARN diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index e39bd0ff69f3..d906b14dd599 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -378,8 +378,6 @@ static inline void prefetchw(const void *x) #define spin_lock_prefetch(x) prefetchw(x) -#define HAVE_ARCH_PICK_MMAP_LAYOUT - /* asm stubs */ extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); diff --git a/arch/powerpc/mm/Makefile 
b/arch/powerpc/mm/Makefile index d4c20484dad9..503a6e249940 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,7 +5,7 @@ ccflags-$(CONFIG_PPC64):= $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \ +obj-y := fault.o mem.o pgtable.o maccess.o pageattr.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ init-common.o mmu_context.o drmem.o \ diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c deleted file mode 100644 index 5972d619d274.. --- a/arch/powerpc/mm/mmap.c +++ /dev/null @@ -1,105 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * flexible mmap layout support - * - * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. - * All Rights Reserved. - * - * Started by Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Top of mmap area (just below the process stack). - * - * Leave at least a ~128 MB hole. - */ -#define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) - -static inline int mmap_is_legacy(struct rlimit *rlim_stack) -{ - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - if (rlim_stack->rlim_cur == RLIM_INFINITY) - return 1; - - return sysctl_legacy_va_layout; -} - -unsigned long arch_mmap_rnd(void) -{ - unsigned long shift, rnd; - - shift = mmap_rnd_bits; -#ifdef CONFIG_COMPAT - if (is_32bit_task()) - shift = mmap_rnd_compat_bits; -#endif - rnd = get_random_long() % (1ul << shift); - - return rnd << PAGE_SHIFT; -} - -static inline unsigned long stack_maxrandom_size(void) -{ - if (!(current->flags & PF_RANDOMIZE)) - return 0; - - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - return (1<<23); - else - return (1<<30); -} - -static inline unsigned long mmap_base(unsigned long rnd, - struct rlimit *rlim_stack) -{ - unsigned long gap = rlim_stack->rlim_cur; - unsigned long pad = stack_maxrandom_size() + stack_guard_gap; - - /* Values close to RLIM_INFINITY 
can overflow. */ - if (gap + pad > gap) - gap += pad; - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd); -} - -/* - * This function, called very early during the creation of a new - * process VM image,
Re: [PATCH v5 15/17] powerpc/64s: Make hash MMU support configurable
Nicholas Piggin writes: > This adds Kconfig selection which allows 64s hash MMU support to be > disabled. It can be disabled if radix support is enabled, the minimum > supported CPU type is POWER9 (or higher), and KVM is not selected. > > Signed-off-by: Nicholas Piggin > --- > arch/powerpc/Kconfig | 3 ++- > arch/powerpc/include/asm/mmu.h | 16 +--- > arch/powerpc/kernel/dt_cpu_ftrs.c| 14 ++ > arch/powerpc/kvm/Kconfig | 1 + > arch/powerpc/mm/init_64.c| 13 +++-- > arch/powerpc/platforms/Kconfig.cputype | 23 +-- > arch/powerpc/platforms/cell/Kconfig | 1 + > arch/powerpc/platforms/maple/Kconfig | 1 + > arch/powerpc/platforms/microwatt/Kconfig | 2 +- > arch/powerpc/platforms/pasemi/Kconfig| 1 + > arch/powerpc/platforms/powermac/Kconfig | 1 + > arch/powerpc/platforms/powernv/Kconfig | 2 +- powernv_defconfig brings CONFIG_CXL=m ../drivers/misc/cxl/main.c: In function ‘cxl_alloc_sst’: ../drivers/misc/cxl/main.c:127:45: error: ‘mmu_linear_psize’ undeclared (first use in this function); did you mean ‘mmu_virtual_psize’? 127 | sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 50; | ^~~~ | mmu_virtual_psize ../drivers/misc/cxl/main.c:127:45: note: each undeclared identifier is reported only once for each function it appears in make[4]: *** [../scripts/Makefile.build:287: drivers/misc/cxl/main.o] Error 1
Re: [PATCH] powerpc/ftrace: Handle large kernel configs
Hi Naveen, Le 16/10/2018 à 22:25, Naveen N. Rao a écrit : Currently, we expect to be able to reach ftrace_caller() from all ftrace-enabled functions through a single relative branch. With large kernel configs, we see functions outside of 32MB of ftrace_caller() causing ftrace_init() to bail. In such configurations, gcc/ld emits two types of trampolines for mcount(): 1. A long_branch, which has a single branch to mcount() for functions that are one hop away from mcount(): c19e8544 <00031b56.long_branch._mcount>: c19e8544: 4a 69 3f ac b c007c4f0 <._mcount> 2. A plt_branch, for functions that are farther away from mcount(): c51f33f8 <0008ba04.plt_branch._mcount>: c51f33f8: 3d 82 ff a4 addis r12,r2,-92 c51f33fc: e9 8c 04 20 ld r12,1056(r12) c51f3400: 7d 89 03 a6 mtctr r12 c51f3404: 4e 80 04 20 bctr We can reuse those trampolines for ftrace if we can have those trampolines go to ftrace_caller() instead. However, with ABIv2, we cannot depend on r2 being valid. As such, we use only the long_branch trampolines by patching those to instead branch to ftrace_caller or ftrace_regs_caller. In addition, we add additional trampolines around .text and .init.text to catch locations that are covered by the plt branches. This allows ftrace to work with most large kernel configurations. For now, we always patch the trampolines to go to ftrace_regs_caller, which is slightly inefficient. This can be optimized further at a later point. Signed-off-by: Naveen N. 
Rao --- Since RFC: - Change to patch long_branch to go to ftrace_caller, rather than patching mcount() - Stop using plt_branch since it can't be relied on for ABIv2 - Add trampolines around .text and .init.text to catch remaining locations - Naveen arch/powerpc/kernel/trace/ftrace.c| 261 +- arch/powerpc/kernel/trace/ftrace_64.S | 12 ++ arch/powerpc/kernel/vmlinux.lds.S | 13 +- 3 files changed, 281 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 4bfbb54dee51..4bf051d3e21e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c ... +/* + * If this is a compiler generated long_branch trampoline (essentially, a + * trampoline that has a branch to _mcount()), we re-write the branch to + * instead go to ftrace_[regs_]caller() and note down the location of this + * trampoline. + */ +static int setup_mcount_compiler_tramp(unsigned long tramp) +{ + int i, op; + unsigned long ptr; + static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; + + /* Is this a known long jump tramp? */ + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_tramps[i]) + break; + else if (ftrace_tramps[i] == tramp) + return 0; + + /* Is this a known plt tramp? */ + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_plt_tramps[i]) + break; + else if (ftrace_plt_tramps[i] == tramp) + return -1; I don't understand how this is supposed to work. ftrace_plt_tramps[] being a static table, it is set to 0s at startup. So the above loop breaks at first round. Then ftrace_plt_tramps[i] is never/nowhere set. So I just see it as useless. Am I missing something ? Thanks Christophe + + /* New trampoline -- read where this goes */ + if (probe_kernel_read(, (void *)tramp, sizeof(int))) { + pr_debug("Fetching opcode failed.\n"); + return -1; + } + + /* Is this a 24 bit branch? 
*/ + if (!is_b_op(op)) { + pr_debug("Trampoline is not a long branch tramp.\n"); + return -1; + } + + /* lets find where the pointer goes */ + ptr = find_bl_target(tramp, op); + + if (ptr != ppc_global_function_entry((void *)_mcount)) { + pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr); + return -1; + } + + /* Let's re-write the tramp to go to ftrace_[regs_]caller */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + ptr = ppc_global_function_entry((void *)ftrace_regs_caller); +#else + ptr = ppc_global_function_entry((void *)ftrace_caller); +#endif + if (!create_branch((void *)tramp, ptr, 0)) { + pr_debug("%ps is not reachable from existing mcount tramp\n", + (void *)ptr); + return -1; + } + + if (patch_branch((unsigned int *)tramp, ptr, 0)) { + pr_debug("REL24 out of range!\n"); + return -1; + } + + if (add_ftrace_tramp(tramp)) { + pr_debug("No tramp locations left\n"); + return -1; + }
Re: [PATCH v2 28/45] mfd: rn5t618: Use devm_register_power_handler()
29.11.2021 14:55, Lee Jones пишет: > On Thu, 28 Oct 2021, Dmitry Osipenko wrote: > >> Use devm_register_power_handler() that replaces global pm_power_off >> variable and allows to register multiple power-off handlers. It also >> provides restart-handler support, i.e. all in one API. >> >> Signed-off-by: Dmitry Osipenko >> --- >> drivers/mfd/rn5t618.c | 56 --- >> 1 file changed, 21 insertions(+), 35 deletions(-) > > For my own reference (apply this as-is to your sign-off block): > > Acked-for-MFD-by: Lee Jones > Thank you. This and other driver patches will be slightly changed because the power-handler was renamed to sys-off handler starting with the v3 of this series, but your ack still will be valid here.
Re: [PATCH v2 28/45] mfd: rn5t618: Use devm_register_power_handler()
On Thu, 28 Oct 2021, Dmitry Osipenko wrote: > Use devm_register_power_handler() that replaces global pm_power_off > variable and allows to register multiple power-off handlers. It also > provides restart-handler support, i.e. all in one API. > > Signed-off-by: Dmitry Osipenko > --- > drivers/mfd/rn5t618.c | 56 --- > 1 file changed, 21 insertions(+), 35 deletions(-) For my own reference (apply this as-is to your sign-off block): Acked-for-MFD-by: Lee Jones -- Lee Jones [李琼斯] Senior Technical Lead - Developer Services Linaro.org │ Open source software for Arm SoCs Follow Linaro: Facebook | Twitter | Blog
Re: [PATCH v4 08/25] kernel: Add combined power-off+restart handler call chain API
29.11.2021 03:36, Michał Mirosław пишет: > On Mon, Nov 29, 2021 at 12:53:51AM +0300, Dmitry Osipenko wrote: >> 29.11.2021 00:17, Michał Mirosław пишет: I'm having trouble with parsing this comment. Could you please try to rephrase it? I don't see how you could check whether power-off handler is available if you'll mix all handlers together. >>> If notify_call_chain() would be fixed to return NOTIFY_OK if any call >>> returned NOTIFY_OK, then this would be a clear way to gather the >>> answer if any of the handlers will attempt the final action (reboot or >>> power off). >> Could you please show a code snippet that implements your suggestion? > > A rough idea is this: > > static int notifier_call_chain(struct notifier_block **nl, > unsigned long val, void *v, > int nr_to_call, int *nr_calls) > { > - int ret = NOTIFY_DONE; > + int ret, result = NOTIFY_DONE; > struct notifier_block *nb, *next_nb; > > nb = rcu_dereference_raw(*nl); > > while (nb && nr_to_call) { > ... > ret = nb->notifier_call(nb, val, v); > + > + /* Assuming NOTIFY_STOP-carrying return is always greater than > non-stopping one. */ > + if (result < ret) > + result = ret; > ... > } > - return ret; > + return result; > } > > Then: > > bool prepare_reboot() > { > int ret = xx_notifier_call_chain(_notifier, PREPARE_REBOOT, > ...); > return ret == NOTIFY_OK; > } > > And the return value would signify whether the reboot will be attempted > when calling the chain for the REBOOT action. (Analogously for powering off.) If you started to execute call chain, then you began the power-off / restart sequence, this is a point of no return. Sorry, I still don't understand what you're trying to achieve. The approach of having separate call chains is simple and intuitive, I don't see reasons to change it.
[PATCH v4 2/5] powerpc/inst: Define ppc_inst_t
In order to stop using 'struct ppc_inst' on PPC32, define a ppc_inst_t typedef. Signed-off-by: Christophe Leroy --- v3: Rebased and resolved conflicts v2: Anonymise the structure so that only the typedef can be used Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/code-patching.h | 18 +++ arch/powerpc/include/asm/hw_breakpoint.h | 4 +- arch/powerpc/include/asm/inst.h | 36 ++--- arch/powerpc/include/asm/sstep.h | 4 +- arch/powerpc/kernel/align.c | 4 +- arch/powerpc/kernel/epapr_paravirt.c | 2 +- arch/powerpc/kernel/hw_breakpoint.c | 4 +- .../kernel/hw_breakpoint_constraints.c| 4 +- arch/powerpc/kernel/kprobes.c | 4 +- arch/powerpc/kernel/mce_power.c | 2 +- arch/powerpc/kernel/optprobes.c | 4 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/setup_32.c| 2 +- arch/powerpc/kernel/trace/ftrace.c| 54 +-- arch/powerpc/kernel/vecemu.c | 2 +- arch/powerpc/lib/code-patching.c | 38 ++--- arch/powerpc/lib/feature-fixups.c | 4 +- arch/powerpc/lib/sstep.c | 4 +- arch/powerpc/lib/test_emulate_step.c | 10 ++-- arch/powerpc/mm/maccess.c | 2 +- arch/powerpc/perf/8xx-pmu.c | 2 +- arch/powerpc/xmon/xmon.c | 14 ++--- arch/powerpc/xmon/xmon_bpts.h | 4 +- 23 files changed, 112 insertions(+), 112 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 4ba834599c4d..46e8c5a8ce51 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -24,20 +24,20 @@ bool is_offset_in_branch_range(long offset); bool is_offset_in_cond_branch_range(long offset); -int create_branch(struct ppc_inst *instr, const u32 *addr, +int create_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); -int create_cond_branch(struct ppc_inst *instr, const u32 *addr, +int create_cond_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); int patch_branch(u32 *addr, unsigned long target, int flags); -int patch_instruction(u32 *addr, struct ppc_inst 
instr); -int raw_patch_instruction(u32 *addr, struct ppc_inst instr); +int patch_instruction(u32 *addr, ppc_inst_t instr); +int raw_patch_instruction(u32 *addr, ppc_inst_t instr); static inline unsigned long patch_site_addr(s32 *site) { return (unsigned long)site + *site; } -static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) +static inline int patch_instruction_site(s32 *site, ppc_inst_t instr) { return patch_instruction((u32 *)patch_site_addr(site), instr); } @@ -58,11 +58,11 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned return modify_instruction((unsigned int *)patch_site_addr(site), clr, set); } -int instr_is_relative_branch(struct ppc_inst instr); -int instr_is_relative_link_branch(struct ppc_inst instr); +int instr_is_relative_branch(ppc_inst_t instr); +int instr_is_relative_link_branch(ppc_inst_t instr); unsigned long branch_target(const u32 *instr); -int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src); -extern bool is_conditional_branch(struct ppc_inst instr); +int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src); +bool is_conditional_branch(ppc_inst_t instr); #ifdef CONFIG_PPC_BOOK3E_64 void __patch_exception(int exc, unsigned long addr); #define patch_exception(exc, name) do { \ diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index abebfbee5b1c..88053d3c68e6 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -56,11 +56,11 @@ static inline int nr_wp_slots(void) return cpu_has_feature(CPU_FTR_DAWR1) ? 
2 : 1; } -bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, +bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr, unsigned long ea, int type, int size, struct arch_hw_breakpoint *info); -void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, +void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea); #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index fea4d46155a9..055de1fa5d46 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -8,7 +8,7 @@ ({ \ long __gui_ret;
[PATCH v4 4/5] powerpc/inst: Move ppc_inst_t definition in asm/reg.h
Because of circular inclusion of asm/hw_breakpoint.h, we need to move definition of asm/reg.h outside of inst.h so that asm/hw_breakpoint.h gets it without including asm/inst.h Also remove asm/inst.h from asm/uprobes.h as it's not needed anymore. Signed-off-by: Christophe Leroy --- v4: New to support inlining of copy_inst_from_kernel_nofault() in following patch. --- arch/powerpc/include/asm/hw_breakpoint.h | 1 - arch/powerpc/include/asm/inst.h | 10 +- arch/powerpc/include/asm/reg.h | 12 arch/powerpc/include/asm/uprobes.h | 1 - 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 88053d3c68e6..84d39fd42f71 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -10,7 +10,6 @@ #define _PPC_BOOK3S_64_HW_BREAKPOINT_H #include -#include #ifdef __KERNEL__ struct arch_hw_breakpoint { diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 5c503816ebc0..86074e83d2a5 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_INST_H #include +#include #define ___get_user_instr(gu_op, dest, ptr)\ ({ \ @@ -35,13 +36,6 @@ */ #if defined(CONFIG_PPC64) || defined(__CHECKER__) -typedef struct { - u32 val; -#ifdef CONFIG_PPC64 - u32 suffix; -#endif -} __packed ppc_inst_t; - static inline u32 ppc_inst_val(ppc_inst_t x) { return x.val; @@ -50,8 +44,6 @@ static inline u32 ppc_inst_val(ppc_inst_t x) #define ppc_inst(x) ((ppc_inst_t){ .val = (x) }) #else -typedef u32 ppc_inst_t; - static inline u32 ppc_inst_val(ppc_inst_t x) { return x; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e9d27265253b..85501181f929 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1366,6 +1366,18 @@ /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ + +#if 
defined(CONFIG_PPC64) || defined(__CHECKER__) +typedef struct { + u32 val; +#ifdef CONFIG_PPC64 + u32 suffix; +#endif +} __packed ppc_inst_t; +#else +typedef u32 ppc_inst_t; +#endif + #define mfmsr()({unsigned long rval; \ asm volatile("mfmsr %0" : "=r" (rval) : \ : "memory"); rval;}) diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index fe683371336f..a7ae1860115a 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -11,7 +11,6 @@ #include #include -#include typedef ppc_opcode_t uprobe_opcode_t; -- 2.33.1
[PATCH v4 5/5] powerpc/inst: Optimise copy_inst_from_kernel_nofault()
copy_inst_from_kernel_nofault() uses copy_from_kernel_nofault() to copy one or two 32bits words. This means calling an out-of-line function which itself calls back copy_from_kernel_nofault_allowed() then performs a generic copy with loops. Rewrite copy_inst_from_kernel_nofault() to do everything at a single place and use __get_kernel_nofault() directly to perform single accesses without loops. Allthough the generic function uses pagefault_disable(), it is not required on powerpc because do_page_fault() bails earlier when a kernel mode fault happens on a kernel address. As the function has now become very small, inline it. With this change, on an 8xx the time spent in the loop in ftrace_replace_code() is reduced by 23% at function tracer activation and 27% at nop tracer activation. The overall time to activate function tracer (measured with shell command 'time') is 570ms before the patch and 470ms after the patch. Even vmlinux size is reduced (by 152 instruction). Before the patch: 0018 : 18: 94 21 ff e0 stwur1,-32(r1) 1c: 7c 08 02 a6 mflrr0 20: 38 a0 00 04 li r5,4 24: 93 e1 00 1c stw r31,28(r1) 28: 7c 7f 1b 78 mr r31,r3 2c: 38 61 00 08 addir3,r1,8 30: 90 01 00 24 stw r0,36(r1) 34: 48 00 00 01 bl 34 34: R_PPC_REL24 copy_from_kernel_nofault 38: 2c 03 00 00 cmpwi r3,0 3c: 40 82 00 0c bne 48 40: 81 21 00 08 lwz r9,8(r1) 44: 91 3f 00 00 stw r9,0(r31) 48: 80 01 00 24 lwz r0,36(r1) 4c: 83 e1 00 1c lwz r31,28(r1) 50: 38 21 00 20 addir1,r1,32 54: 7c 08 03 a6 mtlrr0 58: 4e 80 00 20 blr After the patch (before inlining): 0018 : 18: 3d 20 b0 00 lis r9,-20480 1c: 7c 04 48 40 cmplw r4,r9 20: 7c 69 1b 78 mr r9,r3 24: 41 80 00 14 blt 38 28: 81 44 00 00 lwz r10,0(r4) 2c: 38 60 00 00 li r3,0 30: 91 49 00 00 stw r10,0(r9) 34: 4e 80 00 20 blr 38: 38 60 ff de li r3,-34 3c: 4e 80 00 20 blr 40: 38 60 ff f2 li r3,-14 44: 4e 80 00 20 blr Signed-off-by: Christophe Leroy --- v4: Inline and remove pagefault_disable() v3: New Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/inst.h 
| 21 - arch/powerpc/mm/maccess.c | 17 - 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 86074e83d2a5..0aa811ff44d5 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -4,6 +4,8 @@ #include #include +#include +#include #define ___get_user_instr(gu_op, dest, ptr)\ ({ \ @@ -148,6 +150,23 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x) __str; \ }) -int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src); +static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) +{ + unsigned int val, suffix; + + if (unlikely(!is_kernel_addr((unsigned long)src))) + return -ERANGE; + + __get_kernel_nofault(, src, u32, Efault); + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { + __get_kernel_nofault(, src + 1, u32, Efault); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return 0; +Efault: + return -EFAULT; +} #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index 5abae96b2b46..ea821d0ffe16 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -11,20 +11,3 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return is_kernel_addr((unsigned long)unsafe_src); } - -int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) -{ - unsigned int val, suffix; - int err; - - err = copy_from_kernel_nofault(, src, sizeof(val)); - if (err) - return err; - if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(, src + 1, sizeof(suffix)); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} -- 2.33.1
[PATCH v4 3/5] powerpc/inst: Define ppc_inst_t as u32 on PPC32
Unlike PPC64 ABI, PPC32 uses the stack to pass a parameter defined as a struct, even when the struct has a single simple element. To avoid that, define ppc_inst_t as u32 on PPC32. Keep it as 'struct ppc_inst' when __CHECKER__ is defined so that sparse can perform type checking. Also revert commit 511eea5e2ccd ("powerpc/kprobes: Fix Oops by passing ppc_inst as a pointer to emulate_step() on ppc32") as now the instruction to be emulated is passed as a register to emulate_step(). Signed-off-by: Christophe Leroy --- v2: Make it work with kprobes --- arch/powerpc/include/asm/inst.h | 15 +-- arch/powerpc/kernel/optprobes.c | 8 ++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 055de1fa5d46..5c503816ebc0 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -34,6 +34,7 @@ * Instruction data type for POWER */ +#if defined(CONFIG_PPC64) || defined(__CHECKER__) typedef struct { u32 val; #ifdef CONFIG_PPC64 @@ -46,13 +47,23 @@ static inline u32 ppc_inst_val(ppc_inst_t x) return x.val; } +#define ppc_inst(x) ((ppc_inst_t){ .val = (x) }) + +#else +typedef u32 ppc_inst_t; + +static inline u32 ppc_inst_val(ppc_inst_t x) +{ + return x; +} +#define ppc_inst(x) (x) +#endif + static inline int ppc_inst_primary_opcode(ppc_inst_t x) { return ppc_inst_val(x) >> 26; } -#define ppc_inst(x) ((ppc_inst_t){ .val = (x) }) - #ifdef CONFIG_PPC64 #define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) }) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 378db980ded3..3b1c2236cbee 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 3. 
load instruction to be emulated into relevant register, and */ - if (IS_ENABLED(CONFIG_PPC64)) { - temp = ppc_inst_read(p->ainsn.insn); - patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); - } else { - patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); - } + temp = ppc_inst_read(p->ainsn.insn); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline -- 2.33.1
[PATCH v4 1/5] powerpc/inst: Refactor ___get_user_instr()
PPC64 version of ___get_user_instr() can be used for PPC32 as well, by simply disabling the suffix part with IS_ENABLED(CONFIG_PPC64). Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/inst.h | 11 +-- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index b11c0e2f9639..fea4d46155a9 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -4,8 +4,6 @@ #include -#ifdef CONFIG_PPC64 - #define ___get_user_instr(gu_op, dest, ptr)\ ({ \ long __gui_ret; \ @@ -16,7 +14,7 @@ __chk_user_ptr(ptr);\ __gui_ret = gu_op(__prefix, __gui_ptr); \ if (__gui_ret == 0) { \ - if ((__prefix >> 26) == OP_PREFIX) {\ + if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \ __gui_ret = gu_op(__suffix, __gui_ptr + 1); \ __gui_inst = ppc_inst_prefix(__prefix, __suffix); \ } else {\ @@ -27,13 +25,6 @@ } \ __gui_ret; \ }) -#else /* !CONFIG_PPC64 */ -#define ___get_user_instr(gu_op, dest, ptr)\ -({ \ - __chk_user_ptr(ptr);\ - gu_op((dest).val, (u32 __user *)(ptr)); \ -}) -#endif /* CONFIG_PPC64 */ #define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr) -- 2.33.1
Re: [PATCH v4 05/25] reboot: Warn if restart handler has duplicated priority
29.11.2021 03:26, Michał Mirosław пишет: > On Mon, Nov 29, 2021 at 12:06:19AM +0300, Dmitry Osipenko wrote: >> 28.11.2021 03:28, Michał Mirosław пишет: >>> On Fri, Nov 26, 2021 at 09:00:41PM +0300, Dmitry Osipenko wrote: Add sanity check which ensures that there are no two restart handlers registered with the same priority. Normally it's a direct sign of a problem if two handlers use the same priority. >>> >>> The patch doesn't ensure the property that there are no duplicated-priority >>> entries on the chain. >> >> It's not the exact point of this patch. >> >>> I'd rather see a atomic_notifier_chain_register_unique() that returns >>> -EBUSY or something istead of adding an entry with duplicate priority. >>> That way it would need only one list traversal unless you want to >>> register the duplicate anyway (then you would call the older >>> atomic_notifier_chain_register() after reporting the error). >> >> The point of this patch is to warn developers about the problem that >> needs to be fixed. We already have such troubling drivers in mainline. >> >> It's not critical to register different handlers with a duplicated >> priorities, but such cases really need to be corrected. We shouldn't >> break users' machines during transition to the new API, meanwhile >> developers should take action of fixing theirs drivers. >> >>> (Or you could return > 0 when a duplicate is registered in >>> atomic_notifier_chain_register() if the callers are prepared >>> for that. I don't really like this way, though.) >> >> I had a similar thought at some point before and decided that I'm not in >> favor of this approach. It's nicer to have a dedicated function that >> verifies the uniqueness, IMO. > > I don't like the part that it traverses the list second time to check > the uniqueness. 
But actually you could avoid that if > notifier_chain_register() would always add equal-priority entries in > reverse order: > > static int notifier_chain_register(struct notifier_block **nl, > struct notifier_block *n) > { > while ((*nl) != NULL) { > if (unlikely((*nl) == n)) { > WARN(1, "double register detected"); > return 0; > } > - if (n->priority > (*nl)->priority) > + if (n->priority >= (*nl)->priority) > break; > nl = &((*nl)->next); > } > n->next = *nl; > rcu_assign_pointer(*nl, n); > return 0; > } > > Then the check for uniqueness after adding would be: > > WARN(nb->next && nb->priority == nb->next->priority); We can't just change the registration order because invocation order of the call chain depends on the registration order and some of current users may rely on that order. I'm pretty sure that changing the order will have unfortunate consequences.
Re: [linux-next] Read-only file system after boot (powerpc)
> On 29-Nov-2021, at 3:29 PM, Christoph Hellwig wrote: > > Can you check if your tree already includes this commit: > > https://git.kernel.dk/cgit/linux-block/commit/?h=for-5.17/block=3f39d47d7ad858c024bd777f5f2a86fa7f6a9f14 > > and if not see if that fixes the problem? Thanks. Yes, this patch fixes the problem for me. -Sachin
Re: [patch 09/22] MIPS: Octeon: Use arch_setup_msi_irq()
On Sat, Nov 27, 2021 at 02:18:48AM +0100, Thomas Gleixner wrote: > The core code provides the same loop code except for the MSI-X reject. Move > that to arch_setup_msi_irq() and remove the duplicated code. > > No functional change. > > Signed-off-by: Thomas Gleixner > Cc: Thomas Bogendoerfer > Cc: linux-m...@vger.kernel.org > --- > arch/mips/pci/msi-octeon.c | 32 +++- > 1 file changed, 3 insertions(+), 29 deletions(-) > > --- a/arch/mips/pci/msi-octeon.c > +++ b/arch/mips/pci/msi-octeon.c > @@ -68,6 +68,9 @@ int arch_setup_msi_irq(struct pci_dev *d > u64 search_mask; > int index; > > + if (desc->pci.msi_attrib.is_msix) > + return -EINVAL; > + > /* >* Read the MSI config to figure out how many IRQs this device >* wants. Most devices only want 1, which will give > @@ -182,35 +185,6 @@ int arch_setup_msi_irq(struct pci_dev *d > return 0; > } > > -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) > -{ > - struct msi_desc *entry; > - int ret; > - > - /* > - * MSI-X is not supported. > - */ > - if (type == PCI_CAP_ID_MSIX) > - return -EINVAL; > - > - /* > - * If an architecture wants to support multiple MSI, it needs to > - * override arch_setup_msi_irqs() > - */ > - if (type == PCI_CAP_ID_MSI && nvec > 1) > - return 1; > - > - for_each_pci_msi_entry(entry, dev) { > - ret = arch_setup_msi_irq(dev, entry); > - if (ret < 0) > - return ret; > - if (ret > 0) > - return -ENOSPC; > - } > - > - return 0; > -} > - > /** > * Called when a device no longer needs its MSI interrupts. All > * MSI interrupts for the device are freed. Acked-by: Thomas Bogendoerfer -- Crap can work. Given enough thrust pigs will fly, but it's not necessarily a good idea.[ RFC1925, 2.3 ]
Re: [linux-next] Read-only file system after boot (powerpc)
Can you check if your tree already includes this commit: https://git.kernel.dk/cgit/linux-block/commit/?h=for-5.17/block=3f39d47d7ad858c024bd777f5f2a86fa7f6a9f14 and if not see if that fixes the problem?
Re: [patch 00/22] genirq/msi, PCI/MSI: Spring cleaning - Part 1
On 11/27/21 02:18, Thomas Gleixner wrote: The [PCI] MSI code has gained quite some warts over time. A recent discussion unearthed a shortcoming: the lack of support for expanding PCI/MSI-X vectors after initialization of MSI-X. PCI/MSI-X has no requirement to setup all vectors when MSI-X is enabled in the device. The non-used vectors have just to be masked in the vector table. For PCI/MSI this is not possible because the number of vectors cannot be changed after initialization. The PCI/MSI code, but also the core MSI irq domain code are built around the assumption that all required vectors are installed at initialization time and freed when the device is shut down by the driver. Supporting dynamic expansion at least for MSI-X is important for VFIO so that the host side interrupts for passthrough devices can be installed on demand. This is the first part of a large (total 101 patches) series which refactors the [PCI]MSI infrastructure to make runtime expansion of MSI-X vectors possible. The last part (10 patches) provide this functionality. The first part is mostly a cleanup which consolidates code, moves the PCI MSI code into a separate directory and splits it up into several parts. No functional change intended except for patch 2/N which changes the behaviour of pci_get_vector()/affinity() to get rid of the assumption that the provided index is the "index" into the descriptor list instead of using it as the actual MSI[X] index as seen by the hardware. This would break users of sparse allocated MSI-X entries, but none of them use these functions. 
This series is based on 5.16-rc2 and also available via git: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git msi-v1-part-1 For the curious who can't wait for the next part to arrive the full series is available via: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git msi-v1-part-4 After fixing the compile failures, I didn't see any regressions on these platforms : PowerNV, pSeries under KVM and PowerVM, using POWER8/9 processors. Thanks, C. Thanks, tglx --- arch/powerpc/platforms/4xx/msi.c| 281 b/Documentation/driver-api/pci/pci.rst |2 b/arch/mips/pci/msi-octeon.c| 32 - b/arch/powerpc/platforms/4xx/Makefile |1 b/arch/powerpc/platforms/cell/axon_msi.c|2 b/arch/powerpc/platforms/powernv/pci-ioda.c |4 b/arch/powerpc/platforms/pseries/msi.c |6 b/arch/powerpc/sysdev/Kconfig |6 b/arch/s390/pci/pci_irq.c |4 b/arch/sparc/kernel/pci_msi.c |4 b/arch/x86/hyperv/irqdomain.c | 55 -- b/arch/x86/include/asm/x86_init.h |6 b/arch/x86/include/asm/xen/hypervisor.h |8 b/arch/x86/kernel/apic/msi.c|8 b/arch/x86/kernel/x86_init.c| 12 b/arch/x86/pci/xen.c| 19 b/drivers/irqchip/irq-gic-v2m.c |1 b/drivers/irqchip/irq-gic-v3-its-pci-msi.c |1 b/drivers/irqchip/irq-gic-v3-mbi.c |1 b/drivers/net/wireless/ath/ath11k/pci.c |2 b/drivers/pci/Makefile |3 b/drivers/pci/msi/Makefile |7 b/drivers/pci/msi/irqdomain.c | 267 +++ b/drivers/pci/msi/legacy.c | 79 +++ b/drivers/pci/msi/msi.c | 645 b/drivers/pci/msi/msi.h | 39 + b/drivers/pci/msi/pcidev_msi.c | 43 + b/drivers/pci/pci-sysfs.c |7 b/drivers/pci/xen-pcifront.c|2 b/include/linux/msi.h | 135 ++--- b/include/linux/pci.h |1 b/kernel/irq/msi.c | 41 + 32 files changed, 696 insertions(+), 1028 deletions(-)
Re: [patch 10/22] genirq/msi, treewide: Use a named struct for PCI/MSI attributes
Thomas Gleixner writes: > The unnamed struct sucks and is in the way of further cleanups. Stick the > PCI related MSI data into a real data structure and cleanup all users. > > No functional change. > > Signed-off-by: Thomas Gleixner > Cc: Greg Kroah-Hartman > Cc: sparcli...@vger.kernel.org > Cc: x...@kernel.org > Cc: xen-de...@lists.xenproject.org > Cc: ath...@lists.infradead.org > --- > arch/powerpc/platforms/cell/axon_msi.c|2 > arch/powerpc/platforms/powernv/pci-ioda.c |4 - > arch/powerpc/platforms/pseries/msi.c |6 - > arch/sparc/kernel/pci_msi.c |4 - > arch/x86/kernel/apic/msi.c|2 > arch/x86/pci/xen.c|6 - > drivers/net/wireless/ath/ath11k/pci.c |2 For ath11k: Acked-by: Kalle Valo -- https://patchwork.kernel.org/project/linux-wireless/list/ https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches
Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
> On 28-Nov-2021, at 10:04 PM, Jiri Olsa wrote: > > On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote: >> Sort key p_stage_cyc is used to present the latency >> cycles spent in pipeline stages. perf tool has local >> p_stage_cyc sort key to display this info. There is no >> global variant available for this sort key. local variant >> shows latency in a single sample, whereas, global value >> will be useful to present the total latency (sum of >> latencies) in the hist entry. It represents latency >> number multiplied by the number of samples. >> >> Add global (p_stage_cyc) and local variant >> (local_p_stage_cyc) for this sort key. Use the >> local_p_stage_cyc as default option for "mem" sort mode. >> Also add this to list of dynamic sort keys. >> >> Signed-off-by: Athira Rajeev >> Reported-by: Namhyung Kim > > I can't apply this to Arnaldo's perf/core, could you please rebase? > > patching file util/hist.c > patching file util/hist.h > patching file util/sort.c > Hunk #3 FAILED at 1392. > Hunk #4 succeeded at 1878 (offset 20 lines). > 1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej > patching file util/sort.h > > thanks, > jirka Hi Jiri, Thanks for checking this patch. Actually these changes are on top of three other fixes from Namhyung which are already part of upstream. Below are the commits. 784e8adda4cd ("perf sort: Fix the 'weight' sort key behavior”) 4d03c75363ee ("perf sort: Fix the 'ins_lat' sort key behavior”) db4b28402909 ("perf sort: Fix the 'p_stage_cyc' sort key behavior”) I checked in Arnaldo’s perf/core, but these commits are not there. But I could see them in 'tmp.perf/urgent' I think perf/core is not yet updated. 
Thanks Athira Rajeev > >> --- >> tools/perf/util/hist.c | 4 +++- >> tools/perf/util/hist.h | 3 ++- >> tools/perf/util/sort.c | 34 +- >> tools/perf/util/sort.h | 3 ++- >> 4 files changed, 32 insertions(+), 12 deletions(-) >> >> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c >> index b776465e04ef..0a8033b09e28 100644 >> --- a/tools/perf/util/hist.c >> +++ b/tools/perf/util/hist.c >> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct >> hist_entry *h) >> hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); >> hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); >> hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); >> -hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); >> +hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); >> +hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); >> + >> if (symbol_conf.nanosecs) >> hists__new_col_len(hists, HISTC_TIME, 16); >> else >> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h >> index 5343b62476e6..2752ce681108 100644 >> --- a/tools/perf/util/hist.h >> +++ b/tools/perf/util/hist.h >> @@ -75,7 +75,8 @@ enum hist_column { >> HISTC_MEM_BLOCKED, >> HISTC_LOCAL_INS_LAT, >> HISTC_GLOBAL_INS_LAT, >> -HISTC_P_STAGE_CYC, >> +HISTC_LOCAL_P_STAGE_CYC, >> +HISTC_GLOBAL_P_STAGE_CYC, >> HISTC_NR_COLS, /* Last entry */ >> }; >> >> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c >> index e9216a292a04..e978f7883e07 100644 >> --- a/tools/perf/util/sort.c >> +++ b/tools/perf/util/sort.c >> @@ -37,7 +37,7 @@ const char default_parent_pattern[] = >> "^sys_|^do_page_fault"; >> const char *parent_pattern = default_parent_pattern; >> const char *default_sort_order = "comm,dso,symbol"; >> const char default_branch_sort_order[] = >> "comm,dso_from,symbol_from,symbol_to,cycles"; >> -const char default_mem_sort_order[] = >> "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; >> +const char default_mem_sort_order[] = >> 
"local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc"; >> const char default_top_sort_order[] = "dso,symbol"; >> const char default_diff_sort_order[] = "dso,symbol"; >> const char default_tracepoint_sort_order[] = "trace"; >> @@ -46,8 +46,8 @@ const char *field_order; >> regex_t ignore_callees_regex; >> int have_ignore_callees = 0; >> enum sort_mode sort__mode = SORT_MODE__NORMAL; >> -const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; >> -const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; >> +const char *dynamic_headers[] = {"local_ins_lat", "ins_lat", >> "local_p_stage_cyc", "p_stage_cyc"}; >> +const char *arch_specific_sort_keys[] = {"local_p_stage_cyc", >> "p_stage_cyc"}; >> >> /* >> * Replaces all occurrences of a char used with the: >> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = { >> }; >> >> static int64_t >> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry >> *right) >> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) >> { >> return left->p_stage_cyc -
[PATCH] powerpc/rtas: Introduce rtas_get_sensor_nonblocking() for pci hotplug driver.
When certain PHB HW failure causes phyp to recover PHB, it marks the PE state as temporarily unavailable until recovery is complete. This also triggers an EEH handler in Linux which needs to notify drivers, and perform recovery. But before notifying the driver about the pci error it uses get_adapter_state()->get-sensor-state() operation of the hotplug_slot to determine if the slot contains a device or not. If the slot is empty, the recovery is skipped entirely. However on certain PHB failures, the rtas call get-sensor-state() returns extended busy error (9902) until PHB is recovered by phyp. Once PHB is recovered, the get-sensor-state() returns success with correct presence status. The rtas call interface rtas_get_sensor() loops over the rtas call on extended delay return code (9902) until the return value is either success (0) or error (-1). This causes the EEH handler to get stuck for ~6 seconds before it could notify that the pci error has been detected and stop any active operations. Hence with running I/O traffic, during this 6 seconds, the network driver continues its operation and hits a timeout (netdev watchdog). On timeouts, network driver go into ffdc capture mode and reset path assuming the PCI device is in fatal condition. This sometimes causes EEH recovery to fail. This impacts the ssh connection and leads to the system being inaccessible. [52732.244731] DEBUG: ibm_read_slot_reset_state2() [52732.244762] DEBUG: ret = 0, rets[0]=5, rets[1]=1, rets[2]=4000, rets[3]=> [52732.244798] DEBUG: in eeh_slot_presence_check [52732.244804] DEBUG: error state check [52732.244807] DEBUG: Is slot hotpluggable [52732.244810] DEBUG: hotpluggable ops ? [52732.244953] DEBUG: Calling ops->get_adapter_status [52732.244958] DEBUG: calling rpaphp_get_sensor_state [52736.564262] [ cut here ] [52736.564299] NETDEV WATCHDOG: enP64p1s0f3 (tg3): transmit queue 0 timed o> [52736.564324] WARNING: CPU: 1442 PID: 0 at net/sched/sch_generic.c:478 dev> [...] 
[52736.564505] NIP [c0c32368] dev_watchdog+0x438/0x440 [52736.564513] LR [c0c32364] dev_watchdog+0x434/0x440 Fix this issue by introducing a new rtas_get_sensor_nonblocking() that does not get blocked on BUSY condition and returns immediately with error. Use this function in pseries pci hotplug driver which can return an error if slot presence state can not be detected immediately. Please note that only in certain PHB failures, the slot presence check returns BUSY condition. In normal cases it returns immediately with a correct presence state value. Hence this change has no impact on normal pci dlpar operations. We could use rtas_get_sensor_fast() variant, but it throws WARN_ON on BUSY condition. The rtas_get_sensor_nonblocking() suppresses WARN_ON. Signed-off-by: Mahesh Salgaonkar --- This is an alternate approach to fix the EEH issue instead of delaying slot presence check proposed at https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-November/236956.html Also refer: https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-November/237027.html --- arch/powerpc/include/asm/rtas.h |1 + arch/powerpc/kernel/rtas.c | 19 --- drivers/pci/hotplug/rpaphp_pci.c |8 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9dc97d2f9d27e..d8e8befb1c193 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -250,6 +250,7 @@ extern void rtas_os_term(char *str); void rtas_activate_firmware(void); extern int rtas_get_sensor(int sensor, int index, int *state); extern int rtas_get_sensor_fast(int sensor, int index, int *state); +int rtas_get_sensor_nonblocking(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); extern int rtas_set_power_level(int powerdomain, int level, int *setlevel); extern bool rtas_indicator_present(int token, int *maxindex); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 
ac61e226c9af6..fd5aa3bbd46c5 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -609,7 +609,8 @@ int rtas_get_sensor(int sensor, int index, int *state) } EXPORT_SYMBOL(rtas_get_sensor); -int rtas_get_sensor_fast(int sensor, int index, int *state) +static int +__rtas_get_sensor(int sensor, int index, int *state, bool warn_on) { int token = rtas_token("get-sensor-state"); int rc; @@ -618,14 +619,26 @@ int rtas_get_sensor_fast(int sensor, int index, int *state) return -ENOENT; rc = rtas_call(token, 2, 2, state, sensor, index); - WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && - rc <= RTAS_EXTENDED_DELAY_MAX)); + WARN_ON(warn_on && + (rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && + rc <= RTAS_EXTENDED_DELAY_MAX)));
Re: [PATCH] powerpc/eeh: Delay slot presence check once driver is notified about the pci error.
On 2021-11-24 23:01:45 Wed, Oliver O'Halloran wrote: > On Wed, Nov 24, 2021 at 12:05 AM Mahesh Salgaonkar > wrote: > > > > *snip* > > > > This causes the EEH handler to get stuck for ~6 > > seconds before it could notify that the pci error has been detected and > > stop any active operations. Hence with running I/O traffic, during this 6 > > seconds, the network driver continues its operation and hits a timeout > > (netdev watchdog). On timeouts, network driver go into ffdc capture mode > > and reset path assuming the PCI device is in fatal condition. This causes > > EEH recovery to fail and sometimes it leads to system hang or crash. > > Whatever is causing that crash is the real issue IMO. PCI error I have seen crash only once but that was triggered by HTX tool and may not be related. However, the major concern here is EEH failure. I will correct the above statement in my next patch. > reporting is fundamentally asynchronous and the driver always has to > tolerate some amount of latency between the error occurring and being > reported. Six seconds is admittedly an eternity, but it should not > cause a system crash under any circumstances. Printing a warning due > to a timeout is annoying, but it's not the end of the world. Yeah, but due to timeout sometimes the driver gets into a situation where when EEH recovery kicks-in, the driver is unable to recover the device. Thus EEH recovery fails and disconnects the pci device even when it could have recovered. To recover, we need to either reboot the lpar or re-assign the I/O adapter from HMC to get it back in working condition. 
[16532.212197] EEH: PCI-E AER 30: [16532.213207] EEH: Reset without hotplug activity [16534.229469] bnx2x: [bnx2x_clean_tx_queue:1203(enP22p1s0f1)]timeout waiting for queue[2]: txdata->tx_pkt_prod(37003) != txdata->tx_pkt_cons(36996) [16534.385484] EEH: Beginning: 'slot_reset' [16534.385489] PCI 0016:01:00.0#1: EEH: Invoking bnx2x->slot_reset() [16536.229469] bnx2x: [bnx2x_clean_tx_queue:1203(enP22p1s0f1)]timeout waiting for queue[4]: txdata->tx_pkt_prod(64894) != txdata->tx_pkt_cons(64891) o[...] [16623.571502] bnx2x: [bnx2x_nic_load_request:2342(enP22p1s0f1)]MCP response failure, aborting [16623.571507] bnx2x: [bnx2x_acquire_hw_lock:2019(enP22p1s0f1)]lock_status 0x resource_bit 0x800 [16623.571881] bnx2x: [bnx2x_io_slot_reset:14359(enP22p1s0f0)]IO slot reset initializing... [16623.571976] bnx2x 0016:01:00.0: enabling device (0140 -> 0142) [16623.576169] bnx2x: [bnx2x_io_slot_reset:14375(enP22p1s0f0)]IO slot reset --> driver unload [16623.576174] PCI 0016:01:00.0#1: EEH: bnx2x driver reports: 'disconnect' [16623.576177] PCI 0016:01:00.1#1: EEH: Invoking bnx2x->slot_reset() [16623.576179] bnx2x: [bnx2x_io_slot_reset:14359(enP22p1s0f1)]IO slot reset initializing... [16623.576239] bnx2x 0016:01:00.1: enabling device (0140 -> 0142) [16623.580241] bnx2x: [bnx2x_io_slot_reset:14375(enP22p1s0f1)]IO slot reset --> driver unload [16623.580245] PCI 0016:01:00.1#1: EEH: bnx2x driver reports: 'disconnect' [16623.580246] EEH: Finished:'slot_reset' with aggregate recovery state:'disconnect' [16623.580250] EEH: Unable to recover from failure from PHB#16-PE#1. Thanks, -Mahesh. -- Mahesh J Salgaonkar
Re: [patch 17/22] PCI/MSI: Split out !IRQDOMAIN code
On 11/27/21 02:19, Thomas Gleixner wrote: Split out the non irqdomain code into its own file. Signed-off-by: Thomas Gleixner --- drivers/pci/msi/Makefile |5 ++-- drivers/pci/msi/legacy.c | 51 +++ drivers/pci/msi/msi.c| 46 -- 3 files changed, 54 insertions(+), 48 deletions(-) --- a/drivers/pci/msi/Makefile +++ b/drivers/pci/msi/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # # Makefile for the PCI/MSI -obj-$(CONFIG_PCI) += pcidev_msi.o -obj-$(CONFIG_PCI_MSI) += msi.o +obj-$(CONFIG_PCI) += pcidev_msi.o +obj-$(CONFIG_PCI_MSI) += msi.o +obj-$(CONFIG_PCI_MSI_ARCH_FALLBACKS) += legacy.o --- /dev/null +++ b/drivers/pci/msi/legacy.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI Message Signaled Interrupt (MSI). + * + * Legacy architecture specific setup and teardown mechanism. + */ +#include "msi.h" I am getting a : ../drivers/pci/msi/legacy.c:7:10: fatal error: msi.h: No such file or directory 7 | #include "msi.h" which seems to be fixed later. C. + +/* Arch hooks */ +int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ + return -EINVAL; +} + +void __weak arch_teardown_msi_irq(unsigned int irq) +{ +} + +int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + struct msi_desc *desc; + int ret; + + /* +* If an architecture wants to support multiple MSI, it needs to +* override arch_setup_msi_irqs() +*/ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + + for_each_pci_msi_entry(desc, dev) { + ret = arch_setup_msi_irq(dev, desc); + if (ret) + return ret < 0 ? 
ret : -ENOSPC; + } + + return 0; +} + +void __weak arch_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *desc; + int i; + + for_each_pci_msi_entry(desc, dev) { + if (desc->irq) { + for (i = 0; i < entry->nvec_used; i++) + arch_teardown_msi_irq(desc->irq + i); + } + } +} --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -50,52 +50,6 @@ static void pci_msi_teardown_msi_irqs(st #define pci_msi_teardown_msi_irqs arch_teardown_msi_irqs #endif -#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS -/* Arch hooks */ -int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - return -EINVAL; -} - -void __weak arch_teardown_msi_irq(unsigned int irq) -{ -} - -int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - struct msi_desc *entry; - int ret; - - /* -* If an architecture wants to support multiple MSI, it needs to -* override arch_setup_msi_irqs() -*/ - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - - for_each_pci_msi_entry(entry, dev) { - ret = arch_setup_msi_irq(dev, entry); - if (ret < 0) - return ret; - if (ret > 0) - return -ENOSPC; - } - - return 0; -} - -void __weak arch_teardown_msi_irqs(struct pci_dev *dev) -{ - int i; - struct msi_desc *entry; - - for_each_pci_msi_entry(entry, dev) - if (entry->irq) - for (i = 0; i < entry->nvec_used; i++) - arch_teardown_msi_irq(entry->irq + i); -} -#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */ - /* * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to * mask all MSI interrupts by clearing the MSI enable bit does not work