Re: [PATCH 4/4] powerpc/64s: Use POWER10 stsync barrier for wmb()
Michael Ellerman writes: > Nicholas Piggin writes: >> The most expensive ordering for hwsync to provide is the store-load >> barrier, because all prior stores have to be drained to the caches >> before subsequent instructions can complete. >> >> stsync just orders stores which means it can just be a barrer that >> goes down the store queue and orders draining, and does not prevent >> completion of subsequent instructions. So it should be faster than >> hwsync. >> >> Use stsync for wmb(). Older processors that don't recognise the SC >> field should treat this as hwsync. > > qemu (7.1) emulating ppc64e does not :/ > > mpic: Setting up MPIC " OpenPIC " version 1.2 at fe004, max 1 CPUs > mpic: ISU size: 256, shift: 8, mask: ff > mpic: Initializing for 256 sources > Oops: Exception in kernel mode, sig: 4 [#1] .. > > I guess just put it behind an #ifdef 64S. That doesn't work because qemu emulating a G5 also doesn't accept it. So either we need to get qemu updated and wait a while for that to percolate, or do some runtime patching of wmbs in the kernel >_< cheers
Re: [PATCH] powerpc/xmon: Fix comparing pointer
Le 14/06/2023 à 07:48, wuyonggang...@208suo.com a écrit : > [Vous ne recevez pas souvent de courriers de wuyonggang...@208suo.com. > D?couvrez pourquoi ceci est important ? > https://aka.ms/LearnAboutSenderIdentification ] > > Fix the following coccicheck warning: > > arch/powerpc/xmon/spu-dis.c:51:34-35: WARNING comparing pointer to 0 Once again, why do you change the formating of the document ? > > Signed-off-by: Yonggang Wu > --- > arch/powerpc/xmon/spu-dis.c | 384 ++-- > 1 file changed, 193 insertions(+), 191 deletions(-) > > diff --git a/arch/powerpc/xmon/spu-dis.c b/arch/powerpc/xmon/spu-dis.c > index 4b0a4e640f08..f48a2ddd7440 100644 > --- a/arch/powerpc/xmon/spu-dis.c > +++ b/arch/powerpc/xmon/spu-dis.c > @@ -22,216 +22,218 @@ extern const int spu_num_opcodes; > #define SPU_DISASM_TBL_SIZE (1 << 11) > static const struct spu_opcode > *spu_disassemble_table[SPU_DISASM_TBL_SIZE]; > > -static void > -init_spu_disassemble (void) > +static void init_spu_disassemble(void) > { > - int i; > - > - /* If two instructions have the same opcode then we prefer the first > - * one. In most cases it is just an alternate mnemonic. */ > - for (i = 0; i < spu_num_opcodes; i++) > - { > - int o = spu_opcodes[i].opcode; > - if (o >= SPU_DISASM_TBL_SIZE) > - continue; /* abort (); */ > - if (spu_disassemble_table[o] == 0) > - spu_disassemble_table[o] = _opcodes[i]; > - } > + int i; > + > + /* > + * If two instructions have the same opcode then we prefer the > first > + * one. In most cases it is just an alternate mnemonic. > + */ > + for (i = 0; i < spu_num_opcodes; i++) { > + int o = spu_opcodes[i].opcode; > + > + if (o >= SPU_DISASM_TBL_SIZE) > + continue; /* abort(); */ > + if (spu_disassemble_table[o] == NULL) > + spu_disassemble_table[o] = _opcodes[i]; > + } > } > > /* Determine the instruction from the 10 least significant bits. */ > -static const struct spu_opcode * > -get_index_for_opcode (unsigned int insn) > +static const struct spu_opcode *get_index_for_opcode(unsigned int insn) > { > - const struct spu_opcode *index; > - unsigned int opcode = insn >> (32-11); > - > - /* Init the table. This assumes that element 0/opcode 0 (currently > - * NOP) is always used */ > - if (spu_disassemble_table[0] == 0) > - init_spu_disassemble (); > - > - if ((index = spu_disassemble_table[opcode & 0x780]) != 0 > - && index->insn_type == RRR) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7f0]) != 0 > - && (index->insn_type == RI18 || index->insn_type == LBT)) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7f8]) != 0 > - && index->insn_type == RI10) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7fc]) != 0 > - && (index->insn_type == RI16)) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7fe]) != 0 > - && (index->insn_type == RI8)) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7ff]) != 0) > - return index; > - > - return NULL; > + const struct spu_opcode *index; > + unsigned int opcode = insn >> (32-11); > + > + /* > + * Init the table. This assumes that element 0/opcode 0 (currently > + * NOP) is always used > + */ > + if (spu_disassemble_table[0] == NULL) > + init_spu_disassemble(); > + > + index = spu_disassemble_table[opcode & 0x780]; > + if (index != NULL && index->insn_type == RRR) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7f0]; > + if (index != NULL > + && (index->insn_type == RI18 || index->insn_type == LBT)) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7f8]; > + if (index != NULL > + && index->insn_type == RI10) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7fc] > + if (index != NULL && (index->insn_type == RI16)) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7fe]; > + if (index != NULL && (index->insn_type == RI8)) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7ff]; > + if (index != NULL) > + return index; > + > + return NULL; > } > > /* Print a Spu instruction. */ > > -int > -print_insn_spu (unsigned long insn, unsigned long memaddr) > +int print_insn_spu(unsigned long insn, unsigned long memaddr) > { > - int value; > - int hex_value; > - const struct spu_opcode *index; > - enum spu_insns tag; > + int value; > + int hex_value; > + const struct spu_opcode *index; > + enum spu_insns tag; > > - index = get_index_for_opcode (insn); > + index = get_index_for_opcode(insn); > > - if (index == 0) > - { > - printf(".long 0x%lx", insn); > - } > - else > - { > - int i; > - int paren = 0; > - tag =
[PATCH] powerpc/xmon: Fix comparing pointer
Fix the following coccicheck warning: arch/powerpc/xmon/spu-dis.c:51:34-35: WARNING comparing pointer to 0 Signed-off-by: Yonggang Wu --- arch/powerpc/xmon/spu-dis.c | 384 ++-- 1 file changed, 193 insertions(+), 191 deletions(-) diff --git a/arch/powerpc/xmon/spu-dis.c b/arch/powerpc/xmon/spu-dis.c index 4b0a4e640f08..f48a2ddd7440 100644 --- a/arch/powerpc/xmon/spu-dis.c +++ b/arch/powerpc/xmon/spu-dis.c @@ -22,216 +22,218 @@ extern const int spu_num_opcodes; #define SPU_DISASM_TBL_SIZE (1 << 11) static const struct spu_opcode *spu_disassemble_table[SPU_DISASM_TBL_SIZE]; -static void -init_spu_disassemble (void) +static void init_spu_disassemble(void) { - int i; - - /* If two instructions have the same opcode then we prefer the first - * one. In most cases it is just an alternate mnemonic. */ - for (i = 0; i < spu_num_opcodes; i++) -{ - int o = spu_opcodes[i].opcode; - if (o >= SPU_DISASM_TBL_SIZE) -continue; /* abort (); */ - if (spu_disassemble_table[o] == 0) -spu_disassemble_table[o] = _opcodes[i]; -} +int i; + +/* + * If two instructions have the same opcode then we prefer the first + * one. In most cases it is just an alternate mnemonic. + */ +for (i = 0; i < spu_num_opcodes; i++) { +int o = spu_opcodes[i].opcode; + +if (o >= SPU_DISASM_TBL_SIZE) +continue; /* abort(); */ +if (spu_disassemble_table[o] == NULL) +spu_disassemble_table[o] = _opcodes[i]; +} } /* Determine the instruction from the 10 least significant bits. */ -static const struct spu_opcode * -get_index_for_opcode (unsigned int insn) +static const struct spu_opcode *get_index_for_opcode(unsigned int insn) { - const struct spu_opcode *index; - unsigned int opcode = insn >> (32-11); - - /* Init the table. This assumes that element 0/opcode 0 (currently - * NOP) is always used */ - if (spu_disassemble_table[0] == 0) -init_spu_disassemble (); - - if ((index = spu_disassemble_table[opcode & 0x780]) != 0 - && index->insn_type == RRR) -return index; - - if ((index = spu_disassemble_table[opcode & 0x7f0]) != 0 - && (index->insn_type == RI18 || index->insn_type == LBT)) -return index; - - if ((index = spu_disassemble_table[opcode & 0x7f8]) != 0 - && index->insn_type == RI10) -return index; - - if ((index = spu_disassemble_table[opcode & 0x7fc]) != 0 - && (index->insn_type == RI16)) -return index; - - if ((index = spu_disassemble_table[opcode & 0x7fe]) != 0 - && (index->insn_type == RI8)) -return index; - - if ((index = spu_disassemble_table[opcode & 0x7ff]) != 0) -return index; - - return NULL; +const struct spu_opcode *index; +unsigned int opcode = insn >> (32-11); + +/* + * Init the table. This assumes that element 0/opcode 0 (currently + * NOP) is always used + */ +if (spu_disassemble_table[0] == NULL) +init_spu_disassemble(); + +index = spu_disassemble_table[opcode & 0x780]; +if (index != NULL && index->insn_type == RRR) +return index; + +index = spu_disassemble_table[opcode & 0x7f0]; +if (index != NULL + && (index->insn_type == RI18 || index->insn_type == LBT)) +return index; + +index = spu_disassemble_table[opcode & 0x7f8]; +if (index != NULL + && index->insn_type == RI10) +return index; + +index = spu_disassemble_table[opcode & 0x7fc] +if (index != NULL && (index->insn_type == RI16)) +return index; + +index = spu_disassemble_table[opcode & 0x7fe]; +if (index != NULL && (index->insn_type == RI8)) +return index; + +index = spu_disassemble_table[opcode & 0x7ff]; +if (index != NULL) +return index; + +return NULL; } /* Print a Spu instruction. */ -int -print_insn_spu (unsigned long insn, unsigned long memaddr) +int print_insn_spu(unsigned long insn, unsigned long memaddr) { - int value; - int hex_value; - const struct spu_opcode *index; - enum spu_insns tag; +int value; +int hex_value; +const struct spu_opcode *index; +enum spu_insns tag; - index = get_index_for_opcode (insn); +index = get_index_for_opcode(insn); - if (index == 0) -{ - printf(".long 0x%lx", insn); -} - else -{ - int i; - int paren = 0; - tag = (enum spu_insns)(index - spu_opcodes); - printf("%s", index->mnemonic); - if (tag == M_BI || tag == M_BISL || tag == M_IRET || tag == M_BISLED - || tag == M_BIHNZ || tag == M_BIHZ || tag == M_BINZ || tag == M_BIZ - || tag == M_SYNC || tag == M_HBR) +if (index == NULL) { - int fb = (insn >> (32-18)) & 0x7f; - if (fb & 0x40) -printf(tag == M_SYNC ? "c" : "p"); - if (fb & 0x20) -printf("d"); - if (fb & 0x10) -printf("e"); -} - if (index->arg[0] != 0) -printf("\t"); - hex_value = 0; - for (i = 1; i <=
Re: [PATCH 2/4] powerpc/64s: Add POWER10 store sync mnemonics
On Tue Jun 13, 2023 at 3:31 PM AEST, Joel Stanley wrote: > On Fri, 9 Jun 2023 at 10:01, Nicholas Piggin wrote: > > > > ISA v3.1 introduces new sync types for store ordering. > > > > stncisync > > stcisync > > stsync > > > > Add ppc-opcode defines for these. This changes PPC_RAW_SYNC to take > > L,SC parameters and adds a PPC_RAW_HWSYNC for callers that want the > > plain old sync (aka hwsync). > > I checked these against the ISA and they seem correct. > > Did you consider changing LWSYNC to be defined in terms of your new > PPC_RAW_SYNC? Oh I haven't but it would be consistent to change that wouldn't it? > > Reviewed-by: Joel Stanley . Thanks, Nick
Re: [PATCH 00/16] Add support for DAX vmemmap optimization for ppc64
"Aneesh Kumar K.V" writes: > This patch series implements changes required to support DAX vmemmap > optimization for ppc64. The vmemmap optimization is only enabled with radix > MMU > translation and 1GB PUD mapping with 64K page size. The patch series also > split > hugetlb vmemmap optimization as a separate Kconfig variable so that > architectures can enable DAX vmemmap optimization without enabling hugetlb > vmemmap optimization. This should enable architectures like arm64 to enable > DAX > vmemmap optimization while they can't enable hugetlb vmemmap optimization. > More > details of the same are in patch "mm/vmemmap optimization: Split hugetlb and > d > > Aneesh Kumar K.V (16): > powerpc/mm/book3s64: Use pmdp_ptep helper instead of typecasting. > powerpc/book3s64/mm: mmu_vmemmap_psize is used by radix > powerpc/book3s64/mm: Fix DirectMap stats in /proc/meminfo > powerpc/book3s64/mm: Use PAGE_KERNEL instead of opencoding > powerpc/mm/dax: Fix the condition when checking if altmap vmemap can > cross-boundary > mm/hugepage pud: Allow arch-specific helper function to check huge > page pud support > mm: Change pudp_huge_get_and_clear_full take vm_area_struct as arg > mm/vmemmap: Improve vmemmap_can_optimize and allow architectures to > override > mm/vmemmap: Allow architectures to override how vmemmap optimization > works > mm: Add __HAVE_ARCH_PUD_SAME similar to __HAVE_ARCH_P4D_SAME > mm/huge pud: Use transparent huge pud helpers only with > CONFIG_TRANSPARENT_HUGEPAGE > mm/vmemmap optimization: Split hugetlb and devdax vmemmap optimization > powerpc/book3s64/mm: Enable transparent pud hugepage > powerpc/book3s64/vmemmap: Switch radix to use a different vmemmap > handling function > powerpc/book3s64/radix: Add support for vmemmap optimization for radix > powerpc/book3s64/radix: Remove mmu_vmemmap_psize Gentle ping. Any objections for this series? -aneesh
Re: [PATCH] KVM: PPC: remove unneeded variable
On Wed, Jun 14, 2023 at 10:34:45AM +0800, baomingtong...@208suo.com wrote: > fix the following coccicheck warning: > > arch/powerpc/kvm/book3s_pr.c:424:5-6: Unneeded variable: "r". > > Signed-off-by: Mingtong Bao > --- > arch/powerpc/kvm/book3s_pr.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c > index 9118242063fb..d03b31b240d7 100644 > --- a/arch/powerpc/kvm/book3s_pr.c > +++ b/arch/powerpc/kvm/book3s_pr.c > @@ -421,14 +421,14 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) > > static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) > { > -int r = 1; /* Indicate we want to get back into the guest */ > +/* Indicate we want to get back into the guest */ Instead of leaving the comment here, where it makes no sense, please move it to the return statement below. > > /* We misuse TLB_FLUSH to indicate that we want to clear > all shadow cache entries */ > if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) > kvmppc_mmu_pte_flush(vcpu, 0, 0); > > -return r; > +return 1; > } > > /* MMU Notifiers */ Paul.
[PATCH] KVM: PPC: remove unneeded variable
fix the following coccicheck warning: arch/powerpc/kvm/book3s_pr.c:424:5-6: Unneeded variable: "r". Signed-off-by: Mingtong Bao --- arch/powerpc/kvm/book3s_pr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9118242063fb..d03b31b240d7 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -421,14 +421,14 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) { -int r = 1; /* Indicate we want to get back into the guest */ +/* Indicate we want to get back into the guest */ /* We misuse TLB_FLUSH to indicate that we want to clear all shadow cache entries */ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvmppc_mmu_pte_flush(vcpu, 0, 0); -return r; +return 1; } /* MMU Notifiers */
Re: [PATCH 01/17] perf: get rid of unused import
On Tue, Jun 13, 2023 at 11:34:11PM -0300, Arnaldo Carvalho de Melo wrote: [...] > > Since have applied this patch, it's no need to give my review tag :) > > No, I usually can add a Reviewed-by tag even after having applied it to > my local tree, as I still need to run tests before making it available > via perf-tools-next, when I should not make any further changes. Understand now. Here is my review tag: Reviewed-by: Leo Yan
Re: [PATCH 01/17] perf: get rid of unused import
Em Wed, Jun 14, 2023 at 09:59:14AM +0800, Leo Yan escreveu: > On Tue, Jun 13, 2023 at 04:54:08PM -0300, Arnaldo Carvalho de Melo wrote: > > Em Tue, Jun 13, 2023 at 10:11:29PM +0530, Athira Rajeev escreveu: > > > From: Sourabh Jain > > > > > > Script doesn't use sys library, so remove it. > > > > Please Cc the persons working on that file, I added Leo to the CC list > > of this message. > > Thanks, Arnaldo & Athira. The change looks good to me. > > > Thanks, applied. > > Since have applied this patch, it's no need to give my review tag :) No, I usually can add a Reviewed-by tag even after having applied it to my local tree, as I still need to run tests before making it available via perf-tools-next, when I should not make any further changes. - Arnaldo > Thanks, > Leo > > > - Arnaldo > > > > > Report by pylint: > > > W0611: Unused import sys (unused-import) > > > > > > Signed-off-by: Athira Rajeev > > > Signed-off-by: Kajol Jain > > > Signed-off-by: Sourabh Jain > > > --- > > > tools/perf/scripts/python/arm-cs-trace-disasm.py | 1 - > > > 1 file changed, 1 deletion(-) > > > > > > diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py > > > b/tools/perf/scripts/python/arm-cs-trace-disasm.py > > > index 4339692a8d0b..d59ff53f1d94 100755 > > > --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py > > > +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py > > > @@ -9,7 +9,6 @@ > > > from __future__ import print_function > > > import os > > > from os import path > > > -import sys > > > import re > > > from subprocess import * > > > from optparse import OptionParser, make_option > > > -- > > > 2.39.1 > > > > > > > -- > > > > - Arnaldo -- - Arnaldo
Re: [PATCH 06/17] tools/perf/tests: Fix shellcheck warnings for trace+probe_vfs_getname.sh
Em Tue, Jun 13, 2023 at 10:11:34PM +0530, Athira Rajeev escreveu: > From: Akanksha J N > > Fix the shellcheck warnings on powerpc and x86 for testcase > trace+probe_vfs_getname.sh. Add quotes to prevent word splitting > which are caused by unquoted command expansions. > > Before fix: > > $ shellcheck -S warning trace+probe_vfs_getname.sh > > In trace+probe_vfs_getname.sh line 13: > . $(dirname $0)/lib/probe.sh > ^---^ SC2046 (warning): Quote this to prevent word splitting. > > In trace+probe_vfs_getname.sh line 18: > . $(dirname $0)/lib/probe_vfs_getname.sh > ^---^ SC2046 (warning): Quote this to prevent word splitting. > > In trace+probe_vfs_getname.sh line 21: > evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | > grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ > /,/') > ^-- SC2046 (warning): Quote this to prevent word > splitting. > > 100: Check open filename arg using perf trace + vfs_getname : > Ok > > After the fix: > > $ shellcheck -S warning trace+probe_vfs_getname.sh > > 100: Check open filename arg using perf trace + vfs_getname : > Ok So, I tried this on x86_64, fedora and get: [root@quaco ~]# perf test "trace + vfs" 115: Check open filename arg using perf trace + vfs_getname : FAILED! [root@quaco ~]# Then, looking at the change: > - evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E > 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') > + evts=$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E > 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed 's/ /,/') So, before: [root@quaco ~]# evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') [root@quaco ~]# echo $evts open,openat [root@quaco ~]# Then after: [root@quaco ~]# evts=$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed 's/ /,/') [root@quaco ~]# echo $evts open openat [root@quaco ~]# Not equivalent, so I'm removing this patch, please check and resubmit, please. - Arnaldo > > Signed-off-by: Athira Rajeev > Signed-off-by: Kajol Jain > Signed-off-by: Akanksha J N > Signed-off-by: Abhishek Singh Tomar > Signed-off-by: Saket > Signed-off-by: Avnish Chouhan > --- > tools/perf/tests/shell/trace+probe_vfs_getname.sh | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh > b/tools/perf/tests/shell/trace+probe_vfs_getname.sh > index 0a4bac3dd77e..935eac7efa47 100755 > --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh > +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh > @@ -10,15 +10,15 @@ > # SPDX-License-Identifier: GPL-2.0 > # Arnaldo Carvalho de Melo , 2017 > > -. $(dirname $0)/lib/probe.sh > +. "$(dirname $0)"/lib/probe.sh > > skip_if_no_perf_probe || exit 2 > skip_if_no_perf_trace || exit 2 > > -. $(dirname $0)/lib/probe_vfs_getname.sh > +. "$(dirname $0)"/lib/probe_vfs_getname.sh > > trace_open_vfs_getname() { > - evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E > 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') > + evts=$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E > 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed 's/ /,/') > perf trace -e $evts touch $file 2>&1 | \ > grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ > open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: > CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" > } > -- > 2.39.1 > -- - Arnaldo
[powerpc:fixes] BUILD SUCCESS dfaed3e1fa7099de8de4e89cbe7eb9c1bca27dfe
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes branch HEAD: dfaed3e1fa7099de8de4e89cbe7eb9c1bca27dfe powerpc/64s/radix: Fix exit lazy tlb mm switch with irqs enabled elapsed time: 728m configs tested: 5 configs skipped: 112 The following configs have been built successfully. More configs may be tested in the coming days. tested configs: powerpc allmodconfig gcc powerpc allnoconfig gcc powerpc buildonly-randconfig-r001-20230612 gcc powerpc buildonly-randconfig-r004-20230612 gcc powerpc buildonly-randconfig-r005-20230612 gcc -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Re: [PATCH v1 01/21] kexec: consolidate kexec and crash options into kernel/Kconfig.kexec
On 2023/6/13 1:27, Eric DeVolder wrote: > The config options for kexec and crash features are consolidated > into new file kernel/Kconfig.kexec. Under the "General Setup" submenu > is a new submenu "Kexec and crash handling" where all the kexec and > crash options that were once in the arch-dependent submenu "Processor > type and features" are now consolidated. > > The following options are impacted: > > - KEXEC > - KEXEC_FILE > - KEXEC_SIG > - KEXEC_SIG_FORCE > - KEXEC_BZIMAGE_VERIFY_SIG > - KEXEC_JUMP > - CRASH_DUMP > > The three main options are KEXEC, KEXEC_FILE and CRASH_DUMP. > > Architectures specify support of certain KEXEC and CRASH features with > similarly named new ARCH_HAS_ config options. > > Architectures can utilize the new ARCH_SUPPORTS_ config > options to specify additional components when is enabled. > > To summarize, the ARCH_HAS_ permits the to be > enabled, and the ARCH_SUPPORTS_ handles side effects (ie. > select statements). > > Signed-off-by: Eric DeVolder > --- > arch/Kconfig | 13 -- > init/Kconfig | 2 + > kernel/Kconfig.kexec | 103 +++ > 3 files changed, 105 insertions(+), 13 deletions(-) > create mode 100644 kernel/Kconfig.kexec > > diff --git a/arch/Kconfig b/arch/Kconfig > index 205fd23e0cad..a37730679730 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -11,19 +11,6 @@ source "arch/$(SRCARCH)/Kconfig" > > menu "General architecture-dependent options" > > -config CRASH_CORE > - bool > - > -config KEXEC_CORE > - select CRASH_CORE > - bool > - > -config KEXEC_ELF > - bool > - > -config HAVE_IMA_KEXEC > - bool > - > config ARCH_HAS_SUBPAGE_FAULTS > bool > help > diff --git a/init/Kconfig b/init/Kconfig > index 32c24950c4ce..4424447e23a5 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -1917,6 +1917,8 @@ config BINDGEN_VERSION_TEXT > config TRACEPOINTS > bool > > +source "kernel/Kconfig.kexec" > + > endmenu # General setup > > source "arch/Kconfig" > diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec > new file mode 100644 > index ..660048099865 > --- /dev/null > +++ b/kernel/Kconfig.kexec > @@ -0,0 +1,103 @@ > +# SPDX-License-Identifier: GPL-2.0-only > + > +menu "Kexec and crash features" > + > +config CRASH_CORE > + bool > + > +config KEXEC_CORE > + select CRASH_CORE > + bool > + > +config KEXEC_ELF > + bool > + > +config HAVE_IMA_KEXEC > + bool > + > +config KEXEC > + bool "Enable kexec system call" > + default ARCH_DEFAULT_KEXEC > + depends on ARCH_HAS_KEXEC > + select KEXEC_CORE > + help > + kexec is a system call that implements the ability to shutdown your > + current kernel, and to start another kernel. It is like a reboot > + but it is independent of the system firmware. And like a reboot > + you can start any kernel with it, not just Linux. "kernel. It is like", "firmware. And like" A few more spaces, I don't know the original author's intention, perhaps can be removed. > + > + The name comes from the similarity to the exec system call. > + > + It is an ongoing process to be certain the hardware in a machine > + is properly shutdown, so do not be surprised if this code does not > + initially work for you. As of this writing the exact hardware > + interface is strongly in flux, so no good recommendation can be > + made. > + > +config KEXEC_FILE > + bool "Enable kexec file based system call" > + depends on ARCH_HAS_KEXEC_FILE > + select KEXEC_CORE > + help > + This is new version of kexec system call. This system call is > + file based and takes file descriptors as system call argument > + for kernel and initramfs as opposed to list of segments as > + accepted by previous system call. > + > +config KEXEC_SIG > + bool "Verify kernel signature during kexec_file_load() syscall" > + depends on KEXEC_FILE && MODULE_SIG_FORMAT I see that there is no "depends on MODULE_SIG_FORMAT" on x86 and arm64. > + help > + This blank line can be deleted. > + This option makes the kexec_file_load() syscall check for a valid > + signature of the kernel image. The image can still be loaded without > + a valid signature unless you also enable KEXEC_SIG_FORCE, though if > + there's a signature that we can check, then it must be valid. > + > + In addition to this option, you need to enable signature > + verification for the corresponding kernel image type being > + loaded in order for this to work. > + > +config KEXEC_SIG_FORCE > + bool "Require a valid signature in kexec_file_load() syscall" > + depends on KEXEC_SIG > + help > + This option makes kernel signature verification mandatory for > + the kexec_file_load() syscall. > + > +config KEXEC_BZIMAGE_VERIFY_SIG > + bool "Enable
Re: [PATCH v1 05/21] arm64/kexec: refactor for kernel/Kconfig.kexec
On 2023/6/13 1:27, Eric DeVolder wrote: > The kexec and crash kernel options are provided in the common > kernel/Kconfig.kexec. Utilize the common options and provide > the ARCH_HAS_ and ARCH_SUPPORTS_ entries to recreate the > equivalent set of KEXEC and CRASH options. > > Signed-off-by: Eric DeVolder > --- > arch/arm64/Kconfig | 61 -- > 1 file changed, 10 insertions(+), 51 deletions(-) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 343e1e1cae10..33552476a877 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -1433,60 +1433,19 @@ config PARAVIRT_TIME_ACCOUNTING > > If in doubt, say N here. > > -config KEXEC > - depends on PM_SLEEP_SMP > - select KEXEC_CORE > - bool "kexec system call" > - help > - kexec is a system call that implements the ability to shutdown your > - current kernel, and to start another kernel. It is like a reboot > - but it is independent of the system firmware. And like a reboot > - you can start any kernel with it, not just Linux. > - > -config KEXEC_FILE > - bool "kexec file based system call" > - select KEXEC_CORE > - select HAVE_IMA_KEXEC if IMA > - help > - This is new version of kexec system call. This system call is > - file based and takes file descriptors as system call argument > - for kernel and initramfs as opposed to list of segments as > - accepted by previous system call. > - > -config KEXEC_SIG > - bool "Verify kernel signature during kexec_file_load() syscall" > - depends on KEXEC_FILE > - help > - Select this option to verify a signature with loaded kernel > - image. If configured, any attempt of loading a image without > - valid signature will fail. > - > - In addition to that option, you need to enable signature > - verification for the corresponding kernel image type being > - loaded in order for this to work. > +config ARCH_HAS_KEXEC > + def_bool PM_SLEEP_SMP > > -config KEXEC_IMAGE_VERIFY_SIG > - bool "Enable Image signature verification support" > - default y > - depends on KEXEC_SIG > - depends on EFI && SIGNED_PE_FILE_VERIFICATION > - help > - Enable Image signature verification support. I don't see an alternative to this option. It's used in arch/arm64/kernel/kexec_image.c:135 > - > -comment "Support for PE file signature verification disabled" > - depends on KEXEC_SIG > - depends on !EFI || !SIGNED_PE_FILE_VERIFICATION > +config ARCH_HAS_KEXEC_FILE > + def_bool y > > -config CRASH_DUMP > - bool "Build kdump crash kernel" > - help > - Generate crash dump after being started by kexec. This should > - be normally only set in special crash dump kernels which are > - loaded in the main kernel with kexec-tools into a specially > - reserved region and then later executed after a crash by > - kdump/kexec. > +config ARCH_SUPPORTS_KEXEC_FILE > + def_bool y > + depends on KEXEC_FILE > + select HAVE_IMA_KEXEC if IMA > > - For more details see Documentation/admin-guide/kdump/kdump.rst > +config ARCH_HAS_CRASH_DUMP > + def_bool y > > config TRANS_TABLE > def_bool y > -- Regards, Zhen Lei
Re: [PATCH 01/17] perf: get rid of unused import
On Tue, Jun 13, 2023 at 04:54:08PM -0300, Arnaldo Carvalho de Melo wrote: > Em Tue, Jun 13, 2023 at 10:11:29PM +0530, Athira Rajeev escreveu: > > From: Sourabh Jain > > > > Script doesn't use sys library, so remove it. > > Please Cc the persons working on that file, I added Leo to the CC list > of this message. Thanks, Arnaldo & Athira. The change looks good to me. > Thanks, applied. Since have applied this patch, it's no need to give my review tag :) Thanks, Leo > - Arnaldo > > > Report by pylint: > > W0611: Unused import sys (unused-import) > > > > Signed-off-by: Athira Rajeev > > Signed-off-by: Kajol Jain > > Signed-off-by: Sourabh Jain > > --- > > tools/perf/scripts/python/arm-cs-trace-disasm.py | 1 - > > 1 file changed, 1 deletion(-) > > > > diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py > > b/tools/perf/scripts/python/arm-cs-trace-disasm.py > > index 4339692a8d0b..d59ff53f1d94 100755 > > --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py > > +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py > > @@ -9,7 +9,6 @@ > > from __future__ import print_function > > import os > > from os import path > > -import sys > > import re > > from subprocess import * > > from optparse import OptionParser, make_option > > -- > > 2.39.1 > > > > -- > > - Arnaldo
Re: [kvm-unit-tests v4 00/12] powerpc: updates, P10, PNV support
On Thu, 8 Jun 2023 at 07:58, Nicholas Piggin wrote: > > Posting again, a couple of patches were merged and accounted for review > comments from last time. I saw some failures in the spr tests running on a power9 powernv system: $ TESTNAME=sprs TIMEOUT=90s ACCEL= ./powerpc/run powerpc/sprs.elf -smp 1 |grep FAIL FAIL: WORT ( 895):0xc0deba80 <==> 0x $ MIGRATION=yes TESTNAME=sprs-migration TIMEOUT=90s ACCEL= ./powerpc/run powerpc/sprs.elf -smp 1 -append '-w' | grep FAIL FAIL: SRR0 ( 26):0xcafefacec0debabc <==> 0x00402244 FAIL: SRR1 ( 27):0xc006409ebab6 <==> 0x80001001 FAIL: CTRL ( 136):0x <==> 0x8001 FAIL: WORT ( 895):0xc0deba80 <==> 0x FAIL: PIR (1023):0x0010 <==> 0x0049 Linux 6.2.0-20-generic QEMU emulator version 7.2.0 (Debian 1:7.2+dfsg-5ubuntu2) On a power8 powernv: MIGRATION=yes TESTNAME=sprs-migration TIMEOUT=90s ACCEL= ./powerpc/run powerpc/sprs.elf -smp 1 -append '-w' |grep FAIL FAIL: SRR0 ( 26):0xcafefacec0debabc <==> 0x00402234 FAIL: SRR1 ( 27):0xc006409ebab6 <==> 0x80001000 FAIL: CTRL ( 136):0x <==> 0x8001 FAIL: PIR (1023):0x0060 <==> 0x0030 Linux 5.4.0-146-generic QEMU emulator version 4.2.1 (Debian 1:4.2-3ubuntu6.26) Cheers, Joel > > Thanks, > Nick > > Nicholas Piggin (12): > powerpc: Report instruction address and MSR in unhandled exception > error > powerpc: Add some checking to exception handler install > powerpc: Abstract H_CEDE calls into a sleep functions > powerpc: Add ISA v3.1 (POWER10) support to SPR test > powerpc: Extract some common helpers and defines to headers > powerpc/sprs: Specify SPRs with data rather than code > powerpc/spapr_vpa: Add basic VPA tests > powerpc: Expand exception handler vector granularity > powerpc: Add support for more interrupts including HV interrupts > powerpc: Discover runtime load address dynamically > powerpc: Support powernv machine with QEMU TCG > powerpc/sprs: Test hypervisor registers on powernv machine > > lib/powerpc/asm/handlers.h | 2 +- > lib/powerpc/asm/hcall.h | 1 + > lib/powerpc/asm/ppc_asm.h | 9 + > lib/powerpc/asm/processor.h | 55 ++- > lib/powerpc/handlers.c | 10 +- > lib/powerpc/hcall.c | 4 +- > lib/powerpc/io.c| 27 +- > lib/powerpc/io.h| 6 + > lib/powerpc/processor.c | 79 - > lib/powerpc/setup.c | 8 +- > lib/ppc64/asm/opal.h| 15 + > lib/ppc64/asm/vpa.h | 62 > lib/ppc64/opal-calls.S | 46 +++ > lib/ppc64/opal.c| 74 + > powerpc/Makefile.ppc64 | 4 +- > powerpc/cstart64.S | 105 -- > powerpc/run | 35 +- > powerpc/spapr_hcall.c | 9 +- > powerpc/spapr_vpa.c | 172 ++ > powerpc/sprs.c | 645 ++-- > powerpc/tm.c| 20 +- > powerpc/unittests.cfg | 3 + > 22 files changed, 1133 insertions(+), 258 deletions(-) > create mode 100644 lib/ppc64/asm/opal.h > create mode 100644 lib/ppc64/asm/vpa.h > create mode 100644 lib/ppc64/opal-calls.S > create mode 100644 lib/ppc64/opal.c > create mode 100644 powerpc/spapr_vpa.c > > -- > 2.40.1 >
Re: [PATCH v4 27/34] nios2: Convert __pte_free_tlb() to use ptdescs
On 6/12/23 16:04, Vishal Moola (Oracle) wrote: Part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents. Signed-off-by: Vishal Moola (Oracle) --- arch/nios2/include/asm/pgalloc.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index ecd1657bb2ce..ce6bb8e74271 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -28,10 +28,10 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, extern pgd_t *pgd_alloc(struct mm_struct *mm); -#define __pte_free_tlb(tlb, pte, addr)\ - do {\ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb), (pte)); \ +#define __pte_free_tlb(tlb, pte, addr) \ + do {\ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) #endif /* _ASM_NIOS2_PGALLOC_H */ Applied! Thanks, Dinh
Re: [PATCH 01/13] nios2: define virtual address space for modules
On 6/1/23 05:12, Mike Rapoport wrote: From: "Mike Rapoport (IBM)" nios2 uses kmalloc() to implement module_alloc() because CALL26/PCREL26 cannot reach all of vmalloc address space. Define module space as 32MiB below the kernel base and switch nios2 to use vmalloc for module allocations. Suggested-by: Thomas Gleixner Signed-off-by: Mike Rapoport (IBM) --- arch/nios2/include/asm/pgtable.h | 5 - arch/nios2/kernel/module.c | 19 --- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 0f5c2564e9f5..0073b289c6a4 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -25,7 +25,10 @@ #include #define VMALLOC_START CONFIG_NIOS2_KERNEL_MMU_REGION_BASE -#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1) +#define VMALLOC_END(CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M - 1) + +#define MODULES_VADDR (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M) +#define MODULES_END(CONFIG_NIOS2_KERNEL_REGION_BASE - 1) struct mm_struct; diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c index 76e0a42d6e36..9c97b7513853 100644 --- a/arch/nios2/kernel/module.c +++ b/arch/nios2/kernel/module.c @@ -21,23 +21,12 @@ #include -/* - * Modules should NOT be allocated with kmalloc for (obvious) reasons. - * But we do it for now to avoid relocation issues. CALL26/PCREL26 cannot reach - * from 0x8000 (vmalloc area) to 0xc (kernel) (kmalloc returns - * addresses in 0xc000) - */ void *module_alloc(unsigned long size) { - if (size == 0) - return NULL; - return kmalloc(size, GFP_KERNEL); -} - -/* Free memory returned from module_alloc */ -void module_memfree(void *module_region) -{ - kfree(module_region); + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); } int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, Acked-by: Dinh Nguyen
Re: [PATCH 16/17] perf tests task_analyzer: print command on failure
* Arnaldo Carvalho de Melo | 2023-06-13 17:07:52 [-0300]: >> From: Aditya Gupta >> >> Instead of printing "perf command failed" everytime, print the exact >> command that run earlier > >Looks like a nice improvement, added the test authors to the CC list on >this message, Thank you Aditya and really great catch - thank you very much! Shell scripting can be nasty. This is another example where you can see that with one stupid typo the whole system can be undermined and you don't get a single hint. Acked-by: Hagen Paul Pfeifer
Re: [PATCH v1 00/21] refactor Kconfig to consolidate KEXEC and CRASH options
On Mon, Jun 12, 2023 at 01:27:52PM -0400, Eric DeVolder wrote: > The Kconfig is refactored to consolidate KEXEC and CRASH options from > various arch//Kconfig files into new file kernel/Kconfig.kexec. This looks very nice! > [...] > - The boolean ARCH_HAS_ in effect allows the arch to determine > when the feature is allowed. Archs which don't have the feature > simply do not provide the corresponding ARCH_HAS_. > For each arch, where there previously were KEXEC and/or CRASH > options, these have been replaced with the corresponding boolean > ARCH_HAS_, and an appropriate def_bool statement. > > For example, if the arch supports KEXEC_FILE, then the > ARCH_HAS_KEXEC_FILE simply has a 'def_bool y'. This permits the > KEXEC_FILE option to be available. > > If the arch has a 'depends on' statement in its original coding > of the option, then that expression becomes part of the def_bool > expression. For example, arm64 had: > > config KEXEC > depends on PM_SLEEP_SMP > > and in this solution, this converts to: > > config ARCH_HAS_KEXEC > def_bool PM_SLEEP_SMP > > > - In order to account for the differences in the config coding for > the three common options, the ARCH_SUPPORTS_ is used. > This options has a 'depends on ' statement to couple it > to the main option, and from there can insert the differences > from the common option and the arch original coding of that option. > > For example, a few archs enable CRYPTO and CRYTPO_SHA256 for > KEXEC_FILE. These require a ARCH_SUPPORTS_KEXEC_FILE and > 'select CRYPTO' and 'select CRYPTO_SHA256' statements. Naming nit: "HAS" and "SUPPORTS" feel very similar, and looking at existing configs, "ARCH_SUPPORTS_..." is already used for doing this kind of bare "bool" management. e.g. see ARCH_SUPPORTS_INT128 It looks like you need to split "depends" and "select" so the options can be chosen separately from the "selectable" configs. How about naming this ARCH_SELECTS_, since that's what it's there for? -Kees -- Kees Cook
Re: [PATCH v9 01/42] mm: Rename arch pte_mkwrite()'s to pte_mkwrite_novma()
On Tue, 2023-06-13 at 09:19 +0200, Geert Uytterhoeven wrote: > Acked-by: Geert Uytterhoeven Thanks!
Re: [PATCH v9 01/42] mm: Rename arch pte_mkwrite()'s to pte_mkwrite_novma()
On Tue, 2023-06-13 at 10:43 +0300, Mike Rapoport wrote: > On Mon, Jun 12, 2023 at 05:10:27PM -0700, Rick Edgecombe wrote: > > The x86 Shadow stack feature includes a new type of memory called > > shadow > > stack. This shadow stack memory has some unusual properties, which > > requires > > some core mm changes to function properly. > > > > One of these unusual properties is that shadow stack memory is > > writable, > > but only in limited ways. These limits are applied via a specific > > PTE > > bit combination. Nevertheless, the memory is writable, and core mm > > code > > will need to apply the writable permissions in the typical paths > > that > > call pte_mkwrite(). Future patches will make pte_mkwrite() take a > > VMA, so > > that the x86 implementation of it can know whether to create > > regular > > writable memory or shadow stack memory. > > Nit: ^ mapping? Hmm, sure. > > > But there are a couple of challenges to this. Modifying the > > signatures of > > each arch pte_mkwrite() implementation would be error prone because > > some > > are generated with macros and would need to be re-implemented. > > Also, some > > pte_mkwrite() callers operate on kernel memory without a VMA. > > > > So this can be done in a three step process. First pte_mkwrite() > > can be > > renamed to pte_mkwrite_novma() in each arch, with a generic > > pte_mkwrite() > > added that just calls pte_mkwrite_novma(). Next callers without a > > VMA can > > be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all > > callers > > can be changed to take/pass a VMA. > > > > Start the process by renaming pte_mkwrite() to pte_mkwrite_novma() > > and > > adding the pte_mkwrite() wrapper in linux/pgtable.h. Apply the same > > pattern for pmd_mkwrite(). Since not all archs have a > > pmd_mkwrite_novma(), > > create a new arch config HAS_HUGE_PAGE that can be used to tell if > > pmd_mkwrite() should be defined. Otherwise in the !HAS_HUGE_PAGE > > cases the > > compiler would not be able to find pmd_mkwrite_novma(). > > > > No functional change. > > > > Cc: linux-...@vger.kernel.org > > Cc: linux-ker...@vger.kernel.org > > Cc: linux-al...@vger.kernel.org > > Cc: linux-snps-...@lists.infradead.org > > Cc: linux-arm-ker...@lists.infradead.org > > Cc: linux-c...@vger.kernel.org > > Cc: linux-hexa...@vger.kernel.org > > Cc: linux-i...@vger.kernel.org > > Cc: loonga...@lists.linux.dev > > Cc: linux-m...@lists.linux-m68k.org > > Cc: Michal Simek > > Cc: Dinh Nguyen > > Cc: linux-m...@vger.kernel.org > > Cc: openr...@lists.librecores.org > > Cc: linux-par...@vger.kernel.org > > Cc: linuxppc-dev@lists.ozlabs.org > > Cc: linux-ri...@lists.infradead.org > > Cc: linux-s...@vger.kernel.org > > Cc: linux...@vger.kernel.org > > Cc: sparcli...@vger.kernel.org > > Cc: linux...@lists.infradead.org > > Cc: linux-a...@vger.kernel.org > > Cc: linux...@kvack.org > > Suggested-by: Linus Torvalds > > Signed-off-by: Rick Edgecombe > > Link: > > https://lore.kernel.org/lkml/CAHk-=wizjsu7c9sfyzb3q04108stghff2wfbokgccgw7riz...@mail.gmail.com/ > > Reviewed-by: Mike Rapoport (IBM) Thanks!
Re: [PATCH v9 01/42] mm: Rename arch pte_mkwrite()'s to pte_mkwrite_novma()
On Tue, 2023-06-13 at 14:26 +0200, David Hildenbrand wrote: > > Acked-by: David Hildenbrand Thanks!
Re: [PATCH v9 01/42] mm: Rename arch pte_mkwrite()'s to pte_mkwrite_novma()
On 13.06.23 02:10, Rick Edgecombe wrote: The x86 Shadow stack feature includes a new type of memory called shadow stack. This shadow stack memory has some unusual properties, which requires some core mm changes to function properly. One of these unusual properties is that shadow stack memory is writable, but only in limited ways. These limits are applied via a specific PTE bit combination. Nevertheless, the memory is writable, and core mm code will need to apply the writable permissions in the typical paths that call pte_mkwrite(). Future patches will make pte_mkwrite() take a VMA, so that the x86 implementation of it can know whether to create regular writable memory or shadow stack memory. But there are a couple of challenges to this. Modifying the signatures of each arch pte_mkwrite() implementation would be error prone because some are generated with macros and would need to be re-implemented. Also, some pte_mkwrite() callers operate on kernel memory without a VMA. So this can be done in a three step process. First pte_mkwrite() can be renamed to pte_mkwrite_novma() in each arch, with a generic pte_mkwrite() added that just calls pte_mkwrite_novma(). Next callers without a VMA can be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all callers can be changed to take/pass a VMA. Start the process by renaming pte_mkwrite() to pte_mkwrite_novma() and adding the pte_mkwrite() wrapper in linux/pgtable.h. Apply the same pattern for pmd_mkwrite(). Since not all archs have a pmd_mkwrite_novma(), create a new arch config HAS_HUGE_PAGE that can be used to tell if pmd_mkwrite() should be defined. Otherwise in the !HAS_HUGE_PAGE cases the compiler would not be able to find pmd_mkwrite_novma(). No functional change. Cc: linux-...@vger.kernel.org Cc: linux-ker...@vger.kernel.org Cc: linux-al...@vger.kernel.org Cc: linux-snps-...@lists.infradead.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-c...@vger.kernel.org Cc: linux-hexa...@vger.kernel.org Cc: linux-i...@vger.kernel.org Cc: loonga...@lists.linux.dev Cc: linux-m...@lists.linux-m68k.org Cc: Michal Simek Cc: Dinh Nguyen Cc: linux-m...@vger.kernel.org Cc: openr...@lists.librecores.org Cc: linux-par...@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-ri...@lists.infradead.org Cc: linux-s...@vger.kernel.org Cc: linux...@vger.kernel.org Cc: sparcli...@vger.kernel.org Cc: linux...@lists.infradead.org Cc: linux-a...@vger.kernel.org Cc: linux...@kvack.org Suggested-by: Linus Torvalds Signed-off-by: Rick Edgecombe Link: https://lore.kernel.org/lkml/CAHk-=wizjsu7c9sfyzb3q04108stghff2wfbokgccgw7riz...@mail.gmail.com/ --- Hi Non-x86 Arch’s, x86 has a feature that allows for the creation of a special type of writable memory (shadow stack) that is only writable in limited specific ways. Previously, changes were proposed to core MM code to teach it to decide when to create normally writable memory or the special shadow stack writable memory, but David Hildenbrand suggested[0] to change pXX_mkwrite() to take a VMA, so awareness of shadow stack memory can be moved into x86 code. Later Linus suggested a less error-prone way[1] to go about this after the first attempt had a bug. Since pXX_mkwrite() is defined in every arch, it requires some tree-wide changes. So that is why you are seeing some patches out of a big x86 series pop up in your arch mailing list. There is no functional change. After this refactor, the shadow stack series goes on to use the arch helpers to push arch memory details inside arch/x86 and other arch's with upcoming shadow stack features. Testing was just 0-day build testing. Hopefully that is enough context. Thanks! [0] https://lore.kernel.org/lkml/0e29a2d0-08d8-bcd6-ff26-4bea0e403...@redhat.com/ [1] https://lore.kernel.org/lkml/CAHk-=wizjsu7c9sfyzb3q04108stghff2wfbokgccgw7riz...@mail.gmail.com/ --- Acked-by: David Hildenbrand -- Cheers, David / dhildenb
Re: [PATCH v9 01/42] mm: Rename arch pte_mkwrite()'s to pte_mkwrite_novma()
On Mon, Jun 12, 2023 at 05:10:27PM -0700, Rick Edgecombe wrote: > The x86 Shadow stack feature includes a new type of memory called shadow > stack. This shadow stack memory has some unusual properties, which requires > some core mm changes to function properly. > > One of these unusual properties is that shadow stack memory is writable, > but only in limited ways. These limits are applied via a specific PTE > bit combination. Nevertheless, the memory is writable, and core mm code > will need to apply the writable permissions in the typical paths that > call pte_mkwrite(). Future patches will make pte_mkwrite() take a VMA, so > that the x86 implementation of it can know whether to create regular > writable memory or shadow stack memory. Nit:^ mapping? > But there are a couple of challenges to this. Modifying the signatures of > each arch pte_mkwrite() implementation would be error prone because some > are generated with macros and would need to be re-implemented. Also, some > pte_mkwrite() callers operate on kernel memory without a VMA. > > So this can be done in a three step process. First pte_mkwrite() can be > renamed to pte_mkwrite_novma() in each arch, with a generic pte_mkwrite() > added that just calls pte_mkwrite_novma(). Next callers without a VMA can > be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all callers > can be changed to take/pass a VMA. > > Start the process by renaming pte_mkwrite() to pte_mkwrite_novma() and > adding the pte_mkwrite() wrapper in linux/pgtable.h. Apply the same > pattern for pmd_mkwrite(). Since not all archs have a pmd_mkwrite_novma(), > create a new arch config HAS_HUGE_PAGE that can be used to tell if > pmd_mkwrite() should be defined. Otherwise in the !HAS_HUGE_PAGE cases the > compiler would not be able to find pmd_mkwrite_novma(). > > No functional change. > > Cc: linux-...@vger.kernel.org > Cc: linux-ker...@vger.kernel.org > Cc: linux-al...@vger.kernel.org > Cc: linux-snps-...@lists.infradead.org > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-c...@vger.kernel.org > Cc: linux-hexa...@vger.kernel.org > Cc: linux-i...@vger.kernel.org > Cc: loonga...@lists.linux.dev > Cc: linux-m...@lists.linux-m68k.org > Cc: Michal Simek > Cc: Dinh Nguyen > Cc: linux-m...@vger.kernel.org > Cc: openr...@lists.librecores.org > Cc: linux-par...@vger.kernel.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-ri...@lists.infradead.org > Cc: linux-s...@vger.kernel.org > Cc: linux...@vger.kernel.org > Cc: sparcli...@vger.kernel.org > Cc: linux...@lists.infradead.org > Cc: linux-a...@vger.kernel.org > Cc: linux...@kvack.org > Suggested-by: Linus Torvalds > Signed-off-by: Rick Edgecombe > Link: > https://lore.kernel.org/lkml/CAHk-=wizjsu7c9sfyzb3q04108stghff2wfbokgccgw7riz...@mail.gmail.com/ Reviewed-by: Mike Rapoport (IBM) -- Sincerely yours, Mike.
Re: [PATCH RESEND v2] KVM: move KVM_CAP_DEVICE_CTRL to the generic check
+ Please use scripts/get_maintainer.pl to generate the To/Cc lists. This may be trivial, but it still needs eyeballs from the relevant maintainers. On Wed, Mar 15, 2023, Wei Wang wrote: > KVM_CAP_DEVICE_CTRL allows userspace to check if the kvm_device > framework (e.g. KVM_CREATE_DEVICE) is supported by KVM. Move > KVM_CAP_DEVICE_CTRL to the generic check for the two reasons: > 1) it already supports arch agnostic usages (i.e. KVM_DEV_TYPE_VFIO). > For example, userspace VFIO implementation may needs to create > KVM_DEV_TYPE_VFIO on x86, riscv, or arm etc. It is simpler to have it > checked at the generic code than at each arch's code. > 2) KVM_CREATE_DEVICE has been added to the generic code. > > Link: https://lore.kernel.org/all/20221215115207.14784-1-wei.w.w...@intel.com > Signed-off-by: Wei Wang > Reviewed-by: Sean Christopherson > --- > arch/arm64/kvm/arm.c | 1 - > arch/powerpc/kvm/powerpc.c | 1 - > arch/riscv/kvm/vm.c| 1 - > arch/s390/kvm/kvm-s390.c | 1 - > virt/kvm/kvm_main.c| 1 + > 5 files changed, 1 insertion(+), 4 deletions(-) > > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index 3bd732eaf087..96329e675771 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -202,7 +202,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long > ext) > r = vgic_present; > break; > case KVM_CAP_IOEVENTFD: > - case KVM_CAP_DEVICE_CTRL: > case KVM_CAP_USER_MEMORY: > case KVM_CAP_SYNC_MMU: > case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c > index 4c5405fc5538..185efed23896 100644 > --- a/arch/powerpc/kvm/powerpc.c > +++ b/arch/powerpc/kvm/powerpc.c > @@ -526,7 +526,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long > ext) > case KVM_CAP_ENABLE_CAP: > case KVM_CAP_ONE_REG: > case KVM_CAP_IOEVENTFD: > - case KVM_CAP_DEVICE_CTRL: > case KVM_CAP_IMMEDIATE_EXIT: > case KVM_CAP_SET_GUEST_DEBUG: > r = 1; > diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c > index 65a964d7e70d..6efe93b282e1 100644 > --- a/arch/riscv/kvm/vm.c > +++ b/arch/riscv/kvm/vm.c > @@ -57,7 +57,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > > switch (ext) { > case KVM_CAP_IOEVENTFD: > - case KVM_CAP_DEVICE_CTRL: > case KVM_CAP_USER_MEMORY: > case KVM_CAP_SYNC_MMU: > case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > index 39b36562c043..7b097b5253ca 100644 > --- a/arch/s390/kvm/kvm-s390.c > +++ b/arch/s390/kvm/kvm-s390.c > @@ -556,7 +556,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long > ext) > case KVM_CAP_ENABLE_CAP: > case KVM_CAP_S390_CSS_SUPPORT: > case KVM_CAP_IOEVENTFD: > - case KVM_CAP_DEVICE_CTRL: > case KVM_CAP_S390_IRQCHIP: > case KVM_CAP_VM_ATTRIBUTES: > case KVM_CAP_MP_STATE: > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index d255964ec331..71cc63640173 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -4523,6 +4523,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct > kvm *kvm, long arg) > #endif > case KVM_CAP_BINARY_STATS_FD: > case KVM_CAP_SYSTEM_EVENT_DATA: > + case KVM_CAP_DEVICE_CTRL: > return 1; > default: > break; > -- > 2.27.0 >
Re: [PATCH 00/13] mm: jit/text allocator
On Tue, Jun 13, 2023 at 02:56:14PM -0400, Kent Overstreet wrote: > On Thu, Jun 08, 2023 at 09:41:16PM +0300, Mike Rapoport wrote: > > On Tue, Jun 06, 2023 at 11:21:59AM -0700, Song Liu wrote: > > > On Mon, Jun 5, 2023 at 3:09 AM Mark Rutland wrote: > > > > > > [...] > > > > > > > > > > Can you give more detail on what parameters you need? If the only > > > > > > > extra > > > > > > > parameter is just "does this allocation need to live close to > > > > > > > kernel > > > > > > > text", that's not that big of a deal. > > > > > > > > > > > > My thinking was that we at least need the start + end for each > > > > > > caller. That > > > > > > might be it, tbh. > > > > > > > > > > Do you mean that modules will have something like > > > > > > > > > > jit_text_alloc(size, MODULES_START, MODULES_END); > > > > > > > > > > and kprobes will have > > > > > > > > > > jit_text_alloc(size, KPROBES_START, KPROBES_END); > > > > > ? > > > > > > > > Yes. > > > > > > How about we start with two APIs: > > > jit_text_alloc(size); > > > jit_text_alloc_range(size, start, end); > > > > > > AFAICT, arm64 is the only arch that requires the latter API. And TBH, I am > > > not quite convinced it is needed. > > > > Right now arm64 and riscv override bpf and kprobes allocations to use the > > entire vmalloc address space, but having the ability to allocate generated > > code outside of modules area may be useful for other architectures. > > > > Still the start + end for the callers feels backwards to me because the > > callers do not define the ranges, but rather the architectures, so we still > > need a way for architectures to define how they want allocate memory for > > the generated code. > > So, the start + end just comes from the need to keep relative pointers > under a certain size. I think this could be just a flag, I see no reason > to expose actual addresses here. It's the other way around. The start + end comes from the need to restrict allocation to certain range because of the relative addressing. I don't see how a flag can help here. -- Sincerely yours, Mike.
Re: [PATCH 00/17] tool/perf/test: Fix shellcheck coding/formatting issues of test shell scripts
Em Tue, Jun 13, 2023 at 10:11:28PM +0530, Athira Rajeev escreveu: > Patchset covers a set of fixes for coding/formatting issues observed while > running shellcheck tool on the perf test shell scripts. Shellcheck is a static > analysis tool that can find semantic/syntax bugs in the shell scripts. Thanks, applied the series. - Arnaldo > Patches 1-14 fixes the issues found with shellcheck. Patch 15, 16 > and patch 17 address a fix in task_analyzer test. > > This cleanup is a pre-requisite to include a build option for shellcheck > discussed here: https://www.spinics.net/lists/linux-perf-users/msg25553.html > Also this is first set of patches. There will be one more set which will > include build option for shellcheck as discussed in the mail thread. > > Abhirup Deb (2): > tools/perf/tests: fix test_arm_spe.sh signal case issues > perf/tests/shell: fix shellscript errors for lock_contention.sh > > Aboorva Devarajan (1): > tools/perf/tests: Fix shellcheck issues in test_task_analyzer.sh file > > Aditya Gupta (3): > perf tests task_analyzer: fix bad substitution ${$1} > perf tests task_analyzer: print command on failure > perf tests task_analyzer: skip tests if no libtraceevent support > > Akanksha J N (1): > tools/perf/tests: Fix shellcheck warnings for > trace+probe_vfs_getname.sh > > Anushree Mathur (1): > perf/tests/shell : Shellcheck fixes for perf test > "test_arm_coresight.sh" > > Barnali Guha Thakurata (1): > tools/perf/tests/shell/stat_all_metrics: Fix shellcheck warning SC2076 > in stat_all_metrics.sh > > Disha Goel (1): > tools/perf/tests: fix shellcheck warning for stat+json_output > > Geetika (1): > tools/perf/tests: Fix all POSIX sh warnings in perf shell test > test_brstack.sh > > Korrapati Likhitha (1): > tools/perf/tests: Fix shellcheck warnings for stat+csv_output > > Samir Mulani (1): > tools/perf/tests: fixed shellcheck warnings for perf shell scripts > > Shirisha G (1): > tools/perf/tests: fix shellcheck warnings for daemon.sh > > Sourabh Jain (1): > perf: get rid of unused import > > Spoorthy S (2): > shellcheck : fixing signal names and adding double quotes for > expression in test_arm_callgraph_fp > tools/perf/tests: Fix all POSIX sh warnings in stat+shadow_stat.sh > > .../scripts/python/arm-cs-trace-disasm.py | 1 - > tools/perf/tests/shell/buildid.sh | 12 +- > tools/perf/tests/shell/daemon.sh | 113 -- > tools/perf/tests/shell/lock_contention.sh | 70 +-- > .../shell/record+probe_libc_inet_pton.sh | 6 +- > .../shell/record+script_probe_vfs_getname.sh | 4 +- > tools/perf/tests/shell/stat+csv_output.sh | 4 +- > tools/perf/tests/shell/stat+json_output.sh| 2 +- > tools/perf/tests/shell/stat+shadow_stat.sh| 4 +- > tools/perf/tests/shell/stat_all_metrics.sh| 6 +- > .../perf/tests/shell/test_arm_callgraph_fp.sh | 6 +- > tools/perf/tests/shell/test_arm_coresight.sh | 6 +- > tools/perf/tests/shell/test_arm_spe.sh| 2 +- > tools/perf/tests/shell/test_brstack.sh| 12 +- > tools/perf/tests/shell/test_task_analyzer.sh | 98 --- > .../tests/shell/trace+probe_vfs_getname.sh| 6 +- > 16 files changed, 203 insertions(+), 149 deletions(-) > > -- > 2.39.1 > -- - Arnaldo
Re: [PATCH 16/17] perf tests task_analyzer: print command on failure
Em Tue, Jun 13, 2023 at 10:11:44PM +0530, Athira Rajeev escreveu: > From: Aditya Gupta > > Instead of printing "perf command failed" everytime, print the exact > command that run earlier Looks like a nice improvement, added the test authors to the CC list on this message, Thanks, applied. - Arnaldo > Signed-off-by: Athira Rajeev > Signed-off-by: Kajol Jain > Signed-off-by: Aditya Gupta > --- > tools/perf/tests/shell/test_task_analyzer.sh | 24 ++-- > 1 file changed, 12 insertions(+), 12 deletions(-) > > diff --git a/tools/perf/tests/shell/test_task_analyzer.sh > b/tools/perf/tests/shell/test_task_analyzer.sh > index 84ab7e7f57d5..b094eeb3bf66 100755 > --- a/tools/perf/tests/shell/test_task_analyzer.sh > +++ b/tools/perf/tests/shell/test_task_analyzer.sh > @@ -53,14 +53,14 @@ prepare_perf_data() { > test_basic() { > out="$tmpdir/perf.out" > perf script report task-analyzer > "$out" > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer" > find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}" > } > > test_ns_rename(){ > out="$tmpdir/perf.out" > perf script report task-analyzer --ns --rename-comms-by-tids 0:random > > "$out" > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --ns > --rename-comms-by-tids 0:random" > find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}" > } > > @@ -68,7 +68,7 @@ test_ms_filtertasks_highlight(){ > out="$tmpdir/perf.out" > perf script report task-analyzer --ms --filter-tasks perf > --highlight-tasks perf \ > > "$out" > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --ms --filter-tasks perf > --highlight-tasks perf" > find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}" > } > > @@ -76,61 +76,61 @@ test_extended_times_timelimit_limittasks() { > out="$tmpdir/perf.out" > perf script report task-analyzer --extended-times --time-limit :9 \ > --limit-to-tasks perf > "$out" > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --extended-times > --time-limit :9 --limit-to-tasks perf" > find_str_or_fail "Out-Out" "$out" "${FUNCNAME[0]}" > } > > test_summary() { > out="$tmpdir/perf.out" > perf script report task-analyzer --summary > "$out" > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --summary" > find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}" > } > > test_summaryextended() { > out="$tmpdir/perf.out" > perf script report task-analyzer --summary-extended > "$out" > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --summary-extended" > find_str_or_fail "Inter Task Times" "$out" "${FUNCNAME[0]}" > } > > test_summaryonly() { > out="$tmpdir/perf.out" > perf script report task-analyzer --summary-only > "$out" > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --summary-only" > find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}" > } > > test_extended_times_summary_ns() { > out="$tmpdir/perf.out" > perf script report task-analyzer --extended-times --summary --ns > > "$out" > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --extended-times > --summary --ns" > find_str_or_fail "Out-Out" "$out" "${FUNCNAME[0]}" > find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}" > } > > test_csv() { > perf script report task-analyzer --csv csv > /dev/null > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --csv csv" > find_str_or_fail "Comm;" csv "${FUNCNAME[0]}" > } > > test_csv_extended_times() { > perf script report task-analyzer --csv csv --extended-times > /dev/null > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --csv csv > --extended-times" > find_str_or_fail "Out-Out;" csv "${FUNCNAME[0]}" > } > > test_csvsummary() { > perf script report task-analyzer --csv-summary csvsummary > /dev/null > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --csv-summary csvsummary" > find_str_or_fail "Comm;" csvsummary "${FUNCNAME[0]}" > } > > test_csvsummary_extended() { > perf script report task-analyzer --csv-summary csvsummary > --summary-extended \ > >/dev/null > - check_exec_0 "perf" > + check_exec_0 "perf script report task-analyzer --csv-summary csvsummary > --summary-extended" > find_str_or_fail "Out-Out;" csvsummary "${FUNCNAME[0]}" > } > > -- > 2.39.1 > -- - Arnaldo
Re: [PATCH 15/17] perf tests task_analyzer: fix bad substitution ${$1}
Em Tue, Jun 13, 2023 at 10:11:43PM +0530, Athira Rajeev escreveu: > From: Aditya Gupta > > ${$1} gives bad substitution error on sh, bash, and zsh. This seems like > a typo, and this patch modifies it to $1, since that is what it's usage > looks like from wherever `check_exec_0` is called. Nicely spotted! Please add the people that last touched the problem to the cc list, specially when it fixes a bug. Thanks for adding a Fixes tag, that helps the sta...@kernel.org guys to get this propagated to supported kernel releases. I've added the test author to the CC list in this message. thanks! - Arnaldo > This issue due to ${$1} caused all function calls to give error in > `find_str_or_fail` line, and so no test runs completely. But > 'perf test "perf script task-analyzer tests"' wrongly reports > that tests passed with the status OK, which is wrong considering > the tests didn't even run completely > > Fixes: e8478b84d6ba ("perf test: add new task-analyzer tests") > Signed-off-by: Athira Rajeev > Signed-off-by: Kajol Jain > Signed-off-by: Aditya Gupta > --- > tools/perf/tests/shell/test_task_analyzer.sh | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/tools/perf/tests/shell/test_task_analyzer.sh > b/tools/perf/tests/shell/test_task_analyzer.sh > index 4264b54b654b..84ab7e7f57d5 100755 > --- a/tools/perf/tests/shell/test_task_analyzer.sh > +++ b/tools/perf/tests/shell/test_task_analyzer.sh > @@ -31,7 +31,7 @@ report() { > > check_exec_0() { > if [ $? != 0 ]; then > - report 1 "invokation of ${$1} command failed" > + report 1 "invocation of $1 command failed" > fi > } > > -- > 2.39.1 > -- - Arnaldo
Re: [PATCH v2 2/2] powerpc/mm: Add memory_block_size as a kernel parameter
On Fri, Jun 09, 2023 at 11:38:51AM +0530, Aneesh Kumar K.V wrote: Certain devices can possess non-standard memory capacities, not constrained to multiples of 1GB. Provide a kernel parameter so that we can map the device memory completely on memory hotplug. Case in point; the memory block size determined at boot is 1GB, but we know that 15.75GB of device memory will be hotplugged during runtime. Reviewed-by: Reza Arbab -- Reza Arbab
Re: [PATCH 01/17] perf: get rid of unused import
Em Tue, Jun 13, 2023 at 10:11:29PM +0530, Athira Rajeev escreveu: > From: Sourabh Jain > > Script doesn't use sys library, so remove it. Please Cc the persons working on that file, I added Leo to the CC list of this message. Thanks, applied. - Arnaldo > Report by pylint: > W0611: Unused import sys (unused-import) > > Signed-off-by: Athira Rajeev > Signed-off-by: Kajol Jain > Signed-off-by: Sourabh Jain > --- > tools/perf/scripts/python/arm-cs-trace-disasm.py | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py > b/tools/perf/scripts/python/arm-cs-trace-disasm.py > index 4339692a8d0b..d59ff53f1d94 100755 > --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py > +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py > @@ -9,7 +9,6 @@ > from __future__ import print_function > import os > from os import path > -import sys > import re > from subprocess import * > from optparse import OptionParser, make_option > -- > 2.39.1 > -- - Arnaldo
Re: [PATCH v2 1/2] powerpc/mm: Cleanup memory block size probing
On Fri, Jun 09, 2023 at 11:38:50AM +0530, Aneesh Kumar K.V wrote: Parse the device tree in early init to find the memory block size to be used by the kernel. Consolidate the memory block size device tree parsing to one helper and use that on both powernv and pseries. We still want to use machine-specific callback because on all machine types other than powernv and pseries we continue to return MIN_MEMORY_BLOCK_SIZE. pseries_memory_block_size used to look for the second memory block (memory@x) to determine the memory_block_size value. This patch changed that to look at all memory blocks and make sure we can map them all correctly using the computed memory block size value. Reviewed-by: Reza Arbab -- Reza Arbab
Re: [PATCH 0/3] Move the ARCH_DMA_MINALIGN definition to asm/cache.h
On Tue, Jun 13, 2023 at 04:42:40PM +, Christophe Leroy wrote: > > > Le 13/06/2023 à 17:52, Catalin Marinas a écrit : > > Hi, > > > > The ARCH_KMALLOC_MINALIGN reduction series defines a generic > > ARCH_DMA_MINALIGN in linux/cache.h: > > > > https://lore.kernel.org/r/20230612153201.554742-2-catalin.mari...@arm.com/ > > > > Unfortunately, this causes a duplicate definition warning for > > microblaze, powerpc (32-bit only) and sh as these architectures define > > ARCH_DMA_MINALIGN in a different file than asm/cache.h. Move the macro > > to asm/cache.h to avoid this issue and also bring them in line with the > > other architectures. > > What about mips ? > > arch/mips/include/asm/mach-generic/kmalloc.h:#define ARCH_DMA_MINALIGN > 128 > arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 32 > arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 128 > arch/mips/include/asm/mach-n64/kmalloc.h:#define ARCH_DMA_MINALIGN > L1_CACHE_BYTES > arch/mips/include/asm/mach-tx49xx/kmalloc.h:#define ARCH_DMA_MINALIGN > L1_CACHE_BYTES Sorry, I should have mentioned it in the cover letter (discussed here - https://lore.kernel.org/r/zihpaixb%2f0ve7...@arm.com/). These kmalloc.h files are included in asm/cache.h, based on which machine is enabled, so there's no problem for mips. It makes more sense to keep them in those mach-*/kmalloc.h files instead of having lots of #ifdefs in cache.h. -- Catalin
[PATCH 14/17] tools/perf/tests: Fix all POSIX sh warnings in stat+shadow_stat.sh
From: Spoorthy S Running shellcheck -S on stat+shadow_stat.sh testcase, generates SC2046 and SC2034 warnings, $ shellcheck -S warning tests/shell/stat+shadow_stat.sh res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)` : Quote this to prevent word splitting To address the POSIX shell warnings used quotes in the printf expressions, to prevent word splitting. Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Spoorthy S --- tools/perf/tests/shell/stat+shadow_stat.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh index e6e35fc6c882..0e9cba84e757 100755 --- a/tools/perf/tests/shell/stat+shadow_stat.sh +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -33,7 +33,7 @@ test_global_aggr() fi # use printf for rounding and a leading zero - res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)` + res=`printf "%.2f" "$(echo "scale=6; $num / $cyc" | bc -q)"` if [ "$ipc" != "$res" ]; then echo "IPC is different: $res != $ipc ($num / $cyc)" exit 1 @@ -67,7 +67,7 @@ test_no_aggr() fi # use printf for rounding and a leading zero - res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)` + res=`printf "%.2f" "$(echo "scale=6; $num / $cyc" | bc -q)"` if [ "$ipc" != "$res" ]; then echo "IPC is different for $cpu: $res != $ipc ($num / $cyc)" exit 1 -- 2.39.1
[PATCH 07/17] perf/tests/shell : Shellcheck fixes for perf test "test_arm_coresight.sh"
From: Anushree Mathur Fixed the following shellcheck issues in test_arm_coresight.sh file: In tools/perf/tests/shell/test_arm_coresight.sh line 31: trap - exit term int ^--^ SC2039: In POSIX sh, using lower/mixed case for signal names is undefined. ^--^ SC2039: In POSIX sh, using lower/mixed case for signal names is undefined. ^-^ SC2039: In POSIX sh, using lower/mixed case for signal names is undefined. In tools/perf/tests/shell/test_arm_coresight.sh line 35: trap cleanup_files exit term int ^--^ SC2039: In POSIX sh, using lower/mixed case for signal names is undefined. ^--^ SC2039: In POSIX sh, using lower/mixed case for signal names is undefined. ^-^ SC2039: In POSIX sh, using lower/mixed case for signal names is undefined. In tools/perf/tests/shell/test_arm_coresight.sh line 92: if [ $? -eq 0 -a -e "$1/enable_sink" ]; then ^-- SC2166: Prefer [ p ] && [ q ] as [ p -a q ] is not well defined. Fixed above warnings by: 1)Capitalize signals(INT, TERM, EXIT) to avoid mixed/lower case naming of signals. 2)Expression [p -a q] was not defined,changed it to [p] && [q] to avoid the ambiguity as this is older format using -a or -o ,now we use [p] && [q] in place of [p -a q] and [p] || [q] in place of [p -o q]. Result after fixing the issues: shell$ shellcheck -S warning test_arm_coresight.sh shell$ Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Anushree Mathur --- tools/perf/tests/shell/test_arm_coresight.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 482009e17bda..f1bf5621160f 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -28,11 +28,11 @@ cleanup_files() rm -f ${perfdata} rm -f ${file} rm -f "${perfdata}.old" - trap - exit term int + trap - EXIT TERM INT exit $glb_err } -trap cleanup_files exit term int +trap cleanup_files EXIT TERM INT record_touch_file() { echo "Recording trace (only user mode) with path: CPU$2 => $1" @@ -89,7 +89,7 @@ is_device_sink() { # cannot support perf PMU. echo "$1" | grep -E -q -v "tpiu" - if [ $? -eq 0 -a -e "$1/enable_sink" ]; then + if [ $? -eq 0 ] && [ -e "$1/enable_sink" ]; then pmu_dev="/sys/bus/event_source/devices/cs_etm/sinks/$2" -- 2.39.1
Re: [PATCH 00/13] mm: jit/text allocator
On Thu, Jun 08, 2023 at 09:41:16PM +0300, Mike Rapoport wrote: > On Tue, Jun 06, 2023 at 11:21:59AM -0700, Song Liu wrote: > > On Mon, Jun 5, 2023 at 3:09 AM Mark Rutland wrote: > > > > [...] > > > > > > > > Can you give more detail on what parameters you need? If the only > > > > > > extra > > > > > > parameter is just "does this allocation need to live close to kernel > > > > > > text", that's not that big of a deal. > > > > > > > > > > My thinking was that we at least need the start + end for each > > > > > caller. That > > > > > might be it, tbh. > > > > > > > > Do you mean that modules will have something like > > > > > > > > jit_text_alloc(size, MODULES_START, MODULES_END); > > > > > > > > and kprobes will have > > > > > > > > jit_text_alloc(size, KPROBES_START, KPROBES_END); > > > > ? > > > > > > Yes. > > > > How about we start with two APIs: > > jit_text_alloc(size); > > jit_text_alloc_range(size, start, end); > > > > AFAICT, arm64 is the only arch that requires the latter API. And TBH, I am > > not quite convinced it is needed. > > Right now arm64 and riscv override bpf and kprobes allocations to use the > entire vmalloc address space, but having the ability to allocate generated > code outside of modules area may be useful for other architectures. > > Still the start + end for the callers feels backwards to me because the > callers do not define the ranges, but rather the architectures, so we still > need a way for architectures to define how they want allocate memory for > the generated code. So, the start + end just comes from the need to keep relative pointers under a certain size. I think this could be just a flag, I see no reason to expose actual addresses here.
Re: [PATCH 3/9] cpu/SMT: Store the current/max number of threads
On Tue, Jun 13 2023 at 19:16, Laurent Dufour wrote: > On 10/06/2023 23:26:18, Thomas Gleixner wrote: >> On Thu, May 25 2023 at 01:56, Michael Ellerman wrote: >>> #ifdef CONFIG_HOTPLUG_SMT >>> enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; >>> +static unsigned int cpu_smt_max_threads __ro_after_init; >>> +unsigned int cpu_smt_num_threads; >> >> Why needs this to be global? cpu_smt_control is pointlessly global already. > > I agree that cpu_smt_*_threads should be static. > > Howwever, regarding cpu_smt_control, it is used in 2 places in the x86 code: > - arch/x86/power/hibernate.c in arch_resume_nosmt() > - arch/x86/kernel/cpu/bugs.c in spectre_v2_user_select_mitigation() Bah. I must have fatfingered the grep then. > An accessor function may be introduced to read that value in these 2 > functions, but I'm wondering if that's really the best option. > > Unless there is a real need to change this through this series, I think > cpu_smt_control can remain global. That's fine. Thanks, tglx
[PATCH 11/17] perf/tests/shell: fix shellscript errors for lock_contention.sh
From: Abhirup Deb Use quotes around variables to prevent POSIX word expansion, use uppercase for signals(INT, TERM, EXIT) to avoid mixed/lower case naming of signals and replace "==" with "=" as "==" is not supported by POSIX shell. Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Abhirup Deb Signed-off-by: Anushree Mathur --- tools/perf/tests/shell/lock_contention.sh | 70 +++ 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index be5fcafb26aa..f2cc187b6186 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -11,14 +11,14 @@ result=$(mktemp /tmp/__perf_test.result.X) cleanup() { rm -f ${perfdata} rm -f ${result} - trap - exit term int + trap - EXIT TERM INT } trap_cleanup() { cleanup exit ${err} } -trap trap_cleanup exit term int +trap trap_cleanup EXIT TERM INT check() { if [ `id -u` != 0 ]; then @@ -40,8 +40,8 @@ test_record() perf lock record -o ${perfdata} -- perf bench sched messaging > /dev/null 2>&1 # the output goes to the stderr and we expect only 1 output (-E 1) perf lock contention -i ${perfdata} -E 1 -q 2> ${result} - if [ $(cat "${result}" | wc -l) != "1" ]; then - echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l) + if [ "$(cat "${result}" | wc -l)" != "1" ]; then + echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)" err=1 exit fi @@ -58,8 +58,8 @@ test_bpf() # the perf lock contention output goes to the stderr perf lock con -a -b -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} - if [ $(cat "${result}" | wc -l) != "1" ]; then - echo "[Fail] BPF result count is not 1:" $(cat "${result}" | wc -l) + if [ "$(cat "${result}" | wc -l)" != "1" ]; then + echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)" err=1 exit fi @@ -70,8 +70,8 @@ test_record_concurrent() echo "Testing perf lock record and perf lock contention at the same time" perf lock record -o- -- perf bench sched messaging 2> /dev/null | \ perf lock contention -i- -E 1 -q 2> ${result} - if [ $(cat "${result}" | wc -l) != "1" ]; then - echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l) + if [ "$(cat "${result}" | wc -l)" != "1" ]; then + echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)" err=1 exit fi @@ -81,8 +81,8 @@ test_aggr_task() { echo "Testing perf lock contention --threads" perf lock contention -i ${perfdata} -t -E 1 -q 2> ${result} - if [ $(cat "${result}" | wc -l) != "1" ]; then - echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l) + if [ "$(cat "${result}" | wc -l)" != "1" ]; then + echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)" err=1 exit fi @@ -93,8 +93,8 @@ test_aggr_task() # the perf lock contention output goes to the stderr perf lock con -a -b -t -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} - if [ $(cat "${result}" | wc -l) != "1" ]; then - echo "[Fail] BPF result count is not 1:" $(cat "${result}" | wc -l) + if [ "$(cat "${result}" | wc -l)" != "1" ]; then + echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)" err=1 exit fi @@ -104,8 +104,8 @@ test_aggr_addr() { echo "Testing perf lock contention --lock-addr" perf lock contention -i ${perfdata} -l -E 1 -q 2> ${result} - if [ $(cat "${result}" | wc -l) != "1" ]; then - echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l) + if [ "$(cat "${result}" | wc -l)" != "1" ]; then + echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)" err=1 exit fi @@ -116,8 +116,8 @@ test_aggr_addr() # the perf lock contention output goes to the stderr perf lock con -a -b -l -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} - if [ $(cat "${result}" | wc -l) != "1" ]; then - echo "[Fail] BPF result count is not 1:" $(cat "${result}" | wc -l) + if [ "$(cat "${result}" | wc -l)" != "1" ]; then + echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)" err=1 exit fi @@ -127,8 +127,8 @@ test_type_filter() { echo "Testing perf lock contention --type-filter
Re: [PATCH] powerpc/iommu: TCEs are incorrectly manipulated with DLPAR add/remove of memory
Hello Michael, I found this bug while going though the code. This bug is exposed when DDW is smaller than the max memory of the LPAR. This will result in creating DDW which will have Dynamically mapped TCEs (no direct mapping). I would like to stress that this bug is exposed only in Upstream kernel. Current kernel level in Distros are not exposed to this since they don't have the concept of "dynamically mapped" DDW. I didn't have access to any of the P10 boxes with large amount of memory to re-create the scenario. On P10 we have 2MB TCEs, which results in DDW large enough to be able to cover max memory I could have for the LPAR. As a result, IO Bus Addresses generated were always within DDW limits and no H_PARAMETER was returned by HCALL. So, I hacked the kernel to force the use of 64K TCEs. This resulted in DDW smaller than max memory. When I tried to DLPAR ADD memory, it failed with error code of -4 (H_PARAMETER) from HCALL (H_PUT_TCE/H_PUT_TCE_INDIRECT), when iommu_mem_notifier() invoked tce_setrange_multi_pSeriesLP(). I didn't test the DLPAR REMOVE path, to verify if incorrect TCEs are removed by tce_clearrange_multi_pSeriesLP(), since I would need to hack kernel to force dynamically added TCEs to the high IO Bus Addresses. But, the concept is same. Thanks, Gaurav On 6/13/23 12:16 PM, Gaurav Batra wrote: When memory is dynamically added/removed, iommu_mem_notifier() is invoked. This routine traverses through all the DMA windows (DDW only, not default windows) to add/remove "direct" TCE mappings. The routines for this purpose are tce_clearrange_multi_pSeriesLP() and tce_clearrange_multi_pSeriesLP(). Both these routines are designed for Direct mapped DMA windows only. The issue is that there could be some DMA windows in the list which are not "direct" mapped. Calling these routines will either, 1) remove some dynamically mapped TCEs, Or 2) try to add TCEs which are out of bounds and HCALL returns H_PARAMETER Here are the side affects when these routines are incorrectly invoked for "dynamically" mapped DMA windows. tce_setrange_multi_pSeriesLP() This adds direct mapped TCEs. Now, this could invoke HCALL to add TCEs with out-of-bound range. In this scenario, HCALL will return H_PARAMETER and DLAR ADD of memory will fail. tce_clearrange_multi_pSeriesLP() This will remove range of TCEs. The TCE range that is calculated, depending on the memory range being added, could infact be mapping some other memory address (for dynamic DMA window scenario). This will wipe out those TCEs. The solution is for iommu_mem_notifier() to only invoke these routines for "direct" mapped DMA windows. Signed-off-by: Gaurav Batra Reviewed-by: Brian King --- arch/powerpc/platforms/pseries/iommu.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 918f511837db..24dd61636400 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -363,6 +363,7 @@ struct dynamic_dma_window_prop { struct dma_win { struct device_node *device; const struct dynamic_dma_window_prop *prop; + booldirect; struct list_head list; }; @@ -1409,6 +1410,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_del_prop; if (direct_mapping) { + window->direct = true; + /* DDW maps the whole partition, so enable direct DMA mapping */ ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, win64->value, tce_setrange_multi_pSeriesLP_walk); @@ -1425,6 +1428,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) int i; unsigned long start = 0, end = 0; + window->direct = false; + for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; @@ -1587,8 +1592,10 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, case MEM_GOING_ONLINE: spin_lock(_win_list_lock); list_for_each_entry(window, _win_list, list) { - ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, - arg->nr_pages, window->prop); + if (window->direct) { + ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, + arg->nr_pages, window->prop); + } /* XXX log error */ } spin_unlock(_win_list_lock); @@ -1597,8 +1604,10 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, case MEM_OFFLINE:
[PATCH 09/17] tools/perf/tests: Fix shellcheck issues in test_task_analyzer.sh file
From: Aboorva Devarajan Fixed the following shellcheck issues in test_task_analyzer.sh file: SC2086: Double quote to prevent globbing and word splitting warnings in shell-check. Fixes the following shellcheck issues, SC2086: Double quote to prevent globbing and word splitting warnings in shell-check. Before Patch: $ shellcheck ./test_task_analyzer.sh | grep "SC2086" | ... In ./test_task_analyzer.sh line 13: SC2086: Double quote to prevent globbing and word splitting. In ./test_task_analyzer.sh line 24: SC2086: Double quote to prevent globbing and word splitting. In ./test_task_analyzer.sh line 39: SC2086: Double quote to prevent globbing and word splitting. After Patch: $ shellcheck ./test_task_analyzer.sh | grep -i "SC2086" None perf test result after patch: PASS: "test_basic" PASS: "test_ns_rename" PASS: "test_ms_filtertasks_highlight" PASS: "test_extended_times_timelimit_limittasks" PASS: "test_summary" PASS: "test_summaryextended" PASS: "test_summaryonly" PASS: "test_extended_times_summary_ns" PASS: "test_extended_times_summary_ns" PASS: "test_csv" PASS: "test_csvsummary" PASS: "test_csv_extended_times" PASS: "test_csvsummary_extended" Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Aboorva Devarajan --- tools/perf/tests/shell/test_task_analyzer.sh | 54 ++-- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh index a98e4ab66040..4264b54b654b 100755 --- a/tools/perf/tests/shell/test_task_analyzer.sh +++ b/tools/perf/tests/shell/test_task_analyzer.sh @@ -10,7 +10,7 @@ cleanup() { rm -f perf.data.old rm -f csv rm -f csvsummary - rm -rf $tmpdir + rm -rf "$tmpdir" trap - exit term int } @@ -21,7 +21,7 @@ trap_cleanup() { trap trap_cleanup exit term int report() { - if [ $1 = 0 ]; then + if [ "$1" = 0 ]; then echo "PASS: \"$2\"" else echo "FAIL: \"$2\" Error message: \"$3\"" @@ -36,11 +36,11 @@ check_exec_0() { } find_str_or_fail() { - grep -q "$1" $2 - if [ $? != 0 ]; then - report 1 $3 "Failed to find required string:'${1}'." + grep -q "$1" "$2" + if [ "$?" != 0 ]; then + report 1 "$3" "Failed to find required string:'${1}'." else - report 0 $3 + report 0 "$3" fi } @@ -52,86 +52,86 @@ prepare_perf_data() { # check standard inkvokation with no arguments test_basic() { out="$tmpdir/perf.out" - perf script report task-analyzer > $out + perf script report task-analyzer > "$out" check_exec_0 "perf" - find_str_or_fail "Comm" $out ${FUNCNAME[0]} + find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}" } test_ns_rename(){ out="$tmpdir/perf.out" - perf script report task-analyzer --ns --rename-comms-by-tids 0:random > $out + perf script report task-analyzer --ns --rename-comms-by-tids 0:random > "$out" check_exec_0 "perf" - find_str_or_fail "Comm" $out ${FUNCNAME[0]} + find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}" } test_ms_filtertasks_highlight(){ out="$tmpdir/perf.out" perf script report task-analyzer --ms --filter-tasks perf --highlight-tasks perf \ - > $out + > "$out" check_exec_0 "perf" - find_str_or_fail "Comm" $out ${FUNCNAME[0]} + find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}" } test_extended_times_timelimit_limittasks() { out="$tmpdir/perf.out" perf script report task-analyzer --extended-times --time-limit :9 \ - --limit-to-tasks perf > $out + --limit-to-tasks perf > "$out" check_exec_0 "perf" - find_str_or_fail "Out-Out" $out ${FUNCNAME[0]} + find_str_or_fail "Out-Out" "$out" "${FUNCNAME[0]}" } test_summary() { out="$tmpdir/perf.out" - perf script report task-analyzer --summary > $out + perf script report task-analyzer --summary > "$out" check_exec_0 "perf" - find_str_or_fail "Summary" $out ${FUNCNAME[0]} + find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}" } test_summaryextended() { out="$tmpdir/perf.out" - perf script report task-analyzer --summary-extended > $out + perf script report task-analyzer --summary-extended > "$out" check_exec_0 "perf" - find_str_or_fail "Inter Task Times" $out ${FUNCNAME[0]} + find_str_or_fail "Inter Task Times" "$out" "${FUNCNAME[0]}" } test_summaryonly() { out="$tmpdir/perf.out" - perf script report task-analyzer --summary-only > $out + perf script report task-analyzer --summary-only > "$out" check_exec_0 "perf" - find_str_or_fail "Summary" $out ${FUNCNAME[0]} + find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}" } test_extended_times_summary_ns() { out="$tmpdir/perf.out" - perf script report task-analyzer
[PATCH 13/17] tools/perf/tests: Fix all POSIX sh warnings in perf shell test test_brstack.sh
From: Geetika Fix all the POSIX sh warnings in perf shell test test_brstack.sh Warnings fixed : * In POSIX sh, using lower/mixed case for signal names is undefined. Correcting that in this script. * In POSIX sh, 'local' is undefined. local is supported in many shells, but it's not in POSIX. In POSIX sh, you can adopt some convention to avoid accidentally overwriting variables names, e.g. prefixing with the function name, that is what I have done here. Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Geetika --- tools/perf/tests/shell/test_brstack.sh | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 1c49d8293003..09908d71c994 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -18,7 +18,7 @@ cleanup() { rm -rf $TMPDIR } -trap cleanup exit term int +trap cleanup EXIT TERM INT test_user_branches() { echo "Testing user branch stack sampling" @@ -47,17 +47,17 @@ test_user_branches() { # first argument is the argument passed to "--branch-stack ,save_type,u" # second argument are the expected branch types for the given filter test_filter() { - local filter=$1 - local expect=$2 + test_filter_filter=$1 + test_filter_expect=$2 - echo "Testing branch stack filtering permutation ($filter,$expect)" + echo "Testing branch stack filtering permutation ($test_filter_filter,$test_filter_expect)" - perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1 + perf record -o $TMPDIR/perf.data --branch-filter $test_filter_filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1 perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script # fail if we find any branch type that doesn't match any of the expected ones # also consider UNKNOWN branch types (-) - if grep -E -vm1 "^[^ ]*/($expect|-|( *))/.*$" $TMPDIR/perf.script; then + if grep -E -vm1 "^[^ ]*/($test_filter_expect|-|( *))/.*$" $TMPDIR/perf.script; then return 1 fi } -- 2.39.1
[PATCH 10/17] tools/perf/tests: fix test_arm_spe.sh signal case issues
From: Abhirup Deb Running shellcheck -S on test_arm_spe.sh throws below warnings: #shellcheck -S warning tests/shell/test_arm_spe.sh In tests/shell/test_arm_spe.sh line 30: trap cleanup_files exit term int ^--^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. ^--^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. ^-^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. Fixed this issue by using uppercase for "EXIT", "TERM" and "INIT" signals to avoid using lower/mixed case for signal names as input. Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Abhirup Deb Signed-off-by: Ojaswin Mujoo Signed-off-by: Piyush Sachdeva Signed-off-by: Mukesh Chaurasiya --- tools/perf/tests/shell/test_arm_spe.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh index aa094d71f5b4..03d5c7d12ee5 100755 --- a/tools/perf/tests/shell/test_arm_spe.sh +++ b/tools/perf/tests/shell/test_arm_spe.sh @@ -27,7 +27,7 @@ cleanup_files() exit $glb_err } -trap cleanup_files exit term int +trap cleanup_files EXIT TERM INT arm_spe_report() { if [ $2 = 0 ]; then -- 2.39.1
[PATCH 01/17] perf: get rid of unused import
From: Sourabh Jain Script doesn't use sys library, so remove it. Report by pylint: W0611: Unused import sys (unused-import) Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Sourabh Jain --- tools/perf/scripts/python/arm-cs-trace-disasm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index 4339692a8d0b..d59ff53f1d94 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -9,7 +9,6 @@ from __future__ import print_function import os from os import path -import sys import re from subprocess import * from optparse import OptionParser, make_option -- 2.39.1
[PATCH 15/17] perf tests task_analyzer: fix bad substitution ${$1}
From: Aditya Gupta ${$1} gives bad substitution error on sh, bash, and zsh. This seems like a typo, and this patch modifies it to $1, since that is what it's usage looks like from wherever `check_exec_0` is called. This issue due to ${$1} caused all function calls to give error in `find_str_or_fail` line, and so no test runs completely. But 'perf test "perf script task-analyzer tests"' wrongly reports that tests passed with the status OK, which is wrong considering the tests didn't even run completely Fixes: e8478b84d6ba ("perf test: add new task-analyzer tests") Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Aditya Gupta --- tools/perf/tests/shell/test_task_analyzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh index 4264b54b654b..84ab7e7f57d5 100755 --- a/tools/perf/tests/shell/test_task_analyzer.sh +++ b/tools/perf/tests/shell/test_task_analyzer.sh @@ -31,7 +31,7 @@ report() { check_exec_0() { if [ $? != 0 ]; then - report 1 "invokation of ${$1} command failed" + report 1 "invocation of $1 command failed" fi } -- 2.39.1
[PATCH 17/17] perf tests task_analyzer: skip tests if no libtraceevent support
From: Aditya Gupta Test "perf script task-analyzer tests" fails in environment with missing libtraceevent support, as perf record fails to create the perf.data file, which further tests depend on. Instead, when perf is not compiled with libtraceevent support, skip those tests instead of failing them, by checking the output of `perf record --dry-run` to see if it prints the error "libtraceevent is necessary for tracepoint support" For the following output, perf compiled with: `make NO_LIBTRACEEVENT=1` Before the patch: 108: perf script task-analyzer tests : test child forked, pid 24105 failed to open perf.data: No such file or directory (try 'perf record' first) FAIL: "invokation of perf script report task-analyzer command failed" Error message: "" FAIL: "test_basic" Error message: "Failed to find required string:'Comm'." failed to open perf.data: No such file or directory (try 'perf record' first) FAIL: "invokation of perf script report task-analyzer --ns --rename-comms-by-tids 0:random command failed" Error message: "" FAIL: "test_ns_rename" Error message: "Failed to find required string:'Comm'." failed to open perf.data: No such file or directory (try 'perf record' first) <...> perf script task-analyzer tests: FAILED! With this patch, the script instead returns 2 signifying SKIP, and after the patch: 108: perf script task-analyzer tests : test child forked, pid 26010 libtraceevent is necessary for tracepoint support WARN: Skipping tests. No libtraceevent support test child finished with -2 perf script task-analyzer tests: Skip Fixes: e8478b84d6ba ("perf test: add new task-analyzer tests") Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Aditya Gupta --- tools/perf/tests/shell/test_task_analyzer.sh | 18 ++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh index b094eeb3bf66..59785dfc11f8 100755 --- a/tools/perf/tests/shell/test_task_analyzer.sh +++ b/tools/perf/tests/shell/test_task_analyzer.sh @@ -44,9 +44,20 @@ find_str_or_fail() { fi } +# check if perf is compiled with libtraceevent support +skip_no_probe_record_support() { + perf record -e "sched:sched_switch" -a -- sleep 1 2>&1 | grep "libtraceevent is necessary for tracepoint support" && return 2 + return 0 +} + prepare_perf_data() { # 1s should be sufficient to catch at least some switches perf record -e sched:sched_switch -a -- sleep 1 > /dev/null 2>&1 + # check if perf data file got created in above step. + if [ ! -e "perf.data" ]; then + printf "FAIL: perf record failed to create \"perf.data\" \n" + return 1 + fi } # check standard inkvokation with no arguments @@ -134,6 +145,13 @@ test_csvsummary_extended() { find_str_or_fail "Out-Out;" csvsummary "${FUNCNAME[0]}" } +skip_no_probe_record_support +err=$? +if [ $err -ne 0 ]; then + echo "WARN: Skipping tests. No libtraceevent support" + cleanup + exit $err +fi prepare_perf_data test_basic test_ns_rename -- 2.39.1
[PATCH 02/17] tools/perf/tests: fix shellcheck warning for stat+json_output
From: Disha Goel Running shellcheck on stat+json_output testcase, generates below warning: [ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ] ^--^ SC2046 (warning): Quote this to prevent word splitting. ^-- SC2046 (warning): Quote this to prevent word splitting. Fixed the warning by adding quotes to avoid word splitting. ShellCheck result with patch: # shellcheck -S warning stat+json_output.sh # perf test result after the change: 94: perf stat JSON output linter : Ok Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Disha Goel --- tools/perf/tests/shell/stat+json_output.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh index f3e4967cc72e..c19f926a2951 100755 --- a/tools/perf/tests/shell/stat+json_output.sh +++ b/tools/perf/tests/shell/stat+json_output.sh @@ -40,7 +40,7 @@ trap trap_cleanup EXIT TERM INT # Return true if perf_event_paranoid is > $1 and not running as root. function ParanoidAndNotRoot() { -[ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ] +[ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] } check_no_args() -- 2.39.1
[PATCH 04/17] tools/perf/tests: fix shellcheck warnings for daemon.sh
From: Shirisha G Running shellcheck -S on daemon.sh throws below warnings: Result from shellcheck: # shellcheck -S warning daemon.sh local line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'` ^---^ SC2155: Declare and assign separately to avoid masking return values. trap "echo 'FAILED: Signal caught'; daemon_exit ${config}; exit 1" SIGINT SIGTERM ^---^ SC2064: Use single quotes, otherwise this expands now rather than when signalled. count=`ls ${base}/session-test/ | grep perf.data | wc -l` ^-- SC2010: Don't use ls | grep. Use a glob or a for loop with a condition to allow non-alphanumeric filenames. if [ ${size} != "OK" -o ${type} != "OK" ]; then ^-- SC2166: Prefer [ p ] || [ q ] as [ p -o q ] is not well defined. Fixed above warnings by: - declaring and assigning local variables separately - To fix SC2010, instead of using "ls | grep", used glob to allow non-alphanumeric filenames - Used single quotes to prevent expanding. Result from shellcheck after patch changes: $ shellcheck -S warning daemon.sh $ echo $? 0 Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Shirisha G --- tools/perf/tests/shell/daemon.sh | 113 --- 1 file changed, 75 insertions(+), 38 deletions(-) diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index 45fc24af5b07..aaf3849353e8 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -11,11 +11,16 @@ check_line_first() local lock=$5 local up=$6 - local line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'` - local line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'` - local line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'` - local line_lock=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'` - local line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'` + local line_name + line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'` + local line_base + line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'` + local line_output + line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'` + local line_lock + line_lock=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'` + local line_up + line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'` if [ "${name}" != "${line_name}" ]; then echo "FAILED: wrong name" @@ -54,13 +59,20 @@ check_line_other() local ack=$7 local up=$8 - local line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'` - local line_run=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'` - local line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'` - local line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'` - local line_control=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'` - local line_ack=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $7 }'` - local line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $8 }'` + local line_name + line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'` + local line_run + line_run=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'` + local line_base + line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'` + local line_output + line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'` + local line_control + line_control=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'` + local line_ack + line_ack=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $7 }'` + local line_up + line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $8 }'` if [ "${name}" != "${line_name}" ]; then echo "FAILED: wrong name" @@ -102,8 +114,10 @@ daemon_exit() { local config=$1 - local line=`perf daemon --config ${config} -x: | head -1` - local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` + local line + line=`perf daemon --config ${config} -x: | head -1` + local pid + pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` # Reset trap handler. trap - SIGINT SIGTERM @@ -123,7 +137,7 @@ daemon_start() perf daemon start --config ${config} # Clean up daemon if interrupted. - trap "echo 'FAILED: Signal caught'; daemon_exit ${config}; exit 1" SIGINT SIGTERM + trap 'echo "FAILED: Signal caught"; daemon_exit "${config}"; exit 1' SIGINT SIGTERM # wait for the session to ping
Re: [PATCH 3/9] cpu/SMT: Store the current/max number of threads
On 10/06/2023 23:26:18, Thomas Gleixner wrote: > On Thu, May 25 2023 at 01:56, Michael Ellerman wrote: >> #ifdef CONFIG_HOTPLUG_SMT >> enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; >> +static unsigned int cpu_smt_max_threads __ro_after_init; >> +unsigned int cpu_smt_num_threads; > > Why needs this to be global? cpu_smt_control is pointlessly global already. I agree that cpu_smt_*_threads should be static. Howwever, regarding cpu_smt_control, it is used in 2 places in the x86 code: - arch/x86/power/hibernate.c in arch_resume_nosmt() - arch/x86/kernel/cpu/bugs.c in spectre_v2_user_select_mitigation() An accessor function may be introduced to read that value in these 2 functions, but I'm wondering if that's really the best option. Unless there is a real need to change this through this series, I think cpu_smt_control can remain global. Thomas, are you ok with that? > >> void __init cpu_smt_disable(bool force) >> { >> @@ -433,10 +435,18 @@ void __init cpu_smt_disable(bool force) >> * The decision whether SMT is supported can only be done after the full >> * CPU identification. Called from architecture code. >> */ >> -void __init cpu_smt_check_topology(void) >> +void __init cpu_smt_check_topology(unsigned int num_threads) >> { >> if (!topology_smt_supported()) >> cpu_smt_control = CPU_SMT_NOT_SUPPORTED; >> + >> +cpu_smt_max_threads = num_threads; >> + >> +// May already be disabled by nosmt command line parameter >> +if (cpu_smt_control != CPU_SMT_ENABLED) >> +cpu_smt_num_threads = 1; >> +else >> +cpu_smt_num_threads = num_threads; > > Taking Laurents findings into account this should be something like > the incomplete below. > > x86 would simply invoke cpu_smt_set_num_threads() with both arguments as > smp_num_siblings while PPC can funnel its command line parameter through > the num_threads argument. I do prefer cpu_smt_set_num_threads() also. Thanks, Laurent > > Thanks, > > tglx > --- > --- a/kernel/cpu.c > +++ b/kernel/cpu.c > @@ -414,6 +414,8 @@ void __weak arch_smt_update(void) { } > > #ifdef CONFIG_HOTPLUG_SMT > enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; > +static unsigned int cpu_smt_max_threads __ro_after_init; > +static unsigned int cpu_smt_num_threads = UINT_MAX; > > void __init cpu_smt_disable(bool force) > { > @@ -427,24 +429,31 @@ void __init cpu_smt_disable(bool force) > pr_info("SMT: disabled\n"); > cpu_smt_control = CPU_SMT_DISABLED; > } > + cpu_smt_num_threads = 1; > } > > /* > * The decision whether SMT is supported can only be done after the full > * CPU identification. Called from architecture code. > */ > -void __init cpu_smt_check_topology(void) > +void __init cpu_smt_set_num_threads(unsigned int max_threads, unsigned int > num_threads) > { > - if (!topology_smt_supported()) > + if (max_threads == 1) > cpu_smt_control = CPU_SMT_NOT_SUPPORTED; > -} > > -static int __init smt_cmdline_disable(char *str) > -{ > - cpu_smt_disable(str && !strcmp(str, "force")); > - return 0; > + cpu_smt_max_threads = max_threads; > + > + /* > + * If SMT has been disabled via the kernel command line or SMT is > + * not supported, set cpu_smt_num_threads to 1 for consistency. > + * If enabled, take the architecture requested number of threads > + * to bring up into account. > + */ > + if (cpu_smt_control != CPU_SMT_ENABLED) > + cpu_smt_num_threads = 1; > + else if (num_threads < cpu_smt_num_threads) > + cpu_smt_num_threads = num_threads; > } > -early_param("nosmt", smt_cmdline_disable); > > static inline bool cpu_smt_allowed(unsigned int cpu) > { > @@ -463,6 +472,13 @@ static inline bool cpu_smt_allowed(unsig > return !cpumask_test_cpu(cpu, _booted_once_mask); > } > > +static int __init smt_cmdline_disable(char *str) > +{ > + cpu_smt_disable(str && !strcmp(str, "force")); > + return 0; > +} > +early_param("nosmt", smt_cmdline_disable); > + > /* Returns true if SMT is not supported of forcefully (irreversibly) > disabled */ > bool cpu_smt_possible(void) > {
[PATCH] powerpc/iommu: TCEs are incorrectly manipulated with DLPAR add/remove of memory
When memory is dynamically added/removed, iommu_mem_notifier() is invoked. This routine traverses through all the DMA windows (DDW only, not default windows) to add/remove "direct" TCE mappings. The routines for this purpose are tce_clearrange_multi_pSeriesLP() and tce_clearrange_multi_pSeriesLP(). Both these routines are designed for Direct mapped DMA windows only. The issue is that there could be some DMA windows in the list which are not "direct" mapped. Calling these routines will either, 1) remove some dynamically mapped TCEs, Or 2) try to add TCEs which are out of bounds and HCALL returns H_PARAMETER Here are the side affects when these routines are incorrectly invoked for "dynamically" mapped DMA windows. tce_setrange_multi_pSeriesLP() This adds direct mapped TCEs. Now, this could invoke HCALL to add TCEs with out-of-bound range. In this scenario, HCALL will return H_PARAMETER and DLAR ADD of memory will fail. tce_clearrange_multi_pSeriesLP() This will remove range of TCEs. The TCE range that is calculated, depending on the memory range being added, could infact be mapping some other memory address (for dynamic DMA window scenario). This will wipe out those TCEs. The solution is for iommu_mem_notifier() to only invoke these routines for "direct" mapped DMA windows. Signed-off-by: Gaurav Batra Reviewed-by: Brian King --- arch/powerpc/platforms/pseries/iommu.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 918f511837db..24dd61636400 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -363,6 +363,7 @@ struct dynamic_dma_window_prop { struct dma_win { struct device_node *device; const struct dynamic_dma_window_prop *prop; + booldirect; struct list_head list; }; @@ -1409,6 +1410,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_del_prop; if (direct_mapping) { + window->direct = true; + /* DDW maps the whole partition, so enable direct DMA mapping */ ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, win64->value, tce_setrange_multi_pSeriesLP_walk); @@ -1425,6 +1428,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) int i; unsigned long start = 0, end = 0; + window->direct = false; + for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; @@ -1587,8 +1592,10 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, case MEM_GOING_ONLINE: spin_lock(_win_list_lock); list_for_each_entry(window, _win_list, list) { - ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, - arg->nr_pages, window->prop); + if (window->direct) { + ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, + arg->nr_pages, window->prop); + } /* XXX log error */ } spin_unlock(_win_list_lock); @@ -1597,8 +1604,10 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, case MEM_OFFLINE: spin_lock(_win_list_lock); list_for_each_entry(window, _win_list, list) { - ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, - arg->nr_pages, window->prop); + if (window->direct) { + ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, + arg->nr_pages, window->prop); + } /* XXX log error */ } spin_unlock(_win_list_lock); -- 2.39.2 (Apple Git-143)
[PATCH 00/17] tool/perf/test: Fix shellcheck coding/formatting issues of test shell scripts
Patchset covers a set of fixes for coding/formatting issues observed while running shellcheck tool on the perf test shell scripts. Shellcheck is a static analysis tool that can find semantic/syntax bugs in the shell scripts. Patches 1-14 fixes the issues found with shellcheck. Patch 15, 16 and patch 17 address a fix in task_analyzer test. This cleanup is a pre-requisite to include a build option for shellcheck discussed here: https://www.spinics.net/lists/linux-perf-users/msg25553.html Also this is first set of patches. There will be one more set which will include build option for shellcheck as discussed in the mail thread. Abhirup Deb (2): tools/perf/tests: fix test_arm_spe.sh signal case issues perf/tests/shell: fix shellscript errors for lock_contention.sh Aboorva Devarajan (1): tools/perf/tests: Fix shellcheck issues in test_task_analyzer.sh file Aditya Gupta (3): perf tests task_analyzer: fix bad substitution ${$1} perf tests task_analyzer: print command on failure perf tests task_analyzer: skip tests if no libtraceevent support Akanksha J N (1): tools/perf/tests: Fix shellcheck warnings for trace+probe_vfs_getname.sh Anushree Mathur (1): perf/tests/shell : Shellcheck fixes for perf test "test_arm_coresight.sh" Barnali Guha Thakurata (1): tools/perf/tests/shell/stat_all_metrics: Fix shellcheck warning SC2076 in stat_all_metrics.sh Disha Goel (1): tools/perf/tests: fix shellcheck warning for stat+json_output Geetika (1): tools/perf/tests: Fix all POSIX sh warnings in perf shell test test_brstack.sh Korrapati Likhitha (1): tools/perf/tests: Fix shellcheck warnings for stat+csv_output Samir Mulani (1): tools/perf/tests: fixed shellcheck warnings for perf shell scripts Shirisha G (1): tools/perf/tests: fix shellcheck warnings for daemon.sh Sourabh Jain (1): perf: get rid of unused import Spoorthy S (2): shellcheck : fixing signal names and adding double quotes for expression in test_arm_callgraph_fp tools/perf/tests: Fix all POSIX sh warnings in stat+shadow_stat.sh .../scripts/python/arm-cs-trace-disasm.py | 1 - tools/perf/tests/shell/buildid.sh | 12 +- tools/perf/tests/shell/daemon.sh | 113 -- tools/perf/tests/shell/lock_contention.sh | 70 +-- .../shell/record+probe_libc_inet_pton.sh | 6 +- .../shell/record+script_probe_vfs_getname.sh | 4 +- tools/perf/tests/shell/stat+csv_output.sh | 4 +- tools/perf/tests/shell/stat+json_output.sh| 2 +- tools/perf/tests/shell/stat+shadow_stat.sh| 4 +- tools/perf/tests/shell/stat_all_metrics.sh| 6 +- .../perf/tests/shell/test_arm_callgraph_fp.sh | 6 +- tools/perf/tests/shell/test_arm_coresight.sh | 6 +- tools/perf/tests/shell/test_arm_spe.sh| 2 +- tools/perf/tests/shell/test_brstack.sh| 12 +- tools/perf/tests/shell/test_task_analyzer.sh | 98 --- .../tests/shell/trace+probe_vfs_getname.sh| 6 +- 16 files changed, 203 insertions(+), 149 deletions(-) -- 2.39.1
[PATCH 03/17] shellcheck : fixing signal names and adding double quotes for expression in test_arm_callgraph_fp
From: Spoorthy S Running shellcheck -S on test_arm_calligraph_fp throws warnings SC2086 and SC3049, $shellcheck -S warning tests/shell/test_arm_callgraph_fp.sh rm -f $PERF_DATA : Double quote to prevent globbing and word splitting. trap cleanup_files exit term int : In POSIX sh, using lower/mixed case for signal names is undefined. After fixing the warnings, $shellcheck tests/shell/test_arm_callgraph_fp.sh $ echo $? 0 To address the POSIX shell warnings added changes to convert Lowercase signal names to uppercase in the script and double quoted the command substitutions($fix to "$fix") to solve Globbing warnings. Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Spoorthy S --- tools/perf/tests/shell/test_arm_callgraph_fp.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh index e61d8deaa0c4..1380e0d12dce 100755 --- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -9,13 +9,13 @@ TEST_PROGRAM="perf test -w leafloop" cleanup_files() { - rm -f $PERF_DATA + rm -f "$PERF_DATA" } -trap cleanup_files exit term int +trap cleanup_files EXIT TERM INT # Add a 1 second delay to skip samples that are not in the leaf() function -perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null & +perf record -o "$PERF_DATA" --call-graph fp -e cycles//u -D 1000 --user-callchains -- "$TEST_PROGRAM" 2> /dev/null & PID=$! echo " + Recording (PID=$PID)..." -- 2.39.1
[PATCH 16/17] perf tests task_analyzer: print command on failure
From: Aditya Gupta Instead of printing "perf command failed" everytime, print the exact command that run earlier Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Aditya Gupta --- tools/perf/tests/shell/test_task_analyzer.sh | 24 ++-- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh index 84ab7e7f57d5..b094eeb3bf66 100755 --- a/tools/perf/tests/shell/test_task_analyzer.sh +++ b/tools/perf/tests/shell/test_task_analyzer.sh @@ -53,14 +53,14 @@ prepare_perf_data() { test_basic() { out="$tmpdir/perf.out" perf script report task-analyzer > "$out" - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer" find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}" } test_ns_rename(){ out="$tmpdir/perf.out" perf script report task-analyzer --ns --rename-comms-by-tids 0:random > "$out" - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --ns --rename-comms-by-tids 0:random" find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}" } @@ -68,7 +68,7 @@ test_ms_filtertasks_highlight(){ out="$tmpdir/perf.out" perf script report task-analyzer --ms --filter-tasks perf --highlight-tasks perf \ > "$out" - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --ms --filter-tasks perf --highlight-tasks perf" find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}" } @@ -76,61 +76,61 @@ test_extended_times_timelimit_limittasks() { out="$tmpdir/perf.out" perf script report task-analyzer --extended-times --time-limit :9 \ --limit-to-tasks perf > "$out" - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --extended-times --time-limit :9 --limit-to-tasks perf" find_str_or_fail "Out-Out" "$out" "${FUNCNAME[0]}" } test_summary() { out="$tmpdir/perf.out" perf script report task-analyzer --summary > "$out" - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --summary" find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}" } test_summaryextended() { out="$tmpdir/perf.out" perf script report task-analyzer --summary-extended > "$out" - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --summary-extended" find_str_or_fail "Inter Task Times" "$out" "${FUNCNAME[0]}" } test_summaryonly() { out="$tmpdir/perf.out" perf script report task-analyzer --summary-only > "$out" - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --summary-only" find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}" } test_extended_times_summary_ns() { out="$tmpdir/perf.out" perf script report task-analyzer --extended-times --summary --ns > "$out" - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --extended-times --summary --ns" find_str_or_fail "Out-Out" "$out" "${FUNCNAME[0]}" find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}" } test_csv() { perf script report task-analyzer --csv csv > /dev/null - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --csv csv" find_str_or_fail "Comm;" csv "${FUNCNAME[0]}" } test_csv_extended_times() { perf script report task-analyzer --csv csv --extended-times > /dev/null - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --csv csv --extended-times" find_str_or_fail "Out-Out;" csv "${FUNCNAME[0]}" } test_csvsummary() { perf script report task-analyzer --csv-summary csvsummary > /dev/null - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --csv-summary csvsummary" find_str_or_fail "Comm;" csvsummary "${FUNCNAME[0]}" } test_csvsummary_extended() { perf script report task-analyzer --csv-summary csvsummary --summary-extended \ >/dev/null - check_exec_0 "perf" + check_exec_0 "perf script report task-analyzer --csv-summary csvsummary --summary-extended" find_str_or_fail "Out-Out;" csvsummary "${FUNCNAME[0]}" } -- 2.39.1
[PATCH 12/17] tools/perf/tests: fixed shellcheck warnings for perf shell scripts
From: Samir Mulani Fixed the shellcheck warnings in buildid.sh, record+probe_libc_inet_pton.sh and record+script_probe_vfs_getname.sh perf shell scripts: 1. Prefer [ p ] && [ q ] as [ p -a q ] is not well defined. 2. Prefer [ p ] || [ q ] as [ p -o q ] is not well defined. 3. Used * argument to avoid the argument mixes string and array 4. Resolved issue for variable refernce, where the variable is being used before it has been initialized. 5. Resolved word splitting issue (syntax error). 6. The "err" variable has been removed from buildid.sh since it is not used anywhere in the code. Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Samir Mulani --- tools/perf/tests/shell/buildid.sh| 12 ++-- .../perf/tests/shell/record+probe_libc_inet_pton.sh | 6 +++--- .../tests/shell/record+script_probe_vfs_getname.sh | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh index 0ce22ea0a7f1..3383ca3399d4 100755 --- a/tools/perf/tests/shell/buildid.sh +++ b/tools/perf/tests/shell/buildid.sh @@ -83,12 +83,12 @@ check() # in case of pe-file.exe file echo $1 | grep ".exe" if [ $? -eq 0 ]; then - if [ -x $1 -a ! -x $file ]; then + if [ -x $1 ] && [ ! -x $file ]; then echo "failed: file ${file} executable does not exist" exit 1 fi - if [ ! -x $file -a ! -e $file ]; then + if [ ! -x $file ] && [ ! -e $file ]; then echo "failed: file ${file} does not exist" exit 1 fi @@ -136,10 +136,10 @@ test_record() log_err=$(mktemp /tmp/perf.log.err.XXX) perf="perf --buildid-dir ${build_id_dir}" - echo "running: perf record $@" - ${perf} record --buildid-all -o ${data} $@ 1>${log_out} 2>${log_err} + echo "running: perf record $*" + ${perf} record --buildid-all -o ${data} "$@" 1>${log_out} 2>${log_err} if [ $? -ne 0 ]; then - echo "failed: record $@" + echo "failed: record $*" echo "see log: ${log_err}" exit 1 fi @@ -172,4 +172,4 @@ if [ ${run_pe} -eq 1 ]; then rm -r ${wineprefix} fi -exit ${err} +exit 0 diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index bbb5b3d185fa..0934fb0cd68f 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -10,8 +10,8 @@ # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo , 2017 -. $(dirname $0)/lib/probe.sh -. $(dirname $0)/lib/probe_vfs_getname.sh +. "$(dirname "$0")/lib/probe.sh" +. "$(dirname "$0")/lib/probe_vfs_getname.sh" libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g') nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254 @@ -23,7 +23,7 @@ add_libc_inet_pton_event() { event_name=$(perf probe -f -x $libc -a inet_pton 2>&1 | tail -n +2 | head -n -5 | \ grep -P -o "$event_pattern(?=[[:space:]]\(on inet_pton in $libc\))") - if [ $? -ne 0 -o -z "$event_name" ] ; then + if [ $? -ne 0 ] || [ -z "$event_name" ] ; then printf "FAIL: could not add event\n" return 1 fi diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 1341437e1bd9..7f664f1889d9 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -9,11 +9,11 @@ # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo , 2017 -. $(dirname $0)/lib/probe.sh +. "$(dirname "$0")/lib/probe.sh" skip_if_no_perf_probe || exit 2 -. $(dirname $0)/lib/probe_vfs_getname.sh +. "$(dirname "$0")/lib/probe_vfs_getname.sh" record_open_file() { echo "Recording open file:" -- 2.39.1
[PATCH 08/17] tools/perf/tests/shell/stat_all_metrics: Fix shellcheck warning SC2076 in stat_all_metrics.sh
From: Barnali Guha Thakurata Fixed shellcheck warning SC2076 in stat_all_metrics.sh. Before the patch: shell$ shellcheck stat_all_metrics.sh In stat_all_metrics.sh line 9: if [[ "$result" =~ "${m:0:50}" ]] || [[ "$result" =~ "" ]] ^-^ SC2076: Don't quote right-hand side of =~, it'll match literally rather than as a regex. In stat_all_metrics.sh line 15: if [[ "$result" =~ "${m:0:50}" ]] ^-^ SC2076: Don't quote right-hand side of =~, it'll match literally rather than as a regex. In stat_all_metrics.sh line 22: if [[ "$result" =~ "${m:0:50}" ]] ^-^ SC2076: Don't quote right-hand side of =~, it'll match literally rather than as a regex. After the patch: shell$ shellcheck stat_all_metrics.sh shell$ Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Barnali Guha Thakurata --- tools/perf/tests/shell/stat_all_metrics.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh index 22e9cb294b40..54774525e18a 100755 --- a/tools/perf/tests/shell/stat_all_metrics.sh +++ b/tools/perf/tests/shell/stat_all_metrics.sh @@ -6,20 +6,20 @@ err=0 for m in $(perf list --raw-dump metrics); do echo "Testing $m" result=$(perf stat -M "$m" true 2>&1) - if [[ "$result" =~ "${m:0:50}" ]] || [[ "$result" =~ "" ]] + if [[ "$result" =~ ${m:0:50} ]] || [[ "$result" =~ "" ]] then continue fi # Failed so try system wide. result=$(perf stat -M "$m" -a sleep 0.01 2>&1) - if [[ "$result" =~ "${m:0:50}" ]] + if [[ "$result" =~ ${m:0:50} ]] then continue fi # Failed again, possibly the workload was too small so retry with something # longer. result=$(perf stat -M "$m" perf bench internals synthesize 2>&1) - if [[ "$result" =~ "${m:0:50}" ]] + if [[ "$result" =~ ${m:0:50} ]] then continue fi -- 2.39.1
[PATCH 05/17] tools/perf/tests: Fix shellcheck warnings for stat+csv_output
From: Korrapati Likhitha Running the shellcheck on stat+csv_output resulted in the following warning. Result with shellcheck without patch: = $ shellcheck -S warning stat+csv_output.sh In stat+csv_output.sh line 23: [ $(uname -m) = "s390x" ] && exp='^[6-7]$' ^-^ SC2046: Quote this to prevent word splitting. In stat+csv_output.sh line 51: [ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ] ^--^ SC2046: Quote this to prevent word splitting. ^-- SC2046: Quote this to prevent word splitting. = Fixed the warning SC2046 by adding quotes to prevent word splitting. Result with shellcheck with patch: = $ shellcheck -S warning tests/shell/stat+csv_output.sh $ ./perf test "stat CSV output linter" 96: perf stat CSV output linter : Ok = Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Korrapati Likhitha Signed-off-by: Sathvika Vasireddy --- tools/perf/tests/shell/stat+csv_output.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh index fb78b6251a4e..59607fb3cd98 100755 --- a/tools/perf/tests/shell/stat+csv_output.sh +++ b/tools/perf/tests/shell/stat+csv_output.sh @@ -35,7 +35,7 @@ function commachecker() ;; "--interval")exp=7 ;; "--per-thread") exp=7 ;; "--system-wide-no-aggr") exp=7 - [ $(uname -m) = "s390x" ] && exp='^[6-7]$' + [ "$(uname -m)" = "s390x" ] && exp='^[6-7]$' ;; "--per-core")exp=8 ;; "--per-socket") exp=8 ;; "--per-node")exp=8 @@ -65,7 +65,7 @@ function commachecker() # Return true if perf_event_paranoid is > $1 and not running as root. function ParanoidAndNotRoot() { -[ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ] +[ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] } check_no_args() -- 2.39.1
[PATCH 06/17] tools/perf/tests: Fix shellcheck warnings for trace+probe_vfs_getname.sh
From: Akanksha J N Fix the shellcheck warnings on powerpc and x86 for testcase trace+probe_vfs_getname.sh. Add quotes to prevent word splitting which are caused by unquoted command expansions. Before fix: $ shellcheck -S warning trace+probe_vfs_getname.sh In trace+probe_vfs_getname.sh line 13: . $(dirname $0)/lib/probe.sh ^---^ SC2046 (warning): Quote this to prevent word splitting. In trace+probe_vfs_getname.sh line 18: . $(dirname $0)/lib/probe_vfs_getname.sh ^---^ SC2046 (warning): Quote this to prevent word splitting. In trace+probe_vfs_getname.sh line 21: evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') ^-- SC2046 (warning): Quote this to prevent word splitting. 100: Check open filename arg using perf trace + vfs_getname : Ok After the fix: $ shellcheck -S warning trace+probe_vfs_getname.sh 100: Check open filename arg using perf trace + vfs_getname : Ok Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Akanksha J N Signed-off-by: Abhishek Singh Tomar Signed-off-by: Saket Signed-off-by: Avnish Chouhan --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 0a4bac3dd77e..935eac7efa47 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -10,15 +10,15 @@ # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo , 2017 -. $(dirname $0)/lib/probe.sh +. "$(dirname $0)"/lib/probe.sh skip_if_no_perf_probe || exit 2 skip_if_no_perf_trace || exit 2 -. $(dirname $0)/lib/probe_vfs_getname.sh +. "$(dirname $0)"/lib/probe_vfs_getname.sh trace_open_vfs_getname() { - evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + evts=$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed 's/ /,/') perf trace -e $evts touch $file 2>&1 | \ grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } -- 2.39.1
Re: [PATCH 0/3] Move the ARCH_DMA_MINALIGN definition to asm/cache.h
Le 13/06/2023 à 17:52, Catalin Marinas a écrit : > Hi, > > The ARCH_KMALLOC_MINALIGN reduction series defines a generic > ARCH_DMA_MINALIGN in linux/cache.h: > > https://lore.kernel.org/r/20230612153201.554742-2-catalin.mari...@arm.com/ > > Unfortunately, this causes a duplicate definition warning for > microblaze, powerpc (32-bit only) and sh as these architectures define > ARCH_DMA_MINALIGN in a different file than asm/cache.h. Move the macro > to asm/cache.h to avoid this issue and also bring them in line with the > other architectures. What about mips ? arch/mips/include/asm/mach-generic/kmalloc.h:#define ARCH_DMA_MINALIGN 128 arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 32 arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 128 arch/mips/include/asm/mach-n64/kmalloc.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES arch/mips/include/asm/mach-tx49xx/kmalloc.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES > > Andrew, if the arch maintainers cc'ed are fine with such change, could > you please take these three patches together with the > ARCH_KMALLOC_MINALIGN series? > > Thank you. > > Catalin Marinas (3): >powerpc: Move the ARCH_DMA_MINALIGN definition to asm/cache.h >microblaze: Move the ARCH_{DMA,SLAB}_MINALIGN definitions to > asm/cache.h >sh: Move the ARCH_DMA_MINALIGN definition to asm/cache.h > > arch/microblaze/include/asm/cache.h | 5 + > arch/microblaze/include/asm/page.h | 5 - > arch/powerpc/include/asm/cache.h| 4 > arch/powerpc/include/asm/page_32.h | 4 > arch/sh/include/asm/cache.h | 6 ++ > arch/sh/include/asm/page.h | 6 -- > 6 files changed, 15 insertions(+), 15 deletions(-) >
Re: linux-next: Tree for Jun 13 (drivers/net/ethernet/freescale/fs_enet/mii-fec.c)
On 6/12/23 23:59, Stephen Rothwell wrote: > Hi all, > > Changes since 20230609: > on PPC32: ../drivers/net/ethernet/freescale/fs_enet/mii-fec.c: In function 'fs_enet_mdio_probe': ../drivers/net/ethernet/freescale/fs_enet/mii-fec.c:130:50: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t' {aka 'long long unsigned int'} [-Wformat=] 130 | snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", res.start); | ~^ ~ | | | | | resource_size_t {aka long long unsigned int} | unsigned int | %llx -- ~Randy
[PATCH 2/3] microblaze: Move the ARCH_{DMA,SLAB}_MINALIGN definitions to asm/cache.h
The microblaze architecture defines ARCH_DMA_MINALIGN in asm/page.h. Move it to asm/cache.h to allow a generic ARCH_DMA_MINALIGN definition in linux/cache.h without redefine errors/warnings. While at it, also move ARCH_SLAB_MINALIGN to asm/cache.h for consistency. Signed-off-by: Catalin Marinas Cc: Michal Simek --- arch/microblaze/include/asm/cache.h | 5 + arch/microblaze/include/asm/page.h | 5 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/microblaze/include/asm/cache.h b/arch/microblaze/include/asm/cache.h index a149b3e711ec..1903988b9e23 100644 --- a/arch/microblaze/include/asm/cache.h +++ b/arch/microblaze/include/asm/cache.h @@ -18,4 +18,9 @@ #define SMP_CACHE_BYTESL1_CACHE_BYTES +/* MS be sure that SLAB allocates aligned objects */ +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES + +#define ARCH_SLAB_MINALIGN L1_CACHE_BYTES + #endif /* _ASM_MICROBLAZE_CACHE_H */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 7b9861bcd458..337f23eabc71 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -30,11 +30,6 @@ #ifndef __ASSEMBLY__ -/* MS be sure that SLAB allocates aligned objects */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES - -#define ARCH_SLAB_MINALIGN L1_CACHE_BYTES - /* * PAGE_OFFSET -- the first address of the first page of memory. With MMU * it is set to the kernel start address (aligned on a page boundary).
[PATCH 3/3] sh: Move the ARCH_DMA_MINALIGN definition to asm/cache.h
The sh architecture defines ARCH_DMA_MINALIGN in asm/page.h. Move it to asm/cache.h to allow a generic ARCH_DMA_MINALIGN definition in linux/cache.h without redefine errors/warnings. Signed-off-by: Catalin Marinas Cc: Yoshinori Sato Cc: Rich Felker Cc: John Paul Adrian Glaubitz Cc: linux...@vger.kernel.org --- arch/sh/include/asm/cache.h | 6 ++ arch/sh/include/asm/page.h | 6 -- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h index 32dfa6b82ec6..b38dbc975581 100644 --- a/arch/sh/include/asm/cache.h +++ b/arch/sh/include/asm/cache.h @@ -14,6 +14,12 @@ #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +/* + * Some drivers need to perform DMA into kmalloc'ed buffers + * and so we have to increase the kmalloc minalign for this. + */ +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES + #define __read_mostly __section(".data..read_mostly") #ifndef __ASSEMBLY__ diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 09ac6c7faee0..62f4b9edcb98 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -174,10 +174,4 @@ typedef struct page *pgtable_t; #include #include -/* - * Some drivers need to perform DMA into kmalloc'ed buffers - * and so we have to increase the kmalloc minalign for this. - */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES - #endif /* __ASM_SH_PAGE_H */
[PATCH 1/3] powerpc: Move the ARCH_DMA_MINALIGN definition to asm/cache.h
The powerpc architecture defines ARCH_DMA_MINALIGN in asm/page_32.h and only if CONFIG_NOT_COHERENT_CACHE is enabled (32-bit platforms only). Move this macro to asm/cache.h to allow a generic ARCH_DMA_MINALIGN definition in linux/cache.h without redefine errors/warnings. Signed-off-by: Catalin Marinas Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202306131053.1ybvrrho-...@intel.com/ Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: linuxppc-dev@lists.ozlabs.org --- arch/powerpc/include/asm/cache.h | 4 arch/powerpc/include/asm/page_32.h | 4 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ae0a68a838e8..69232231d270 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -33,6 +33,10 @@ #define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT) +#ifdef CONFIG_NOT_COHERENT_CACHE +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#endif + #if !defined(__ASSEMBLY__) #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index 56f217606327..b9ac9e3a771c 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -12,10 +12,6 @@ #define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32 -#ifdef CONFIG_NOT_COHERENT_CACHE -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES -#endif - #if defined(CONFIG_PPC_256K_PAGES) || \ (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)) #define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2 - 2) /* 1/4 of a page */
[PATCH 0/3] Move the ARCH_DMA_MINALIGN definition to asm/cache.h
Hi, The ARCH_KMALLOC_MINALIGN reduction series defines a generic ARCH_DMA_MINALIGN in linux/cache.h: https://lore.kernel.org/r/20230612153201.554742-2-catalin.mari...@arm.com/ Unfortunately, this causes a duplicate definition warning for microblaze, powerpc (32-bit only) and sh as these architectures define ARCH_DMA_MINALIGN in a different file than asm/cache.h. Move the macro to asm/cache.h to avoid this issue and also bring them in line with the other architectures. Andrew, if the arch maintainers cc'ed are fine with such change, could you please take these three patches together with the ARCH_KMALLOC_MINALIGN series? Thank you. Catalin Marinas (3): powerpc: Move the ARCH_DMA_MINALIGN definition to asm/cache.h microblaze: Move the ARCH_{DMA,SLAB}_MINALIGN definitions to asm/cache.h sh: Move the ARCH_DMA_MINALIGN definition to asm/cache.h arch/microblaze/include/asm/cache.h | 5 + arch/microblaze/include/asm/page.h | 5 - arch/powerpc/include/asm/cache.h| 4 arch/powerpc/include/asm/page_32.h | 4 arch/sh/include/asm/cache.h | 6 ++ arch/sh/include/asm/page.h | 6 -- 6 files changed, 15 insertions(+), 15 deletions(-)
Re: linux-next: build failure after merge of the mm tree
Hi Stephen, On Tue, Jun 13, 2023 at 04:21:19PM +1000, Stephen Rothwell wrote: > After merging the mm tree, today's linux-next build (powerpc > ppc44x_defconfig) failed like this: > > In file included from arch/powerpc/include/asm/page.h:247, > from arch/powerpc/include/asm/thread_info.h:13, > from include/linux/thread_info.h:60, > from include/asm-generic/preempt.h:5, > from ./arch/powerpc/include/generated/asm/preempt.h:1, > from include/linux/preempt.h:78, > from include/linux/spinlock.h:56, > from include/linux/ipc.h:5, > from include/uapi/linux/sem.h:5, > from include/linux/sem.h:5, > from include/linux/compat.h:14, > from arch/powerpc/kernel/asm-offsets.c:12: > arch/powerpc/include/asm/page_32.h:16: warning: "ARCH_DMA_MINALIGN" redefined >16 | #define ARCH_DMA_MINALIGN L1_CACHE_BYTES > | > In file included from include/linux/time.h:5, > from include/linux/compat.h:10: > include/linux/cache.h:104: note: this is the location of the previous > definition > 104 | #define ARCH_DMA_MINALIGN __alignof__(unsigned long long) > | > > (lots of theses) > > Caused by commit > > cc7335787e73 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from > ARCH_DMA_MINALIGN") > > I have applied the following hack for today - we need something better. I just posted this series fixing it for powerpc, microblaze and sh. I did not add the #ifndef __powerpc64__ line since CONFIG_NOT_COHERENT_CACHE should not be enabled for those builds. https://lore.kernel.org/r/20230613155245.1228274-1-catalin.mari...@arm.com > From: Stephen Rothwell > Date: Tue, 13 Jun 2023 16:07:16 +1000 > Subject: [PATCH] fix up for "mm/slab: decouple ARCH_KMALLOC_MINALIGN from > ARCH_DMA_MINALIGN" > > Signed-off-by: Stephen Rothwell > --- > arch/powerpc/include/asm/cache.h | 9 + > 1 file changed, 9 insertions(+) > > diff --git a/arch/powerpc/include/asm/cache.h > b/arch/powerpc/include/asm/cache.h > index ae0a68a838e8..e9be1396dfd1 100644 > --- a/arch/powerpc/include/asm/cache.h > +++ b/arch/powerpc/include/asm/cache.h > @@ -142,5 +142,14 @@ static inline void iccci(void *addr) > } > > #endif /* !__ASSEMBLY__ */ > + > +#ifndef __powerpc64__ > +#ifdef CONFIG_NOT_COHERENT_CACHE > +#ifndef ARCH_DMA_MINALIGN > +#define ARCH_DMA_MINALIGNL1_CACHE_BYTES > +#endif > +#endif > +#endif > + > #endif /* __KERNEL__ */ > #endif /* _ASM_POWERPC_CACHE_H */ I think it should also remove the ARCH_DMA_MINALIGN from asm/page.h (as I did in my series; sorry I did not cc you, only noticed now that you reported it as well). -- Catalin
Re: [PATCH v14 00/15] phy: Add support for Lynx 10G SerDes
On Mon, Jun 12, 2023 at 04:46:16PM -0400, Sean Anderson wrote: > On 6/12/23 12:33, Vladimir Oltean wrote: > > On Mon, Jun 12, 2023 at 10:35:21AM -0400, Sean Anderson wrote: > >> > And if SERDES protocol switching was not physically possible, would this > >> > patch set still have value? > > More to the point, did you make any progress in this area? Aha. It is two email exchanges later that you've thought of asking that. No, I haven't made progress. Though I managed to get some extra pointers and things to try out, and at this stage I am the limiting factor. I will let you know once the investigation has reached a conclusion. So many things depend on whether major SERDES protocol reconfiguration is possible, that if I were you, my question would be just that, instead of the equivalent of "Vladimir, when can my driver be merged ???". I understand how incredibly frustrating it is that the responses are coming in so slowly, and this is the reason why I am responding at all to your pings even if I have essentially nothing new to say. And it's not that you don't care whether protocol reconfiguration is possible or not - you do, but at the same time you're overcome with this strong attachment to your solution just because it is yours, that I don't know how to deal with. > also does not work, as outlined above. > > The clocking is incompatible, and must be fixed by software. > > The only thing self-inflicted is NXP's conflicting design. The way things are supposed to work (*if* this works at all) is that the reset state machine starts with a supported PLL / refclk configuration that permits a certain subset of protocols, and the SERDES protocols can be changed from the reset state, as desired, for the individual lanes. What is self-inflicted is that the refclks on your board design are not supportable by any reset state machine configuration, and wiring them that way was a conscious decision. Did your company's board designers receive the recommendation to disconnect RESET_REQ from NXP, and has NXP said that the end result will be something that continues to be supportable? I've searched the customer support database and the answer seems to be no. In any case, if you have to disconnect RESET_REQ from the SoC to make the driver in this form useful, then... yeah, no. You're obviously free to do whatever you want with your products, but that's not how mainline Linux works, it needs to be useful beyond you. If protocol switching works, I will maintain that your board should have wired the refclks to PLLs the other way around (which is supported by the RCW), and then proceed to do protocol switching to reach the desired configuration. So you can see that many things change based on whether protocol reconfig is possible or not. The patch set presented here looks exactly like the product of someone infatuated with form over substance, and who misses the overall picture. My belief is that if you shift your attitude from "is the work done?" to "is the work generally useful?", the interactions will improve by a lot. > > Have you actually tested changing individual lanes from SGMII to SGMII 2.5? > > I know it works on LS1028A, but I don't know if that's also true on LS1046A. > > Your comments do say "LYNX_PROTO_SGMII25, /* Not tested */". > > Not yet. Aha. So concretely, what makes you so confident with moving this forward? You like the code to just be there, no? It helps your board, I understand. But not everything has to stay in mainline if it's not useful for others, and I've proposed an alternative solution which I'm not sure how seriously you took. > This driver would also be a good place to add the KR link training with > NXP tried to upstream a few years ago. Well, speaking as someone who is now also tasked with the copper backplane support, believe me that I know that, and this is why I'm so desperate with the logic you're trying to push forward. It's clear that we should try to collaborate rather than try to push individualistic non-tested non-solutions. > > Assuming a start from SERDES protocol with PLL mapping , > > this protocol change implies powering on PLL 1 (reset state machine > > wants it to be disabled) and moving those lanes from PLL 2 to PLL 1. > > > > At first sight you might appear to have a point related to the fact that > > PLL register writes are necessary, and thus this whole shebang is necessary. > > But this can all be done using PBI commands, with the added benefit that > > U-Boot can also use those SERDES networking ports, and not just Linux. > > You can use the RCW+PBL specific to your board to inform the SoC that > > your platform's refclk 1 is 156 MHz (something which the reset state > > machine seems unable to learn, with protocol 0x). You don't have to > > put that in the device tree. You don't have to push code to any open > > source project to expose your platform specific details. Then, just like > > in the case of the Lynx 28G driver on LX2160,
Re: [PATCH 4/4] powerpc/64s: Use POWER10 stsync barrier for wmb()
Nicholas Piggin writes: > The most expensive ordering for hwsync to provide is the store-load > barrier, because all prior stores have to be drained to the caches > before subsequent instructions can complete. > > stsync just orders stores which means it can just be a barrer that > goes down the store queue and orders draining, and does not prevent > completion of subsequent instructions. So it should be faster than > hwsync. > > Use stsync for wmb(). Older processors that don't recognise the SC > field should treat this as hwsync. qemu (7.1) emulating ppc64e does not :/ mpic: Setting up MPIC " OpenPIC " version 1.2 at fe004, max 1 CPUs mpic: ISU size: 256, shift: 8, mask: ff mpic: Initializing for 256 sources Oops: Exception in kernel mode, sig: 4 [#1] No more output. (qemu) info registers │ NIP c0df4264 LR c00ce49c CTR XER 2000 CPU#0 │ MSR 80001000 HID0 HF 24020006 iidx 1 didx 1 │ ... SRR0 c00ce7c4 SRR1 80081000PVR 80240020 VRSAVE $ objdump -d vmlinux | grep c00ce7c4 c00ce7c4: 7c 03 04 ac stsync That's qemu -M ppce500 -cpu e5500 or e6500. I guess just put it behind an #ifdef 64S. cheers
Re: [PATCH] macintosh: Switch i2c drivers back to use .probe()
Uwe Kleine-König writes: > On Tue, May 23, 2023 at 09:50:53PM +0200, Uwe Kleine-König wrote: >> After commit b8a1a4cd5a98 ("i2c: Provide a temporary .probe_new() >> call-back type"), all drivers being converted to .probe_new() and then >> 03c835f498b5 ("i2c: Switch .probe() to not take an id parameter") convert >> back to (the new) .probe() to be able to eventually drop .probe_new() from >> struct i2c_driver. >> >> Signed-off-by: Uwe Kleine-König >> --- >> Hello, >> >> this patch was generated using coccinelle, but I aligned the result to >> the per-file indention. >> >> I chose to convert all drivers below drivers/macintosh in a single >> patch, but if you prefer I can split by driver. >> >> v6.4-rc1 was taken as a base, as there are no commits in next touching >> drivers/macintosh I don't expect problems when applying this patch. If >> conflicts arise until this is applied, feel free to just drop the files >> with conflicts from this patch. I'll care about the fallout later then. >> >> Also note there is no coordination necessary with the i2c tree. Dropping >> .probe_new() will happen only when all (or most) drivers are converted, >> which will happen after v6.5-rc1 for sure. > > Can someone still pick up this patch for v6.5-rc1? Yes, I will. cheers
Re: [PATCH v2 15/23] s390: allow pte_offset_map_lock() to fail
On Thu, 8 Jun 2023 12:27:22 -0700 (PDT) Hugh Dickins wrote: > In rare transient cases, not yet made possible, pte_offset_map() and > pte_offset_map_lock() may not find a page table: handle appropriately. > > Add comment on mm's contract with s390 above __zap_zero_pages(), > and fix old comment there: must be called after THP was disabled. > > Signed-off-by: Hugh Dickins Acked-by: Claudio Imbrenda > --- > arch/s390/kernel/uv.c | 2 ++ > arch/s390/mm/gmap.c| 9 - > arch/s390/mm/pgtable.c | 12 +--- > 3 files changed, 19 insertions(+), 4 deletions(-) > > diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c > index cb2ee06df286..3c62d1b218b1 100644 > --- a/arch/s390/kernel/uv.c > +++ b/arch/s390/kernel/uv.c > @@ -294,6 +294,8 @@ int gmap_make_secure(struct gmap *gmap, unsigned long > gaddr, void *uvcb) > > rc = -ENXIO; > ptep = get_locked_pte(gmap->mm, uaddr, ); > + if (!ptep) > + goto out; > if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && > pte_write(*ptep)) { > page = pte_page(*ptep); > rc = -EAGAIN; > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c > index dc90d1eb0d55..3a2a31a15ea8 100644 > --- a/arch/s390/mm/gmap.c > +++ b/arch/s390/mm/gmap.c > @@ -2537,7 +2537,12 @@ static inline void thp_split_mm(struct mm_struct *mm) > * Remove all empty zero pages from the mapping for lazy refaulting > * - This must be called after mm->context.has_pgste is set, to avoid > * future creation of zero pages > - * - This must be called after THP was enabled > + * - This must be called after THP was disabled. > + * > + * mm contracts with s390, that even if mm were to remove a page table, > + * racing with the loop below and so causing pte_offset_map_lock() to fail, > + * it will never insert a page table containing empty zero pages once > + * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set. > */ > static int __zap_zero_pages(pmd_t *pmd, unsigned long start, > unsigned long end, struct mm_walk *walk) > @@ -2549,6 +2554,8 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long > start, > spinlock_t *ptl; > > ptep = pte_offset_map_lock(walk->mm, pmd, addr, ); > + if (!ptep) > + break; > if (is_zero_pfn(pte_pfn(*ptep))) > ptep_xchg_direct(walk->mm, addr, ptep, > __pte(_PAGE_INVALID)); > pte_unmap_unlock(ptep, ptl); > diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c > index 6effb24de6d9..3bd2ab2a9a34 100644 > --- a/arch/s390/mm/pgtable.c > +++ b/arch/s390/mm/pgtable.c > @@ -829,7 +829,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned > long addr, > default: > return -EFAULT; > } > - > +again: > ptl = pmd_lock(mm, pmdp); > if (!pmd_present(*pmdp)) { > spin_unlock(ptl); > @@ -850,6 +850,8 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned > long addr, > spin_unlock(ptl); > > ptep = pte_offset_map_lock(mm, pmdp, addr, ); > + if (!ptep) > + goto again; > new = old = pgste_get_lock(ptep); > pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | > PGSTE_ACC_BITS | PGSTE_FP_BIT); > @@ -938,7 +940,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, > unsigned long addr) > default: > return -EFAULT; > } > - > +again: > ptl = pmd_lock(mm, pmdp); > if (!pmd_present(*pmdp)) { > spin_unlock(ptl); > @@ -955,6 +957,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, > unsigned long addr) > spin_unlock(ptl); > > ptep = pte_offset_map_lock(mm, pmdp, addr, ); > + if (!ptep) > + goto again; > new = old = pgste_get_lock(ptep); > /* Reset guest reference bit only */ > pgste_val(new) &= ~PGSTE_GR_BIT; > @@ -1000,7 +1004,7 @@ int get_guest_storage_key(struct mm_struct *mm, > unsigned long addr, > default: > return -EFAULT; > } > - > +again: > ptl = pmd_lock(mm, pmdp); > if (!pmd_present(*pmdp)) { > spin_unlock(ptl); > @@ -1017,6 +1021,8 @@ int get_guest_storage_key(struct mm_struct *mm, > unsigned long addr, > spin_unlock(ptl); > > ptep = pte_offset_map_lock(mm, pmdp, addr, ); > + if (!ptep) > + goto again; > pgste = pgste_get_lock(ptep); > *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; > paddr = pte_val(*ptep) & PAGE_MASK;
Re: [PATCH v9 01/42] mm: Rename arch pte_mkwrite()'s to pte_mkwrite_novma()
On Tue, Jun 13, 2023 at 2:13 AM Rick Edgecombe wrote: > The x86 Shadow stack feature includes a new type of memory called shadow > stack. This shadow stack memory has some unusual properties, which requires > some core mm changes to function properly. > > One of these unusual properties is that shadow stack memory is writable, > but only in limited ways. These limits are applied via a specific PTE > bit combination. Nevertheless, the memory is writable, and core mm code > will need to apply the writable permissions in the typical paths that > call pte_mkwrite(). Future patches will make pte_mkwrite() take a VMA, so > that the x86 implementation of it can know whether to create regular > writable memory or shadow stack memory. > > But there are a couple of challenges to this. Modifying the signatures of > each arch pte_mkwrite() implementation would be error prone because some > are generated with macros and would need to be re-implemented. Also, some > pte_mkwrite() callers operate on kernel memory without a VMA. > > So this can be done in a three step process. First pte_mkwrite() can be > renamed to pte_mkwrite_novma() in each arch, with a generic pte_mkwrite() > added that just calls pte_mkwrite_novma(). Next callers without a VMA can > be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all callers > can be changed to take/pass a VMA. > > Start the process by renaming pte_mkwrite() to pte_mkwrite_novma() and > adding the pte_mkwrite() wrapper in linux/pgtable.h. Apply the same > pattern for pmd_mkwrite(). Since not all archs have a pmd_mkwrite_novma(), > create a new arch config HAS_HUGE_PAGE that can be used to tell if > pmd_mkwrite() should be defined. Otherwise in the !HAS_HUGE_PAGE cases the > compiler would not be able to find pmd_mkwrite_novma(). > > No functional change. > > Cc: linux-...@vger.kernel.org > Cc: linux-ker...@vger.kernel.org > Cc: linux-al...@vger.kernel.org > Cc: linux-snps-...@lists.infradead.org > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-c...@vger.kernel.org > Cc: linux-hexa...@vger.kernel.org > Cc: linux-i...@vger.kernel.org > Cc: loonga...@lists.linux.dev > Cc: linux-m...@lists.linux-m68k.org > Cc: Michal Simek > Cc: Dinh Nguyen > Cc: linux-m...@vger.kernel.org > Cc: openr...@lists.librecores.org > Cc: linux-par...@vger.kernel.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-ri...@lists.infradead.org > Cc: linux-s...@vger.kernel.org > Cc: linux...@vger.kernel.org > Cc: sparcli...@vger.kernel.org > Cc: linux...@lists.infradead.org > Cc: linux-a...@vger.kernel.org > Cc: linux...@kvack.org > Suggested-by: Linus Torvalds > Signed-off-by: Rick Edgecombe > Link: > https://lore.kernel.org/lkml/CAHk-=wizjsu7c9sfyzb3q04108stghff2wfbokgccgw7riz...@mail.gmail.com/ > arch/m68k/include/asm/mcf_pgtable.h | 2 +- > arch/m68k/include/asm/motorola_pgtable.h | 2 +- > arch/m68k/include/asm/sun3_pgtable.h | 2 +- Acked-by: Geert Uytterhoeven Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: [PATCH v4 25/34] m68k: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 11:05 PM Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Some of the functions use the *get*page*() helper functions. Convert > these to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Geert Uytterhoeven Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: [PATCH] powerpc: Switch i2c drivers back to use .probe()
Hello, On Thu, May 25, 2023 at 10:56:22PM +0200, Uwe Kleine-König wrote: > After commit b8a1a4cd5a98 ("i2c: Provide a temporary .probe_new() > call-back type"), all drivers being converted to .probe_new() and then > 03c835f498b5 ("i2c: Switch .probe() to not take an id parameter") > convert back to (the new) .probe() to be able to eventually drop > .probe_new() from struct i2c_driver. > > Signed-off-by: Uwe Kleine-König Is there a chance to get this patch in before v6.5-rc1? I intend to complete the conversion (i.e. drop .probe_new() from struct i2c_driver) after v6.5-rc1. If this patch doesn't make it in, I'd include it in a pull request for Wolfram's i2c tree early after v6.5-rc1. So if you don't take it any more for the upcoming merge window, an ack would be great. Best regards Uwe -- Pengutronix e.K. | Uwe Kleine-König| Industrial Linux Solutions | https://www.pengutronix.de/ | signature.asc Description: PGP signature
Re: [PATCH] macintosh: Switch i2c drivers back to use .probe()
Hello, On Tue, May 23, 2023 at 09:50:53PM +0200, Uwe Kleine-König wrote: > After commit b8a1a4cd5a98 ("i2c: Provide a temporary .probe_new() > call-back type"), all drivers being converted to .probe_new() and then > 03c835f498b5 ("i2c: Switch .probe() to not take an id parameter") convert > back to (the new) .probe() to be able to eventually drop .probe_new() from > struct i2c_driver. > > Signed-off-by: Uwe Kleine-König > --- > Hello, > > this patch was generated using coccinelle, but I aligned the result to > the per-file indention. > > I chose to convert all drivers below drivers/macintosh in a single > patch, but if you prefer I can split by driver. > > v6.4-rc1 was taken as a base, as there are no commits in next touching > drivers/macintosh I don't expect problems when applying this patch. If > conflicts arise until this is applied, feel free to just drop the files > with conflicts from this patch. I'll care about the fallout later then. > > Also note there is no coordination necessary with the i2c tree. Dropping > .probe_new() will happen only when all (or most) drivers are converted, > which will happen after v6.5-rc1 for sure. Can someone still pick up this patch for v6.5-rc1? I intend to send a pull request to Wolfram's i2c-tree that drops .probe_new() from struct i2c_driver on top of v6.5-rc1 early after the merge window closes. So getting this in before would be great. Otherwise I'm happily adding received acks to this patch for my PR :-) Best regards Uwe -- Pengutronix e.K. | Uwe Kleine-König| Industrial Linux Solutions | https://www.pengutronix.de/ | signature.asc Description: PGP signature
linux-next: build failure after merge of the mm tree
Hi all, After merging the mm tree, today's linux-next build (powerpc ppc44x_defconfig) failed like this: In file included from arch/powerpc/include/asm/page.h:247, from arch/powerpc/include/asm/thread_info.h:13, from include/linux/thread_info.h:60, from include/asm-generic/preempt.h:5, from ./arch/powerpc/include/generated/asm/preempt.h:1, from include/linux/preempt.h:78, from include/linux/spinlock.h:56, from include/linux/ipc.h:5, from include/uapi/linux/sem.h:5, from include/linux/sem.h:5, from include/linux/compat.h:14, from arch/powerpc/kernel/asm-offsets.c:12: arch/powerpc/include/asm/page_32.h:16: warning: "ARCH_DMA_MINALIGN" redefined 16 | #define ARCH_DMA_MINALIGN L1_CACHE_BYTES | In file included from include/linux/time.h:5, from include/linux/compat.h:10: include/linux/cache.h:104: note: this is the location of the previous definition 104 | #define ARCH_DMA_MINALIGN __alignof__(unsigned long long) | (lots of theses) Caused by commit cc7335787e73 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from ARCH_DMA_MINALIGN") I have applied the following hack for today - we need something better. From: Stephen Rothwell Date: Tue, 13 Jun 2023 16:07:16 +1000 Subject: [PATCH] fix up for "mm/slab: decouple ARCH_KMALLOC_MINALIGN from ARCH_DMA_MINALIGN" Signed-off-by: Stephen Rothwell --- arch/powerpc/include/asm/cache.h | 9 + 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ae0a68a838e8..e9be1396dfd1 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -142,5 +142,14 @@ static inline void iccci(void *addr) } #endif /* !__ASSEMBLY__ */ + +#ifndef __powerpc64__ +#ifdef CONFIG_NOT_COHERENT_CACHE +#ifndef ARCH_DMA_MINALIGN +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#endif +#endif +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_CACHE_H */ -- 2.39.2 -- Cheers, Stephen Rothwell pgpUGg_Z3p4ax.pgp Description: OpenPGP digital signature
Re: [PATCH] powerpc/xmon: Fix comparing pointer
Le 13/06/2023 à 05:38, wuyonggang...@208suo.com a écrit : > > > Fix the following coccicheck warning: > > arch/powerpc/xmon/spu-dis.c:51:34-35: WARNING comparing pointer to 0 Please send text messages, not html. And please check your patch, a lot of lines have no modifications but appears as modified in your patch because you changed the formating. Please don't change more than what is intended to change. Thanks > > Signed-off-by: Yonggang Wu > --- > arch/powerpc/xmon/spu-dis.c | 384 ++-- > 1 file changed, 193 insertions(+), 191 deletions(-) > > diff --git a/arch/powerpc/xmon/spu-dis.c b/arch/powerpc/xmon/spu-dis.c > index 4b0a4e640f08..f48a2ddd7440 100644 > --- a/arch/powerpc/xmon/spu-dis.c > +++ b/arch/powerpc/xmon/spu-dis.c > @@ -22,216 +22,218 @@ extern const int spu_num_opcodes; > #define SPU_DISASM_TBL_SIZE (1 << 11) > static const struct spu_opcode > *spu_disassemble_table[SPU_DISASM_TBL_SIZE]; > > -static void > -init_spu_disassemble (void) > +static void init_spu_disassemble(void) > { > - int i; > - > - /* If two instructions have the same opcode then we prefer the first > - * one. In most cases it is just an alternate mnemonic. */ > - for (i = 0; i < spu_num_opcodes; i++) > - { > - int o = spu_opcodes[i].opcode; > - if (o >= SPU_DISASM_TBL_SIZE) > - continue; /* abort (); */ > - if (spu_disassemble_table[o] == 0) > - spu_disassemble_table[o] = _opcodes[i]; > - } > + int i; > + > + /* > + * If two instructions have the same opcode then we prefer the first > + * one. In most cases it is just an alternate mnemonic. > + */ > + for (i = 0; i < spu_num_opcodes; i++) { > + int o = spu_opcodes[i].opcode; > + > + if (o >= SPU_DISASM_TBL_SIZE) > + continue; /* abort(); */ > + if (spu_disassemble_table[o] == NULL) > + spu_disassemble_table[o] = _opcodes[i]; > + } > } > > /* Determine the instruction from the 10 least significant bits. */ > -static const struct spu_opcode * > -get_index_for_opcode (unsigned int insn) > +static const struct spu_opcode *get_index_for_opcode(unsigned int insn) > { > - const struct spu_opcode *index; > - unsigned int opcode = insn >> (32-11); > - > - /* Init the table. This assumes that element 0/opcode 0 (currently > - * NOP) is always used */ > - if (spu_disassemble_table[0] == 0) > - init_spu_disassemble (); > - > - if ((index = spu_disassemble_table[opcode & 0x780]) != 0 > - && index->insn_type == RRR) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7f0]) != 0 > - && (index->insn_type == RI18 || index->insn_type == LBT)) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7f8]) != 0 > - && index->insn_type == RI10) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7fc]) != 0 > - && (index->insn_type == RI16)) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7fe]) != 0 > - && (index->insn_type == RI8)) > - return index; > - > - if ((index = spu_disassemble_table[opcode & 0x7ff]) != 0) > - return index; > - > - return NULL; > + const struct spu_opcode *index; > + unsigned int opcode = insn >> (32-11); > + > + /* > + * Init the table. This assumes that element 0/opcode 0 (currently > + * NOP) is always used > + */ > + if (spu_disassemble_table[0] == NULL) > + init_spu_disassemble(); > + > + index = spu_disassemble_table[opcode & 0x780]; > + if (index != NULL && index->insn_type == RRR) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7f0]; > + if (index != NULL > + && (index->insn_type == RI18 || index->insn_type == LBT)) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7f8]; > + if (index != NULL > + && index->insn_type == RI10) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7fc] > + if (index != NULL && (index->insn_type == RI16)) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7fe]; > + if (index != NULL && (index->insn_type == RI8)) > + return index; > + > + index = spu_disassemble_table[opcode & 0x7ff]; > + if (index != NULL) > + return index; > + > + return NULL; > } > > /* Print a Spu instruction. */ > > -int > -print_insn_spu (unsigned long insn, unsigned long memaddr) > +int print_insn_spu(unsigned long insn, unsigned long memaddr) > { > - int value; > - int hex_value; > - const struct spu_opcode *index; > - enum spu_insns tag; > + int value; > + int hex_value; > + const struct spu_opcode *index; > + enum spu_insns tag; > > - index = get_index_for_opcode (insn); > + index = get_index_for_opcode(insn); > > - if (index == 0) > - { > - printf(".long 0x%lx", insn); > - } > - else > - { > - int i; > - int