[PATCH] powerpc/powernv: Fix no return statement issue in __opal_call_trace()
Commit 75d9fc7fd94e ("powerpc/powernv: move OPAL call wrapper tracing and interrupt handling to C") adds the function: static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3, s64 a4, s64 a5, s64 a6, s64 a7, unsigned long opcode, unsigned long msr) { } However, that causes the following compilation error: $ make skiroot_defconfig $ make -j$(nproc) ... arch/powerpc/platforms/powernv/opal-call.c: In function ‘__opal_call_trace’: arch/powerpc/platforms/powernv/opal-call.c:89:1: error: no return statement in function returning non-void [-Werror=return-type] This patch fixes the issue by returning zero as suggested by the author of the commit. Fixes: 75d9fc7fd94e ("powerpc/powernv: move OPAL call wrapper tracing and interrupt handling to C") Signed-off-by: Claudio Carvalho CC: Nicholas Piggin --- arch/powerpc/platforms/powernv/opal-call.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index 578757d..daad8c4 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -86,6 +86,7 @@ static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3, s64 a4, s64 a5, s64 a6, s64 a7, unsigned long opcode, unsigned long msr) { + return 0; } #define DO_TRACE false -- 2.7.4
[PATCH 19/20] KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size
From: Nicholas Piggin THP paths can defer splitting compound pages until after the actual remap and TLB flushes to split a huge PMD/PUD. This causes radix partition scope page table mappings to get out of sync with the host qemu page table mappings. This results in random memory corruption in the guest when running with THP. The easiest way to reproduce is to use the KVM balloon to free up a lot of memory in the guest and then shrink the balloon to give the memory back, while some work is being done in the guest. Cc: David Gibson Cc: "Aneesh Kumar K.V" Cc: kvm-...@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 91 +++--- 1 file changed, 37 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 7efc42538ccf..ae023d2256ef 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -538,8 +538,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long mmu_seq, pte_size; - unsigned long gpa, gfn, hva, pfn; + unsigned long mmu_seq; + unsigned long gpa, gfn, hva; struct kvm_memory_slot *memslot; struct page *page = NULL; long ret; @@ -636,9 +636,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ hva = gfn_to_hva_memslot(memslot, gfn); if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) { - pfn = page_to_pfn(page); upgrade_write = true; } else { + unsigned long pfn; + /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, writing, upgrade_p); @@ -652,63 +653,45 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - /* See if we can insert a 1GB or 2MB large PTE here */ - level = 0; - if (page && PageCompound(page)) { - 
pte_size = PAGE_SIZE << compound_order(compound_head(page)); - if (pte_size >= PUD_SIZE && - (gpa & (PUD_SIZE - PAGE_SIZE)) == - (hva & (PUD_SIZE - PAGE_SIZE))) { - level = 2; - pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1); - } else if (pte_size >= PMD_SIZE && - (gpa & (PMD_SIZE - PAGE_SIZE)) == - (hva & (PMD_SIZE - PAGE_SIZE))) { - level = 1; - pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); - } - } - /* -* Compute the PTE value that we need to insert. +* Read the PTE from the process' radix tree and use that +* so we get the shift and attribute bits. */ - if (page) { - pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE | - _PAGE_ACCESSED; - if (writing || upgrade_write) - pgflags |= _PAGE_WRITE | _PAGE_DIRTY; - pte = pfn_pte(pfn, __pgprot(pgflags)); + local_irq_disable(); + ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, ); + pte = *ptep; + local_irq_enable(); + + /* Get pte level from shift/size */ + if (shift == PUD_SHIFT && + (gpa & (PUD_SIZE - PAGE_SIZE)) == + (hva & (PUD_SIZE - PAGE_SIZE))) { + level = 2; + } else if (shift == PMD_SHIFT && + (gpa & (PMD_SIZE - PAGE_SIZE)) == + (hva & (PMD_SIZE - PAGE_SIZE))) { + level = 1; } else { - /* -* Read the PTE from the process' radix tree and use that -* so we get the attribute bits. -*/ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, ); - pte = *ptep; - local_irq_enable(); - if (shift == PUD_SHIFT && - (gpa & (PUD_SIZE - PAGE_SIZE)) == - (hva & (PUD_SIZE - PAGE_SIZE))) { - level = 2; - } else if (shift == PMD_SHIFT && - (gpa & (PMD_SIZE - PAGE_SIZE)) == - (hva & (PMD_SIZE - PAGE_SIZE))) { - level = 1; - } else if (shift && shift != PAGE_SHIFT) { - /* Adjust PFN */ - unsigned long mask = (1ul << shift) - PAGE_SIZE; - pte = __pte(pte_val(pte) | (hva & mask)); - } - pte =
Re: [PATCH v6 4/4] hugetlb: allow to free gigantic pages regardless of the configuration
On 3/7/19 5:20 AM, Alexandre Ghiti wrote: > On systems without CONTIG_ALLOC activated but that support gigantic pages, > boottime reserved gigantic pages cannot be freed at all. This patch > simply enables the possibility to hand back those pages to the memory > allocator. > > Signed-off-by: Alexandre Ghiti > Acked-by: David S. Miller [sparc] Reviewed-by: Mike Kravetz -- Mike Kravetz
Re: [PATCH] powerpc/6xx: fix setup and use of SPRN_PGDIR for hash32
Le 08/03/2019 à 17:03, Segher Boessenkool a écrit : On Fri, Mar 08, 2019 at 07:05:22AM +0000, Christophe Leroy wrote: Not only the 603 but all 6xx need SPRN_PGDIR to be initialised at startup. This patch moves it from __setup_cpu_603() to start_here() and __secondary_start(), close to the initialisation of SPRN_THREAD. I thought you meant an SPR I did not know about. But you just misspelled SPRN_SPRG_PGDIR :-) Oops. Michael, can you fix the commit text (and subject) when applying? Thanks Christophe
Re: [PATCH] powerpc/6xx: fix setup and use of SPRN_PGDIR for hash32
On Fri, Mar 08, 2019 at 07:05:22AM +0000, Christophe Leroy wrote: > Not only the 603 but all 6xx need SPRN_PGDIR to be initialised at > startup. This patch moves it from __setup_cpu_603() to start_here() > and __secondary_start(), close to the initialisation of SPRN_THREAD. I thought you meant an SPR I did not know about. But you just misspelled SPRN_SPRG_PGDIR :-) Segher
Re: [PATCH] pseries/energy: Use OF accessor functions to read ibm,drc-indexes
* Gautham R Shenoy [2019-03-08 21:03:24]: > From: "Gautham R. Shenoy" > > In cpu_to_drc_index() in the case when FW_FEATURE_DRC_INFO is absent, > we currently use of_get_property() to obtain the pointer to the array > corresponding to the property "ibm,drc-indexes". The elements of this > array are of type __be32, but are accessed without any conversion to > the OS-endianness, which is buggy on a Little Endian OS. > > Fix this by using the of_property_read_u32_index() accessor function to > safely read the elements of the array. > > Fixes: commit e83636ac3334 ("pseries/drc-info: Search DRC properties for CPU > indexes") > Cc: #v4.16+ > Reported-by: Pavithra R. Prakash > Signed-off-by: Gautham R. Shenoy Reviewed-by: Vaidyanathan Srinivasan > --- > arch/powerpc/platforms/pseries/pseries_energy.c | 27 > - > 1 file changed, 18 insertions(+), 9 deletions(-) > > diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c > b/arch/powerpc/platforms/pseries/pseries_energy.c > index 6ed2212..1c4d1ba 100644 > --- a/arch/powerpc/platforms/pseries/pseries_energy.c > +++ b/arch/powerpc/platforms/pseries/pseries_energy.c > @@ -77,18 +77,27 @@ static u32 cpu_to_drc_index(int cpu) > > ret = drc.drc_index_start + (thread_index * drc.sequential_inc); > } else { > - const __be32 *indexes; > - > - indexes = of_get_property(dn, "ibm,drc-indexes", NULL); > - if (indexes == NULL) > - goto err_of_node_put; > + u32 nr_drc_indexes, thread_drc_index; > > /* > - * The first element indexes[0] is the number of drc_indexes > - * returned in the list. Hence thread_index+1 will get the > - * drc_index corresponding to core number thread_index. > + * The first element of ibm,drc-indexes array is the > + * number of drc_indexes returned in the list. Hence > + * thread_index+1 will get the drc_index corresponding > + * to core number thread_index. 
>*/ > - ret = indexes[thread_index + 1]; > + rc = of_property_read_u32_index(dn, "ibm,drc-indexes", > + 0, &nr_drc_indexes); > + if (rc) > + goto err_of_node_put; > + > + WARN_ON(thread_index > nr_drc_indexes); > + rc = of_property_read_u32_index(dn, "ibm,drc-indexes", > + thread_index + 1, > + &thread_drc_index); > + if (rc) > + goto err_of_node_put; > + > + ret = thread_drc_index; Oops! Good bugfix. We should use device tree accessors like this in all places for correct and safe code. Thanks! --Vaidy
[PATCH] pseries/energy: Use OF accessor functions to read ibm, drc-indexes
From: "Gautham R. Shenoy" In cpu_to_drc_index() in the case when FW_FEATURE_DRC_INFO is absent, we currently use of_get_property() to obtain the pointer to the array corresponding to the property "ibm,drc-indexes". The elements of this array are of type __be32, but are accessed without any conversion to the OS-endianness, which is buggy on a Little Endian OS. Fix this by using the of_property_read_u32_index() accessor function to safely read the elements of the array. Fixes: commit e83636ac3334 ("pseries/drc-info: Search DRC properties for CPU indexes") Cc: #v4.16+ Reported-by: Pavithra R. Prakash Signed-off-by: Gautham R. Shenoy --- arch/powerpc/platforms/pseries/pseries_energy.c | 27 - 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index 6ed2212..1c4d1ba 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -77,18 +77,27 @@ static u32 cpu_to_drc_index(int cpu) ret = drc.drc_index_start + (thread_index * drc.sequential_inc); } else { - const __be32 *indexes; - - indexes = of_get_property(dn, "ibm,drc-indexes", NULL); - if (indexes == NULL) - goto err_of_node_put; + u32 nr_drc_indexes, thread_drc_index; /* -* The first element indexes[0] is the number of drc_indexes -* returned in the list. Hence thread_index+1 will get the -* drc_index corresponding to core number thread_index. +* The first element of ibm,drc-indexes array is the +* number of drc_indexes returned in the list. Hence +* thread_index+1 will get the drc_index corresponding +* to core number thread_index. 
*/ - ret = indexes[thread_index + 1]; + rc = of_property_read_u32_index(dn, "ibm,drc-indexes", + 0, &nr_drc_indexes); + if (rc) + goto err_of_node_put; + + WARN_ON(thread_index > nr_drc_indexes); + rc = of_property_read_u32_index(dn, "ibm,drc-indexes", + thread_index + 1, + &thread_drc_index); + if (rc) + goto err_of_node_put; + + ret = thread_drc_index; } rc = 0; -- 1.9.4
Re: [PATCH v2] powerpc/mm: move warning from resize_hpt_for_hotplug()
I forgot the version change note: v2: add warning messages for H_PARAMETER and H_RESOURCE Thanks, Laurent On 08/03/2019 11:54, Laurent Vivier wrote: > resize_hpt_for_hotplug() reports a warning when it cannot > resize the hash page table ("Unable to resize hash page > table to target order") but in some cases it's not a problem > and can make user thinks something has not worked properly. > > This patch moves the warning to arch_remove_memory() to > only report the problem when it is needed. > > Signed-off-by: Laurent Vivier > --- > arch/powerpc/include/asm/sparsemem.h | 4 ++-- > arch/powerpc/mm/hash_utils_64.c | 17 ++--- > arch/powerpc/mm/mem.c | 3 ++- > arch/powerpc/platforms/pseries/lpar.c | 3 ++- > 4 files changed, 12 insertions(+), 15 deletions(-) > > diff --git a/arch/powerpc/include/asm/sparsemem.h > b/arch/powerpc/include/asm/sparsemem.h > index 68da49320592..3192d454a733 100644 > --- a/arch/powerpc/include/asm/sparsemem.h > +++ b/arch/powerpc/include/asm/sparsemem.h > @@ -17,9 +17,9 @@ extern int create_section_mapping(unsigned long start, > unsigned long end, int ni > extern int remove_section_mapping(unsigned long start, unsigned long end); > > #ifdef CONFIG_PPC_BOOK3S_64 > -extern void resize_hpt_for_hotplug(unsigned long new_mem_size); > +extern int resize_hpt_for_hotplug(unsigned long new_mem_size); > #else > -static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { } > +static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { > return 0; } > #endif > > #ifdef CONFIG_NUMA > diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c > index 0cc7fbc3bd1c..40bb2a8326bb 100644 > --- a/arch/powerpc/mm/hash_utils_64.c > +++ b/arch/powerpc/mm/hash_utils_64.c > @@ -755,12 +755,12 @@ static unsigned long __init htab_get_table_size(void) > } > > #ifdef CONFIG_MEMORY_HOTPLUG > -void resize_hpt_for_hotplug(unsigned long new_mem_size) > +int resize_hpt_for_hotplug(unsigned long new_mem_size) > { > unsigned 
target_hpt_shift; > > if (!mmu_hash_ops.resize_hpt) > - return; > + return 0; > > target_hpt_shift = htab_shift_for_mem_size(new_mem_size); > > @@ -773,15 +773,10 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size) >* current shift >*/ > if ((target_hpt_shift > ppc64_pft_size) > - || (target_hpt_shift < (ppc64_pft_size - 1))) { > - int rc; > - > - rc = mmu_hash_ops.resize_hpt(target_hpt_shift); > - if (rc && (rc != -ENODEV)) > - printk(KERN_WARNING > -"Unable to resize hash page table to target > order %d: %d\n", > -target_hpt_shift, rc); > - } > + || (target_hpt_shift < (ppc64_pft_size - 1))) > + return mmu_hash_ops.resize_hpt(target_hpt_shift); > + > + return 0; > } > > int hash__create_section_mapping(unsigned long start, unsigned long end, int > nid) > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > index 33cc6f676fa6..0d40d970cf4a 100644 > --- a/arch/powerpc/mm/mem.c > +++ b/arch/powerpc/mm/mem.c > @@ -169,7 +169,8 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 > size, >*/ > vm_unmap_aliases(); > > - resize_hpt_for_hotplug(memblock_phys_mem_size()); > + if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) > + pr_warn("Hash collision while resizing HPT\n"); > > return ret; > } > diff --git a/arch/powerpc/platforms/pseries/lpar.c > b/arch/powerpc/platforms/pseries/lpar.c > index f2a9f0adc2d3..1034ef1fe2b4 100644 > --- a/arch/powerpc/platforms/pseries/lpar.c > +++ b/arch/powerpc/platforms/pseries/lpar.c > @@ -901,8 +901,10 @@ static int pseries_lpar_resize_hpt(unsigned long shift) > break; > > case H_PARAMETER: > + pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n"); > return -EINVAL; > case H_RESOURCE: > + pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n"); > return -EPERM; > default: > pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc); > @@ -918,7 +920,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift) > if (rc != 0) { > switch (state.commit_rc) { > case H_PTEG_FULL: > - 
pr_warn("Hash collision while resizing HPT\n"); > return -ENOSPC; > > default: >
[PATCH v2] powerpc/mm: move warning from resize_hpt_for_hotplug()
resize_hpt_for_hotplug() reports a warning when it cannot resize the hash page table ("Unable to resize hash page table to target order") but in some cases it's not a problem and can make user thinks something has not worked properly. This patch moves the warning to arch_remove_memory() to only report the problem when it is needed. Signed-off-by: Laurent Vivier --- arch/powerpc/include/asm/sparsemem.h | 4 ++-- arch/powerpc/mm/hash_utils_64.c | 17 ++--- arch/powerpc/mm/mem.c | 3 ++- arch/powerpc/platforms/pseries/lpar.c | 3 ++- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 68da49320592..3192d454a733 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -17,9 +17,9 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int ni extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 -extern void resize_hpt_for_hotplug(unsigned long new_mem_size); +extern int resize_hpt_for_hotplug(unsigned long new_mem_size); #else -static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { } +static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; } #endif #ifdef CONFIG_NUMA diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0cc7fbc3bd1c..40bb2a8326bb 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -755,12 +755,12 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -void resize_hpt_for_hotplug(unsigned long new_mem_size) +int resize_hpt_for_hotplug(unsigned long new_mem_size) { unsigned target_hpt_shift; if (!mmu_hash_ops.resize_hpt) - return; + return 0; target_hpt_shift = htab_shift_for_mem_size(new_mem_size); @@ -773,15 +773,10 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size) * current shift */ if ((target_hpt_shift > 
ppc64_pft_size) - || (target_hpt_shift < (ppc64_pft_size - 1))) { - int rc; - - rc = mmu_hash_ops.resize_hpt(target_hpt_shift); - if (rc && (rc != -ENODEV)) - printk(KERN_WARNING - "Unable to resize hash page table to target order %d: %d\n", - target_hpt_shift, rc); - } + || (target_hpt_shift < (ppc64_pft_size - 1))) + return mmu_hash_ops.resize_hpt(target_hpt_shift); + + return 0; } int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 33cc6f676fa6..0d40d970cf4a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -169,7 +169,8 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size, */ vm_unmap_aliases(); - resize_hpt_for_hotplug(memblock_phys_mem_size()); + if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) + pr_warn("Hash collision while resizing HPT\n"); return ret; } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f2a9f0adc2d3..1034ef1fe2b4 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -901,8 +901,10 @@ static int pseries_lpar_resize_hpt(unsigned long shift) break; case H_PARAMETER: + pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n"); return -EINVAL; case H_RESOURCE: + pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n"); return -EPERM; default: pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc); @@ -918,7 +920,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift) if (rc != 0) { switch (state.commit_rc) { case H_PTEG_FULL: - pr_warn("Hash collision while resizing HPT\n"); return -ENOSPC; default: -- 2.20.1
Re: [PATCH v4 04/19] powerpc: mm: Add p?d_large() definitions
On 08/03/2019 08:37, Mike Rapoport wrote: > On Wed, Mar 06, 2019 at 03:50:16PM +0000, Steven Price wrote: >> walk_page_range() is going to be allowed to walk page tables other than >> those of user space. For this it needs to know when it has reached a >> 'leaf' entry in the page tables. This information is provided by the >> p?d_large() functions/macros. >> >> For powerpc pmd_large() was already implemented, so hoist it out of the >> CONFIG_TRANSPARENT_HUGEPAGE condition and implement the other levels. >> >> Also since we now have a pmd_large always implemented we can drop the >> pmd_is_leaf() function. >> >> CC: Benjamin Herrenschmidt >> CC: Paul Mackerras >> CC: Michael Ellerman >> CC: linuxppc-dev@lists.ozlabs.org >> CC: kvm-...@vger.kernel.org >> Signed-off-by: Steven Price >> --- >> arch/powerpc/include/asm/book3s/64/pgtable.h | 30 ++-- > > There is one more definition of pmd_large() in > arch/powerpc/include/asm/pgtable.h True. That is a #define so will work correctly (it will override the generic version). Since it is only a dummy definition (always returns 0) it could be removed, but that would need to be in a separate patch after the asm-generic versions have been added to avoid breaking bisection. Steve
Re: [PATCH v5 10/10] powerpc/mm: Detect bad KUAP faults
Le 08/03/2019 à 02:16, Michael Ellerman a écrit : When KUAP is enabled we have logic to detect page faults that occur outside of a valid user access region and are blocked by the AMR. What we don't have at the moment is logic to detect a fault *within* a valid user access region, that has been incorrectly blocked by AMR. This is not meant to ever happen, but it can if we incorrectly save/restore the AMR, or if the AMR was overwritten for some other reason. Currently if that happens we assume it's just a regular fault that will be corrected by handling the fault normally, so we just return. But there is nothing the fault handling code can do to fix it, so the fault just happens again and we spin forever, leading to soft lockups. So add some logic to detect that case and WARN() if we ever see it. Arguably it should be a BUG(), but it's more polite to fail the access and let the kernel continue, rather than taking down the box. There should be no data integrity issue with failing the fault rather than BUG'ing, as we're just going to disallow an access that should have been allowed. To make the code a little easier to follow, unroll the condition at the end of bad_kernel_fault() and comment each case, before adding the call to bad_kuap_fault(). Signed-off-by: Michael Ellerman --- v5: New. 
.../powerpc/include/asm/book3s/64/kup-radix.h | 12 + arch/powerpc/include/asm/kup.h| 1 + arch/powerpc/mm/fault.c | 25 --- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 3d60b04fc3f6..8d2ddc61e92e 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -100,6 +100,18 @@ static inline void prevent_user_access(void __user *to, const void __user *from, set_kuap(AMR_KUAP_BLOCKED); } +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + if (mmu_has_feature(MMU_FTR_RADIX_KUAP) && + ((is_write && (regs->kuap & AMR_KUAP_BLOCK_WRITE)) || +(!is_write && (regs->kuap & AMR_KUAP_BLOCK_READ + { Should this { go on the previous line ? + WARN(true, "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); + return true; Could just be return WARN(true, ) Or even return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && ((is_write && (regs->kuap & AMR_KUAP_BLOCK_WRITE)) || (!is_write && (regs->kuap & AMR_KUAP_BLOCK_READ))), ...); + } + + return false; +} #endif /* CONFIG_PPC_KUAP */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index f79d4d970852..ccbd2a249575 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -28,6 +28,7 @@ static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size) { } static inline void allow_read_from_user(const void __user *from, unsigned long size) {} static inline void allow_write_to_user(void __user *to, unsigned long size) {} +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } #endif /* CONFIG_PPC_KUAP */ static inline void prevent_read_from_user(const void __user *from, unsigned long size) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 463d1e9d026e..b5d3578d9f65 100644 --- 
a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -44,6 +44,7 @@ #include #include #include +#include static inline bool notify_page_fault(struct pt_regs *regs) { @@ -224,7 +225,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, /* Is this a bad kernel fault ? */ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, -unsigned long address) +unsigned long address, bool is_write) We have regs, do we need is_write in addition ? Christophe { int is_exec = TRAP(regs) == 0x400; @@ -235,6 +236,9 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, address >= TASK_SIZE ? "exec-protected" : "user", address, from_kuid(_user_ns, current_uid())); + + // Kernel exec fault is always bad + return true; } if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) && @@ -244,7 +248,22 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, from_kuid(_user_ns, current_uid())); } - return is_exec || (address >= TASK_SIZE) || !search_exception_tables(regs->nip); + // Kernel
Re: [PATCH v5 09/10] powerpc/64s: Implement KUAP for Radix MMU
Le 08/03/2019 à 02:16, Michael Ellerman a écrit : From: Russell Currey Kernel Userspace Access Prevention utilises a feature of the Radix MMU which disallows read and write access to userspace addresses. By utilising this, the kernel is prevented from accessing user data from outside of trusted paths that perform proper safety checks, such as copy_{to/from}_user() and friends. Userspace access is disabled from early boot and is only enabled when performing an operation like copy_{to/from}_user(). The register that controls this (AMR) does not prevent userspace from accessing itself, so there is no need to save and restore when entering and exiting userspace. When entering the kernel from the kernel we save AMR and if it is not blocking user access (because eg. we faulted doing a user access) we reblock user access for the duration of the exception (ie. the page fault) and then restore the AMR when returning back to the kernel. This feature has a slight performance impact which I roughly measured to be 3% slower in the worst case (performing 1GB of 1 byte read()/write() syscalls), and is gated behind the CONFIG_PPC_KUAP option for performance-critical builds. This feature can be tested by using the lkdtm driver (CONFIG_LKDTM=y) and performing the following: # (echo ACCESS_USERSPACE) > [debugfs]/provoke-crash/DIRECT If enabled, this should send SIGSEGV to the thread. mpe: - Drop the unused paca flags. - Zero the UAMOR to be safe. - Save the AMR when we enter the kernel from the kernel and then block user access again if it's not already blocked. - Restore on the way back to the kernel. - This means we handle nesting of interrupts properly, ie. we are protected inside the page fault handler caused by a user access. - Add paranoid checking of AMR in switch and syscall return. - Add isync()'s around AMR writes as per the ISA. - Support selectively disabling read or write, with no support for nesting. 
Co-authored-by: Michael Ellerman Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman --- v5: - On kernel entry check if the AMR is already blocking user access and if so don't do the mtspr again, because it's slow (pointed out by Nick) (in kuap_save_amr_and_lock). - Rework the constants to make the asm a bit cleaner and avoid any hard coded shifts. - Selectively disable read or write, we don't support separately nesting read/write access (use allow_user_access() instead) and shouldn't need to (famous last words). - Add isync() before & after setting AMR in set_kuap() as per the ISA. We'll investigate whether they are both really needed in future. - Don't touch the AMR in hmi_exception_early() it never goes to virtual mode. - Check the full value in kuap_check_amr v4: - Drop the unused paca flags. - Zero the UAMOR to be safe. - Save the AMR when we enter the kernel from the kernel and then lock it again. - Restore on the way back to the kernel. - That means we handle nesting of interrupts properly, ie. we are protected inside the page fault handler caused by a user access. - Add paranoid checking of AMR in switch and syscall return. 
- Add an isync() to prevent_user_access() .../powerpc/include/asm/book3s/64/kup-radix.h | 107 ++ arch/powerpc/include/asm/exception-64s.h | 2 + arch/powerpc/include/asm/feature-fixups.h | 3 + arch/powerpc/include/asm/kup.h | 4 + arch/powerpc/include/asm/mmu.h | 10 +- arch/powerpc/kernel/entry_64.S | 27 - arch/powerpc/kernel/exceptions-64s.S | 3 + arch/powerpc/mm/pgtable-radix.c | 18 +++ arch/powerpc/mm/pkeys.c | 1 + arch/powerpc/platforms/Kconfig.cputype | 8 ++ 10 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/include/asm/book3s/64/kup-radix.h diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h new file mode 100644 index 000000000000..3d60b04fc3f6 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H +#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H + +#include + +#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) +#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) +#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) +#define AMR_KUAP_SHIFT 62 + +#ifdef __ASSEMBLY__ + +.macro kuap_restore_amr gpr What about calling it just kuap_restore (and likewise kuap_check and kuap_save_and_lock), for the day we add a different implementation for non-Radix? +#ifdef CONFIG_PPC_KUAP + BEGIN_MMU_FTR_SECTION_NESTED(67) + ld \gpr, STACK_REGS_KUAP(r1) + mtspr SPRN_AMR, \gpr + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm
Re: [PATCH v4 04/19] powerpc: mm: Add p?d_large() definitions
On Wed, Mar 06, 2019 at 03:50:16PM +, Steven Price wrote: > walk_page_range() is going to be allowed to walk page tables other than > those of user space. For this it needs to know when it has reached a > 'leaf' entry in the page tables. This information is provided by the > p?d_large() functions/macros. > > For powerpc pmd_large() was already implemented, so hoist it out of the > CONFIG_TRANSPARENT_HUGEPAGE condition and implement the other levels. > > Also since we now have a pmd_large always implemented we can drop the > pmd_is_leaf() function. > > CC: Benjamin Herrenschmidt > CC: Paul Mackerras > CC: Michael Ellerman > CC: linuxppc-dev@lists.ozlabs.org > CC: kvm-...@vger.kernel.org > Signed-off-by: Steven Price > --- > arch/powerpc/include/asm/book3s/64/pgtable.h | 30 ++-- There is one more definition of pmd_large() in arch/powerpc/include/asm/pgtable.h > arch/powerpc/kvm/book3s_64_mmu_radix.c | 12 ++-- > 2 files changed, 24 insertions(+), 18 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h > b/arch/powerpc/include/asm/book3s/64/pgtable.h > index c9bfe526ca9d..c4b29caf2a3b 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > @@ -907,6 +907,12 @@ static inline int pud_present(pud_t pud) > return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT)); > } > > +#define pud_largepud_large > +static inline int pud_large(pud_t pud) > +{ > + return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); > +} > + > extern struct page *pud_page(pud_t pud); > extern struct page *pmd_page(pmd_t pmd); > static inline pte_t pud_pte(pud_t pud) > @@ -954,6 +960,12 @@ static inline int pgd_present(pgd_t pgd) > return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); > } > > +#define pgd_largepgd_large > +static inline int pgd_large(pgd_t pgd) > +{ > + return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); > +} > + > static inline pte_t pgd_pte(pgd_t pgd) > { > return __pte_raw(pgd_raw(pgd)); > @@ -1107,6 +1119,15 @@ static 
inline bool pmd_access_permitted(pmd_t pmd, > bool write) > return pte_access_permitted(pmd_pte(pmd), write); > } > > +#define pmd_largepmd_large > +/* > + * returns true for pmd migration entries, THP, devmap, hugetlb > + */ > +static inline int pmd_large(pmd_t pmd) > +{ > + return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); > +} > + > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); > extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); > @@ -1133,15 +1154,6 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned > long addr, pmd_t *pmdp, > return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set); > } > > -/* > - * returns true for pmd migration entries, THP, devmap, hugetlb > - * But compile time dependent on THP config > - */ > -static inline int pmd_large(pmd_t pmd) > -{ > - return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); > -} > - > static inline pmd_t pmd_mknotpresent(pmd_t pmd) > { > return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT); > diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c > b/arch/powerpc/kvm/book3s_64_mmu_radix.c > index 1b821c6efdef..040db20ac2ab 100644 > --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c > +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c > @@ -363,12 +363,6 @@ static void kvmppc_pte_free(pte_t *ptep) > kmem_cache_free(kvm_pte_cache, ptep); > } > > -/* Like pmd_huge() and pmd_large(), but works regardless of config options */ > -static inline int pmd_is_leaf(pmd_t pmd) > -{ > - return !!(pmd_val(pmd) & _PAGE_PTE); > -} > - > static pmd_t *kvmppc_pmd_alloc(void) > { > return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); > @@ -455,7 +449,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t > *pmd, bool full, > for (im = 0; im < PTRS_PER_PMD; ++im, ++p) { > if (!pmd_present(*p)) > continue; > - if (pmd_is_leaf(*p)) { > + if (pmd_large(*p)) { > if (full) { > pmd_clear(p); > } else { > @@ -588,7 +582,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, > pte_t pte, > else if 
(level <= 1) > new_pmd = kvmppc_pmd_alloc(); > > - if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd))) > + if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_large(*pmd))) > new_ptep = kvmppc_pte_alloc(); > > /* Check if we might have been invalidated; let the guest retry if so */ > @@ -657,7 +651,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, > pte_t pte, > new_pmd = NULL; > } > pmd = pmd_offset(pud, gpa); > - if (pmd_is_leaf(*pmd)) { > + if (pmd_large(*pmd)) { > unsigned long lgpa = gpa & PMD_MASK; > > /* Check if we raced and someone else has set the same
Re: [PATCH v5 08/10] powerpc/lib: Refactor __patch_instruction() to use __put_user_asm()
Le 08/03/2019 à 02:16, Michael Ellerman a écrit : From: Russell Currey __patch_instruction() is called in early boot, and uses __put_user_size(), which includes the allow/prevent calls to enforce KUAP, which could either be called too early, or in the Radix case, forced to use "early_" versions of functions just to safely handle this one case. __put_user_asm() does not do this, and thus is safe to use both in early boot, and later on since in this case it should only ever be touching kernel memory. __patch_instruction() was previously refactored to use __put_user_size() in order to be able to return -EFAULT, which would allow the kernel to patch instructions in userspace, which should never happen. This has the functional change of causing faults on userspace addresses if KUAP is turned on, which should never happen in practice. What about module patching? Is there no risk of -EFAULT on module memory, since it is in a vm area? Christophe A future enhancement could be to double check the patch address is definitely allowed to be tampered with by the kernel. Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman --- v5: Unchanged. arch/powerpc/lib/code-patching.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 506413a2c25e..42fdadac6587 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -26,9 +26,9 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, unsigned int *patch_addr) { - int err; + int err = 0; - __put_user_size(instr, patch_addr, 4, err); + __put_user_asm(instr, patch_addr, err, "stw"); if (err) return err;
Re: [PATCH v5 05/10] powerpc: Add a framework for Kernel Userspace Access Protection
Le 08/03/2019 à 02:16, Michael Ellerman a écrit : From: Christophe Leroy This patch implements a framework for Kernel Userspace Access Protection. Then subarches will have the possibility to provide their own implementation by providing setup_kuap() and allow/prevent_user_access(). Some platforms will need to know the area accessed and whether it is accessed for read, write or both. Therefore source, destination and size are handed over to the two functions. mpe: Rename to allow/prevent rather than unlock/lock, and add read/write wrappers. Drop the 32-bit code for now until we have an implementation for it. Add kuap to pt_regs for 64-bit as well as 32-bit. Don't split strings, use pr_crit_ratelimited(). Signed-off-by: Christophe Leroy Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman --- v5: Futex ops need read/write so use allow_user_access() there. Use #ifdef CONFIG_PPC64 in kup.h to fix build errors. Allow subarch to override allow_read/write_from/to_user(). Those little helpers that will just call allow_user_access() when distinct read/write handling is not performed look like overkill to me. Can't the subarch do it by itself based on the nullity of from/to? static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size) { if (to && from) set_kuap(0); else if (to) set_kuap(AMR_KUAP_BLOCK_READ); else if (from) set_kuap(AMR_KUAP_BLOCK_WRITE); } Christophe v4: mpe: Rename to allow/prevent rather than unlock/lock, and add read/write wrappers. Drop the 32-bit code for now until we have an implementation for it. Add kuap to pt_regs for 64-bit as well as 32-bit. Don't split strings, use pr_crit_ratelimited().
.../admin-guide/kernel-parameters.txt | 2 +- arch/powerpc/include/asm/futex.h | 4 ++ arch/powerpc/include/asm/kup.h| 24 arch/powerpc/include/asm/ptrace.h | 11 +- arch/powerpc/include/asm/uaccess.h| 38 +++ arch/powerpc/kernel/asm-offsets.c | 4 ++ arch/powerpc/lib/checksum_wrappers.c | 4 ++ arch/powerpc/mm/fault.c | 19 -- arch/powerpc/mm/init-common.c | 10 + arch/powerpc/platforms/Kconfig.cputype| 12 ++ 10 files changed, 113 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f81d79de4de0..16883f2a05fd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2809,7 +2809,7 @@ noexec=on: enable non-executable mappings (default) noexec=off: disable non-executable mappings - nosmap [X86] + nosmap [X86,PPC] Disable SMAP (Supervisor Mode Access Prevention) even if it is supported by processor. diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 88b38b37c21b..3a6aa57b9d90 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; + allow_write_to_user(uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -62,6 +63,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, if (!ret) *oval = oldval; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } @@ -75,6 +77,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; + allow_write_to_user(uaddr, sizeof(*uaddr)); __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -95,6 +98,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } diff --git 
a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index a2a959cb4e36..4410625f4364 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -4,6 +4,8 @@ #ifndef __ASSEMBLY__ +#include + void setup_kup(void); #ifdef CONFIG_PPC_KUEP @@ -12,6 +14,28 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ +#ifdef CONFIG_PPC_KUAP +void setup_kuap(bool disabled); +#else +static inline void setup_kuap(bool disabled) { } +static inline void allow_user_access(void __user *to, const void __user *from, +