[PATCH v5 16/25] mm/powerpc: Use general page fault accounting

2020-07-07 Thread Peter Xu
Use the general page fault accounting by passing regs into handle_mm_fault().

CC: Michael Ellerman 
CC: Benjamin Herrenschmidt 
CC: Paul Mackerras 
CC: linuxppc-dev@lists.ozlabs.org
Acked-by: Michael Ellerman 
Signed-off-by: Peter Xu 
---
 arch/powerpc/mm/fault.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 25dee001d8e1..00259e9b452d 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -607,7 +607,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned 
long address,
 * make sure we exit gracefully rather than endlessly redo
 * the fault.
 */
-   fault = handle_mm_fault(vma, address, flags, NULL);
+   fault = handle_mm_fault(vma, address, flags, regs);
 
major |= fault & VM_FAULT_MAJOR;
 
@@ -633,14 +633,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned 
long address,
/*
 * Major/minor page fault accounting.
 */
-   if (major) {
-   current->maj_flt++;
-   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+   if (major)
cmo_account_page_fault();
-   } else {
-   current->min_flt++;
-   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-   }
+
return 0;
 }
 NOKPROBE_SYMBOL(__do_page_fault);
-- 
2.26.2



[PATCH v4 16/26] mm/powerpc: Use general page fault accounting

2020-06-30 Thread Peter Xu
Use the general page fault accounting by passing regs into handle_mm_fault().

CC: Michael Ellerman 
CC: Benjamin Herrenschmidt 
CC: Paul Mackerras 
CC: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Peter Xu 
---
 arch/powerpc/mm/fault.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 25dee001d8e1..00259e9b452d 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -607,7 +607,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned 
long address,
 * make sure we exit gracefully rather than endlessly redo
 * the fault.
 */
-   fault = handle_mm_fault(vma, address, flags, NULL);
+   fault = handle_mm_fault(vma, address, flags, regs);
 
major |= fault & VM_FAULT_MAJOR;
 
@@ -633,14 +633,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned 
long address,
/*
 * Major/minor page fault accounting.
 */
-   if (major) {
-   current->maj_flt++;
-   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+   if (major)
cmo_account_page_fault();
-   } else {
-   current->min_flt++;
-   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-   }
+
return 0;
 }
 NOKPROBE_SYMBOL(__do_page_fault);
-- 
2.26.2



[PATCH 16/26] mm/powerpc: Use general page fault accounting

2020-06-19 Thread Peter Xu
Use the general page fault accounting by passing regs into handle_mm_fault().

CC: Michael Ellerman 
CC: Benjamin Herrenschmidt 
CC: Paul Mackerras 
CC: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Peter Xu 
---
 arch/powerpc/mm/fault.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 992b10c3761c..e325d13efaf5 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -563,7 +563,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned 
long address,
 * make sure we exit gracefully rather than endlessly redo
 * the fault.
 */
-   fault = handle_mm_fault(vma, address, flags, NULL);
+   fault = handle_mm_fault(vma, address, flags, regs);
 
 #ifdef CONFIG_PPC_MEM_KEYS
/*
@@ -604,14 +604,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned 
long address,
/*
 * Major/minor page fault accounting.
 */
-   if (major) {
-   current->maj_flt++;
-   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+   if (major)
cmo_account_page_fault();
-   } else {
-   current->min_flt++;
-   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-   }
+
return 0;
 }
 NOKPROBE_SYMBOL(__do_page_fault);
-- 
2.26.2



[PATCH 17/25] mm/powerpc: Use mm_fault_accounting()

2020-06-15 Thread Peter Xu
Use the new mm_fault_accounting() helper for page fault accounting.

cmo_account_page_fault() is special.  Keep that.

CC: Michael Ellerman 
CC: Benjamin Herrenschmidt 
CC: Paul Mackerras 
CC: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Peter Xu 
---
 arch/powerpc/mm/fault.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 84af6c8eecf7..6043b639ae42 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -481,8 +481,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned 
long address,
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
 
-   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
if (error_code & DSISR_KEYFAULT)
return bad_key_fault_exception(regs, address,
   get_mm_addr_key(mm, address));
@@ -604,14 +602,11 @@ static int __do_page_fault(struct pt_regs *regs, unsigned 
long address,
/*
 * Major/minor page fault accounting.
 */
-   if (major) {
-   current->maj_flt++;
-   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+   if (major)
cmo_account_page_fault();
-   } else {
-   current->min_flt++;
-   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-   }
+
+   mm_fault_accounting(current, regs, address, major);
+
return 0;
 }
 NOKPROBE_SYMBOL(__do_page_fault);
-- 
2.26.2



Re: [PATCH v2 4/4] hugetlbfs: clean up command line processing

2020-04-14 Thread Peter Xu
On Mon, Apr 13, 2020 at 10:59:26AM -0700, Mike Kravetz wrote:
> On 4/10/20 1:37 PM, Peter Xu wrote:
> > On Wed, Apr 01, 2020 at 11:38:19AM -0700, Mike Kravetz wrote:
> >> With all hugetlb page processing done in a single file clean up code.
> >> - Make code match desired semantics
> >>   - Update documentation with semantics
> >> - Make all warnings and errors messages start with 'HugeTLB:'.
> >> - Consistently name command line parsing routines.
> >> - Check for hugepages_supported() before processing parameters.
> >> - Add comments to code
> >>   - Describe some of the subtle interactions
> >>   - Describe semantics of command line arguments
> >>
> >> Signed-off-by: Mike Kravetz 
> >> ---
> >>  .../admin-guide/kernel-parameters.txt | 35 ---
> >>  Documentation/admin-guide/mm/hugetlbpage.rst  | 44 +
> >>  mm/hugetlb.c  | 96 +++
> >>  3 files changed, 142 insertions(+), 33 deletions(-)
> >>
> >> diff --git a/Documentation/admin-guide/kernel-parameters.txt 
> >> b/Documentation/admin-guide/kernel-parameters.txt
> >> index 1bd5454b5e5f..de653cfe1726 100644
> >> --- a/Documentation/admin-guide/kernel-parameters.txt
> >> +++ b/Documentation/admin-guide/kernel-parameters.txt
> >> @@ -832,12 +832,15 @@
> >>See also Documentation/networking/decnet.txt.
> >>  
> >>default_hugepagesz=
> >> -  [same as hugepagesz=] The size of the default
> >> -  HugeTLB page size. This is the size represented by
> >> -  the legacy /proc/ hugepages APIs, used for SHM, and
> >> -  default size when mounting hugetlbfs filesystems.
> >> -  Defaults to the default architecture's huge page size
> >> -  if not specified.
> >> +  [HW] The size of the default HugeTLB page size. This
> > 
> > Could I ask what's "HW"?  Sorry this is not a comment at all but
> > really a pure question I wanted to ask... :)
> 
> kernel-parameters.rst includes kernel-parameters.txt and included the meaning
> for these codes.
> 
>HW  Appropriate hardware is enabled.
> 
> Previously, it listed an obsolete list of architectures.

I see. It was a bit confusing since a huge page is not real hardware;
"CAP (capability)" might be easier, but I get the point now, thanks!

[...]

> >> diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst 
> >> b/Documentation/admin-guide/mm/hugetlbpage.rst
> >> index 1cc0bc78d10e..de340c586995 100644
> >> --- a/Documentation/admin-guide/mm/hugetlbpage.rst
> >> +++ b/Documentation/admin-guide/mm/hugetlbpage.rst
> >> @@ -100,6 +100,50 @@ with a huge page size selection parameter 
> >> "hugepagesz=".   must
> >>  be specified in bytes with optional scale suffix [kKmMgG].  The default 
> >> huge
> >>  page size may be selected with the "default_hugepagesz=" boot 
> >> parameter.
> >>  
> >> +Hugetlb boot command line parameter semantics
> >> +hugepagesz - Specify a huge page size.  Used in conjunction with hugepages
> >> +  parameter to preallocate a number of huge pages of the specified
> >> +  size.  Hence, hugepagesz and hugepages are typically specified in
> >> +  pairs such as:
> >> +  hugepagesz=2M hugepages=512
> >> +  hugepagesz can only be specified once on the command line for a
> >> +  specific huge page size.  Valid huge page sizes are architecture
> >> +  dependent.
> >> +hugepages - Specify the number of huge pages to preallocate.  This 
> >> typically
> >> +  follows a valid hugepagesz parameter.  However, if hugepages is the
> >> +  first or only hugetlb command line parameter it specifies the number
> >> +  of huge pages of default size to allocate.  The number of huge pages
> >> +  of default size specified in this manner can be overwritten by a
> >> +  hugepagesz,hugepages parameter pair for the default size.
> >> +  For example, on an architecture with 2M default huge page size:
> >> +  hugepages=256 hugepagesz=2M hugepages=512
> >> +  will result in 512 2M huge pages being allocated.  If a hugepages
> >> +  parameter is preceded by an invalid hugepagesz parameter, it will
> >> +  be ignored.
> >> +default_hugepagesz - Specify the default huge page size.  This parameter 
> >> can
> >> + 

Re: [PATCH v2 4/4] hugetlbfs: clean up command line processing

2020-04-10 Thread Peter Xu
ssing
> + * A specific huge page size can only be specified once with hugepagesz.
> + * hugepagesz is followed by hugepages on the command line.  The global
> + * variable 'parsed_valid_hugepagesz' is used to determine if prior
> + * hugepagesz argument was valid.
> + */
>  static int __init hugepagesz_setup(char *s)
>  {
>   unsigned long size;
>  
> + if (!hugepages_supported()) {
> + pr_warn("HugeTLB: huge pages not supported, ignoring hugepagesz 
> = %s\n", s);
> + return 0;
> + }
> +
>   size = (unsigned long)memparse(s, NULL);
>  
>   if (!arch_hugetlb_valid_size(size)) {
> @@ -3329,19 +3368,31 @@ static int __init hugepagesz_setup(char *s)
>   }
>  
>   if (size_to_hstate(size)) {
> + parsed_valid_hugepagesz = false;
>   pr_warn("HugeTLB: hugepagesz %s specified twice, ignoring\n", 
> s);
>   return 0;
>   }
>  
> + parsed_valid_hugepagesz = true;
>   hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT);
>   return 1;
>  }
>  __setup("hugepagesz=", hugepagesz_setup);
>  
> +/*
> + * default_hugepagesz command line input
> + * Only one instance of default_hugepagesz allowed on command line.  Do not
> + * add hstate here as that will confuse hugepagesz/hugepages processing.
> + */
>  static int __init default_hugepagesz_setup(char *s)
>  {
>   unsigned long size;
>  
> + if (!hugepages_supported()) {
> + pr_warn("HugeTLB: huge pages not supported, ignoring 
> default_hugepagesz = %s\n", s);
> + return 0;
> + }
> +
>   size = (unsigned long)memparse(s, NULL);
>  
>   if (!arch_hugetlb_valid_size(size)) {
> @@ -3349,6 +3400,11 @@ static int __init default_hugepagesz_setup(char *s)
>   return 0;
>   }
>  
> + if (default_hstate_size) {
> + pr_err("HugeTLB: default_hugepagesz previously specified, 
> ignoring %s\n", s);
> + return 0;
> + }

Nitpick: ideally this can be moved before memparse().

Thanks,

> +
>   default_hstate_size = size;
>   return 1;
>  }
> -- 
> 2.25.1
> 
> 

-- 
Peter Xu



Re: [PATCH v2 3/4] hugetlbfs: remove hugetlb_add_hstate() warning for existing hstate

2020-04-10 Thread Peter Xu
On Wed, Apr 01, 2020 at 11:38:18AM -0700, Mike Kravetz wrote:

[...]

> @@ -3255,7 +3254,6 @@ void __init hugetlb_add_hstate(unsigned int order)
>   unsigned long i;
>  
>   if (size_to_hstate(PAGE_SIZE << order)) {
> - pr_warn("hugepagesz= specified twice, ignoring\n");
>   return;
>   }

Nitpick: I think the braces need to be removed to follow the Linux
coding style.  With that:

Reviewed-by: Peter Xu 

-- 
Peter Xu



Re: [PATCH v2 2/4] hugetlbfs: move hugepagesz= parsing to arch independent code

2020-04-10 Thread Peter Xu
On Wed, Apr 01, 2020 at 11:38:17AM -0700, Mike Kravetz wrote:
> Now that architectures provide arch_hugetlb_valid_size(), parsing
> of "hugepagesz=" can be done in architecture independent code.
> Create a single routine to handle hugepagesz= parsing and remove
> all arch specific routines.  We can also remove the interface
> hugetlb_bad_size() as this is no longer used outside arch independent
> code.
> 
> This also provides consistent behavior of hugetlbfs command line
> options.  The hugepagesz= option should only be specified once for
> a specific size, but some architectures allow multiple instances.
> This appears to be more of an oversight when code was added by some
> architectures to set up ALL huge pages sizes.
> 
> Signed-off-by: Mike Kravetz 

This could change the error messages for a wrong setup on some archs,
but I guess it's not a big deal, assuming that people who want to catch
errors will mostly still look for error lines in general.

Reviewed-by: Peter Xu 

-- 
Peter Xu



Re: [PATCH v2 1/4] hugetlbfs: add arch_hugetlb_valid_size

2020-04-10 Thread Peter Xu
On Wed, Apr 01, 2020 at 11:38:16AM -0700, Mike Kravetz wrote:
> diff --git a/arch/arm64/include/asm/hugetlb.h 
> b/arch/arm64/include/asm/hugetlb.h
> index 2eb6c234d594..81606223494f 100644
> --- a/arch/arm64/include/asm/hugetlb.h
> +++ b/arch/arm64/include/asm/hugetlb.h
> @@ -59,6 +59,8 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned 
> long addr,
>  extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
>pte_t *ptep, pte_t pte, unsigned long sz);
>  #define set_huge_swap_pte_at set_huge_swap_pte_at
> +bool __init arch_hugetlb_valid_size(unsigned long size);
> +#define arch_hugetlb_valid_size arch_hugetlb_valid_size

Sorry for chiming in late.

Since we're working on removing arch-dependent code after all, I'm
wondering whether we can declare arch_hugetlb_valid_size() once in the
common header (e.g. linux/hugetlb.h), then in mm/hugetlb.c:

bool __init __attribute((weak)) arch_hugetlb_valid_size(unsigned long size)
{
return size == HPAGE_SIZE;
}

We can simply redefine arch_hugetlb_valid_size() in arch specific C
files where we want to override the default.  Would that be slightly
cleaner?

Thanks,

-- 
Peter Xu



[PATCH] powerpc/powernv/npu: Remove redundant change_pte() hook

2019-01-31 Thread Peter Xu
The change_pte() notifier was designed to be used as a quick path to
update secondary MMU PTEs on write permission changes or PFN changes.
For KVM, it could reduce the vm-exits when a vcpu faults on pages
that were touched by KSM.  It is not meant to do cache invalidations:
the notifier is called before the real PTE update (please see
set_pte_at_notify(), where set_pte_at() is called afterwards).

All the necessary cache invalidation should already be done in
invalidate_range().

CC: Benjamin Herrenschmidt 
CC: Paul Mackerras 
CC: Michael Ellerman 
CC: Alistair Popple 
CC: Alexey Kardashevskiy 
CC: Mark Hairgrove 
CC: Balbir Singh 
CC: David Gibson 
CC: Andrea Arcangeli 
CC: Jerome Glisse 
CC: Jason Wang 
CC: linuxppc-dev@lists.ozlabs.org
CC: linux-kernel@vger.kernel.org
Signed-off-by: Peter Xu 
---
 arch/powerpc/platforms/powernv/npu-dma.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
b/arch/powerpc/platforms/powernv/npu-dma.c
index 3f58c7dbd581..c003b29d870e 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -917,15 +917,6 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
mmio_invalidate(npu_context, 0, ~0UL);
 }
 
-static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
-   struct mm_struct *mm,
-   unsigned long address,
-   pte_t pte)
-{
-   struct npu_context *npu_context = mn_to_npu_context(mn);
-   mmio_invalidate(npu_context, address, PAGE_SIZE);
-}
-
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -936,7 +927,6 @@ static void pnv_npu2_mn_invalidate_range(struct 
mmu_notifier *mn,
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
.release = pnv_npu2_mn_release,
-   .change_pte = pnv_npu2_mn_change_pte,
.invalidate_range = pnv_npu2_mn_invalidate_range,
 };
 
-- 
2.17.1